From 6ef4ea3c944b9fc5d78317d1172cdcd10f9724f1 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Thu, 5 Dec 2024 21:44:12 +0100 Subject: [PATCH 0001/2157] Input: tsc2007 - accept standard properties Only some driver-specific properties were accepted, change it to use the now-available standard properties which are found in devicetrees containing this chip. Signed-off-by: Andreas Kemnade Acked-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20241205204413.2466775-2-akemnade@kernel.org Signed-off-by: Kevin Hilman --- drivers/input/touchscreen/tsc2007.h | 2 ++ drivers/input/touchscreen/tsc2007_core.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/tsc2007.h b/drivers/input/touchscreen/tsc2007.h index 69b08dd6c8df..e346fb4f7552 100644 --- a/drivers/input/touchscreen/tsc2007.h +++ b/drivers/input/touchscreen/tsc2007.h @@ -19,6 +19,7 @@ #ifndef _TSC2007_H #define _TSC2007_H +#include struct gpio_desc; #define TSC2007_MEASURE_TEMP0 (0x0 << 4) @@ -63,6 +64,7 @@ struct tsc2007 { struct i2c_client *client; + struct touchscreen_properties prop; u16 model; u16 x_plate_ohms; u16 max_rt; diff --git a/drivers/input/touchscreen/tsc2007_core.c b/drivers/input/touchscreen/tsc2007_core.c index 8d832a372b89..5252301686ec 100644 --- a/drivers/input/touchscreen/tsc2007_core.c +++ b/drivers/input/touchscreen/tsc2007_core.c @@ -142,8 +142,7 @@ static irqreturn_t tsc2007_soft_irq(int irq, void *handle) rt = ts->max_rt - rt; input_report_key(input, BTN_TOUCH, 1); - input_report_abs(input, ABS_X, tc.x); - input_report_abs(input, ABS_Y, tc.y); + touchscreen_report_pos(input, &ts->prop, tc.x, tc.y, false); input_report_abs(input, ABS_PRESSURE, rt); input_sync(input); @@ -339,9 +338,9 @@ static int tsc2007_probe(struct i2c_client *client) input_set_drvdata(input_dev, ts); input_set_capability(input_dev, EV_KEY, BTN_TOUCH); - input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, ts->fuzzx, 0); input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, ts->fuzzy, 0); + touchscreen_parse_properties(input_dev, false, &ts->prop); input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, ts->fuzzz, 0); From a22a29655c42d0263a7a84d5d808bfd55f20c53a Mon Sep 17 00:00:00 2001 From: Joshua Murphy Date: Sat, 18 Jan 2025 19:12:36 +0000 Subject: [PATCH 0002/2157] net/9p/fd: support ipv6 for trans=tcp Allows specifying an IPv6 address when mounting a remote 9p file system. Signed-off-by: Joshua Murphy Message-ID: <20250118192122.327-2-joshuamurphy@posteo.net> Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 56 ++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 196060dc6138..2fea50bf047a 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -954,64 +955,55 @@ static void p9_fd_close(struct p9_client *client) kfree(ts); } -/* - * stolen from NFS - maybe should be made a generic function? - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} - static int p9_bind_privport(struct socket *sock) { - struct sockaddr_in cl; + struct sockaddr_storage stor = { 0 }; int port, err = -EINVAL; - memset(&cl, 0, sizeof(cl)); - cl.sin_family = AF_INET; - cl.sin_addr.s_addr = htonl(INADDR_ANY); + stor.ss_family = sock->ops->family; + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY); + else + ((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any; for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { - cl.sin_port = htons((ushort)port); - err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port); + else + ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&stor, sizeof(stor)); if (err != -EADDRINUSE) break; } return err; } - static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; + char port_str[6]; struct socket *csocket; - struct sockaddr_in sin_server; + struct sockaddr_storage stor = { 0 }; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; - if (addr == NULL || valid_ipaddr4(addr) < 0) + if (!addr) return -EINVAL; + sprintf(port_str, "%u", opts.port); + err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr, + port_str, &stor); + if (err < 0) + return err; + csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(opts.port); - err = __sock_create(current->nsproxy->net_ns, PF_INET, + err = __sock_create(current->nsproxy->net_ns, stor.ss_family, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -1030,8 +1022,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) } err = READ_ONCE(csocket->ops)->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); + (struct sockaddr *)&stor, + sizeof(stor), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); From 0dd6770a72f138dabea9eae87f3da6ffa68f0d06 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sat, 11 Jan 2025 12:18:03 -0600 Subject: [PATCH 0003/2157] w1: fix NULL pointer dereference in probe The w1_uart_probe() function calls w1_uart_serdev_open() (which includes devm_serdev_device_open()) before setting the client ops via serdev_device_set_client_ops(). This ordering can trigger a NULL pointer dereference in the serdev controller's receive_buf handler, as it assumes serdev->ops is valid when SERPORT_ACTIVE is set. This is similar to the issue fixed in commit 5e700b384ec1 ("platform/chrome: cros_ec_uart: properly fix race condition") where devm_serdev_device_open() was called before fully initializing the device. Fix the race by ensuring client ops are set before enabling the port via w1_uart_serdev_open(). Fixes: a3c08804364e ("w1: add UART w1 bus driver") Signed-off-by: Chenyuan Yang Acked-by: Christoph Winklhofer Link: https://lore.kernel.org/r/20250111181803.2283611-1-chenyuan0y@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/w1/masters/w1-uart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/w1/masters/w1-uart.c b/drivers/w1/masters/w1-uart.c index a31782e56ba7..c87eea347806 100644 --- a/drivers/w1/masters/w1-uart.c +++ b/drivers/w1/masters/w1-uart.c @@ -372,11 +372,11 @@ static int w1_uart_probe(struct serdev_device *serdev) init_completion(&w1dev->rx_byte_received); mutex_init(&w1dev->rx_mutex); + serdev_device_set_drvdata(serdev, w1dev); + serdev_device_set_client_ops(serdev, &w1_uart_serdev_ops); ret = w1_uart_serdev_open(w1dev); if (ret < 0) return ret; - serdev_device_set_drvdata(serdev, w1dev); - serdev_device_set_client_ops(serdev, &w1_uart_serdev_ops); return w1_add_master_device(&w1dev->bus); } From 33c145297840dddf0dc23d5822159c26aba920d3 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Fri, 24 Jan 2025 10:26:34 +0800 Subject: [PATCH 0004/2157] w1: w1_therm: w1: Use HWMON_CHANNEL_INFO macro to simplify code Use HWMON_CHANNEL_INFO macro to simplify code. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250124022635.16647-9-lihuisong@huawei.com Signed-off-by: Krzysztof Kozlowski --- drivers/w1/slaves/w1_therm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index c85e80c7e130..9ccedb3264fb 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -444,18 +444,8 @@ static int w1_read(struct device *dev, enum hwmon_sensor_types type, } } -static const u32 w1_temp_config[] = { - HWMON_T_INPUT, - 0 -}; - -static const struct hwmon_channel_info w1_temp = { - .type = hwmon_temp, - .config = w1_temp_config, -}; - static const struct hwmon_channel_info * const w1_info[] = { - &w1_temp, + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), NULL }; From f656fa4013388a52d1fdd82268955c5e7be63ad2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 27 Jan 2025 14:15:33 -0800 Subject: [PATCH 0005/2157] dm-crypt: switch to using the crc32 library Now that the crc32() library function takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. (However, this only affects the TCW IV mode of dm-crypt, which is a compatibility mode that is rarely used compared to other dm-crypt modes.) Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- drivers/md/Kconfig | 1 + drivers/md/dm-crypt.c | 41 ++++++++++------------------------------- 2 files changed, 11 insertions(+), 31 deletions(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 0b1870a09e1f..06f809e70f15 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -267,6 +267,7 @@ config DM_CRYPT depends on BLK_DEV_DM depends on (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) depends on (TRUSTED_KEYS || TRUSTED_KEYS=n) + select CRC32 select CRYPTO select CRYPTO_CBC select CRYPTO_ESSIV diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 02a2919f4e5a..9dfdb63220d7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -125,7 +126,6 @@ struct iv_lmk_private { #define TCW_WHITENING_SIZE 16 struct iv_tcw_private { - struct crypto_shash *crc32_tfm; u8 *iv_seed; u8 *whitening; }; @@ -607,10 +607,6 @@ static void crypt_iv_tcw_dtr(struct crypt_config *cc) tcw->iv_seed = NULL; kfree_sensitive(tcw->whitening); tcw->whitening = NULL; - - if (tcw->crc32_tfm && !IS_ERR(tcw->crc32_tfm)) - crypto_free_shash(tcw->crc32_tfm); - tcw->crc32_tfm = NULL; } static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti, @@ -628,13 +624,6 @@ static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti, return -EINVAL; } - tcw->crc32_tfm = crypto_alloc_shash("crc32", 0, - CRYPTO_ALG_ALLOCATES_MEMORY); - if (IS_ERR(tcw->crc32_tfm)) { - ti->error = "Error initializing CRC32 in TCW"; - return PTR_ERR(tcw->crc32_tfm); - } - tcw->iv_seed = kzalloc(cc->iv_size, GFP_KERNEL); tcw->whitening = kzalloc(TCW_WHITENING_SIZE, GFP_KERNEL); if (!tcw->iv_seed || !tcw->whitening) { @@ -668,36 +657,28 @@ static int crypt_iv_tcw_wipe(struct crypt_config *cc) return 0; } -static int crypt_iv_tcw_whitening(struct crypt_config *cc, - struct dm_crypt_request *dmreq, - u8 *data) +static void crypt_iv_tcw_whitening(struct crypt_config *cc, + struct dm_crypt_request *dmreq, u8 *data) { struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; __le64 sector = cpu_to_le64(dmreq->iv_sector); u8 buf[TCW_WHITENING_SIZE]; - SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm); - int i, r; + int i; /* xor whitening with sector number */ crypto_xor_cpy(buf, tcw->whitening, (u8 *)§or, 8); crypto_xor_cpy(&buf[8], tcw->whitening + 8, (u8 *)§or, 8); /* calculate crc32 for every 32bit part and xor it */ - desc->tfm = tcw->crc32_tfm; - for (i = 0; i < 4; i++) { - r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]); - if (r) - goto out; - } + for (i = 0; i < 4; i++) + put_unaligned_le32(crc32(0, &buf[i * 4], 4), &buf[i * 4]); crypto_xor(&buf[0], &buf[12], 4); crypto_xor(&buf[4], &buf[8], 4); /* apply whitening (8 bytes) to whole sector */ for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) crypto_xor(data + i * 8, buf, 8); -out: memzero_explicit(buf, sizeof(buf)); - return r; } static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, @@ -707,13 +688,12 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; __le64 sector = cpu_to_le64(dmreq->iv_sector); u8 *src; - int r = 0; /* Remove whitening from ciphertext */ if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) { sg = crypt_get_sg_data(cc, dmreq->sg_in); src = kmap_local_page(sg_page(sg)); - r = crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset); + crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset); kunmap_local(src); } @@ -723,7 +703,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)§or, cc->iv_size - 8); - return r; + return 0; } static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, @@ -731,7 +711,6 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, { struct scatterlist *sg; u8 *dst; - int r; if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) return 0; @@ -739,10 +718,10 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, /* Apply whitening on ciphertext */ sg = crypt_get_sg_data(cc, dmreq->sg_out); dst = kmap_local_page(sg_page(sg)); - r = crypt_iv_tcw_whitening(cc, dmreq, dst + sg->offset); + crypt_iv_tcw_whitening(cc, dmreq, dst + sg->offset); kunmap_local(dst); - return r; + return 0; } static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv, From 82596487635012460c19d4dd257d5d59147cbf27 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 29 Jan 2025 13:58:55 +0100 Subject: [PATCH 0006/2157] dm-verity: Document restart_on_error and panic_on_error options This patch adds documentation for options introduced in commit f811b83879fb ("dm-verity: introduce the options restart_on_error and panic_on_error"). Signed-off-by: Milan Broz Signed-off-by: Mikulas Patocka --- Documentation/admin-guide/device-mapper/verity.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index a65c1602cb23..fb3a045d0c72 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -87,6 +87,15 @@ panic_on_corruption Panic the device when a corrupted block is discovered. This option is not compatible with ignore_corruption and restart_on_corruption. +restart_on_error + Restart the system when an I/O error is detected. + This option can be combined with the restart_on_corruption option. + +panic_on_error + Panic the device when an I/O error is detected. This option is + not compatible with the restart_on_error option but can be combined + with the panic_on_corruption option. + ignore_zero_blocks Do not verify blocks that are expected to contain zeroes and always return zeroes instead. This may be useful if the partition contains unused blocks From d8955df3837f7ae1b555a66e5153235f6919b7a5 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 29 Jan 2025 13:58:56 +0100 Subject: [PATCH 0007/2157] dm-integrity: Document Inline mode for storing integrity data This patch adds documentation for new 'I' mode for dm-integrity introduced in commit fb0987682c62 ("dm-integrity: introduce the Inline mode"). Signed-off-by: Milan Broz Signed-off-by: Mikulas Patocka --- Documentation/admin-guide/device-mapper/dm-integrity.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index d8a5f14d0e3c..c2e18ecc065c 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -92,6 +92,11 @@ Target arguments: allowed. This mode is useful for data recovery if the device cannot be activated in any of the other standard modes. + I - inline mode - in this mode, dm-integrity will store integrity + data directly in the underlying device sectors. + The underlying device must have an integrity profile that + allows storing user integrity data and provides enough + space for the selected integrity tag. 5. the number of additional arguments From 8892606045fda29be7adfc5fba21bb1dfe079b93 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 29 Jan 2025 13:58:57 +0100 Subject: [PATCH 0008/2157] dm-crypt: Document integrity_key_size option. This patch adds documentation for new option introduced in commit 4441686b24a1 ("dm-crypt: Allow to specify the integrity key size as option"). Signed-off-by: Milan Broz Signed-off-by: Mikulas Patocka --- Documentation/admin-guide/device-mapper/dm-crypt.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 9f8139ff97d6..4467f6d4b632 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -146,6 +146,11 @@ integrity:: integrity for the encrypted device. The additional space is then used for storing authentication tag (and persistent IV if needed). +integrity_key_size: + Optionally set the integrity key size if it differs from the digest size. + It allows the use of wrapped key algorithms where the key size is + independent of the cryptographic key size. + sector_size: Use as the encryption unit instead of 512 bytes sectors. This option can be in range 512 - 4096 bytes and must be power of two. From 3280c9313c9adce01550cc9f00edfb1dc7c744da Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Wed, 29 Jan 2025 18:26:05 -0500 Subject: [PATCH 0009/2157] dm vdo: use a short static string for thread name prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also remove MODULE_NAME and a BUG_ON check, both unneeded. This fixes a warning about string truncation in snprintf that will never happen in practice: drivers/md/dm-vdo/vdo.c: In function ‘vdo_make’: drivers/md/dm-vdo/vdo.c:564:5: error: ‘%s’ directive output may be truncated writing up to 55 bytes into a region of size 16 [-Werror=format-truncation=] "%s%u", MODULE_NAME, instance); ^~ drivers/md/dm-vdo/vdo.c:563:2: note: ‘snprintf’ output between 2 and 66 bytes into a destination of size 16 snprintf(vdo->thread_name_prefix, sizeof(vdo->thread_name_prefix), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "%s%u", MODULE_NAME, instance); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: John Garry Reviewed-by: John Garry Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/vdo.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c index a7e32baab4af..80b608674022 100644 --- a/drivers/md/dm-vdo/vdo.c +++ b/drivers/md/dm-vdo/vdo.c @@ -31,9 +31,7 @@ #include #include -#include #include -#include #include #include #include @@ -142,12 +140,6 @@ static void finish_vdo_request_queue(void *ptr) vdo_unregister_allocating_thread(); } -#ifdef MODULE -#define MODULE_NAME THIS_MODULE->name -#else -#define MODULE_NAME "dm-vdo" -#endif /* MODULE */ - static const struct vdo_work_queue_type default_queue_type = { .start = start_vdo_request_queue, .finish = finish_vdo_request_queue, @@ -559,8 +551,7 @@ int vdo_make(unsigned int instance, struct device_config *config, char **reason, *vdo_ptr = vdo; snprintf(vdo->thread_name_prefix, sizeof(vdo->thread_name_prefix), - "%s%u", MODULE_NAME, instance); - BUG_ON(vdo->thread_name_prefix[0] == '\0'); + "vdo%u", instance); result = vdo_allocate(vdo->thread_config.thread_count, struct vdo_thread, __func__, &vdo->threads); if (result != VDO_SUCCESS) { From f4e99b846c90163d350c69d6581ac38dd5818eb8 Mon Sep 17 00:00:00 2001 From: Chung Chung Date: Wed, 29 Jan 2025 18:27:12 -0500 Subject: [PATCH 0010/2157] dm vdo indexer: prevent unterminated string warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix array initialization that triggers a warning: error: initializer-string for array of ‘unsigned char’ is too long [-Werror=unterminated-string-initialization] Signed-off-by: Chung Chung Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/indexer/index-layout.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-vdo/indexer/index-layout.c b/drivers/md/dm-vdo/indexer/index-layout.c index af8fab83b0f3..61edf2b72427 100644 --- a/drivers/md/dm-vdo/indexer/index-layout.c +++ b/drivers/md/dm-vdo/indexer/index-layout.c @@ -54,7 +54,6 @@ * Each save also has a unique nonce. */ -#define MAGIC_SIZE 32 #define NONCE_INFO_SIZE 32 #define MAX_SAVES 2 @@ -98,9 +97,11 @@ enum region_type { #define SUPER_VERSION_CURRENT 3 #define SUPER_VERSION_MAXIMUM 7 -static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*"; +static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*"; static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */ +#define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1) + struct region_header { u64 magic; u64 region_blocks; From 148a9cec84d06cbeb6e1fa7aca3f1603d1771c0e Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Fri, 31 Jan 2025 20:50:07 -0500 Subject: [PATCH 0011/2157] dm vdo: remove checks that can not fail Remove checks that can't fail. Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/constants.h | 3 --- drivers/md/dm-vdo/encodings.c | 20 +------------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h index a8c4d6e24b38..2a8b03779f87 100644 --- a/drivers/md/dm-vdo/constants.h +++ b/drivers/md/dm-vdo/constants.h @@ -44,9 +44,6 @@ enum { /* The default size of each slab journal, in blocks */ DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224, - /* Unit test minimum */ - MINIMUM_VDO_SLAB_JOURNAL_BLOCKS = 2, - /* * The initial size of lbn_operations and pbn_operations, which is based upon the expected * maximum number of outstanding VIOs. This value was chosen to make it highly unlikely diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c index 100e92f8f866..b7cc0f41caca 100644 --- a/drivers/md/dm-vdo/encodings.c +++ b/drivers/md/dm-vdo/encodings.c @@ -711,24 +711,11 @@ int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_block ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks); meta_blocks = (ref_blocks + slab_journal_blocks); - /* Make sure test code hasn't configured slabs to be too small. */ + /* Make sure configured slabs are not too small. */ if (meta_blocks >= slab_size) return VDO_BAD_CONFIGURATION; - /* - * If the slab size is very small, assume this must be a unit test and override the number - * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their - * data_blocks fields to be the exact capacity of the configured volume, and that used to - * fall out since they use a power of two for the number of data blocks, the slab size was - * a power of two, and every block in a slab was a data block. - * - * TODO: Try to figure out some way of structuring testParameters and unit tests so this - * hack isn't needed without having to edit several unit tests every time the metadata size - * changes by one block. - */ data_blocks = slab_size - meta_blocks; - if ((slab_size < 1024) && !is_power_of_2(data_blocks)) - data_blocks = ((block_count_t) 1 << ilog2(data_blocks)); /* * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in @@ -1221,11 +1208,6 @@ int vdo_validate_config(const struct vdo_config *config, if (result != VDO_SUCCESS) return result; - result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS, - "slab journal size meets minimum size"); - if (result != VDO_SUCCESS) - return result; - result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size, "slab journal size is within expected bound"); if (result != VDO_SUCCESS) From 2b515cea77e370332715034401d2b7360ef4d4bc Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 31 Jan 2025 21:18:03 -0500 Subject: [PATCH 0012/2157] dm vdo vio-pool: add a pool pointer to pooled_vio This allows us to simplify the return_vio_to_pool interface. Also, we don't need to use vdo_forget on local variables or arguments that are about to go out of scope anyway. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/block-map.c | 9 ++++----- drivers/md/dm-vdo/slab-depot.c | 15 +++++++-------- drivers/md/dm-vdo/vio.c | 8 +++++--- drivers/md/dm-vdo/vio.h | 4 +++- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c index 89cb7942ec5c..bc836f95f8b5 100644 --- a/drivers/md/dm-vdo/block-map.c +++ b/drivers/md/dm-vdo/block-map.c @@ -1544,7 +1544,7 @@ static void write_page_if_not_dirtied(struct vdo_waiter *waiter, void *context) static void return_to_pool(struct block_map_zone *zone, struct pooled_vio *vio) { - return_vio_to_pool(zone->vio_pool, vio); + return_vio_to_pool(vio); check_for_drain_complete(zone); } @@ -1837,7 +1837,7 @@ static void finish_block_map_page_load(struct vdo_completion *completion) if (!vdo_copy_valid_page(vio->data, nonce, pbn, page)) vdo_format_block_map_page(page, nonce, pbn, false); - return_vio_to_pool(zone->vio_pool, pooled); + return_vio_to_pool(pooled); /* Release our claim to the load and wake any waiters */ release_page_lock(data_vio, "load"); @@ -1851,10 +1851,9 @@ static void handle_io_error(struct vdo_completion *completion) struct vio *vio = as_vio(completion); struct pooled_vio *pooled = container_of(vio, struct pooled_vio, vio); struct data_vio *data_vio = completion->parent; - struct block_map_zone *zone = pooled->context; vio_record_metadata_io_error(vio); - return_vio_to_pool(zone->vio_pool, pooled); + return_vio_to_pool(pooled); abort_load(data_vio, result); } @@ -2499,7 +2498,7 @@ static void finish_cursor(struct cursor *cursor) struct cursors *cursors = cursor->parent; struct vdo_completion *completion = cursors->completion; - return_vio_to_pool(cursors->pool, vdo_forget(cursor->vio)); + return_vio_to_pool(vdo_forget(cursor->vio)); if (--cursors->active_roots > 0) return; diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index 8f0a35c63af6..7249b281f99f 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -414,8 +414,7 @@ static void complete_reaping(struct vdo_completion *completion) { struct slab_journal *journal = completion->parent; - return_vio_to_pool(journal->slab->allocator->vio_pool, - vio_as_pooled_vio(as_vio(vdo_forget(completion)))); + return_vio_to_pool(vio_as_pooled_vio(as_vio(completion))); finish_reaping(journal); reap_slab_journal(journal); } @@ -698,7 +697,7 @@ static void complete_write(struct vdo_completion *completion) sequence_number_t committed = get_committing_sequence_number(pooled); list_del_init(&pooled->list_entry); - return_vio_to_pool(journal->slab->allocator->vio_pool, vdo_forget(pooled)); + return_vio_to_pool(pooled); if (result != VDO_SUCCESS) { vio_record_metadata_io_error(as_vio(completion)); @@ -1076,7 +1075,7 @@ static void finish_reference_block_write(struct vdo_completion *completion) /* Release the slab journal lock. */ adjust_slab_journal_block_reference(&slab->journal, block->slab_journal_lock_to_release, -1); - return_vio_to_pool(slab->allocator->vio_pool, pooled); + return_vio_to_pool(pooled); /* * We can't clear the is_writing flag earlier as releasing the slab journal lock may cause @@ -1170,7 +1169,7 @@ static void handle_io_error(struct vdo_completion *completion) struct vdo_slab *slab = ((struct reference_block *) completion->parent)->slab; vio_record_metadata_io_error(vio); - return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio)); + return_vio_to_pool(vio_as_pooled_vio(vio)); slab->active_count--; vdo_enter_read_only_mode(slab->allocator->depot->vdo, result); check_if_slab_drained(slab); @@ -2242,7 +2241,7 @@ static void finish_reference_block_load(struct vdo_completion *completion) struct vdo_slab *slab = block->slab; unpack_reference_block((struct packed_reference_block *) vio->data, block); - return_vio_to_pool(slab->allocator->vio_pool, pooled); + return_vio_to_pool(pooled); slab->active_count--; clear_provisional_references(block); @@ -2429,7 +2428,7 @@ static void finish_loading_journal(struct vdo_completion *completion) initialize_journal_state(journal); } - return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio)); + return_vio_to_pool(vio_as_pooled_vio(vio)); vdo_finish_loading_with_result(&slab->state, allocate_counters_if_clean(slab)); } @@ -2449,7 +2448,7 @@ static void handle_load_error(struct vdo_completion *completion) struct vio *vio = as_vio(completion); vio_record_metadata_io_error(vio); - return_vio_to_pool(journal->slab->allocator->vio_pool, vio_as_pooled_vio(vio)); + return_vio_to_pool(vio_as_pooled_vio(vio)); vdo_finish_loading_with_result(&journal->slab->state, result); } diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c index e710f3c5a972..4d96989a716d 100644 --- a/drivers/md/dm-vdo/vio.c +++ b/drivers/md/dm-vdo/vio.c @@ -345,6 +345,7 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id, } pooled->context = context; + pooled->pool = pool; list_add_tail(&pooled->pool_entry, &pool->available); } @@ -419,12 +420,13 @@ void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter) } /** - * return_vio_to_pool() - Return a vio to the pool - * @pool: The vio pool. + * return_vio_to_pool() - Return a vio to its pool * @vio: The pooled vio to return. */ -void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio) +void return_vio_to_pool(struct pooled_vio *vio) { + struct vio_pool *pool = vio->pool; + VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()), "vio pool entry returned on same thread as it was acquired"); diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h index 3490e9f59b04..2e3f878e2074 100644 --- a/drivers/md/dm-vdo/vio.h +++ b/drivers/md/dm-vdo/vio.h @@ -30,6 +30,8 @@ struct pooled_vio { void *context; /* The list entry used by the pool */ struct list_head pool_entry; + /* The pool this vio is allocated from */ + struct vio_pool *pool; }; /** @@ -194,6 +196,6 @@ int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t th void free_vio_pool(struct vio_pool *pool); bool __must_check is_vio_pool_busy(struct vio_pool *pool); void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter); -void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio); +void return_vio_to_pool(struct pooled_vio *vio); #endif /* VIO_H */ From 979a0fd396f4924e7ee7ca23186ffeeeefd8b4ca Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 31 Jan 2025 21:18:04 -0500 Subject: [PATCH 0013/2157] dm vdo vio-pool: support pools with multiple data blocks per vio Support pools with multiple data blocks per vio Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/block-map.c | 2 +- drivers/md/dm-vdo/slab-depot.c | 2 +- drivers/md/dm-vdo/vio.c | 10 ++++++---- drivers/md/dm-vdo/vio.h | 7 ++++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c index bc836f95f8b5..1f7cdd837ff9 100644 --- a/drivers/md/dm-vdo/block-map.c +++ b/drivers/md/dm-vdo/block-map.c @@ -2745,7 +2745,7 @@ static int __must_check initialize_block_map_zone(struct block_map *map, if (result != VDO_SUCCESS) return result; - result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE, + result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE, 1, zone->thread_id, VIO_TYPE_BLOCK_MAP_INTERIOR, VIO_PRIORITY_METADATA, zone, &zone->vio_pool); if (result != VDO_SUCCESS) diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index 7249b281f99f..aca11271d66e 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -3999,7 +3999,7 @@ static int __must_check initialize_block_allocator(struct slab_depot *depot, return result; vdo_initialize_completion(&allocator->completion, vdo, VDO_BLOCK_ALLOCATOR_COMPLETION); - result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, allocator->thread_id, + result = make_vio_pool(vdo, BLOCK_ALLOCATOR_VIO_POOL_SIZE, 1, allocator->thread_id, VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA, allocator, &allocator->vio_pool); if (result != VDO_SUCCESS) diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c index 4d96989a716d..f69fb74a238c 100644 --- a/drivers/md/dm-vdo/vio.c +++ b/drivers/md/dm-vdo/vio.c @@ -301,6 +301,7 @@ void vio_record_metadata_io_error(struct vio *vio) * make_vio_pool() - Create a new vio pool. * @vdo: The vdo. * @pool_size: The number of vios in the pool. + * @block_count: The number of 4k blocks per vio. * @thread_id: The ID of the thread using this pool. * @vio_type: The type of vios in the pool. * @priority: The priority with which vios from the pool should be enqueued. @@ -309,13 +310,14 @@ void vio_record_metadata_io_error(struct vio *vio) * * Return: A success or error code. */ -int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id, +int make_vio_pool(struct vdo *vdo, size_t pool_size, size_t block_count, thread_id_t thread_id, enum vio_type vio_type, enum vio_priority priority, void *context, struct vio_pool **pool_ptr) { struct vio_pool *pool; char *ptr; int result; + size_t per_vio_size = VDO_BLOCK_SIZE * block_count; result = vdo_allocate_extended(struct vio_pool, pool_size, struct pooled_vio, __func__, &pool); @@ -326,7 +328,7 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id, INIT_LIST_HEAD(&pool->available); INIT_LIST_HEAD(&pool->busy); - result = vdo_allocate(pool_size * VDO_BLOCK_SIZE, char, + result = vdo_allocate(pool_size * per_vio_size, char, "VIO pool buffer", &pool->buffer); if (result != VDO_SUCCESS) { free_vio_pool(pool); @@ -334,10 +336,10 @@ int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id, } ptr = pool->buffer; - for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += VDO_BLOCK_SIZE) { + for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += per_vio_size) { struct pooled_vio *pooled = &pool->vios[pool->size]; - result = allocate_vio_components(vdo, vio_type, priority, NULL, 1, ptr, + result = allocate_vio_components(vdo, vio_type, priority, NULL, block_count, ptr, &pooled->vio); if (result != VDO_SUCCESS) { free_vio_pool(pool); diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h index 2e3f878e2074..8a7e3f72e80b 100644 --- a/drivers/md/dm-vdo/vio.h +++ b/drivers/md/dm-vdo/vio.h @@ -190,9 +190,10 @@ static inline struct pooled_vio *vio_as_pooled_vio(struct vio *vio) struct vio_pool; -int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id, - enum vio_type vio_type, enum vio_priority priority, - void *context, struct vio_pool **pool_ptr); +int __must_check make_vio_pool(struct vdo *vdo, size_t pool_size, size_t block_count, + thread_id_t thread_id, enum vio_type vio_type, + enum vio_priority priority, void *context, + struct vio_pool **pool_ptr); void free_vio_pool(struct vio_pool *pool); bool __must_check is_vio_pool_busy(struct vio_pool *pool); void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter); From f979da512553a41a657f2c1198277e84d66f8ce3 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 31 Jan 2025 21:18:05 -0500 Subject: [PATCH 0014/2157] dm vdo vio-pool: allow variable-sized metadata vios With larger-sized metadata vio pools, vdo will sometimes need to issue I/O with a smaller size than the allocated size. Since vio_reset_bio is where the bvec array and I/O size are initialized, this reset interface must now specify what I/O size to use. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/io-submitter.c | 6 ++++-- drivers/md/dm-vdo/io-submitter.h | 18 +++++++++++++--- drivers/md/dm-vdo/types.h | 3 +++ drivers/md/dm-vdo/vio.c | 36 +++++++++++++++++++------------- drivers/md/dm-vdo/vio.h | 2 ++ 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c index 421e5436c32c..11d47770b54d 100644 --- a/drivers/md/dm-vdo/io-submitter.c +++ b/drivers/md/dm-vdo/io-submitter.c @@ -327,6 +327,7 @@ void vdo_submit_data_vio(struct data_vio *data_vio) * @error_handler: the handler for submission or I/O errors (may be NULL) * @operation: the type of I/O to perform * @data: the buffer to read or write (may be NULL) + * @size: the I/O amount in bytes * * The vio is enqueued on a vdo bio queue so that bio submission (which may block) does not block * other vdo threads. @@ -338,7 +339,7 @@ void vdo_submit_data_vio(struct data_vio *data_vio) */ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, bio_end_io_t callback, vdo_action_fn error_handler, - blk_opf_t operation, char *data) + blk_opf_t operation, char *data, int size) { int result; struct vdo_completion *completion = &vio->completion; @@ -349,7 +350,8 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, vdo_reset_completion(completion); completion->error_handler = error_handler; - result = vio_reset_bio(vio, data, callback, operation | REQ_META, physical); + result = vio_reset_bio_with_size(vio, data, size, callback, operation | REQ_META, + physical); if (result != VDO_SUCCESS) { continue_vio(vio, result); return; diff --git a/drivers/md/dm-vdo/io-submitter.h b/drivers/md/dm-vdo/io-submitter.h index 80748699496f..3088f11055fd 100644 --- a/drivers/md/dm-vdo/io-submitter.h +++ b/drivers/md/dm-vdo/io-submitter.h @@ -8,6 +8,7 @@ #include +#include "constants.h" #include "types.h" struct io_submitter; @@ -26,14 +27,25 @@ void vdo_submit_data_vio(struct data_vio *data_vio); void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, bio_end_io_t callback, vdo_action_fn error_handler, - blk_opf_t operation, char *data); + blk_opf_t operation, char *data, int size); static inline void vdo_submit_metadata_vio(struct vio *vio, physical_block_number_t physical, bio_end_io_t callback, vdo_action_fn error_handler, blk_opf_t operation) { __submit_metadata_vio(vio, physical, callback, error_handler, - operation, vio->data); + operation, vio->data, vio->block_count * VDO_BLOCK_SIZE); +} + +static inline void vdo_submit_metadata_vio_with_size(struct vio *vio, + physical_block_number_t physical, + bio_end_io_t callback, + vdo_action_fn error_handler, + blk_opf_t operation, + int size) +{ + __submit_metadata_vio(vio, physical, callback, error_handler, + operation, vio->data, size); } static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback, @@ -41,7 +53,7 @@ static inline void vdo_submit_flush_vio(struct vio *vio, bio_end_io_t callback, { /* FIXME: Can we just use REQ_OP_FLUSH? */ __submit_metadata_vio(vio, 0, callback, error_handler, - REQ_OP_WRITE | REQ_PREFLUSH, NULL); + REQ_OP_WRITE | REQ_PREFLUSH, NULL, 0); } #endif /* VDO_IO_SUBMITTER_H */ diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h index dbe892b10f26..cdf36e7d7702 100644 --- a/drivers/md/dm-vdo/types.h +++ b/drivers/md/dm-vdo/types.h @@ -376,6 +376,9 @@ struct vio { /* The size of this vio in blocks */ unsigned int block_count; + /* The amount of data to be read or written, in bytes */ + unsigned int io_size; + /* The data being read or written. */ char *data; diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c index f69fb74a238c..e7f4153e55e3 100644 --- a/drivers/md/dm-vdo/vio.c +++ b/drivers/md/dm-vdo/vio.c @@ -188,14 +188,23 @@ void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callb /* * Prepares the bio to perform IO with the specified buffer. May only be used on a VDO-allocated - * bio, as it assumes the bio wraps a 4k buffer that is 4k aligned, but there does not have to be a - * vio associated with the bio. + * bio, as it assumes the bio wraps a 4k-multiple buffer that is 4k aligned, but there does not + * have to be a vio associated with the bio. */ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback, blk_opf_t bi_opf, physical_block_number_t pbn) { - int bvec_count, offset, len, i; + return vio_reset_bio_with_size(vio, data, vio->block_count * VDO_BLOCK_SIZE, + callback, bi_opf, pbn); +} + +int vio_reset_bio_with_size(struct vio *vio, char *data, int size, bio_end_io_t callback, + blk_opf_t bi_opf, physical_block_number_t pbn) +{ + int bvec_count, offset, i; struct bio *bio = vio->bio; + int vio_size = vio->block_count * VDO_BLOCK_SIZE; + int remaining; bio_reset(bio, bio->bi_bdev, bi_opf); vdo_set_bio_properties(bio, vio, callback, bi_opf, pbn); @@ -205,22 +214,21 @@ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback, bio->bi_ioprio = 0; bio->bi_io_vec = bio->bi_inline_vecs; bio->bi_max_vecs = vio->block_count + 1; - len = VDO_BLOCK_SIZE * vio->block_count; + if (VDO_ASSERT(size <= vio_size, "specified size %d is not greater than allocated %d", + size, vio_size) != VDO_SUCCESS) + size = vio_size; + vio->io_size = size; offset = offset_in_page(data); - bvec_count = DIV_ROUND_UP(offset + len, PAGE_SIZE); + bvec_count = DIV_ROUND_UP(offset + size, PAGE_SIZE); + remaining = size; - /* - * If we knew that data was always on one page, or contiguous pages, we wouldn't need the - * loop. But if we're using vmalloc, it's not impossible that the data is in different - * pages that can't be merged in bio_add_page... - */ - for (i = 0; (i < bvec_count) && (len > 0); i++) { + for (i = 0; (i < bvec_count) && (remaining > 0); i++) { struct page *page; int bytes_added; int bytes = PAGE_SIZE - offset; - if (bytes > len) - bytes = len; + if (bytes > remaining) + bytes = remaining; page = is_vmalloc_addr(data) ? vmalloc_to_page(data) : virt_to_page(data); bytes_added = bio_add_page(bio, page, bytes, offset); @@ -232,7 +240,7 @@ int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback, } data += bytes; - len -= bytes; + remaining -= bytes; offset = 0; } diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h index 8a7e3f72e80b..4bfcb21901f1 100644 --- a/drivers/md/dm-vdo/vio.h +++ b/drivers/md/dm-vdo/vio.h @@ -125,6 +125,8 @@ void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callb int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback, blk_opf_t bi_opf, physical_block_number_t pbn); +int vio_reset_bio_with_size(struct vio *vio, char *data, int size, bio_end_io_t callback, + blk_opf_t bi_opf, physical_block_number_t pbn); void update_vio_error_stats(struct vio *vio, const char *format, ...) __printf(2, 3); From 0ce46f4f751bc15375aea501a991ec931278b415 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 31 Jan 2025 21:18:06 -0500 Subject: [PATCH 0015/2157] dm vdo slab-depot: read refcount blocks in large chunks at load time At startup, vdo loads all the reference count data before the device reports that it is ready. Using a pool of large metadata vios can improve the startup speed of vdo. The pool of large vios is released after the device is ready. During normal operation, reference counts are updated 4kB at a time, as before. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/slab-depot.c | 63 +++++++++++++++++++++++++--------- drivers/md/dm-vdo/slab-depot.h | 13 ++++++- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index aca11271d66e..c4d3bbdf5b69 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -1170,7 +1170,7 @@ static void handle_io_error(struct vdo_completion *completion) vio_record_metadata_io_error(vio); return_vio_to_pool(vio_as_pooled_vio(vio)); - slab->active_count--; + slab->active_count -= vio->io_size / VDO_BLOCK_SIZE; vdo_enter_read_only_mode(slab->allocator->depot->vdo, result); check_if_slab_drained(slab); } @@ -2239,13 +2239,20 @@ static void finish_reference_block_load(struct vdo_completion *completion) struct pooled_vio *pooled = vio_as_pooled_vio(vio); struct reference_block *block = completion->parent; struct vdo_slab *slab = block->slab; + unsigned int block_count = vio->io_size / VDO_BLOCK_SIZE; + unsigned int i; + char *data = vio->data; - unpack_reference_block((struct packed_reference_block *) vio->data, block); + for (i = 0; i < block_count; i++, block++, data += VDO_BLOCK_SIZE) { + struct packed_reference_block *packed = (struct packed_reference_block *) data; + + unpack_reference_block(packed, block); + clear_provisional_references(block); + slab->free_blocks -= block->allocated_count; + } return_vio_to_pool(pooled); - slab->active_count--; - clear_provisional_references(block); + slab->active_count -= block_count; - slab->free_blocks -= block->allocated_count; check_if_slab_drained(slab); } @@ -2259,23 +2266,25 @@ static void load_reference_block_endio(struct bio *bio) } /** - * load_reference_block() - After a block waiter has gotten a VIO from the VIO pool, load the - * block. - * @waiter: The waiter of the block to load. + * load_reference_block_group() - After a block waiter has gotten a VIO from the VIO pool, load + * a set of blocks. + * @waiter: The waiter of the first block to load. * @context: The VIO returned by the pool. */ -static void load_reference_block(struct vdo_waiter *waiter, void *context) +static void load_reference_block_group(struct vdo_waiter *waiter, void *context) { struct pooled_vio *pooled = context; struct vio *vio = &pooled->vio; struct reference_block *block = container_of(waiter, struct reference_block, waiter); - size_t block_offset = (block - block->slab->reference_blocks); + u32 block_offset = block - block->slab->reference_blocks; + u32 max_block_count = block->slab->reference_block_count - block_offset; + u32 block_count = min_t(int, vio->block_count, max_block_count); vio->completion.parent = block; - vdo_submit_metadata_vio(vio, block->slab->ref_counts_origin + block_offset, - load_reference_block_endio, handle_io_error, - REQ_OP_READ); + vdo_submit_metadata_vio_with_size(vio, block->slab->ref_counts_origin + block_offset, + load_reference_block_endio, handle_io_error, + REQ_OP_READ, block_count * VDO_BLOCK_SIZE); } /** @@ -2285,14 +2294,21 @@ static void load_reference_block(struct vdo_waiter *waiter, void *context) static void load_reference_blocks(struct vdo_slab *slab) { block_count_t i; + u64 blocks_per_vio = slab->allocator->refcount_blocks_per_big_vio; + struct vio_pool *pool = slab->allocator->refcount_big_vio_pool; + + if (!pool) { + pool = slab->allocator->vio_pool; + blocks_per_vio = 1; + } slab->free_blocks = slab->block_count; slab->active_count = slab->reference_block_count; - for (i = 0; i < slab->reference_block_count; i++) { + for (i = 0; i < slab->reference_block_count; i += blocks_per_vio) { struct vdo_waiter *waiter = &slab->reference_blocks[i].waiter; - waiter->callback = load_reference_block; - acquire_vio_from_pool(slab->allocator->vio_pool, waiter); + waiter->callback = load_reference_block_group; + acquire_vio_from_pool(pool, waiter); } } @@ -2699,6 +2715,7 @@ static void finish_scrubbing(struct slab_scrubber *scrubber, int result) vdo_log_info("VDO commencing normal operation"); else if (prior_state == VDO_RECOVERING) vdo_log_info("Exiting recovery mode"); + free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool)); } /* @@ -3982,6 +3999,7 @@ static int __must_check initialize_block_allocator(struct slab_depot *depot, struct vdo *vdo = depot->vdo; block_count_t max_free_blocks = depot->slab_config.data_blocks; unsigned int max_priority = (2 + ilog2(max_free_blocks)); + u32 reference_block_count, refcount_reads_needed, refcount_blocks_per_vio; *allocator = (struct block_allocator) { .depot = depot, @@ -4005,6 +4023,18 @@ static int __must_check initialize_block_allocator(struct slab_depot *depot, if (result != VDO_SUCCESS) return result; + /* Initialize the refcount-reading vio pool. */ + reference_block_count = vdo_get_saved_reference_count_size(depot->slab_config.slab_blocks); + refcount_reads_needed = DIV_ROUND_UP(reference_block_count, MAX_BLOCKS_PER_VIO); + refcount_blocks_per_vio = DIV_ROUND_UP(reference_block_count, refcount_reads_needed); + allocator->refcount_blocks_per_big_vio = refcount_blocks_per_vio; + result = make_vio_pool(vdo, BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE, + allocator->refcount_blocks_per_big_vio, allocator->thread_id, + VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA, + NULL, &allocator->refcount_big_vio_pool); + if (result != VDO_SUCCESS) + return result; + result = initialize_slab_scrubber(allocator); if (result != VDO_SUCCESS) return result; @@ -4222,6 +4252,7 @@ void vdo_free_slab_depot(struct slab_depot *depot) uninitialize_allocator_summary(allocator); uninitialize_scrubber_vio(&allocator->scrubber); free_vio_pool(vdo_forget(allocator->vio_pool)); + free_vio_pool(vdo_forget(allocator->refcount_big_vio_pool)); vdo_free_priority_table(vdo_forget(allocator->prioritized_slabs)); } diff --git a/drivers/md/dm-vdo/slab-depot.h b/drivers/md/dm-vdo/slab-depot.h index f234853501ca..fadc0c9d4dc4 100644 --- a/drivers/md/dm-vdo/slab-depot.h +++ b/drivers/md/dm-vdo/slab-depot.h @@ -45,6 +45,13 @@ enum { /* The number of vios in the vio pool is proportional to the throughput of the VDO. */ BLOCK_ALLOCATOR_VIO_POOL_SIZE = 128, + + /* + * The number of vios in the vio pool used for loading reference count data. A slab's + * refcounts is capped at ~8MB, and we process one at a time in a zone, so 9 should be + * plenty. + */ + BLOCK_ALLOCATOR_REFCOUNT_VIO_POOL_SIZE = 9, }; /* @@ -248,7 +255,7 @@ struct vdo_slab { /* A list of the dirty blocks waiting to be written out */ struct vdo_wait_queue dirty_blocks; - /* The number of blocks which are currently writing */ + /* The number of blocks which are currently reading or writing */ size_t active_count; /* A waiter object for updating the slab summary */ @@ -425,6 +432,10 @@ struct block_allocator { /* The vio pool for reading and writing block allocator metadata */ struct vio_pool *vio_pool; + /* The vio pool for large initial reads of ref count areas */ + struct vio_pool *refcount_big_vio_pool; + /* How many ref count blocks are read per vio at initial load */ + u32 refcount_blocks_per_big_vio; /* The dm_kcopyd client for erasing slab journals */ struct dm_kcopyd_client *eraser; /* Iterator over the slabs to be erased */ From 6011fadfee652cc109df7e925a025e46b0aa13e5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 28 Jan 2025 16:16:27 +0100 Subject: [PATCH 0016/2157] staging: bcm2835-camera: drop vb2_ops_wait_prepare/finish Since commit 88785982a19d ("media: vb2: use lock if wait_prepare/finish are NULL") it is no longer needed to set the wait_prepare/finish vb2_ops callbacks as long as the lock field in vb2_queue is set. Since the vb2_ops_wait_prepare/finish callbacks already rely on that field, we can safely drop these callbacks. This simplifies the code and this is a step towards the goal of deleting these callbacks. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/f01dc977-bc4a-46f5-abd7-35089c8f2031@xs4all.nl Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index deec33f63bcf..b839b50ac26a 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -658,8 +658,6 @@ static const struct vb2_ops bcm2835_mmal_video_qops = { .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; /* ------------------------------------------------------------------ From 8e4d3729efb4e4716cb12a25e06dbe40f1a0191f Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Sun, 19 Jan 2025 17:22:47 +0800 Subject: [PATCH 0017/2157] staging: gpib: Remove unnecessary .owner assignment The driver core automatically sets the '.owner' field, so there is no need to assign 'THIS_MODULE' explicitly. This change fixes the warning reported by the kernel test robot: cocci warnings: (new ones prefixed by >>) >> drivers/staging/gpib/eastwood/fluke_gpib.c:1145:3-8: No need to set .owner here. The core will do it. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501182304.q5PrJvhd-lkp@intel.com/ Signed-off-by: Yu-Chun Lin Link: https://lore.kernel.org/r/20250119092247.1873176-1-eleanor15x@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/eastwood/fluke_gpib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index 0304c5de4ccd..d5b1a03abf11 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -1143,7 +1143,6 @@ MODULE_DEVICE_TABLE(of, fluke_gpib_of_match); static struct platform_driver fluke_gpib_platform_driver = { .driver = { .name = "fluke_gpib", - .owner = THIS_MODULE, .of_match_table = fluke_gpib_of_match, }, .probe = &fluke_gpib_probe From 76d54fd5471b10ee993c217928a39d7351eaff5c Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Mon, 20 Jan 2025 15:50:30 +0100 Subject: [PATCH 0018/2157] staging: gpib: Use min for calculating transfer length In the accel read and write functions the transfer length was being calculated by an if statement setting it to the lesser of the remaining bytes to read/write and the fifo size. Replace both instances with min() which is clearer and more compact. Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202501182153.qHfL4Fbc-lkp@intel.com/ Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250120145030.29684-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 3f4f95b7fe34..0ba592dc9849 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -66,10 +66,7 @@ int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t lengt int j; int count; - if (num_fifo_bytes - i < agilent_82350b_fifo_size) - block_size = num_fifo_bytes - i; - else - block_size = agilent_82350b_fifo_size; + block_size = min(num_fifo_bytes - i, agilent_82350b_fifo_size); set_transfer_counter(a_priv, block_size); writeb(ENABLE_TI_TO_SRAM | DIRECTION_GPIB_TO_HOST, a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG); @@ -200,10 +197,7 @@ int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t leng for (i = 1; i < fifotransferlength;) { clear_bit(WRITE_READY_BN, &tms_priv->state); - if (fifotransferlength - i < agilent_82350b_fifo_size) - block_size = fifotransferlength - i; - else - block_size = agilent_82350b_fifo_size; + block_size = min(fifotransferlength - i, agilent_82350b_fifo_size); set_transfer_counter(a_priv, block_size); for (j = 0; j < block_size; ++j, ++i) { // load data into board's sram From cd45f6ef14b7140b77d28b38b9f0a7f7d93ed52b Mon Sep 17 00:00:00 2001 From: Ajith P V Date: Wed, 22 Jan 2025 13:25:44 +0530 Subject: [PATCH 0019/2157] staging: gpib: fix prefixing 0x with decimal output - pr_err() of pc2a_common_attach() in pc2_gpib uses 0x%d. - The config->ibbase is of u32 and correct prefix is 0x%x. - This error reported by checkpatch with below message: ERROR: Prefixing 0x with decimal output is defective Signed-off-by: Ajith P V Link: https://lore.kernel.org/r/20250122075545.33146-1-ajithpv.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/pc2/pc2_gpib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/pc2/pc2_gpib.c b/drivers/staging/gpib/pc2/pc2_gpib.c index c0b07cb63d9a..6a1c0cb1d0ed 100644 --- a/drivers/staging/gpib/pc2/pc2_gpib.c +++ b/drivers/staging/gpib/pc2/pc2_gpib.c @@ -505,7 +505,7 @@ static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *co case 0x62e1: break; default: - pr_err("PCIIa base range invalid, must be one of 0x[0246]2e1, but is 0x%d\n", + pr_err("PCIIa base range invalid, must be one of 0x[0246]2e1, but is 0x%x\n", config->ibbase); return -1; } From 2f548210a5a5d4cb5f20af39b684a9f6de58cd0e Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Wed, 22 Jan 2025 11:38:58 +0100 Subject: [PATCH 0020/2157] staging: gpib: Add missing interface entry point Declaring the driver entry points as static caused a warning that the serial_poll_status function of the agilent_82350b driver was unused. Add the entry point to the corresponding interface structure initializations where it was missing. Fixes: 09a4655ee1eb ("staging: gpib: Add HP/Agilent/Keysight 8235xx PCI GPIB driver") Signed-off-by: Dave Penkler Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250122103859.25499-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 0ba592dc9849..cd7fe7d814ce 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -842,6 +842,7 @@ static gpib_interface_t agilent_82350b_unaccel_interface = { .primary_address = agilent_82350b_primary_address, .secondary_address = agilent_82350b_secondary_address, .serial_poll_response = agilent_82350b_serial_poll_response, + .serial_poll_status = agilent_82350b_serial_poll_status, .t1_delay = agilent_82350b_t1_delay, .return_to_local = agilent_82350b_return_to_local, }; @@ -869,6 +870,7 @@ static gpib_interface_t agilent_82350b_interface = { .primary_address = agilent_82350b_primary_address, .secondary_address = agilent_82350b_secondary_address, .serial_poll_response = agilent_82350b_serial_poll_response, + .serial_poll_status = agilent_82350b_serial_poll_status, .t1_delay = agilent_82350b_t1_delay, .return_to_local = agilent_82350b_return_to_local, }; From 8418753187ba216f8931432dd8a6ee2f23977ecd Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Wed, 22 Jan 2025 11:38:59 +0100 Subject: [PATCH 0021/2157] staging: gpib: Make static, reduce fwd declarations A number of drivers were unnecessarily not declaring their entry points as static. Declare them as static. In order to reduce the number of forward declarations the gpib interface structure initializations have been moved to after the code of the entry points that they use. This also makes the structure of the drivers to be more consistent between them. Signed-off-by: Dave Penkler Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250122103859.25499-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82350b/agilent_82350b.c | 108 +-- .../gpib/agilent_82350b/agilent_82350b.h | 47 - drivers/staging/gpib/cb7210/cb7210.c | 89 +- drivers/staging/gpib/cb7210/cb7210.h | 45 - drivers/staging/gpib/cec/cec.h | 29 - drivers/staging/gpib/cec/cec_gpib.c | 51 +- drivers/staging/gpib/hp_82335/hp82335.c | 52 +- drivers/staging/gpib/hp_82335/hp82335.h | 30 - drivers/staging/gpib/hp_82341/hp_82341.c | 71 +- drivers/staging/gpib/hp_82341/hp_82341.h | 40 - drivers/staging/gpib/pc2/pc2_gpib.c | 247 +++-- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 872 ++++++++---------- 12 files changed, 713 insertions(+), 968 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index cd7fe7d814ce..5c62ec24fced 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -20,8 +20,14 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for Agilent 82350b"); -int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) +static int read_transfer_counter(struct agilent_82350b_priv *a_priv); +static unsigned short read_and_clear_event_status(gpib_board_t *board); +static void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count); +static int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written); + +static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -148,9 +154,8 @@ static int translate_wait_return_value(gpib_board_t *board, int retval) return 0; } -int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) - +static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, + int send_eoi, size_t *bytes_written) { struct agilent_82350b_priv *a_priv = board->private_data; struct tms9914_priv *tms_priv = &a_priv->tms9914_priv; @@ -245,8 +250,7 @@ int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t leng return 0; } -unsigned short read_and_clear_event_status(gpib_board_t *board) - +static unsigned short read_and_clear_event_status(gpib_board_t *board) { struct agilent_82350b_priv *a_priv = board->private_data; unsigned long flags; @@ -259,7 +263,7 @@ unsigned short read_and_clear_event_status(gpib_board_t *board) return status; } -irqreturn_t agilent_82350b_interrupt(int irq, void *arg) +static irqreturn_t agilent_82350b_interrupt(int irq, void *arg) { int tms9914_status1 = 0, tms9914_status2 = 0; @@ -292,12 +296,11 @@ irqreturn_t agilent_82350b_interrupt(int irq, void *arg) return retval; } -void agilent_82350b_detach(gpib_board_t *board); +static void agilent_82350b_detach(gpib_board_t *board); const char *driver_name = "agilent_82350b"; -int read_transfer_counter(struct agilent_82350b_priv *a_priv) - +static int read_transfer_counter(struct agilent_82350b_priv *a_priv) { int lo, mid, value; @@ -308,8 +311,7 @@ int read_transfer_counter(struct agilent_82350b_priv *a_priv) return value; } -void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count) - +static void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count) { int complement = -count; @@ -320,8 +322,8 @@ void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count) } // wrappers for interface functions -int agilent_82350b_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) +static int agilent_82350b_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct agilent_82350b_priv *priv = board->private_data; @@ -329,8 +331,8 @@ int agilent_82350b_read(gpib_board_t *board, uint8_t *buffer, size_t length, int return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct agilent_82350b_priv *priv = board->private_data; @@ -338,8 +340,8 @@ int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, in return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t length, - size_t *bytes_written) +static int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { struct agilent_82350b_priv *priv = board->private_data; @@ -347,7 +349,7 @@ int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t length, return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -int agilent_82350b_take_control(gpib_board_t *board, int synchronous) +static int agilent_82350b_take_control(gpib_board_t *board, int synchronous) { struct agilent_82350b_priv *priv = board->private_data; @@ -355,7 +357,7 @@ int agilent_82350b_take_control(gpib_board_t *board, int synchronous) return tms9914_take_control_workaround(board, &priv->tms9914_priv, synchronous); } -int agilent_82350b_go_to_standby(gpib_board_t *board) +static int agilent_82350b_go_to_standby(gpib_board_t *board) { struct agilent_82350b_priv *priv = board->private_data; @@ -363,7 +365,7 @@ int agilent_82350b_go_to_standby(gpib_board_t *board) return tms9914_go_to_standby(board, &priv->tms9914_priv); } -void agilent_82350b_request_system_control(gpib_board_t *board, int request_control) +static void agilent_82350b_request_system_control(gpib_board_t *board, int request_control) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -381,7 +383,7 @@ void agilent_82350b_request_system_control(gpib_board_t *board, int request_cont tms9914_request_system_control(board, &a_priv->tms9914_priv, request_control); } -void agilent_82350b_interface_clear(gpib_board_t *board, int assert) +static void agilent_82350b_interface_clear(gpib_board_t *board, int assert) { struct agilent_82350b_priv *priv = board->private_data; @@ -389,104 +391,91 @@ void agilent_82350b_interface_clear(gpib_board_t *board, int assert) tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -void agilent_82350b_remote_enable(gpib_board_t *board, int enable) - +static void agilent_82350b_remote_enable(gpib_board_t *board, int enable) { struct agilent_82350b_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -int agilent_82350b_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) - +static int agilent_82350b_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -void agilent_82350b_disable_eos(gpib_board_t *board) - +static void agilent_82350b_disable_eos(gpib_board_t *board) { struct agilent_82350b_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -unsigned int agilent_82350b_update_status(gpib_board_t *board, unsigned int clear_mask) - +static unsigned int agilent_82350b_update_status(gpib_board_t *board, unsigned int clear_mask) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -int agilent_82350b_primary_address(gpib_board_t *board, unsigned int address) - +static int agilent_82350b_primary_address(gpib_board_t *board, unsigned int address) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -int agilent_82350b_secondary_address(gpib_board_t *board, unsigned int address, int enable) - +static int agilent_82350b_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -int agilent_82350b_parallel_poll(gpib_board_t *board, uint8_t *result) - +static int agilent_82350b_parallel_poll(gpib_board_t *board, uint8_t *result) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -void agilent_82350b_parallel_poll_configure(gpib_board_t *board, uint8_t config) - +static void agilent_82350b_parallel_poll_configure(gpib_board_t *board, uint8_t config) { struct agilent_82350b_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -void agilent_82350b_parallel_poll_response(gpib_board_t *board, int ist) - +static void agilent_82350b_parallel_poll_response(gpib_board_t *board, int ist) { struct agilent_82350b_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -void agilent_82350b_serial_poll_response(gpib_board_t *board, uint8_t status) - +static void agilent_82350b_serial_poll_response(gpib_board_t *board, uint8_t status) { struct agilent_82350b_priv *priv = board->private_data; tms9914_serial_poll_response(board, &priv->tms9914_priv, status); } -uint8_t agilent_82350b_serial_poll_status(gpib_board_t *board) - +static uint8_t agilent_82350b_serial_poll_status(gpib_board_t *board) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_serial_poll_status(board, &priv->tms9914_priv); } -int agilent_82350b_line_status(const gpib_board_t *board) - +static int agilent_82350b_line_status(const gpib_board_t *board) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_line_status(board, &priv->tms9914_priv); } -unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int nanosec) - +static unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int nanosec) { struct agilent_82350b_priv *a_priv = board->private_data; static const int nanosec_per_clock = 30; @@ -501,16 +490,14 @@ unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int nanosec) return value * nanosec_per_clock; } -void agilent_82350b_return_to_local(gpib_board_t *board) - +static void agilent_82350b_return_to_local(gpib_board_t *board) { struct agilent_82350b_priv *priv = board->private_data; tms9914_return_to_local(board, &priv->tms9914_priv); } -int agilent_82350b_allocate_private(gpib_board_t *board) - +static int agilent_82350b_allocate_private(gpib_board_t *board) { board->private_data = kzalloc(sizeof(struct agilent_82350b_priv), GFP_KERNEL); if (!board->private_data) @@ -518,15 +505,13 @@ int agilent_82350b_allocate_private(gpib_board_t *board) return 0; } -void agilent_82350b_free_private(gpib_board_t *board) - +static void agilent_82350b_free_private(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; } static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t *config) - { struct agilent_82350b_priv *a_priv = board->private_data; static const unsigned int firmware_length = 5302; @@ -774,20 +759,17 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c return 0; } -int agilent_82350b_unaccel_attach(gpib_board_t *board, const gpib_board_config_t *config) - +static int agilent_82350b_unaccel_attach(gpib_board_t *board, const gpib_board_config_t *config) { return agilent_82350b_generic_attach(board, config, 0); } -int agilent_82350b_accel_attach(gpib_board_t *board, const gpib_board_config_t *config) - +static int agilent_82350b_accel_attach(gpib_board_t *board, const gpib_board_config_t *config) { return agilent_82350b_generic_attach(board, config, 1); } -void agilent_82350b_detach(gpib_board_t *board) - +static void agilent_82350b_detach(gpib_board_t *board) { struct agilent_82350b_priv *a_priv = board->private_data; struct tms9914_priv *tms_priv; @@ -897,7 +879,6 @@ static struct pci_driver agilent_82350b_pci_driver = { }; static int __init agilent_82350b_init_module(void) - { int result; @@ -930,7 +911,6 @@ static int __init agilent_82350b_init_module(void) } static void __exit agilent_82350b_exit_module(void) - { gpib_unregister_driver(&agilent_82350b_interface); gpib_unregister_driver(&agilent_82350b_unaccel_interface); diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.h b/drivers/staging/gpib/agilent_82350b/agilent_82350b.h index 32b322113c10..8b96ad12647e 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.h +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.h @@ -60,53 +60,6 @@ struct agilent_82350b_priv { // driver name extern const char *driver_name; -// init functions - -int agilent_82350b_unaccel_attach(gpib_board_t *board, const gpib_board_config_t *config); -int agilent_82350b_accel_attach(gpib_board_t *board, const gpib_board_config_t *config); - -// interface functions -int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read); -int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); -int agilent_82350b_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read); -int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); -int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t length, - size_t *bytes_written); -int agilent_82350b_take_control(gpib_board_t *board, int synchronous); -int agilent_82350b_go_to_standby(gpib_board_t *board); -void agilent_82350b_request_system_control(gpib_board_t *board, int request_control); -void agilent_82350b_interface_clear(gpib_board_t *board, int assert); -void agilent_82350b_remote_enable(gpib_board_t *board, int enable); -int agilent_82350b_enable_eos(gpib_board_t *board, uint8_t eos_byte, int - compare_8_bits); -void agilent_82350b_disable_eos(gpib_board_t *board); -unsigned int agilent_82350b_update_status(gpib_board_t *board, unsigned int clear_mask); -int agilent_82350b_primary_address(gpib_board_t *board, unsigned int address); -int agilent_82350b_secondary_address(gpib_board_t *board, unsigned int address, int - enable); -int agilent_82350b_parallel_poll(gpib_board_t *board, uint8_t *result); -void agilent_82350b_parallel_poll_configure(gpib_board_t *board, uint8_t config); -void agilent_82350b_parallel_poll_response(gpib_board_t *board, int ist); -void agilent_82350b_serial_poll_response(gpib_board_t *board, uint8_t status); -void agilent_82350b_return_to_local(gpib_board_t *board); -uint8_t agilent_82350b_serial_poll_status(gpib_board_t *board); -int agilent_82350b_line_status(const gpib_board_t *board); -unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int nanosec); - -// interrupt service routines -irqreturn_t agilent_82350b_interrupt(int irq, void *arg); - -// utility functions -int agilent_82350b_allocate_private(gpib_board_t *board); -void agilent_82350b_free_private(gpib_board_t *board); -unsigned short read_and_clear_event_status(gpib_board_t *board); -int read_transfer_counter(struct agilent_82350b_priv *a_priv); -void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count); - //registers enum agilent_82350b_gpib_registers diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 4d22f647a453..381c508f62eb 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -23,7 +23,10 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver Measurement Computing boards using cb7210.2 and cbi488.2"); -static inline int have_fifo_word(const struct cb7210_priv *cb_priv) +static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, + int *end, size_t *bytes_read); + + static inline int have_fifo_word(const struct cb7210_priv *cb_priv) { if (((cb7210_read_byte(cb_priv, HS_STATUS)) & (HS_RX_MSB_NOT_EMPTY | HS_RX_LSB_NOT_EMPTY)) == @@ -165,8 +168,8 @@ static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t * return retval; } -int cb7210_accel_read(gpib_board_t *board, uint8_t *buffer, - size_t length, int *end, size_t *bytes_read) +static int cb7210_accel_read(gpib_board_t *board, uint8_t *buffer, + size_t length, int *end, size_t *bytes_read) { ssize_t retval; struct cb7210_priv *cb_priv = board->private_data; @@ -347,8 +350,8 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ return retval; } -int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -375,7 +378,7 @@ int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int return retval; } -int cb7210_line_status(const gpib_board_t *board) +static int cb7210_line_status(const gpib_board_t *board) { int status = ValidALL; int bsr_bits; @@ -407,7 +410,7 @@ int cb7210_line_status(const gpib_board_t *board) return status; } -unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -424,13 +427,13 @@ unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec) return retval; } -irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board); +static irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board); /* * GPIB interrupt service routines */ -irqreturn_t cb_pci_interrupt(int irq, void *arg) +static irqreturn_t cb_pci_interrupt(int irq, void *arg) { int bits; gpib_board_t *board = arg; @@ -462,7 +465,7 @@ irqreturn_t cb_pci_interrupt(int irq, void *arg) return cb7210_locked_internal_interrupt(arg); } -irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) +static irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) { int hs_status, status1, status2; struct cb7210_priv *priv = board->private_data; @@ -516,7 +519,7 @@ irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) return IRQ_HANDLED; } -irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board) +static irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board) { unsigned long flags; irqreturn_t retval; @@ -527,7 +530,7 @@ irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board) return retval; } -irqreturn_t cb7210_interrupt(int irq, void *arg) +static irqreturn_t cb7210_interrupt(int irq, void *arg) { return cb7210_internal_interrupt(arg); } @@ -539,43 +542,45 @@ static void cb_pci_detach(gpib_board_t *board); static void cb_isa_detach(gpib_board_t *board); // wrappers for interface functions -int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, + int *end, size_t *bytes_read) { struct cb7210_priv *priv = board->private_data; return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -int cb7210_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written) +static int cb7210_write(gpib_board_t *board, uint8_t *buffer, size_t length, + int send_eoi, size_t *bytes_written) { struct cb7210_priv *priv = board->private_data; return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -int cb7210_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int cb7210_command(gpib_board_t *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { struct cb7210_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -int cb7210_take_control(gpib_board_t *board, int synchronous) +static int cb7210_take_control(gpib_board_t *board, int synchronous) { struct cb7210_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -int cb7210_go_to_standby(gpib_board_t *board) +static int cb7210_go_to_standby(gpib_board_t *board) { struct cb7210_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -void cb7210_request_system_control(gpib_board_t *board, int request_control) +static void cb7210_request_system_control(gpib_board_t *board, int request_control) { struct cb7210_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -589,91 +594,91 @@ void cb7210_request_system_control(gpib_board_t *board, int request_control) nec7210_request_system_control(board, nec_priv, request_control); } -void cb7210_interface_clear(gpib_board_t *board, int assert) +static void cb7210_interface_clear(gpib_board_t *board, int assert) { struct cb7210_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -void cb7210_remote_enable(gpib_board_t *board, int enable) +static void cb7210_remote_enable(gpib_board_t *board, int enable) { struct cb7210_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -int cb7210_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int cb7210_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct cb7210_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -void cb7210_disable_eos(gpib_board_t *board) +static void cb7210_disable_eos(gpib_board_t *board) { struct cb7210_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -unsigned int cb7210_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int cb7210_update_status(gpib_board_t *board, unsigned int clear_mask) { struct cb7210_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -int cb7210_primary_address(gpib_board_t *board, unsigned int address) +static int cb7210_primary_address(gpib_board_t *board, unsigned int address) { struct cb7210_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -int cb7210_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int cb7210_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct cb7210_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -int cb7210_parallel_poll(gpib_board_t *board, uint8_t *result) +static int cb7210_parallel_poll(gpib_board_t *board, uint8_t *result) { struct cb7210_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -void cb7210_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) +static void cb7210_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) { struct cb7210_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, configuration); } -void cb7210_parallel_poll_response(gpib_board_t *board, int ist) +static void cb7210_parallel_poll_response(gpib_board_t *board, int ist) { struct cb7210_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -void cb7210_serial_poll_response(gpib_board_t *board, uint8_t status) +static void cb7210_serial_poll_response(gpib_board_t *board, uint8_t status) { struct cb7210_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -uint8_t cb7210_serial_poll_status(gpib_board_t *board) +static uint8_t cb7210_serial_poll_status(gpib_board_t *board) { struct cb7210_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -void cb7210_return_to_local(gpib_board_t *board) +static void cb7210_return_to_local(gpib_board_t *board) { struct cb7210_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -862,14 +867,14 @@ static int cb7210_allocate_private(gpib_board_t *board) return 0; } -void cb7210_generic_detach(gpib_board_t *board) +static void cb7210_generic_detach(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; } // generic part of attach functions shared by all cb7210 boards -int cb7210_generic_attach(gpib_board_t *board) +static int cb7210_generic_attach(gpib_board_t *board) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -887,7 +892,7 @@ int cb7210_generic_attach(gpib_board_t *board) return 0; } -int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) +static int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) { struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -923,7 +928,7 @@ int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) return 0; } -int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -1004,7 +1009,7 @@ int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) return cb7210_init(cb_priv, board); } -void cb_pci_detach(gpib_board_t *board) +static void cb_pci_detach(gpib_board_t *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1027,7 +1032,7 @@ void cb_pci_detach(gpib_board_t *board) cb7210_generic_detach(board); } -int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) { int isr_flags = 0; struct cb7210_priv *cb_priv; @@ -1061,7 +1066,7 @@ int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) return cb7210_init(cb_priv, board); } -void cb_isa_detach(gpib_board_t *board) +static void cb_isa_detach(gpib_board_t *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1351,7 +1356,7 @@ static struct pcmcia_driver cb_gpib_cs_driver = { .resume = cb_gpib_resume, }; -void cb_pcmcia_cleanup_module(void) +static void cb_pcmcia_cleanup_module(void) { DEBUG(0, "cb_gpib_cs: unloading\n"); pcmcia_unregister_driver(&cb_gpib_cs_driver); @@ -1441,7 +1446,7 @@ static gpib_interface_t cb_pcmcia_accel_interface = { .return_to_local = cb7210_return_to_local, }; -int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -1478,7 +1483,7 @@ int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) return cb7210_init(cb_priv, board); } -void cb_pcmcia_detach(gpib_board_t *board) +static void cb_pcmcia_detach(gpib_board_t *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; diff --git a/drivers/staging/gpib/cb7210/cb7210.h b/drivers/staging/gpib/cb7210/cb7210.h index d56cd905cc8c..2108fe7a8ce5 100644 --- a/drivers/staging/gpib/cb7210/cb7210.h +++ b/drivers/staging/gpib/cb7210/cb7210.h @@ -36,51 +36,6 @@ struct cb7210_priv { unsigned in_fifo_half_full : 1; }; -// interrupt service routines -irqreturn_t cb_pci_interrupt(int irq, void *arg); -irqreturn_t cb7210_interrupt(int irq, void *arg); -irqreturn_t cb7210_internal_interrupt(gpib_board_t *board); - -// interface functions -int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, - int *end, size_t *bytes_read); -int cb7210_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, - int *end, size_t *bytes_read); -int cb7210_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written); -int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written); -int cb7210_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written); -int cb7210_take_control(gpib_board_t *board, int synchronous); -int cb7210_go_to_standby(gpib_board_t *board); -void cb7210_request_system_control(gpib_board_t *board, int request_control); -void cb7210_interface_clear(gpib_board_t *board, int assert); -void cb7210_remote_enable(gpib_board_t *board, int enable); -int cb7210_enable_eos(gpib_board_t *board, uint8_t eos_byte, - int compare_8_bits); -void cb7210_disable_eos(gpib_board_t *board); -unsigned int cb7210_update_status(gpib_board_t *board, unsigned int clear_mask); -int cb7210_primary_address(gpib_board_t *board, unsigned int address); -int cb7210_secondary_address(gpib_board_t *board, unsigned int address, - int enable); -int cb7210_parallel_poll(gpib_board_t *board, uint8_t *result); -void cb7210_serial_poll_response(gpib_board_t *board, uint8_t status); -uint8_t cb7210_serial_poll_status(gpib_board_t *board); -void cb7210_parallel_poll_configure(gpib_board_t *board, uint8_t configuration); -void cb7210_parallel_poll_response(gpib_board_t *board, int ist); -int cb7210_line_status(const gpib_board_t *board); -unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec); -void cb7210_return_to_local(gpib_board_t *board); - -// utility functions -void cb7210_generic_detach(gpib_board_t *board); -int cb7210_generic_attach(gpib_board_t *board); -int cb7210_init(struct cb7210_priv *priv, gpib_board_t *board); - -// pcmcia init/cleanup -int cb_pcmcia_init_module(void); -void cb_pcmcia_cleanup_module(void); - // pci-gpib register offset static const int cb7210_reg_offset = 1; diff --git a/drivers/staging/gpib/cec/cec.h b/drivers/staging/gpib/cec/cec.h index 040ca70ed708..3ce2869c7429 100644 --- a/drivers/staging/gpib/cec/cec.h +++ b/drivers/staging/gpib/cec/cec.h @@ -16,34 +16,5 @@ struct cec_priv { unsigned int irq; }; -// interface functions -int cec_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int cec_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); -int cec_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written); -int cec_take_control(gpib_board_t *board, int synchronous); -int cec_go_to_standby(gpib_board_t *board); -void cec_request_system_control(gpib_board_t *board, int request_control); -void cec_interface_clear(gpib_board_t *board, int assert); -void cec_remote_enable(gpib_board_t *board, int enable); -int cec_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits); -void cec_disable_eos(gpib_board_t *board); -unsigned int cec_update_status(gpib_board_t *board, unsigned int clear_mask); -int cec_primary_address(gpib_board_t *board, unsigned int address); -int cec_secondary_address(gpib_board_t *board, unsigned int address, int enable); -int cec_parallel_poll(gpib_board_t *board, uint8_t *result); -void cec_parallel_poll_configure(gpib_board_t *board, uint8_t configuration); -void cec_parallel_poll_response(gpib_board_t *board, int ist); -void cec_serial_poll_response(gpib_board_t *board, uint8_t status); -void cec_return_to_local(gpib_board_t *board); - -// interrupt service routines -irqreturn_t cec_interrupt(int irq, void *arg); - -// utility functions -void cec_free_private(gpib_board_t *board); -int cec_generic_attach(gpib_board_t *board); -void cec_init(struct cec_priv *priv, const gpib_board_t *board); - // offset between consecutive nec7210 registers static const int cec_reg_offset = 1; diff --git a/drivers/staging/gpib/cec/cec_gpib.c b/drivers/staging/gpib/cec/cec_gpib.c index d056cd1d6b3e..18933223711e 100644 --- a/drivers/staging/gpib/cec/cec_gpib.c +++ b/drivers/staging/gpib/cec/cec_gpib.c @@ -19,7 +19,7 @@ MODULE_DESCRIPTION("GPIB driver for CEC PCI and PCMCIA boards"); * GPIB interrupt service routines */ -irqreturn_t cec_interrupt(int irq, void *arg) +static irqreturn_t cec_interrupt(int irq, void *arg) { gpib_board_t *board = arg; struct cec_priv *priv = board->private_data; @@ -41,120 +41,121 @@ static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config static void cec_pci_detach(gpib_board_t *board); // wrappers for interface functions -int cec_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +static int cec_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct cec_priv *priv = board->private_data; return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -int cec_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int cec_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct cec_priv *priv = board->private_data; return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -int cec_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int cec_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct cec_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -int cec_take_control(gpib_board_t *board, int synchronous) +static int cec_take_control(gpib_board_t *board, int synchronous) { struct cec_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -int cec_go_to_standby(gpib_board_t *board) +static int cec_go_to_standby(gpib_board_t *board) { struct cec_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -void cec_request_system_control(gpib_board_t *board, int request_control) +static void cec_request_system_control(gpib_board_t *board, int request_control) { struct cec_priv *priv = board->private_data; nec7210_request_system_control(board, &priv->nec7210_priv, request_control); } -void cec_interface_clear(gpib_board_t *board, int assert) +static void cec_interface_clear(gpib_board_t *board, int assert) { struct cec_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -void cec_remote_enable(gpib_board_t *board, int enable) +static void cec_remote_enable(gpib_board_t *board, int enable) { struct cec_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -int cec_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int cec_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct cec_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -void cec_disable_eos(gpib_board_t *board) +static void cec_disable_eos(gpib_board_t *board) { struct cec_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -unsigned int cec_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int cec_update_status(gpib_board_t *board, unsigned int clear_mask) { struct cec_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -int cec_primary_address(gpib_board_t *board, unsigned int address) +static int cec_primary_address(gpib_board_t *board, unsigned int address) { struct cec_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -int cec_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int cec_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct cec_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -int cec_parallel_poll(gpib_board_t *board, uint8_t *result) +static int cec_parallel_poll(gpib_board_t *board, uint8_t *result) { struct cec_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -void cec_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void cec_parallel_poll_configure(gpib_board_t *board, uint8_t config) { struct cec_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, config); } -void cec_parallel_poll_response(gpib_board_t *board, int ist) +static void cec_parallel_poll_response(gpib_board_t *board, int ist) { struct cec_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -void cec_serial_poll_response(gpib_board_t *board, uint8_t status) +static void cec_serial_poll_response(gpib_board_t *board, uint8_t status) { struct cec_priv *priv = board->private_data; @@ -175,7 +176,7 @@ static unsigned int cec_t1_delay(gpib_board_t *board, unsigned int nano_sec) return nec7210_t1_delay(board, &priv->nec7210_priv, nano_sec); } -void cec_return_to_local(gpib_board_t *board) +static void cec_return_to_local(gpib_board_t *board) { struct cec_priv *priv = board->private_data; @@ -223,13 +224,13 @@ static int cec_allocate_private(gpib_board_t *board) return 0; } -void cec_free_private(gpib_board_t *board) +static void cec_free_private(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; } -int cec_generic_attach(gpib_board_t *board) +static int cec_generic_attach(gpib_board_t *board) { struct cec_priv *cec_priv; struct nec7210_priv *nec_priv; @@ -247,7 +248,7 @@ int cec_generic_attach(gpib_board_t *board) return 0; } -void cec_init(struct cec_priv *cec_priv, const gpib_board_t *board) +static void cec_init(struct cec_priv *cec_priv, const gpib_board_t *board) { struct nec7210_priv *nec_priv = &cec_priv->nec7210_priv; @@ -259,7 +260,7 @@ void cec_init(struct cec_priv *cec_priv, const gpib_board_t *board) nec7210_board_online(nec_priv, board); } -int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct cec_priv *cec_priv; struct nec7210_priv *nec_priv; @@ -319,7 +320,7 @@ int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void cec_pci_detach(gpib_board_t *board) +static void cec_pci_detach(gpib_board_t *board) { struct cec_priv *cec_priv = board->private_data; struct nec7210_priv *nec_priv; diff --git a/drivers/staging/gpib/hp_82335/hp82335.c b/drivers/staging/gpib/hp_82335/hp82335.c index 700d1ba029d2..451d5dc6d340 100644 --- a/drivers/staging/gpib/hp_82335/hp82335.c +++ b/drivers/staging/gpib/hp_82335/hp82335.c @@ -21,124 +21,126 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for HP 82335 interface cards"); static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config); - static void hp82335_detach(gpib_board_t *board); +static irqreturn_t hp82335_interrupt(int irq, void *arg); // wrappers for interface functions -int hp82335_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +static int hp82335_read(gpib_board_t *board, uint8_t *buffer, size_t length, + int *end, size_t *bytes_read) { struct hp82335_priv *priv = board->private_data; return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -int hp82335_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int hp82335_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct hp82335_priv *priv = board->private_data; return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -int hp82335_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int hp82335_command(gpib_board_t *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { struct hp82335_priv *priv = board->private_data; return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -int hp82335_take_control(gpib_board_t *board, int synchronous) +static int hp82335_take_control(gpib_board_t *board, int synchronous) { struct hp82335_priv *priv = board->private_data; return tms9914_take_control(board, &priv->tms9914_priv, synchronous); } -int hp82335_go_to_standby(gpib_board_t *board) +static int hp82335_go_to_standby(gpib_board_t *board) { struct hp82335_priv *priv = board->private_data; return tms9914_go_to_standby(board, &priv->tms9914_priv); } -void hp82335_request_system_control(gpib_board_t *board, int request_control) +static void hp82335_request_system_control(gpib_board_t *board, int request_control) { struct hp82335_priv *priv = board->private_data; tms9914_request_system_control(board, &priv->tms9914_priv, request_control); } -void hp82335_interface_clear(gpib_board_t *board, int assert) +static void hp82335_interface_clear(gpib_board_t *board, int assert) { struct hp82335_priv *priv = board->private_data; tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -void hp82335_remote_enable(gpib_board_t *board, int enable) +static void hp82335_remote_enable(gpib_board_t *board, int enable) { struct hp82335_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -int hp82335_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int hp82335_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct hp82335_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -void hp82335_disable_eos(gpib_board_t *board) +static void hp82335_disable_eos(gpib_board_t *board) { struct hp82335_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -unsigned int hp82335_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int hp82335_update_status(gpib_board_t *board, unsigned int clear_mask) { struct hp82335_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -int hp82335_primary_address(gpib_board_t *board, unsigned int address) +static int hp82335_primary_address(gpib_board_t *board, unsigned int address) { struct hp82335_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -int hp82335_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int hp82335_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct hp82335_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -int hp82335_parallel_poll(gpib_board_t *board, uint8_t *result) +static int hp82335_parallel_poll(gpib_board_t *board, uint8_t *result) { struct hp82335_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -void hp82335_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void hp82335_parallel_poll_configure(gpib_board_t *board, uint8_t config) { struct hp82335_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -void hp82335_parallel_poll_response(gpib_board_t *board, int ist) +static void hp82335_parallel_poll_response(gpib_board_t *board, int ist) { struct hp82335_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -void hp82335_serial_poll_response(gpib_board_t *board, uint8_t status) +static void hp82335_serial_poll_response(gpib_board_t *board, uint8_t status) { struct hp82335_priv *priv = board->private_data; @@ -166,7 +168,7 @@ static unsigned int hp82335_t1_delay(gpib_board_t *board, unsigned int nano_sec) return tms9914_t1_delay(board, &priv->tms9914_priv, nano_sec); } -void hp82335_return_to_local(gpib_board_t *board) +static void hp82335_return_to_local(gpib_board_t *board) { struct hp82335_priv *priv = board->private_data; @@ -201,7 +203,7 @@ static gpib_interface_t hp82335_interface = { .return_to_local = hp82335_return_to_local, }; -int hp82335_allocate_private(gpib_board_t *board) +static int hp82335_allocate_private(gpib_board_t *board) { board->private_data = kzalloc(sizeof(struct hp82335_priv), GFP_KERNEL); if (!board->private_data) @@ -209,7 +211,7 @@ int hp82335_allocate_private(gpib_board_t *board) return 0; } -void hp82335_free_private(gpib_board_t *board) +static void hp82335_free_private(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; @@ -237,7 +239,7 @@ static void hp82335_clear_interrupt(struct hp82335_priv *hp_priv) writeb(0, tms_priv->mmiobase + HPREG_INTR_CLEAR); } -int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct hp82335_priv *hp_priv; struct tms9914_priv *tms_priv; @@ -304,7 +306,7 @@ int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void hp82335_detach(gpib_board_t *board) +static void hp82335_detach(gpib_board_t *board) { struct hp82335_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv; @@ -348,7 +350,7 @@ module_exit(hp82335_exit_module); * GPIB interrupt service routines */ -irqreturn_t hp82335_interrupt(int irq, void *arg) +static irqreturn_t hp82335_interrupt(int irq, void *arg) { int status1, status2; gpib_board_t *board = arg; diff --git a/drivers/staging/gpib/hp_82335/hp82335.h b/drivers/staging/gpib/hp_82335/hp82335.h index 4b185d7c5188..0c252a712ec9 100644 --- a/drivers/staging/gpib/hp_82335/hp82335.h +++ b/drivers/staging/gpib/hp_82335/hp82335.h @@ -17,36 +17,6 @@ struct hp82335_priv { unsigned long raw_iobase; }; -// interface functions -int hp82335_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int hp82335_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written); -int hp82335_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written); -int hp82335_take_control(gpib_board_t *board, int synchronous); -int hp82335_go_to_standby(gpib_board_t *board); -void hp82335_request_system_control(gpib_board_t *board, int request_control); -void hp82335_interface_clear(gpib_board_t *board, int assert); -void hp82335_remote_enable(gpib_board_t *board, int enable); -int hp82335_enable_eos(gpib_board_t *board, uint8_t eos_byte, int - compare_8_bits); -void hp82335_disable_eos(gpib_board_t *board); -unsigned int hp82335_update_status(gpib_board_t *board, unsigned int clear_mask); -int hp82335_primary_address(gpib_board_t *board, unsigned int address); -int hp82335_secondary_address(gpib_board_t *board, unsigned int address, int - enable); -int hp82335_parallel_poll(gpib_board_t *board, uint8_t *result); -void hp82335_parallel_poll_configure(gpib_board_t *board, uint8_t config); -void hp82335_parallel_poll_response(gpib_board_t *board, int ist); -void hp82335_serial_poll_response(gpib_board_t *board, uint8_t status); -void hp82335_return_to_local(gpib_board_t *board); - -// interrupt service routines -irqreturn_t hp82335_interrupt(int irq, void *arg); - -// utility functions -int hp82335_allocate_private(gpib_board_t *board); -void hp82335_free_private(gpib_board_t *board); - // size of io memory region used static const int hp82335_rom_size = 0x2000; static const int hp82335_upper_iomem_size = 0x2000; diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 0ddae295912f..9cd2a2f50ff1 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -17,8 +17,15 @@ MODULE_LICENSE("GPL"); -int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) +static unsigned short read_and_clear_event_status(gpib_board_t *board); +static void set_transfer_counter(struct hp_82341_priv *hp_priv, int count); +static int read_transfer_counter(struct hp_82341_priv *hp_priv); +static int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written); +static irqreturn_t hp_82341_interrupt(int irq, void *arg); + +static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct hp_82341_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv = &hp_priv->tms9914_priv; @@ -163,8 +170,8 @@ static int restart_write_fifo(gpib_board_t *board, struct hp_82341_priv *hp_priv return 0; } -int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written) +static int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, + int send_eoi, size_t *bytes_written) { struct hp_82341_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv = &hp_priv->tms9914_priv; @@ -249,43 +256,45 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi static void hp_82341_detach(gpib_board_t *board); // wrappers for interface functions -int hp_82341_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +static int hp_82341_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct hp_82341_priv *priv = board->private_data; return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct hp_82341_priv *priv = board->private_data; return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -int hp_82341_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int hp_82341_command(gpib_board_t *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { struct hp_82341_priv *priv = board->private_data; return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -int hp_82341_take_control(gpib_board_t *board, int synchronous) +static int hp_82341_take_control(gpib_board_t *board, int synchronous) { struct hp_82341_priv *priv = board->private_data; return tms9914_take_control(board, &priv->tms9914_priv, synchronous); } -int hp_82341_go_to_standby(gpib_board_t *board) +static int hp_82341_go_to_standby(gpib_board_t *board) { struct hp_82341_priv *priv = board->private_data; return tms9914_go_to_standby(board, &priv->tms9914_priv); } -void hp_82341_request_system_control(gpib_board_t *board, int request_control) +static void hp_82341_request_system_control(gpib_board_t *board, int request_control) { struct hp_82341_priv *priv = board->private_data; @@ -297,77 +306,77 @@ void hp_82341_request_system_control(gpib_board_t *board, int request_control) tms9914_request_system_control(board, &priv->tms9914_priv, request_control); } -void hp_82341_interface_clear(gpib_board_t *board, int assert) +static void hp_82341_interface_clear(gpib_board_t *board, int assert) { struct hp_82341_priv *priv = board->private_data; tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -void hp_82341_remote_enable(gpib_board_t *board, int enable) +static void hp_82341_remote_enable(gpib_board_t *board, int enable) { struct hp_82341_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -int hp_82341_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int hp_82341_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct hp_82341_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -void hp_82341_disable_eos(gpib_board_t *board) +static void hp_82341_disable_eos(gpib_board_t *board) { struct hp_82341_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -unsigned int hp_82341_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int hp_82341_update_status(gpib_board_t *board, unsigned int clear_mask) { struct hp_82341_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -int hp_82341_primary_address(gpib_board_t *board, unsigned int address) +static int hp_82341_primary_address(gpib_board_t *board, unsigned int address) { struct hp_82341_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -int hp_82341_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int hp_82341_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct hp_82341_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -int hp_82341_parallel_poll(gpib_board_t *board, uint8_t *result) +static int hp_82341_parallel_poll(gpib_board_t *board, uint8_t *result) { struct hp_82341_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -void hp_82341_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void hp_82341_parallel_poll_configure(gpib_board_t *board, uint8_t config) { struct hp_82341_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -void hp_82341_parallel_poll_response(gpib_board_t *board, int ist) +static void hp_82341_parallel_poll_response(gpib_board_t *board, int ist) { struct hp_82341_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -void hp_82341_serial_poll_response(gpib_board_t *board, uint8_t status) +static void hp_82341_serial_poll_response(gpib_board_t *board, uint8_t status) { struct hp_82341_priv *priv = board->private_data; @@ -395,7 +404,7 @@ static unsigned int hp_82341_t1_delay(gpib_board_t *board, unsigned int nano_sec return tms9914_t1_delay(board, &priv->tms9914_priv, nano_sec); } -void hp_82341_return_to_local(gpib_board_t *board) +static void hp_82341_return_to_local(gpib_board_t *board) { struct hp_82341_priv *priv = board->private_data; @@ -457,7 +466,7 @@ static gpib_interface_t hp_82341_interface = { .return_to_local = hp_82341_return_to_local, }; -int hp_82341_allocate_private(gpib_board_t *board) +static int hp_82341_allocate_private(gpib_board_t *board) { board->private_data = kzalloc(sizeof(struct hp_82341_priv), GFP_KERNEL); if (!board->private_data) @@ -465,7 +474,7 @@ int hp_82341_allocate_private(gpib_board_t *board) return 0; } -void hp_82341_free_private(gpib_board_t *board) +static void hp_82341_free_private(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; @@ -678,7 +687,7 @@ static int clear_xilinx(struct hp_82341_priv *hp_priv) return 0; } -int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct hp_82341_priv *hp_priv; struct tms9914_priv *tms_priv; @@ -774,7 +783,7 @@ int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void hp_82341_detach(gpib_board_t *board) +static void hp_82341_detach(gpib_board_t *board) { struct hp_82341_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv; @@ -837,7 +846,7 @@ module_exit(hp_82341_exit_module); /* * GPIB interrupt service routines */ -unsigned short read_and_clear_event_status(gpib_board_t *board) +static unsigned short read_and_clear_event_status(gpib_board_t *board) { struct hp_82341_priv *hp_priv = board->private_data; unsigned long flags; @@ -850,7 +859,7 @@ unsigned short read_and_clear_event_status(gpib_board_t *board) return status; } -irqreturn_t hp_82341_interrupt(int irq, void *arg) +static irqreturn_t hp_82341_interrupt(int irq, void *arg) { int status1, status2; gpib_board_t *board = arg; @@ -885,7 +894,7 @@ irqreturn_t hp_82341_interrupt(int irq, void *arg) return retval; } -int read_transfer_counter(struct hp_82341_priv *hp_priv) +static int read_transfer_counter(struct hp_82341_priv *hp_priv) { int lo, mid, value; @@ -896,7 +905,7 @@ int read_transfer_counter(struct hp_82341_priv *hp_priv) return value; } -void set_transfer_counter(struct hp_82341_priv *hp_priv, int count) +static void set_transfer_counter(struct hp_82341_priv *hp_priv, int count) { int complement = -count; diff --git a/drivers/staging/gpib/hp_82341/hp_82341.h b/drivers/staging/gpib/hp_82341/hp_82341.h index 0065ebd9747c..370a3d4576eb 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.h +++ b/drivers/staging/gpib/hp_82341/hp_82341.h @@ -26,42 +26,6 @@ struct hp_82341_priv { enum hp_82341_hardware_version hw_version; }; - -// interface functions -int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read); -int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); -int hp_82341_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read); -int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); -int hp_82341_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written); -int hp_82341_take_control(gpib_board_t *board, int synchronous); -int hp_82341_go_to_standby(gpib_board_t *board); -void hp_82341_request_system_control(gpib_board_t *board, int request_control); -void hp_82341_interface_clear(gpib_board_t *board, int assert); -void hp_82341_remote_enable(gpib_board_t *board, int enable); -int hp_82341_enable_eos(gpib_board_t *board, uint8_t eos_byte, int - compare_8_bits); -void hp_82341_disable_eos(gpib_board_t *board); -unsigned int hp_82341_update_status(gpib_board_t *board, unsigned int clear_mask); -int hp_82341_primary_address(gpib_board_t *board, unsigned int address); -int hp_82341_secondary_address(gpib_board_t *board, unsigned int address, int - enable); -int hp_82341_parallel_poll(gpib_board_t *board, uint8_t *result); -void hp_82341_parallel_poll_configure(gpib_board_t *board, uint8_t config); -void hp_82341_parallel_poll_response(gpib_board_t *board, int ist); -void hp_82341_serial_poll_response(gpib_board_t *board, uint8_t status); -void hp_82341_return_to_local(gpib_board_t *board); - -// interrupt service routines -irqreturn_t hp_82341_interrupt(int irq, void *arg); - -// utility functions -int hp_82341_allocate_private(gpib_board_t *board); -void hp_82341_free_private(gpib_board_t *board); - static const int hp_82341_region_iosize = 0x8; static const int hp_82341_num_io_regions = 4; static const int hp_82341_fifo_size = 0xffe; @@ -199,7 +163,3 @@ enum hp_82341d_pnp_pio_bits { HP_82341D_LEGACY_MODE_BIT = 0x4, HP_82341D_NOT_PROG_BIT = 0x8, // clear to reinitialize xilinx }; - -unsigned short read_and_clear_event_status(gpib_board_t *board); -int read_transfer_counter(struct hp_82341_priv *hp_priv); -void set_transfer_counter(struct hp_82341_priv *hp_priv, int count); diff --git a/drivers/staging/gpib/pc2/pc2_gpib.c b/drivers/staging/gpib/pc2/pc2_gpib.c index 6a1c0cb1d0ed..3eccd4c54afa 100644 --- a/drivers/staging/gpib/pc2/pc2_gpib.c +++ b/drivers/staging/gpib/pc2/pc2_gpib.c @@ -49,15 +49,6 @@ static inline unsigned int CLEAR_INTR_REG(unsigned int irq) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for PC2/PC2a and compatible devices"); -static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int pc2a_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int pc2a_cb7210_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int pc2_2a_attach(gpib_board_t *board, const gpib_board_config_t *config); - -static void pc2_detach(gpib_board_t *board); -static void pc2a_detach(gpib_board_t *board); -static void pc2_2a_detach(gpib_board_t *board); - /* * GPIB interrupt service routines */ @@ -238,118 +229,6 @@ static void pc2_return_to_local(gpib_board_t *board) nec7210_return_to_local(board, &priv->nec7210_priv); } -static gpib_interface_t pc2_interface = { - .name = "pcII", - .attach = pc2_attach, - .detach = pc2_detach, - .read = pc2_read, - .write = pc2_write, - .command = pc2_command, - .take_control = pc2_take_control, - .go_to_standby = pc2_go_to_standby, - .request_system_control = pc2_request_system_control, - .interface_clear = pc2_interface_clear, - .remote_enable = pc2_remote_enable, - .enable_eos = pc2_enable_eos, - .disable_eos = pc2_disable_eos, - .parallel_poll = pc2_parallel_poll, - .parallel_poll_configure = pc2_parallel_poll_configure, - .parallel_poll_response = pc2_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = NULL, - .update_status = pc2_update_status, - .primary_address = pc2_primary_address, - .secondary_address = pc2_secondary_address, - .serial_poll_response = pc2_serial_poll_response, - .serial_poll_status = pc2_serial_poll_status, - .t1_delay = pc2_t1_delay, - .return_to_local = pc2_return_to_local, -}; - -static gpib_interface_t pc2a_interface = { - .name = "pcIIa", - .attach = pc2a_attach, - .detach = pc2a_detach, - .read = pc2_read, - .write = pc2_write, - .command = pc2_command, - .take_control = pc2_take_control, - .go_to_standby = pc2_go_to_standby, - .request_system_control = pc2_request_system_control, - .interface_clear = pc2_interface_clear, - .remote_enable = pc2_remote_enable, - .enable_eos = pc2_enable_eos, - .disable_eos = pc2_disable_eos, - .parallel_poll = pc2_parallel_poll, - .parallel_poll_configure = pc2_parallel_poll_configure, - .parallel_poll_response = pc2_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = NULL, - .update_status = pc2_update_status, - .primary_address = pc2_primary_address, - .secondary_address = pc2_secondary_address, - .serial_poll_response = pc2_serial_poll_response, - .serial_poll_status = pc2_serial_poll_status, - .t1_delay = pc2_t1_delay, - .return_to_local = pc2_return_to_local, -}; - -static gpib_interface_t pc2a_cb7210_interface = { - .name = "pcIIa_cb7210", - .attach = pc2a_cb7210_attach, - .detach = pc2a_detach, - .read = pc2_read, - .write = pc2_write, - .command = pc2_command, - .take_control = pc2_take_control, - .go_to_standby = pc2_go_to_standby, - .request_system_control = pc2_request_system_control, - .interface_clear = pc2_interface_clear, - .remote_enable = pc2_remote_enable, - .enable_eos = pc2_enable_eos, - .disable_eos = pc2_disable_eos, - .parallel_poll = pc2_parallel_poll, - .parallel_poll_configure = pc2_parallel_poll_configure, - .parallel_poll_response = pc2_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = NULL, //XXX - .update_status = pc2_update_status, - .primary_address = pc2_primary_address, - .secondary_address = pc2_secondary_address, - .serial_poll_response = pc2_serial_poll_response, - .serial_poll_status = pc2_serial_poll_status, - .t1_delay = pc2_t1_delay, - .return_to_local = pc2_return_to_local, -}; - -static gpib_interface_t pc2_2a_interface = { - .name = "pcII_IIa", - .attach = pc2_2a_attach, - .detach = pc2_2a_detach, - .read = pc2_read, - .write = pc2_write, - .command = pc2_command, - .take_control = pc2_take_control, - .go_to_standby = pc2_go_to_standby, - .request_system_control = pc2_request_system_control, - .interface_clear = pc2_interface_clear, - .remote_enable = pc2_remote_enable, - .enable_eos = pc2_enable_eos, - .disable_eos = pc2_disable_eos, - .parallel_poll = pc2_parallel_poll, - .parallel_poll_configure = pc2_parallel_poll_configure, - .parallel_poll_response = pc2_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = NULL, - .update_status = pc2_update_status, - .primary_address = pc2_primary_address, - .secondary_address = pc2_secondary_address, - .serial_poll_response = pc2_serial_poll_response, - .serial_poll_status = pc2_serial_poll_status, - .t1_delay = pc2_t1_delay, - .return_to_local = pc2_return_to_local, -}; - static int allocate_private(gpib_board_t *board) { struct pc2_priv *priv; @@ -411,7 +290,7 @@ static int pc2_generic_attach(gpib_board_t *board, const gpib_board_config_t *co return 0; } -int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) { int isr_flags = 0; struct pc2_priv *pc2_priv; @@ -455,7 +334,7 @@ int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void pc2_detach(gpib_board_t *board) +static void pc2_detach(gpib_board_t *board) { struct pc2_priv *pc2_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -575,17 +454,17 @@ static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *co return 0; } -int pc2a_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2a_attach(gpib_board_t *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2a_iosize, NEC7210); } -int pc2a_cb7210_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2a_cb7210_attach(gpib_board_t *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2a_iosize, CB7210); } -int pc2_2a_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2_2a_attach(gpib_board_t *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2_2a_iosize, NAT4882); } @@ -623,16 +502,128 @@ static void pc2a_common_detach(gpib_board_t *board, unsigned int num_registers) free_private(board); } -void pc2a_detach(gpib_board_t *board) +static void pc2a_detach(gpib_board_t *board) { pc2a_common_detach(board, pc2a_iosize); } -void pc2_2a_detach(gpib_board_t *board) +static void pc2_2a_detach(gpib_board_t *board) { pc2a_common_detach(board, pc2_2a_iosize); } +static gpib_interface_t pc2_interface = { + .name = "pcII", + .attach = pc2_attach, + .detach = pc2_detach, + .read = pc2_read, + .write = pc2_write, + .command = pc2_command, + .take_control = pc2_take_control, + .go_to_standby = pc2_go_to_standby, + .request_system_control = pc2_request_system_control, + .interface_clear = pc2_interface_clear, + .remote_enable = pc2_remote_enable, + .enable_eos = pc2_enable_eos, + .disable_eos = pc2_disable_eos, + .parallel_poll = pc2_parallel_poll, + .parallel_poll_configure = pc2_parallel_poll_configure, + .parallel_poll_response = pc2_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = NULL, + .update_status = pc2_update_status, + .primary_address = pc2_primary_address, + .secondary_address = pc2_secondary_address, + .serial_poll_response = pc2_serial_poll_response, + .serial_poll_status = pc2_serial_poll_status, + .t1_delay = pc2_t1_delay, + .return_to_local = pc2_return_to_local, +}; + +static gpib_interface_t pc2a_interface = { + .name = "pcIIa", + .attach = pc2a_attach, + .detach = pc2a_detach, + .read = pc2_read, + .write = pc2_write, + .command = pc2_command, + .take_control = pc2_take_control, + .go_to_standby = pc2_go_to_standby, + .request_system_control = pc2_request_system_control, + .interface_clear = pc2_interface_clear, + .remote_enable = pc2_remote_enable, + .enable_eos = pc2_enable_eos, + .disable_eos = pc2_disable_eos, + .parallel_poll = pc2_parallel_poll, + .parallel_poll_configure = pc2_parallel_poll_configure, + .parallel_poll_response = pc2_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = NULL, + .update_status = pc2_update_status, + .primary_address = pc2_primary_address, + .secondary_address = pc2_secondary_address, + .serial_poll_response = pc2_serial_poll_response, + .serial_poll_status = pc2_serial_poll_status, + .t1_delay = pc2_t1_delay, + .return_to_local = pc2_return_to_local, +}; + +static gpib_interface_t pc2a_cb7210_interface = { + .name = "pcIIa_cb7210", + .attach = pc2a_cb7210_attach, + .detach = pc2a_detach, + .read = pc2_read, + .write = pc2_write, + .command = pc2_command, + .take_control = pc2_take_control, + .go_to_standby = pc2_go_to_standby, + .request_system_control = pc2_request_system_control, + .interface_clear = pc2_interface_clear, + .remote_enable = pc2_remote_enable, + .enable_eos = pc2_enable_eos, + .disable_eos = pc2_disable_eos, + .parallel_poll = pc2_parallel_poll, + .parallel_poll_configure = pc2_parallel_poll_configure, + .parallel_poll_response = pc2_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = NULL, //XXX + .update_status = pc2_update_status, + .primary_address = pc2_primary_address, + .secondary_address = pc2_secondary_address, + .serial_poll_response = pc2_serial_poll_response, + .serial_poll_status = pc2_serial_poll_status, + .t1_delay = pc2_t1_delay, + .return_to_local = pc2_return_to_local, +}; + +static gpib_interface_t pc2_2a_interface = { + .name = "pcII_IIa", + .attach = pc2_2a_attach, + .detach = pc2_2a_detach, + .read = pc2_read, + .write = pc2_write, + .command = pc2_command, + .take_control = pc2_take_control, + .go_to_standby = pc2_go_to_standby, + .request_system_control = pc2_request_system_control, + .interface_clear = pc2_interface_clear, + .remote_enable = pc2_remote_enable, + .enable_eos = pc2_enable_eos, + .disable_eos = pc2_disable_eos, + .parallel_poll = pc2_parallel_poll, + .parallel_poll_configure = pc2_parallel_poll_configure, + .parallel_poll_response = pc2_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = NULL, + .update_status = pc2_update_status, + .primary_address = pc2_primary_address, + .secondary_address = pc2_secondary_address, + .serial_poll_response = pc2_serial_poll_response, + .serial_poll_status = pc2_serial_poll_status, + .t1_delay = pc2_t1_delay, + .return_to_local = pc2_return_to_local, +}; + static int __init pc2_init_module(void) { int ret; diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index b39ab2abe495..2e1c3cbebaca 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -47,49 +47,7 @@ struct tnt4882_priv { unsigned short auxg_bits; // bits written to auxiliary register G }; -// interface functions -static int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, - int *end, size_t *bytes_read); -static int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, - int *end, size_t *bytes_read); -static int tnt4882_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written); -static int tnt4882_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written); -static int tnt4882_command(gpib_board_t *board, uint8_t *buffer, size_t length, - size_t *bytes_written); -static int tnt4882_command_unaccel(gpib_board_t *board, uint8_t *buffer, - size_t length, size_t *bytes_written); -static int tnt4882_take_control(gpib_board_t *board, int synchronous); -static int tnt4882_go_to_standby(gpib_board_t *board); -static void tnt4882_request_system_control(gpib_board_t *board, int request_control); -static void tnt4882_interface_clear(gpib_board_t *board, int assert); -static void tnt4882_remote_enable(gpib_board_t *board, int enable); -static int tnt4882_enable_eos(gpib_board_t *board, uint8_t eos_byte, int - compare_8_bits); -static void tnt4882_disable_eos(gpib_board_t *board); -static unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clear_mask); -static int tnt4882_primary_address(gpib_board_t *board, unsigned int address); -static int tnt4882_secondary_address(gpib_board_t *board, unsigned int address, - int enable); -static int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result); -static void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config); -static void tnt4882_parallel_poll_response(gpib_board_t *board, int ist); -static void tnt4882_serial_poll_response(gpib_board_t *board, uint8_t status); -static uint8_t tnt4882_serial_poll_status(gpib_board_t *board); -static int tnt4882_line_status(const gpib_board_t *board); -static unsigned int tnt4882_t1_delay(gpib_board_t *board, unsigned int nano_sec); -static void tnt4882_return_to_local(gpib_board_t *board); - -// interrupt service routines static irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board); -static irqreturn_t tnt4882_interrupt(int irq, void *arg); - -// utility functions -static int tnt4882_allocate_private(gpib_board_t *board); -static void tnt4882_free_private(gpib_board_t *board); -static void tnt4882_init(struct tnt4882_priv *tnt_priv, const gpib_board_t *board); -static void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, gpib_board_t *board); // register offset for nec7210 compatible registers static const int atgpib_reg_offset = 2; @@ -188,7 +146,7 @@ static inline void tnt_writeb(struct tnt4882_priv *priv, unsigned short value, u MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for National Instruments boards using tnt4882 or compatible chips"); -int tnt4882_line_status(const gpib_board_t *board) +static int tnt4882_line_status(const gpib_board_t *board) { int status = ValidALL; int bcsr_bits; @@ -218,7 +176,7 @@ int tnt4882_line_status(const gpib_board_t *board) return status; } -unsigned int tnt4882_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int tnt4882_t1_delay(gpib_board_t *board, unsigned int nano_sec) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; @@ -314,8 +272,8 @@ static void tnt4882_release_holdoff(gpib_board_t *board, struct tnt4882_priv *tn } } -int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) +static int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { size_t count = 0; ssize_t retval = 0; @@ -596,18 +554,19 @@ static int generic_write(gpib_board_t *board, uint8_t *buffer, size_t length, return retval; } -int tnt4882_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int tnt4882_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { return generic_write(board, buffer, length, send_eoi, 0, bytes_written); } -int tnt4882_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int tnt4882_command(gpib_board_t *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { return generic_write(board, buffer, length, 0, 1, bytes_written); } -irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board) +static irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board) { struct tnt4882_priv *priv = board->private_data; int isr0_bits, isr3_bits, imr3_bits; @@ -642,28 +601,14 @@ irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board) return IRQ_HANDLED; } -irqreturn_t tnt4882_interrupt(int irq, void *arg) +static irqreturn_t tnt4882_interrupt(int irq, void *arg) { return tnt4882_internal_interrupt(arg); } -static int ni_tnt_isa_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ni_nat4882_isa_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ni_nec_isa_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config); - -static void ni_isa_detach(gpib_board_t *board); -static void ni_pci_detach(gpib_board_t *board); - -#ifdef GPIB_PCMCIA -static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config); -static void ni_pcmcia_detach(gpib_board_t *board); -static int init_ni_gpib_cs(void); -static void __exit exit_ni_gpib_cs(void); -#endif - // wrappers for interface functions -int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +static int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + size_t *bytes_read) { struct tnt4882_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -682,37 +627,37 @@ int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, return retval; } -int tnt4882_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int tnt4882_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { struct tnt4882_priv *priv = board->private_data; return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -int tnt4882_command_unaccel(gpib_board_t *board, uint8_t *buffer, - size_t length, size_t *bytes_written) +static int tnt4882_command_unaccel(gpib_board_t *board, uint8_t *buffer, + size_t length, size_t *bytes_written) { struct tnt4882_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -int tnt4882_take_control(gpib_board_t *board, int synchronous) +static int tnt4882_take_control(gpib_board_t *board, int synchronous) { struct tnt4882_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -int tnt4882_go_to_standby(gpib_board_t *board) +static int tnt4882_go_to_standby(gpib_board_t *board) { struct tnt4882_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -void tnt4882_request_system_control(gpib_board_t *board, int request_control) +static void tnt4882_request_system_control(gpib_board_t *board, int request_control) { struct tnt4882_priv *priv = board->private_data; @@ -727,35 +672,35 @@ void tnt4882_request_system_control(gpib_board_t *board, int request_control) } } -void tnt4882_interface_clear(gpib_board_t *board, int assert) +static void tnt4882_interface_clear(gpib_board_t *board, int assert) { struct tnt4882_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -void tnt4882_remote_enable(gpib_board_t *board, int enable) +static void tnt4882_remote_enable(gpib_board_t *board, int enable) { struct tnt4882_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -int tnt4882_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int tnt4882_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) { struct tnt4882_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -void tnt4882_disable_eos(gpib_board_t *board) +static void tnt4882_disable_eos(gpib_board_t *board) { struct tnt4882_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clear_mask) { unsigned long flags; u8 line_status; @@ -775,22 +720,21 @@ unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clear_mask) return board->status; } -int tnt4882_primary_address(gpib_board_t *board, unsigned int address) +static int tnt4882_primary_address(gpib_board_t *board, unsigned int address) { struct tnt4882_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -int tnt4882_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int tnt4882_secondary_address(gpib_board_t *board, unsigned int address, int enable) { struct tnt4882_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result) - +static int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result) { struct tnt4882_priv *tnt_priv = board->private_data; @@ -807,7 +751,7 @@ int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result) } } -void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config) { struct tnt4882_priv *priv = board->private_data; @@ -825,7 +769,7 @@ void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config) } } -void tnt4882_parallel_poll_response(gpib_board_t *board, int ist) +static void tnt4882_parallel_poll_response(gpib_board_t *board, int ist) { struct tnt4882_priv *priv = board->private_data; @@ -835,7 +779,7 @@ void tnt4882_parallel_poll_response(gpib_board_t *board, int ist) /* this is just used by the old nec7210 isa interfaces, the newer * boards use tnt4882_serial_poll_response2 */ -void tnt4882_serial_poll_response(gpib_board_t *board, uint8_t status) +static void tnt4882_serial_poll_response(gpib_board_t *board, uint8_t status) { struct tnt4882_priv *priv = board->private_data; @@ -876,20 +820,331 @@ static void tnt4882_serial_poll_response2(gpib_board_t *board, uint8_t status, spin_unlock_irqrestore(&board->spinlock, flags); } -uint8_t tnt4882_serial_poll_status(gpib_board_t *board) +static uint8_t tnt4882_serial_poll_status(gpib_board_t *board) { struct tnt4882_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -void tnt4882_return_to_local(gpib_board_t *board) +static void tnt4882_return_to_local(gpib_board_t *board) { struct tnt4882_priv *priv = board->private_data; nec7210_return_to_local(board, &priv->nec7210_priv); } +static void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, gpib_board_t *board) +{ + struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; + + tnt_priv->imr0_bits = 0; + tnt_writeb(tnt_priv, tnt_priv->imr0_bits, IMR0); + tnt_priv->imr3_bits = 0; + tnt_writeb(tnt_priv, tnt_priv->imr3_bits, IMR3); + tnt_readb(tnt_priv, IMR0); + tnt_readb(tnt_priv, IMR3); + nec7210_board_reset(nec_priv, board); +} + +static int tnt4882_allocate_private(gpib_board_t *board) +{ + struct tnt4882_priv *tnt_priv; + + board->private_data = kmalloc(sizeof(struct tnt4882_priv), GFP_KERNEL); + if (!board->private_data) + return -1; + tnt_priv = board->private_data; + memset(tnt_priv, 0, sizeof(struct tnt4882_priv)); + init_nec7210_private(&tnt_priv->nec7210_priv); + return 0; +} + +static void tnt4882_free_private(gpib_board_t *board) +{ + kfree(board->private_data); + board->private_data = NULL; +} + +static void tnt4882_init(struct tnt4882_priv *tnt_priv, const gpib_board_t *board) +{ + struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; + + /* Turbo488 software reset */ + tnt_writeb(tnt_priv, SOFT_RESET, CMDR); + udelay(1); + + // turn off one-chip mode + tnt_writeb(tnt_priv, NODMA, HSSEL); + tnt_writeb(tnt_priv, 0, ACCWR); + // make sure we are in 7210 mode + tnt_writeb(tnt_priv, AUX_7210, AUXCR); + udelay(1); + // registers might be swapped, so write it to the swapped address too + tnt_writeb(tnt_priv, AUX_7210, SWAPPED_AUXCR); + udelay(1); + // turn on one-chip mode + if (nec_priv->type == TNT4882 || nec_priv->type == TNT5004) + tnt_writeb(tnt_priv, NODMA | TNT_ONE_CHIP_BIT, HSSEL); + else + tnt_writeb(tnt_priv, NODMA, HSSEL); + + nec7210_board_reset(nec_priv, board); + // read-clear isr0 + tnt_readb(tnt_priv, ISR0); + + // enable passing of nat4882 interrupts + tnt_priv->imr3_bits = HR_TLCI; + tnt_writeb(tnt_priv, tnt_priv->imr3_bits, IMR3); + + // enable interrupt + tnt_writeb(tnt_priv, 0x1, INTRT); + + // force immediate holdoff + write_byte(&tnt_priv->nec7210_priv, AUX_HLDI, AUXMR); + + set_bit(RFD_HOLDOFF_BN, &nec_priv->state); + + tnt_priv->auxg_bits = AUXRG | NTNL_BIT; + write_byte(&tnt_priv->nec7210_priv, tnt_priv->auxg_bits, AUXMR); + + nec7210_board_online(nec_priv, board); + // enable interface clear interrupt for event queue + tnt_priv->imr0_bits = TNT_IMR0_ALWAYS_BITS | TNT_ATNI_BIT | TNT_IFCIE_BIT; + tnt_writeb(tnt_priv, tnt_priv->imr0_bits, IMR0); +} + +static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +{ + struct tnt4882_priv *tnt_priv; + struct nec7210_priv *nec_priv; + int isr_flags = IRQF_SHARED; + int retval; + struct mite_struct *mite; + + board->status = 0; + + if (tnt4882_allocate_private(board)) + return -ENOMEM; + tnt_priv = board->private_data; + nec_priv = &tnt_priv->nec7210_priv; + nec_priv->type = TNT4882; + nec_priv->read_byte = nec7210_locking_iomem_read_byte; + nec_priv->write_byte = nec7210_locking_iomem_write_byte; + nec_priv->offset = atgpib_reg_offset; + + if (!mite_devices) { + pr_err("no National Instruments PCI boards found\n"); + return -1; + } + + for (mite = mite_devices; mite; mite = mite->next) { + short found_board; + + if (mite->used) + continue; + if (config->pci_bus >= 0 && config->pci_bus != mite->pcidev->bus->number) + continue; + if (config->pci_slot >= 0 && config->pci_slot != PCI_SLOT(mite->pcidev->devfn)) + continue; + switch (mite_device_id(mite)) { + case PCI_DEVICE_ID_NI_GPIB: + case PCI_DEVICE_ID_NI_GPIB_PLUS: + case PCI_DEVICE_ID_NI_GPIB_PLUS2: + case PCI_DEVICE_ID_NI_PXIGPIB: + case PCI_DEVICE_ID_NI_PMCGPIB: + case PCI_DEVICE_ID_NI_PCIEGPIB: + case PCI_DEVICE_ID_NI_PCIE2GPIB: +// support for Measurement Computing PCI-488 + case PCI_DEVICE_ID_MC_PCI488: + case PCI_DEVICE_ID_CEC_NI_GPIB: + found_board = 1; + break; + default: + found_board = 0; + break; + } + if (found_board) + break; + } + if (!mite) { + pr_err("no NI PCI-GPIB boards found\n"); + return -1; + } + tnt_priv->mite = mite; + retval = mite_setup(tnt_priv->mite); + if (retval < 0) { + pr_err("tnt4882: error setting up mite.\n"); + return retval; + } + + nec_priv->mmiobase = tnt_priv->mite->daq_io_addr; + + // get irq + if (request_irq(mite_irq(tnt_priv->mite), tnt4882_interrupt, isr_flags, + "ni-pci-gpib", board)) { + pr_err("gpib: can't request IRQ %d\n", mite_irq(tnt_priv->mite)); + return -1; + } + tnt_priv->irq = mite_irq(tnt_priv->mite); + pr_info("tnt4882: irq %i\n", tnt_priv->irq); + + // TNT5004 detection + switch (tnt_readb(tnt_priv, CSR) & 0xf0) { + case 0x30: + nec_priv->type = TNT4882; + pr_info("tnt4882: TNT4882 chipset detected\n"); + break; + case 0x40: + nec_priv->type = TNT5004; + pr_info("tnt4882: TNT5004 chipset detected\n"); + break; + } + tnt4882_init(tnt_priv, board); + + return 0; +} + +static void ni_pci_detach(gpib_board_t *board) +{ + struct tnt4882_priv *tnt_priv = board->private_data; + struct nec7210_priv *nec_priv; + + if (tnt_priv) { + nec_priv = &tnt_priv->nec7210_priv; + + if (nec_priv->mmiobase) + tnt4882_board_reset(tnt_priv, board); + if (tnt_priv->irq) + free_irq(tnt_priv->irq, board); + if (tnt_priv->mite) + mite_unsetup(tnt_priv->mite); + } + tnt4882_free_private(board); +} + +static int ni_isapnp_find(struct pnp_dev **dev) +{ + *dev = pnp_find_dev(NULL, ISAPNP_VENDOR_ID_NI, + ISAPNP_FUNCTION(ISAPNP_ID_NI_ATGPIB_TNT), NULL); + if (!*dev || !(*dev)->card) { + pr_err("tnt4882: failed to find isapnp board\n"); + return -ENODEV; + } + if (pnp_device_attach(*dev) < 0) { + pr_err("tnt4882: atgpib/tnt board already active, skipping\n"); + return -EBUSY; + } + if (pnp_activate_dev(*dev) < 0) { + pnp_device_detach(*dev); + pr_err("tnt4882: failed to activate() atgpib/tnt, aborting\n"); + return -EAGAIN; + } + if (!pnp_port_valid(*dev, 0) || !pnp_irq_valid(*dev, 0)) { + pnp_device_detach(*dev); + pr_err("tnt4882: invalid port or irq for atgpib/tnt, aborting\n"); + return -ENOMEM; + } + return 0; +} + +static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t *config, + enum nec7210_chipset chipset) +{ + struct tnt4882_priv *tnt_priv; + struct nec7210_priv *nec_priv; + int isr_flags = 0; + u32 iobase; + int irq; + + board->status = 0; + + if (tnt4882_allocate_private(board)) + return -ENOMEM; + tnt_priv = board->private_data; + nec_priv = &tnt_priv->nec7210_priv; + nec_priv->type = chipset; + nec_priv->read_byte = nec7210_locking_ioport_read_byte; + nec_priv->write_byte = nec7210_locking_ioport_write_byte; + nec_priv->offset = atgpib_reg_offset; + + // look for plug-n-play board + if (config->ibbase == 0) { + struct pnp_dev *dev; + int retval; + + retval = ni_isapnp_find(&dev); + if (retval < 0) + return retval; + tnt_priv->pnp_dev = dev; + iobase = pnp_port_start(dev, 0); + irq = pnp_irq(dev, 0); + } else { + iobase = config->ibbase; + irq = config->ibirq; + } + // allocate ioports + if (!request_region(iobase, atgpib_iosize, "atgpib")) { + pr_err("tnt4882: failed to allocate ioports\n"); + return -1; + } + nec_priv->mmiobase = ioport_map(iobase, atgpib_iosize); + if (!nec_priv->mmiobase) + return -1; + + // get irq + if (request_irq(irq, tnt4882_interrupt, isr_flags, "atgpib", board)) { + pr_err("gpib: can't request IRQ %d\n", irq); + return -1; + } + tnt_priv->irq = irq; + + tnt4882_init(tnt_priv, board); + + return 0; +} + +static int ni_tnt_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +{ + return ni_isa_attach_common(board, config, TNT4882); +} + +static int ni_nat4882_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +{ + return ni_isa_attach_common(board, config, NAT4882); +} + +static int ni_nec_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +{ + return ni_isa_attach_common(board, config, NEC7210); +} + +static void ni_isa_detach(gpib_board_t *board) +{ + struct tnt4882_priv *tnt_priv = board->private_data; + struct nec7210_priv *nec_priv; + + if (tnt_priv) { + nec_priv = &tnt_priv->nec7210_priv; + if (nec_priv->iobase) + tnt4882_board_reset(tnt_priv, board); + if (tnt_priv->irq) + free_irq(tnt_priv->irq, board); + if (nec_priv->mmiobase) + ioport_unmap(nec_priv->mmiobase); + if (nec_priv->iobase) + release_region(nec_priv->iobase, atgpib_iosize); + if (tnt_priv->pnp_dev) + pnp_device_detach(tnt_priv->pnp_dev); + } + tnt4882_free_private(board); +} + +static int tnt4882_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + return 0; +} + static gpib_interface_t ni_pci_interface = { .name = "ni_pci", .attach = ni_pci_attach, @@ -1114,375 +1369,6 @@ static gpib_interface_t ni_nec_isa_accel_interface = { .return_to_local = tnt4882_return_to_local, }; -#ifdef GPIB_PCMCIA -static gpib_interface_t ni_pcmcia_interface = { - .name = "ni_pcmcia", - .attach = ni_pcmcia_attach, - .detach = ni_pcmcia_detach, - .read = tnt4882_accel_read, - .write = tnt4882_accel_write, - .command = tnt4882_command, - .take_control = tnt4882_take_control, - .go_to_standby = tnt4882_go_to_standby, - .request_system_control = tnt4882_request_system_control, - .interface_clear = tnt4882_interface_clear, - .remote_enable = tnt4882_remote_enable, - .enable_eos = tnt4882_enable_eos, - .disable_eos = tnt4882_disable_eos, - .parallel_poll = tnt4882_parallel_poll, - .parallel_poll_configure = tnt4882_parallel_poll_configure, - .parallel_poll_response = tnt4882_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = tnt4882_line_status, - .update_status = tnt4882_update_status, - .primary_address = tnt4882_primary_address, - .secondary_address = tnt4882_secondary_address, - .serial_poll_response = tnt4882_serial_poll_response, - .serial_poll_status = tnt4882_serial_poll_status, - .t1_delay = tnt4882_t1_delay, - .return_to_local = tnt4882_return_to_local, -}; - -static gpib_interface_t ni_pcmcia_accel_interface = { - .name = "ni_pcmcia_accel", - .attach = ni_pcmcia_attach, - .detach = ni_pcmcia_detach, - .read = tnt4882_accel_read, - .write = tnt4882_accel_write, - .command = tnt4882_command, - .take_control = tnt4882_take_control, - .go_to_standby = tnt4882_go_to_standby, - .request_system_control = tnt4882_request_system_control, - .interface_clear = tnt4882_interface_clear, - .remote_enable = tnt4882_remote_enable, - .enable_eos = tnt4882_enable_eos, - .disable_eos = tnt4882_disable_eos, - .parallel_poll = tnt4882_parallel_poll, - .parallel_poll_configure = tnt4882_parallel_poll_configure, - .parallel_poll_response = tnt4882_parallel_poll_response, - .local_parallel_poll_mode = NULL, // XXX - .line_status = tnt4882_line_status, - .update_status = tnt4882_update_status, - .primary_address = tnt4882_primary_address, - .secondary_address = tnt4882_secondary_address, - .serial_poll_response = tnt4882_serial_poll_response, - .serial_poll_status = tnt4882_serial_poll_status, - .t1_delay = tnt4882_t1_delay, - .return_to_local = tnt4882_return_to_local, -}; -#endif - -void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, gpib_board_t *board) -{ - struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; - - tnt_priv->imr0_bits = 0; - tnt_writeb(tnt_priv, tnt_priv->imr0_bits, IMR0); - tnt_priv->imr3_bits = 0; - tnt_writeb(tnt_priv, tnt_priv->imr3_bits, IMR3); - tnt_readb(tnt_priv, IMR0); - tnt_readb(tnt_priv, IMR3); - nec7210_board_reset(nec_priv, board); -} - -int tnt4882_allocate_private(gpib_board_t *board) -{ - struct tnt4882_priv *tnt_priv; - - board->private_data = kmalloc(sizeof(struct tnt4882_priv), GFP_KERNEL); - if (!board->private_data) - return -1; - tnt_priv = board->private_data; - memset(tnt_priv, 0, sizeof(struct tnt4882_priv)); - init_nec7210_private(&tnt_priv->nec7210_priv); - return 0; -} - -void tnt4882_free_private(gpib_board_t *board) -{ - kfree(board->private_data); - board->private_data = NULL; -} - -void tnt4882_init(struct tnt4882_priv *tnt_priv, const gpib_board_t *board) -{ - struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; - - /* Turbo488 software reset */ - tnt_writeb(tnt_priv, SOFT_RESET, CMDR); - udelay(1); - - // turn off one-chip mode - tnt_writeb(tnt_priv, NODMA, HSSEL); - tnt_writeb(tnt_priv, 0, ACCWR); - // make sure we are in 7210 mode - tnt_writeb(tnt_priv, AUX_7210, AUXCR); - udelay(1); - // registers might be swapped, so write it to the swapped address too - tnt_writeb(tnt_priv, AUX_7210, SWAPPED_AUXCR); - udelay(1); - // turn on one-chip mode - if (nec_priv->type == TNT4882 || nec_priv->type == TNT5004) - tnt_writeb(tnt_priv, NODMA | TNT_ONE_CHIP_BIT, HSSEL); - else - tnt_writeb(tnt_priv, NODMA, HSSEL); - - nec7210_board_reset(nec_priv, board); - // read-clear isr0 - tnt_readb(tnt_priv, ISR0); - - // enable passing of nat4882 interrupts - tnt_priv->imr3_bits = HR_TLCI; - tnt_writeb(tnt_priv, tnt_priv->imr3_bits, IMR3); - - // enable interrupt - tnt_writeb(tnt_priv, 0x1, INTRT); - - // force immediate holdoff - write_byte(&tnt_priv->nec7210_priv, AUX_HLDI, AUXMR); - - set_bit(RFD_HOLDOFF_BN, &nec_priv->state); - - tnt_priv->auxg_bits = AUXRG | NTNL_BIT; - write_byte(&tnt_priv->nec7210_priv, tnt_priv->auxg_bits, AUXMR); - - nec7210_board_online(nec_priv, board); - // enable interface clear interrupt for event queue - tnt_priv->imr0_bits = TNT_IMR0_ALWAYS_BITS | TNT_ATNI_BIT | TNT_IFCIE_BIT; - tnt_writeb(tnt_priv, tnt_priv->imr0_bits, IMR0); -} - -int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) -{ - struct tnt4882_priv *tnt_priv; - struct nec7210_priv *nec_priv; - int isr_flags = IRQF_SHARED; - int retval; - struct mite_struct *mite; - - board->status = 0; - - if (tnt4882_allocate_private(board)) - return -ENOMEM; - tnt_priv = board->private_data; - nec_priv = &tnt_priv->nec7210_priv; - nec_priv->type = TNT4882; - nec_priv->read_byte = nec7210_locking_iomem_read_byte; - nec_priv->write_byte = nec7210_locking_iomem_write_byte; - nec_priv->offset = atgpib_reg_offset; - - if (!mite_devices) { - pr_err("no National Instruments PCI boards found\n"); - return -1; - } - - for (mite = mite_devices; mite; mite = mite->next) { - short found_board; - - if (mite->used) - continue; - if (config->pci_bus >= 0 && config->pci_bus != mite->pcidev->bus->number) - continue; - if (config->pci_slot >= 0 && config->pci_slot != PCI_SLOT(mite->pcidev->devfn)) - continue; - switch (mite_device_id(mite)) { - case PCI_DEVICE_ID_NI_GPIB: - case PCI_DEVICE_ID_NI_GPIB_PLUS: - case PCI_DEVICE_ID_NI_GPIB_PLUS2: - case PCI_DEVICE_ID_NI_PXIGPIB: - case PCI_DEVICE_ID_NI_PMCGPIB: - case PCI_DEVICE_ID_NI_PCIEGPIB: - case PCI_DEVICE_ID_NI_PCIE2GPIB: -// support for Measurement Computing PCI-488 - case PCI_DEVICE_ID_MC_PCI488: - case PCI_DEVICE_ID_CEC_NI_GPIB: - found_board = 1; - break; - default: - found_board = 0; - break; - } - if (found_board) - break; - } - if (!mite) { - pr_err("no NI PCI-GPIB boards found\n"); - return -1; - } - tnt_priv->mite = mite; - retval = mite_setup(tnt_priv->mite); - if (retval < 0) { - pr_err("tnt4882: error setting up mite.\n"); - return retval; - } - - nec_priv->mmiobase = tnt_priv->mite->daq_io_addr; - - // get irq - if (request_irq(mite_irq(tnt_priv->mite), tnt4882_interrupt, isr_flags, - "ni-pci-gpib", board)) { - pr_err("gpib: can't request IRQ %d\n", mite_irq(tnt_priv->mite)); - return -1; - } - tnt_priv->irq = mite_irq(tnt_priv->mite); - pr_info("tnt4882: irq %i\n", tnt_priv->irq); - - // TNT5004 detection - switch (tnt_readb(tnt_priv, CSR) & 0xf0) { - case 0x30: - nec_priv->type = TNT4882; - pr_info("tnt4882: TNT4882 chipset detected\n"); - break; - case 0x40: - nec_priv->type = TNT5004; - pr_info("tnt4882: TNT5004 chipset detected\n"); - break; - } - tnt4882_init(tnt_priv, board); - - return 0; -} - -void ni_pci_detach(gpib_board_t *board) -{ - struct tnt4882_priv *tnt_priv = board->private_data; - struct nec7210_priv *nec_priv; - - if (tnt_priv) { - nec_priv = &tnt_priv->nec7210_priv; - - if (nec_priv->mmiobase) - tnt4882_board_reset(tnt_priv, board); - if (tnt_priv->irq) - free_irq(tnt_priv->irq, board); - if (tnt_priv->mite) - mite_unsetup(tnt_priv->mite); - } - tnt4882_free_private(board); -} - -static int ni_isapnp_find(struct pnp_dev **dev) -{ - *dev = pnp_find_dev(NULL, ISAPNP_VENDOR_ID_NI, - ISAPNP_FUNCTION(ISAPNP_ID_NI_ATGPIB_TNT), NULL); - if (!*dev || !(*dev)->card) { - pr_err("tnt4882: failed to find isapnp board\n"); - return -ENODEV; - } - if (pnp_device_attach(*dev) < 0) { - pr_err("tnt4882: atgpib/tnt board already active, skipping\n"); - return -EBUSY; - } - if (pnp_activate_dev(*dev) < 0) { - pnp_device_detach(*dev); - pr_err("tnt4882: failed to activate() atgpib/tnt, aborting\n"); - return -EAGAIN; - } - if (!pnp_port_valid(*dev, 0) || !pnp_irq_valid(*dev, 0)) { - pnp_device_detach(*dev); - pr_err("tnt4882: invalid port or irq for atgpib/tnt, aborting\n"); - return -ENOMEM; - } - return 0; -} - -static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t *config, - enum nec7210_chipset chipset) -{ - struct tnt4882_priv *tnt_priv; - struct nec7210_priv *nec_priv; - int isr_flags = 0; - u32 iobase; - int irq; - - board->status = 0; - - if (tnt4882_allocate_private(board)) - return -ENOMEM; - tnt_priv = board->private_data; - nec_priv = &tnt_priv->nec7210_priv; - nec_priv->type = chipset; - nec_priv->read_byte = nec7210_locking_ioport_read_byte; - nec_priv->write_byte = nec7210_locking_ioport_write_byte; - nec_priv->offset = atgpib_reg_offset; - - // look for plug-n-play board - if (config->ibbase == 0) { - struct pnp_dev *dev; - int retval; - - retval = ni_isapnp_find(&dev); - if (retval < 0) - return retval; - tnt_priv->pnp_dev = dev; - iobase = pnp_port_start(dev, 0); - irq = pnp_irq(dev, 0); - } else { - iobase = config->ibbase; - irq = config->ibirq; - } - // allocate ioports - if (!request_region(iobase, atgpib_iosize, "atgpib")) { - pr_err("tnt4882: failed to allocate ioports\n"); - return -1; - } - nec_priv->mmiobase = ioport_map(iobase, atgpib_iosize); - if (!nec_priv->mmiobase) - return -1; - - // get irq - if (request_irq(irq, tnt4882_interrupt, isr_flags, "atgpib", board)) { - pr_err("gpib: can't request IRQ %d\n", irq); - return -1; - } - tnt_priv->irq = irq; - - tnt4882_init(tnt_priv, board); - - return 0; -} - -int ni_tnt_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) -{ - return ni_isa_attach_common(board, config, TNT4882); -} - -int ni_nat4882_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) -{ - return ni_isa_attach_common(board, config, NAT4882); -} - -int ni_nec_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) -{ - return ni_isa_attach_common(board, config, NEC7210); -} - -void ni_isa_detach(gpib_board_t *board) -{ - struct tnt4882_priv *tnt_priv = board->private_data; - struct nec7210_priv *nec_priv; - - if (tnt_priv) { - nec_priv = &tnt_priv->nec7210_priv; - if (nec_priv->iobase) - tnt4882_board_reset(tnt_priv, board); - if (tnt_priv->irq) - free_irq(tnt_priv->irq, board); - if (nec_priv->mmiobase) - ioport_unmap(nec_priv->mmiobase); - if (nec_priv->iobase) - release_region(nec_priv->iobase, atgpib_iosize); - if (tnt_priv->pnp_dev) - pnp_device_detach(tnt_priv->pnp_dev); - } - tnt4882_free_private(board); -} - -static int tnt4882_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - return 0; -} - static const struct pci_device_id tnt4882_pci_table[] = { {PCI_DEVICE(PCI_VENDOR_ID_NATINST, PCI_DEVICE_ID_NI_GPIB)}, {PCI_DEVICE(PCI_VENDOR_ID_NATINST, PCI_DEVICE_ID_NI_GPIB_PLUS)}, @@ -1510,6 +1396,13 @@ static const struct pnp_device_id tnt4882_pnp_table[] = { }; MODULE_DEVICE_TABLE(pnp, tnt4882_pnp_table); +#ifdef GPIB_PCMCIA +static gpib_interface_t ni_pcmcia_interface; +static gpib_interface_t ni_pcmcia_accel_interface; +static int __init init_ni_gpib_cs(void); +static void __exit exit_ni_gpib_cs(void); +#endif + static int __init tnt4882_init_module(void) { int result; @@ -1676,7 +1569,6 @@ module_param(pc_debug, int, 0); static int ni_gpib_config(struct pcmcia_device *link); static void ni_gpib_release(struct pcmcia_device *link); -static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config); static void ni_pcmcia_detach(gpib_board_t *board); /* @@ -1854,12 +1746,12 @@ static struct pcmcia_driver ni_gpib_cs_driver = { .resume = ni_gpib_resume, }; -int __init init_ni_gpib_cs(void) +static int __init init_ni_gpib_cs(void) { return pcmcia_register_driver(&ni_gpib_cs_driver); } -void __exit exit_ni_gpib_cs(void) +static void __exit exit_ni_gpib_cs(void) { DEBUG(0, "ni_gpib_cs: unloading\n"); pcmcia_unregister_driver(&ni_gpib_cs_driver); @@ -1867,7 +1759,7 @@ void __exit exit_ni_gpib_cs(void) static const int pcmcia_gpib_iosize = 32; -int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) { struct local_info_t *info; struct tnt4882_priv *tnt_priv; @@ -1920,7 +1812,7 @@ int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void ni_pcmcia_detach(gpib_board_t *board) +static void ni_pcmcia_detach(gpib_board_t *board) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1941,6 +1833,62 @@ void ni_pcmcia_detach(gpib_board_t *board) tnt4882_free_private(board); } +static gpib_interface_t ni_pcmcia_interface = { + .name = "ni_pcmcia", + .attach = ni_pcmcia_attach, + .detach = ni_pcmcia_detach, + .read = tnt4882_accel_read, + .write = tnt4882_accel_write, + .command = tnt4882_command, + .take_control = tnt4882_take_control, + .go_to_standby = tnt4882_go_to_standby, + .request_system_control = tnt4882_request_system_control, + .interface_clear = tnt4882_interface_clear, + .remote_enable = tnt4882_remote_enable, + .enable_eos = tnt4882_enable_eos, + .disable_eos = tnt4882_disable_eos, + .parallel_poll = tnt4882_parallel_poll, + .parallel_poll_configure = tnt4882_parallel_poll_configure, + .parallel_poll_response = tnt4882_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = tnt4882_line_status, + .update_status = tnt4882_update_status, + .primary_address = tnt4882_primary_address, + .secondary_address = tnt4882_secondary_address, + .serial_poll_response = tnt4882_serial_poll_response, + .serial_poll_status = tnt4882_serial_poll_status, + .t1_delay = tnt4882_t1_delay, + .return_to_local = tnt4882_return_to_local, +}; + +static gpib_interface_t ni_pcmcia_accel_interface = { + .name = "ni_pcmcia_accel", + .attach = ni_pcmcia_attach, + .detach = ni_pcmcia_detach, + .read = tnt4882_accel_read, + .write = tnt4882_accel_write, + .command = tnt4882_command, + .take_control = tnt4882_take_control, + .go_to_standby = tnt4882_go_to_standby, + .request_system_control = tnt4882_request_system_control, + .interface_clear = tnt4882_interface_clear, + .remote_enable = tnt4882_remote_enable, + .enable_eos = tnt4882_enable_eos, + .disable_eos = tnt4882_disable_eos, + .parallel_poll = tnt4882_parallel_poll, + .parallel_poll_configure = tnt4882_parallel_poll_configure, + .parallel_poll_response = tnt4882_parallel_poll_response, + .local_parallel_poll_mode = NULL, // XXX + .line_status = tnt4882_line_status, + .update_status = tnt4882_update_status, + .primary_address = tnt4882_primary_address, + .secondary_address = tnt4882_secondary_address, + .serial_poll_response = tnt4882_serial_poll_response, + .serial_poll_status = tnt4882_serial_poll_status, + .t1_delay = tnt4882_t1_delay, + .return_to_local = tnt4882_return_to_local, +}; + #endif // GPIB_PCMCIA module_init(tnt4882_init_module); From 03ec050c437bb4e7c5d215bbeedaa93932f13b35 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 24 Jan 2025 11:58:58 +0100 Subject: [PATCH 0022/2157] staging: gpib: Fix pr_err format warning This patch fixes the following compile warning: drivers/staging/gpib/hp_82341/hp_82341.c: In function 'hp_82341_attach': ./include/linux/kern_levels.h:5:25: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'u32' {aka 'unsigned int'} [-Wformat=] It was introduced in commit baf8855c9160 ("staging: gpib: fix address space mixup") but was not detected as the build of the driver depended on BROKEN. Fixes: baf8855c9160 ("staging: gpib: fix address space mixup") Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250124105900.27592-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82341/hp_82341.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 9cd2a2f50ff1..800f99c05566 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -727,7 +727,7 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi for (i = 0; i < hp_82341_num_io_regions; ++i) { start_addr = iobase + i * hp_priv->io_region_offset; if (!request_region(start_addr, hp_82341_region_iosize, "hp_82341")) { - pr_err("hp_82341: failed to allocate io ports 0x%lx-0x%lx\n", + pr_err("hp_82341: failed to allocate io ports 0x%x-0x%x\n", start_addr, start_addr + hp_82341_region_iosize - 1); return -EIO; From fa757a8446b15fc173e587f8d6d278fbc6a2d041 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 24 Jan 2025 11:58:59 +0100 Subject: [PATCH 0023/2157] pnp: isapnp: Export isapnp_read_byte again The build of drivers/staging/gpib/hp_82341 driver was failing with: ERROR: modpost: "isapnp_read_byte" [drivers/staging/gpib/hp_82341/hp_82341.ko] undefined! because the symbol was not exported for modules. There were no errors building with allyesconfig The symbol was removed by Link: https://lore.kernel.org/all/20051030202734.GN4180@stusta.de/ because it was no longer used by any mainline modules. Export the symbol again. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20241014162054.2b91b5af@canb.auug.org.au Cc: Jaroslav Kysela Cc: Rafael Cc: Arnd Bergmann Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250124105900.27592-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/pnp/isapnp/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index c43d8ad02529..d2ff76e74a05 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -843,6 +843,7 @@ EXPORT_SYMBOL(isapnp_protocol); EXPORT_SYMBOL(isapnp_present); EXPORT_SYMBOL(isapnp_cfg_begin); EXPORT_SYMBOL(isapnp_cfg_end); +EXPORT_SYMBOL(isapnp_read_byte); EXPORT_SYMBOL(isapnp_write_byte); static int isapnp_get_resources(struct pnp_dev *dev) From 7b66aae77da56f2eabd92d3fb012d2fb98212bbd Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 24 Jan 2025 11:59:00 +0100 Subject: [PATCH 0024/2157] staging: gpib: Remove depends on BROKEN The build of drivers/staging/gpib/hp_82341 driver was failing with: ERROR: modpost: "isapnp_read_byte" [drivers/staging/gpib/hp_82341/hp_82341.ko] undefined! The driver was marked BROKEN to fix this issue Link: https://lore.kernel.org/all/2024101412-outsider-icing-052e@gregkh/ Remove the dependency on BROKEN Signed-off-by: Dave Penkler Link: https://lore.kernel.org/linux-staging/20250122103859.25499-3-dpenkler@gmail.com/T/#u Link: https://lore.kernel.org/r/20250124105900.27592-4-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/gpib/Kconfig b/drivers/staging/gpib/Kconfig index 81510db3072e..2bf11df122e2 100644 --- a/drivers/staging/gpib/Kconfig +++ b/drivers/staging/gpib/Kconfig @@ -169,7 +169,6 @@ config GPIB_HP82341 tristate "HP82341x" select GPIB_COMMON select GPIB_TMS9914 - depends on BROKEN depends on ISA_BUS || EISA help GPIB driver for HP82341 A/B/C/D boards From 856a2d5946c14a387e9eba12ddc95198efc02d71 Mon Sep 17 00:00:00 2001 From: Madhu M Date: Sat, 18 Jan 2025 22:54:54 +0530 Subject: [PATCH 0025/2157] usb: typec: ucsi: Rename SET_UOM UCSI command to SET_CCOM Rename the SET_UOM UCSI command to SET_CCOM as per the UCSI 3.0 spec. Signed-off-by: Madhu M Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250118172455.701348-1-madhu.m@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/debugfs.c | 2 +- drivers/usb/typec/ucsi/trace.c | 2 +- drivers/usb/typec/ucsi/ucsi.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/debugfs.c b/drivers/usb/typec/ucsi/debugfs.c index 83ff23086d79..638ac41b4b01 100644 --- a/drivers/usb/typec/ucsi/debugfs.c +++ b/drivers/usb/typec/ucsi/debugfs.c @@ -28,7 +28,7 @@ static int ucsi_cmd(void *data, u64 val) ucsi->debugfs->status = 0; switch (UCSI_COMMAND(val)) { - case UCSI_SET_UOM: + case UCSI_SET_CCOM: case UCSI_SET_UOR: case UCSI_SET_PDR: case UCSI_CONNECTOR_RESET: diff --git a/drivers/usb/typec/ucsi/trace.c b/drivers/usb/typec/ucsi/trace.c index cb62ad835761..596a9542d401 100644 --- a/drivers/usb/typec/ucsi/trace.c +++ b/drivers/usb/typec/ucsi/trace.c @@ -12,7 +12,7 @@ static const char * const ucsi_cmd_strs[] = { [UCSI_SET_NOTIFICATION_ENABLE] = "SET_NOTIFICATION_ENABLE", [UCSI_GET_CAPABILITY] = "GET_CAPABILITY", [UCSI_GET_CONNECTOR_CAPABILITY] = "GET_CONNECTOR_CAPABILITY", - [UCSI_SET_UOM] = "SET_UOM", + [UCSI_SET_CCOM] = "SET_CCOM", [UCSI_SET_UOR] = "SET_UOR", [UCSI_SET_PDM] = "SET_PDM", [UCSI_SET_PDR] = "SET_PDR", diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 82735eb34f0e..ebfd8a953a93 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -106,7 +106,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num); #define UCSI_GET_CAPABILITY_SIZE 128 #define UCSI_GET_CONNECTOR_CAPABILITY 0x07 #define UCSI_GET_CONNECTOR_CAPABILITY_SIZE 32 -#define UCSI_SET_UOM 0x08 +#define UCSI_SET_CCOM 0x08 #define UCSI_SET_UOR 0x09 #define UCSI_SET_PDM 0x0a #define UCSI_SET_PDR 0x0b From 9bc3442914692109ac7194449e1a0866ca39b1f1 Mon Sep 17 00:00:00 2001 From: Madhu M Date: Sat, 18 Jan 2025 22:54:55 +0530 Subject: [PATCH 0026/2157] usb: typec: ucsi: Enable UCSI commands in debugfs Enable the UCSI commands UCSI_SET_NEW_CAM, UCSI_GET_ERROR_STATUS, UCSI_GET_CAM_CS, and UCSI_GET_LPM_PPM_INFO support in debugfs to enhance PD/TypeC debugging capability. Signed-off-by: Madhu M Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250118172455.701348-2-madhu.m@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/debugfs.c | 4 ++++ drivers/usb/typec/ucsi/ucsi.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/typec/ucsi/debugfs.c b/drivers/usb/typec/ucsi/debugfs.c index 638ac41b4b01..eae2b18a2d8a 100644 --- a/drivers/usb/typec/ucsi/debugfs.c +++ b/drivers/usb/typec/ucsi/debugfs.c @@ -33,6 +33,7 @@ static int ucsi_cmd(void *data, u64 val) case UCSI_SET_PDR: case UCSI_CONNECTOR_RESET: case UCSI_SET_SINK_PATH: + case UCSI_SET_NEW_CAM: ret = ucsi_send_command(ucsi, val, NULL, 0); break; case UCSI_GET_CAPABILITY: @@ -42,6 +43,9 @@ static int ucsi_cmd(void *data, u64 val) case UCSI_GET_PDOS: case UCSI_GET_CABLE_PROPERTY: case UCSI_GET_CONNECTOR_STATUS: + case UCSI_GET_ERROR_STATUS: + case UCSI_GET_CAM_CS: + case UCSI_GET_LPM_PPM_INFO: ret = ucsi_send_command(ucsi, val, &ucsi->debugfs->response, sizeof(ucsi->debugfs->response)); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index ebfd8a953a93..17061c64cfb7 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -121,7 +121,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num); #define UCSI_GET_CONNECTOR_STATUS_SIZE 152 #define UCSI_GET_ERROR_STATUS 0x13 #define UCSI_GET_PD_MESSAGE 0x15 +#define UCSI_GET_CAM_CS 0x18 #define UCSI_SET_SINK_PATH 0x1c +#define UCSI_GET_LPM_PPM_INFO 0x22 #define UCSI_CONNECTOR_NUMBER(_num_) ((u64)(_num_) << 16) #define UCSI_COMMAND(_cmd_) ((_cmd_) & 0xff) From 7abbfe6e694ee8cdd9455436f17e9aec080d3ab6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 28 Jan 2025 16:06:08 +0100 Subject: [PATCH 0027/2157] usb: gadget: uvc: drop vb2_ops_wait_prepare/finish Since commit 88785982a19d ("media: vb2: use lock if wait_prepare/finish are NULL") it is no longer needed to set the wait_prepare/finish vb2_ops callbacks as long as the lock field in vb2_queue is set. Since the vb2_ops_wait_prepare/finish callbacks already rely on that field, we can safely drop these callbacks. This simplifies the code and this is a step towards the goal of deleting these callbacks. Signed-off-by: Hans Verkuil Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/2fb746b8-31c4-44e0-bf7b-7a0758edf52a@xs4all.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_queue.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index 5eaeae3e2441..9a1bbd79ff5a 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -122,8 +122,6 @@ static const struct vb2_ops uvc_queue_qops = { .queue_setup = uvc_queue_setup, .buf_prepare = uvc_buffer_prepare, .buf_queue = uvc_buffer_queue, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; int uvcg_queue_init(struct uvc_video_queue *queue, struct device *dev, enum v4l2_buf_type type, From 41d5e3806cf589f658f92c75195095df0b66f66a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 28 Jan 2025 20:51:13 +0100 Subject: [PATCH 0028/2157] usb: host: max3421-hcd: Add missing spi_device_id table "maxim,max3421" DT compatible is missing its SPI device ID entry, not allowing module autoloading and leading to the following message: "SPI driver max3421-hcd has no spi_device_id for maxim,max3421" Fix this by adding the spi_device_id table. Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20250128195114.56321-1-alexander.stein@mailbox.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/max3421-hcd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 0881fdd1823e..dcf31a592f5d 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1946,6 +1946,12 @@ max3421_remove(struct spi_device *spi) usb_put_hcd(hcd); } +static const struct spi_device_id max3421_spi_ids[] = { + { "max3421" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, max3421_spi_ids); + static const struct of_device_id max3421_of_match_table[] = { { .compatible = "maxim,max3421", }, {}, @@ -1955,6 +1961,7 @@ MODULE_DEVICE_TABLE(of, max3421_of_match_table); static struct spi_driver max3421_driver = { .probe = max3421_probe, .remove = max3421_remove, + .id_table = max3421_spi_ids, .driver = { .name = "max3421-hcd", .of_match_table = max3421_of_match_table, From 667ecac55861281c1f5e107c8550ae893b3984f6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 20 Jan 2025 11:16:43 +0200 Subject: [PATCH 0029/2157] usb: typec: ucsi: return CCI and message from sync_control callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the drivers emulate or handle some of the commands in the platform-specific way. The code ends up being split between several callbacks, which complicates emulation. In preparation to reworking such drivers, move read_cci() and read_message_in() calls into ucsi_sync_control_common(). Signed-off-by: Dmitry Baryshkov Reviewed-by: Łukasz Bartosik Link: https://lore.kernel.org/r/20250120-ucsi-merge-commands-v2-1-462a1ec22ecc@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/cros_ec_ucsi.c | 5 +++-- drivers/usb/typec/ucsi/ucsi.c | 19 +++++++++++-------- drivers/usb/typec/ucsi/ucsi.h | 6 ++++-- drivers/usb/typec/ucsi/ucsi_acpi.c | 5 +++-- drivers/usb/typec/ucsi/ucsi_ccg.c | 5 +++-- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c index c605c8616726..744f0709a40e 100644 --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c @@ -105,12 +105,13 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) return 0; } -static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd, u32 *cci, + void *data, size_t size) { struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); int ret; - ret = ucsi_sync_control_common(ucsi, cmd); + ret = ucsi_sync_control_common(ucsi, cmd, cci, data, size); switch (ret) { case -EBUSY: /* EC may return -EBUSY if CCI.busy is set. diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index fcf499cc9458..559390a07a4e 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -55,7 +55,8 @@ void ucsi_notify_common(struct ucsi *ucsi, u32 cci) } EXPORT_SYMBOL_GPL(ucsi_notify_common); -int ucsi_sync_control_common(struct ucsi *ucsi, u64 command) +int ucsi_sync_control_common(struct ucsi *ucsi, u64 command, u32 *cci, + void *data, size_t size) { bool ack = UCSI_COMMAND(command) == UCSI_ACK_CC_CI; int ret; @@ -80,6 +81,13 @@ int ucsi_sync_control_common(struct ucsi *ucsi, u64 command) else clear_bit(COMMAND_PENDING, &ucsi->flags); + if (!ret && cci) + ret = ucsi->ops->read_cci(ucsi, cci); + + if (!ret && data && + (*cci & UCSI_CCI_COMMAND_COMPLETE)) + ret = ucsi->ops->read_message_in(ucsi, data, size); + return ret; } EXPORT_SYMBOL_GPL(ucsi_sync_control_common); @@ -95,7 +103,7 @@ static int ucsi_acknowledge(struct ucsi *ucsi, bool conn_ack) ctrl |= UCSI_ACK_CONNECTOR_CHANGE; } - return ucsi->ops->sync_control(ucsi, ctrl); + return ucsi->ops->sync_control(ucsi, ctrl, NULL, NULL, 0); } static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, @@ -108,9 +116,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (size > UCSI_MAX_DATA_LENGTH(ucsi)) return -EINVAL; - ret = ucsi->ops->sync_control(ucsi, command); - if (ucsi->ops->read_cci(ucsi, cci)) - return -EIO; + ret = ucsi->ops->sync_control(ucsi, command, cci, data, size); if (*cci & UCSI_CCI_BUSY) return ucsi_run_command(ucsi, UCSI_CANCEL, cci, NULL, 0, false) ?: -EBUSY; @@ -127,9 +133,6 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, else err = 0; - if (!err && data && UCSI_CCI_LENGTH(*cci)) - err = ucsi->ops->read_message_in(ucsi, data, size); - /* * Don't ACK connection change if there was an error. */ diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 17061c64cfb7..feb012db4c89 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -77,7 +77,8 @@ struct ucsi_operations { int (*read_version)(struct ucsi *ucsi, u16 *version); int (*read_cci)(struct ucsi *ucsi, u32 *cci); int (*read_message_in)(struct ucsi *ucsi, void *val, size_t val_len); - int (*sync_control)(struct ucsi *ucsi, u64 command); + int (*sync_control)(struct ucsi *ucsi, u64 command, u32 *cci, + void *data, size_t size); int (*async_control)(struct ucsi *ucsi, u64 command); bool (*update_altmodes)(struct ucsi *ucsi, struct ucsi_altmode *orig, struct ucsi_altmode *updated); @@ -531,7 +532,8 @@ void ucsi_altmode_update_active(struct ucsi_connector *con); int ucsi_resume(struct ucsi *ucsi); void ucsi_notify_common(struct ucsi *ucsi, u32 cci); -int ucsi_sync_control_common(struct ucsi *ucsi, u64 command); +int ucsi_sync_control_common(struct ucsi *ucsi, u64 command, u32 *cci, + void *data, size_t size); #if IS_ENABLED(CONFIG_POWER_SUPPLY) int ucsi_register_port_psy(struct ucsi_connector *con); diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 5c5515551963..8b02082201ec 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -122,12 +122,13 @@ static int ucsi_gram_read_message_in(struct ucsi *ucsi, void *val, size_t val_le return ret; } -static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command) +static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, + void *data, size_t size) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); int ret; - ret = ucsi_sync_control_common(ucsi, command); + ret = ucsi_sync_control_common(ucsi, command, cci, data, size); if (ret < 0) return ret; diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 740171f24ef9..02ac04a52239 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -628,7 +628,8 @@ static int ucsi_ccg_async_control(struct ucsi *ucsi, u64 command) return ccg_write(uc, reg, (u8 *)&command, sizeof(command)); } -static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command) +static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, + void *data, size_t size) { struct ucsi_ccg *uc = ucsi_get_drvdata(ucsi); struct ucsi_connector *con; @@ -652,7 +653,7 @@ static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command) ucsi_ccg_update_set_new_cam_cmd(uc, con, &command); } - ret = ucsi_sync_control_common(ucsi, command); + ret = ucsi_sync_control_common(ucsi, command, cci, data, size); err_put: pm_runtime_put_sync(uc->dev); From 7f82635494ef3391ff6b542249793c7febf99c3f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 20 Jan 2025 11:16:44 +0200 Subject: [PATCH 0030/2157] usb: typec: ucsi: ccg: move command quirks to ucsi_ccg_sync_control() It is easier to keep all command-specific quirks in a single place. Move them to ucsi_ccg_sync_control() as the code now allows us to return modified messages data. Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250120-ucsi-merge-commands-v2-2-462a1ec22ecc@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 62 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 02ac04a52239..47498ee6cca8 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -222,7 +222,6 @@ struct ucsi_ccg { u16 fw_build; struct work_struct pm_work; - u64 last_cmd_sent; bool has_multiple_dp; struct ucsi_ccg_altmode orig[UCSI_MAX_ALTMODES]; struct ucsi_ccg_altmode updated[UCSI_MAX_ALTMODES]; @@ -538,9 +537,10 @@ static void ucsi_ccg_update_set_new_cam_cmd(struct ucsi_ccg *uc, * first and then vdo=0x3 */ static void ucsi_ccg_nvidia_altmode(struct ucsi_ccg *uc, - struct ucsi_altmode *alt) + struct ucsi_altmode *alt, + u64 command) { - switch (UCSI_ALTMODE_OFFSET(uc->last_cmd_sent)) { + switch (UCSI_ALTMODE_OFFSET(command)) { case NVIDIA_FTB_DP_OFFSET: if (alt[0].mid == USB_TYPEC_NVIDIA_VLINK_DBG_VDO) alt[0].mid = USB_TYPEC_NVIDIA_VLINK_DP_VDO | @@ -578,37 +578,11 @@ static int ucsi_ccg_read_cci(struct ucsi *ucsi, u32 *cci) static int ucsi_ccg_read_message_in(struct ucsi *ucsi, void *val, size_t val_len) { struct ucsi_ccg *uc = ucsi_get_drvdata(ucsi); - struct ucsi_capability *cap; - struct ucsi_altmode *alt; spin_lock(&uc->op_lock); memcpy(val, uc->op_data.message_in, val_len); spin_unlock(&uc->op_lock); - switch (UCSI_COMMAND(uc->last_cmd_sent)) { - case UCSI_GET_CURRENT_CAM: - if (uc->has_multiple_dp) - ucsi_ccg_update_get_current_cam_cmd(uc, (u8 *)val); - break; - case UCSI_GET_ALTERNATE_MODES: - if (UCSI_ALTMODE_RECIPIENT(uc->last_cmd_sent) == - UCSI_RECIPIENT_SOP) { - alt = val; - if (alt[0].svid == USB_TYPEC_NVIDIA_VLINK_SID) - ucsi_ccg_nvidia_altmode(uc, alt); - } - break; - case UCSI_GET_CAPABILITY: - if (uc->fw_build == CCG_FW_BUILD_NVIDIA_TEGRA) { - cap = val; - cap->features &= ~UCSI_CAP_ALT_MODE_DETAILS; - } - break; - default: - break; - } - uc->last_cmd_sent = 0; - return 0; } @@ -639,11 +613,9 @@ static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, mutex_lock(&uc->lock); pm_runtime_get_sync(uc->dev); - uc->last_cmd_sent = command; - - if (UCSI_COMMAND(uc->last_cmd_sent) == UCSI_SET_NEW_CAM && + if (UCSI_COMMAND(command) == UCSI_SET_NEW_CAM && uc->has_multiple_dp) { - con_index = (uc->last_cmd_sent >> 16) & + con_index = (command >> 16) & UCSI_CMD_CONNECTOR_MASK; if (con_index == 0) { ret = -EINVAL; @@ -655,6 +627,30 @@ static int ucsi_ccg_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, ret = ucsi_sync_control_common(ucsi, command, cci, data, size); + switch (UCSI_COMMAND(command)) { + case UCSI_GET_CURRENT_CAM: + if (uc->has_multiple_dp) + ucsi_ccg_update_get_current_cam_cmd(uc, (u8 *)data); + break; + case UCSI_GET_ALTERNATE_MODES: + if (UCSI_ALTMODE_RECIPIENT(command) == UCSI_RECIPIENT_SOP) { + struct ucsi_altmode *alt = data; + + if (alt[0].svid == USB_TYPEC_NVIDIA_VLINK_SID) + ucsi_ccg_nvidia_altmode(uc, alt, command); + } + break; + case UCSI_GET_CAPABILITY: + if (uc->fw_build == CCG_FW_BUILD_NVIDIA_TEGRA) { + struct ucsi_capability *cap = data; + + cap->features &= ~UCSI_CAP_ALT_MODE_DETAILS; + } + break; + default: + break; + } + err_put: pm_runtime_put_sync(uc->dev); mutex_unlock(&uc->lock); From f9cf5401526c5bfb85d91f14664bf75d1889e7d2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 20 Jan 2025 11:16:45 +0200 Subject: [PATCH 0031/2157] usb: typec: ucsi: acpi: move LG Gram quirk to ucsi_gram_sync_control() It is easier to keep all command-specific quirks in a single place. Move them to ucsi_gram_sync_control() as the code now allows us to return modified messages data. Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250120-ucsi-merge-commands-v2-3-462a1ec22ecc@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 8b02082201ec..ada5d0d21ee6 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -99,17 +99,23 @@ static const struct ucsi_operations ucsi_acpi_ops = { .async_control = ucsi_acpi_async_control }; -static int ucsi_gram_read_message_in(struct ucsi *ucsi, void *val, size_t val_len) +static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, + void *val, size_t len) { u16 bogus_change = UCSI_CONSTAT_POWER_LEVEL_CHANGE | UCSI_CONSTAT_PDOS_CHANGE; struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); int ret; - ret = ucsi_acpi_read_message_in(ucsi, val, val_len); + ret = ucsi_sync_control_common(ucsi, command, cci, val, len); if (ret < 0) return ret; + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS && + ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) && + ua->cmd & UCSI_GET_PDOS_SRC_PDOS) + ua->check_bogus_event = true; + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_CONNECTOR_STATUS && ua->check_bogus_event) { /* Clear the bogus change */ @@ -122,28 +128,10 @@ static int ucsi_gram_read_message_in(struct ucsi *ucsi, void *val, size_t val_le return ret; } -static int ucsi_gram_sync_control(struct ucsi *ucsi, u64 command, u32 *cci, - void *data, size_t size) -{ - struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - int ret; - - ret = ucsi_sync_control_common(ucsi, command, cci, data, size); - if (ret < 0) - return ret; - - if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS && - ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) && - ua->cmd & UCSI_GET_PDOS_SRC_PDOS) - ua->check_bogus_event = true; - - return ret; -} - static const struct ucsi_operations ucsi_gram_ops = { .read_version = ucsi_acpi_read_version, .read_cci = ucsi_acpi_read_cci, - .read_message_in = ucsi_gram_read_message_in, + .read_message_in = ucsi_acpi_read_message_in, .sync_control = ucsi_gram_sync_control, .async_control = ucsi_acpi_async_control }; From 9570d99f44c969ebf3bd7d52434a491c1c1db470 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 3 Feb 2025 15:58:41 +0900 Subject: [PATCH 0032/2157] usb: phy: mxs: silence EPROBE_DEFER error on boot Use dev_err_probe to silence EPROBE_DEFER error on boot on i.MX8ULP: [ 0.127301] mxs_phy 29910000.usb-phy: can't get the clock, err=-517 Signed-off-by: Dominique Martinet Link: https://lore.kernel.org/r/20250203-defer_usb2-v3-1-428182286ce3@atmark-techno.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-mxs-usb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index 7490f1798b46..7069dd3f4d0d 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -769,11 +769,9 @@ static int mxs_phy_probe(struct platform_device *pdev) return PTR_ERR(base); clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, - "can't get the clock, err=%ld", PTR_ERR(clk)); - return PTR_ERR(clk); - } + if (IS_ERR(clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(clk), + "can't get the clock\n"); mxs_phy = devm_kzalloc(&pdev->dev, sizeof(*mxs_phy), GFP_KERNEL); if (!mxs_phy) From 42bc7faaf3a0da2adff71e292efe3eb428018c07 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 3 Feb 2025 15:33:49 +0900 Subject: [PATCH 0033/2157] usb: usb251xb: silence EPROBE_DEFER error on boot Use dev_err_probe to silence EPROBE_DEFER error on boot: [ 0.757677] usb251xb 1-002c: failed to get ofdata: -517 Signed-off-by: Dominique Martinet Acked-by: Richard Leitner Link: https://lore.kernel.org/r/20250203-defer_usb1-v2-1-eba405ebee2c@atmark-techno.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb251xb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index e24cdb667307..4fb453ca5450 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -636,10 +636,8 @@ static int usb251xb_probe(struct usb251xb *hub) if (np && usb_data) { err = usb251xb_get_ofdata(hub, usb_data); - if (err) { - dev_err(dev, "failed to get ofdata: %d\n", err); - return err; - } + if (err) + return dev_err_probe(dev, err, "failed to get ofdata\n"); } /* From 51333bfbf18f730a78bbb85895f9c9e4c994d883 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 18 Jan 2025 22:10:17 +0100 Subject: [PATCH 0034/2157] usb: musb: Constify struct musb_fifo_cfg 'struct musb_fifo_cfg' are not modified in these drivers. Constifying these structures moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 64381 5537 202 70120 111e8 drivers/usb/musb/musb_core.o After: ===== text data bss dec hex filename 64957 4929 202 70088 111c8 drivers/usb/musb/musb_core.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/26e6f3e25dc8e785d0034dd7e59918e455563e60.1737234596.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- .../driver-api/usb/writing_musb_glue_layer.rst | 2 +- drivers/usb/musb/jz4740.c | 4 ++-- drivers/usb/musb/mediatek.c | 2 +- drivers/usb/musb/mpfs.c | 2 +- drivers/usb/musb/musb_core.c | 14 +++++++------- drivers/usb/musb/sunxi.c | 4 ++-- include/linux/usb/musb.h | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/driver-api/usb/writing_musb_glue_layer.rst b/Documentation/driver-api/usb/writing_musb_glue_layer.rst index e755c8551bba..0bb96ecdf527 100644 --- a/Documentation/driver-api/usb/writing_musb_glue_layer.rst +++ b/Documentation/driver-api/usb/writing_musb_glue_layer.rst @@ -613,7 +613,7 @@ endpoints configuration from the hardware, so we use line 12 instruction to bypass reading the configuration from silicon, and rely on a hard-coded table that describes the endpoints configuration instead:: - static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { + static const struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, }, diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c index acdeb1117cd3..df56c972986f 100644 --- a/drivers/usb/musb/jz4740.c +++ b/drivers/usb/musb/jz4740.c @@ -59,7 +59,7 @@ static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) return IRQ_NONE; } -static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { +static const struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, }, @@ -205,7 +205,7 @@ static const struct musb_hdrc_platform_data jz4740_musb_pdata = { .platform_ops = &jz4740_musb_ops, }; -static struct musb_fifo_cfg jz4770_musb_fifo_cfg[] = { +static const struct musb_fifo_cfg jz4770_musb_fifo_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c index aa988d74b58d..c6cbe718b1da 100644 --- a/drivers/usb/musb/mediatek.c +++ b/drivers/usb/musb/mediatek.c @@ -365,7 +365,7 @@ static const struct musb_platform_ops mtk_musb_ops = { #define MTK_MUSB_MAX_EP_NUM 8 #define MTK_MUSB_RAM_BITS 11 -static struct musb_fifo_cfg mtk_musb_mode_cfg[] = { +static const struct musb_fifo_cfg mtk_musb_mode_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, diff --git a/drivers/usb/musb/mpfs.c b/drivers/usb/musb/mpfs.c index 7edc8429b274..71e4271cba75 100644 --- a/drivers/usb/musb/mpfs.c +++ b/drivers/usb/musb/mpfs.c @@ -29,7 +29,7 @@ struct mpfs_glue { struct clk *clk; }; -static struct musb_fifo_cfg mpfs_musb_mode_cfg[] = { +static const struct musb_fifo_cfg mpfs_musb_mode_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 7f349f5e781d..96fa700eaed1 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1271,7 +1271,7 @@ MODULE_PARM_DESC(fifo_mode, "initial endpoint configuration"); */ /* mode 0 - fits in 2KB */ -static struct musb_fifo_cfg mode_0_cfg[] = { +static const struct musb_fifo_cfg mode_0_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_RXTX, .maxpacket = 512, }, @@ -1280,7 +1280,7 @@ static struct musb_fifo_cfg mode_0_cfg[] = { }; /* mode 1 - fits in 4KB */ -static struct musb_fifo_cfg mode_1_cfg[] = { +static const struct musb_fifo_cfg mode_1_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, .mode = BUF_DOUBLE, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, .mode = BUF_DOUBLE, }, { .hw_ep_num = 2, .style = FIFO_RXTX, .maxpacket = 512, .mode = BUF_DOUBLE, }, @@ -1289,7 +1289,7 @@ static struct musb_fifo_cfg mode_1_cfg[] = { }; /* mode 2 - fits in 4KB */ -static struct musb_fifo_cfg mode_2_cfg[] = { +static const struct musb_fifo_cfg mode_2_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, @@ -1299,7 +1299,7 @@ static struct musb_fifo_cfg mode_2_cfg[] = { }; /* mode 3 - fits in 4KB */ -static struct musb_fifo_cfg mode_3_cfg[] = { +static const struct musb_fifo_cfg mode_3_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, .mode = BUF_DOUBLE, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, .mode = BUF_DOUBLE, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, @@ -1309,7 +1309,7 @@ static struct musb_fifo_cfg mode_3_cfg[] = { }; /* mode 4 - fits in 16KB */ -static struct musb_fifo_cfg mode_4_cfg[] = { +static const struct musb_fifo_cfg mode_4_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, @@ -1340,7 +1340,7 @@ static struct musb_fifo_cfg mode_4_cfg[] = { }; /* mode 5 - fits in 8KB */ -static struct musb_fifo_cfg mode_5_cfg[] = { +static const struct musb_fifo_cfg mode_5_cfg[] = { { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 512, }, @@ -1447,7 +1447,7 @@ fifo_setup(struct musb *musb, struct musb_hw_ep *hw_ep, return offset + (maxpacket << ((c_size & MUSB_FIFOSZ_DPB) ? 1 : 0)); } -static struct musb_fifo_cfg ep0_cfg = { +static const struct musb_fifo_cfg ep0_cfg = { .style = FIFO_RXTX, .maxpacket = 64, }; diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index eac1cde86be3..a6bd3e968cc7 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -629,7 +629,7 @@ static const struct musb_platform_ops sunxi_musb_ops = { #define SUNXI_MUSB_RAM_BITS 11 /* Allwinner OTG supports up to 5 endpoints */ -static struct musb_fifo_cfg sunxi_musb_mode_cfg_5eps[] = { +static const struct musb_fifo_cfg sunxi_musb_mode_cfg_5eps[] = { MUSB_EP_FIFO_SINGLE(1, FIFO_TX, 512), MUSB_EP_FIFO_SINGLE(1, FIFO_RX, 512), MUSB_EP_FIFO_SINGLE(2, FIFO_TX, 512), @@ -643,7 +643,7 @@ static struct musb_fifo_cfg sunxi_musb_mode_cfg_5eps[] = { }; /* H3/V3s OTG supports only 4 endpoints */ -static struct musb_fifo_cfg sunxi_musb_mode_cfg_4eps[] = { +static const struct musb_fifo_cfg sunxi_musb_mode_cfg_4eps[] = { MUSB_EP_FIFO_SINGLE(1, FIFO_TX, 512), MUSB_EP_FIFO_SINGLE(1, FIFO_RX, 512), MUSB_EP_FIFO_SINGLE(2, FIFO_TX, 512), diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 3963e55e88a3..fbdef950f06c 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -61,7 +61,7 @@ struct musb_hdrc_eps_bits { }; struct musb_hdrc_config { - struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ + const struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ unsigned fifo_cfg_size; /* size of the fifo configuration */ /* MUSB configuration-specific details */ From b51c1e8d2f49342b2087338c72511326fdb7b172 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Fri, 24 Jan 2025 19:40:23 +0000 Subject: [PATCH 0035/2157] usb: typec: thunderbolt: Fix loops that iterate TYPEC_PLUG_SOP_P and TYPEC_PLUG_SOP_PP Fixes these Smatch static checker warnings: drivers/usb/typec/altmodes/thunderbolt.c:116 tbt_altmode_work() warn: why is zero skipped 'i' drivers/usb/typec/altmodes/thunderbolt.c:147 tbt_enter_modes_ordered() warn: why is zero skipped 'i' drivers/usb/typec/altmodes/thunderbolt.c:328 tbt_altmode_remove() warn: why is zero skipped 'i' Fixes: 100e25738659 ("usb: typec: Add driver for Thunderbolt 3 Alternate Mode") Signed-off-by: Benson Leung Reported-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/Z5Psp615abaaId6J@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/thunderbolt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/altmodes/thunderbolt.c b/drivers/usb/typec/altmodes/thunderbolt.c index 1b475b1d98e7..94e47d30e598 100644 --- a/drivers/usb/typec/altmodes/thunderbolt.c +++ b/drivers/usb/typec/altmodes/thunderbolt.c @@ -112,7 +112,7 @@ static void tbt_altmode_work(struct work_struct *work) return; disable_plugs: - for (int i = TYPEC_PLUG_SOP_PP; i > 0; --i) { + for (int i = TYPEC_PLUG_SOP_PP; i >= 0; --i) { if (tbt->plug[i]) typec_altmode_put_plug(tbt->plug[i]); @@ -143,7 +143,7 @@ static int tbt_enter_modes_ordered(struct typec_altmode *alt) if (tbt->plug[TYPEC_PLUG_SOP_P]) { ret = typec_cable_altmode_enter(alt, TYPEC_PLUG_SOP_P, NULL); if (ret < 0) { - for (int i = TYPEC_PLUG_SOP_PP; i > 0; --i) { + for (int i = TYPEC_PLUG_SOP_PP; i >= 0; --i) { if (tbt->plug[i]) typec_altmode_put_plug(tbt->plug[i]); @@ -324,7 +324,7 @@ static void tbt_altmode_remove(struct typec_altmode *alt) { struct tbt_altmode *tbt = typec_altmode_get_drvdata(alt); - for (int i = TYPEC_PLUG_SOP_PP; i > 0; --i) { + for (int i = TYPEC_PLUG_SOP_PP; i >= 0; --i) { if (tbt->plug[i]) typec_altmode_put_plug(tbt->plug[i]); } From 9682c35ff6ecd76d9462d4749b8b413d3e8e605e Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Fri, 24 Jan 2025 19:40:38 +0000 Subject: [PATCH 0036/2157] usb: typec: thunderbolt: Remove IS_ERR check for plug Fixes these Smatch static checker warnings: drivers/usb/typec/altmodes/thunderbolt.c:354 tbt_ready() warn: 'plug' is not an error pointer Fixes: 100e25738659 ("usb: typec: Add driver for Thunderbolt 3 Alternate Mode") Signed-off-by: Benson Leung Reported-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/Z5PstnlA52Z1F2SU@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/thunderbolt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/altmodes/thunderbolt.c b/drivers/usb/typec/altmodes/thunderbolt.c index 94e47d30e598..6eadf7835f8f 100644 --- a/drivers/usb/typec/altmodes/thunderbolt.c +++ b/drivers/usb/typec/altmodes/thunderbolt.c @@ -351,10 +351,10 @@ static bool tbt_ready(struct typec_altmode *alt) */ for (int i = 0; i < TYPEC_PLUG_SOP_PP + 1; i++) { plug = typec_altmode_get_plug(tbt->alt, i); - if (IS_ERR(plug)) + if (!plug) continue; - if (!plug || plug->svid != USB_TYPEC_TBT_SID) + if (plug->svid != USB_TYPEC_TBT_SID) break; plug->desc = "Thunderbolt3"; From 39bc50e00f8c54852e95b6a7fe8afd886709841d Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 8 Jan 2025 13:49:33 +0100 Subject: [PATCH 0037/2157] iio: adc: ad7380: do not use iio_device_claim_direct_scoped anymore Conditionnal scoped handlers are turning out to be a real pain: readability issues, compiler and linker handling issues among others so rollback and remove the scoped version of iio_dvice_claim_direct_mode. To impove code readability factorize code to set oversampling ratio. Signed-off-by: Julien Stephan Link: https://patch.msgid.link/20250108-ad7380-add-alert-support-v4-1-1751802471ba@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 110 ++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 4f32cb22f140..bc7d58850a3e 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -675,15 +675,21 @@ static const struct regmap_config ad7380_regmap_config = { static int ad7380_debugfs_reg_access(struct iio_dev *indio_dev, u32 reg, u32 writeval, u32 *readval) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct ad7380_state *st = iio_priv(indio_dev); + struct ad7380_state *st = iio_priv(indio_dev); + int ret; - if (readval) - return regmap_read(st->regmap, reg, readval); - else - return regmap_write(st->regmap, reg, writeval); - } - unreachable(); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + if (readval) + ret = regmap_read(st->regmap, reg, readval); + else + ret = regmap_write(st->regmap, reg, writeval); + + iio_device_release_direct_mode(indio_dev); + + return ret; } /* @@ -920,6 +926,7 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, { struct ad7380_state *st = iio_priv(indio_dev); const struct iio_scan_type *scan_type; + int ret; scan_type = iio_get_current_scan_type(indio_dev, chan); @@ -928,11 +935,16 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - return ad7380_read_direct(st, chan->scan_index, - scan_type, val); - } - unreachable(); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad7380_read_direct(st, chan->scan_index, + scan_type, val); + + iio_device_release_direct_mode(indio_dev); + + return ret; case IIO_CHAN_INFO_SCALE: /* * According to the datasheet, the LSB size is: @@ -1008,47 +1020,59 @@ static int ad7380_osr_to_regval(int ratio) return -EINVAL; } +static int ad7380_set_oversampling_ratio(struct ad7380_state *st, int val) +{ + int ret, osr, boost; + + osr = ad7380_osr_to_regval(val); + if (osr < 0) + return osr; + + /* always enable resolution boost when oversampling is enabled */ + boost = osr > 0 ? 1 : 0; + + ret = regmap_update_bits(st->regmap, + AD7380_REG_ADDR_CONFIG1, + AD7380_CONFIG1_OSR | AD7380_CONFIG1_RES, + FIELD_PREP(AD7380_CONFIG1_OSR, osr) | + FIELD_PREP(AD7380_CONFIG1_RES, boost)); + + if (ret) + return ret; + + st->oversampling_ratio = val; + st->resolution_boost_enabled = boost; + + /* + * Perform a soft reset. This will flush the oversampling + * block and FIFO but will maintain the content of the + * configurable registers. + */ + ret = regmap_update_bits(st->regmap, + AD7380_REG_ADDR_CONFIG2, + AD7380_CONFIG2_RESET, + FIELD_PREP(AD7380_CONFIG2_RESET, + AD7380_CONFIG2_RESET_SOFT)); + return ret; +} static int ad7380_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct ad7380_state *st = iio_priv(indio_dev); - int ret, osr, boost; + int ret; switch (mask) { case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - osr = ad7380_osr_to_regval(val); - if (osr < 0) - return osr; + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; - /* always enable resolution boost when oversampling is enabled */ - boost = osr > 0 ? 1 : 0; + ret = ad7380_set_oversampling_ratio(st, val); - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = regmap_update_bits(st->regmap, - AD7380_REG_ADDR_CONFIG1, - AD7380_CONFIG1_OSR | AD7380_CONFIG1_RES, - FIELD_PREP(AD7380_CONFIG1_OSR, osr) | - FIELD_PREP(AD7380_CONFIG1_RES, boost)); + iio_device_release_direct_mode(indio_dev); - if (ret) - return ret; - - st->oversampling_ratio = val; - st->resolution_boost_enabled = boost; - - /* - * Perform a soft reset. This will flush the oversampling - * block and FIFO but will maintain the content of the - * configurable registers. - */ - return regmap_update_bits(st->regmap, - AD7380_REG_ADDR_CONFIG2, - AD7380_CONFIG2_RESET, - FIELD_PREP(AD7380_CONFIG2_RESET, - AD7380_CONFIG2_RESET_SOFT)); - } - unreachable(); + return ret; default: return -EINVAL; } From 85e5605279dff928f389cdfa1bd9c34d176011d9 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 8 Jan 2025 13:49:34 +0100 Subject: [PATCH 0038/2157] iio: adc: ad7380: enable regmap cache Enable regmap cache, to avoid useless access on spi bus. Reviewed-by: David Lechner Signed-off-by: Julien Stephan Link: https://patch.msgid.link/20250108-ad7380-add-alert-support-v4-2-1751802471ba@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index bc7d58850a3e..b97d2978289e 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -663,6 +663,20 @@ static int ad7380_regmap_reg_read(void *context, unsigned int reg, return 0; } +static const struct reg_default ad7380_reg_defaults[] = { + { AD7380_REG_ADDR_ALERT_LOW_TH, 0x800 }, + { AD7380_REG_ADDR_ALERT_HIGH_TH, 0x7FF }, +}; + +static const struct regmap_range ad7380_volatile_reg_ranges[] = { + regmap_reg_range(AD7380_REG_ADDR_CONFIG2, AD7380_REG_ADDR_ALERT), +}; + +static const struct regmap_access_table ad7380_volatile_regs = { + .yes_ranges = ad7380_volatile_reg_ranges, + .n_yes_ranges = ARRAY_SIZE(ad7380_volatile_reg_ranges), +}; + static const struct regmap_config ad7380_regmap_config = { .reg_bits = 3, .val_bits = 12, @@ -670,6 +684,10 @@ static const struct regmap_config ad7380_regmap_config = { .reg_write = ad7380_regmap_reg_write, .max_register = AD7380_REG_ADDR_ALERT_HIGH_TH, .can_sleep = true, + .reg_defaults = ad7380_reg_defaults, + .num_reg_defaults = ARRAY_SIZE(ad7380_reg_defaults), + .volatile_table = &ad7380_volatile_regs, + .cache_type = REGCACHE_MAPLE, }; static int ad7380_debugfs_reg_access(struct iio_dev *indio_dev, u32 reg, From adc59fe0c2222ee578fe40f4ae7ef6bb380dcd73 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 8 Jan 2025 13:49:35 +0100 Subject: [PATCH 0039/2157] iio: adc: ad7380: do not store osr in private data structure Since regmap cache is now enabled, we don't need to store the oversampling ratio in the private data structure. Reviewed-by: David Lechner Signed-off-by: Julien Stephan Link: https://patch.msgid.link/20250108-ad7380-add-alert-support-v4-3-1751802471ba@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 79 +++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index b97d2978289e..a532de442208 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -582,7 +582,6 @@ struct ad7380_state { const struct ad7380_chip_info *chip_info; struct spi_device *spi; struct regmap *regmap; - unsigned int oversampling_ratio; bool resolution_boost_enabled; unsigned int ch; bool seq; @@ -710,6 +709,36 @@ static int ad7380_debugfs_reg_access(struct iio_dev *indio_dev, u32 reg, return ret; } +/** + * ad7380_regval_to_osr - convert OSR register value to ratio + * @regval: register value to check + * + * Returns: the ratio corresponding to the OSR register. If regval is not in + * bound, return 1 (oversampling disabled) + * + */ +static int ad7380_regval_to_osr(unsigned int regval) +{ + if (regval >= ARRAY_SIZE(ad7380_oversampling_ratios)) + return 1; + + return ad7380_oversampling_ratios[regval]; +} + +static int ad7380_get_osr(struct ad7380_state *st, int *val) +{ + u32 tmp; + int ret; + + ret = regmap_read(st->regmap, AD7380_REG_ADDR_CONFIG1, &tmp); + if (ret) + return ret; + + *val = ad7380_regval_to_osr(FIELD_GET(AD7380_CONFIG1_OSR, tmp)); + + return 0; +} + /* * When switching channel, the ADC require an additional settling time. * According to the datasheet, data is value on the third CS low. We already @@ -725,11 +754,15 @@ static int ad7380_set_ch(struct ad7380_state *st, unsigned int ch) .unit = SPI_DELAY_UNIT_NSECS, } }; - int ret; + int oversampling_ratio, ret; if (st->ch == ch) return 0; + ret = ad7380_get_osr(st, &oversampling_ratio); + if (ret) + return ret; + ret = regmap_update_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, AD7380_CONFIG1_CH, @@ -740,9 +773,9 @@ static int ad7380_set_ch(struct ad7380_state *st, unsigned int ch) st->ch = ch; - if (st->oversampling_ratio > 1) + if (oversampling_ratio > 1) xfer.delay.value = T_CONVERT_0_NS + - T_CONVERT_X_NS * (st->oversampling_ratio - 1) * + T_CONVERT_X_NS * (oversampling_ratio - 1) * st->chip_info->num_simult_channels / AD7380_NUM_SDO_LINES; return spi_sync_transfer(st->spi, &xfer, 1); @@ -753,20 +786,25 @@ static int ad7380_set_ch(struct ad7380_state *st, unsigned int ch) * @st: device instance specific state * @scan_type: current scan type */ -static void ad7380_update_xfers(struct ad7380_state *st, +static int ad7380_update_xfers(struct ad7380_state *st, const struct iio_scan_type *scan_type) { struct spi_transfer *xfer = st->seq ? st->seq_xfer : st->normal_xfer; unsigned int t_convert = T_CONVERT_NS; + int oversampling_ratio, ret; /* * In the case of oversampling, conversion time is higher than in normal * mode. Technically T_CONVERT_X_NS is lower for some chips, but we use * the maximum value for simplicity for now. */ - if (st->oversampling_ratio > 1) + ret = ad7380_get_osr(st, &oversampling_ratio); + if (ret) + return ret; + + if (oversampling_ratio > 1) t_convert = T_CONVERT_0_NS + T_CONVERT_X_NS * - (st->oversampling_ratio - 1) * + (oversampling_ratio - 1) * st->chip_info->num_simult_channels / AD7380_NUM_SDO_LINES; if (st->seq) { @@ -779,7 +817,7 @@ static void ad7380_update_xfers(struct ad7380_state *st, st->chip_info->num_simult_channels; xfer[3].rx_buf = xfer[2].rx_buf + xfer[2].len; /* Additional delay required here when oversampling is enabled */ - if (st->oversampling_ratio > 1) + if (oversampling_ratio > 1) xfer[2].delay.value = t_convert; else xfer[2].delay.value = 0; @@ -791,6 +829,8 @@ static void ad7380_update_xfers(struct ad7380_state *st, xfer[1].len = BITS_TO_BYTES(scan_type->storagebits) * st->chip_info->num_simult_channels; } + + return 0; } static int ad7380_triggered_buffer_preenable(struct iio_dev *indio_dev) @@ -798,6 +838,7 @@ static int ad7380_triggered_buffer_preenable(struct iio_dev *indio_dev) struct ad7380_state *st = iio_priv(indio_dev); const struct iio_scan_type *scan_type; struct spi_message *msg = &st->normal_msg; + int ret; /* * Currently, we always read all channels at the same time. The scan_type @@ -809,7 +850,6 @@ static int ad7380_triggered_buffer_preenable(struct iio_dev *indio_dev) if (st->chip_info->has_mux) { unsigned int index; - int ret; /* * Depending on the requested scan_mask and current state, @@ -840,7 +880,9 @@ static int ad7380_triggered_buffer_preenable(struct iio_dev *indio_dev) } - ad7380_update_xfers(st, scan_type); + ret = ad7380_update_xfers(st, scan_type); + if (ret) + return ret; return spi_optimize_message(st->spi, msg); } @@ -913,7 +955,9 @@ static int ad7380_read_direct(struct ad7380_state *st, unsigned int scan_index, return ret; } - ad7380_update_xfers(st, scan_type); + ret = ad7380_update_xfers(st, scan_type); + if (ret) + return ret; ret = spi_sync(st->spi, &st->normal_msg); if (ret < 0) @@ -991,7 +1035,16 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - *val = st->oversampling_ratio; + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad7380_get_osr(st, val); + + iio_device_release_direct_mode(indio_dev); + + if (ret) + return ret; return IIO_VAL_INT; default: @@ -1058,7 +1111,6 @@ static int ad7380_set_oversampling_ratio(struct ad7380_state *st, int val) if (ret) return ret; - st->oversampling_ratio = val; st->resolution_boost_enabled = boost; /* @@ -1134,7 +1186,6 @@ static int ad7380_init(struct ad7380_state *st, bool external_ref_en) } /* This is the default value after reset. */ - st->oversampling_ratio = 1; st->ch = 0; st->seq = false; From 27d1a4dbe1e150692c39a9056af018cb792234fd Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 8 Jan 2025 13:49:36 +0100 Subject: [PATCH 0040/2157] iio: adc: ad7380: add alert support The alert functionality is an out of range indicator and can be used as an early indicator of an out of bounds conversion result. ALERT_LOW_THRESHOLD and ALERT_HIGH_THRESHOLD registers are common to all channels. When using 1 SDO line (only mode supported by the driver right now), i.e data outputs only on SDOA, SDOB (or SDOD for 4 channels variants) is used as an alert pin. The alert pin is updated at the end of the conversion (set to low if an alert occurs) and is cleared on a falling edge of CS. The ALERT register contains information about the exact alert status: channel and direction. ALERT register can be accessed using debugfs if enabled. User can set high/low thresholds and enable alert detection using the regular iio events attributes: events/in_thresh_falling_value events/in_thresh_rising_value events/thresh_either_en In most use cases, user will hardwire the alert pin to trigger a shutdown. In theory, we could generate userspace IIO events for alerts, but this is not implemented yet for several reasons [1]. This can be implemented later if a real use case actually requires it. Signed-off-by: Julien Stephan [1] https://lore.kernel.org/all/4be16272-5197-4fa1-918c-c4cdfcaee02e@baylibre.com/ Reviewed-by: David Lechner Link: https://patch.msgid.link/20250108-ad7380-add-alert-support-v4-4-1751802471ba@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 192 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index a532de442208..a18dcd664c1b 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -112,6 +113,24 @@ struct ad7380_chip_info { const struct ad7380_timing_specs *timing_specs; }; +static const struct iio_event_spec ad7380_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_shared_by_dir = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_shared_by_dir = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_shared_by_all = BIT(IIO_EV_INFO_ENABLE), + }, +}; + enum { AD7380_SCAN_TYPE_NORMAL, AD7380_SCAN_TYPE_RESOLUTION_BOOST, @@ -214,6 +233,8 @@ static const struct iio_scan_type ad7380_scan_type_16_u[] = { .has_ext_scan_type = 1, \ .ext_scan_type = ad7380_scan_type_##bits##_##sign, \ .num_ext_scan_type = ARRAY_SIZE(ad7380_scan_type_##bits##_##sign), \ + .event_spec = ad7380_events, \ + .num_event_specs = ARRAY_SIZE(ad7380_events), \ } #define AD7380_CHANNEL(index, bits, diff, sign) \ @@ -1157,12 +1178,183 @@ static int ad7380_get_current_scan_type(const struct iio_dev *indio_dev, : AD7380_SCAN_TYPE_NORMAL; } +static int ad7380_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7380_state *st = iio_priv(indio_dev); + int tmp, ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = regmap_read(st->regmap, AD7380_REG_ADDR_CONFIG1, &tmp); + + iio_device_release_direct_mode(indio_dev); + + if (ret) + return ret; + + return FIELD_GET(AD7380_CONFIG1_ALERTEN, tmp); +} + +static int ad7380_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + bool state) +{ + struct ad7380_state *st = iio_priv(indio_dev); + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = regmap_update_bits(st->regmap, + AD7380_REG_ADDR_CONFIG1, + AD7380_CONFIG1_ALERTEN, + FIELD_PREP(AD7380_CONFIG1_ALERTEN, state)); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad7380_get_alert_th(struct ad7380_state *st, + enum iio_event_direction dir, + int *val) +{ + int ret, tmp; + + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->regmap, + AD7380_REG_ADDR_ALERT_HIGH_TH, + &tmp); + if (ret) + return ret; + + *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp); + return IIO_VAL_INT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->regmap, + AD7380_REG_ADDR_ALERT_LOW_TH, + &tmp); + if (ret) + return ret; + + *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp); + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7380_read_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, + int *val, int *val2) +{ + struct ad7380_state *st = iio_priv(indio_dev); + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad7380_get_alert_th(st, dir, val); + + iio_device_release_direct_mode(indio_dev); + return ret; + default: + return -EINVAL; + } +} + +static int ad7380_set_alert_th(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_direction dir, + int val) +{ + struct ad7380_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; + u16 th; + + /* + * According to the datasheet, + * AD7380_REG_ADDR_ALERT_HIGH_TH[11:0] are the 12 MSB of the + * 16-bits internal alert high register. LSB are set to 0xf. + * AD7380_REG_ADDR_ALERT_LOW_TH[11:0] are the 12 MSB of the + * 16 bits internal alert low register. LSB are set to 0x0. + * + * When alert is enabled the conversion from the adc is compared + * immediately to the alert high/low thresholds, before any + * oversampling. This means that the thresholds are the same for + * normal mode and oversampling mode. + */ + + /* Extract the 12 MSB of val */ + scan_type = iio_get_current_scan_type(indio_dev, chan); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + th = val >> (scan_type->realbits - 12); + + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->regmap, + AD7380_REG_ADDR_ALERT_HIGH_TH, + th); + case IIO_EV_DIR_FALLING: + return regmap_write(st->regmap, + AD7380_REG_ADDR_ALERT_LOW_TH, + th); + default: + return -EINVAL; + } +} + +static int ad7380_write_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, + int val, int val2) +{ + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad7380_set_alert_th(indio_dev, chan, dir, val); + + iio_device_release_direct_mode(indio_dev); + return ret; + default: + return -EINVAL; + } +} + static const struct iio_info ad7380_info = { .read_raw = &ad7380_read_raw, .read_avail = &ad7380_read_avail, .write_raw = &ad7380_write_raw, .get_current_scan_type = &ad7380_get_current_scan_type, .debugfs_reg_access = &ad7380_debugfs_reg_access, + .read_event_config = &ad7380_read_event_config, + .write_event_config = &ad7380_write_event_config, + .read_event_value = &ad7380_read_event_value, + .write_event_value = &ad7380_write_event_value, }; static int ad7380_init(struct ad7380_state *st, bool external_ref_en) From 7ad920ce342997e8a790a937f076383993a5178d Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 8 Jan 2025 13:49:37 +0100 Subject: [PATCH 0041/2157] docs: iio: ad7380: add alert support Add a section for alert support, explaining how user can use iio events attributes to enable alert and set thresholds. Reviewed-by: David Lechner Signed-off-by: Julien Stephan Link: https://patch.msgid.link/20250108-ad7380-add-alert-support-v4-5-1751802471ba@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad7380.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/iio/ad7380.rst b/Documentation/iio/ad7380.rst index c46127700e14..cff688bcc2d9 100644 --- a/Documentation/iio/ad7380.rst +++ b/Documentation/iio/ad7380.rst @@ -92,6 +92,38 @@ must restart iiod using the following command: root:~# systemctl restart iiod +Alert +----- + +2 channels variants of the ad738x family, can use the SDOB line as an alert pin +when configured in 1 SDO line mode. 4 channels variants, can use SDOD as an +alert pin when configured in 1 or 2 SDO line(s) mode, although only 1 SDO line +mode is currently supported by the driver (see `SPI wiring modes`_). + +At the end of a conversion the active-low alert pin gets asserted if the +conversion result exceeds the alert high limit or falls below the alert low +limit. It is cleared, on a falling edge of CS. The alert pin is common to all +channels. + +User can enable alert using the regular iio events attribute: + +.. code-block:: bash + + events/thresh_either_en + +The high and low thresholds are common to all channels and can also be set using +regular iio events attributes: + +.. code-block:: bash + + events/in_thresh_falling_value + events/in_thresh_rising_value + +If debugfs is available, user can read the ALERT register to determine the +faulty channel and direction. + +In most use cases, user will hardwire the alert pin to trigger a shutdown. + Channel selection and sequencer (single-end chips only) ------------------------------------------------------- @@ -144,7 +176,6 @@ Unimplemented features - Rolling average oversampling - Power down mode - CRC indication -- Alert Device buffers From 0302507541a8fdc0fe2805554c2c0e404fd38f4c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Jan 2025 13:58:47 +0100 Subject: [PATCH 0042/2157] dt-bindings: iio: Correct indentation and style in DTS example DTS example in the bindings should be indented with 2- or 4-spaces and aligned with opening '- |', so correct any differences like 3-spaces or mixtures 2- and 4-spaces in one binding. No functional changes here, but saves some comments during reviews of new patches built on existing code. Signed-off-by: Krzysztof Kozlowski Acked-by: Nuno Sa Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250107125848.226899-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../bindings/iio/dac/adi,ad5380.yaml | 18 +++++++++--------- .../iio/humidity/sciosense,ens210.yaml | 12 ++++++------ .../iio/temperature/maxim,max31865.yaml | 18 +++++++++--------- .../bindings/iio/temperature/ti,tmp117.yaml | 6 +++--- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml index 9eb9928500e2..3e323f1a5458 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml @@ -55,18 +55,18 @@ examples: #address-cells = <1>; #size-cells = <0>; dac@0 { - reg = <0>; - compatible = "adi,ad5390-5"; - vref-supply = <&dacvref>; + reg = <0>; + compatible = "adi,ad5390-5"; + vref-supply = <&dacvref>; }; }; - | i2c { - #address-cells = <1>; - #size-cells = <0>; - dac@42 { - reg = <0x42>; - compatible = "adi,ad5380-3"; - }; + #address-cells = <1>; + #size-cells = <0>; + dac@42 { + reg = <0x42>; + compatible = "adi,ad5380-3"; + }; }; ... diff --git a/Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml b/Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml index ed0ea938f7f8..1e25cf781cf1 100644 --- a/Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml +++ b/Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml @@ -43,13 +43,13 @@ additionalProperties: false examples: - | i2c { - #address-cells = <1>; - #size-cells = <0>; + #address-cells = <1>; + #size-cells = <0>; - temperature-sensor@43 { - compatible = "sciosense,ens210"; - reg = <0x43>; - }; + temperature-sensor@43 { + compatible = "sciosense,ens210"; + reg = <0x43>; + }; }; ... diff --git a/Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml b/Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml index 7cc365e0ebc8..7c0c6ab6fc69 100644 --- a/Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml @@ -40,15 +40,15 @@ unevaluatedProperties: false examples: - | spi { - #address-cells = <1>; - #size-cells = <0>; + #address-cells = <1>; + #size-cells = <0>; - temperature-sensor@0 { - compatible = "maxim,max31865"; - reg = <0>; - spi-max-frequency = <400000>; - spi-cpha; - maxim,3-wire; - }; + temperature-sensor@0 { + compatible = "maxim,max31865"; + reg = <0>; + spi-max-frequency = <400000>; + spi-cpha; + maxim,3-wire; + }; }; ... diff --git a/Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml b/Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml index 58aa1542776b..fbba5e934861 100644 --- a/Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml +++ b/Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml @@ -44,8 +44,8 @@ examples: #size-cells = <0>; tmp117@48 { - compatible = "ti,tmp117"; - reg = <0x48>; - vcc-supply = <&pmic_reg_3v3>; + compatible = "ti,tmp117"; + reg = <0x48>; + vcc-supply = <&pmic_reg_3v3>; }; }; From a4a947a74190594a6ebde765d829f216a74c7329 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 9 Jan 2025 12:23:25 -0600 Subject: [PATCH 0043/2157] iio: adc: stm32: Drop unnecessary DT property presence check There's no reason to check for regulator supply property presence before calling devm_regulator_get_optional() as that will return -ENODEV if the supply is not present. Signed-off-by: Rob Herring (Arm) Acked-by: Fabrice Gasnier Tested-by: Fabrice Gasnier Link: https://patch.msgid.link/20250109182325.3973684-2-robh@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 2201ee9987ae..0914148d1a22 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -615,8 +615,7 @@ static int stm32_adc_core_switches_probe(struct device *dev, } /* Booster can be used to supply analog switches (optional) */ - if (priv->cfg->has_syscfg & HAS_VBOOSTER && - of_property_read_bool(np, "booster-supply")) { + if (priv->cfg->has_syscfg & HAS_VBOOSTER) { priv->booster = devm_regulator_get_optional(dev, "booster"); if (IS_ERR(priv->booster)) { ret = PTR_ERR(priv->booster); @@ -628,8 +627,7 @@ static int stm32_adc_core_switches_probe(struct device *dev, } /* Vdd can be used to supply analog switches (optional) */ - if (priv->cfg->has_syscfg & HAS_ANASWVDD && - of_property_read_bool(np, "vdd-supply")) { + if (priv->cfg->has_syscfg & HAS_ANASWVDD) { priv->vdd = devm_regulator_get_optional(dev, "vdd"); if (IS_ERR(priv->vdd)) { ret = PTR_ERR(priv->vdd); From 470cb490d1b75cf25f3139dcf0226967bcc6e217 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 13 Jan 2025 15:43:18 -0600 Subject: [PATCH 0044/2157] iio: adc: ad7173: move fwnode_irq_get_byname() call site Move the call to fwnode_irq_get_byname() from the driver-specific ad7173_fw_parse_device_config() to the shared ad_sd_init() function. The main reason for this is that we want struct ad_sigma_delta_info to be static const data that describes the actual ADC chip, not the application-specific configuration or any runtime state. Previously, this struct was being used to pass the IRQ number to the shared ad_sd_init() function. Now, this is replaced by a boolean flag that is set at compile time and the ad_sd_init() function handles looking up the IRQ number instead. This also has the added benefit that if any other drivers need to make use of this in the future, they just have to set the flag and the shared code will take care of the rest rather than duplicating the code in each driver. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250113-iio-adc-ad7313-fix-non-const-info-struct-v4-1-b63be3ecac4a@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 7 +------ drivers/iio/adc/ad_sigma_delta.c | 11 ++++++++--- include/linux/iio/adc/ad_sigma_delta.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 6c4ed10ae580..bb9cddd8c9d3 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -871,6 +871,7 @@ static const struct ad_sigma_delta_info ad7173_sigma_delta_info = { .disable_one = ad7173_disable_one, .set_mode = ad7173_set_mode, .has_registers = true, + .has_named_irqs = true, .addr_shift = 0, .read_mask = BIT(6), .status_ch_mask = GENMASK(3, 0), @@ -1515,12 +1516,6 @@ static int ad7173_fw_parse_device_config(struct iio_dev *indio_dev) return ret; } - ret = fwnode_irq_get_byname(dev_fwnode(dev), "rdy"); - if (ret < 0) - return dev_err_probe(dev, ret, "Interrupt 'rdy' is required\n"); - - st->sigma_delta_info.irq_line = ret; - return ad7173_fw_parse_channel_config(indio_dev); } diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index d5d81581ab34..10e635fc4fa4 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -801,10 +801,15 @@ int ad_sd_init(struct ad_sigma_delta *sigma_delta, struct iio_dev *indio_dev, spin_lock_init(&sigma_delta->irq_lock); - if (info->irq_line) - sigma_delta->irq_line = info->irq_line; - else + if (info->has_named_irqs) { + sigma_delta->irq_line = fwnode_irq_get_byname(dev_fwnode(&spi->dev), + "rdy"); + if (sigma_delta->irq_line < 0) + return dev_err_probe(&spi->dev, sigma_delta->irq_line, + "Interrupt 'rdy' is required\n"); + } else { sigma_delta->irq_line = spi->irq; + } sigma_delta->rdy_gpiod = devm_gpiod_get_optional(&spi->dev, "rdy", GPIOD_IN); if (IS_ERR(sigma_delta->rdy_gpiod)) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 417073c52380..f242b285081b 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -46,6 +46,7 @@ struct iio_dev; * modify or drop the sample data, it, may be NULL. * @has_registers: true if the device has writable and readable registers, false * if there is just one read-only sample data shift register. + * @has_named_irqs: Set to true if there is more than one IRQ line. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. * @status_ch_mask: Mask for the channel number stored in status register. @@ -53,7 +54,6 @@ struct iio_dev; * be used. * @irq_flags: flags for the interrupt used by the triggered buffer * @num_slots: Number of sequencer slots - * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used * @num_resetclks: Number of SPI clk cycles with MOSI=1 to reset the chip. */ struct ad_sigma_delta_info { @@ -64,13 +64,13 @@ struct ad_sigma_delta_info { int (*disable_one)(struct ad_sigma_delta *, unsigned int chan); int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; + bool has_named_irqs; unsigned int addr_shift; unsigned int read_mask; unsigned int status_ch_mask; unsigned int data_reg; unsigned long irq_flags; unsigned int num_slots; - int irq_line; unsigned int num_resetclks; }; From 4310e15b314009e83241610f993eb466fede77df Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 13 Jan 2025 15:43:19 -0600 Subject: [PATCH 0045/2157] iio: adc: ad7173: don't make copy of ad_sigma_delta_info struct Use two separate static const struct ad_sigma_delta_info instances instead of making a copy for each driver instance. Typically in the IIO subsystem, we use multiple static const instances of the same struct when there are different variants of the same family of devices as opposed to making a copy for each driver instance and modifying it. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250113-iio-adc-ad7313-fix-non-const-info-struct-v4-2-b63be3ecac4a@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 482 +++++++++++++++++++++------------------ 1 file changed, 255 insertions(+), 227 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index bb9cddd8c9d3..8b438c689594 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -171,6 +171,7 @@ struct ad7173_device_info { unsigned int clock; unsigned int id; char *name; + const struct ad_sigma_delta_info *sd_info; bool has_current_inputs; bool has_vincom_input; bool has_temp; @@ -206,7 +207,6 @@ struct ad7173_channel { struct ad7173_state { struct ad_sigma_delta sd; - struct ad_sigma_delta_info sigma_delta_info; const struct ad7173_device_info *info; struct ad7173_channel *channels; struct regulator_bulk_data regulators[3]; @@ -265,228 +265,6 @@ static unsigned int ad4111_current_channel_config[] = { 0x18B, /* 12:IIN3+ 11:IIN3− */ }; -static const struct ad7173_device_info ad4111_device_info = { - .name = "ad4111", - .id = AD4111_ID, - .num_voltage_in_div = 8, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 8, - .num_gpios = 2, - .higher_gpio_bits = true, - .has_temp = true, - .has_vincom_input = true, - .has_input_buf = true, - .has_current_inputs = true, - .has_int_ref = true, - .has_internal_fs_calibration = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad4112_device_info = { - .name = "ad4112", - .id = AD4112_ID, - .num_voltage_in_div = 8, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 8, - .num_gpios = 2, - .higher_gpio_bits = true, - .has_vincom_input = true, - .has_temp = true, - .has_input_buf = true, - .has_current_inputs = true, - .has_int_ref = true, - .has_internal_fs_calibration = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad4113_device_info = { - .name = "ad4113", - .id = AD4113_ID, - .num_voltage_in_div = 8, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 8, - .num_gpios = 2, - .data_reg_only_16bit = true, - .higher_gpio_bits = true, - .has_vincom_input = true, - .has_input_buf = true, - .has_int_ref = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad4114_device_info = { - .name = "ad4114", - .id = AD4114_ID, - .num_voltage_in_div = 16, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 16, - .num_gpios = 4, - .has_vincom_input = true, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_internal_fs_calibration = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad4115_device_info = { - .name = "ad4115", - .id = AD4115_ID, - .num_voltage_in_div = 16, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 16, - .num_gpios = 4, - .has_vincom_input = true, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_internal_fs_calibration = true, - .clock = 8 * HZ_PER_MHZ, - .sinc5_data_rates = ad4115_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad4115_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad4116_device_info = { - .name = "ad4116", - .id = AD4116_ID, - .num_voltage_in_div = 11, - .num_channels = 16, - .num_configs = 8, - .num_voltage_in = 16, - .num_gpios = 4, - .has_vincom_input = true, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_internal_fs_calibration = true, - .clock = 4 * HZ_PER_MHZ, - .sinc5_data_rates = ad4116_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad4116_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7172_2_device_info = { - .name = "ad7172-2", - .id = AD7172_2_ID, - .num_voltage_in = 5, - .num_channels = 4, - .num_configs = 4, - .num_gpios = 2, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_pow_supply_monitoring = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7172_4_device_info = { - .name = "ad7172-4", - .id = AD7172_4_ID, - .num_voltage_in = 9, - .num_channels = 8, - .num_configs = 8, - .num_gpios = 4, - .has_input_buf = true, - .has_ref2 = true, - .has_pow_supply_monitoring = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7173_8_device_info = { - .name = "ad7173-8", - .id = AD7173_ID, - .num_voltage_in = 17, - .num_channels = 16, - .num_configs = 8, - .num_gpios = 4, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_ref2 = true, - .clock = 2 * HZ_PER_MHZ, - .sinc5_data_rates = ad7173_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7175_2_device_info = { - .name = "ad7175-2", - .id = AD7175_2_ID, - .num_voltage_in = 5, - .num_channels = 4, - .num_configs = 4, - .num_gpios = 2, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_pow_supply_monitoring = true, - .clock = 16 * HZ_PER_MHZ, - .sinc5_data_rates = ad7175_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7175_8_device_info = { - .name = "ad7175-8", - .id = AD7175_8_ID, - .num_voltage_in = 17, - .num_channels = 16, - .num_configs = 8, - .num_gpios = 4, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_ref2 = true, - .has_pow_supply_monitoring = true, - .clock = 16 * HZ_PER_MHZ, - .sinc5_data_rates = ad7175_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7176_2_device_info = { - .name = "ad7176-2", - .id = AD7176_ID, - .num_voltage_in = 5, - .num_channels = 4, - .num_configs = 4, - .num_gpios = 2, - .has_int_ref = true, - .clock = 16 * HZ_PER_MHZ, - .sinc5_data_rates = ad7175_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), -}; - -static const struct ad7173_device_info ad7177_2_device_info = { - .name = "ad7177-2", - .id = AD7177_ID, - .num_voltage_in = 5, - .num_channels = 4, - .num_configs = 4, - .num_gpios = 2, - .has_temp = true, - .has_input_buf = true, - .has_int_ref = true, - .has_pow_supply_monitoring = true, - .clock = 16 * HZ_PER_MHZ, - .odr_start_value = AD7177_ODR_START_VALUE, - .sinc5_data_rates = ad7175_sinc5_data_rates, - .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), -}; - static const char *const ad7173_ref_sel_str[] = { [AD7173_SETUP_REF_SEL_EXT_REF] = "vref", [AD7173_SETUP_REF_SEL_EXT_REF2] = "vref2", @@ -864,7 +642,7 @@ static int ad7173_disable_one(struct ad_sigma_delta *sd, unsigned int chan) return ad_sd_write_reg(sd, AD7173_REG_CH(chan), 2, 0); } -static const struct ad_sigma_delta_info ad7173_sigma_delta_info = { +static const struct ad_sigma_delta_info ad7173_sigma_delta_info_4_slots = { .set_channel = ad7173_set_channel, .append_status = ad7173_append_status, .disable_all = ad7173_disable_all, @@ -877,6 +655,258 @@ static const struct ad_sigma_delta_info ad7173_sigma_delta_info = { .status_ch_mask = GENMASK(3, 0), .data_reg = AD7173_REG_DATA, .num_resetclks = 64, + .num_slots = 4, +}; + +static const struct ad_sigma_delta_info ad7173_sigma_delta_info_8_slots = { + .set_channel = ad7173_set_channel, + .append_status = ad7173_append_status, + .disable_all = ad7173_disable_all, + .disable_one = ad7173_disable_one, + .set_mode = ad7173_set_mode, + .has_registers = true, + .has_named_irqs = true, + .addr_shift = 0, + .read_mask = BIT(6), + .status_ch_mask = GENMASK(3, 0), + .data_reg = AD7173_REG_DATA, + .num_resetclks = 64, + .num_slots = 8, +}; + +static const struct ad7173_device_info ad4111_device_info = { + .name = "ad4111", + .id = AD4111_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 8, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 8, + .num_gpios = 2, + .higher_gpio_bits = true, + .has_temp = true, + .has_vincom_input = true, + .has_input_buf = true, + .has_current_inputs = true, + .has_int_ref = true, + .has_internal_fs_calibration = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad4112_device_info = { + .name = "ad4112", + .id = AD4112_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 8, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 8, + .num_gpios = 2, + .higher_gpio_bits = true, + .has_vincom_input = true, + .has_temp = true, + .has_input_buf = true, + .has_current_inputs = true, + .has_int_ref = true, + .has_internal_fs_calibration = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad4113_device_info = { + .name = "ad4113", + .id = AD4113_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 8, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 8, + .num_gpios = 2, + .data_reg_only_16bit = true, + .higher_gpio_bits = true, + .has_vincom_input = true, + .has_input_buf = true, + .has_int_ref = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad4114_device_info = { + .name = "ad4114", + .id = AD4114_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 16, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 16, + .num_gpios = 4, + .has_vincom_input = true, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_internal_fs_calibration = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad4115_device_info = { + .name = "ad4115", + .id = AD4115_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 16, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 16, + .num_gpios = 4, + .has_vincom_input = true, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_internal_fs_calibration = true, + .clock = 8 * HZ_PER_MHZ, + .sinc5_data_rates = ad4115_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad4115_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad4116_device_info = { + .name = "ad4116", + .id = AD4116_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in_div = 11, + .num_channels = 16, + .num_configs = 8, + .num_voltage_in = 16, + .num_gpios = 4, + .has_vincom_input = true, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_internal_fs_calibration = true, + .clock = 4 * HZ_PER_MHZ, + .sinc5_data_rates = ad4116_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad4116_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7172_2_device_info = { + .name = "ad7172-2", + .id = AD7172_2_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in = 5, + .num_channels = 4, + .num_configs = 4, + .num_gpios = 2, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_pow_supply_monitoring = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7172_4_device_info = { + .name = "ad7172-4", + .id = AD7172_4_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in = 9, + .num_channels = 8, + .num_configs = 8, + .num_gpios = 4, + .has_input_buf = true, + .has_ref2 = true, + .has_pow_supply_monitoring = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7173_8_device_info = { + .name = "ad7173-8", + .id = AD7173_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in = 17, + .num_channels = 16, + .num_configs = 8, + .num_gpios = 4, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_ref2 = true, + .clock = 2 * HZ_PER_MHZ, + .sinc5_data_rates = ad7173_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7175_2_device_info = { + .name = "ad7175-2", + .id = AD7175_2_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in = 5, + .num_channels = 4, + .num_configs = 4, + .num_gpios = 2, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_pow_supply_monitoring = true, + .clock = 16 * HZ_PER_MHZ, + .sinc5_data_rates = ad7175_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7175_8_device_info = { + .name = "ad7175-8", + .id = AD7175_8_ID, + .sd_info = &ad7173_sigma_delta_info_8_slots, + .num_voltage_in = 17, + .num_channels = 16, + .num_configs = 8, + .num_gpios = 4, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_ref2 = true, + .has_pow_supply_monitoring = true, + .clock = 16 * HZ_PER_MHZ, + .sinc5_data_rates = ad7175_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7176_2_device_info = { + .name = "ad7176-2", + .id = AD7176_ID, + .sd_info = &ad7173_sigma_delta_info_4_slots, + .num_voltage_in = 5, + .num_channels = 4, + .num_configs = 4, + .num_gpios = 2, + .has_int_ref = true, + .clock = 16 * HZ_PER_MHZ, + .sinc5_data_rates = ad7175_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), +}; + +static const struct ad7173_device_info ad7177_2_device_info = { + .name = "ad7177-2", + .id = AD7177_ID, + .sd_info = &ad7173_sigma_delta_info_4_slots, + .num_voltage_in = 5, + .num_channels = 4, + .num_configs = 4, + .num_gpios = 2, + .has_temp = true, + .has_input_buf = true, + .has_int_ref = true, + .has_pow_supply_monitoring = true, + .clock = 16 * HZ_PER_MHZ, + .odr_start_value = AD7177_ODR_START_VALUE, + .sinc5_data_rates = ad7175_sinc5_data_rates, + .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), }; static int ad7173_setup(struct iio_dev *indio_dev) @@ -1547,9 +1577,7 @@ static int ad7173_probe(struct spi_device *spi) spi->mode = SPI_MODE_3; spi_setup(spi); - st->sigma_delta_info = ad7173_sigma_delta_info; - st->sigma_delta_info.num_slots = st->info->num_configs; - ret = ad_sd_init(&st->sd, indio_dev, spi, &st->sigma_delta_info); + ret = ad_sd_init(&st->sd, indio_dev, spi, st->info->sd_info); if (ret) return ret; From 8ec5a6fc3b58a91635b8c514c7bed4ecf7c60dd2 Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Tue, 14 Jan 2025 12:31:55 -0500 Subject: [PATCH 0046/2157] iio: adc: ad7625: drop BSD license tag The ad7625 driver was submitted under a dual BSD/GPL license, but this isn't a requirement for the code, and adds extra complexity. To make it consistent with similar drivers, drop the BSD tag and leave it as GPL-2.0-only. Suggested-by: Nuno Sa Signed-off-by: Trevor Gamblin Link: https://patch.msgid.link/20250114-ad7625_license-v1-1-6555b7be05ab@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7625.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7625.c b/drivers/iio/adc/ad7625.c index afa9bf4ddf3c..adb4e25dd7ea 100644 --- a/drivers/iio/adc/ad7625.c +++ b/drivers/iio/adc/ad7625.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// SPDX-License-Identifier: (GPL-2.0-only) /* * Analog Devices Inc. AD7625 ADC driver * @@ -680,5 +680,5 @@ module_platform_driver(ad7625_driver); MODULE_AUTHOR("Trevor Gamblin "); MODULE_DESCRIPTION("Analog Devices AD7625 ADC"); -MODULE_LICENSE("Dual BSD/GPL"); +MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("IIO_BACKEND"); From 465c79ad0665fc647c7643b1fd9bdfecf199f627 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 20:27:16 +0100 Subject: [PATCH 0047/2157] iio: Use str_enable_disable-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250114192716.912476-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/dht11.c | 3 ++- drivers/iio/proximity/irsd200.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index c97e25448772..48c59d09eea7 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +100,7 @@ static void dht11_edges_print(struct dht11 *dht11) for (i = 1; i < dht11->num_edges; ++i) { dev_dbg(dht11->dev, "%d: %lld ns %s\n", i, dht11->edges[i].ts - dht11->edges[i - 1].ts, - dht11->edges[i - 1].value ? "high" : "low"); + str_high_low(dht11->edges[i - 1].value)); } } #endif /* CONFIG_DYNAMIC_DEBUG */ diff --git a/drivers/iio/proximity/irsd200.c b/drivers/iio/proximity/irsd200.c index b09d15230111..b0ffd3574013 100644 --- a/drivers/iio/proximity/irsd200.c +++ b/drivers/iio/proximity/irsd200.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -783,7 +784,7 @@ static int irsd200_set_trigger_state(struct iio_trigger *trig, bool state) ret = regmap_field_write(data->regfields[IRS_REGF_INTR_DATA], state); if (ret) { dev_err(data->dev, "Could not %s data interrupt source (%d)\n", - state ? "enable" : "disable", ret); + str_enable_disable(state), ret); } return ret; From 0f3a7135e36d749bb5eb9c30b752d6d652536918 Mon Sep 17 00:00:00 2001 From: Vasiliy Doylov Date: Thu, 16 Jan 2025 16:52:42 +0300 Subject: [PATCH 0048/2157] dt-bindings: iio: accel: mc3230: document mc3510c The MC3510C is a 3 asix digital accelerometer. It handled by the same driver as MC3230. Document it as a trivial device. Acked-by: Krzysztof Kozlowski Signed-off-by: Vasiliy Doylov Link: https://patch.msgid.link/20250116-mainlining-mc3510c-v4-1-a41308b85ec2@gmail.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index fadbd3c041c8..6c34e4c0dcc6 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -187,6 +187,8 @@ properties: - maxim,max6621 # mCube 3-axis 8-bit digital accelerometer - mcube,mc3230 + # mCube 3-axis 8-bit digital accelerometer + - mcube,mc3510c # Measurement Specialities I2C temperature and humidity sensor - meas,htu21 # Measurement Specialities I2C pressure and temperature sensor From e59c6acfd5fa700e6df6cc535f8df14125f3acce Mon Sep 17 00:00:00 2001 From: Vasiliy Doylov Date: Thu, 16 Jan 2025 16:52:43 +0300 Subject: [PATCH 0049/2157] iio: accel: mc3230: add mount matrix support This patch allows to read a mount-matrix device tree property and report to user-space or in-kernel iio clients. Signed-off-by: Vasiliy Doylov Link: https://patch.msgid.link/20250116-mainlining-mc3510c-v4-2-a41308b85ec2@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mc3230.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c index caa40a14a631..48787c0494ae 100644 --- a/drivers/iio/accel/mc3230.c +++ b/drivers/iio/accel/mc3230.c @@ -44,18 +44,34 @@ static const int mc3230_nscale = 115411765; .channel2 = IIO_MOD_##axis, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .ext_info = mc3230_ext_info, \ } +struct mc3230_data { + struct i2c_client *client; + struct iio_mount_matrix orientation; +}; + +static const struct iio_mount_matrix * +mc3230_get_mount_matrix(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct mc3230_data *data = iio_priv(indio_dev); + + return &data->orientation; +} + +static const struct iio_chan_spec_ext_info mc3230_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_DIR, mc3230_get_mount_matrix), + { } +}; + static const struct iio_chan_spec mc3230_channels[] = { MC3230_CHANNEL(MC3230_REG_XOUT, X), MC3230_CHANNEL(MC3230_REG_YOUT, Y), MC3230_CHANNEL(MC3230_REG_ZOUT, Z), }; -struct mc3230_data { - struct i2c_client *client; -}; - static int mc3230_set_opcon(struct mc3230_data *data, int opcon) { int ret; @@ -141,6 +157,10 @@ static int mc3230_probe(struct i2c_client *client) if (ret < 0) return ret; + ret = iio_read_mount_matrix(&client->dev, &data->orientation); + if (ret) + return ret; + ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "device_register failed\n"); From c7fee7653ac2c158117652771a8eb38bec384562 Mon Sep 17 00:00:00 2001 From: Vasiliy Doylov Date: Thu, 16 Jan 2025 16:52:44 +0300 Subject: [PATCH 0050/2157] iio: accel: mc3230: add OF match table This will make the driver auto loaded via device-tree. Signed-off-by: Vasiliy Doylov Link: https://patch.msgid.link/20250116-mainlining-mc3510c-v4-3-a41308b85ec2@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mc3230.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c index 48787c0494ae..443b5b30c831 100644 --- a/drivers/iio/accel/mc3230.c +++ b/drivers/iio/accel/mc3230.c @@ -205,9 +205,16 @@ static const struct i2c_device_id mc3230_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, mc3230_i2c_id); +static const struct of_device_id mc3230_of_match[] = { + { .compatible = "mcube,mc3230" }, + { } +}; +MODULE_DEVICE_TABLE(of, mc3230_of_match); + static struct i2c_driver mc3230_driver = { .driver = { .name = "mc3230", + .of_match_table = mc3230_of_match, .pm = pm_sleep_ptr(&mc3230_pm_ops), }, .probe = mc3230_probe, From 4e78ce08dbcd026683d771d8048b28449c94bee5 Mon Sep 17 00:00:00 2001 From: Vasiliy Doylov Date: Thu, 16 Jan 2025 16:52:45 +0300 Subject: [PATCH 0051/2157] iio: accel: mc3230: add multiple devices support Refactor code to support multiple generations of MCUBE devices by defining name, chip id and product code in mc3230_chip_info struct. Signed-off-by: Vasiliy Doylov Link: https://patch.msgid.link/20250116-mainlining-mc3510c-v4-4-a41308b85ec2@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mc3230.c | 52 ++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c index 443b5b30c831..c8d394c3ecf0 100644 --- a/drivers/iio/accel/mc3230.c +++ b/drivers/iio/accel/mc3230.c @@ -22,20 +22,29 @@ #define MC3230_MODE_OPCON_STANDBY 0x03 #define MC3230_REG_CHIP_ID 0x18 -#define MC3230_CHIP_ID 0x01 - #define MC3230_REG_PRODUCT_CODE 0x3b -#define MC3230_PRODUCT_CODE 0x19 /* * The accelerometer has one measurement range: * * -1.5g - +1.5g (8-bit, signed) * - * scale = (1.5 + 1.5) * 9.81 / (2^8 - 1) = 0.115411765 */ -static const int mc3230_nscale = 115411765; +struct mc3230_chip_info { + const char *name; + const u8 chip_id; + const u8 product_code; + const int scale; +}; + +static const struct mc3230_chip_info mc3230_chip_info = { + .name = "mc3230", + .chip_id = 0x01, + .product_code = 0x19, + /* (1.5 + 1.5) * 9.81 / (2^8 - 1) = 0.115411765 */ + .scale = 115411765, +}; #define MC3230_CHANNEL(reg, axis) { \ .type = IIO_ACCEL, \ @@ -48,6 +57,7 @@ static const int mc3230_nscale = 115411765; } struct mc3230_data { + const struct mc3230_chip_info *chip_info; struct i2c_client *client; struct iio_mount_matrix orientation; }; @@ -111,7 +121,7 @@ static int mc3230_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; - *val2 = mc3230_nscale; + *val2 = data->chip_info->scale; return IIO_VAL_INT_PLUS_NANO; default: return -EINVAL; @@ -127,15 +137,28 @@ static int mc3230_probe(struct i2c_client *client) int ret; struct iio_dev *indio_dev; struct mc3230_data *data; + const struct mc3230_chip_info *chip_info; + + chip_info = i2c_get_match_data(client); + if (chip_info == NULL) { + dev_err(&client->dev, "failed to get match data"); + return -ENODATA; + } /* First check chip-id and product-id */ ret = i2c_smbus_read_byte_data(client, MC3230_REG_CHIP_ID); - if (ret != MC3230_CHIP_ID) - return (ret < 0) ? ret : -ENODEV; + if (ret != chip_info->chip_id) { + dev_info(&client->dev, + "chip id check fail: 0x%x != 0x%x !\n", + ret, chip_info->chip_id); + } ret = i2c_smbus_read_byte_data(client, MC3230_REG_PRODUCT_CODE); - if (ret != MC3230_PRODUCT_CODE) - return (ret < 0) ? ret : -ENODEV; + if (ret != chip_info->product_code) { + dev_info(&client->dev, + "product code check fail: 0x%x != 0x%x !\n", + ret, chip_info->product_code); + } indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!indio_dev) { @@ -144,11 +167,12 @@ static int mc3230_probe(struct i2c_client *client) } data = iio_priv(indio_dev); + data->chip_info = chip_info; data->client = client; i2c_set_clientdata(client, indio_dev); indio_dev->info = &mc3230_info; - indio_dev->name = "mc3230"; + indio_dev->name = chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = mc3230_channels; indio_dev->num_channels = ARRAY_SIZE(mc3230_channels); @@ -200,13 +224,13 @@ static int mc3230_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(mc3230_pm_ops, mc3230_suspend, mc3230_resume); static const struct i2c_device_id mc3230_i2c_id[] = { - { "mc3230" }, - {} + { "mc3230", (kernel_ulong_t)&mc3230_chip_info }, + { } }; MODULE_DEVICE_TABLE(i2c, mc3230_i2c_id); static const struct of_device_id mc3230_of_match[] = { - { .compatible = "mcube,mc3230" }, + { .compatible = "mcube,mc3230", &mc3230_chip_info }, { } }; MODULE_DEVICE_TABLE(of, mc3230_of_match); From d438fc93ca45a4b31fee5d53a2124c3920892607 Mon Sep 17 00:00:00 2001 From: Vasiliy Doylov Date: Thu, 16 Jan 2025 16:52:46 +0300 Subject: [PATCH 0052/2157] iio: accel: mc3230: add mc3510c support This change integrates mc3510c support into the mc3230 driver. MC3510C uses the same registers as MC3230, but a different value scale. Tested on Huawei MediaPad T3 10 (huawei-agassi) Signed-off-by: Vasiliy Doylov Link: https://patch.msgid.link/20250116-mainlining-mc3510c-v4-5-a41308b85ec2@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mc3230.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c index c8d394c3ecf0..e2853090fa6e 100644 --- a/drivers/iio/accel/mc3230.c +++ b/drivers/iio/accel/mc3230.c @@ -46,6 +46,14 @@ static const struct mc3230_chip_info mc3230_chip_info = { .scale = 115411765, }; +static const struct mc3230_chip_info mc3510c_chip_info = { + .name = "mc3510c", + .chip_id = 0x23, + .product_code = 0x10, + /* Was obtained empirically */ + .scale = 625000000, +}; + #define MC3230_CHANNEL(reg, axis) { \ .type = IIO_ACCEL, \ .address = reg, \ @@ -225,12 +233,14 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mc3230_pm_ops, mc3230_suspend, mc3230_resume); static const struct i2c_device_id mc3230_i2c_id[] = { { "mc3230", (kernel_ulong_t)&mc3230_chip_info }, + { "mc3510c", (kernel_ulong_t)&mc3510c_chip_info }, { } }; MODULE_DEVICE_TABLE(i2c, mc3230_i2c_id); static const struct of_device_id mc3230_of_match[] = { { .compatible = "mcube,mc3230", &mc3230_chip_info }, + { .compatible = "mcube,mc3510c", &mc3510c_chip_info }, { } }; MODULE_DEVICE_TABLE(of, mc3230_of_match); From 7ecbbb5bb8fbec0697142acaf756a5f0e046046b Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Sat, 18 Jan 2025 07:55:49 -0300 Subject: [PATCH 0053/2157] iio: imu: bmi270: add temperature channel The BMI270 IMU includes a temperature sensor. Add a channel for reading the temperature. Signed-off-by: Gustavo Silva Link: https://patch.msgid.link/20250118-bmi270-temp-v2-1-50bc85f36ab2@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270_core.c | 48 +++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index 7fec52e0b486..464dcdd657c4 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,11 @@ #define BMI270_INTERNAL_STATUS_REG 0x21 #define BMI270_INTERNAL_STATUS_MSG_MSK GENMASK(3, 0) #define BMI270_INTERNAL_STATUS_MSG_INIT_OK 0x01 - #define BMI270_INTERNAL_STATUS_AXES_REMAP_ERR_MSK BIT(5) #define BMI270_INTERNAL_STATUS_ODR_50HZ_ERR_MSK BIT(6) +#define BMI270_TEMPERATURE_0_REG 0x22 + #define BMI270_ACC_CONF_REG 0x40 #define BMI270_ACC_CONF_ODR_MSK GENMASK(3, 0) #define BMI270_ACC_CONF_ODR_100HZ 0x08 @@ -69,6 +71,10 @@ #define BMI270_PWR_CTRL_ACCEL_EN_MSK BIT(2) #define BMI270_PWR_CTRL_TEMP_EN_MSK BIT(3) +/* See datasheet section 4.6.14, Temperature Sensor */ +#define BMI270_TEMP_OFFSET 11776 +#define BMI270_TEMP_SCALE 1953125 + #define BMI260_INIT_DATA_FILE "bmi260-init-data.fw" #define BMI270_INIT_DATA_FILE "bmi270-init-data.fw" @@ -109,6 +115,7 @@ EXPORT_SYMBOL_NS_GPL(bmi270_chip_info, "IIO_BMI270"); enum bmi270_sensor_type { BMI270_ACCEL = 0, BMI270_GYRO, + BMI270_TEMP, }; struct bmi270_scale { @@ -136,6 +143,10 @@ static const struct bmi270_scale bmi270_gyro_scale[] = { { 0, 66 }, }; +static const struct bmi270_scale bmi270_temp_scale[] = { + { BMI270_TEMP_SCALE / MICRO, BMI270_TEMP_SCALE % MICRO }, +}; + struct bmi270_scale_item { const struct bmi270_scale *tbl; int num; @@ -150,6 +161,10 @@ static const struct bmi270_scale_item bmi270_scale_table[] = { .tbl = bmi270_gyro_scale, .num = ARRAY_SIZE(bmi270_gyro_scale), }, + [BMI270_TEMP] = { + .tbl = bmi270_temp_scale, + .num = ARRAY_SIZE(bmi270_temp_scale), + }, }; static const struct bmi270_odr bmi270_accel_odr[] = { @@ -255,7 +270,7 @@ static int bmi270_set_scale(struct bmi270_data *data, int chan_type, int uscale) } static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, - int *uscale) + int *scale, int *uscale) { int ret; unsigned int val; @@ -280,6 +295,10 @@ static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, val = FIELD_GET(BMI270_GYR_CONF_RANGE_MSK, val); bmi270_scale_item = bmi270_scale_table[BMI270_GYRO]; break; + case IIO_TEMP: + val = 0; + bmi270_scale_item = bmi270_scale_table[BMI270_TEMP]; + break; default: return -EINVAL; } @@ -287,6 +306,7 @@ static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, if (val >= bmi270_scale_item.num) return -EINVAL; + *scale = bmi270_scale_item.tbl[val].scale; *uscale = bmi270_scale_item.tbl[val].uscale; return 0; } @@ -399,6 +419,9 @@ static int bmi270_get_data(struct bmi270_data *bmi270_device, case IIO_ANGL_VEL: reg = BMI270_ANG_VEL_X_REG + (axis - IIO_MOD_X) * 2; break; + case IIO_TEMP: + reg = BMI270_TEMPERATURE_0_REG; + break; default: return -EINVAL; } @@ -427,9 +450,16 @@ static int bmi270_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - *val = 0; - ret = bmi270_get_scale(bmi270_device, chan->type, val2); + ret = bmi270_get_scale(bmi270_device, chan->type, val, val2); return ret ? ret : IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_OFFSET: + switch (chan->type) { + case IIO_TEMP: + *val = BMI270_TEMP_OFFSET; + return IIO_VAL_INT; + default: + return -EINVAL; + } case IIO_CHAN_INFO_SAMP_FREQ: ret = bmi270_get_odr(bmi270_device, chan->type, val, val2); return ret ? ret : IIO_VAL_INT_PLUS_MICRO; @@ -544,6 +574,13 @@ static const struct iio_chan_spec bmi270_channels[] = { BMI270_ANG_VEL_CHANNEL(X), BMI270_ANG_VEL_CHANNEL(Y), BMI270_ANG_VEL_CHANNEL(Z), + { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_OFFSET), + .scan_index = -1, /* No buffer support */ + }, IIO_CHAN_SOFT_TIMESTAMP(BMI270_SCAN_TIMESTAMP), }; @@ -646,7 +683,8 @@ static int bmi270_configure_imu(struct bmi270_data *bmi270_device) ret = regmap_set_bits(regmap, BMI270_PWR_CTRL_REG, BMI270_PWR_CTRL_AUX_EN_MSK | BMI270_PWR_CTRL_GYR_EN_MSK | - BMI270_PWR_CTRL_ACCEL_EN_MSK); + BMI270_PWR_CTRL_ACCEL_EN_MSK | + BMI270_PWR_CTRL_TEMP_EN_MSK); if (ret) return dev_err_probe(dev, ret, "Failed to enable accelerometer and gyroscope"); From 998d20e4e99d909f14d96fdf0bdcf860f7efe3ef Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Wed, 13 Nov 2024 15:52:59 -0500 Subject: [PATCH 0054/2157] iio: adc: ad4695: make ad4695_exit_conversion_mode() more robust Ensure that conversion mode is successfully exited when the command is issued by adding an extra transfer beforehand, matching the minimum CNV high and low times from the AD4695 datasheet. The AD4695 has a quirk where the exit command only works during a conversion, so guarantee this happens by triggering a conversion in ad4695_exit_conversion_mode(). Then make this even more robust by ensuring that the exit command is run at AD4695_REG_ACCESS_SCLK_HZ rather than the bus maximum. Signed-off-by: Trevor Gamblin Reviewed-by: David Lechner Tested-by: David Lechner Link: https://patch.msgid.link/20241113-tgamblin-ad4695_improvements-v2-2-b6bb7c758fc4@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index b79d135a5471..22fdc454b0ce 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -92,6 +92,8 @@ #define AD4695_T_REFBUF_MS 100 #define AD4695_T_REGCONFIG_NS 20 #define AD4695_T_SCK_CNV_DELAY_NS 80 +#define AD4695_T_CNVL_NS 80 +#define AD4695_T_CNVH_NS 10 #define AD4695_REG_ACCESS_SCLK_HZ (10 * MEGA) /* Max number of voltage input channels. */ @@ -364,11 +366,31 @@ static int ad4695_enter_advanced_sequencer_mode(struct ad4695_state *st, u32 n) */ static int ad4695_exit_conversion_mode(struct ad4695_state *st) { - struct spi_transfer xfer = { - .tx_buf = &st->cnv_cmd2, - .len = 1, - .delay.value = AD4695_T_REGCONFIG_NS, - .delay.unit = SPI_DELAY_UNIT_NSECS, + /* + * An extra transfer is needed to trigger a conversion here so + * that we can be 100% sure the command will be processed by the + * ADC, rather than relying on it to be in the correct state + * when this function is called (this chip has a quirk where the + * command only works when reading a conversion, and if the + * previous conversion was already read then it won't work). The + * actual conversion command is then run at the slower + * AD4695_REG_ACCESS_SCLK_HZ speed to guarantee this works. + */ + struct spi_transfer xfers[] = { + { + .delay.value = AD4695_T_CNVL_NS, + .delay.unit = SPI_DELAY_UNIT_NSECS, + .cs_change = 1, + .cs_change_delay.value = AD4695_T_CNVH_NS, + .cs_change_delay.unit = SPI_DELAY_UNIT_NSECS, + }, + { + .speed_hz = AD4695_REG_ACCESS_SCLK_HZ, + .tx_buf = &st->cnv_cmd2, + .len = 1, + .delay.value = AD4695_T_REGCONFIG_NS, + .delay.unit = SPI_DELAY_UNIT_NSECS, + }, }; /* @@ -377,7 +399,7 @@ static int ad4695_exit_conversion_mode(struct ad4695_state *st) */ st->cnv_cmd2 = AD4695_CMD_EXIT_CNV_MODE << 3; - return spi_sync_transfer(st->spi, &xfer, 1); + return spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); } static int ad4695_set_ref_voltage(struct ad4695_state *st, int vref_mv) From 1093f83b2cfbca9b30b80ea0152fc78255ef398a Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Wed, 13 Nov 2024 15:53:00 -0500 Subject: [PATCH 0055/2157] iio: adc: ad4695: add custom regmap bus callbacks Add a custom implementation of regmap read/write callbacks using the SPI bus. This allows them to be performed at a lower SCLK rate than data reads. Previously, all SPI transfers were being performed at a lower speed, but with this change sample data is read at the max bus speed while the register reads/writes remain at the lower rate. Also remove .can_multi_write from the AD4695 driver's regmap_configs, as this isn't implemented or needed. For some background context, see: https://lore.kernel.org/linux-iio/20241028163907.00007e12@Huawei.com/ Suggested-by: David Lechner Signed-off-by: Trevor Gamblin Reviewed-by: David Lechner Tested-by: David Lechner Link: https://patch.msgid.link/20241113-tgamblin-ad4695_improvements-v2-3-b6bb7c758fc4@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 +- drivers/iio/adc/ad4695.c | 74 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 849c90203071..a3e8ac569ce4 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -51,9 +51,9 @@ config AD4130 config AD4695 tristate "Analog Device AD4695 ADC Driver" depends on SPI - select REGMAP_SPI select IIO_BUFFER select IIO_TRIGGERED_BUFFER + select REGMAP help Say yes here to build support for Analog Devices AD4695 and similar analog to digital converters (ADC). diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 22fdc454b0ce..13cf01d35301 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -150,6 +150,8 @@ struct ad4695_state { /* Commands to send for single conversion. */ u16 cnv_cmd; u8 cnv_cmd2; + /* Buffer for storing data from regmap bus reads/writes */ + u8 regmap_bus_data[4]; }; static const struct regmap_range ad4695_regmap_rd_ranges[] = { @@ -194,7 +196,6 @@ static const struct regmap_config ad4695_regmap_config = { .max_register = AD4695_REG_AS_SLOT(127), .rd_table = &ad4695_regmap_rd_table, .wr_table = &ad4695_regmap_wr_table, - .can_multi_write = true, }; static const struct regmap_range ad4695_regmap16_rd_ranges[] = { @@ -226,7 +227,67 @@ static const struct regmap_config ad4695_regmap16_config = { .max_register = AD4695_REG_GAIN_IN(15), .rd_table = &ad4695_regmap16_rd_table, .wr_table = &ad4695_regmap16_wr_table, - .can_multi_write = true, +}; + +static int ad4695_regmap_bus_reg_write(void *context, const void *data, + size_t count) +{ + struct ad4695_state *st = context; + struct spi_transfer xfer = { + .speed_hz = AD4695_REG_ACCESS_SCLK_HZ, + .len = count, + .tx_buf = st->regmap_bus_data, + }; + + if (count > ARRAY_SIZE(st->regmap_bus_data)) + return -EINVAL; + + memcpy(st->regmap_bus_data, data, count); + + return spi_sync_transfer(st->spi, &xfer, 1); +} + +static int ad4695_regmap_bus_reg_read(void *context, const void *reg, + size_t reg_size, void *val, + size_t val_size) +{ + struct ad4695_state *st = context; + struct spi_transfer xfers[] = { + { + .speed_hz = AD4695_REG_ACCESS_SCLK_HZ, + .len = reg_size, + .tx_buf = &st->regmap_bus_data[0], + }, { + .speed_hz = AD4695_REG_ACCESS_SCLK_HZ, + .len = val_size, + .rx_buf = &st->regmap_bus_data[2], + }, + }; + int ret; + + if (reg_size > 2) + return -EINVAL; + + if (val_size > 2) + return -EINVAL; + + memcpy(&st->regmap_bus_data[0], reg, reg_size); + + ret = spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); + if (ret) + return ret; + + memcpy(val, &st->regmap_bus_data[2], val_size); + + return 0; +} + +static const struct regmap_bus ad4695_regmap_bus = { + .write = ad4695_regmap_bus_reg_write, + .read = ad4695_regmap_bus_reg_read, + .read_flag_mask = 0x80, + .reg_format_endian_default = REGMAP_ENDIAN_BIG, + .val_format_endian_default = REGMAP_ENDIAN_BIG, }; static const struct iio_chan_spec ad4695_channel_template = { @@ -1061,15 +1122,14 @@ static int ad4695_probe(struct spi_device *spi) if (!st->chip_info) return -EINVAL; - /* Registers cannot be read at the max allowable speed */ - spi->max_speed_hz = AD4695_REG_ACCESS_SCLK_HZ; - - st->regmap = devm_regmap_init_spi(spi, &ad4695_regmap_config); + st->regmap = devm_regmap_init(dev, &ad4695_regmap_bus, st, + &ad4695_regmap_config); if (IS_ERR(st->regmap)) return dev_err_probe(dev, PTR_ERR(st->regmap), "Failed to initialize regmap\n"); - st->regmap16 = devm_regmap_init_spi(spi, &ad4695_regmap16_config); + st->regmap16 = devm_regmap_init(dev, &ad4695_regmap_bus, st, + &ad4695_regmap16_config); if (IS_ERR(st->regmap16)) return dev_err_probe(dev, PTR_ERR(st->regmap16), "Failed to initialize regmap16\n"); From 6eaf49f1ba15752f2c13621e5ddf27edf34a35a9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 9 Dec 2024 15:16:24 -0300 Subject: [PATCH 0056/2157] iio: adc: ti-ads124s08: Switch to fsleep() According to Documentation/timers/delay_sleep_functions.rst, fsleep() is the preferred delay function to be used in non-atomic context, so switch to it accordingly. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Fabio Estevam Link: https://patch.msgid.link/20241209181624.1260868-1-festevam@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads124s08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c index f452f57f11c9..77c299bb4ebc 100644 --- a/drivers/iio/adc/ti-ads124s08.c +++ b/drivers/iio/adc/ti-ads124s08.c @@ -184,7 +184,7 @@ static int ads124s_reset(struct iio_dev *indio_dev) if (priv->reset_gpio) { gpiod_set_value_cansleep(priv->reset_gpio, 0); - udelay(200); + fsleep(200); gpiod_set_value_cansleep(priv->reset_gpio, 1); } else { return ads124s_write_cmd(indio_dev, ADS124S08_CMD_RESET); From 32f80e203401da168826a023d667918b936f85a8 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 16 Dec 2024 10:56:37 +0200 Subject: [PATCH 0057/2157] iio: gts: Simplify available scale table build Make available scale building more clear. This hurts the performance quite a bit by looping throgh the scales many times instead of doing everything in one loop. It however simplifies logic by: - decoupling the gain and scale allocations & computations - keeping the temporary 'per_time_gains' table inside the per_time_scales computation function. - separating building the 'all scales' table in own function and doing it based on the already computed per-time scales. Signed-off-by: Matti Vaittinen Tested-by: subhajit.ghosh@tweaklogic.com Link: https://patch.msgid.link/Z1_rRXqdhxhL6wBw@mva-rohm Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-gts-helper.c | 282 ++++++++++++++++---------- 1 file changed, 179 insertions(+), 103 deletions(-) diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c index d70ebe3bf774..200364b42ead 100644 --- a/drivers/iio/industrialio-gts-helper.c +++ b/drivers/iio/industrialio-gts-helper.c @@ -160,16 +160,123 @@ static void iio_gts_purge_avail_scale_table(struct iio_gts *gts) gts->num_avail_all_scales = 0; } +static int scale_eq(int *sc1, int *sc2) +{ + return sc1[0] == sc2[0] && sc1[1] == sc2[1]; +} + +static int scale_smaller(int *sc1, int *sc2) +{ + if (sc1[0] != sc2[0]) + return sc1[0] < sc2[0]; + + /* If integer parts are equal, fixp parts */ + return sc1[1] < sc2[1]; +} + +/* + * Do a single table listing all the unique scales that any combination of + * supported gains and times can provide. + */ +static int do_combined_scaletable(struct iio_gts *gts, + size_t all_scales_tbl_bytes) +{ + int t_idx, i, new_idx; + int **scales = gts->per_time_avail_scale_tables; + int *all_scales = kcalloc(gts->num_itime, all_scales_tbl_bytes, + GFP_KERNEL); + + if (!all_scales) + return -ENOMEM; + /* + * Create table containing all of the supported scales by looping + * through all of the per-time scales and copying the unique scales + * into one sorted table. + * + * We assume all the gains for same integration time were unique. + * It is likely the first time table had greatest time multiplier as + * the times are in the order of preference and greater times are + * usually preferred. Hence we start from the last table which is likely + * to have the smallest total gains. + */ + t_idx = gts->num_itime - 1; + memcpy(all_scales, scales[t_idx], all_scales_tbl_bytes); + new_idx = gts->num_hwgain * 2; + + while (t_idx-- > 0) { + for (i = 0; i < gts->num_hwgain ; i++) { + int *candidate = &scales[t_idx][i * 2]; + int chk; + + if (scale_smaller(candidate, &all_scales[new_idx - 2])) { + all_scales[new_idx] = candidate[0]; + all_scales[new_idx + 1] = candidate[1]; + new_idx += 2; + + continue; + } + for (chk = 0; chk < new_idx; chk += 2) + if (!scale_smaller(candidate, &all_scales[chk])) + break; + + if (scale_eq(candidate, &all_scales[chk])) + continue; + + memmove(&all_scales[chk + 2], &all_scales[chk], + (new_idx - chk) * sizeof(int)); + all_scales[chk] = candidate[0]; + all_scales[chk + 1] = candidate[1]; + new_idx += 2; + } + } + + gts->num_avail_all_scales = new_idx / 2; + gts->avail_all_scales_table = all_scales; + + return 0; +} + +static void iio_gts_free_int_table_array(int **arr, int num_tables) +{ + int i; + + for (i = 0; i < num_tables; i++) + kfree(arr[i]); + + kfree(arr); +} + +static int iio_gts_alloc_int_table_array(int ***arr, int num_tables, int num_table_items) +{ + int i, **tmp; + + tmp = kcalloc(num_tables, sizeof(**arr), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + for (i = 0; i < num_tables; i++) { + tmp[i] = kcalloc(num_table_items, sizeof(int), GFP_KERNEL); + if (!tmp[i]) + goto err_free; + } + + *arr = tmp; + + return 0; +err_free: + iio_gts_free_int_table_array(tmp, i); + + return -ENOMEM; +} + static int iio_gts_gain_cmp(const void *a, const void *b) { return *(int *)a - *(int *)b; } -static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales) +static int fill_and_sort_scaletables(struct iio_gts *gts, int **gains, int **scales) { - int i, j, new_idx, time_idx, ret = 0; - int *all_gains; - size_t gain_bytes; + int i, j, ret; for (i = 0; i < gts->num_itime; i++) { /* @@ -189,73 +296,71 @@ static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales) } } - gain_bytes = array_size(gts->num_hwgain, sizeof(int)); - all_gains = kcalloc(gts->num_itime, gain_bytes, GFP_KERNEL); - if (!all_gains) - return -ENOMEM; + return 0; +} + +static void compute_per_time_gains(struct iio_gts *gts, int **gains) +{ + int i, j; + + for (i = 0; i < gts->num_itime; i++) { + for (j = 0; j < gts->num_hwgain; j++) + gains[i][j] = gts->hwgain_table[j].gain * + gts->itime_table[i].mul; + } +} + +static int compute_per_time_tables(struct iio_gts *gts, int **scales) +{ + int **per_time_gains; + int ret; /* - * We assume all the gains for same integration time were unique. - * It is likely the first time table had greatest time multiplier as - * the times are in the order of preference and greater times are - * usually preferred. Hence we start from the last table which is likely - * to have the smallest total gains. + * Create a temporary array of the 'total gains' for each integration + * time. */ - time_idx = gts->num_itime - 1; - memcpy(all_gains, gains[time_idx], gain_bytes); - new_idx = gts->num_hwgain; + ret = iio_gts_alloc_int_table_array(&per_time_gains, gts->num_itime, + gts->num_hwgain); + if (ret) + return ret; - while (time_idx-- > 0) { - for (j = 0; j < gts->num_hwgain; j++) { - int candidate = gains[time_idx][j]; - int chk; + compute_per_time_gains(gts, per_time_gains); - if (candidate > all_gains[new_idx - 1]) { - all_gains[new_idx] = candidate; - new_idx++; + /* Convert the gains to scales and populate the scale tables */ + ret = fill_and_sort_scaletables(gts, per_time_gains, scales); - continue; - } - for (chk = 0; chk < new_idx; chk++) - if (candidate <= all_gains[chk]) - break; - - if (candidate == all_gains[chk]) - continue; - - memmove(&all_gains[chk + 1], &all_gains[chk], - (new_idx - chk) * sizeof(int)); - all_gains[chk] = candidate; - new_idx++; - } - } - - gts->avail_all_scales_table = kcalloc(new_idx, 2 * sizeof(int), - GFP_KERNEL); - if (!gts->avail_all_scales_table) { - ret = -ENOMEM; - goto free_out; - } - gts->num_avail_all_scales = new_idx; - - for (i = 0; i < gts->num_avail_all_scales; i++) { - ret = iio_gts_total_gain_to_scale(gts, all_gains[i], - >s->avail_all_scales_table[i * 2], - >s->avail_all_scales_table[i * 2 + 1]); - - if (ret) { - kfree(gts->avail_all_scales_table); - gts->num_avail_all_scales = 0; - goto free_out; - } - } - -free_out: - kfree(all_gains); + iio_gts_free_int_table_array(per_time_gains, gts->num_itime); return ret; } +/* + * Create a table of supported scales for each supported integration time. + * This can be used as available_scales by drivers which don't allow scale + * setting to change the integration time to display correct set of scales + * depending on the used integration time. + */ +static int **create_per_time_scales(struct iio_gts *gts) +{ + int **per_time_scales, ret; + + ret = iio_gts_alloc_int_table_array(&per_time_scales, gts->num_itime, + gts->num_hwgain * 2); + if (ret) + return ERR_PTR(ret); + + ret = compute_per_time_tables(gts, per_time_scales); + if (ret) + goto err_out; + + return per_time_scales; + +err_out: + iio_gts_free_int_table_array(per_time_scales, gts->num_itime); + + return ERR_PTR(ret); +} + /** * iio_gts_build_avail_scale_table - create tables of available scales * @gts: Gain time scale descriptor @@ -275,55 +380,26 @@ static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales) */ static int iio_gts_build_avail_scale_table(struct iio_gts *gts) { - int **per_time_gains, **per_time_scales, i, j, ret = -ENOMEM; + int ret, all_scales_tbl_bytes; + int **per_time_scales; - per_time_gains = kcalloc(gts->num_itime, sizeof(*per_time_gains), GFP_KERNEL); - if (!per_time_gains) - return ret; + if (unlikely(check_mul_overflow(gts->num_hwgain, 2 * sizeof(int), + &all_scales_tbl_bytes))) + return -EOVERFLOW; - per_time_scales = kcalloc(gts->num_itime, sizeof(*per_time_scales), GFP_KERNEL); - if (!per_time_scales) - goto free_gains; + per_time_scales = create_per_time_scales(gts); + if (IS_ERR(per_time_scales)) + return PTR_ERR(per_time_scales); - for (i = 0; i < gts->num_itime; i++) { - per_time_scales[i] = kcalloc(gts->num_hwgain, 2 * sizeof(int), - GFP_KERNEL); - if (!per_time_scales[i]) - goto err_free_out; - - per_time_gains[i] = kcalloc(gts->num_hwgain, sizeof(int), - GFP_KERNEL); - if (!per_time_gains[i]) { - kfree(per_time_scales[i]); - goto err_free_out; - } - - for (j = 0; j < gts->num_hwgain; j++) - per_time_gains[i][j] = gts->hwgain_table[j].gain * - gts->itime_table[i].mul; - } - - ret = gain_to_scaletables(gts, per_time_gains, per_time_scales); - if (ret) - goto err_free_out; - - for (i = 0; i < gts->num_itime; i++) - kfree(per_time_gains[i]); - kfree(per_time_gains); gts->per_time_avail_scale_tables = per_time_scales; - return 0; - -err_free_out: - for (i--; i >= 0; i--) { - kfree(per_time_scales[i]); - kfree(per_time_gains[i]); + ret = do_combined_scaletable(gts, all_scales_tbl_bytes); + if (ret) { + iio_gts_free_int_table_array(per_time_scales, gts->num_itime); + return ret; } - kfree(per_time_scales); -free_gains: - kfree(per_time_gains); - return ret; + return 0; } static void iio_gts_us_to_int_micro(int *time_us, int *int_micro_times, From 6cc60bc38e8428544f8f4f12ddb6cc05fc83a7da Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:12 +0100 Subject: [PATCH 0058/2157] iio: dac: adi-axi-dac: modify stream enable Change suggested from the AXI HDL team, modify the function axi_dac_data_stream_enable() to check for interface busy, to avoid possible issues when starting the stream. Fixes: e61d7178429a ("iio: dac: adi-axi-dac: extend features") Reviewed-by: Nuno Sa Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-3-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/adi-axi-dac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index b143f7ed6847..ac871deb8063 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ b/drivers/iio/dac/adi-axi-dac.c @@ -585,6 +585,14 @@ static int axi_dac_ddr_disable(struct iio_backend *back) static int axi_dac_data_stream_enable(struct iio_backend *back) { struct axi_dac_state *st = iio_backend_get_priv(back); + int ret, val; + + ret = regmap_read_poll_timeout(st->regmap, + AXI_DAC_UI_STATUS_REG, val, + FIELD_GET(AXI_DAC_UI_STATUS_IF_BUSY, val) == 0, + 10, 100 * KILO); + if (ret) + return ret; return regmap_set_bits(st->regmap, AXI_DAC_CUSTOM_CTRL_REG, AXI_DAC_CUSTOM_CTRL_STREAM_ENABLE); From 18423f825015c8efdaf5c8f692657ad3d2d23100 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Jan 2025 16:13:04 +0200 Subject: [PATCH 0059/2157] serial: mpc52xx_uart: Remove legacy PM hook The legacy PM hook was never implemented. If we would like to achieve this, the entire serial subsystem should switch to use runtime PM first. For now, remove unneeded code. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250117141304.592611-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mpc52xx_uart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index 2204cc3e3b07..37eb701b0b46 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -1351,7 +1351,6 @@ static const struct uart_ops mpc52xx_uart_ops = { .startup = mpc52xx_uart_startup, .shutdown = mpc52xx_uart_shutdown, .set_termios = mpc52xx_uart_set_termios, -/* .pm = mpc52xx_uart_pm, Not supported yet */ .type = mpc52xx_uart_type, .release_port = mpc52xx_uart_release_port, .request_port = mpc52xx_uart_request_port, From f0c8814c1c24999aaff2f04b5bdb1d0da2b6a030 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Jan 2025 16:13:12 +0200 Subject: [PATCH 0060/2157] serial: pch_uart: Remove legacy PM hook The legacy PM hook was never implemented. If we would like to achieve this, the entire serial subsystem should switch to use runtime PM first. For now, remove unneeded code. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250117141313.592645-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index c7cee5fee603..508e8c6f01d4 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -1515,7 +1515,6 @@ static const struct uart_ops pch_uart_ops = { .startup = pch_uart_startup, .shutdown = pch_uart_shutdown, .set_termios = pch_uart_set_termios, -/* .pm = pch_uart_pm, Not supported yet */ .type = pch_uart_type, .release_port = pch_uart_release_port, .request_port = pch_uart_request_port, From 42e128c9d5c5b6632c3b182afb7fab0957b6c337 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 22 Jan 2025 01:25:59 +0000 Subject: [PATCH 0061/2157] tty/ldsem: Remove unused ldsem_down_write_trylock ldsem_down_write_trylock() was added in 2013 by commit 4898e640caf0 ("tty: Add timed, writer-prioritized rw semaphore") but hasn't been used. Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250122012559.441006-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldsem.c | 17 ----------------- include/linux/tty_ldisc.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 3be428c16260..4e18031a5ca3 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -366,23 +366,6 @@ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) return __ldsem_down_write_nested(sem, 0, timeout); } -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int ldsem_down_write_trylock(struct ld_semaphore *sem) -{ - long count = atomic_long_read(&sem->count); - - while ((count & LDSEM_ACTIVE_MASK) == 0) { - if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) { - rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); - lock_acquired(&sem->dep_map, _RET_IP_); - return 1; - } - } - return 0; -} - /* * release a read lock */ diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index af01e89074b2..c5cccc3fc1e8 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -39,7 +39,6 @@ do { \ int ldsem_down_read(struct ld_semaphore *sem, long timeout); int ldsem_down_read_trylock(struct ld_semaphore *sem); int ldsem_down_write(struct ld_semaphore *sem, long timeout); -int ldsem_down_write_trylock(struct ld_semaphore *sem); void ldsem_up_read(struct ld_semaphore *sem); void ldsem_up_write(struct ld_semaphore *sem); From 6bc8fbdd71008a85fdd427a6db6e0e779a177007 Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Wed, 22 Jan 2025 15:23:52 +0800 Subject: [PATCH 0062/2157] dt-bindings: serial: Add a new compatible string for UMS9632 The UART IP version of the ums9632 SoC project has been upgraded. UART controller registers have added valid bits to support new features. In order to distinguish different UART IP versions, we use sc9632-uart to represent upgraded IP and sc9836-uart to represent old IP. Signed-off-by: Wenhua Lin Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250122072352.3663653-1-Wenhua.Lin@unisoc.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/sprd-uart.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.yaml b/Documentation/devicetree/bindings/serial/sprd-uart.yaml index a2a5056eba04..5bf2656afcfd 100644 --- a/Documentation/devicetree/bindings/serial/sprd-uart.yaml +++ b/Documentation/devicetree/bindings/serial/sprd-uart.yaml @@ -17,13 +17,18 @@ properties: oneOf: - items: - enum: - - sprd,sc9632-uart + - sprd,ums9632-uart + - const: sprd,sc9632-uart + - items: + - enum: - sprd,sc9860-uart - sprd,sc9863a-uart - sprd,ums512-uart - sprd,ums9620-uart - const: sprd,sc9836-uart - - const: sprd,sc9836-uart + - enum: + - sprd,sc9632-uart + - sprd,sc9836-uart reg: maxItems: 1 From ed333392bd201e71a010e588e61605a1e2dd07df Mon Sep 17 00:00:00 2001 From: Benjamin Larsson Date: Sun, 19 Jan 2025 14:01:04 +0100 Subject: [PATCH 0063/2157] dt-bindings: serial: 8250: Add Airoha compatibles The Airoha SoC family have a mostly 16550-compatible UART and High-Speed UART hardware with the exception of custom baud rate settings register. Signed-off-by: Benjamin Larsson Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250119130105.2833517-2-benjamin.larsson@genexis.eu Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 0bde2379e864..671944d520d3 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -63,6 +63,8 @@ properties: - const: mrvl,pxa-uart - const: nuvoton,wpcm450-uart - const: nuvoton,npcm750-uart + - const: airoha,airoha-uart + - const: airoha,airoha-hsuart - const: nvidia,tegra20-uart - const: nxp,lpc3220-uart - items: From e12ebf14fa3654f68589292e645ae1ef74786638 Mon Sep 17 00:00:00 2001 From: Benjamin Larsson Date: Sun, 19 Jan 2025 14:01:05 +0100 Subject: [PATCH 0064/2157] serial: Airoha SoC UART and HSUART support Support for Airoha AN7581 SoC UART and HSUART baud rate calculation routine. Signed-off-by: Benjamin Larsson Link: https://lore.kernel.org/r/20250119130105.2833517-3-benjamin.larsson@genexis.eu Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 15 +++++ drivers/tty/serial/8250/8250_airoha.c | 81 +++++++++++++++++++++++++++ drivers/tty/serial/8250/8250_of.c | 2 + drivers/tty/serial/8250/8250_port.c | 26 +++++++++ drivers/tty/serial/8250/Kconfig | 10 ++++ drivers/tty/serial/8250/Makefile | 1 + 6 files changed, 135 insertions(+) create mode 100644 drivers/tty/serial/8250/8250_airoha.c diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 11e05aa014e5..2ebfd94a5818 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -314,6 +314,21 @@ static inline int serial8250_in_MCR(struct uart_8250_port *up) return mctrl; } +/* uart_config[] table port type defines */ +/* Airoha UART */ +#define PORT_AIROHA 124 + +/* Airoha HSUART */ +#define PORT_AIROHA_HS 125 + +#ifdef CONFIG_SERIAL_8250_AIROHA +void airoha8250_set_baud_rate(struct uart_port *port, + unsigned int baud, unsigned int hs); +#else +static inline void airoha8250_set_baud_rate(struct uart_port *port, + unsigned int baud, unsigned int hs) { } +#endif + #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); diff --git a/drivers/tty/serial/8250/8250_airoha.c b/drivers/tty/serial/8250/8250_airoha.c new file mode 100644 index 000000000000..51e675605741 --- /dev/null +++ b/drivers/tty/serial/8250/8250_airoha.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Airoha UART baud rate calculation function + * + * Copyright (c) 2025 Genexis Sweden AB + * Author: Benjamin Larsson + */ + +#include "8250.h" + +/* The Airoha UART is 16550-compatible except for the baud rate calculation. */ + +/* Airoha UART registers */ +#define UART_AIROHA_BRDL 0 +#define UART_AIROHA_BRDH 1 +#define UART_AIROHA_XINCLKDR 10 +#define UART_AIROHA_XYD 11 + +#define XYD_Y 65000 +#define XINDIV_CLOCK 20000000 +#define UART_BRDL_20M 0x01 +#define UART_BRDH_20M 0x00 + +static const int clock_div_tab[] = { 10, 4, 2}; +static const int clock_div_reg[] = { 4, 2, 1}; + +/** + * airoha8250_set_baud_rate() - baud rate calculation routine + * @port: uart port + * @baud: requested uart baud rate + * @hs: uart type selector, 0 for regular uart and 1 for high-speed uart + * + * crystal_clock = 20 MHz (fixed frequency) + * xindiv_clock = crystal_clock / clock_div + * (x/y) = XYD, 32 bit register with 16 bits of x and then 16 bits of y + * clock_div = XINCLK_DIVCNT (default set to 10 (0x4)), + * - 3 bit register [ 1, 2, 4, 8, 10, 12, 16, 20 ] + * + * baud_rate = ((xindiv_clock) * (x/y)) / ([BRDH,BRDL] * 16) + * + * Selecting divider needs to fulfill + * 1.8432 MHz <= xindiv_clk <= APB clock / 2 + * The clocks are unknown but a divider of value 1 did not result in a valid + * waveform. + * + * XYD_y seems to need to be larger then XYD_x for proper waveform generation. + * Setting [BRDH,BRDL] to [0,1] and XYD_y to 65000 gives even values + * for usual baud rates. + */ + +void airoha8250_set_baud_rate(struct uart_port *port, + unsigned int baud, unsigned int hs) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned int xyd_x, nom, denom; + int i; + + /* set DLAB to access the baud rate divider registers (BRDH, BRDL) */ + serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); + /* set baud rate calculation defaults */ + /* set BRDIV ([BRDH,BRDL]) to 1 */ + serial_port_out(port, UART_AIROHA_BRDL, UART_BRDL_20M); + serial_port_out(port, UART_AIROHA_BRDH, UART_BRDH_20M); + /* calculate XYD_x and XINCLKDR register by searching + * through a table of crystal_clock divisors + * + * for the HSUART xyd_x needs to be scaled by a factor of 2 + */ + for (i = 0 ; i < ARRAY_SIZE(clock_div_tab) ; i++) { + denom = (XINDIV_CLOCK/40) / clock_div_tab[i]; + nom = baud * (XYD_Y/40); + xyd_x = ((nom/denom) << 4) >> hs; + if (xyd_x < XYD_Y) + break; + } + serial_port_out(port, UART_AIROHA_XINCLKDR, clock_div_reg[i]); + serial_port_out(port, UART_AIROHA_XYD, (xyd_x<<16) | XYD_Y); + /* unset DLAB */ + serial_port_out(port, UART_LCR, up->lcr); +} diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 64aed7efc569..5315bc1bc06d 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -341,6 +341,8 @@ static const struct of_device_id of_platform_serial_table[] = { { .compatible = "ti,da830-uart", .data = (void *)PORT_DA830, }, { .compatible = "nuvoton,wpcm450-uart", .data = (void *)PORT_NPCM, }, { .compatible = "nuvoton,npcm750-uart", .data = (void *)PORT_NPCM, }, + { .compatible = "airoha,airoha-uart", .data = (void *)PORT_AIROHA, }, + { .compatible = "airoha,airoha-hsuart", .data = (void *)PORT_AIROHA_HS, }, { /* end of list */ }, }; MODULE_DEVICE_TABLE(of, of_platform_serial_table); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index d7976a21cca9..5afbc988dcc3 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -319,6 +319,24 @@ static const struct serial8250_config uart_config[] = { .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, + /* From here on after additional uart config port defines are placed in 8250.h + */ + [PORT_AIROHA] = { + .name = "Airoha UART", + .fifo_size = 8, + .tx_loadsz = 1, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_CLEAR_RCVR, + .rxtrig_bytes = {1, 4}, + .flags = UART_CAP_FIFO, + }, + [PORT_AIROHA_HS] = { + .name = "Airoha HSUART", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_CLEAR_RCVR, + .rxtrig_bytes = {1, 4}, + .flags = UART_CAP_FIFO, + }, }; /* Uart divisor latch read */ @@ -2865,6 +2883,14 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, serial8250_set_divisor(port, baud, quot, frac); + /* + * Airoha SoCs have custom registers for baud rate settings + */ + if (port->type == PORT_AIROHA) + airoha8250_set_baud_rate(port, baud, 0); + if (port->type == PORT_AIROHA_HS) + airoha8250_set_baud_rate(port, baud, 1); + /* * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR * is written without DLAB set, this mode will be disabled. diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 55d26d16df9b..97fe6ea9393d 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -356,6 +356,16 @@ config SERIAL_8250_ACORN system, say Y to this option. The driver can handle 1, 2, or 3 port cards. If unsure, say N. +config SERIAL_8250_AIROHA + tristate "Airoha UART support" + depends on (ARCH_AIROHA || COMPILE_TEST) && OF && SERIAL_8250 + help + Selecting this option enables an Airoha SoC specific baud rate + calculation routine on an otherwise 16550 compatible UART hardware. + + If you have an Airoha based board and want to use the serial port, + say Y to this option. If unsure, say N. + config SERIAL_8250_BCM2835AUX tristate "BCM2835 auxiliar mini UART support" depends on ARCH_BCM2835 || COMPILE_TEST diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index 1516de629b61..b7f07d5c4cca 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o obj-$(CONFIG_SERIAL_8250_ACORN) += 8250_acorn.o +obj-$(CONFIG_SERIAL_8250_AIROHA) += 8250_airoha.o obj-$(CONFIG_SERIAL_8250_ASPEED_VUART) += 8250_aspeed_vuart.o obj-$(CONFIG_SERIAL_8250_BCM2835AUX) += 8250_bcm2835aux.o obj-$(CONFIG_SERIAL_8250_BCM7271) += 8250_bcm7271.o From 750a2a4228cea80fe427223bb896849e266106b8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 29 Jan 2025 02:00:48 +0000 Subject: [PATCH 0065/2157] serial: mctrl_gpio: Remove unused mctrl_gpio_free mctrl_gpio_free() was added in 2014 by commit 84130aace839 ("tty/serial: Add GPIOLIB helpers for controlling modem lines") It does have a comment saying: '- * Normally, this function will not be called, as the GPIOs will - * be disposed of by the resource management code.' indeed, it doesn't seem to have been used since it was added. Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250129020048.245529-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/serial/driver.rst | 2 +- drivers/tty/serial/serial_mctrl_gpio.c | 28 +--------------------- drivers/tty/serial/serial_mctrl_gpio.h | 16 ++----------- 3 files changed, 4 insertions(+), 42 deletions(-) diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst index 84b43061c11b..df353211fc6b 100644 --- a/Documentation/driver-api/serial/driver.rst +++ b/Documentation/driver-api/serial/driver.rst @@ -101,6 +101,6 @@ Modem control lines via GPIO Some helpers are provided in order to set/get modem control lines via GPIO. .. kernel-doc:: drivers/tty/serial/serial_mctrl_gpio.c - :identifiers: mctrl_gpio_init mctrl_gpio_free mctrl_gpio_to_gpiod + :identifiers: mctrl_gpio_init mctrl_gpio_to_gpiod mctrl_gpio_set mctrl_gpio_get mctrl_gpio_enable_ms mctrl_gpio_disable_ms diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c index 8855688a5b6c..85c172ad1de9 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -217,7 +217,7 @@ static irqreturn_t mctrl_gpio_irq_handle(int irq, void *context) * * This will get the {cts,rts,...}-gpios from device tree if they are present * and request them, set direction etc, and return an allocated structure. - * `devm_*` functions are used, so there's no need to call mctrl_gpio_free(). + * `devm_*` functions are used, so there's no need to explicitly free. * As this sets up the irq handling, make sure to not handle changes to the * gpio input lines in your driver, too. */ @@ -267,32 +267,6 @@ struct mctrl_gpios *mctrl_gpio_init(struct uart_port *port, unsigned int idx) } EXPORT_SYMBOL_GPL(mctrl_gpio_init); -/** - * mctrl_gpio_free - explicitly free uart gpios - * @dev: uart port's device - * @gpios: gpios structure to be freed - * - * This will free the requested gpios in mctrl_gpio_init(). As `devm_*` - * functions are used, there's generally no need to call this function. - */ -void mctrl_gpio_free(struct device *dev, struct mctrl_gpios *gpios) -{ - enum mctrl_gpio_idx i; - - if (gpios == NULL) - return; - - for (i = 0; i < UART_GPIO_MAX; i++) { - if (gpios->irq[i]) - devm_free_irq(gpios->port->dev, gpios->irq[i], gpios); - - if (gpios->gpio[i]) - devm_gpiod_put(dev, gpios->gpio[i]); - } - devm_kfree(dev, gpios); -} -EXPORT_SYMBOL_GPL(mctrl_gpio_free); - /** * mctrl_gpio_enable_ms - enable irqs and handling of changes to the ms lines * @gpios: gpios to enable diff --git a/drivers/tty/serial/serial_mctrl_gpio.h b/drivers/tty/serial/serial_mctrl_gpio.h index fc76910fb105..ec4170084766 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.h +++ b/drivers/tty/serial/serial_mctrl_gpio.h @@ -59,7 +59,7 @@ struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios, /* * Request and set direction of modem control line GPIOs and set up irq * handling. - * devm_* functions are used, so there's no need to call mctrl_gpio_free(). + * devm_* functions are used, so there's no need to explicitly free. * Returns a pointer to the allocated mctrl structure if ok, -ENOMEM on * allocation error. */ @@ -67,20 +67,13 @@ struct mctrl_gpios *mctrl_gpio_init(struct uart_port *port, unsigned int idx); /* * Request and set direction of modem control line GPIOs. - * devm_* functions are used, so there's no need to call mctrl_gpio_free(). + * devm_* functions are used, so there's no need to explicitly free. * Returns a pointer to the allocated mctrl structure if ok, -ENOMEM on * allocation error. */ struct mctrl_gpios *mctrl_gpio_init_noauto(struct device *dev, unsigned int idx); -/* - * Free the mctrl_gpios structure. - * Normally, this function will not be called, as the GPIOs will - * be disposed of by the resource management code. - */ -void mctrl_gpio_free(struct device *dev, struct mctrl_gpios *gpios); - /* * Enable gpio interrupts to report status line changes. */ @@ -139,11 +132,6 @@ struct mctrl_gpios *mctrl_gpio_init_noauto(struct device *dev, unsigned int idx) return NULL; } -static inline -void mctrl_gpio_free(struct device *dev, struct mctrl_gpios *gpios) -{ -} - static inline void mctrl_gpio_enable_ms(struct mctrl_gpios *gpios) { } From 705327813879f14a22bd99ed6c76eecf5acb07f3 Mon Sep 17 00:00:00 2001 From: Manikanta Guntupalli Date: Wed, 29 Jan 2025 15:20:13 +0530 Subject: [PATCH 0066/2157] dt-bindings: serial: pl011: Add optional power-domains property AMD/Xilinx Versal device serial IP has its own power domain, so add an optional property to describe it. Signed-off-by: Manikanta Guntupalli Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250129095013.2145580-1-manikanta.guntupalli@amd.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/pl011.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/pl011.yaml b/Documentation/devicetree/bindings/serial/pl011.yaml index 9571041030b7..3fcf2d042372 100644 --- a/Documentation/devicetree/bindings/serial/pl011.yaml +++ b/Documentation/devicetree/bindings/serial/pl011.yaml @@ -92,6 +92,9 @@ properties: 3000ms. default: 3000 + power-domains: + maxItems: 1 + resets: maxItems: 1 From bfd3d4a40f3905ec70b17dbfa9b78764e59e4b4f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2025 14:14:56 +0200 Subject: [PATCH 0067/2157] serial: 8250_dw: Drop unneeded NULL checks in dw8250_quirks() Since platform data is being provided for all supported hardware, no need to NULL check for it. Drop unneeded checks. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250203121456.3182891-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 6afcf27db3b8..ac3987513564 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -459,8 +459,8 @@ static void dw8250_prepare_rx_dma(struct uart_8250_port *p) static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) { - unsigned int quirks = data->pdata ? data->pdata->quirks : 0; - u32 cpr_value = data->pdata ? data->pdata->cpr_value : 0; + unsigned int quirks = data->pdata->quirks; + u32 cpr_value = data->pdata->cpr_value; if (quirks & DW_UART_QUIRK_CPR_VALUE) data->data.cpr_value = cpr_value; From 2eb2608618ce5878e11bbe68cc8d2699c8f3a81a Mon Sep 17 00:00:00 2001 From: Toshiyuki Sato Date: Tue, 4 Feb 2025 04:44:28 +0000 Subject: [PATCH 0068/2157] serial: amba-pl011: Implement nbcon console Implement the callbacks required for an NBCON console [0] on the amba-pl011 console driver. Referred to the NBCON implementation work for 8250 [1] and imx [2]. The normal-priority write_thread checks for console ownership each time a character is printed. write_atomic holds the console ownership until the entire string is printed. UART register operations are protected from other contexts by uart_port_lock, except for a final flush(nbcon_atomic_flush_unsafe) on panic. The patch has been verified to correctly handle the output and competition of messages with different priorities and flushing panic message to console after nmi panic using ARM64 QEMU and a physical machine(A64FX). Stress testing was conducted on a physical machine(A64FX). The results are as follows: - The output speed and volume of messages using the NBCON console were comparable to the legacy console (data suggests a slight improvement). - When inducing a panic (sysrq-triggered or NMI) under heavy contention on the serial console output, the legacy console resulted in the loss of some or all crash messages. However, using the NBCON console, no message loss occurred. This testing referenced the NBCON console work for 8250 [3]. [0] https://lore.kernel.org/all/ZuRRTbapH0DCj334@pathway.suse.cz/ [1] https://lore.kernel.org/all/20240913140538.221708-1-john.ogness@linutronix.de/T/ [2] https://lore.kernel.org/linux-arm-kernel/20240913-serial-imx-nbcon-v3-1-4c627302335b@geanix.com/T/ [3] https://lore.kernel.org/lkml/ZsdoD6PomBRsB-ow@debarbos-thinkpadt14sgen2i.remote.csb/#t Signed-off-by: Toshiyuki Sato Link: https://lore.kernel.org/r/20250204044428.2191983-1-fj6611ie@aa.jp.fujitsu.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 143 ++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 46 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 04212c823a91..9a9a1d6306d0 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -272,6 +272,7 @@ struct uart_amba_port { enum pl011_rs485_tx_state rs485_tx_state; struct hrtimer trigger_start_tx; struct hrtimer trigger_stop_tx; + bool console_line_ended; #ifdef CONFIG_DMA_ENGINE /* DMA stuff */ unsigned int dmacr; /* dma control reg */ @@ -2366,50 +2367,7 @@ static void pl011_console_putchar(struct uart_port *port, unsigned char ch) while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) cpu_relax(); pl011_write(ch, uap, REG_DR); -} - -static void -pl011_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_amba_port *uap = amba_ports[co->index]; - unsigned int old_cr = 0, new_cr; - unsigned long flags; - int locked = 1; - - clk_enable(uap->clk); - - if (oops_in_progress) - locked = uart_port_trylock_irqsave(&uap->port, &flags); - else - uart_port_lock_irqsave(&uap->port, &flags); - - /* - * First save the CR then disable the interrupts - */ - if (!uap->vendor->always_enabled) { - old_cr = pl011_read(uap, REG_CR); - new_cr = old_cr & ~UART011_CR_CTSEN; - new_cr |= UART01x_CR_UARTEN | UART011_CR_TXE; - pl011_write(new_cr, uap, REG_CR); - } - - uart_console_write(&uap->port, s, count, pl011_console_putchar); - - /* - * Finally, wait for transmitter to become empty and restore the - * TCR. Allow feature register bits to be inverted to work around - * errata. - */ - while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) - & uap->vendor->fr_busy) - cpu_relax(); - if (!uap->vendor->always_enabled) - pl011_write(old_cr, uap, REG_CR); - - if (locked) - uart_port_unlock_irqrestore(&uap->port, flags); - - clk_disable(uap->clk); + uap->console_line_ended = (ch == '\n'); } static void pl011_console_get_options(struct uart_amba_port *uap, int *baud, @@ -2472,6 +2430,8 @@ static int pl011_console_setup(struct console *co, char *options) if (ret) return ret; + uap->console_line_ended = true; + if (dev_get_platdata(uap->port.dev)) { struct amba_pl011_data *plat; @@ -2555,14 +2515,105 @@ static int pl011_console_match(struct console *co, char *name, int idx, return -ENODEV; } +static void +pl011_console_write_atomic(struct console *co, struct nbcon_write_context *wctxt) +{ + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0; + + if (!nbcon_enter_unsafe(wctxt)) + return; + + clk_enable(uap->clk); + + if (!uap->vendor->always_enabled) { + old_cr = pl011_read(uap, REG_CR); + pl011_write((old_cr & ~UART011_CR_CTSEN) | (UART01x_CR_UARTEN | UART011_CR_TXE), + uap, REG_CR); + } + + if (!uap->console_line_ended) + uart_console_write(&uap->port, "\n", 1, pl011_console_putchar); + uart_console_write(&uap->port, wctxt->outbuf, wctxt->len, pl011_console_putchar); + + while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) & uap->vendor->fr_busy) + cpu_relax(); + + if (!uap->vendor->always_enabled) + pl011_write(old_cr, uap, REG_CR); + + clk_disable(uap->clk); + + nbcon_exit_unsafe(wctxt); +} + +static void +pl011_console_write_thread(struct console *co, struct nbcon_write_context *wctxt) +{ + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0; + + if (!nbcon_enter_unsafe(wctxt)) + return; + + clk_enable(uap->clk); + + if (!uap->vendor->always_enabled) { + old_cr = pl011_read(uap, REG_CR); + pl011_write((old_cr & ~UART011_CR_CTSEN) | (UART01x_CR_UARTEN | UART011_CR_TXE), + uap, REG_CR); + } + + if (nbcon_exit_unsafe(wctxt)) { + int i; + unsigned int len = READ_ONCE(wctxt->len); + + for (i = 0; i < len; i++) { + if (!nbcon_enter_unsafe(wctxt)) + break; + uart_console_write(&uap->port, wctxt->outbuf + i, 1, pl011_console_putchar); + if (!nbcon_exit_unsafe(wctxt)) + break; + } + } + + while (!nbcon_enter_unsafe(wctxt)) + nbcon_reacquire_nobuf(wctxt); + + while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) & uap->vendor->fr_busy) + cpu_relax(); + + if (!uap->vendor->always_enabled) + pl011_write(old_cr, uap, REG_CR); + + clk_disable(uap->clk); + + nbcon_exit_unsafe(wctxt); +} + +static void +pl011_console_device_lock(struct console *co, unsigned long *flags) +{ + __uart_port_lock_irqsave(&amba_ports[co->index]->port, flags); +} + +static void +pl011_console_device_unlock(struct console *co, unsigned long flags) +{ + __uart_port_unlock_irqrestore(&amba_ports[co->index]->port, flags); +} + static struct uart_driver amba_reg; static struct console amba_console = { .name = "ttyAMA", - .write = pl011_console_write, .device = uart_console_device, .setup = pl011_console_setup, .match = pl011_console_match, - .flags = CON_PRINTBUFFER | CON_ANYTIME, + .write_atomic = pl011_console_write_atomic, + .write_thread = pl011_console_write_thread, + .device_lock = pl011_console_device_lock, + .device_unlock = pl011_console_device_unlock, + .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON, .index = -1, .data = &amba_reg, }; From ab4976976ee117ed53b752bac83453e6f64dad08 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 28 Jan 2025 16:05:02 +0100 Subject: [PATCH 0069/2157] Input: drop vb2_ops_wait_prepare/finish Since commit 88785982a19d ("media: vb2: use lock if wait_prepare/finish are NULL") it is no longer needed to set the wait_prepare/finish vb2_ops callbacks as long as the lock field in vb2_queue is set. Since the vb2_ops_wait_prepare/finish callbacks already rely on that field, we can safely drop these callbacks. This simplifies the code and this is a step towards the goal of deleting these callbacks. Signed-off-by: Hans Verkuil Link: https://lore.kernel.org/r/ec811552-6014-43d4-9fcc-2ac729a8b08e@xs4all.nl Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f54.c | 2 -- drivers/input/touchscreen/atmel_mxt_ts.c | 2 -- drivers/input/touchscreen/sur40.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 5c3da910b5b2..ac4041a69fcd 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -372,8 +372,6 @@ static const struct vb2_ops rmi_f54_queue_ops = { .queue_setup = rmi_f54_queue_setup, .buf_queue = rmi_f54_buffer_queue, .stop_streaming = rmi_f54_stop_streaming, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; static const struct vb2_queue rmi_f54_queue = { diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 3ddabc5a2c99..322d5a3d40a0 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2535,8 +2535,6 @@ static void mxt_buffer_queue(struct vb2_buffer *vb) static const struct vb2_ops mxt_queue_ops = { .queue_setup = mxt_queue_setup, .buf_queue = mxt_buffer_queue, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; static const struct vb2_queue mxt_queue = { diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index 8365a2ac6fce..7b3b10cbfcfc 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -1108,8 +1108,6 @@ static const struct vb2_ops sur40_queue_ops = { .buf_queue = sur40_buffer_queue, .start_streaming = sur40_start_streaming, .stop_streaming = sur40_stop_streaming, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; static const struct vb2_queue sur40_queue = { From 188e9529a606f35c57e34cf860b99bc2b191b5f4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:06 -0800 Subject: [PATCH 0070/2157] cxl: Remove the CXL_DECODER_MIXED mistake CXL_DECODER_MIXED is a safety mechanism introduced for the case where platform firmware has programmed an endpoint decoder that straddles a DPA partition boundary. While the kernel is careful to only allocate DPA capacity within a single partition there is no guarantee that platform firmware, or anything that touched the device before the current kernel, gets that right. However, __cxl_dpa_reserve() will never get to the CXL_DECODER_MIXED designation because of the way it tracks partition boundaries. A request_resource() that spans ->ram_res and ->pmem_res fails with the following signature: __cxl_dpa_reserve: cxl_port endpoint15: decoder15.0: failed to reserve allocation CXL_DECODER_MIXED is dead defensive programming after the driver has already given up on the device. It has never offered any protection in practice, just delete it. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Reviewed-by: Fan Ni Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864304660.668823.17000888505587850279.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 6 +++--- drivers/cxl/core/region.c | 12 ------------ drivers/cxl/cxl.h | 4 +--- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 50e6a45b30ba..b7f6a2d69f78 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -332,9 +332,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, else if (resource_contains(&cxlds->ram_res, res)) cxled->mode = CXL_DECODER_RAM; else { - dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n", - port->id, cxled->cxld.id, cxled->dpa_res); - cxled->mode = CXL_DECODER_MIXED; + dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", + port->id, cxled->cxld.id, res); + cxled->mode = CXL_DECODER_NONE; } port->hdm_end++; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e8d11a988fd9..34aa3d45fafa 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2738,18 +2738,6 @@ static int poison_by_decoder(struct device *dev, void *arg) if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) return rc; - /* - * Regions are only created with single mode decoders: pmem or ram. - * Linux does not support mixed mode decoders. This means that - * reading poison per endpoint decoder adheres to the requirement - * that poison reads of pmem and ram must be separated. - * CXL 3.0 Spec 8.2.9.8.4.1 - */ - if (cxled->mode == CXL_DECODER_MIXED) { - dev_dbg(dev, "poison list read unsupported in mixed mode\n"); - return rc; - } - cxlmd = cxled_to_memdev(cxled); if (cxled->skip) { offset = cxled->dpa_res->start - cxled->skip; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index bbbaa0d0a670..9fd1524ed150 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -380,7 +380,6 @@ enum cxl_decoder_mode { CXL_DECODER_NONE, CXL_DECODER_RAM, CXL_DECODER_PMEM, - CXL_DECODER_MIXED, CXL_DECODER_DEAD, }; @@ -390,10 +389,9 @@ static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) [CXL_DECODER_NONE] = "none", [CXL_DECODER_RAM] = "ram", [CXL_DECODER_PMEM] = "pmem", - [CXL_DECODER_MIXED] = "mixed", }; - if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED) + if (mode >= CXL_DECODER_NONE && mode < CXL_DECODER_DEAD) return names[mode]; return "mixed"; } From d77ca6c2b52508c0d2e673e801aec342e5cdbece Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:12 -0800 Subject: [PATCH 0071/2157] cxl: Introduce to_{ram,pmem}_{res,perf}() helpers In preparation for consolidating all DPA partition information into an array of DPA metadata, introduce helpers that hide the layout of the current data. I.e. make the eventual replacement of ->ram_res, ->pmem_res, ->ram_perf, and ->pmem_perf with a new DPA metadata array a no-op for code paths that consume that information, and reduce the noise of follow-on patches. The end goal is to consolidate all DPA information in 'struct cxl_dev_state', but for now the helpers just make it appear that all DPA metadata is relative to @cxlds. As the conversion to generic partition metadata walking is completed, these helpers will naturally be eliminated, or reduced in scope. Cc: Alejandro Lucero Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864305238.668823.16553986866633608541.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 71 +++++++++++++++++++++++------------- drivers/cxl/core/hdm.c | 26 +++++++------ drivers/cxl/core/mbox.c | 18 +++++---- drivers/cxl/core/memdev.c | 42 +++++++++++---------- drivers/cxl/core/region.c | 10 +++-- drivers/cxl/cxlmem.h | 58 +++++++++++++++++++++++++---- drivers/cxl/mem.c | 2 +- tools/testing/cxl/test/cxl.c | 25 +++++++------ 8 files changed, 162 insertions(+), 90 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 8153f8d83a16..797baad483cb 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -258,27 +258,36 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, struct xarray *dsmas_xa) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); struct device *dev = cxlds->dev; - struct range pmem_range = { - .start = cxlds->pmem_res.start, - .end = cxlds->pmem_res.end, - }; - struct range ram_range = { - .start = cxlds->ram_res.start, - .end = cxlds->ram_res.end, - }; struct dsmas_entry *dent; unsigned long index; + const struct resource *partition[] = { + to_ram_res(cxlds), + to_pmem_res(cxlds), + }; + struct cxl_dpa_perf *perf[] = { + to_ram_perf(cxlds), + to_pmem_perf(cxlds), + }; xa_for_each(dsmas_xa, index, dent) { - if (resource_size(&cxlds->ram_res) && - range_contains(&ram_range, &dent->dpa_range)) - update_perf_entry(dev, dent, &mds->ram_perf); - else if (resource_size(&cxlds->pmem_res) && - range_contains(&pmem_range, &dent->dpa_range)) - update_perf_entry(dev, dent, &mds->pmem_perf); - else + bool found = false; + + for (int i = 0; i < ARRAY_SIZE(partition); i++) { + const struct resource *res = partition[i]; + struct range range = { + .start = res->start, + .end = res->end, + }; + + if (range_contains(&range, &dent->dpa_range)) { + update_perf_entry(dev, dent, perf[i]); + found = true; + break; + } + } + + if (!found) dev_dbg(dev, "no partition for dsmas dpa: %pra\n", &dent->dpa_range); } @@ -304,6 +313,9 @@ static int match_cxlrd_qos_class(struct device *dev, void *data) static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) { + if (!dpa_perf) + return; + *dpa_perf = (struct cxl_dpa_perf) { .qos_class = CXL_QOS_CLASS_INVALID, }; @@ -312,6 +324,9 @@ static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) static bool cxl_qos_match(struct cxl_port *root_port, struct cxl_dpa_perf *dpa_perf) { + if (!dpa_perf) + return false; + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) return false; @@ -346,7 +361,8 @@ static int match_cxlrd_hb(struct device *dev, void *data) static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *ram_perf = to_ram_perf(cxlds), + *pmem_perf = to_pmem_perf(cxlds); struct cxl_port *root_port; int rc; @@ -359,17 +375,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) root_port = &cxl_root->port; /* Check that the QTG IDs are all sane between end device and root decoders */ - if (!cxl_qos_match(root_port, &mds->ram_perf)) - reset_dpa_perf(&mds->ram_perf); - if (!cxl_qos_match(root_port, &mds->pmem_perf)) - reset_dpa_perf(&mds->pmem_perf); + if (!cxl_qos_match(root_port, ram_perf)) + reset_dpa_perf(ram_perf); + if (!cxl_qos_match(root_port, pmem_perf)) + reset_dpa_perf(pmem_perf); /* Check to make sure that the device's host bridge is under a root decoder */ rc = device_for_each_child(&root_port->dev, cxlmd->endpoint->host_bridge, match_cxlrd_hb); if (!rc) { - reset_dpa_perf(&mds->ram_perf); - reset_dpa_perf(&mds->pmem_perf); + reset_dpa_perf(ram_perf); + reset_dpa_perf(pmem_perf); } return rc; @@ -574,20 +590,23 @@ static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxle enum cxl_decoder_mode mode) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_dpa_perf *perf; switch (mode) { case CXL_DECODER_RAM: - perf = &mds->ram_perf; + perf = to_ram_perf(cxlds); break; case CXL_DECODER_PMEM: - perf = &mds->pmem_perf; + perf = to_pmem_perf(cxlds); break; default: return ERR_PTR(-EINVAL); } + if (!perf) + return ERR_PTR(-EINVAL); + if (!dpa_perf_contains(perf, cxled->dpa_res)) return ERR_PTR(-EINVAL); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index b7f6a2d69f78..48b26d3dad26 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -327,9 +327,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, cxled->dpa_res = res; cxled->skip = skipped; - if (resource_contains(&cxlds->pmem_res, res)) + if (resource_contains(to_pmem_res(cxlds), res)) cxled->mode = CXL_DECODER_PMEM; - else if (resource_contains(&cxlds->ram_res, res)) + else if (resource_contains(to_ram_res(cxlds), res)) cxled->mode = CXL_DECODER_RAM; else { dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", @@ -442,11 +442,11 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, * Only allow modes that are supported by the current partition * configuration */ - if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) { + if (mode == CXL_DECODER_PMEM && !cxl_pmem_size(cxlds)) { dev_dbg(dev, "no available pmem capacity\n"); return -ENXIO; } - if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) { + if (mode == CXL_DECODER_RAM && !cxl_ram_size(cxlds)) { dev_dbg(dev, "no available ram capacity\n"); return -ENXIO; } @@ -464,6 +464,8 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) struct device *dev = &cxled->cxld.dev; resource_size_t start, avail, skip; struct resource *p, *last; + const struct resource *ram_res = to_ram_res(cxlds); + const struct resource *pmem_res = to_pmem_res(cxlds); int rc; down_write(&cxl_dpa_rwsem); @@ -480,37 +482,37 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) goto out; } - for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling) + for (p = ram_res->child, last = NULL; p; p = p->sibling) last = p; if (last) free_ram_start = last->end + 1; else - free_ram_start = cxlds->ram_res.start; + free_ram_start = ram_res->start; - for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling) + for (p = pmem_res->child, last = NULL; p; p = p->sibling) last = p; if (last) free_pmem_start = last->end + 1; else - free_pmem_start = cxlds->pmem_res.start; + free_pmem_start = pmem_res->start; if (cxled->mode == CXL_DECODER_RAM) { start = free_ram_start; - avail = cxlds->ram_res.end - start + 1; + avail = ram_res->end - start + 1; skip = 0; } else if (cxled->mode == CXL_DECODER_PMEM) { resource_size_t skip_start, skip_end; start = free_pmem_start; - avail = cxlds->pmem_res.end - start + 1; + avail = pmem_res->end - start + 1; skip_start = free_ram_start; /* * If some pmem is already allocated, then that allocation * already handled the skip. */ - if (cxlds->pmem_res.child && - skip_start == cxlds->pmem_res.child->start) + if (pmem_res->child && + skip_start == pmem_res->child->start) skip_end = skip_start - 1; else skip_end = start - 1; diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..3502f1633ad2 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1270,24 +1270,26 @@ static int add_dpa_res(struct device *dev, struct resource *parent, int cxl_mem_create_range_info(struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; + struct resource *ram_res = to_ram_res(cxlds); + struct resource *pmem_res = to_pmem_res(cxlds); struct device *dev = cxlds->dev; int rc; if (!cxlds->media_ready) { cxlds->dpa_res = DEFINE_RES_MEM(0, 0); - cxlds->ram_res = DEFINE_RES_MEM(0, 0); - cxlds->pmem_res = DEFINE_RES_MEM(0, 0); + *ram_res = DEFINE_RES_MEM(0, 0); + *pmem_res = DEFINE_RES_MEM(0, 0); return 0; } cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); if (mds->partition_align_bytes == 0) { - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, + rc = add_dpa_res(dev, &cxlds->dpa_res, ram_res, 0, mds->volatile_only_bytes, "ram"); if (rc) return rc; - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, + return add_dpa_res(dev, &cxlds->dpa_res, pmem_res, mds->volatile_only_bytes, mds->persistent_only_bytes, "pmem"); } @@ -1298,11 +1300,11 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return rc; } - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, + rc = add_dpa_res(dev, &cxlds->dpa_res, ram_res, 0, mds->active_volatile_bytes, "ram"); if (rc) return rc; - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, + return add_dpa_res(dev, &cxlds->dpa_res, pmem_res, mds->active_volatile_bytes, mds->active_persistent_bytes, "pmem"); } @@ -1450,8 +1452,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; - mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; + to_ram_perf(&mds->cxlds)->qos_class = CXL_QOS_CLASS_INVALID; + to_pmem_perf(&mds->cxlds)->qos_class = CXL_QOS_CLASS_INVALID; return mds; } diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index ae3dfcbe8938..c5f8320ed330 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -80,7 +80,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - unsigned long long len = resource_size(&cxlds->ram_res); + unsigned long long len = resource_size(to_ram_res(cxlds)); return sysfs_emit(buf, "%#llx\n", len); } @@ -93,7 +93,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - unsigned long long len = resource_size(&cxlds->pmem_res); + unsigned long long len = cxl_pmem_size(cxlds); return sysfs_emit(buf, "%#llx\n", len); } @@ -198,16 +198,20 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) int rc = 0; /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ - if (resource_size(&cxlds->pmem_res)) { - offset = cxlds->pmem_res.start; - length = resource_size(&cxlds->pmem_res); + if (cxl_pmem_size(cxlds)) { + const struct resource *res = to_pmem_res(cxlds); + + offset = res->start; + length = resource_size(res); rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); if (rc) return rc; } - if (resource_size(&cxlds->ram_res)) { - offset = cxlds->ram_res.start; - length = resource_size(&cxlds->ram_res); + if (cxl_ram_size(cxlds)) { + const struct resource *res = to_ram_res(cxlds); + + offset = res->start; + length = resource_size(res); rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); /* * Invalid Physical Address is not an error for @@ -409,9 +413,8 @@ static ssize_t pmem_qos_class_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); + return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class); } static struct device_attribute dev_attr_pmem_qos_class = @@ -428,9 +431,8 @@ static ssize_t ram_qos_class_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); + return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class); } static struct device_attribute dev_attr_ram_qos_class = @@ -466,11 +468,11 @@ static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds); - if (a == &dev_attr_ram_qos_class.attr) - if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; + if (a == &dev_attr_ram_qos_class.attr && + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) + return 0; return a->mode; } @@ -485,11 +487,11 @@ static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds); - if (a == &dev_attr_pmem_qos_class.attr) - if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; + if (a == &dev_attr_pmem_qos_class.attr && + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) + return 0; return a->mode; } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 34aa3d45fafa..6706a1b79549 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2701,7 +2701,7 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, if (ctx->mode == CXL_DECODER_RAM) { offset = ctx->offset; - length = resource_size(&cxlds->ram_res) - offset; + length = cxl_ram_size(cxlds) - offset; rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); if (rc == -EFAULT) rc = 0; @@ -2713,9 +2713,11 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, length = resource_size(&cxlds->dpa_res) - offset; if (!length) return 0; - } else if (resource_size(&cxlds->pmem_res)) { - offset = cxlds->pmem_res.start; - length = resource_size(&cxlds->pmem_res); + } else if (cxl_pmem_size(cxlds)) { + const struct resource *res = to_pmem_res(cxlds); + + offset = res->start; + length = resource_size(res); } else { return 0; } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..78e92e24d7b5 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -423,8 +423,8 @@ struct cxl_dpa_perf { * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @media_ready: Indicate whether the device media is usable * @dpa_res: Overall DPA resource tree for the device - * @pmem_res: Active Persistent memory capacity configuration - * @ram_res: Active Volatile memory capacity configuration + * @_pmem_res: Active Persistent memory capacity configuration + * @_ram_res: Active Volatile memory capacity configuration * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device * @cxl_mbox: CXL mailbox context @@ -438,13 +438,41 @@ struct cxl_dev_state { bool rcd; bool media_ready; struct resource dpa_res; - struct resource pmem_res; - struct resource ram_res; + struct resource _pmem_res; + struct resource _ram_res; u64 serial; enum cxl_devtype type; struct cxl_mailbox cxl_mbox; }; +static inline struct resource *to_ram_res(struct cxl_dev_state *cxlds) +{ + return &cxlds->_ram_res; +} + +static inline struct resource *to_pmem_res(struct cxl_dev_state *cxlds) +{ + return &cxlds->_pmem_res; +} + +static inline resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) +{ + const struct resource *res = to_ram_res(cxlds); + + if (!res) + return 0; + return resource_size(res); +} + +static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) +{ + const struct resource *res = to_pmem_res(cxlds); + + if (!res) + return 0; + return resource_size(res); +} + static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) { return dev_get_drvdata(cxl_mbox->host); @@ -471,8 +499,8 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset - * @ram_perf: performance data entry matched to RAM partition - * @pmem_perf: performance data entry matched to PMEM partition + * @_ram_perf: performance data entry matched to RAM partition + * @_pmem_perf: performance data entry matched to PMEM partition * @event: event log driver state * @poison: poison driver state info * @security: security driver state info @@ -496,8 +524,8 @@ struct cxl_memdev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; - struct cxl_dpa_perf ram_perf; - struct cxl_dpa_perf pmem_perf; + struct cxl_dpa_perf _ram_perf; + struct cxl_dpa_perf _pmem_perf; struct cxl_event_state event; struct cxl_poison_state poison; @@ -505,6 +533,20 @@ struct cxl_memdev_state { struct cxl_fw_state fw; }; +static inline struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev_state *mds = container_of(cxlds, typeof(*mds), cxlds); + + return &mds->_ram_perf; +} + +static inline struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev_state *mds = container_of(cxlds, typeof(*mds), cxlds); + + return &mds->_pmem_perf; +} + static inline struct cxl_memdev_state * to_cxl_memdev_state(struct cxl_dev_state *cxlds) { diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2f03a4d5606e..9675243bd05b 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -152,7 +152,7 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { rc = devm_cxl_add_nvdimm(parent_port, cxlmd); if (rc) { if (rc == -ENODEV) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index cc8948f49117..9654513bbccf 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1000,25 +1000,28 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) find_cxl_root(port); struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; - struct range pmem_range = { - .start = cxlds->pmem_res.start, - .end = cxlds->pmem_res.end, + const struct resource *partition[] = { + to_ram_res(cxlds), + to_pmem_res(cxlds), }; - struct range ram_range = { - .start = cxlds->ram_res.start, - .end = cxlds->ram_res.end, + struct cxl_dpa_perf *perf[] = { + to_ram_perf(cxlds), + to_pmem_perf(cxlds), }; if (!cxl_root) return; - if (range_len(&ram_range)) - dpa_perf_setup(port, &ram_range, &mds->ram_perf); + for (int i = 0; i < ARRAY_SIZE(partition); i++) { + const struct resource *res = partition[i]; + struct range range = { + .start = res->start, + .end = res->end, + }; - if (range_len(&pmem_range)) - dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); + dpa_perf_setup(port, &range, perf[i]); + } cxl_memdev_update_perf(cxlmd); From 8e4c411c533f79407bbc970011aaa73ac602a9d1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:18 -0800 Subject: [PATCH 0072/2157] cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Alejandro Lucero Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 15 ++---- drivers/cxl/core/hdm.c | 88 +++++++++++++++++++++++++++++++++- drivers/cxl/core/mbox.c | 68 +++++++++----------------- drivers/cxl/core/memdev.c | 2 +- drivers/cxl/cxlmem.h | 93 +++++++++++++++++++++++++----------- drivers/cxl/pci.c | 7 ++- tools/testing/cxl/test/cxl.c | 15 ++---- tools/testing/cxl/test/mem.c | 7 ++- 8 files changed, 195 insertions(+), 100 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 797baad483cb..33cabe4aa026 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -261,27 +261,20 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, struct device *dev = cxlds->dev; struct dsmas_entry *dent; unsigned long index; - const struct resource *partition[] = { - to_ram_res(cxlds), - to_pmem_res(cxlds), - }; - struct cxl_dpa_perf *perf[] = { - to_ram_perf(cxlds), - to_pmem_perf(cxlds), - }; xa_for_each(dsmas_xa, index, dent) { bool found = false; - for (int i = 0; i < ARRAY_SIZE(partition); i++) { - const struct resource *res = partition[i]; + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct resource *res = &cxlds->part[i].res; struct range range = { .start = res->start, .end = res->end, }; if (range_contains(&range, &dent->dpa_range)) { - update_perf_entry(dev, dent, perf[i]); + update_perf_entry(dev, dent, + &cxlds->part[i].perf); found = true; break; } diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 48b26d3dad26..ea4e792f789f 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -327,9 +327,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, cxled->dpa_res = res; cxled->skip = skipped; - if (resource_contains(to_pmem_res(cxlds), res)) + if (to_pmem_res(cxlds) && resource_contains(to_pmem_res(cxlds), res)) cxled->mode = CXL_DECODER_PMEM; - else if (resource_contains(to_ram_res(cxlds), res)) + else if (to_ram_res(cxlds) && resource_contains(to_ram_res(cxlds), res)) cxled->mode = CXL_DECODER_RAM; else { dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", @@ -342,6 +342,90 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, return 0; } +static int add_dpa_res(struct device *dev, struct resource *parent, + struct resource *res, resource_size_t start, + resource_size_t size, const char *type) +{ + int rc; + + *res = (struct resource) { + .name = type, + .start = start, + .end = start + size - 1, + .flags = IORESOURCE_MEM, + }; + if (resource_size(res) == 0) { + dev_dbg(dev, "DPA(%s): no capacity\n", res->name); + return 0; + } + rc = request_resource(parent, res); + if (rc) { + dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, + res, rc); + return rc; + } + + dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); + + return 0; +} + +static const char *cxl_mode_name(enum cxl_partition_mode mode) +{ + switch (mode) { + case CXL_PARTMODE_RAM: + return "ram"; + case CXL_PARTMODE_PMEM: + return "pmem"; + default: + return ""; + }; +} + +/* if this fails the caller must destroy @cxlds, there is no recovery */ +int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info) +{ + struct device *dev = cxlds->dev; + + guard(rwsem_write)(&cxl_dpa_rwsem); + + if (cxlds->nr_partitions) + return -EBUSY; + + if (!info->size || !info->nr_partitions) { + cxlds->dpa_res = DEFINE_RES_MEM(0, 0); + cxlds->nr_partitions = 0; + return 0; + } + + cxlds->dpa_res = DEFINE_RES_MEM(0, info->size); + + for (int i = 0; i < info->nr_partitions; i++) { + const struct cxl_dpa_part_info *part = &info->part[i]; + int rc; + + cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID; + cxlds->part[i].mode = part->mode; + + /* Require ordered + contiguous partitions */ + if (i) { + const struct cxl_dpa_part_info *prev = &info->part[i - 1]; + + if (prev->range.end + 1 != part->range.start) + return -EINVAL; + } + rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res, + part->range.start, range_len(&part->range), + cxl_mode_name(part->mode)); + if (rc) + return rc; + cxlds->nr_partitions++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_dpa_setup); + int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 3502f1633ad2..62bb3653362f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1241,57 +1241,39 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) return rc; } -static int add_dpa_res(struct device *dev, struct resource *parent, - struct resource *res, resource_size_t start, - resource_size_t size, const char *type) +static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) { - int rc; + int i = info->nr_partitions; - res->name = type; - res->start = start; - res->end = start + size - 1; - res->flags = IORESOURCE_MEM; - if (resource_size(res) == 0) { - dev_dbg(dev, "DPA(%s): no capacity\n", res->name); - return 0; - } - rc = request_resource(parent, res); - if (rc) { - dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, - res, rc); - return rc; - } + if (size == 0) + return; - dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); - - return 0; + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; } -int cxl_mem_create_range_info(struct cxl_memdev_state *mds) +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) { struct cxl_dev_state *cxlds = &mds->cxlds; - struct resource *ram_res = to_ram_res(cxlds); - struct resource *pmem_res = to_pmem_res(cxlds); struct device *dev = cxlds->dev; int rc; if (!cxlds->media_ready) { - cxlds->dpa_res = DEFINE_RES_MEM(0, 0); - *ram_res = DEFINE_RES_MEM(0, 0); - *pmem_res = DEFINE_RES_MEM(0, 0); + info->size = 0; return 0; } - cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); + info->size = mds->total_bytes; if (mds->partition_align_bytes == 0) { - rc = add_dpa_res(dev, &cxlds->dpa_res, ram_res, 0, - mds->volatile_only_bytes, "ram"); - if (rc) - return rc; - return add_dpa_res(dev, &cxlds->dpa_res, pmem_res, - mds->volatile_only_bytes, - mds->persistent_only_bytes, "pmem"); + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; } rc = cxl_mem_get_partition_info(mds); @@ -1300,15 +1282,13 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return rc; } - rc = add_dpa_res(dev, &cxlds->dpa_res, ram_res, 0, - mds->active_volatile_bytes, "ram"); - if (rc) - return rc; - return add_dpa_res(dev, &cxlds->dpa_res, pmem_res, - mds->active_volatile_bytes, - mds->active_persistent_bytes, "pmem"); + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { @@ -1452,8 +1432,6 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - to_ram_perf(&mds->cxlds)->qos_class = CXL_QOS_CLASS_INVALID; - to_pmem_perf(&mds->cxlds)->qos_class = CXL_QOS_CLASS_INVALID; return mds; } diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index c5f8320ed330..be0eb57086e1 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -80,7 +80,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - unsigned long long len = resource_size(to_ram_res(cxlds)); + unsigned long long len = cxl_ram_size(cxlds); return sysfs_emit(buf, "%#llx\n", len); } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 78e92e24d7b5..e33b2d5efed9 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -97,6 +97,24 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +struct cxl_dpa_info { + u64 size; + struct cxl_dpa_part_info { + struct range range; + enum cxl_partition_mode mode; + } part[CXL_NR_PARTITIONS_MAX]; + int nr_partitions; +}; + +int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info); + static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, struct cxl_memdev *cxlmd) { @@ -408,6 +426,18 @@ struct cxl_dpa_perf { int qos_class; }; +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + /** * struct cxl_dev_state - The driver device state * @@ -423,8 +453,8 @@ struct cxl_dpa_perf { * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @media_ready: Indicate whether the device media is usable * @dpa_res: Overall DPA resource tree for the device - * @_pmem_res: Active Persistent memory capacity configuration - * @_ram_res: Active Volatile memory capacity configuration + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device * @cxl_mbox: CXL mailbox context @@ -438,21 +468,47 @@ struct cxl_dev_state { bool rcd; bool media_ready; struct resource dpa_res; - struct resource _pmem_res; - struct resource _ram_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; u64 serial; enum cxl_devtype type; struct cxl_mailbox cxl_mbox; }; -static inline struct resource *to_ram_res(struct cxl_dev_state *cxlds) + +/* Static RAM is only expected at partition 0. */ +static inline const struct resource *to_ram_res(struct cxl_dev_state *cxlds) { - return &cxlds->_ram_res; + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return NULL; + return &cxlds->part[0].res; } -static inline struct resource *to_pmem_res(struct cxl_dev_state *cxlds) +/* + * Static PMEM may be at partition index 0 when there is no static RAM + * capacity. + */ +static inline const struct resource *to_pmem_res(struct cxl_dev_state *cxlds) { - return &cxlds->_pmem_res; + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return &cxlds->part[i].res; + return NULL; +} + +static inline struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) +{ + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return NULL; + return &cxlds->part[0].perf; +} + +static inline struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) +{ + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return &cxlds->part[i].perf; + return NULL; } static inline resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) @@ -499,8 +555,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset - * @_ram_perf: performance data entry matched to RAM partition - * @_pmem_perf: performance data entry matched to PMEM partition * @event: event log driver state * @poison: poison driver state info * @security: security driver state info @@ -524,29 +578,12 @@ struct cxl_memdev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; - struct cxl_dpa_perf _ram_perf; - struct cxl_dpa_perf _pmem_perf; - struct cxl_event_state event; struct cxl_poison_state poison; struct cxl_security_state security; struct cxl_fw_state fw; }; -static inline struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) -{ - struct cxl_memdev_state *mds = container_of(cxlds, typeof(*mds), cxlds); - - return &mds->_ram_perf; -} - -static inline struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) -{ - struct cxl_memdev_state *mds = container_of(cxlds, typeof(*mds), cxlds); - - return &mds->_pmem_perf; -} - static inline struct cxl_memdev_state * to_cxl_memdev_state(struct cxl_dev_state *cxlds) { @@ -860,7 +897,7 @@ int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox, int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); -int cxl_mem_create_range_info(struct cxl_memdev_state *mds); +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index a96e54c6259e..b2c943a4de0a 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -903,6 +903,7 @@ __ATTRIBUTE_GROUPS(cxl_rcd); static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_dpa_info range_info = { 0 }; struct cxl_memdev_state *mds; struct cxl_dev_state *cxlds; struct cxl_register_map map; @@ -993,7 +994,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_mem_create_range_info(mds); + rc = cxl_mem_dpa_fetch(mds, &range_info); + if (rc) + return rc; + + rc = cxl_dpa_setup(cxlds, &range_info); if (rc) return rc; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 9654513bbccf..083a66a52731 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1001,26 +1001,19 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; - const struct resource *partition[] = { - to_ram_res(cxlds), - to_pmem_res(cxlds), - }; - struct cxl_dpa_perf *perf[] = { - to_ram_perf(cxlds), - to_pmem_perf(cxlds), - }; if (!cxl_root) return; - for (int i = 0; i < ARRAY_SIZE(partition); i++) { - const struct resource *res = partition[i]; + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct resource *res = &cxlds->part[i].res; + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; struct range range = { .start = res->start, .end = res->end, }; - dpa_perf_setup(port, &range, perf[i]); + dpa_perf_setup(port, &range, perf); } cxl_memdev_update_perf(cxlmd); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8d731bd63988..495199238335 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1494,6 +1494,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_dev_state *cxlds; struct cxl_mockmem_data *mdata; struct cxl_mailbox *cxl_mbox; + struct cxl_dpa_info range_info = { 0 }; int rc; mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); @@ -1554,7 +1555,11 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = cxl_mem_create_range_info(mds); + rc = cxl_mem_dpa_fetch(mds, &range_info); + if (rc) + return rc; + + rc = cxl_dpa_setup(cxlds, &range_info); if (rc) return rc; From 991d98f17d31644826977e49477544987000a08a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:24 -0800 Subject: [PATCH 0073/2157] cxl: Make cxl_dpa_alloc() DPA partition number agnostic cxl_dpa_alloc() is a hard coded nest of assumptions around PMEM allocations being distinct from RAM allocations in specific ways when in practice the allocation rules are only relative to DPA partition index. The rules for cxl_dpa_alloc() are: - allocations can only come from 1 partition - if allocating at partition-index-N, all free space in partitions less than partition-index-N must be skipped over Use the new 'struct cxl_dpa_partition' array to support allocation with an arbitrary number of DPA partitions on the device. A follow-on patch can go further to cleanup 'enum cxl_decoder_mode' concept and supersede it with looking up the memory properties from partition metadata. Until then cxl_part_mode() temporarily bridges code that looks up partitions by @cxled->mode. Reviewed-by: Ira Weiny Reviewed-by: Alejandro Lucero Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864306400.668823.12143134425285426523.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 205 +++++++++++++++++++++++++++++------------ drivers/cxl/cxlmem.h | 14 +++ 2 files changed, 159 insertions(+), 60 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index ea4e792f789f..78ecb88bad7e 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -223,6 +223,37 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL"); +/* See request_skip() kernel-doc */ +static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds, + const resource_size_t skip_base, + const resource_size_t skip_len, + const char *requester) +{ + const resource_size_t skip_end = skip_base + skip_len - 1; + + for (int i = 0; i < cxlds->nr_partitions; i++) { + const struct resource *part_res = &cxlds->part[i].res; + resource_size_t adjust_start, adjust_end, size; + + adjust_start = max(skip_base, part_res->start); + adjust_end = min(skip_end, part_res->end); + + if (adjust_end < adjust_start) + continue; + + size = adjust_end - adjust_start + 1; + + if (!requester) + __release_region(&cxlds->dpa_res, adjust_start, size); + else if (!__request_region(&cxlds->dpa_res, adjust_start, size, + requester, 0)) + return adjust_start - skip_base; + } + + return skip_len; +} +#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL) + /* * Must be called in a context that synchronizes against this decoder's * port ->remove() callback (like an endpoint decoder sysfs attribute) @@ -241,7 +272,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) skip_start = res->start - cxled->skip; __release_region(&cxlds->dpa_res, res->start, resource_size(res)); if (cxled->skip) - __release_region(&cxlds->dpa_res, skip_start, cxled->skip); + release_skip(cxlds, skip_start, cxled->skip); cxled->skip = 0; cxled->dpa_res = NULL; put_device(&cxled->cxld.dev); @@ -268,6 +299,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled) __cxl_dpa_release(cxled); } +/** + * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree + * @cxlds: CXL.mem device context that parents @cxled + * @cxled: Endpoint decoder establishing new allocation that skips lower DPA + * @skip_base: DPA < start of new DPA allocation (DPAnew) + * @skip_len: @skip_base + @skip_len == DPAnew + * + * DPA 'skip' arises from out-of-sequence DPA allocation events relative + * to free capacity across multiple partitions. It is a wasteful event + * as usable DPA gets thrown away, but if a deployment has, for example, + * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM + * DPA, the free RAM DPA must be sacrificed to start allocating PMEM. + * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder + * Protection" for more details. + * + * A 'skip' always covers the last allocated DPA in a previous partition + * to the start of the current partition to allocate. Allocations never + * start in the middle of a partition, and allocations are always + * de-allocated in reverse order (see cxl_dpa_free(), or natural devm + * unwind order from forced in-order allocation). + * + * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip' + * would always be contained to a single partition. Given + * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip' + * might span "tail capacity of partition[0], all of partition[1], ..., + * all of partition[N-1]" to support allocating from partition[N]. That + * in turn interacts with the partition 'struct resource' boundaries + * within @cxlds->dpa_res whereby 'skip' requests need to be divided by + * partition. I.e. this is a quirk of using a 'struct resource' tree to + * detect range conflicts while also tracking partition boundaries in + * @cxlds->dpa_res. + */ +static int request_skip(struct cxl_dev_state *cxlds, + struct cxl_endpoint_decoder *cxled, + const resource_size_t skip_base, + const resource_size_t skip_len) +{ + resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len, + dev_name(&cxled->cxld.dev)); + + if (skipped == skip_len) + return 0; + + dev_dbg(cxlds->dev, + "%s: failed to reserve skipped space (%pa %pa %pa)\n", + dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped); + + release_skip(cxlds, skip_base, skipped); + + return -EBUSY; +} + static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) @@ -276,7 +359,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct cxl_port *port = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &port->dev; + enum cxl_decoder_mode mode; struct resource *res; + int rc; lockdep_assert_held_write(&cxl_dpa_rwsem); @@ -305,14 +390,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, } if (skipped) { - res = __request_region(&cxlds->dpa_res, base - skipped, skipped, - dev_name(&cxled->cxld.dev), 0); - if (!res) { - dev_dbg(dev, - "decoder%d.%d: failed to reserve skipped space\n", - port->id, cxled->cxld.id); - return -EBUSY; - } + rc = request_skip(cxlds, cxled, base - skipped, skipped); + if (rc) + return rc; } res = __request_region(&cxlds->dpa_res, base, len, dev_name(&cxled->cxld.dev), 0); @@ -320,22 +400,23 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n", port->id, cxled->cxld.id); if (skipped) - __release_region(&cxlds->dpa_res, base - skipped, - skipped); + release_skip(cxlds, base - skipped, skipped); return -EBUSY; } cxled->dpa_res = res; cxled->skip = skipped; - if (to_pmem_res(cxlds) && resource_contains(to_pmem_res(cxlds), res)) - cxled->mode = CXL_DECODER_PMEM; - else if (to_ram_res(cxlds) && resource_contains(to_ram_res(cxlds), res)) - cxled->mode = CXL_DECODER_RAM; - else { + mode = CXL_DECODER_NONE; + for (int i = 0; cxlds->nr_partitions; i++) + if (resource_contains(&cxlds->part[i].res, res)) { + mode = cxl_part_mode(cxlds->part[i].mode); + break; + } + + if (mode == CXL_DECODER_NONE) dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", port->id, cxled->cxld.id, res); - cxled->mode = CXL_DECODER_NONE; - } + cxled->mode = mode; port->hdm_end++; get_device(&cxled->cxld.dev); @@ -542,15 +623,13 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - resource_size_t free_ram_start, free_pmem_start; struct cxl_port *port = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; - resource_size_t start, avail, skip; + struct resource *res, *prev = NULL; + resource_size_t start, avail, skip, skip_start; struct resource *p, *last; - const struct resource *ram_res = to_ram_res(cxlds); - const struct resource *pmem_res = to_pmem_res(cxlds); - int rc; + int part, rc; down_write(&cxl_dpa_rwsem); if (cxled->cxld.region) { @@ -566,47 +645,53 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) goto out; } - for (p = ram_res->child, last = NULL; p; p = p->sibling) - last = p; - if (last) - free_ram_start = last->end + 1; - else - free_ram_start = ram_res->start; + part = -1; + for (int i = 0; i < cxlds->nr_partitions; i++) { + if (cxled->mode == cxl_part_mode(cxlds->part[i].mode)) { + part = i; + break; + } + } - for (p = pmem_res->child, last = NULL; p; p = p->sibling) - last = p; - if (last) - free_pmem_start = last->end + 1; - else - free_pmem_start = pmem_res->start; - - if (cxled->mode == CXL_DECODER_RAM) { - start = free_ram_start; - avail = ram_res->end - start + 1; - skip = 0; - } else if (cxled->mode == CXL_DECODER_PMEM) { - resource_size_t skip_start, skip_end; - - start = free_pmem_start; - avail = pmem_res->end - start + 1; - skip_start = free_ram_start; - - /* - * If some pmem is already allocated, then that allocation - * already handled the skip. - */ - if (pmem_res->child && - skip_start == pmem_res->child->start) - skip_end = skip_start - 1; - else - skip_end = start - 1; - skip = skip_end - skip_start + 1; - } else { - dev_dbg(dev, "mode not set\n"); - rc = -EINVAL; + if (part < 0) { + rc = -EBUSY; goto out; } + res = &cxlds->part[part].res; + for (p = res->child, last = NULL; p; p = p->sibling) + last = p; + if (last) + start = last->end + 1; + else + start = res->start; + + /* + * To allocate at partition N, a skip needs to be calculated for all + * unallocated space at lower partitions indices. + * + * If a partition has any allocations, the search can end because a + * previous cxl_dpa_alloc() invocation is assumed to have accounted for + * all previous partitions. + */ + skip_start = CXL_RESOURCE_NONE; + for (int i = part; i; i--) { + prev = &cxlds->part[i - 1].res; + for (p = prev->child, last = NULL; p; p = p->sibling) + last = p; + if (last) { + skip_start = last->end + 1; + break; + } + skip_start = prev->start; + } + + avail = res->end - start + 1; + if (skip_start == CXL_RESOURCE_NONE) + skip = 0; + else + skip = res->start - skip_start; + if (size > avail) { dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, cxl_decoder_mode_name(cxled->mode), &avail); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index e33b2d5efed9..9fe615d96573 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -529,6 +529,20 @@ static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) return resource_size(res); } +/* + * Translate the operational mode of memory capacity with the + * operational mode of a decoder + * TODO: kill 'enum cxl_decoder_mode' to obviate this helper + */ +static inline enum cxl_decoder_mode cxl_part_mode(enum cxl_partition_mode mode) +{ + if (mode == CXL_PARTMODE_RAM) + return CXL_DECODER_RAM; + if (mode == CXL_PARTMODE_PMEM) + return CXL_DECODER_PMEM; + return CXL_DECODER_NONE; +} + static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) { return dev_get_drvdata(cxl_mbox->host); From be5cbd0840275c68b3b7d0685d7acc26436c0d99 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:29 -0800 Subject: [PATCH 0074/2157] cxl: Kill enum cxl_decoder_mode Now that the operational mode of DPA capacity (ram vs pmem... etc) is tracked in the partition, and no code paths have dependencies on the mode implying the partition index, the ambiguous 'enum cxl_decoder_mode' can be cleaned up, specifically this ambiguity on whether the operation mode implied anything about the partition order. Endpoint decoders simply reference their assigned partition where the operational mode can be retrieved as partition mode. With this in place PMEM can now be partition0 which happens today when the RAM capacity size is zero. Dynamic RAM can appear above PMEM when DCD arrives, etc. Code sequences that hard coded the "PMEM after RAM" assumption can now just iterate partitions and consult the partition mode after the fact. Reviewed-by: Ira Weiny Reviewed-by: Alejandro Lucero Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864306972.668823.3327008645125276726.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 18 ++---- drivers/cxl/core/core.h | 4 +- drivers/cxl/core/hdm.c | 68 +++++++++---------- drivers/cxl/core/memdev.c | 15 +---- drivers/cxl/core/port.c | 21 ++++-- drivers/cxl/core/region.c | 133 +++++++++++++++++++++----------------- drivers/cxl/cxl.h | 37 +++-------- drivers/cxl/cxlmem.h | 19 ------ 8 files changed, 137 insertions(+), 178 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 33cabe4aa026..f96ae28022b0 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -579,23 +579,15 @@ static bool dpa_perf_contains(struct cxl_dpa_perf *perf, return range_contains(&perf->dpa_range, &dpa); } -static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode) +static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_dpa_perf *perf; - switch (mode) { - case CXL_DECODER_RAM: - perf = to_ram_perf(cxlds); - break; - case CXL_DECODER_PMEM: - perf = to_pmem_perf(cxlds); - break; - default: + if (cxled->part < 0) return ERR_PTR(-EINVAL); - } + perf = &cxlds->part[cxled->part].perf; if (!perf) return ERR_PTR(-EINVAL); @@ -659,7 +651,7 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, if (cxlds->rcd) return -ENODEV; - perf = cxled_get_dpa_perf(cxled, cxlr->mode); + perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) return PTR_ERR(perf); @@ -1065,7 +1057,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, lockdep_assert_held(&cxl_dpa_rwsem); - perf = cxled_get_dpa_perf(cxled, cxlr->mode); + perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) return; diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..22dac79c5192 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -72,8 +72,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, resource_size_t length); struct dentry *cxl_debugfs_create_dir(const char *dir); -int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode); +int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, + enum cxl_partition_mode mode); int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 78ecb88bad7e..d705dec1471e 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -359,7 +359,6 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct cxl_port *port = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &port->dev; - enum cxl_decoder_mode mode; struct resource *res; int rc; @@ -406,17 +405,21 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, cxled->dpa_res = res; cxled->skip = skipped; - mode = CXL_DECODER_NONE; - for (int i = 0; cxlds->nr_partitions; i++) - if (resource_contains(&cxlds->part[i].res, res)) { - mode = cxl_part_mode(cxlds->part[i].mode); - break; - } + /* + * When allocating new capacity, ->part is already set, when + * discovering decoder settings at initial enumeration, ->part + * is not set. + */ + if (cxled->part < 0) + for (int i = 0; cxlds->nr_partitions; i++) + if (resource_contains(&cxlds->part[i].res, res)) { + cxled->part = i; + break; + } - if (mode == CXL_DECODER_NONE) + if (cxled->part < 0) dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", port->id, cxled->cxld.id, res); - cxled->mode = mode; port->hdm_end++; get_device(&cxled->cxld.dev); @@ -583,40 +586,33 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) return rc; } -int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode) +int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, + enum cxl_partition_mode mode) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; - - switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: - break; - default: - dev_dbg(dev, "unsupported mode: %d\n", mode); - return -EINVAL; - } + int part; guard(rwsem_write)(&cxl_dpa_rwsem); if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) return -EBUSY; - /* - * Only allow modes that are supported by the current partition - * configuration - */ - if (mode == CXL_DECODER_PMEM && !cxl_pmem_size(cxlds)) { - dev_dbg(dev, "no available pmem capacity\n"); - return -ENXIO; + for (part = 0; part < cxlds->nr_partitions; part++) + if (cxlds->part[part].mode == mode) + break; + + if (part >= cxlds->nr_partitions) { + dev_dbg(dev, "unsupported mode: %d\n", mode); + return -EINVAL; } - if (mode == CXL_DECODER_RAM && !cxl_ram_size(cxlds)) { - dev_dbg(dev, "no available ram capacity\n"); + + if (!resource_size(&cxlds->part[part].res)) { + dev_dbg(dev, "no available capacity for mode: %d\n", mode); return -ENXIO; } - cxled->mode = mode; + cxled->part = part; return 0; } @@ -645,15 +641,9 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) goto out; } - part = -1; - for (int i = 0; i < cxlds->nr_partitions; i++) { - if (cxled->mode == cxl_part_mode(cxlds->part[i].mode)) { - part = i; - break; - } - } - + part = cxled->part; if (part < 0) { + dev_dbg(dev, "partition not set\n"); rc = -EBUSY; goto out; } @@ -694,7 +684,7 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) if (size > avail) { dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, - cxl_decoder_mode_name(cxled->mode), &avail); + res->name, &avail); rc = -ENOSPC; goto out; } diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index be0eb57086e1..615cbd861f66 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -198,17 +198,8 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) int rc = 0; /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ - if (cxl_pmem_size(cxlds)) { - const struct resource *res = to_pmem_res(cxlds); - - offset = res->start; - length = resource_size(res); - rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc) - return rc; - } - if (cxl_ram_size(cxlds)) { - const struct resource *res = to_ram_res(cxlds); + for (int i = 0; i < cxlds->nr_partitions; i++) { + const struct resource *res = &cxlds->part[i].res; offset = res->start; length = resource_size(res); @@ -217,7 +208,7 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) * Invalid Physical Address is not an error for * volatile addresses. Device support is optional. */ - if (rc == -EFAULT) + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) rc = 0; } return rc; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..fc24c5bc9f2a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -194,25 +194,35 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + /* without @cxl_dpa_rwsem, make sure @part is not reloaded */ + int part = READ_ONCE(cxled->part); + const char *desc; - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); + if (part < 0) + desc = "none"; + else + desc = cxlds->part[part].res.name; + + return sysfs_emit(buf, "%s\n", desc); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - enum cxl_decoder_mode mode; + enum cxl_partition_mode mode; ssize_t rc; if (sysfs_streq(buf, "pmem")) - mode = CXL_DECODER_PMEM; + mode = CXL_PARTMODE_PMEM; else if (sysfs_streq(buf, "ram")) - mode = CXL_DECODER_RAM; + mode = CXL_PARTMODE_RAM; else return -EINVAL; - rc = cxl_dpa_set_mode(cxled, mode); + rc = cxl_dpa_set_part(cxled, mode); if (rc) return rc; @@ -1899,6 +1909,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port) return ERR_PTR(-ENOMEM); cxled->pos = -1; + cxled->part = -1; cxld = &cxled->cxld; rc = cxl_decoder_init(port, cxld); if (rc) { diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6706a1b79549..84ce625b8591 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -144,7 +144,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; - if (cxlr->mode != CXL_DECODER_PMEM) + if (cxlr->mode != CXL_PARTMODE_PMEM) rc = sysfs_emit(buf, "\n"); else rc = sysfs_emit(buf, "%pUb\n", &p->uuid); @@ -441,7 +441,7 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, * Support tooling that expects to find a 'uuid' attribute for all * regions regardless of mode. */ - if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) return 0444; return a->mode; } @@ -603,8 +603,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_region *cxlr = to_cxl_region(dev); + const char *desc; - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); + if (cxlr->mode == CXL_PARTMODE_RAM) + desc = "ram"; + else if (cxlr->mode == CXL_PARTMODE_PMEM) + desc = "pmem"; + else + desc = ""; + + return sysfs_emit(buf, "%s\n", desc); } static DEVICE_ATTR_RO(mode); @@ -630,7 +638,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) /* ways, granularity and uuid (if PMEM) need to be set before HPA */ if (!p->interleave_ways || !p->interleave_granularity || - (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) + (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); @@ -1888,6 +1896,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_region_params *p = &cxlr->params; struct cxl_port *ep_port, *root_port; struct cxl_dport *dport; @@ -1902,17 +1911,17 @@ static int cxl_region_attach(struct cxl_region *cxlr, return rc; } - if (cxled->mode != cxlr->mode) { - dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", - dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); - return -EINVAL; - } - - if (cxled->mode == CXL_DECODER_DEAD) { + if (cxled->part < 0) { dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); return -ENODEV; } + if (cxlds->part[cxled->part].mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n", + dev_name(&cxled->cxld.dev), cxlr->mode); + return -EINVAL; + } + /* all full of members, or interleave config not established? */ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_dbg(&cxlr->dev, "region already active\n"); @@ -2115,7 +2124,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) { down_write(&cxl_region_rwsem); - cxled->mode = CXL_DECODER_DEAD; + cxled->part = -1; cxl_region_detach(cxled); up_write(&cxl_region_rwsem); } @@ -2471,7 +2480,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb, */ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, int id, - enum cxl_decoder_mode mode, + enum cxl_partition_mode mode, enum cxl_decoder_type type) { struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent); @@ -2525,13 +2534,13 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_decoder_mode mode, int id) + enum cxl_partition_mode mode, int id) { int rc; switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: + case CXL_PARTMODE_RAM: + case CXL_PARTMODE_PMEM: break; default: dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); @@ -2551,7 +2560,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, } static ssize_t create_region_store(struct device *dev, const char *buf, - size_t len, enum cxl_decoder_mode mode) + size_t len, enum cxl_partition_mode mode) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; @@ -2572,7 +2581,7 @@ static ssize_t create_pmem_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return create_region_store(dev, buf, len, CXL_DECODER_PMEM); + return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM); } DEVICE_ATTR_RW(create_pmem_region); @@ -2580,7 +2589,7 @@ static ssize_t create_ram_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return create_region_store(dev, buf, len, CXL_DECODER_RAM); + return create_region_store(dev, buf, len, CXL_PARTMODE_RAM); } DEVICE_ATTR_RW(create_ram_region); @@ -2678,7 +2687,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL"); struct cxl_poison_context { struct cxl_port *port; - enum cxl_decoder_mode mode; + int part; u64 offset; }; @@ -2686,49 +2695,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, struct cxl_poison_context *ctx) { struct cxl_dev_state *cxlds = cxlmd->cxlds; + const struct resource *res; + struct resource *p, *last; u64 offset, length; int rc = 0; - /* - * Collect poison for the remaining unmapped resources - * after poison is collected by committed endpoints. - * - * Knowing that PMEM must always follow RAM, get poison - * for unmapped resources based on the last decoder's mode: - * ram: scan remains of ram range, then any pmem range - * pmem: scan remains of pmem range - */ - - if (ctx->mode == CXL_DECODER_RAM) { - offset = ctx->offset; - length = cxl_ram_size(cxlds) - offset; - rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc == -EFAULT) - rc = 0; - if (rc) - return rc; - } - if (ctx->mode == CXL_DECODER_PMEM) { - offset = ctx->offset; - length = resource_size(&cxlds->dpa_res) - offset; - if (!length) - return 0; - } else if (cxl_pmem_size(cxlds)) { - const struct resource *res = to_pmem_res(cxlds); - - offset = res->start; - length = resource_size(res); - } else { + if (ctx->part < 0) return 0; + + /* + * Collect poison for the remaining unmapped resources after + * poison is collected by committed endpoints decoders. + */ + for (int i = ctx->part; i < cxlds->nr_partitions; i++) { + res = &cxlds->part[i].res; + for (p = res->child, last = NULL; p; p = p->sibling) + last = p; + if (last) + offset = last->end + 1; + else + offset = res->start; + length = res->end - offset + 1; + if (!length) + break; + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) + continue; + if (rc) + break; } - return cxl_mem_get_poison(cxlmd, offset, length, NULL); + return rc; } static int poison_by_decoder(struct device *dev, void *arg) { struct cxl_poison_context *ctx = arg; struct cxl_endpoint_decoder *cxled; + enum cxl_partition_mode mode; + struct cxl_dev_state *cxlds; struct cxl_memdev *cxlmd; u64 offset, length; int rc = 0; @@ -2737,15 +2742,18 @@ static int poison_by_decoder(struct device *dev, void *arg) return rc; cxled = to_cxl_endpoint_decoder(dev); - if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + if (!cxled->dpa_res) return rc; cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + mode = cxlds->part[cxled->part].mode; + if (cxled->skip) { offset = cxled->dpa_res->start - cxled->skip; length = cxled->skip; rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) rc = 0; if (rc) return rc; @@ -2754,7 +2762,7 @@ static int poison_by_decoder(struct device *dev, void *arg) offset = cxled->dpa_res->start; length = cxled->dpa_res->end - offset + 1; rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region); - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) rc = 0; if (rc) return rc; @@ -2762,7 +2770,7 @@ static int poison_by_decoder(struct device *dev, void *arg) /* Iterate until commit_end is reached */ if (cxled->cxld.id == ctx->port->commit_end) { ctx->offset = cxled->dpa_res->end + 1; - ctx->mode = cxled->mode; + ctx->part = cxled->part; return 1; } @@ -2775,7 +2783,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) int rc = 0; ctx = (struct cxl_poison_context) { - .port = port + .port = port, + .part = -1, }; rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder); @@ -3220,14 +3229,18 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct range *hpa = &cxled->cxld.hpa_range; + int rc, part = READ_ONCE(cxled->part); struct cxl_region_params *p; struct cxl_region *cxlr; struct resource *res; - int rc; + + if (part < 0) + return ERR_PTR(-EBUSY); do { - cxlr = __create_region(cxlrd, cxled->mode, + cxlr = __create_region(cxlrd, cxlds->part[part].mode, atomic_read(&cxlrd->region_id)); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); @@ -3430,9 +3443,9 @@ static int cxl_region_probe(struct device *dev) return rc; switch (cxlr->mode) { - case CXL_DECODER_PMEM: + case CXL_PARTMODE_PMEM: return devm_cxl_add_pmem_region(cxlr); - case CXL_DECODER_RAM: + case CXL_PARTMODE_RAM: /* * The region can not be manged by CXL if any portion of * it is already online as 'System RAM' diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 9fd1524ed150..94a34833eedd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -372,30 +372,6 @@ struct cxl_decoder { void (*reset)(struct cxl_decoder *cxld); }; -/* - * CXL_DECODER_DEAD prevents endpoints from being reattached to regions - * while cxld_unregister() is running - */ -enum cxl_decoder_mode { - CXL_DECODER_NONE, - CXL_DECODER_RAM, - CXL_DECODER_PMEM, - CXL_DECODER_DEAD, -}; - -static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) -{ - static const char * const names[] = { - [CXL_DECODER_NONE] = "none", - [CXL_DECODER_RAM] = "ram", - [CXL_DECODER_PMEM] = "pmem", - }; - - if (mode >= CXL_DECODER_NONE && mode < CXL_DECODER_DEAD) - return names[mode]; - return "mixed"; -} - /* * Track whether this decoder is reserved for region autodiscovery, or * free for userspace provisioning. @@ -410,16 +386,16 @@ enum cxl_decoder_state { * @cxld: base cxl_decoder_object * @dpa_res: actively claimed DPA span of this decoder * @skip: offset into @dpa_res where @cxld.hpa_range maps - * @mode: which memory type / access-mode-partition this decoder targets * @state: autodiscovery state + * @part: partition index this decoder maps * @pos: interleave position in @cxld.region */ struct cxl_endpoint_decoder { struct cxl_decoder cxld; struct resource *dpa_res; resource_size_t skip; - enum cxl_decoder_mode mode; enum cxl_decoder_state state; + int part; int pos; }; @@ -504,6 +480,11 @@ struct cxl_region_params { int nr_targets; }; +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. Prevent endpoint decoders outside of automatic @@ -523,7 +504,7 @@ struct cxl_region_params { * struct cxl_region - CXL region * @dev: This region's device * @id: This region's id. Id is globally unique across all regions - * @mode: Endpoint decoder allocation / access mode + * @mode: Operational mode of the mapped capacity * @type: Endpoint decoder target type * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge @@ -536,7 +517,7 @@ struct cxl_region_params { struct cxl_region { struct device dev; int id; - enum cxl_decoder_mode mode; + enum cxl_partition_mode mode; enum cxl_decoder_type type; struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_pmem_region *cxlr_pmem; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 9fe615d96573..f218d43dec9f 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -97,11 +97,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - #define CXL_NR_PARTITIONS_MAX 2 struct cxl_dpa_info { @@ -529,20 +524,6 @@ static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) return resource_size(res); } -/* - * Translate the operational mode of memory capacity with the - * operational mode of a decoder - * TODO: kill 'enum cxl_decoder_mode' to obviate this helper - */ -static inline enum cxl_decoder_mode cxl_part_mode(enum cxl_partition_mode mode) -{ - if (mode == CXL_PARTMODE_RAM) - return CXL_DECODER_RAM; - if (mode == CXL_PARTMODE_PMEM) - return CXL_DECODER_PMEM; - return CXL_DECODER_NONE; -} - static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) { return dev_get_drvdata(cxl_mbox->host); From 58d60bbe0a99539afb1f29d03c28f06747f94531 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Feb 2025 20:24:35 -0800 Subject: [PATCH 0075/2157] cxl: Cleanup partition size and perf helpers Now that the 'struct cxl_dpa_partition' array contains both size and performance information, all paths that iterate over that information can use a loop rather than hard-code 'ram' and 'pmem' lookups. Remove, or reduce the scope of the temporary helpers that bridged the pre-'struct cxl_dpa_partition' state of the code to the post-'struct cxl_dpa_partition' state. - to_{ram,pmem}_perf(): scope reduced to just sysfs_emit + is_visible() helpers - to_{ram,pmem}_res(): fold into their only users cxl_{ram,pmem}_size() - cxl_ram_size(): scope reduced to ram_size_show() (Note, cxl_pmem_size() also used to gate nvdimm registration) In short, memdev sysfs ABI already made the promise that 0-sized partitions will show for memdevs, but that can be avoided for future partitions by using dynamic sysfs group visibility (new relative to when the partition ABI first shipped upstream). Cc: Dave Jiang Cc: Alejandro Lucero Cc: Ira Weiny Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Tested-by: Alejandro Lucero Link: https://patch.msgid.link/173864307519.668823.10800104022426067621.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 49 +++++++++++++++++---------------- drivers/cxl/core/memdev.c | 23 ++++++++++++++++ drivers/cxl/cxlmem.h | 58 ++++++--------------------------------- 3 files changed, 57 insertions(+), 73 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index f96ae28022b0..bfba7f5019cf 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -306,9 +306,6 @@ static int match_cxlrd_qos_class(struct device *dev, void *data) static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) { - if (!dpa_perf) - return; - *dpa_perf = (struct cxl_dpa_perf) { .qos_class = CXL_QOS_CLASS_INVALID, }; @@ -317,9 +314,6 @@ static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) static bool cxl_qos_match(struct cxl_port *root_port, struct cxl_dpa_perf *dpa_perf) { - if (!dpa_perf) - return false; - if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) return false; @@ -351,37 +345,46 @@ static int match_cxlrd_hb(struct device *dev, void *data) return 0; } -static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) +static void cxl_qos_class_verify(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_dpa_perf *ram_perf = to_ram_perf(cxlds), - *pmem_perf = to_pmem_perf(cxlds); struct cxl_port *root_port; - int rc; struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(cxlmd->endpoint); + /* + * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to + * succeed when called in the cxl_endpoint_port_probe() path. + */ if (!cxl_root) - return -ENODEV; + return; root_port = &cxl_root->port; - /* Check that the QTG IDs are all sane between end device and root decoders */ - if (!cxl_qos_match(root_port, ram_perf)) - reset_dpa_perf(ram_perf); - if (!cxl_qos_match(root_port, pmem_perf)) - reset_dpa_perf(pmem_perf); + /* + * Save userspace from needing to check if a qos class has any matches + * by hiding qos class info if the memdev is not mapped by a root + * decoder, or the partition class does not match any root decoder + * class. + */ + if (!device_for_each_child(&root_port->dev, + cxlmd->endpoint->host_bridge, + match_cxlrd_hb)) { + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; - /* Check to make sure that the device's host bridge is under a root decoder */ - rc = device_for_each_child(&root_port->dev, - cxlmd->endpoint->host_bridge, match_cxlrd_hb); - if (!rc) { - reset_dpa_perf(ram_perf); - reset_dpa_perf(pmem_perf); + reset_dpa_perf(perf); + } + return; } - return rc; + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; + + if (!cxl_qos_match(root_port, perf)) + reset_dpa_perf(perf); + } } static void discard_dsmas(struct xarray *xa) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 615cbd861f66..63c6c681125d 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -75,6 +75,14 @@ static ssize_t label_storage_size_show(struct device *dev, } static DEVICE_ATTR_RO(label_storage_size); +static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) +{ + /* Static RAM is only expected at partition 0. */ + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return 0; + return resource_size(&cxlds->part[0].res); +} + static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -399,6 +407,14 @@ static struct attribute *cxl_memdev_attributes[] = { NULL, }; +static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) +{ + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return &cxlds->part[i].perf; + return NULL; +} + static ssize_t pmem_qos_class_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -417,6 +433,13 @@ static struct attribute *cxl_memdev_pmem_attributes[] = { NULL, }; +static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) +{ + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return NULL; + return &cxlds->part[0].perf; +} + static ssize_t ram_qos_class_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index f218d43dec9f..36a091597066 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -470,58 +470,16 @@ struct cxl_dev_state { struct cxl_mailbox cxl_mbox; }; - -/* Static RAM is only expected at partition 0. */ -static inline const struct resource *to_ram_res(struct cxl_dev_state *cxlds) -{ - if (cxlds->part[0].mode != CXL_PARTMODE_RAM) - return NULL; - return &cxlds->part[0].res; -} - -/* - * Static PMEM may be at partition index 0 when there is no static RAM - * capacity. - */ -static inline const struct resource *to_pmem_res(struct cxl_dev_state *cxlds) -{ - for (int i = 0; i < cxlds->nr_partitions; i++) - if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) - return &cxlds->part[i].res; - return NULL; -} - -static inline struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) -{ - if (cxlds->part[0].mode != CXL_PARTMODE_RAM) - return NULL; - return &cxlds->part[0].perf; -} - -static inline struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) -{ - for (int i = 0; i < cxlds->nr_partitions; i++) - if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) - return &cxlds->part[i].perf; - return NULL; -} - -static inline resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) -{ - const struct resource *res = to_ram_res(cxlds); - - if (!res) - return 0; - return resource_size(res); -} - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { - const struct resource *res = to_pmem_res(cxlds); - - if (!res) - return 0; - return resource_size(res); + /* + * Static PMEM may be at partition index 0 when there is no static RAM + * capacity. + */ + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return resource_size(&cxlds->part[i].res); + return 0; } static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) From bbb89c177208ad2557cb29ff04d4b13a37b36c23 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 10 Jan 2025 18:22:04 -0600 Subject: [PATCH 0076/2157] tools/counter: gitignore counter_watch_events Ignore the executable counter_watch_events when building in-tree. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20250110-counter-ti-eqep-add-direction-support-v2-1-c6b6f96d2db9@baylibre.com Signed-off-by: William Breathitt Gray --- tools/counter/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/counter/.gitignore b/tools/counter/.gitignore index 9fd290d4bf43..22d8727d2696 100644 --- a/tools/counter/.gitignore +++ b/tools/counter/.gitignore @@ -1,2 +1,3 @@ /counter_example +/counter_watch_events /include/linux/counter.h From a1cd339599a8cff197805c9c71c9cab83cec59c2 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 10 Jan 2025 18:22:05 -0600 Subject: [PATCH 0077/2157] counter: add direction change event Add COUNTER_EVENT_DIRECTION_CHANGE to be used by drivers to emit events when a counter detects a change in direction. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20250110-counter-ti-eqep-add-direction-support-v2-2-c6b6f96d2db9@baylibre.com Signed-off-by: William Breathitt Gray --- include/uapi/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index 008a691c254b..350b45d616bb 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -65,6 +65,8 @@ enum counter_event_type { COUNTER_EVENT_CHANGE_OF_STATE, /* Count value captured */ COUNTER_EVENT_CAPTURE, + /* Direction change detected */ + COUNTER_EVENT_DIRECTION_CHANGE, }; /** From 37f7a388b3f1b14eaeb295c2fe554d15e34e8ab9 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 10 Jan 2025 18:22:06 -0600 Subject: [PATCH 0078/2157] tools/counter: add direction change event to watcher Add support for the new COUNTER_EVENT_DIRECTION_CHANGE to the counter_watch_events tool. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20250110-counter-ti-eqep-add-direction-support-v2-3-c6b6f96d2db9@baylibre.com Signed-off-by: William Breathitt Gray --- tools/counter/counter_watch_events.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/counter/counter_watch_events.c b/tools/counter/counter_watch_events.c index 107631e0f2e3..15e21b0c5ffd 100644 --- a/tools/counter/counter_watch_events.c +++ b/tools/counter/counter_watch_events.c @@ -38,6 +38,7 @@ static const char * const counter_event_type_name[] = { "COUNTER_EVENT_INDEX", "COUNTER_EVENT_CHANGE_OF_STATE", "COUNTER_EVENT_CAPTURE", + "COUNTER_EVENT_DIRECTION_CHANGE", }; static const char * const counter_component_type_name[] = { @@ -118,6 +119,7 @@ static void print_usage(void) " evt_index (COUNTER_EVENT_INDEX)\n" " evt_change_of_state (COUNTER_EVENT_CHANGE_OF_STATE)\n" " evt_capture (COUNTER_EVENT_CAPTURE)\n" + " evt_direction_change (COUNTER_EVENT_DIRECTION_CHANGE)\n" "\n" " chan= channel for this watch [default: 0]\n" " id= component id for this watch [default: 0]\n" @@ -157,6 +159,7 @@ enum { WATCH_EVENT_INDEX, WATCH_EVENT_CHANGE_OF_STATE, WATCH_EVENT_CAPTURE, + WATCH_EVENT_DIRECTION_CHANGE, WATCH_CHANNEL, WATCH_ID, WATCH_PARENT, @@ -183,6 +186,7 @@ static char * const counter_watch_subopts[WATCH_SUBOPTS_MAX + 1] = { [WATCH_EVENT_INDEX] = "evt_index", [WATCH_EVENT_CHANGE_OF_STATE] = "evt_change_of_state", [WATCH_EVENT_CAPTURE] = "evt_capture", + [WATCH_EVENT_DIRECTION_CHANGE] = "evt_direction_change", /* channel, id, parent */ [WATCH_CHANNEL] = "chan", [WATCH_ID] = "id", @@ -278,6 +282,7 @@ int main(int argc, char **argv) case WATCH_EVENT_INDEX: case WATCH_EVENT_CHANGE_OF_STATE: case WATCH_EVENT_CAPTURE: + case WATCH_EVENT_DIRECTION_CHANGE: /* match counter_event_type: subtract enum value */ ret -= WATCH_EVENT_OVERFLOW; watches[i].event = ret; From c2a756660324fceca26780a50950e6d91dfdc210 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 10 Jan 2025 18:22:07 -0600 Subject: [PATCH 0079/2157] counter: ti-eqep: add direction support Add support for reading the direction and for emitting direction change events to the ti-eqep counter driver. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20250110-counter-ti-eqep-add-direction-support-v2-4-c6b6f96d2db9@baylibre.com Signed-off-by: William Breathitt Gray --- drivers/counter/ti-eqep.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index bc586eff0dae..d21c157e531a 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -107,6 +107,15 @@ #define QCLR_PCE BIT(1) #define QCLR_INT BIT(0) +#define QEPSTS_UPEVNT BIT(7) +#define QEPSTS_FDF BIT(6) +#define QEPSTS_QDF BIT(5) +#define QEPSTS_QDLF BIT(4) +#define QEPSTS_COEF BIT(3) +#define QEPSTS_CDEF BIT(2) +#define QEPSTS_FIMF BIT(1) +#define QEPSTS_PCEF BIT(0) + /* EQEP Inputs */ enum { TI_EQEP_SIGNAL_QEPA, /* QEPA/XCLK */ @@ -286,6 +295,9 @@ static int ti_eqep_events_configure(struct counter_device *counter) case COUNTER_EVENT_UNDERFLOW: qeint |= QEINT_PCU; break; + case COUNTER_EVENT_DIRECTION_CHANGE: + qeint |= QEINT_QDC; + break; } } @@ -298,6 +310,7 @@ static int ti_eqep_watch_validate(struct counter_device *counter, switch (watch->event) { case COUNTER_EVENT_OVERFLOW: case COUNTER_EVENT_UNDERFLOW: + case COUNTER_EVENT_DIRECTION_CHANGE: if (watch->channel != 0) return -EINVAL; @@ -368,11 +381,27 @@ static int ti_eqep_position_enable_write(struct counter_device *counter, return 0; } +static int ti_eqep_direction_read(struct counter_device *counter, + struct counter_count *count, + enum counter_count_direction *direction) +{ + struct ti_eqep_cnt *priv = counter_priv(counter); + u32 qepsts; + + regmap_read(priv->regmap16, QEPSTS, &qepsts); + + *direction = (qepsts & QEPSTS_QDF) ? COUNTER_COUNT_DIRECTION_FORWARD + : COUNTER_COUNT_DIRECTION_BACKWARD; + + return 0; +} + static struct counter_comp ti_eqep_position_ext[] = { COUNTER_COMP_CEILING(ti_eqep_position_ceiling_read, ti_eqep_position_ceiling_write), COUNTER_COMP_ENABLE(ti_eqep_position_enable_read, ti_eqep_position_enable_write), + COUNTER_COMP_DIRECTION(ti_eqep_direction_read), }; static struct counter_signal ti_eqep_signals[] = { @@ -439,6 +468,9 @@ static irqreturn_t ti_eqep_irq_handler(int irq, void *dev_id) if (qflg & QFLG_PCU) counter_push_event(counter, COUNTER_EVENT_UNDERFLOW, 0); + if (qflg & QFLG_QDC) + counter_push_event(counter, COUNTER_EVENT_DIRECTION_CHANGE, 0); + regmap_write(priv->regmap16, QCLR, qflg); return IRQ_HANDLED; From 78d9ee370ed33cd8b662981fe1f47cff7b4f0e43 Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Mon, 3 Feb 2025 14:14:27 +0300 Subject: [PATCH 0080/2157] dt-bindings: eeprom: at24: Add compatible for Puya P24C64F Add the compatible for another 64Kb EEPROM from Puya. Signed-off-by: Danila Tikhonov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250203111429.22062-3-danila@jiaxyga.com Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index c9e4afbdc448..e7496ece8ae9 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -130,7 +130,9 @@ properties: - const: giantec,gt24c32a - const: atmel,24c32 - items: - - const: onnn,n24s64b + - enum: + - onnn,n24s64b + - puya,p24c64f - const: atmel,24c64 - items: - enum: From 03480898cefe9cb5ba921dba9304703f7574b687 Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Mon, 3 Feb 2025 14:14:28 +0300 Subject: [PATCH 0081/2157] dt-bindings: eeprom: at24: Add compatible for Giantec GT24P128E Add the compatible for another 128Kb EEPROM from Giantec. Signed-off-by: Danila Tikhonov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250203111429.22062-4-danila@jiaxyga.com Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index e7496ece8ae9..0ac68646c077 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -136,6 +136,7 @@ properties: - const: atmel,24c64 - items: - enum: + - giantec,gt24p128e - giantec,gt24p128f - renesas,r1ex24128 - samsung,s524ad0xd1 From 285cec318bf5a7a6c8ba999b2b6ec96f9a20590f Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 30 Dec 2024 15:10:03 +0800 Subject: [PATCH 0082/2157] fs/ntfs3: Keep write operations atomic syzbot reported a NULL pointer dereference in __generic_file_write_iter. [1] Before the write operation is completed, the user executes ioctl[2] to clear the compress flag of the file, which causes the is_compressed() judgment to return 0, further causing the program to enter the wrong process and call the wrong ops ntfs_aops_cmpr, which triggers the null pointer dereference of write_begin. Use inode lock to synchronize ioctl and write to avoid this case. [1] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000006 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=000000011896d000 [0000000000000000] pgd=0800000118b44403, p4d=0800000118b44403, pud=0800000117517403, pmd=0000000000000000 Internal error: Oops: 0000000086000006 [#1] PREEMPT SMP Modules linked in: CPU: 0 UID: 0 PID: 6427 Comm: syz-executor347 Not tainted 6.13.0-rc3-syzkaller-g573067a5a685 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : generic_perform_write+0x29c/0x868 mm/filemap.c:4055 sp : ffff80009d4978a0 x29: ffff80009d4979c0 x28: dfff800000000000 x27: ffff80009d497bc8 x26: 0000000000000000 x25: ffff80009d497960 x24: ffff80008ba71c68 x23: 0000000000000000 x22: ffff0000c655dac0 x21: 0000000000001000 x20: 000000000000000c x19: 1ffff00013a92f2c x18: ffff0000e183aa1c x17: 0004060000000014 x16: ffff800083275834 x15: 0000000000000001 x14: 0000000000000000 x13: 0000000000000001 x12: ffff0000c655dac0 x11: 0000000000ff0100 x10: 0000000000ff0100 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffff80009d497980 x4 : ffff80009d497960 x3 : 0000000000001000 x2 : 0000000000000000 x1 : ffff0000e183a928 x0 : ffff0000d60b0fc0 Call trace: 0x0 (P) __generic_file_write_iter+0xfc/0x204 mm/filemap.c:4156 ntfs_file_write_iter+0x54c/0x630 fs/ntfs3/file.c:1267 new_sync_write fs/read_write.c:586 [inline] vfs_write+0x920/0xcf4 fs/read_write.c:679 ksys_write+0x15c/0x26c fs/read_write.c:731 __do_sys_write fs/read_write.c:742 [inline] __se_sys_write fs/read_write.c:739 [inline] __arm64_sys_write+0x7c/0x90 fs/read_write.c:739 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x84/0x108 arch/arm64/kernel/entry-common.c:762 [2] ioctl$FS_IOC_SETFLAGS(r0, 0x40086602, &(0x7f00000000c0)=0x20) Reported-by: syzbot+5d0bdc98770e6c55a0fd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5d0bdc98770e6c55a0fd Signed-off-by: Lizhi Xu Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 3f96a11804c9..d3a31bad21f9 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1228,21 +1228,22 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; int err; - err = check_write_restriction(inode); - if (err) - return err; - - if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) { - ntfs_inode_warn(inode, "direct i/o + compressed not supported"); - return -EOPNOTSUPP; - } - if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) return -EAGAIN; inode_lock(inode); } + ret = check_write_restriction(inode); + if (ret) + goto out; + + if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) { + ntfs_inode_warn(inode, "direct i/o + compressed not supported"); + ret = -EOPNOTSUPP; + goto out; + } + ret = generic_write_checks(iocb, from); if (ret <= 0) goto out; From e2d74c47a3d3d84a5fa444f380c126328b44f4db Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 11 Oct 2024 15:40:23 +0800 Subject: [PATCH 0083/2157] fs/ntfs3: Factor out ntfs_{create/remove}_procdir() Introduce ntfs_create_procdir() and ntfs_remove_procdir() to create/remove "/proc/fs/ntfs3/.." Signed-off-by: Ye Bin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 59 +++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 6a0f6b0a3ab2..415492fc655a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -555,6 +555,40 @@ static const struct proc_ops ntfs3_label_fops = { .proc_write = ntfs3_label_write, }; +static void ntfs_create_procdir(struct super_block *sb) +{ + struct proc_dir_entry *e; + + if (!proc_info_root) + return; + + e = proc_mkdir(sb->s_id, proc_info_root); + if (e) { + struct ntfs_sb_info *sbi = sb->s_fs_info; + + proc_create_data("volinfo", 0444, e, + &ntfs3_volinfo_fops, sb); + proc_create_data("label", 0644, e, + &ntfs3_label_fops, sb); + sbi->procdir = e; + } +} + +static void ntfs_remove_procdir(struct super_block *sb) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + + if (!sbi->procdir) + return; + + remove_proc_entry("label", sbi->procdir); + remove_proc_entry("volinfo", sbi->procdir); + remove_proc_entry(sb->s_id, proc_info_root); + sbi->procdir = NULL; +} +#else +static void ntfs_create_procdir(struct super_block *sb) {} +static void ntfs_remove_procdir(struct super_block *sb) {} #endif static struct kmem_cache *ntfs_inode_cachep; @@ -644,15 +678,7 @@ static void ntfs_put_super(struct super_block *sb) { struct ntfs_sb_info *sbi = sb->s_fs_info; -#ifdef CONFIG_PROC_FS - // Remove /proc/fs/ntfs3/.. - if (sbi->procdir) { - remove_proc_entry("label", sbi->procdir); - remove_proc_entry("volinfo", sbi->procdir); - remove_proc_entry(sb->s_id, proc_info_root); - sbi->procdir = NULL; - } -#endif + ntfs_remove_procdir(sb); /* Mark rw ntfs as clear, if possible. */ ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); @@ -1590,20 +1616,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) kfree(boot2); } -#ifdef CONFIG_PROC_FS - /* Create /proc/fs/ntfs3/.. */ - if (proc_info_root) { - struct proc_dir_entry *e = proc_mkdir(sb->s_id, proc_info_root); - static_assert((S_IRUGO | S_IWUSR) == 0644); - if (e) { - proc_create_data("volinfo", S_IRUGO, e, - &ntfs3_volinfo_fops, sb); - proc_create_data("label", S_IRUGO | S_IWUSR, e, - &ntfs3_label_fops, sb); - sbi->procdir = e; - } - } -#endif + ntfs_create_procdir(sb); if (is_legacy_ntfs(sb)) sb->s_flags |= SB_RDONLY; From c5a396295370fa99ddc0c4c4d25f8a3ee4f013d8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 11 Oct 2024 15:40:24 +0800 Subject: [PATCH 0084/2157] fs/ntfs3: Factor out ntfs_{create/remove}_proc_root() Introduce ntfs_create_proc_root()/ntfs_remove_proc_root() for create/remove "/proc/fs/ntfs3". Signed-off-by: Ye Bin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 415492fc655a..66047cf0e6e8 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -586,9 +586,24 @@ static void ntfs_remove_procdir(struct super_block *sb) remove_proc_entry(sb->s_id, proc_info_root); sbi->procdir = NULL; } + +static void ntfs_create_proc_root(void) +{ + proc_info_root = proc_mkdir("fs/ntfs3", NULL); +} + +static void ntfs_remove_proc_root(void) +{ + if (proc_info_root) { + remove_proc_entry("fs/ntfs3", NULL); + proc_info_root = NULL; + } +} #else static void ntfs_create_procdir(struct super_block *sb) {} static void ntfs_remove_procdir(struct super_block *sb) {} +static void ntfs_create_proc_root(void) {} +static void ntfs_remove_proc_root(void) {} #endif static struct kmem_cache *ntfs_inode_cachep; @@ -1866,10 +1881,7 @@ static int __init init_ntfs_fs(void) if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); -#ifdef CONFIG_PROC_FS - /* Create "/proc/fs/ntfs3" */ - proc_info_root = proc_mkdir("fs/ntfs3", NULL); -#endif + ntfs_create_proc_root(); err = ntfs3_init_bitmap(); if (err) @@ -1903,11 +1915,7 @@ static void __exit exit_ntfs_fs(void) unregister_filesystem(&ntfs_fs_type); unregister_as_ntfs_legacy(); ntfs3_exit_bitmap(); - -#ifdef CONFIG_PROC_FS - if (proc_info_root) - remove_proc_entry("fs/ntfs3", NULL); -#endif + ntfs_remove_proc_root(); } MODULE_LICENSE("GPL"); From 1d1a7e2525491f56901f5f63370a0775768044b8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 11 Oct 2024 15:40:25 +0800 Subject: [PATCH 0085/2157] fs/ntfs3: Fix 'proc_info_root' leak when init ntfs failed There's a issue as follows: proc_dir_entry 'fs/ntfs3' already registered WARNING: CPU: 3 PID: 9788 at fs/proc/generic.c:375 proc_register+0x418/0x590 Modules linked in: ntfs3(E+) Call Trace: _proc_mkdir+0x165/0x200 init_ntfs_fs+0x36/0xf90 [ntfs3] do_one_initcall+0x115/0x6c0 do_init_module+0x253/0x760 load_module+0x55f2/0x6c80 init_module_from_file+0xd2/0x130 __x64_sys_finit_module+0xbf/0x130 do_syscall_64+0x72/0x1c0 Above issue happens as missing destroy 'proc_info_root' when error happens after create 'proc_info_root' in init_ntfs_fs(). So destroy 'proc_info_root' in error path in init_ntfs_fs(). Fixes: 7832e123490a ("fs/ntfs3: Add support /proc/fs/ntfs3//volinfo and /proc/fs/ntfs3//label") Signed-off-by: Ye Bin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 66047cf0e6e8..920a1ab47b63 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1885,7 +1885,7 @@ static int __init init_ntfs_fs(void) err = ntfs3_init_bitmap(); if (err) - return err; + goto out2; ntfs_inode_cachep = kmem_cache_create( "ntfs_inode_cache", sizeof(struct ntfs_inode), 0, @@ -1905,6 +1905,8 @@ static int __init init_ntfs_fs(void) kmem_cache_destroy(ntfs_inode_cachep); out1: ntfs3_exit_bitmap(); +out2: + ntfs_remove_proc_root(); return err; } From ff355926445897cc9fdea3b00611e514232c213c Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Mon, 14 Oct 2024 20:16:38 +0800 Subject: [PATCH 0086/2157] fs/ntfs3: Fix WARNING in ntfs_extend_initialized_size Syzbot reported a WARNING in ntfs_extend_initialized_size. The data type of in->i_valid and to is u64 in ntfs_file_mmap(). If their values are greater than LLONG_MAX, overflow will occur because the data types of the parameters valid and new_valid corresponding to the function ntfs_extend_initialized_size() are loff_t. Before calling ntfs_extend_initialized_size() in the ntfs_file_mmap(), the "ni->i_valid < to" has been determined, so the same WARN_ON determination is not required in ntfs_extend_initialized_size(). Just execute the ntfs_extend_initialized_size() in ntfs_extend() to make a WARN_ON check. Reported-and-tested-by: syzbot+e37dd1dfc814b10caa55@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e37dd1dfc814b10caa55 Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index d3a31bad21f9..4d9d84cc3c6f 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -412,6 +412,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, } if (extend_init && !is_compressed(ni)) { + WARN_ON(ni->i_valid >= pos); err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); if (err) goto out; From b432163ebd15a0fb74051949cb61456d6c55ccbd Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 30 Jan 2025 17:03:41 +0300 Subject: [PATCH 0087/2157] fs/ntfs3: Update inode->i_mapping->a_ops on compression state Update inode->i_mapping->a_ops when the compression state changes to ensure correct address space operations. Clear ATTR_FLAG_SPARSED/FILE_ATTRIBUTE_SPARSE_FILE when enabling compression to prevent flag conflicts. v2: Additionally, ensure that all dirty pages are flushed and concurrent access to the page cache is blocked. Fixes: 6b39bfaeec44 ("fs/ntfs3: Add support for the compression attribute") Reported-by: Kun Hu , Jiaji Qin Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 3 ++- fs/ntfs3/file.c | 22 ++++++++++++++++++++-- fs/ntfs3/frecord.c | 6 ++++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index af94e3737470..e946f75eb540 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -2664,8 +2664,9 @@ int attr_set_compress(struct ntfs_inode *ni, bool compr) attr->nres.run_off = cpu_to_le16(run_off); } - /* Update data attribute flags. */ + /* Update attribute flags. */ if (compr) { + attr->flags &= ~ATTR_FLAG_SPARSED; attr->flags |= ATTR_FLAG_COMPRESSED; attr->nres.c_unit = NTFS_LZNT_CUNIT; } else { diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 4d9d84cc3c6f..9b6a3f8d2e7c 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -101,8 +101,26 @@ int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, /* Allowed to change compression for empty files and for directories only. */ if (!is_dedup(ni) && !is_encrypted(ni) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - /* Change compress state. */ - int err = ni_set_compress(inode, flags & FS_COMPR_FL); + int err = 0; + struct address_space *mapping = inode->i_mapping; + + /* write out all data and wait. */ + filemap_invalidate_lock(mapping); + err = filemap_write_and_wait(mapping); + + if (err >= 0) { + /* Change compress state. */ + bool compr = flags & FS_COMPR_FL; + err = ni_set_compress(inode, compr); + + /* For files change a_ops too. */ + if (!err) + mapping->a_ops = compr ? &ntfs_aops_cmpr : + &ntfs_aops; + } + + filemap_invalidate_unlock(mapping); + if (err) return err; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 5df6a0b5add9..81271196c557 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3434,10 +3434,12 @@ int ni_set_compress(struct inode *inode, bool compr) } ni->std_fa = std->fa; - if (compr) + if (compr) { + std->fa &= ~FILE_ATTRIBUTE_SPARSE_FILE; std->fa |= FILE_ATTRIBUTE_COMPRESSED; - else + } else { std->fa &= ~FILE_ATTRIBUTE_COMPRESSED; + } if (ni->std_fa != std->fa) { ni->std_fa = std->fa; From a7550ff59edfc768a8600c1e5c24c304208696a5 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:46:14 +0100 Subject: [PATCH 0088/2157] Input: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/62db561622799dfc8d58682ca41b54e3f1ff6949.1738746904.git.namcao@linutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/walkera0701.c | 3 +-- drivers/input/keyboard/gpio_keys.c | 10 ++++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index 59eea813f258..15370fb82317 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -232,8 +232,7 @@ static void walkera0701_attach(struct parport *pp) goto err_unregister_device; } - hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - w->timer.function = timer_handler; + hrtimer_setup(&w->timer, timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL); w->input_dev = input_allocate_device(); if (!w->input_dev) { diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 5eef66516e37..166cfc67d98b 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -590,9 +590,8 @@ static int gpio_keys_setup_key(struct platform_device *pdev, INIT_DELAYED_WORK(&bdata->work, gpio_keys_gpio_work_func); - hrtimer_init(&bdata->debounce_timer, - CLOCK_REALTIME, HRTIMER_MODE_REL); - bdata->debounce_timer.function = gpio_keys_debounce_timer; + hrtimer_setup(&bdata->debounce_timer, gpio_keys_debounce_timer, CLOCK_REALTIME, + HRTIMER_MODE_REL); isr = gpio_keys_gpio_isr; irqflags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; @@ -628,9 +627,8 @@ static int gpio_keys_setup_key(struct platform_device *pdev, } bdata->release_delay = button->debounce_interval; - hrtimer_init(&bdata->release_timer, - CLOCK_REALTIME, HRTIMER_MODE_REL_HARD); - bdata->release_timer.function = gpio_keys_irq_timer; + hrtimer_setup(&bdata->release_timer, gpio_keys_irq_timer, CLOCK_REALTIME, + HRTIMER_MODE_REL_HARD); isr = gpio_keys_irq_isr; irqflags = 0; From c1f5c148756786a9370b471735069fcfafd0b47f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Feb 2025 06:15:30 +0100 Subject: [PATCH 0089/2157] Revert "serial: Airoha SoC UART and HSUART support" This reverts commit e12ebf14fa3654f68589292e645ae1ef74786638. It causes build errors in linux-next. Cc: Benjamin Larsson Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20250206135328.4bad1401@canb.auug.org.au Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 15 ----- drivers/tty/serial/8250/8250_airoha.c | 81 --------------------------- drivers/tty/serial/8250/8250_of.c | 2 - drivers/tty/serial/8250/8250_port.c | 26 --------- drivers/tty/serial/8250/Kconfig | 10 ---- drivers/tty/serial/8250/Makefile | 1 - 6 files changed, 135 deletions(-) delete mode 100644 drivers/tty/serial/8250/8250_airoha.c diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 2ebfd94a5818..11e05aa014e5 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -314,21 +314,6 @@ static inline int serial8250_in_MCR(struct uart_8250_port *up) return mctrl; } -/* uart_config[] table port type defines */ -/* Airoha UART */ -#define PORT_AIROHA 124 - -/* Airoha HSUART */ -#define PORT_AIROHA_HS 125 - -#ifdef CONFIG_SERIAL_8250_AIROHA -void airoha8250_set_baud_rate(struct uart_port *port, - unsigned int baud, unsigned int hs); -#else -static inline void airoha8250_set_baud_rate(struct uart_port *port, - unsigned int baud, unsigned int hs) { } -#endif - #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); diff --git a/drivers/tty/serial/8250/8250_airoha.c b/drivers/tty/serial/8250/8250_airoha.c deleted file mode 100644 index 51e675605741..000000000000 --- a/drivers/tty/serial/8250/8250_airoha.c +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ - -/* - * Airoha UART baud rate calculation function - * - * Copyright (c) 2025 Genexis Sweden AB - * Author: Benjamin Larsson - */ - -#include "8250.h" - -/* The Airoha UART is 16550-compatible except for the baud rate calculation. */ - -/* Airoha UART registers */ -#define UART_AIROHA_BRDL 0 -#define UART_AIROHA_BRDH 1 -#define UART_AIROHA_XINCLKDR 10 -#define UART_AIROHA_XYD 11 - -#define XYD_Y 65000 -#define XINDIV_CLOCK 20000000 -#define UART_BRDL_20M 0x01 -#define UART_BRDH_20M 0x00 - -static const int clock_div_tab[] = { 10, 4, 2}; -static const int clock_div_reg[] = { 4, 2, 1}; - -/** - * airoha8250_set_baud_rate() - baud rate calculation routine - * @port: uart port - * @baud: requested uart baud rate - * @hs: uart type selector, 0 for regular uart and 1 for high-speed uart - * - * crystal_clock = 20 MHz (fixed frequency) - * xindiv_clock = crystal_clock / clock_div - * (x/y) = XYD, 32 bit register with 16 bits of x and then 16 bits of y - * clock_div = XINCLK_DIVCNT (default set to 10 (0x4)), - * - 3 bit register [ 1, 2, 4, 8, 10, 12, 16, 20 ] - * - * baud_rate = ((xindiv_clock) * (x/y)) / ([BRDH,BRDL] * 16) - * - * Selecting divider needs to fulfill - * 1.8432 MHz <= xindiv_clk <= APB clock / 2 - * The clocks are unknown but a divider of value 1 did not result in a valid - * waveform. - * - * XYD_y seems to need to be larger then XYD_x for proper waveform generation. - * Setting [BRDH,BRDL] to [0,1] and XYD_y to 65000 gives even values - * for usual baud rates. - */ - -void airoha8250_set_baud_rate(struct uart_port *port, - unsigned int baud, unsigned int hs) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned int xyd_x, nom, denom; - int i; - - /* set DLAB to access the baud rate divider registers (BRDH, BRDL) */ - serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); - /* set baud rate calculation defaults */ - /* set BRDIV ([BRDH,BRDL]) to 1 */ - serial_port_out(port, UART_AIROHA_BRDL, UART_BRDL_20M); - serial_port_out(port, UART_AIROHA_BRDH, UART_BRDH_20M); - /* calculate XYD_x and XINCLKDR register by searching - * through a table of crystal_clock divisors - * - * for the HSUART xyd_x needs to be scaled by a factor of 2 - */ - for (i = 0 ; i < ARRAY_SIZE(clock_div_tab) ; i++) { - denom = (XINDIV_CLOCK/40) / clock_div_tab[i]; - nom = baud * (XYD_Y/40); - xyd_x = ((nom/denom) << 4) >> hs; - if (xyd_x < XYD_Y) - break; - } - serial_port_out(port, UART_AIROHA_XINCLKDR, clock_div_reg[i]); - serial_port_out(port, UART_AIROHA_XYD, (xyd_x<<16) | XYD_Y); - /* unset DLAB */ - serial_port_out(port, UART_LCR, up->lcr); -} diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 5315bc1bc06d..64aed7efc569 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -341,8 +341,6 @@ static const struct of_device_id of_platform_serial_table[] = { { .compatible = "ti,da830-uart", .data = (void *)PORT_DA830, }, { .compatible = "nuvoton,wpcm450-uart", .data = (void *)PORT_NPCM, }, { .compatible = "nuvoton,npcm750-uart", .data = (void *)PORT_NPCM, }, - { .compatible = "airoha,airoha-uart", .data = (void *)PORT_AIROHA, }, - { .compatible = "airoha,airoha-hsuart", .data = (void *)PORT_AIROHA_HS, }, { /* end of list */ }, }; MODULE_DEVICE_TABLE(of, of_platform_serial_table); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 5afbc988dcc3..d7976a21cca9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -319,24 +319,6 @@ static const struct serial8250_config uart_config[] = { .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, - /* From here on after additional uart config port defines are placed in 8250.h - */ - [PORT_AIROHA] = { - .name = "Airoha UART", - .fifo_size = 8, - .tx_loadsz = 1, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_CLEAR_RCVR, - .rxtrig_bytes = {1, 4}, - .flags = UART_CAP_FIFO, - }, - [PORT_AIROHA_HS] = { - .name = "Airoha HSUART", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_CLEAR_RCVR, - .rxtrig_bytes = {1, 4}, - .flags = UART_CAP_FIFO, - }, }; /* Uart divisor latch read */ @@ -2883,14 +2865,6 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, serial8250_set_divisor(port, baud, quot, frac); - /* - * Airoha SoCs have custom registers for baud rate settings - */ - if (port->type == PORT_AIROHA) - airoha8250_set_baud_rate(port, baud, 0); - if (port->type == PORT_AIROHA_HS) - airoha8250_set_baud_rate(port, baud, 1); - /* * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR * is written without DLAB set, this mode will be disabled. diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 97fe6ea9393d..55d26d16df9b 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -356,16 +356,6 @@ config SERIAL_8250_ACORN system, say Y to this option. The driver can handle 1, 2, or 3 port cards. If unsure, say N. -config SERIAL_8250_AIROHA - tristate "Airoha UART support" - depends on (ARCH_AIROHA || COMPILE_TEST) && OF && SERIAL_8250 - help - Selecting this option enables an Airoha SoC specific baud rate - calculation routine on an otherwise 16550 compatible UART hardware. - - If you have an Airoha based board and want to use the serial port, - say Y to this option. If unsure, say N. - config SERIAL_8250_BCM2835AUX tristate "BCM2835 auxiliar mini UART support" depends on ARCH_BCM2835 || COMPILE_TEST diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index b7f07d5c4cca..1516de629b61 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o obj-$(CONFIG_SERIAL_8250_ACORN) += 8250_acorn.o -obj-$(CONFIG_SERIAL_8250_AIROHA) += 8250_airoha.o obj-$(CONFIG_SERIAL_8250_ASPEED_VUART) += 8250_aspeed_vuart.o obj-$(CONFIG_SERIAL_8250_BCM2835AUX) += 8250_bcm2835aux.o obj-$(CONFIG_SERIAL_8250_BCM7271) += 8250_bcm7271.o From b6ad40c0027c6983da9b702405ad6def373354f8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Feb 2025 06:16:32 +0100 Subject: [PATCH 0090/2157] Revert "dt-bindings: serial: 8250: Add Airoha compatibles" This reverts commit ed333392bd201e71a010e588e61605a1e2dd07df. It was part of a patch series that caused build errors in linux-next. Cc: Benjamin Larsson Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20250206135328.4bad1401@canb.auug.org.au Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 671944d520d3..0bde2379e864 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -63,8 +63,6 @@ properties: - const: mrvl,pxa-uart - const: nuvoton,wpcm450-uart - const: nuvoton,npcm750-uart - - const: airoha,airoha-uart - - const: airoha,airoha-hsuart - const: nvidia,tegra20-uart - const: nxp,lpc3220-uart - items: From cc1eb048e7ee4f437430dd983f03116767b46c85 Mon Sep 17 00:00:00 2001 From: Peter Colberg Date: Mon, 27 Jan 2025 15:16:34 -0500 Subject: [PATCH 0091/2157] fpga: m10bmc-sec: update email address for Peter Colberg Update my email address after Altera became a subsidiary of Intel on January 1, 2025. Signed-off-by: Peter Colberg Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20250127201634.17097-1-peter.colberg@altera.com Signed-off-by: Xu Yilun --- .../ABI/testing/sysfs-driver-intel-m10-bmc | 4 ++-- .../testing/sysfs-driver-intel-m10-bmc-sec-update | 14 +++++++------- MAINTAINERS | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc index c12316dfd973..a6e400364932 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc +++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc @@ -17,7 +17,7 @@ Description: Read only. Returns the firmware version of Intel MAX10 What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address Date: January 2021 KernelVersion: 5.12 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the first MAC address in a block of sequential MAC addresses assigned to the board that is managed by the Intel MAX10 BMC. It is stored in @@ -28,7 +28,7 @@ Description: Read only. Returns the first MAC address in a block What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count Date: January 2021 KernelVersion: 5.12 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the number of sequential MAC addresses assigned to the board managed by the Intel MAX10 BMC. This value is stored in FLASH and is mirrored diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update index 9051695d2211..c69fd3894eb4 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update +++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update @@ -1,7 +1,7 @@ What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the static region if one is programmed, else it returns the string: "hash not programmed". This file is only @@ -11,7 +11,7 @@ Description: Read only. Returns the root entry hash for the static What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the partial reconfiguration region if one is programmed, else it returns the string: "hash not programmed". This file @@ -21,7 +21,7 @@ Description: Read only. Returns the root entry hash for the partial What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns the root entry hash for the BMC image if one is programmed, else it returns the string: "hash not programmed". This file is only visible if the @@ -31,7 +31,7 @@ Description: Read only. Returns the root entry hash for the BMC image What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the static region. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -39,7 +39,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the partial reconfiguration region. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -47,7 +47,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns a list of indices for canceled code signing keys for the BMC. The standard bitmap list format is used (e.g. "1,2-6,9"). @@ -55,7 +55,7 @@ Description: Read only. Returns a list of indices for canceled code What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count Date: Sep 2022 KernelVersion: 5.20 -Contact: Peter Colberg +Contact: Peter Colberg Description: Read only. Returns number of times the secure update staging area has been flashed. Format: "%u". diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..3f60ff1957b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11833,7 +11833,7 @@ F: drivers/mfd/intel-m10-bmc* F: include/linux/mfd/intel-m10-bmc.h INTEL MAX10 BMC SECURE UPDATES -M: Peter Colberg +M: Peter Colberg L: linux-fpga@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update From 35ad0d62da83b2e027e2e3c8b3b265ce6a678c5a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 3 Feb 2025 11:01:41 +0200 Subject: [PATCH 0092/2157] MAINTAINERS: Use my kernel.org address for USB4/Thunderbolt work Switch to use my kernel.org address for USB4/Thunderbolt work. Signed-off-by: Mika Westerberg Acked-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..0c98d6c71372 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23571,7 +23571,7 @@ F: drivers/thunderbolt/dma_test.c THUNDERBOLT DRIVER M: Andreas Noever M: Michael Jamet -M: Mika Westerberg +M: Mika Westerberg M: Yehezkel Bernat L: linux-usb@vger.kernel.org S: Maintained @@ -23582,7 +23582,7 @@ F: include/linux/thunderbolt.h THUNDERBOLT NETWORK DRIVER M: Michael Jamet -M: Mika Westerberg +M: Mika Westerberg M: Yehezkel Bernat L: netdev@vger.kernel.org S: Maintained From 84973331442a57bac31b40eaef6f264496c6f3fd Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:16 +0000 Subject: [PATCH 0093/2157] efi/cper, cxl: Prefix protocol error struct and function names with cxl_ Rename the protocol error struct from struct cper_sec_prot_err to struct cxl_cper_sec_prot_err and cper_print_prot_err() to cxl_cper_print_prot_err() to maintain naming consistency. No functional changes. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/firmware/efi/cper.c | 4 ++-- drivers/firmware/efi/cper_cxl.c | 3 ++- drivers/firmware/efi/cper_cxl.h | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index b69e68ef3f02..8e5762f7ef2e 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -624,11 +624,11 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata else goto err_section_too_small; } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { - struct cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); printk("%ssection_type: CXL Protocol Error\n", newpfx); if (gdata->error_data_length >= sizeof(*prot_err)) - cper_print_prot_err(newpfx, prot_err); + cxl_cper_print_prot_err(newpfx, prot_err); else goto err_section_too_small; } else { diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c index a55771b99a97..cbaabcb7382d 100644 --- a/drivers/firmware/efi/cper_cxl.c +++ b/drivers/firmware/efi/cper_cxl.c @@ -55,7 +55,8 @@ enum { USP, /* CXL Upstream Switch Port */ }; -void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err) +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err) { if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE) pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type, diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h index 86bfcf7909ec..0e3ab0ba17c3 100644 --- a/drivers/firmware/efi/cper_cxl.h +++ b/drivers/firmware/efi/cper_cxl.h @@ -18,7 +18,7 @@ #pragma pack(1) /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ -struct cper_sec_prot_err { +struct cxl_cper_sec_prot_err { u64 valid_bits; u8 agent_type; u8 reserved[7]; @@ -61,6 +61,7 @@ struct cper_sec_prot_err { #pragma pack() -void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err); +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err); #endif //__CPER_CXL_ From 958c3a6706863f9f77b21c90d2428473441cd8a1 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:17 +0000 Subject: [PATCH 0094/2157] efi/cper, cxl: Make definitions and structures global In preparation to add tracepoint support, move protocol error UUID definition to a common location, Also, make struct CXL RAS capability, cxl_cper_sec_prot_err and CPER validation flags global for use across different modules. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/firmware/efi/cper.c | 1 + drivers/firmware/efi/cper_cxl.c | 35 +-------------- drivers/firmware/efi/cper_cxl.h | 51 --------------------- include/cxl/event.h | 80 +++++++++++++++++++++++++++++++++ include/linux/cper.h | 4 ++ 5 files changed, 86 insertions(+), 85 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 8e5762f7ef2e..ae1953e2b214 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "cper_cxl.h" /* diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c index cbaabcb7382d..64c0dd27be6e 100644 --- a/drivers/firmware/efi/cper_cxl.c +++ b/drivers/firmware/efi/cper_cxl.c @@ -8,27 +8,9 @@ */ #include +#include #include "cper_cxl.h" -#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0) -#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1) -#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2) -#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3) -#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4) -#define PROT_ERR_VALID_DVSEC BIT_ULL(5) -#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) - -/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ -struct cxl_ras_capability_regs { - u32 uncor_status; - u32 uncor_mask; - u32 uncor_severity; - u32 cor_status; - u32 cor_mask; - u32 cap_control; - u32 header_log[16]; -}; - static const char * const prot_err_agent_type_strs[] = { "Restricted CXL Device", "Restricted CXL Host Downstream Port", @@ -40,21 +22,6 @@ static const char * const prot_err_agent_type_strs[] = { "CXL Upstream Switch Port", }; -/* - * The layout of the enumeration and the values matches CXL Agent Type - * field in the UEFI 2.10 Section N.2.13, - */ -enum { - RCD, /* Restricted CXL Device */ - RCH_DP, /* Restricted CXL Host Downstream Port */ - DEVICE, /* CXL Device */ - LD, /* CXL Logical Device */ - FMLD, /* CXL Fabric Manager managed Logical Device */ - RP, /* CXL Root Port */ - DSP, /* CXL Downstream Switch Port */ - USP, /* CXL Upstream Switch Port */ -}; - void cxl_cper_print_prot_err(const char *pfx, const struct cxl_cper_sec_prot_err *prot_err) { diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h index 0e3ab0ba17c3..5ce1401ee17a 100644 --- a/drivers/firmware/efi/cper_cxl.h +++ b/drivers/firmware/efi/cper_cxl.h @@ -10,57 +10,6 @@ #ifndef LINUX_CPER_CXL_H #define LINUX_CPER_CXL_H -/* CXL Protocol Error Section */ -#define CPER_SEC_CXL_PROT_ERR \ - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ - 0x4B, 0x77, 0x10, 0x48) - -#pragma pack(1) - -/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ -struct cxl_cper_sec_prot_err { - u64 valid_bits; - u8 agent_type; - u8 reserved[7]; - - /* - * Except for RCH Downstream Port, all the remaining CXL Agent - * types are uniquely identified by the PCIe compatible SBDF number. - */ - union { - u64 rcrb_base_addr; - struct { - u8 function; - u8 device; - u8 bus; - u16 segment; - u8 reserved_1[3]; - }; - } agent_addr; - - struct { - u16 vendor_id; - u16 device_id; - u16 subsystem_vendor_id; - u16 subsystem_id; - u8 class_code[2]; - u16 slot; - u8 reserved_1[4]; - } device_id; - - struct { - u32 lower_dw; - u32 upper_dw; - } dev_serial_num; - - u8 capability[60]; - u16 dvsec_len; - u16 err_len; - u8 reserved_2[4]; -}; - -#pragma pack() - void cxl_cper_print_prot_err(const char *pfx, const struct cxl_cper_sec_prot_err *prot_err); diff --git a/include/cxl/event.h b/include/cxl/event.h index 04edd44bd26f..7a6c14c05215 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -164,6 +164,86 @@ struct cxl_cper_work_data { struct cxl_cper_event_rec rec; }; +#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0) +#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1) +#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2) +#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3) +#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4) +#define PROT_ERR_VALID_DVSEC BIT_ULL(5) +#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) + +/* + * The layout of the enumeration and the values matches CXL Agent Type + * field in the UEFI 2.10 Section N.2.13, + */ +enum { + RCD, /* Restricted CXL Device */ + RCH_DP, /* Restricted CXL Host Downstream Port */ + DEVICE, /* CXL Device */ + LD, /* CXL Logical Device */ + FMLD, /* CXL Fabric Manager managed Logical Device */ + RP, /* CXL Root Port */ + DSP, /* CXL Downstream Switch Port */ + USP, /* CXL Upstream Switch Port */ +}; + +#pragma pack(1) + +/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ +struct cxl_cper_sec_prot_err { + u64 valid_bits; + u8 agent_type; + u8 reserved[7]; + + /* + * Except for RCH Downstream Port, all the remaining CXL Agent + * types are uniquely identified by the PCIe compatible SBDF number. + */ + union { + u64 rcrb_base_addr; + struct { + u8 function; + u8 device; + u8 bus; + u16 segment; + u8 reserved_1[3]; + }; + } agent_addr; + + struct { + u16 vendor_id; + u16 device_id; + u16 subsystem_vendor_id; + u16 subsystem_id; + u8 class_code[2]; + u16 slot; + u8 reserved_1[4]; + } device_id; + + struct { + u32 lower_dw; + u32 upper_dw; + } dev_serial_num; + + u8 capability[60]; + u16 dvsec_len; + u16 err_len; + u8 reserved_2[4]; +}; + +#pragma pack() + +/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ +struct cxl_ras_capability_regs { + u32 uncor_status; + u32 uncor_mask; + u32 uncor_severity; + u32 cor_status; + u32 cor_mask; + u32 cap_control; + u32 header_log[16]; +}; + #ifdef CONFIG_ACPI_APEI_GHES int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); diff --git a/include/linux/cper.h b/include/linux/cper.h index 265b0f8fc0b3..5c6d4d5b9975 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -89,6 +89,10 @@ enum { #define CPER_NOTIFY_DMAR \ GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Protocol Error Section */ +#define CPER_SEC_CXL_PROT_ERR \ + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ + 0x4B, 0x77, 0x10, 0x48) /* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ /* From 61eac5f7f6439e8fe99b5fb29406acb0fd7b83c6 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:18 +0000 Subject: [PATCH 0095/2157] efi/cper, cxl: Remove cper_cxl.h Move the declaration of cxl_cper_print_prot_err() to include/linux/cper.h to avoid maintaining a separate header file just for this function declaration. Remove drivers/firmware/efi/cper_cxl.h as its contents have been reorganized. No functional changes. Signed-off-by: Smita Koralahalli Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/firmware/efi/cper.c | 1 - drivers/firmware/efi/cper_cxl.c | 1 - drivers/firmware/efi/cper_cxl.h | 16 ---------------- include/linux/cper.h | 4 ++++ 4 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 drivers/firmware/efi/cper_cxl.h diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ae1953e2b214..928409199a1a 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -25,7 +25,6 @@ #include #include #include -#include "cper_cxl.h" /* * CPER record ID need to be unique even after reboot, because record diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c index 64c0dd27be6e..8a7667faf953 100644 --- a/drivers/firmware/efi/cper_cxl.c +++ b/drivers/firmware/efi/cper_cxl.c @@ -9,7 +9,6 @@ #include #include -#include "cper_cxl.h" static const char * const prot_err_agent_type_strs[] = { "Restricted CXL Device", diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h deleted file mode 100644 index 5ce1401ee17a..000000000000 --- a/drivers/firmware/efi/cper_cxl.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * UEFI Common Platform Error Record (CPER) support for CXL Section. - * - * Copyright (C) 2022 Advanced Micro Devices, Inc. - * - * Author: Smita Koralahalli - */ - -#ifndef LINUX_CPER_CXL_H -#define LINUX_CPER_CXL_H - -void cxl_cper_print_prot_err(const char *pfx, - const struct cxl_cper_sec_prot_err *prot_err); - -#endif //__CPER_CXL_ diff --git a/include/linux/cper.h b/include/linux/cper.h index 5c6d4d5b9975..0ed60a91eca9 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -605,4 +605,8 @@ void cper_estatus_print(const char *pfx, int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); int cper_estatus_check(const struct acpi_hest_generic_status *estatus); +struct cxl_cper_sec_prot_err; +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err); + #endif From 315c2f0b53ba2645062627443a12cea73f3dad9c Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Thu, 23 Jan 2025 08:44:19 +0000 Subject: [PATCH 0096/2157] acpi/ghes, cper: Recognize and cache CXL Protocol errors Add support in GHES to detect and process CXL CPER Protocol errors, as defined in UEFI v2.10, section N.2.13. Define struct cxl_cper_prot_err_work_data to cache CXL protocol error information, including RAS capabilities and severity, for further handling. These cached CXL CPER records will later be processed by workqueues within the CXL subsystem. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Tony Luck Reviewed-by: Gregory Price Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250123084421.127697-5-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/acpi/apei/ghes.c | 54 ++++++++++++++++++++++++++++++++++++++++ include/cxl/event.h | 6 +++++ 2 files changed, 60 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b72772494655..4d725d988c43 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -674,6 +674,56 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, + int severity) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cxl_cper_prot_err_work_data wd; + u8 *dvsec_start, *cap_start; + + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { + pr_err_ratelimited("CXL CPER invalid agent type\n"); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); + return; + } + + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", + prot_err->err_len); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) + pr_warn(FW_WARN "CXL CPER no device serial number\n"); + + switch (prot_err->agent_type) { + case RCD: + case DEVICE: + case LD: + case FMLD: + case RP: + case DSP: + case USP: + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); + + dvsec_start = (u8 *)(prot_err + 1); + cap_start = dvsec_start + prot_err->dvsec_len; + + memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); + wd.severity = cper_severity_to_aer(severity); + break; + default: + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", + prot_err->agent_type); + return; + } +#endif +} + /* Room for 8 entries for each of the 4 event log queues */ #define CXL_CPER_FIFO_DEPTH 32 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); @@ -777,6 +827,10 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); + + cxl_cper_post_prot_err(prot_err, gdata->error_severity); } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); diff --git a/include/cxl/event.h b/include/cxl/event.h index 7a6c14c05215..8381a07052d0 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -244,6 +244,12 @@ struct cxl_ras_capability_regs { u32 header_log[16]; }; +struct cxl_cper_prot_err_work_data { + struct cxl_cper_sec_prot_err prot_err; + struct cxl_ras_capability_regs ras_cap; + int severity; +}; + #ifdef CONFIG_ACPI_APEI_GHES int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); From c8006fbd0f4fd15fa990786ff9a097b45eef616c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 27 Jan 2025 21:58:59 +0000 Subject: [PATCH 0097/2157] bus: mhi: host: Remove unused functions mhi_device_get() and mhi_queue_dma() haven't been used since 2020's commit 189ff97cca53 ("bus: mhi: core: Add support for data transfer") added them. Remove them. Note that mhi_queue_dma_sync() is used and has been left. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20250127215859.261105-1-linux@treblig.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/main.c | 19 ------------------- drivers/bus/mhi/host/pm.c | 14 -------------- include/linux/mhi.h | 18 ------------------ 3 files changed, 51 deletions(-) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index 4de75674f193..4c91ffd6ed0e 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -1181,25 +1181,6 @@ int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, } EXPORT_SYMBOL_GPL(mhi_queue_skb); -int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir, - struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags) -{ - struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan : - mhi_dev->dl_chan; - struct mhi_buf_info buf_info = { }; - - buf_info.p_addr = mhi_buf->dma_addr; - buf_info.cb_buf = mhi_buf; - buf_info.pre_mapped = true; - buf_info.len = len; - - if (unlikely(mhi_chan->pre_alloc)) - return -EINVAL; - - return mhi_queue(mhi_dev, &buf_info, dir, mflags); -} -EXPORT_SYMBOL_GPL(mhi_queue_dma); - int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, struct mhi_buf_info *info, enum mhi_flags flags) { diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c index 11c0e751f223..2fb27e6f8f88 100644 --- a/drivers/bus/mhi/host/pm.c +++ b/drivers/bus/mhi/host/pm.c @@ -1296,20 +1296,6 @@ int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl) } EXPORT_SYMBOL_GPL(mhi_force_rddm_mode); -void mhi_device_get(struct mhi_device *mhi_dev) -{ - struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; - - mhi_dev->dev_wake++; - read_lock_bh(&mhi_cntrl->pm_lock); - if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state)) - mhi_trigger_resume(mhi_cntrl); - - mhi_cntrl->wake_get(mhi_cntrl, true); - read_unlock_bh(&mhi_cntrl->pm_lock); -} -EXPORT_SYMBOL_GPL(mhi_device_get); - int mhi_device_get_sync(struct mhi_device *mhi_dev) { struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 059dc94d20bb..dd372b0123a6 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -720,12 +720,6 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl); */ void mhi_soc_reset(struct mhi_controller *mhi_cntrl); -/** - * mhi_device_get - Disable device low power mode - * @mhi_dev: Device associated with the channel - */ -void mhi_device_get(struct mhi_device *mhi_dev); - /** * mhi_device_get_sync - Disable device low power mode. Synchronously * take the controller out of suspended state @@ -776,18 +770,6 @@ int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev); */ void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev); -/** - * mhi_queue_dma - Send or receive DMA mapped buffers from client device - * over MHI channel - * @mhi_dev: Device associated with the channels - * @dir: DMA direction for the channel - * @mhi_buf: Buffer for holding the DMA mapped data - * @len: Buffer length - * @mflags: MHI transfer flags used for the transfer - */ -int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir, - struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags); - /** * mhi_queue_buf - Send or receive raw buffers from client device over MHI * channel From 1c7c7388e6c31f46b26a884d80b45efbad8237b2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 6 Feb 2025 21:46:24 -0600 Subject: [PATCH 0098/2157] tools/power turbostat: Clustered Uncore MHz counters should honor show/hide options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clustered uncore frequency counters, UMHz*.* should honor the --show and --hide options. All non-specified counters should be implicityly hidden. But when --show was used, UMHz*.* showed up anyway: $ sudo turbostat -q -S --show Busy% Busy%  UMHz0.0  UMHz1.0  UMHz2.0  UMHz3.0  UMHz4.0 Indeed, there was no string that can be used to explicitly show or hide clustered uncore counters. Even through they are dynamically probed and added, group the clustered UMHz*.* counters with the legacy built-in-counter "UncMHz" for show/hide. turbostat --show Busy% does not show UMHz*.*. turbostat --show UncMHz shows either UncMHz or UMHz*.*, if present turbostat --hide UncMHz hides either UncMHz or UMHz*.*, if present Reported-by: Artem Bityutskiy Signed-off-by: Len Brown Tested-by: Artem Bityutskiy --- tools/power/x86/turbostat/turbostat.8 | 1 + tools/power/x86/turbostat/turbostat.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 99bf905ade81..ed258f248152 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -199,6 +199,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8d5011a0bf60..5eef95956c2f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6703,7 +6703,18 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + /* + * Once add_couter() is called, that counter is always read + * and reported -- So it is effectively (enabled & present). + * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) + * is (enabled). Since we are in this routine, we + * know we will not probe and set (present) the legacy counter. + * + * This allows "--show/--hide UncMHz" to be effective for + * the clustered MHz counters, as a group. + */ + if BIC_IS_ENABLED(BIC_UNCORE_MHZ) + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; From 8ab67b37b81dfaa00a25e95a5f5a020f374848bb Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:13 +0100 Subject: [PATCH 0099/2157] iio: dac: adi-axi-dac: add bus mode setup The ad354xr requires DSPI mode (2 data lanes) to work in buffering mode, so, depending on the DAC type, target TRANSFER_REGISTER "MULTI_IO_MODE" bitfield can be set between: SPI (configuration, entire ad35xxr family), DSPI (ad354xr), QSPI (ad355xr). Also bus IO_MODE must be set accordingly. About removal of AXI_DAC_CUSTOM_CTRL_SYNCED_TRANSFER, according to the HDL history the flag has never been used. So looks like the driver was including it by mistake or in anticipation for something that was never implemented on HDL side. Current HDL updated documentation confirm it is actually not in use anymore and replaced by the IO_MODE bits. Reviewed-by: Nuno Sa Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-4-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-hs.h | 8 ++++++++ drivers/iio/dac/adi-axi-dac.c | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad3552r-hs.h b/drivers/iio/dac/ad3552r-hs.h index 724261d38dea..4a9e35234124 100644 --- a/drivers/iio/dac/ad3552r-hs.h +++ b/drivers/iio/dac/ad3552r-hs.h @@ -8,11 +8,19 @@ struct iio_backend; +enum ad3552r_io_mode { + AD3552R_IO_MODE_SPI, + AD3552R_IO_MODE_DSPI, + AD3552R_IO_MODE_QSPI, +}; + struct ad3552r_hs_platform_data { int (*bus_reg_read)(struct iio_backend *back, u32 reg, u32 *val, size_t data_size); int (*bus_reg_write)(struct iio_backend *back, u32 reg, u32 val, size_t data_size); + int (*bus_set_io_mode)(struct iio_backend *back, + enum ad3552r_io_mode mode); u32 bus_sample_data_clock_hz; }; diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index ac871deb8063..bcaf365feef4 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ b/drivers/iio/dac/adi-axi-dac.c @@ -64,7 +64,7 @@ #define AXI_DAC_UI_STATUS_IF_BUSY BIT(4) #define AXI_DAC_CUSTOM_CTRL_REG 0x008C #define AXI_DAC_CUSTOM_CTRL_ADDRESS GENMASK(31, 24) -#define AXI_DAC_CUSTOM_CTRL_SYNCED_TRANSFER BIT(2) +#define AXI_DAC_CUSTOM_CTRL_MULTI_IO_MODE GENMASK(3, 2) #define AXI_DAC_CUSTOM_CTRL_STREAM BIT(1) #define AXI_DAC_CUSTOM_CTRL_TRANSFER_DATA BIT(0) @@ -722,6 +722,25 @@ static int axi_dac_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val, return regmap_read(st->regmap, AXI_DAC_CUSTOM_RD_REG, val); } +static int axi_dac_bus_set_io_mode(struct iio_backend *back, + enum ad3552r_io_mode mode) +{ + struct axi_dac_state *st = iio_backend_get_priv(back); + int ival, ret; + + guard(mutex)(&st->lock); + + ret = regmap_update_bits(st->regmap, AXI_DAC_CUSTOM_CTRL_REG, + AXI_DAC_CUSTOM_CTRL_MULTI_IO_MODE, + FIELD_PREP(AXI_DAC_CUSTOM_CTRL_MULTI_IO_MODE, mode)); + if (ret) + return ret; + + return regmap_read_poll_timeout(st->regmap, AXI_DAC_UI_STATUS_REG, ival, + FIELD_GET(AXI_DAC_UI_STATUS_IF_BUSY, ival) == 0, 10, + 100 * KILO); +} + static void axi_dac_child_remove(void *data) { platform_device_unregister(data); @@ -733,6 +752,7 @@ static int axi_dac_create_platform_device(struct axi_dac_state *st, struct ad3552r_hs_platform_data pdata = { .bus_reg_read = axi_dac_bus_reg_read, .bus_reg_write = axi_dac_bus_reg_write, + .bus_set_io_mode = axi_dac_bus_set_io_mode, .bus_sample_data_clock_hz = st->dac_clk_rate, }; struct platform_device_info pi = { From 96873eeaa7959a4b23a4d2b771b8b67cf8a96caf Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:14 +0100 Subject: [PATCH 0100/2157] iio: dac: ad3552r-hs: fix message on wrong chip id Set a better info message on wrong chip id, fixing the expected value as read from the info struct. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-5-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-hs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index 8974df625670..6bf995b50395 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -326,8 +326,9 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) id |= val << 8; if (id != st->model_data->chip_id) - dev_info(st->dev, "Chip ID error. Expected 0x%x, Read 0x%x\n", - AD3552R_ID, id); + dev_warn(st->dev, + "chip ID mismatch, detected 0x%x but expected 0x%x\n", + id, st->model_data->chip_id); /* Clear reset error flag, see ad3552r manual, rev B table 38. */ ret = st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_ERR_STATUS, From 21889245fb538123ac9968eea0018f878b44c8c8 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:15 +0100 Subject: [PATCH 0101/2157] iio: dac: ad3552r-hs: use instruction mode for configuration Use "instruction" mode over initial configuration and all other non-streaming operations. DAC boots in streaming mode as default, and the driver is not changing this mode. Instruction r/w is still working because instruction is processed from the DAC after chip select is deasserted, this works until loop mode is 0 or greater than the instruction size. All initial operations should be more safely done in instruction mode, a mode provided for this. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-6-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-hs.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index 6bf995b50395..25ee716b57cd 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -137,13 +137,20 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) if (ret) return ret; + /* Primary region access, set streaming mode (now in SPI + SDR). */ + ret = ad3552r_qspi_update_reg_bits(st, + AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST, 0, 1); + if (ret) + return ret; + /* Inform DAC chip to switch into DDR mode */ ret = ad3552r_qspi_update_reg_bits(st, AD3552R_REG_ADDR_INTERFACE_CONFIG_D, AD3552R_MASK_SPI_CONFIG_DDR, AD3552R_MASK_SPI_CONFIG_DDR, 1); if (ret) - return ret; + goto exit_err_ddr; /* Inform DAC IP to go for DDR mode from now on */ ret = iio_backend_ddr_enable(st->back); @@ -174,6 +181,11 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) iio_backend_ddr_disable(st->back); +exit_err_ddr: + ad3552r_qspi_update_reg_bits(st, AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST, + AD3552R_MASK_SINGLE_INST, 1); + return ret; } @@ -198,6 +210,14 @@ static int ad3552r_hs_buffer_predisable(struct iio_dev *indio_dev) if (ret) return ret; + /* Back to single instruction mode, disabling loop. */ + ret = ad3552r_qspi_update_reg_bits(st, + AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST, + AD3552R_MASK_SINGLE_INST, 1); + if (ret) + return ret; + return 0; } @@ -308,6 +328,13 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) if (ret) return ret; + ret = st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST | + AD3552R_MASK_SHORT_INSTRUCTION, 1); + if (ret) + return ret; + ret = ad3552r_hs_scratch_pad_test(st); if (ret) return ret; From 67a0f04095e40a95c3cc938f49c9874ada386cb3 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:16 +0100 Subject: [PATCH 0102/2157] iio: dac: ad3552r: share model data structures Preparing for new parts to be added also in the hs driver, set model data structures in ad3552r-common.c, to be accessible from both -hs and non hs driver. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-7-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-common.c | 46 +++++++++++++++++++++++++++++--- drivers/iio/dac/ad3552r-hs.c | 8 ------ drivers/iio/dac/ad3552r.c | 36 ------------------------- drivers/iio/dac/ad3552r.h | 6 +++-- 4 files changed, 46 insertions(+), 50 deletions(-) diff --git a/drivers/iio/dac/ad3552r-common.c b/drivers/iio/dac/ad3552r-common.c index 03e0864f5084..ded90bf57baf 100644 --- a/drivers/iio/dac/ad3552r-common.c +++ b/drivers/iio/dac/ad3552r-common.c @@ -11,23 +11,21 @@ #include "ad3552r.h" -const s32 ad3552r_ch_ranges[AD3552R_MAX_RANGES][2] = { +static const s32 ad3552r_ch_ranges[AD3552R_MAX_RANGES][2] = { [AD3552R_CH_OUTPUT_RANGE_0__2P5V] = { 0, 2500 }, [AD3552R_CH_OUTPUT_RANGE_0__5V] = { 0, 5000 }, [AD3552R_CH_OUTPUT_RANGE_0__10V] = { 0, 10000 }, [AD3552R_CH_OUTPUT_RANGE_NEG_5__5V] = { -5000, 5000 }, [AD3552R_CH_OUTPUT_RANGE_NEG_10__10V] = { -10000, 10000 } }; -EXPORT_SYMBOL_NS_GPL(ad3552r_ch_ranges, "IIO_AD3552R"); -const s32 ad3542r_ch_ranges[AD3542R_MAX_RANGES][2] = { +static const s32 ad3542r_ch_ranges[AD3542R_MAX_RANGES][2] = { [AD3542R_CH_OUTPUT_RANGE_0__2P5V] = { 0, 2500 }, [AD3542R_CH_OUTPUT_RANGE_0__5V] = { 0, 5000 }, [AD3542R_CH_OUTPUT_RANGE_0__10V] = { 0, 10000 }, [AD3542R_CH_OUTPUT_RANGE_NEG_5__5V] = { -5000, 5000 }, [AD3542R_CH_OUTPUT_RANGE_NEG_2P5__7P5V] = { -2500, 7500 } }; -EXPORT_SYMBOL_NS_GPL(ad3542r_ch_ranges, "IIO_AD3552R"); /* Gain * AD3552R_GAIN_SCALE */ static const s32 gains_scaling_table[] = { @@ -37,6 +35,46 @@ static const s32 gains_scaling_table[] = { [AD3552R_CH_GAIN_SCALING_0_125] = 125 }; +const struct ad3552r_model_data ad3541r_model_data = { + .model_name = "ad3541r", + .chip_id = AD3541R_ID, + .num_hw_channels = 1, + .ranges_table = ad3542r_ch_ranges, + .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), + .requires_output_range = true, +}; +EXPORT_SYMBOL_NS_GPL(ad3541r_model_data, "IIO_AD3552R"); + +const struct ad3552r_model_data ad3542r_model_data = { + .model_name = "ad3542r", + .chip_id = AD3542R_ID, + .num_hw_channels = 2, + .ranges_table = ad3542r_ch_ranges, + .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), + .requires_output_range = true, +}; +EXPORT_SYMBOL_NS_GPL(ad3542r_model_data, "IIO_AD3552R"); + +const struct ad3552r_model_data ad3551r_model_data = { + .model_name = "ad3551r", + .chip_id = AD3551R_ID, + .num_hw_channels = 1, + .ranges_table = ad3552r_ch_ranges, + .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), + .requires_output_range = false, +}; +EXPORT_SYMBOL_NS_GPL(ad3551r_model_data, "IIO_AD3552R"); + +const struct ad3552r_model_data ad3552r_model_data = { + .model_name = "ad3552r", + .chip_id = AD3552R_ID, + .num_hw_channels = 2, + .ranges_table = ad3552r_ch_ranges, + .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), + .requires_output_range = false, +}; +EXPORT_SYMBOL_NS_GPL(ad3552r_model_data, "IIO_AD3552R"); + u16 ad3552r_calc_custom_gain(u8 p, u8 n, s16 goffs) { return FIELD_PREP(AD3552R_MASK_CH_RANGE_OVERRIDE, 1) | diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index 25ee716b57cd..98711f742c70 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -532,14 +532,6 @@ static int ad3552r_hs_probe(struct platform_device *pdev) return devm_iio_device_register(&pdev->dev, indio_dev); } -static const struct ad3552r_model_data ad3552r_model_data = { - .model_name = "ad3552r", - .chip_id = AD3552R_ID, - .num_hw_channels = 2, - .ranges_table = ad3552r_ch_ranges, - .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), -}; - static const struct of_device_id ad3552r_hs_of_id[] = { { .compatible = "adi,ad3552r", .data = &ad3552r_model_data }, { } diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index e7206af53af6..9d28e06b80c0 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -649,42 +649,6 @@ static int ad3552r_probe(struct spi_device *spi) return devm_iio_device_register(&spi->dev, indio_dev); } -static const struct ad3552r_model_data ad3541r_model_data = { - .model_name = "ad3541r", - .chip_id = AD3541R_ID, - .num_hw_channels = 1, - .ranges_table = ad3542r_ch_ranges, - .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), - .requires_output_range = true, -}; - -static const struct ad3552r_model_data ad3542r_model_data = { - .model_name = "ad3542r", - .chip_id = AD3542R_ID, - .num_hw_channels = 2, - .ranges_table = ad3542r_ch_ranges, - .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), - .requires_output_range = true, -}; - -static const struct ad3552r_model_data ad3551r_model_data = { - .model_name = "ad3551r", - .chip_id = AD3551R_ID, - .num_hw_channels = 1, - .ranges_table = ad3552r_ch_ranges, - .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), - .requires_output_range = false, -}; - -static const struct ad3552r_model_data ad3552r_model_data = { - .model_name = "ad3552r", - .chip_id = AD3552R_ID, - .num_hw_channels = 2, - .ranges_table = ad3552r_ch_ranges, - .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), - .requires_output_range = false, -}; - static const struct spi_device_id ad3552r_id[] = { { .name = "ad3541r", diff --git a/drivers/iio/dac/ad3552r.h b/drivers/iio/dac/ad3552r.h index 4b5581039ae9..3dc8d1d9c0f9 100644 --- a/drivers/iio/dac/ad3552r.h +++ b/drivers/iio/dac/ad3552r.h @@ -134,8 +134,10 @@ #define AD3542R_MAX_RANGES 5 #define AD3552R_QUAD_SPI 2 -extern const s32 ad3552r_ch_ranges[AD3552R_MAX_RANGES][2]; -extern const s32 ad3542r_ch_ranges[AD3542R_MAX_RANGES][2]; +extern const struct ad3552r_model_data ad3541r_model_data; +extern const struct ad3552r_model_data ad3542r_model_data; +extern const struct ad3552r_model_data ad3551r_model_data; +extern const struct ad3552r_model_data ad3552r_model_data; enum ad3552r_id { AD3541R_ID = 0x400b, From 350d1ebfce826df4038a1d12f574e9738e9f6018 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:17 +0100 Subject: [PATCH 0103/2157] iio: dac: ad3552r-hs: add ad3541/2r support A new FPGA HDL has been developed from ADI to support ad354xr devices. Add support for ad3541r and ad3542r with following additions: - use common device_info structures for hs and non hs drivers, - DMA buffering, use DSPI mode for ad354xr and QSPI for ad355xr, - change sample rate to respect number of lanes. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-8-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-common.c | 4 + drivers/iio/dac/ad3552r-hs.c | 260 +++++++++++++++++++++++++------ drivers/iio/dac/ad3552r.h | 3 + 3 files changed, 216 insertions(+), 51 deletions(-) diff --git a/drivers/iio/dac/ad3552r-common.c b/drivers/iio/dac/ad3552r-common.c index ded90bf57baf..b8807e54fa05 100644 --- a/drivers/iio/dac/ad3552r-common.c +++ b/drivers/iio/dac/ad3552r-common.c @@ -42,6 +42,7 @@ const struct ad3552r_model_data ad3541r_model_data = { .ranges_table = ad3542r_ch_ranges, .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), .requires_output_range = true, + .num_spi_data_lanes = 2, }; EXPORT_SYMBOL_NS_GPL(ad3541r_model_data, "IIO_AD3552R"); @@ -52,6 +53,7 @@ const struct ad3552r_model_data ad3542r_model_data = { .ranges_table = ad3542r_ch_ranges, .num_ranges = ARRAY_SIZE(ad3542r_ch_ranges), .requires_output_range = true, + .num_spi_data_lanes = 2, }; EXPORT_SYMBOL_NS_GPL(ad3542r_model_data, "IIO_AD3552R"); @@ -62,6 +64,7 @@ const struct ad3552r_model_data ad3551r_model_data = { .ranges_table = ad3552r_ch_ranges, .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), .requires_output_range = false, + .num_spi_data_lanes = 4, }; EXPORT_SYMBOL_NS_GPL(ad3551r_model_data, "IIO_AD3552R"); @@ -72,6 +75,7 @@ const struct ad3552r_model_data ad3552r_model_data = { .ranges_table = ad3552r_ch_ranges, .num_ranges = ARRAY_SIZE(ad3552r_ch_ranges), .requires_output_range = false, + .num_spi_data_lanes = 4, }; EXPORT_SYMBOL_NS_GPL(ad3552r_model_data, "IIO_AD3552R"); diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index 98711f742c70..e8e309046f11 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -19,6 +19,31 @@ #include "ad3552r.h" #include "ad3552r-hs.h" +/* + * Important notes for register map access: + * ======================================== + * + * Register address space is divided in 2 regions, primary (config) and + * secondary (DAC). Primary region can only be accessed in simple SPI mode, + * with exception for ad355x models where setting QSPI pin high allows QSPI + * access to both the regions. + * + * Due to the fact that ad3541/2r do not implement QSPI, for proper device + * detection, HDL keeps "QSPI" pin level low at boot (see ad3552r manual, rev B + * table 7, pin 31, digital input). For this reason, actually the working mode + * between SPI, DSPI and QSPI must be set via software, configuring the target + * DAC appropriately, together with the backend API to configure the bus mode + * accordingly. + * + * Also, important to note that none of the three modes allow to read in DDR. + * + * In non-buffering operations, mode is set to simple SPI SDR for all primary + * and secondary region r/w accesses, to avoid to switch the mode each time DAC + * register is accessed (raw accesses, r/w), and to be able to dump registers + * content (possible as non DDR only). + * In buffering mode, driver sets best possible mode, D/QSPI and DDR. + */ + struct ad3552r_hs_state { const struct ad3552r_model_data *model_data; struct gpio_desc *reset_gpio; @@ -27,8 +52,19 @@ struct ad3552r_hs_state { bool single_channel; struct ad3552r_ch_data ch_data[AD3552R_MAX_CH]; struct ad3552r_hs_platform_data *data; + /* INTERFACE_CONFIG_D register cache, in DDR we cannot read values. */ + u32 config_d; }; +static int ad3552r_hs_reg_read(struct ad3552r_hs_state *st, u32 reg, u32 *val, + size_t xfer_size) +{ + /* No chip in the family supports DDR read. Informing of this. */ + WARN_ON_ONCE(st->config_d & AD3552R_MASK_SPI_CONFIG_DDR); + + return st->data->bus_reg_read(st->back, reg, val, xfer_size); +} + static int ad3552r_qspi_update_reg_bits(struct ad3552r_hs_state *st, u32 reg, u32 mask, u32 val, size_t xfer_size) @@ -36,7 +72,7 @@ static int ad3552r_qspi_update_reg_bits(struct ad3552r_hs_state *st, u32 rval; int ret; - ret = st->data->bus_reg_read(st->back, reg, &rval, xfer_size); + ret = ad3552r_hs_reg_read(st, reg, &rval, xfer_size); if (ret) return ret; @@ -56,16 +92,20 @@ static int ad3552r_hs_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: /* - * Using 4 lanes (QSPI), then using 2 as DDR mode is - * considered always on (considering buffering mode always). + * Using a "num_spi_data_lanes" variable since ad3541/2 have + * only DSPI interface, while ad355x is QSPI. Then using 2 as + * DDR mode is considered always on (considering buffering + * mode always). */ *val = DIV_ROUND_CLOSEST(st->data->bus_sample_data_clock_hz * - 4 * 2, chan->scan_type.realbits); + st->model_data->num_spi_data_lanes * 2, + chan->scan_type.realbits); return IIO_VAL_INT; case IIO_CHAN_INFO_RAW: - ret = st->data->bus_reg_read(st->back, + /* For RAW accesses, stay always in simple-spi. */ + ret = ad3552r_hs_reg_read(st, AD3552R_REG_ADDR_CH_DAC_16B(chan->channel), val, 2); if (ret) @@ -93,6 +133,7 @@ static int ad3552r_hs_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: + /* For RAW accesses, stay always in simple-spi. */ iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { return st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_CH_DAC_16B(chan->channel), @@ -104,6 +145,42 @@ static int ad3552r_hs_write_raw(struct iio_dev *indio_dev, } } +static int ad3552r_hs_set_bus_io_mode_hs(struct ad3552r_hs_state *st) +{ + int bus_mode; + + if (st->model_data->num_spi_data_lanes == 4) + bus_mode = AD3552R_IO_MODE_QSPI; + else + bus_mode = AD3552R_IO_MODE_DSPI; + + return st->data->bus_set_io_mode(st->back, bus_mode); +} + +static int ad3552r_hs_set_target_io_mode_hs(struct ad3552r_hs_state *st) +{ + u32 mode_target; + + /* + * Best access for secondary reg area, QSPI where possible, + * else as DSPI. + */ + if (st->model_data->num_spi_data_lanes == 4) + mode_target = AD3552R_QUAD_SPI; + else + mode_target = AD3552R_DUAL_SPI; + + /* + * Better to not use update here, since generally it is already + * set as DDR mode, and it's not possible to read in DDR mode. + */ + return st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_TRANSFER_REGISTER, + FIELD_PREP(AD3552R_MASK_MULTI_IO_MODE, + mode_target) | + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, 1); +} + static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) { struct ad3552r_hs_state *st = iio_priv(indio_dev); @@ -132,10 +209,10 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) return -EINVAL; } - ret = st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_STREAM_MODE, - loop_len, 1); - if (ret) - return ret; + /* + * With ad3541/2r support, QSPI pin is held low at reset from HDL, + * streaming start sequence must respect strictly the order below. + */ /* Primary region access, set streaming mode (now in SPI + SDR). */ ret = ad3552r_qspi_update_reg_bits(st, @@ -144,47 +221,98 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) if (ret) return ret; - /* Inform DAC chip to switch into DDR mode */ + /* + * Set target loop len, keeping the value: streaming writes at address + * 0x2c or 0x2a, in descending loop (2 or 4 bytes), keeping loop len + * value so that it's not cleared hereafter when _CS is deasserted. + */ ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_D, - AD3552R_MASK_SPI_CONFIG_DDR, - AD3552R_MASK_SPI_CONFIG_DDR, 1); + AD3552R_REG_ADDR_TRANSFER_REGISTER, + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, 1); if (ret) - goto exit_err_ddr; + goto exit_err_streaming; + + ret = st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_STREAM_MODE, + loop_len, 1); + if (ret) + goto exit_err_streaming; + + st->config_d |= AD3552R_MASK_SPI_CONFIG_DDR; + ret = st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_INTERFACE_CONFIG_D, + st->config_d, 1); + if (ret) + goto exit_err_streaming; - /* Inform DAC IP to go for DDR mode from now on */ ret = iio_backend_ddr_enable(st->back); - if (ret) { - dev_err(st->dev, "could not set DDR mode, not streaming"); - goto exit_err; - } + if (ret) + goto exit_err_ddr_mode_target; + /* + * From here onward mode is DDR, so reading any register is not possible + * anymore, including calling "ad3552r_qspi_update_reg_bits" function. + */ + + /* Set target to best high speed mode (D or QSPI). */ + ret = ad3552r_hs_set_target_io_mode_hs(st); + if (ret) + goto exit_err_ddr_mode; + + /* Set bus to best high speed mode (D or QSPI). */ + ret = ad3552r_hs_set_bus_io_mode_hs(st); + if (ret) + goto exit_err_bus_mode_target; + + /* + * Backend setup must be done now only, or related register values will + * be disrupted by previous bus accesses. + */ ret = iio_backend_data_transfer_addr(st->back, val); if (ret) - goto exit_err; + goto exit_err_bus_mode_target; ret = iio_backend_data_format_set(st->back, 0, &fmt); if (ret) - goto exit_err; + goto exit_err_bus_mode_target; ret = iio_backend_data_stream_enable(st->back); if (ret) - goto exit_err; + goto exit_err_bus_mode_target; return 0; -exit_err: - ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_D, - AD3552R_MASK_SPI_CONFIG_DDR, - 0, 1); +exit_err_bus_mode_target: + /* Back to simple SPI, not using update to avoid read. */ + st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_TRANSFER_REGISTER, + FIELD_PREP(AD3552R_MASK_MULTI_IO_MODE, + AD3552R_SPI) | + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, 1); + /* + * Back bus to simple SPI, this must be executed together with above + * target mode unwind, and can be done only after it. + */ + st->data->bus_set_io_mode(st->back, AD3552R_IO_MODE_SPI); + +exit_err_ddr_mode: iio_backend_ddr_disable(st->back); -exit_err_ddr: - ad3552r_qspi_update_reg_bits(st, AD3552R_REG_ADDR_INTERFACE_CONFIG_B, - AD3552R_MASK_SINGLE_INST, - AD3552R_MASK_SINGLE_INST, 1); +exit_err_ddr_mode_target: + /* + * Back to SDR. In DDR we cannot read, whatever the mode is, so not + * using update. + */ + st->config_d &= ~AD3552R_MASK_SPI_CONFIG_DDR; + st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_INTERFACE_CONFIG_D, + st->config_d, 1); + +exit_err_streaming: + /* Back to single instruction mode, disabling loop. */ + st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST | + AD3552R_MASK_SHORT_INSTRUCTION, 1); return ret; } @@ -198,11 +326,22 @@ static int ad3552r_hs_buffer_predisable(struct iio_dev *indio_dev) if (ret) return ret; - /* Inform DAC to set in SDR mode */ - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_D, - AD3552R_MASK_SPI_CONFIG_DDR, - 0, 1); + /* + * Set us to simple SPI, even if still in ddr, so to be able to write + * in primary region. + */ + ret = st->data->bus_set_io_mode(st->back, AD3552R_IO_MODE_SPI); + if (ret) + return ret; + + /* + * Back to SDR (in DDR we cannot read, whatever the mode is, so not + * using update). + */ + st->config_d &= ~AD3552R_MASK_SPI_CONFIG_DDR; + ret = st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_INTERFACE_CONFIG_D, + st->config_d, 1); if (ret) return ret; @@ -210,6 +349,17 @@ static int ad3552r_hs_buffer_predisable(struct iio_dev *indio_dev) if (ret) return ret; + /* + * Back to simple SPI for secondary region too now, so to be able to + * dump/read registers there too if needed. + */ + ret = ad3552r_qspi_update_reg_bits(st, + AD3552R_REG_ADDR_TRANSFER_REGISTER, + AD3552R_MASK_MULTI_IO_MODE, + AD3552R_SPI, 1); + if (ret) + return ret; + /* Back to single instruction mode, disabling loop. */ ret = ad3552r_qspi_update_reg_bits(st, AD3552R_REG_ADDR_INTERFACE_CONFIG_B, @@ -324,6 +474,7 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) if (ret) return ret; + /* HDL starts with DDR enabled, disabling it. */ ret = iio_backend_ddr_disable(st->back); if (ret) return ret; @@ -339,15 +490,23 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) if (ret) return ret; - ret = st->data->bus_reg_read(st->back, AD3552R_REG_ADDR_PRODUCT_ID_L, - &val, 1); + /* + * Caching config_d, needed to restore it after streaming, + * and also, to detect possible DDR read, that's not allowed. + */ + ret = st->data->bus_reg_read(st->back, + AD3552R_REG_ADDR_INTERFACE_CONFIG_D, + &st->config_d, 1); + if (ret) + return ret; + + ret = ad3552r_hs_reg_read(st, AD3552R_REG_ADDR_PRODUCT_ID_L, &val, 1); if (ret) return ret; id = val; - ret = st->data->bus_reg_read(st->back, AD3552R_REG_ADDR_PRODUCT_ID_H, - &val, 1); + ret = ad3552r_hs_reg_read(st, AD3552R_REG_ADDR_PRODUCT_ID_H, &val, 1); if (ret) return ret; @@ -357,6 +516,8 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) "chip ID mismatch, detected 0x%x but expected 0x%x\n", id, st->model_data->chip_id); + dev_dbg(st->dev, "chip id %s detected", st->model_data->model_name); + /* Clear reset error flag, see ad3552r manual, rev B table 38. */ ret = st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_ERR_STATUS, AD3552R_MASK_RESET_STATUS, 1); @@ -369,14 +530,6 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) if (ret) return ret; - ret = st->data->bus_reg_write(st->back, - AD3552R_REG_ADDR_TRANSFER_REGISTER, - FIELD_PREP(AD3552R_MASK_MULTI_IO_MODE, - AD3552R_QUAD_SPI) | - AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, 1); - if (ret) - return ret; - ret = iio_backend_data_source_set(st->back, 0, IIO_BACKEND_EXTERNAL); if (ret) return ret; @@ -400,10 +553,12 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) ret = ad3552r_get_drive_strength(st->dev, &val); if (!ret) { - ret = ad3552r_qspi_update_reg_bits(st, + st->config_d |= + FIELD_PREP(AD3552R_MASK_SDO_DRIVE_STRENGTH, val); + + ret = st->data->bus_reg_write(st->back, AD3552R_REG_ADDR_INTERFACE_CONFIG_D, - AD3552R_MASK_SDO_DRIVE_STRENGTH, - val, 1); + st->config_d, 1); if (ret) return ret; } @@ -533,6 +688,9 @@ static int ad3552r_hs_probe(struct platform_device *pdev) } static const struct of_device_id ad3552r_hs_of_id[] = { + { .compatible = "adi,ad3541r", .data = &ad3541r_model_data }, + { .compatible = "adi,ad3542r", .data = &ad3542r_model_data }, + { .compatible = "adi,ad3551r", .data = &ad3551r_model_data }, { .compatible = "adi,ad3552r", .data = &ad3552r_model_data }, { } }; diff --git a/drivers/iio/dac/ad3552r.h b/drivers/iio/dac/ad3552r.h index 3dc8d1d9c0f9..768fa264d39e 100644 --- a/drivers/iio/dac/ad3552r.h +++ b/drivers/iio/dac/ad3552r.h @@ -132,6 +132,8 @@ #define AD3552R_MAX_RANGES 5 #define AD3542R_MAX_RANGES 5 +#define AD3552R_SPI 0 +#define AD3552R_DUAL_SPI 1 #define AD3552R_QUAD_SPI 2 extern const struct ad3552r_model_data ad3541r_model_data; @@ -153,6 +155,7 @@ struct ad3552r_model_data { const s32 (*ranges_table)[2]; int num_ranges; bool requires_output_range; + int num_spi_data_lanes; }; struct ad3552r_ch_data { From 1ec0d78dec8d6c4af391bac7d011ad46ae777632 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Tue, 14 Jan 2025 16:30:18 +0100 Subject: [PATCH 0104/2157] iio: dac: ad3552r-hs: update function name (non functional) Update ad3552r_qspi_update_reg_bits function name to a more generic name, since used mode can be SIMPLE/DUAL/QUAD SPI. Reviewed-by: David Lechner Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250114-wip-bl-ad3552r-axi-v0-iio-testing-carlos-v4-9-979402e33545@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-hs.c | 60 +++++++++++++++++------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index e8e309046f11..c1dae58c1975 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -65,9 +65,8 @@ static int ad3552r_hs_reg_read(struct ad3552r_hs_state *st, u32 reg, u32 *val, return st->data->bus_reg_read(st->back, reg, val, xfer_size); } -static int ad3552r_qspi_update_reg_bits(struct ad3552r_hs_state *st, - u32 reg, u32 mask, u32 val, - size_t xfer_size) +static int ad3552r_hs_update_reg_bits(struct ad3552r_hs_state *st, u32 reg, + u32 mask, u32 val, size_t xfer_size) { u32 rval; int ret; @@ -215,9 +214,9 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) */ /* Primary region access, set streaming mode (now in SPI + SDR). */ - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_B, - AD3552R_MASK_SINGLE_INST, 0, 1); + ret = ad3552r_hs_update_reg_bits(st, + AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST, 0, 1); if (ret) return ret; @@ -226,10 +225,10 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) * 0x2c or 0x2a, in descending loop (2 or 4 bytes), keeping loop len * value so that it's not cleared hereafter when _CS is deasserted. */ - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_TRANSFER_REGISTER, - AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, - AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, 1); + ret = ad3552r_hs_update_reg_bits(st, AD3552R_REG_ADDR_TRANSFER_REGISTER, + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, + AD3552R_MASK_STREAM_LENGTH_KEEP_VALUE, + 1); if (ret) goto exit_err_streaming; @@ -252,7 +251,7 @@ static int ad3552r_hs_buffer_postenable(struct iio_dev *indio_dev) /* * From here onward mode is DDR, so reading any register is not possible - * anymore, including calling "ad3552r_qspi_update_reg_bits" function. + * anymore, including calling "ad3552r_hs_update_reg_bits" function. */ /* Set target to best high speed mode (D or QSPI). */ @@ -353,18 +352,17 @@ static int ad3552r_hs_buffer_predisable(struct iio_dev *indio_dev) * Back to simple SPI for secondary region too now, so to be able to * dump/read registers there too if needed. */ - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_TRANSFER_REGISTER, - AD3552R_MASK_MULTI_IO_MODE, - AD3552R_SPI, 1); + ret = ad3552r_hs_update_reg_bits(st, AD3552R_REG_ADDR_TRANSFER_REGISTER, + AD3552R_MASK_MULTI_IO_MODE, + AD3552R_SPI, 1); if (ret) return ret; /* Back to single instruction mode, disabling loop. */ - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_B, - AD3552R_MASK_SINGLE_INST, - AD3552R_MASK_SINGLE_INST, 1); + ret = ad3552r_hs_update_reg_bits(st, + AD3552R_REG_ADDR_INTERFACE_CONFIG_B, + AD3552R_MASK_SINGLE_INST, + AD3552R_MASK_SINGLE_INST, 1); if (ret) return ret; @@ -381,10 +379,10 @@ static inline int ad3552r_hs_set_output_range(struct ad3552r_hs_state *st, else val = FIELD_PREP(AD3552R_MASK_CH1_RANGE, mode); - return ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_CH0_CH1_OUTPUT_RANGE, - AD3552R_MASK_CH_OUTPUT_RANGE_SEL(ch), - val, 1); + return ad3552r_hs_update_reg_bits(st, + AD3552R_REG_ADDR_CH0_CH1_OUTPUT_RANGE, + AD3552R_MASK_CH_OUTPUT_RANGE_SEL(ch), + val, 1); } static int ad3552r_hs_reset(struct ad3552r_hs_state *st) @@ -400,10 +398,10 @@ static int ad3552r_hs_reset(struct ad3552r_hs_state *st) fsleep(10); gpiod_set_value_cansleep(st->reset_gpio, 0); } else { - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_INTERFACE_CONFIG_A, - AD3552R_MASK_SOFTWARE_RESET, - AD3552R_MASK_SOFTWARE_RESET, 1); + ret = ad3552r_hs_update_reg_bits(st, + AD3552R_REG_ADDR_INTERFACE_CONFIG_A, + AD3552R_MASK_SOFTWARE_RESET, + AD3552R_MASK_SOFTWARE_RESET, 1); if (ret) return ret; } @@ -544,10 +542,10 @@ static int ad3552r_hs_setup(struct ad3552r_hs_state *st) val = ret; - ret = ad3552r_qspi_update_reg_bits(st, - AD3552R_REG_ADDR_SH_REFERENCE_CONFIG, - AD3552R_MASK_REFERENCE_VOLTAGE_SEL, - val, 1); + ret = ad3552r_hs_update_reg_bits(st, + AD3552R_REG_ADDR_SH_REFERENCE_CONFIG, + AD3552R_MASK_REFERENCE_VOLTAGE_SEL, + val, 1); if (ret) return ret; From 3bb415513c52563cc608c4495c6a2ab453ca09ce Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 21 Jan 2025 15:20:07 -0800 Subject: [PATCH 0105/2157] iio: cros_ec: Trace EC sensors command For debugging, add tracing for EC_CMD_MOTION_SENSE_CMD command: - decode the name of the subcommand - provide internal information for the most common sub-commands: setting range, frequency, EC probing frequency, ... - display return status. When enabled, the tracing output is similar to: /sys/kernel/debug/tracing # echo 1 > events/cros_ec/enable ; echo 1 > tracing_on ; cat trace_pipe | grep MOTIONSENSE_CMD_SENSOR_ODR SensorDeviceImp-814 [003] ..... 686.176782: cros_ec_motion_host_cmd: MOTIONSENSE_CMD_SENSOR_ODR, id: 1, data: 200000, result: 4, return: 12500 Signed-off-by: Gwendal Grignou Reviewed-by: Tzung-Bi Shih Link: https://patch.msgid.link/20250121232007.1020666-1-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/Makefile | 3 +- .../cros_ec_sensors/cros_ec_sensors_core.c | 9 ++- .../cros_ec_sensors/cros_ec_sensors_trace.c | 32 +++++++++++ .../cros_ec_sensors/cros_ec_sensors_trace.h | 56 +++++++++++++++++++ 4 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.c create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.h diff --git a/drivers/iio/common/cros_ec_sensors/Makefile b/drivers/iio/common/cros_ec_sensors/Makefile index e0a33ab66d21..c358fa0328ab 100644 --- a/drivers/iio/common/cros_ec_sensors/Makefile +++ b/drivers/iio/common/cros_ec_sensors/Makefile @@ -3,6 +3,7 @@ # Makefile for sensors seen through the ChromeOS EC sensor hub. # -obj-$(CONFIG_IIO_CROS_EC_SENSORS_CORE) += cros_ec_sensors_core.o +cros-ec-sensors-core-objs += cros_ec_sensors_core.o cros_ec_sensors_trace.o +obj-$(CONFIG_IIO_CROS_EC_SENSORS_CORE) += cros-ec-sensors-core.o obj-$(CONFIG_IIO_CROS_EC_SENSORS) += cros_ec_sensors.o obj-$(CONFIG_IIO_CROS_EC_SENSORS_LID_ANGLE) += cros_ec_lid_angle.o diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index 9fc71a73caa1..7751d6f69b12 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -23,6 +23,8 @@ #include #include +#include "cros_ec_sensors_trace.h" + /* * Hard coded to the first device to support sensor fifo. The EC has a 2048 * byte fifo and will trigger an interrupt when fifo is 2/3 full. @@ -413,6 +415,7 @@ EXPORT_SYMBOL_GPL(cros_ec_sensors_core_register); int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *state, u16 opt_length) { + struct ec_response_motion_sense *resp = (struct ec_response_motion_sense *)state->msg->data; int ret; if (opt_length) @@ -423,12 +426,12 @@ int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *state, memcpy(state->msg->data, &state->param, sizeof(state->param)); ret = cros_ec_cmd_xfer_status(state->ec, state->msg); + trace_cros_ec_motion_host_cmd(&state->param, resp, ret); if (ret < 0) return ret; - if (ret && - state->resp != (struct ec_response_motion_sense *)state->msg->data) - memcpy(state->resp, state->msg->data, ret); + if (ret && state->resp != resp) + memcpy(state->resp, resp, ret); return 0; } diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.c new file mode 100644 index 000000000000..c4db949fa775 --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Trace events for the ChromeOS Embedded Controller +// +// Copyright 2025 Google LLC. + +#define TRACE_SYMBOL(a) {a, #a} + +// Generate the list using the following script: +// sed -n 's/^.*\(MOTIONSENSE_CMD.*\) = .*,$/\tTRACE_SYMBOL(\1), \\/p' include/linux/platform_data/cros_ec_commands.h +#define MOTIONSENSE_CMDS \ + TRACE_SYMBOL(MOTIONSENSE_CMD_DUMP), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_INFO), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_EC_RATE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SENSOR_ODR), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SENSOR_RANGE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_KB_WAKE_ANGLE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_DATA), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_FIFO_INFO), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_FIFO_FLUSH), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_FIFO_READ), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_PERFORM_CALIB), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SENSOR_OFFSET), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_LIST_ACTIVITIES), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SET_ACTIVITY), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_LID_ANGLE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_FIFO_INT_ENABLE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SPOOF), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE), \ + TRACE_SYMBOL(MOTIONSENSE_CMD_SENSOR_SCALE) + +#define CREATE_TRACE_POINTS +#include "cros_ec_sensors_trace.h" diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.h b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.h new file mode 100644 index 000000000000..8956f2e8ad08 --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_trace.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Trace events for the ChromeOS Embedded Controller + * + * Copyright 2025 Google LLC. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cros_ec + +#if !defined(_CROS_EC_SENSORS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _CROS_EC_SENSORS_TRACE_H_ + +#include +#include +#include +#include + +#include + +TRACE_EVENT(cros_ec_motion_host_cmd, + TP_PROTO(struct ec_params_motion_sense *param, + struct ec_response_motion_sense *resp, + int retval), + TP_ARGS(param, resp, retval), + TP_STRUCT__entry(__field(uint8_t, cmd) + __field(uint8_t, sensor_id) + __field(uint32_t, data) + __field(int, retval) + __field(int32_t, ret) + ), + TP_fast_assign(__entry->cmd = param->cmd; + __entry->sensor_id = param->sensor_odr.sensor_num; + __entry->data = param->sensor_odr.data; + __entry->retval = retval; + __entry->ret = retval > 0 ? resp->sensor_odr.ret : -1; + ), + TP_printk("%s, id: %d, data: %u, result: %u, return: %d", + __print_symbolic(__entry->cmd, MOTIONSENSE_CMDS), + __entry->sensor_id, + __entry->data, + __entry->retval, + __entry->ret) +); + +#endif /* _CROS_EC_SENSORS_TRACE_H_ */ + +/* this part must be outside header guard */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/iio/common/cros_ec_sensors + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cros_ec_sensors_trace + +#include From 3ea0944dca9b855da474fbe08401fb82b2d9af99 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 22 Jan 2025 17:16:59 -0600 Subject: [PATCH 0106/2157] iio: dac: ad5791: fix storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IIO uses "natural" alignment so storagebits should always be a power of 2. Change storagebits to 32 since that is the natural size to store 24 bits of data. The ad5791 driver currently doesn't use this field anywhere and doesn't support buffered writes, so this does not change anything. We just don't want anyone to think that it is OK to have storagebits = 24 in other drivers. Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250122-iio-dac-ad5791-fix-storagebits-v1-1-53746e0f25cd@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5791.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 57374f78f6b8..034228a7c059 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -294,7 +294,7 @@ static const struct ad5791_chip_info _name##_chip_info = { \ .scan_type = { \ .sign = 'u', \ .realbits = (bits), \ - .storagebits = 24, \ + .storagebits = 32, \ .shift = (_shift), \ }, \ .ext_info = ad5791_ext_info, \ From cf67879bd4280f6243103e281595f9b523c61481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Jan 2025 15:07:09 +0100 Subject: [PATCH 0107/2157] iio: adc: ad7124: Micro-optimize channel disabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The key objective in ad7124_disable_one() is clearing the AD7124_CHANNEL_EN_MSK bit in the channel register. However there is no advantage to keep the other bits in that register because when the channel is used next time, all fields are rewritten anyhow. So instead of using ad7124_spi_write_mask() (which is a register read plus a register write) use a simple register write clearing the complete register. Also do the same in the .disable_all() callback by using the .disable_one() callback there. Signed-off-by: Uwe Kleine-König Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250120140708.1093655-2-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 6ae27cdd3250..2fdeb3247952 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -540,6 +540,14 @@ static int ad7124_append_status(struct ad_sigma_delta *sd, bool append) return 0; } +static int ad7124_disable_one(struct ad_sigma_delta *sd, unsigned int chan) +{ + struct ad7124_state *st = container_of(sd, struct ad7124_state, sd); + + /* The relevant thing here is that AD7124_CHANNEL_EN_MSK is cleared. */ + return ad_sd_write_reg(&st->sd, AD7124_CHANNEL(chan), 2, 0); +} + static int ad7124_disable_all(struct ad_sigma_delta *sd) { struct ad7124_state *st = container_of(sd, struct ad7124_state, sd); @@ -547,7 +555,7 @@ static int ad7124_disable_all(struct ad_sigma_delta *sd) int i; for (i = 0; i < st->num_channels; i++) { - ret = ad7124_spi_write_mask(st, AD7124_CHANNEL(i), AD7124_CHANNEL_EN_MSK, 0, 2); + ret = ad7124_disable_one(sd, i); if (ret < 0) return ret; } @@ -555,13 +563,6 @@ static int ad7124_disable_all(struct ad_sigma_delta *sd) return 0; } -static int ad7124_disable_one(struct ad_sigma_delta *sd, unsigned int chan) -{ - struct ad7124_state *st = container_of(sd, struct ad7124_state, sd); - - return ad7124_spi_write_mask(st, AD7124_CHANNEL(chan), AD7124_CHANNEL_EN_MSK, 0, 2); -} - static const struct ad_sigma_delta_info ad7124_sigma_delta_info = { .set_channel = ad7124_set_channel, .append_status = ad7124_append_status, From 9c7eb1ab2eec47ad9eaf6e11ce14d3d6fd54e677 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 19 Jan 2025 18:31:58 +0100 Subject: [PATCH 0108/2157] iio: light: veml6030: extend regmap to support regfields Add support for regfields as well to simplify register operations, taking into account the different fields for the veml6030/veml7700 and veml6035. Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20250119-veml6030-scale-v2-1-6bfc4062a371@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6030.c | 95 ++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 25 deletions(-) diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c index 9b71825eea9b..8e4eb8b0c192 100644 --- a/drivers/iio/light/veml6030.c +++ b/drivers/iio/light/veml6030.c @@ -59,18 +59,31 @@ #define VEML6035_INT_CHAN BIT(3) #define VEML6035_CHAN_EN BIT(2) +/* Regfields */ +#define VEML6030_GAIN_RF REG_FIELD(VEML6030_REG_ALS_CONF, 11, 12) +#define VEML6030_IT_RF REG_FIELD(VEML6030_REG_ALS_CONF, 6, 9) + +#define VEML6035_GAIN_RF REG_FIELD(VEML6030_REG_ALS_CONF, 10, 12) + enum veml6030_scan { VEML6030_SCAN_ALS, VEML6030_SCAN_WH, VEML6030_SCAN_TIMESTAMP, }; +struct veml6030_rf { + struct regmap_field *it; + struct regmap_field *gain; +}; + struct veml603x_chip { const char *name; const int(*scale_vals)[][2]; const int num_scale_vals; const struct iio_chan_spec *channels; const int num_channels; + const struct reg_field gain_rf; + const struct reg_field it_rf; int (*hw_init)(struct iio_dev *indio_dev, struct device *dev); int (*set_info)(struct iio_dev *indio_dev); int (*set_als_gain)(struct iio_dev *indio_dev, int val, int val2); @@ -91,6 +104,7 @@ struct veml603x_chip { struct veml6030_data { struct i2c_client *client; struct regmap *regmap; + struct veml6030_rf rf; int cur_resolution; int cur_gain; int cur_integration_time; @@ -330,17 +344,17 @@ static const struct regmap_config veml6030_regmap_config = { static int veml6030_get_intgrn_tm(struct iio_dev *indio_dev, int *val, int *val2) { - int ret, reg; + int it_idx, ret; struct veml6030_data *data = iio_priv(indio_dev); - ret = regmap_read(data->regmap, VEML6030_REG_ALS_CONF, ®); + ret = regmap_field_read(data->rf.it, &it_idx); if (ret) { dev_err(&data->client->dev, "can't read als conf register %d\n", ret); return ret; } - switch ((reg >> 6) & 0xF) { + switch (it_idx) { case 0: *val2 = 100000; break; @@ -405,8 +419,7 @@ static int veml6030_set_intgrn_tm(struct iio_dev *indio_dev, return -EINVAL; } - ret = regmap_update_bits(data->regmap, VEML6030_REG_ALS_CONF, - VEML6030_ALS_IT, new_int_time); + ret = regmap_field_write(data->rf.it, new_int_time); if (ret) { dev_err(&data->client->dev, "can't update als integration time %d\n", ret); @@ -510,23 +523,22 @@ static int veml6030_set_als_gain(struct iio_dev *indio_dev, struct veml6030_data *data = iio_priv(indio_dev); if (val == 0 && val2 == 125000) { - new_gain = 0x1000; /* 0x02 << 11 */ + new_gain = 0x01; gain_idx = 3; } else if (val == 0 && val2 == 250000) { - new_gain = 0x1800; + new_gain = 0x11; gain_idx = 2; } else if (val == 1 && val2 == 0) { new_gain = 0x00; gain_idx = 1; } else if (val == 2 && val2 == 0) { - new_gain = 0x800; + new_gain = 0x01; gain_idx = 0; } else { return -EINVAL; } - ret = regmap_update_bits(data->regmap, VEML6030_REG_ALS_CONF, - VEML6030_ALS_GAIN, new_gain); + ret = regmap_field_write(data->rf.gain, new_gain); if (ret) { dev_err(&data->client->dev, "can't set als gain %d\n", ret); @@ -544,30 +556,31 @@ static int veml6035_set_als_gain(struct iio_dev *indio_dev, int val, int val2) struct veml6030_data *data = iio_priv(indio_dev); if (val == 0 && val2 == 125000) { - new_gain = VEML6035_SENS; + new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS); gain_idx = 5; } else if (val == 0 && val2 == 250000) { - new_gain = VEML6035_SENS | VEML6035_GAIN; + new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS | + VEML6035_GAIN); gain_idx = 4; } else if (val == 0 && val2 == 500000) { - new_gain = VEML6035_SENS | VEML6035_GAIN | - VEML6035_DG; + new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS | + VEML6035_GAIN | VEML6035_DG); gain_idx = 3; } else if (val == 1 && val2 == 0) { new_gain = 0x0000; gain_idx = 2; } else if (val == 2 && val2 == 0) { - new_gain = VEML6035_GAIN; + new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_GAIN); gain_idx = 1; } else if (val == 4 && val2 == 0) { - new_gain = VEML6035_GAIN | VEML6035_DG; + new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_GAIN | + VEML6035_DG); gain_idx = 0; } else { return -EINVAL; } - ret = regmap_update_bits(data->regmap, VEML6030_REG_ALS_CONF, - VEML6035_GAIN_M, new_gain); + ret = regmap_field_write(data->rf.gain, new_gain); if (ret) { dev_err(&data->client->dev, "can't set als gain %d\n", ret); return ret; @@ -581,17 +594,17 @@ static int veml6035_set_als_gain(struct iio_dev *indio_dev, int val, int val2) static int veml6030_get_als_gain(struct iio_dev *indio_dev, int *val, int *val2) { - int ret, reg; + int gain, ret; struct veml6030_data *data = iio_priv(indio_dev); - ret = regmap_read(data->regmap, VEML6030_REG_ALS_CONF, ®); + ret = regmap_field_read(data->rf.gain, &gain); if (ret) { dev_err(&data->client->dev, "can't read als conf register %d\n", ret); return ret; } - switch ((reg >> 11) & 0x03) { + switch (gain) { case 0: *val = 1; *val2 = 0; @@ -617,17 +630,17 @@ static int veml6030_get_als_gain(struct iio_dev *indio_dev, static int veml6035_get_als_gain(struct iio_dev *indio_dev, int *val, int *val2) { - int ret, reg; + int gain, ret; struct veml6030_data *data = iio_priv(indio_dev); - ret = regmap_read(data->regmap, VEML6030_REG_ALS_CONF, ®); + ret = regmap_field_read(data->rf.gain, &gain); if (ret) { dev_err(&data->client->dev, - "can't read als conf register %d\n", ret); + "can't read als conf register %d\n", ret); return ret; } - switch (FIELD_GET(VEML6035_GAIN_M, reg)) { + switch (gain) { case 0: *val = 1; *val2 = 0; @@ -990,6 +1003,27 @@ static int veml7700_set_info(struct iio_dev *indio_dev) return 0; } +static int veml6030_regfield_init(struct iio_dev *indio_dev) +{ + struct veml6030_data *data = iio_priv(indio_dev); + struct regmap *regmap = data->regmap; + struct device *dev = &data->client->dev; + struct regmap_field *rm_field; + struct veml6030_rf *rf = &data->rf; + + rm_field = devm_regmap_field_alloc(dev, regmap, data->chip->it_rf); + if (IS_ERR(rm_field)) + return PTR_ERR(rm_field); + rf->it = rm_field; + + rm_field = devm_regmap_field_alloc(dev, regmap, data->chip->gain_rf); + if (IS_ERR(rm_field)) + return PTR_ERR(rm_field); + rf->gain = rm_field; + + return 0; +} + /* * Set ALS gain to 1/8, integration time to 100 ms, PSM to mode 2, * persistence to 1 x integration time and the threshold @@ -1143,6 +1177,11 @@ static int veml6030_probe(struct i2c_client *client) if (ret < 0) return ret; + ret = veml6030_regfield_init(indio_dev); + if (ret) + return dev_err_probe(&client->dev, ret, + "failed to init regfields\n"); + ret = data->chip->hw_init(indio_dev, &client->dev); if (ret < 0) return ret; @@ -1191,6 +1230,8 @@ static const struct veml603x_chip veml6030_chip = { .num_scale_vals = ARRAY_SIZE(veml6030_scale_vals), .channels = veml6030_channels, .num_channels = ARRAY_SIZE(veml6030_channels), + .gain_rf = VEML6030_GAIN_RF, + .it_rf = VEML6030_IT_RF, .hw_init = veml6030_hw_init, .set_info = veml6030_set_info, .set_als_gain = veml6030_set_als_gain, @@ -1203,6 +1244,8 @@ static const struct veml603x_chip veml6035_chip = { .num_scale_vals = ARRAY_SIZE(veml6035_scale_vals), .channels = veml6030_channels, .num_channels = ARRAY_SIZE(veml6030_channels), + .gain_rf = VEML6035_GAIN_RF, + .it_rf = VEML6030_IT_RF, .hw_init = veml6035_hw_init, .set_info = veml6030_set_info, .set_als_gain = veml6035_set_als_gain, @@ -1215,6 +1258,8 @@ static const struct veml603x_chip veml7700_chip = { .num_scale_vals = ARRAY_SIZE(veml6030_scale_vals), .channels = veml7700_channels, .num_channels = ARRAY_SIZE(veml7700_channels), + .gain_rf = VEML6030_GAIN_RF, + .it_rf = VEML6030_IT_RF, .hw_init = veml6030_hw_init, .set_info = veml7700_set_info, .set_als_gain = veml6030_set_als_gain, From 46e28867540434f94ddb745c74466f40669bcc48 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sun, 19 Jan 2025 18:31:59 +0100 Subject: [PATCH 0109/2157] iio: light: veml6030: extend regmap to support caching The configuration registers are not volatile and are not affected by read operations (i.e. not precious), making them suitable to be cached in order to reduce the number of accesses to the device. Add support for caching (RBTREE type). Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20250119-veml6030-scale-v2-2-6bfc4062a371@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6030.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c index 8e4eb8b0c192..3afd4bb1ad53 100644 --- a/drivers/iio/light/veml6030.c +++ b/drivers/iio/light/veml6030.c @@ -333,12 +333,43 @@ static const struct iio_chan_spec veml7700_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(VEML6030_SCAN_TIMESTAMP), }; +static const struct regmap_range veml6030_readable_ranges[] = { + regmap_reg_range(VEML6030_REG_ALS_CONF, VEML6030_REG_ALS_INT), +}; + +static const struct regmap_access_table veml6030_readable_table = { + .yes_ranges = veml6030_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(veml6030_readable_ranges), +}; + +static const struct regmap_range veml6030_writable_ranges[] = { + regmap_reg_range(VEML6030_REG_ALS_CONF, VEML6030_REG_ALS_PSM), +}; + +static const struct regmap_access_table veml6030_writable_table = { + .yes_ranges = veml6030_writable_ranges, + .n_yes_ranges = ARRAY_SIZE(veml6030_writable_ranges), +}; + +static const struct regmap_range veml6030_volatile_ranges[] = { + regmap_reg_range(VEML6030_REG_ALS_DATA, VEML6030_REG_WH_DATA), +}; + +static const struct regmap_access_table veml6030_volatile_table = { + .yes_ranges = veml6030_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(veml6030_volatile_ranges), +}; + static const struct regmap_config veml6030_regmap_config = { .name = "veml6030_regmap", .reg_bits = 8, .val_bits = 16, .max_register = VEML6030_REG_ALS_INT, .val_format_endian = REGMAP_ENDIAN_LITTLE, + .rd_table = &veml6030_readable_table, + .wr_table = &veml6030_writable_table, + .volatile_table = &veml6030_volatile_table, + .cache_type = REGCACHE_RBTREE, }; static int veml6030_get_intgrn_tm(struct iio_dev *indio_dev, From 2b368419955de59600973a48192f04e1704e89d6 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Mon, 20 Jan 2025 22:56:19 +0100 Subject: [PATCH 0110/2157] dt-bindings: iio: magnetometer: add binding for Si7210 Silicon Labs Si7210 is an I2C Hall effect magnetic position and temperature sensor. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Antoni Pokusinski Link: https://patch.msgid.link/20250120215620.39766-2-apokusinski01@gmail.com Signed-off-by: Jonathan Cameron --- .../iio/magnetometer/silabs,si7210.yaml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml diff --git a/Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml b/Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml new file mode 100644 index 000000000000..d4a3f7981c36 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/magnetometer/silabs,si7210.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Si7210 magnetic position and temperature sensor + +maintainers: + - Antoni Pokusinski + +description: | + Silabs Si7210 I2C Hall effect magnetic position and temperature sensor. + https://www.silabs.com/documents/public/data-sheets/si7210-datasheet.pdf + +properties: + compatible: + const: silabs,si7210 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + vdd-supply: + description: Regulator that provides power to the sensor + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + magnetometer@30 { + compatible = "silabs,si7210"; + reg = <0x30>; + interrupt-parent = <&gpio1>; + interrupts = <4 IRQ_TYPE_EDGE_FALLING>; + vdd-supply = <&vdd_3v3_reg>; + }; + }; From 15dbaed45b77ec7f31c9ea75ca90b9ae478b7299 Mon Sep 17 00:00:00 2001 From: Antoni Pokusinski Date: Mon, 20 Jan 2025 22:56:20 +0100 Subject: [PATCH 0111/2157] iio: magnetometer: si7210: add driver for Si7210 Silicon Labs Si7210 is an I2C Hall effect magnetic position and temperature sensor. The driver supports the following functionalities: * reading the temperature measurements * reading the magnetic field measurements in a single-shot mode * choosing the magnetic field measurement scale (20 or 200 mT) Reviewed-by: Andy Shevchenko Signed-off-by: Antoni Pokusinski Link: https://patch.msgid.link/20250120215620.39766-3-apokusinski01@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/Kconfig | 11 + drivers/iio/magnetometer/Makefile | 2 + drivers/iio/magnetometer/si7210.c | 446 ++++++++++++++++++++++++++++++ 3 files changed, 459 insertions(+) create mode 100644 drivers/iio/magnetometer/si7210.c diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 7177cd1d67cb..3debf1320ad1 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -235,6 +235,17 @@ config SENSORS_RM3100_SPI To compile this driver as a module, choose M here: the module will be called rm3100-spi. +config SI7210 + tristate "SI7210 Hall effect sensor" + depends on I2C + select REGMAP_I2C + help + Say Y here to add support for the SI7210 Hall effect sensor. + + This driver can also be compiled as a module. + To compile this driver as a module, choose M here: the module + will be called si7210. + config TI_TMAG5273 tristate "TI TMAG5273 Low-Power Linear 3D Hall-Effect Sensor" depends on I2C diff --git a/drivers/iio/magnetometer/Makefile b/drivers/iio/magnetometer/Makefile index 3e4c2ecd9adf..9297723a97d8 100644 --- a/drivers/iio/magnetometer/Makefile +++ b/drivers/iio/magnetometer/Makefile @@ -31,6 +31,8 @@ obj-$(CONFIG_SENSORS_RM3100) += rm3100-core.o obj-$(CONFIG_SENSORS_RM3100_I2C) += rm3100-i2c.o obj-$(CONFIG_SENSORS_RM3100_SPI) += rm3100-spi.o +obj-$(CONFIG_SI7210) += si7210.o + obj-$(CONFIG_TI_TMAG5273) += tmag5273.o obj-$(CONFIG_YAMAHA_YAS530) += yamaha-yas530.o diff --git a/drivers/iio/magnetometer/si7210.c b/drivers/iio/magnetometer/si7210.c new file mode 100644 index 000000000000..27e3feba7a0f --- /dev/null +++ b/drivers/iio/magnetometer/si7210.c @@ -0,0 +1,446 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Silicon Labs Si7210 Hall Effect sensor driver + * + * Copyright (c) 2024 Antoni Pokusinski + * + * Datasheet: + * https://www.silabs.com/documents/public/data-sheets/si7210-datasheet.pdf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers offsets and masks */ +#define SI7210_REG_DSPSIGM 0xC1 +#define SI7210_REG_DSPSIGL 0xC2 + +#define SI7210_MASK_DSPSIGSEL GENMASK(2, 0) +#define SI7210_REG_DSPSIGSEL 0xC3 + +#define SI7210_MASK_STOP BIT(1) +#define SI7210_MASK_ONEBURST BIT(2) +#define SI7210_REG_POWER_CTRL 0xC4 + +#define SI7210_MASK_ARAUTOINC BIT(0) +#define SI7210_REG_ARAUTOINC 0xC5 + +#define SI7210_REG_A0 0xCA +#define SI7210_REG_A1 0xCB +#define SI7210_REG_A2 0xCC +#define SI7210_REG_A3 0xCE +#define SI7210_REG_A4 0xCF +#define SI7210_REG_A5 0xD0 + +#define SI7210_REG_OTP_ADDR 0xE1 +#define SI7210_REG_OTP_DATA 0xE2 + +#define SI7210_MASK_OTP_READ_EN BIT(1) +#define SI7210_REG_OTP_CTRL 0xE3 + +/* OTP data registers offsets */ +#define SI7210_OTPREG_TMP_OFF 0x1D +#define SI7210_OTPREG_TMP_GAIN 0x1E + +#define SI7210_OTPREG_A0_20 0x21 +#define SI7210_OTPREG_A1_20 0x22 +#define SI7210_OTPREG_A2_20 0x23 +#define SI7210_OTPREG_A3_20 0x24 +#define SI7210_OTPREG_A4_20 0x25 +#define SI7210_OTPREG_A5_20 0x26 + +#define SI7210_OTPREG_A0_200 0x27 +#define SI7210_OTPREG_A1_200 0x28 +#define SI7210_OTPREG_A2_200 0x29 +#define SI7210_OTPREG_A3_200 0x2A +#define SI7210_OTPREG_A4_200 0x2B +#define SI7210_OTPREG_A5_200 0x2C + +#define A_REGS_COUNT 6 + +static const unsigned int a20_otp_regs[A_REGS_COUNT] = { + SI7210_OTPREG_A0_20, SI7210_OTPREG_A1_20, SI7210_OTPREG_A2_20, + SI7210_OTPREG_A3_20, SI7210_OTPREG_A4_20, SI7210_OTPREG_A5_20, +}; + +static const unsigned int a200_otp_regs[A_REGS_COUNT] = { + SI7210_OTPREG_A0_200, SI7210_OTPREG_A1_200, SI7210_OTPREG_A2_200, + SI7210_OTPREG_A3_200, SI7210_OTPREG_A4_200, SI7210_OTPREG_A5_200, +}; + +static const struct regmap_range si7210_read_reg_ranges[] = { + regmap_reg_range(SI7210_REG_DSPSIGM, SI7210_REG_ARAUTOINC), + regmap_reg_range(SI7210_REG_A0, SI7210_REG_A2), + regmap_reg_range(SI7210_REG_A3, SI7210_REG_A5), + regmap_reg_range(SI7210_REG_OTP_ADDR, SI7210_REG_OTP_CTRL), +}; + +static const struct regmap_access_table si7210_readable_regs = { + .yes_ranges = si7210_read_reg_ranges, + .n_yes_ranges = ARRAY_SIZE(si7210_read_reg_ranges), +}; + +static const struct regmap_range si7210_write_reg_ranges[] = { + regmap_reg_range(SI7210_REG_DSPSIGSEL, SI7210_REG_ARAUTOINC), + regmap_reg_range(SI7210_REG_A0, SI7210_REG_A2), + regmap_reg_range(SI7210_REG_A3, SI7210_REG_A5), + regmap_reg_range(SI7210_REG_OTP_ADDR, SI7210_REG_OTP_CTRL), +}; + +static const struct regmap_access_table si7210_writeable_regs = { + .yes_ranges = si7210_write_reg_ranges, + .n_yes_ranges = ARRAY_SIZE(si7210_write_reg_ranges), +}; + +static const struct regmap_range si7210_volatile_reg_ranges[] = { + regmap_reg_range(SI7210_REG_DSPSIGM, SI7210_REG_DSPSIGL), + regmap_reg_range(SI7210_REG_POWER_CTRL, SI7210_REG_POWER_CTRL), +}; + +static const struct regmap_access_table si7210_volatile_regs = { + .yes_ranges = si7210_volatile_reg_ranges, + .n_yes_ranges = ARRAY_SIZE(si7210_volatile_reg_ranges), +}; + +static const struct regmap_config si7210_regmap_conf = { + .reg_bits = 8, + .val_bits = 8, + .max_register = SI7210_REG_OTP_CTRL, + + .rd_table = &si7210_readable_regs, + .wr_table = &si7210_writeable_regs, + .volatile_table = &si7210_volatile_regs, +}; + +struct si7210_data { + struct regmap *regmap; + struct i2c_client *client; + struct regulator *vdd; + struct mutex fetch_lock; /* lock for a single measurement fetch */ + s8 temp_offset; + s8 temp_gain; + s8 scale_20_a[A_REGS_COUNT]; + s8 scale_200_a[A_REGS_COUNT]; + u8 curr_scale; +}; + +static const struct iio_chan_spec si7210_channels[] = { + { + .type = IIO_MAGN, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_OFFSET), + }, { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), + }, +}; + +static int si7210_fetch_measurement(struct si7210_data *data, + struct iio_chan_spec const *chan, + u16 *buf) +{ + u8 dspsigsel = chan->type == IIO_MAGN ? 0 : 1; + int ret; + __be16 result; + + guard(mutex)(&data->fetch_lock); + + ret = regmap_update_bits(data->regmap, SI7210_REG_DSPSIGSEL, + SI7210_MASK_DSPSIGSEL, dspsigsel); + if (ret) + return ret; + + ret = regmap_update_bits(data->regmap, SI7210_REG_POWER_CTRL, + SI7210_MASK_ONEBURST | SI7210_MASK_STOP, + SI7210_MASK_ONEBURST & ~SI7210_MASK_STOP); + if (ret) + return ret; + + /* + * Read the contents of the + * registers containing the result: DSPSIGM, DSPSIGL + */ + ret = regmap_bulk_read(data->regmap, SI7210_REG_DSPSIGM, + &result, sizeof(result)); + if (ret) + return ret; + + *buf = be16_to_cpu(result); + + return 0; +} + +static int si7210_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct si7210_data *data = iio_priv(indio_dev); + long long temp; + u16 dspsig; + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = si7210_fetch_measurement(data, chan, &dspsig); + if (ret) + return ret; + + *val = dspsig & GENMASK(14, 0); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + *val = 0; + if (data->curr_scale == 20) + *val2 = 12500; + else /* data->curr_scale == 200 */ + *val2 = 125000; + return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_OFFSET: + *val = -16384; + return IIO_VAL_INT; + case IIO_CHAN_INFO_PROCESSED: + ret = si7210_fetch_measurement(data, chan, &dspsig); + if (ret) + return ret; + + /* temp = 32 * Dspsigm[6:0] + (Dspsigl[7:0] >> 3) */ + temp = FIELD_GET(GENMASK(14, 3), dspsig); + temp = div_s64(-383 * temp * temp, 100) + 160940 * temp - 279800000; + temp *= (1 + (data->temp_gain / 2048)); + temp += (int)(MICRO / 16) * data->temp_offset; + + ret = regulator_get_voltage(data->vdd); + if (ret < 0) + return ret; + + /* temp -= 0.222 * VDD */ + temp -= 222 * div_s64(ret, MILLI); + + *val = div_s64(temp, MILLI); + + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int si7210_set_scale(struct si7210_data *data, unsigned int scale) +{ + s8 *a_otp_values; + int ret; + + if (scale == 20) + a_otp_values = data->scale_20_a; + else if (scale == 200) + a_otp_values = data->scale_200_a; + else + return -EINVAL; + + guard(mutex)(&data->fetch_lock); + + /* Write the registers 0xCA - 0xCC */ + ret = regmap_bulk_write(data->regmap, SI7210_REG_A0, a_otp_values, 3); + if (ret) + return ret; + + /* Write the registers 0xCE - 0xD0 */ + ret = regmap_bulk_write(data->regmap, SI7210_REG_A3, &a_otp_values[3], 3); + if (ret) + return ret; + + data->curr_scale = scale; + + return 0; +} + +static int si7210_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct si7210_data *data = iio_priv(indio_dev); + unsigned int scale; + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + if (val == 0 && val2 == 12500) + scale = 20; + else if (val == 0 && val2 == 125000) + scale = 200; + else + return -EINVAL; + + return si7210_set_scale(data, scale); + default: + return -EINVAL; + } +} + +static int si7210_read_otpreg_val(struct si7210_data *data, unsigned int otpreg, u8 *val) +{ + int ret; + unsigned int otpdata; + + ret = regmap_write(data->regmap, SI7210_REG_OTP_ADDR, otpreg); + if (ret) + return ret; + + ret = regmap_update_bits(data->regmap, SI7210_REG_OTP_CTRL, + SI7210_MASK_OTP_READ_EN, SI7210_MASK_OTP_READ_EN); + if (ret) + return ret; + + ret = regmap_read(data->regmap, SI7210_REG_OTP_DATA, &otpdata); + if (ret) + return ret; + + *val = otpdata; + + return 0; +} + +/* + * According to the datasheet, the primary method to wake up a + * device is to send an empty write. However this is not feasible + * using the current API so we use the other method i.e. read a single + * byte. The device should respond with 0xFF. + */ +static int si7210_device_wake(struct si7210_data *data) +{ + int ret; + + ret = i2c_smbus_read_byte(data->client); + if (ret < 0) + return ret; + + if (ret != 0xFF) + return -EIO; + + return 0; +} + +static int si7210_device_init(struct si7210_data *data) +{ + int ret; + unsigned int i; + + ret = si7210_device_wake(data); + if (ret) + return ret; + + fsleep(1000); + + ret = si7210_read_otpreg_val(data, SI7210_OTPREG_TMP_GAIN, &data->temp_gain); + if (ret) + return ret; + + ret = si7210_read_otpreg_val(data, SI7210_OTPREG_TMP_OFF, &data->temp_offset); + if (ret) + return ret; + + for (i = 0; i < A_REGS_COUNT; i++) { + ret = si7210_read_otpreg_val(data, a20_otp_regs[i], &data->scale_20_a[i]); + if (ret) + return ret; + } + + for (i = 0; i < A_REGS_COUNT; i++) { + ret = si7210_read_otpreg_val(data, a200_otp_regs[i], &data->scale_200_a[i]); + if (ret) + return ret; + } + + ret = regmap_update_bits(data->regmap, SI7210_REG_ARAUTOINC, + SI7210_MASK_ARAUTOINC, SI7210_MASK_ARAUTOINC); + if (ret) + return ret; + + return si7210_set_scale(data, 20); +} + +static const struct iio_info si7210_info = { + .read_raw = si7210_read_raw, + .write_raw = si7210_write_raw, +}; + +static int si7210_probe(struct i2c_client *client) +{ + struct si7210_data *data; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + data = iio_priv(indio_dev); + data->client = client; + + ret = devm_mutex_init(&client->dev, &data->fetch_lock); + if (ret) + return ret; + + data->regmap = devm_regmap_init_i2c(client, &si7210_regmap_conf); + if (IS_ERR(data->regmap)) + return dev_err_probe(&client->dev, PTR_ERR(data->regmap), + "failed to register regmap\n"); + + data->vdd = devm_regulator_get(&client->dev, "vdd"); + if (IS_ERR(data->vdd)) + return dev_err_probe(&client->dev, PTR_ERR(data->vdd), + "failed to get VDD regulator\n"); + + ret = regulator_enable(data->vdd); + if (ret) + return ret; + + indio_dev->name = dev_name(&client->dev); + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &si7210_info; + indio_dev->channels = si7210_channels; + indio_dev->num_channels = ARRAY_SIZE(si7210_channels); + + ret = si7210_device_init(data); + if (ret) + return dev_err_probe(&client->dev, ret, + "device initialization failed\n"); + + return devm_iio_device_register(&client->dev, indio_dev); +} + +static const struct i2c_device_id si7210_id[] = { + { "si7210" }, + { } +}; +MODULE_DEVICE_TABLE(i2c, si7210_id); + +static const struct of_device_id si7210_dt_ids[] = { + { .compatible = "silabs,si7210" }, + { } +}; +MODULE_DEVICE_TABLE(of, si7210_dt_ids); + +static struct i2c_driver si7210_driver = { + .driver = { + .name = "si7210", + .of_match_table = si7210_dt_ids, + }, + .probe = si7210_probe, + .id_table = si7210_id, +}; +module_i2c_driver(si7210_driver); + +MODULE_AUTHOR("Antoni Pokusinski "); +MODULE_DESCRIPTION("Silicon Labs Si7210 Hall Effect sensor I2C driver"); +MODULE_LICENSE("GPL"); From be464661e7532124f9b5c3ece170c6cd6f38d641 Mon Sep 17 00:00:00 2001 From: Mikael Gonella-Bolduc Date: Wed, 22 Jan 2025 17:59:33 -0500 Subject: [PATCH 0112/2157] dt-bindings: iio: light: Add APDS9160 binding Add device tree bindings for APDS9160 Note: Using alternate email for maintainer Signed-off-by: Mikael Gonella-Bolduc Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250122-apds9160-driver-v5-1-5393be10279a@dimonoff.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/light/brcm,apds9160.yaml | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml diff --git a/Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml b/Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml new file mode 100644 index 000000000000..bb1cc4404a55 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/light/brcm,apds9160.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom Combined Proximity & Ambient light sensor + +maintainers: + - Mikael Gonella-Bolduc + +description: | + Datasheet: https://docs.broadcom.com/docs/APDS-9160-003-DS + +properties: + compatible: + enum: + - brcm,apds9160 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + vdd-supply: true + + ps-cancellation-duration: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Proximity sensor cancellation pulse duration in half clock cycles. + This parameter determines a cancellation pulse duration. + The cancellation is applied in the integration phase to cancel out + unwanted reflected light from very near objects such as tempered glass + in front of the sensor. + default: 0 + maximum: 63 + + ps-cancellation-current-picoamp: + description: + Proximity sensor crosstalk cancellation current in picoampere. + This parameter adjusts the current in steps of 2400 pA up to 276000 pA. + The provided value must be a multiple of 2400 and in one of these ranges + [60000 - 96000] + [120000 - 156000] + [180000 - 216000] + [240000 - 276000] + This parameter is used in conjunction with the cancellation duration. + minimum: 60000 + maximum: 276000 + multipleOf: 2400 + +required: + - compatible + - reg + - vdd-supply + +additionalProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + light-sensor@53 { + compatible = "brcm,apds9160"; + reg = <0x53>; + vdd-supply = <&vdd_reg>; + interrupts = <29 IRQ_TYPE_EDGE_FALLING>; + interrupt-parent = <&pinctrl>; + ps-cancellation-duration = <10>; + ps-cancellation-current-picoamp = <62400>; + }; + }; +... From ec08c3954689ab21fa15e0c0cdc6936480c237b7 Mon Sep 17 00:00:00 2001 From: Mikael Gonella-Bolduc Date: Wed, 22 Jan 2025 17:59:34 -0500 Subject: [PATCH 0113/2157] iio: light: Add APDS9160 ALS & Proximity sensor driver APDS9160 is a combination of ALS and proximity sensors. This patch add supports for: - Intensity clear data and illuminance data - Proximity data - Gain control, rate control - Event thresholds Signed-off-by: Mikael Gonella-Bolduc Link: https://patch.msgid.link/20250122-apds9160-driver-v5-2-5393be10279a@dimonoff.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 7 + drivers/iio/light/Kconfig | 11 + drivers/iio/light/Makefile | 1 + drivers/iio/light/apds9160.c | 1594 ++++++++++++++++++++++++++++++++++ 4 files changed, 1613 insertions(+) create mode 100644 drivers/iio/light/apds9160.c diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..311675697a82 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4427,6 +4427,13 @@ F: kernel/bpf/stackmap.c F: kernel/trace/bpf_trace.c F: lib/buildid.c +BROADCOM APDS9160 AMBIENT LIGHT SENSOR AND PROXIMITY DRIVER +M: Mikael Gonella-Bolduc +L: linux-iio@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml +F: drivers/iio/light/apds9160.c + BROADCOM ASP 2.0 ETHERNET DRIVER M: Justin Chen M: Florian Fainelli diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index e34e551eef3e..9260056f0af8 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -63,6 +63,17 @@ config AL3320A To compile this driver as a module, choose M here: the module will be called al3320a. +config APDS9160 + tristate "APDS9160 combined als and proximity sensor" + depends on I2C + select REGMAP_I2C + help + Say Y here if you want to build support for a Broadcom APDS9160 + combined ambient light and proximity sensor. + + To compile this driver as a module, choose M here: the + module will be called apds9160. + config APDS9300 tristate "APDS9300 ambient light sensor" depends on I2C diff --git a/drivers/iio/light/Makefile b/drivers/iio/light/Makefile index 11a4041b918a..30f5fe47cfb6 100644 --- a/drivers/iio/light/Makefile +++ b/drivers/iio/light/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_ADJD_S311) += adjd_s311.o obj-$(CONFIG_ADUX1020) += adux1020.o obj-$(CONFIG_AL3010) += al3010.o obj-$(CONFIG_AL3320A) += al3320a.o +obj-$(CONFIG_APDS9160) += apds9160.o obj-$(CONFIG_APDS9300) += apds9300.o obj-$(CONFIG_APDS9306) += apds9306.o obj-$(CONFIG_APDS9960) += apds9960.o diff --git a/drivers/iio/light/apds9160.c b/drivers/iio/light/apds9160.c new file mode 100644 index 000000000000..d3f415930ec9 --- /dev/null +++ b/drivers/iio/light/apds9160.c @@ -0,0 +1,1594 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * APDS9160 sensor driver. + * Chip is combined proximity and ambient light sensor. + * Author: 2024 Mikael Gonella-Bolduc + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define APDS9160_REGMAP_NAME "apds9160_regmap" + +/* Main control register */ +#define APDS9160_REG_CTRL 0x00 +#define APDS9160_CTRL_SWRESET BIT(4) /* 1: Activate reset */ +#define APDS9160_CTRL_MODE_RGB BIT(2) /* 0: ALS & IR, 1: RGB & IR */ +#define APDS9160_CTRL_EN_ALS BIT(1) /* 1: ALS active */ +#define APDS9160_CTLR_EN_PS BIT(0) /* 1: PS active */ + +/* Status register */ +#define APDS9160_SR_LS_INT BIT(4) +#define APDS9160_SR_LS_NEW_DATA BIT(3) +#define APDS9160_SR_PS_INT BIT(1) +#define APDS9160_SR_PS_NEW_DATA BIT(0) + +/* Interrupt configuration registers */ +#define APDS9160_REG_INT_CFG 0x19 +#define APDS9160_REG_INT_PST 0x1A +#define APDS9160_INT_CFG_EN_LS BIT(2) /* LS int enable */ +#define APDS9160_INT_CFG_EN_PS BIT(0) /* PS int enable */ + +/* Proximity registers */ +#define APDS9160_REG_PS_LED 0x01 +#define APDS9160_REG_PS_PULSES 0x02 +#define APDS9160_REG_PS_MEAS_RATE 0x03 +#define APDS9160_REG_PS_THRES_HI_LSB 0x1B +#define APDS9160_REG_PS_THRES_HI_MSB 0x1C +#define APDS9160_REG_PS_THRES_LO_LSB 0x1D +#define APDS9160_REG_PS_THRES_LO_MSB 0x1E +#define APDS9160_REG_PS_DATA_LSB 0x08 +#define APDS9160_REG_PS_DATA_MSB 0x09 +#define APDS9160_REG_PS_CAN_LEVEL_DIG_LSB 0x1F +#define APDS9160_REG_PS_CAN_LEVEL_DIG_MSB 0x20 +#define APDS9160_REG_PS_CAN_LEVEL_ANA_DUR 0x21 +#define APDS9160_REG_PS_CAN_LEVEL_ANA_CURRENT 0x22 + +/* Light sensor registers */ +#define APDS9160_REG_LS_MEAS_RATE 0x04 +#define APDS9160_REG_LS_GAIN 0x05 +#define APDS9160_REG_LS_DATA_CLEAR_LSB 0x0A +#define APDS9160_REG_LS_DATA_CLEAR 0x0B +#define APDS9160_REG_LS_DATA_CLEAR_MSB 0x0C +#define APDS9160_REG_LS_DATA_ALS_LSB 0x0D +#define APDS9160_REG_LS_DATA_ALS 0x0E +#define APDS9160_REG_LS_DATA_ALS_MSB 0x0F +#define APDS9160_REG_LS_THRES_UP_LSB 0x24 +#define APDS9160_REG_LS_THRES_UP 0x25 +#define APDS9160_REG_LS_THRES_UP_MSB 0x26 +#define APDS9160_REG_LS_THRES_LO_LSB 0x27 +#define APDS9160_REG_LS_THRES_LO 0x28 +#define APDS9160_REG_LS_THRES_LO_MSB 0x29 +#define APDS9160_REG_LS_THRES_VAR 0x2A + +/* Part identification number register */ +#define APDS9160_REG_ID 0x06 + +/* Status register */ +#define APDS9160_REG_SR 0x07 +#define APDS9160_SR_DATA_ALS BIT(3) +#define APDS9160_SR_DATA_PS BIT(0) + +/* Supported ID:s */ +#define APDS9160_PART_ID_0 0x03 + +#define APDS9160_PS_THRES_MAX 0x7FF +#define APDS9160_LS_THRES_MAX 0xFFFFF +#define APDS9160_CMD_LS_RESOLUTION_25MS 0x04 +#define APDS9160_CMD_LS_RESOLUTION_50MS 0x03 +#define APDS9160_CMD_LS_RESOLUTION_100MS 0x02 +#define APDS9160_CMD_LS_RESOLUTION_200MS 0x01 +#define APDS9160_PS_DATA_MASK 0x7FF + +#define APDS9160_DEFAULT_LS_GAIN 3 +#define APDS9160_DEFAULT_LS_RATE 100 +#define APDS9160_DEFAULT_PS_RATE 100 +#define APDS9160_DEFAULT_PS_CANCELLATION_LEVEL 0 +#define APDS9160_DEFAULT_PS_ANALOG_CANCELLATION 0 +#define APDS9160_DEFAULT_PS_GAIN 1 +#define APDS9160_DEFAULT_PS_CURRENT 100 +#define APDS9160_DEFAULT_PS_RESOLUTION_11BITS 0x03 + +static const struct reg_default apds9160_reg_defaults[] = { + { APDS9160_REG_CTRL, 0x00 }, /* Sensors disabled by default */ + { APDS9160_REG_PS_LED, 0x33 }, /* 60 kHz frequency, 100 mA */ + { APDS9160_REG_PS_PULSES, 0x08 }, /* 8 pulses */ + { APDS9160_REG_PS_MEAS_RATE, 0x05 }, /* 100ms */ + { APDS9160_REG_LS_MEAS_RATE, 0x22 }, /* 100ms */ + { APDS9160_REG_LS_GAIN, 0x01 }, /* 3x */ + { APDS9160_REG_INT_CFG, 0x10 }, /* Interrupts disabled */ + { APDS9160_REG_INT_PST, 0x00 }, + { APDS9160_REG_PS_THRES_HI_LSB, 0xFF }, + { APDS9160_REG_PS_THRES_HI_MSB, 0x07 }, + { APDS9160_REG_PS_THRES_LO_LSB, 0x00 }, + { APDS9160_REG_PS_THRES_LO_MSB, 0x00 }, + { APDS9160_REG_PS_CAN_LEVEL_DIG_LSB, 0x00 }, + { APDS9160_REG_PS_CAN_LEVEL_DIG_MSB, 0x00 }, + { APDS9160_REG_PS_CAN_LEVEL_ANA_DUR, 0x00 }, + { APDS9160_REG_PS_CAN_LEVEL_ANA_CURRENT, 0x00 }, + { APDS9160_REG_LS_THRES_UP_LSB, 0xFF }, + { APDS9160_REG_LS_THRES_UP, 0xFF }, + { APDS9160_REG_LS_THRES_UP_MSB, 0x0F }, + { APDS9160_REG_LS_THRES_LO_LSB, 0x00 }, + { APDS9160_REG_LS_THRES_LO, 0x00 }, + { APDS9160_REG_LS_THRES_LO_MSB, 0x00 }, + { APDS9160_REG_LS_THRES_VAR, 0x00 }, +}; + +static const struct regmap_range apds9160_readable_ranges[] = { + regmap_reg_range(APDS9160_REG_CTRL, APDS9160_REG_LS_THRES_VAR), +}; + +static const struct regmap_access_table apds9160_readable_table = { + .yes_ranges = apds9160_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(apds9160_readable_ranges), +}; + +static const struct regmap_range apds9160_writeable_ranges[] = { + regmap_reg_range(APDS9160_REG_CTRL, APDS9160_REG_LS_GAIN), + regmap_reg_range(APDS9160_REG_INT_CFG, APDS9160_REG_LS_THRES_VAR), +}; + +static const struct regmap_access_table apds9160_writeable_table = { + .yes_ranges = apds9160_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(apds9160_writeable_ranges), +}; + +static const struct regmap_range apds9160_volatile_ranges[] = { + regmap_reg_range(APDS9160_REG_SR, APDS9160_REG_LS_DATA_ALS_MSB), +}; + +static const struct regmap_access_table apds9160_volatile_table = { + .yes_ranges = apds9160_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(apds9160_volatile_ranges), +}; + +static const struct regmap_config apds9160_regmap_config = { + .name = APDS9160_REGMAP_NAME, + .reg_bits = 8, + .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + + .rd_table = &apds9160_readable_table, + .wr_table = &apds9160_writeable_table, + .volatile_table = &apds9160_volatile_table, + + .reg_defaults = apds9160_reg_defaults, + .num_reg_defaults = ARRAY_SIZE(apds9160_reg_defaults), + .max_register = 37, + .cache_type = REGCACHE_RBTREE, +}; + +static const struct iio_event_spec apds9160_event_spec[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_ENABLE), + }, +}; + +static const struct iio_chan_spec apds9160_channels[] = { + { + /* Proximity sensor channel */ + .type = IIO_PROXIMITY, + .address = APDS9160_REG_PS_DATA_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_CALIBBIAS), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE), + .event_spec = apds9160_event_spec, + .num_event_specs = ARRAY_SIZE(apds9160_event_spec), + }, + { + /* Proximity sensor led current */ + .type = IIO_CURRENT, + .output = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_RAW), + }, + { + /* Illuminance */ + .type = IIO_LIGHT, + .address = APDS9160_REG_LS_DATA_ALS_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_HARDWAREGAIN) | + BIT(IIO_CHAN_INFO_SCALE), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE), + .event_spec = apds9160_event_spec, + .num_event_specs = ARRAY_SIZE(apds9160_event_spec), + }, + { + /* Clear channel */ + .type = IIO_INTENSITY, + .address = APDS9160_REG_LS_DATA_CLEAR_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .channel2 = IIO_MOD_LIGHT_CLEAR, + .modified = 1, + }, +}; + +static const struct iio_chan_spec apds9160_channels_without_events[] = { + { + /* Proximity sensor channel */ + .type = IIO_PROXIMITY, + .address = APDS9160_REG_PS_DATA_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_CALIBBIAS), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE), + }, + { + /* Proximity sensor led current */ + .type = IIO_CURRENT, + .output = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_RAW), + }, + { + /* Illuminance */ + .type = IIO_LIGHT, + .address = APDS9160_REG_LS_DATA_ALS_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_HARDWAREGAIN) | + BIT(IIO_CHAN_INFO_SCALE), + .info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME) | + BIT(IIO_CHAN_INFO_SCALE), + }, + { + /* Clear channel */ + .type = IIO_INTENSITY, + .address = APDS9160_REG_LS_DATA_CLEAR_LSB, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .channel2 = IIO_MOD_LIGHT_CLEAR, + .modified = 1, + }, +}; + +static const int apds9160_als_rate_avail[] = { + 25, 50, 100, 200 +}; + +static const int apds9160_als_rate_map[][2] = { + { 25, 0x00 }, + { 50, 0x01 }, + { 100, 0x02 }, + { 200, 0x03 }, +}; + +static const int apds9160_als_gain_map[][2] = { + { 1, 0x00 }, + { 3, 0x01 }, + { 6, 0x02 }, + { 18, 0x03 }, + { 54, 0x04 }, +}; + +static const int apds9160_ps_gain_avail[] = { + 1, 2, 4, 8 +}; + +static const int apds9160_ps_gain_map[][2] = { + { 1, 0x00 }, + { 2, 0x01 }, + { 4, 0x02 }, + { 8, 0x03 }, +}; + +static const int apds9160_ps_rate_avail[] = { + 25, 50, 100, 200, 400 +}; + +static const int apds9160_ps_rate_map[][2] = { + { 25, 0x03 }, + { 50, 0x04 }, + { 100, 0x05 }, + { 200, 0x06 }, + { 400, 0x07 }, +}; + +static const int apds9160_ps_led_current_avail[] = { + 10, 25, 50, 100, 150, 175, 200 +}; + +static const int apds9160_ps_led_current_map[][2] = { + { 10, 0x00 }, + { 25, 0x01 }, + { 50, 0x02 }, + { 100, 0x03 }, + { 150, 0x04 }, + { 175, 0x05 }, + { 200, 0x06 }, +}; + +/** + * struct apds9160_scale - apds9160 scale mapping definition + * + * @itime: Integration time in ms + * @gain: Gain multiplier + * @scale1: lux/count resolution + * @scale2: micro lux/count + */ +struct apds9160_scale { + int itime; + int gain; + int scale1; + int scale2; +}; + +/* Scale mapping extracted from datasheet */ +static const struct apds9160_scale apds9160_als_scale_map[] = { + { + .itime = 25, + .gain = 1, + .scale1 = 3, + .scale2 = 272000, + }, + { + .itime = 25, + .gain = 3, + .scale1 = 1, + .scale2 = 77000, + }, + { + .itime = 25, + .gain = 6, + .scale1 = 0, + .scale2 = 525000, + }, + { + .itime = 25, + .gain = 18, + .scale1 = 0, + .scale2 = 169000, + }, + { + .itime = 25, + .gain = 54, + .scale1 = 0, + .scale2 = 49000, + }, + { + .itime = 50, + .gain = 1, + .scale1 = 1, + .scale2 = 639000, + }, + { + .itime = 50, + .gain = 3, + .scale1 = 0, + .scale2 = 538000, + }, + { + .itime = 50, + .gain = 6, + .scale1 = 0, + .scale2 = 263000, + }, + { + .itime = 50, + .gain = 18, + .scale1 = 0, + .scale2 = 84000, + }, + { + .itime = 50, + .gain = 54, + .scale1 = 0, + .scale2 = 25000, + }, + { + .itime = 100, + .gain = 1, + .scale1 = 0, + .scale2 = 819000, + }, + { + .itime = 100, + .gain = 3, + .scale1 = 0, + .scale2 = 269000, + }, + { + .itime = 100, + .gain = 6, + .scale1 = 0, + .scale2 = 131000, + }, + { + .itime = 100, + .gain = 18, + .scale1 = 0, + .scale2 = 42000, + }, + { + .itime = 100, + .gain = 54, + .scale1 = 0, + .scale2 = 12000, + }, + { + .itime = 200, + .gain = 1, + .scale1 = 0, + .scale2 = 409000, + }, + { + .itime = 200, + .gain = 3, + .scale1 = 0, + .scale2 = 135000, + }, + { + .itime = 200, + .gain = 6, + .scale1 = 0, + .scale2 = 66000, + }, + { + .itime = 200, + .gain = 18, + .scale1 = 0, + .scale2 = 21000, + }, + { + .itime = 200, + .gain = 54, + .scale1 = 0, + .scale2 = 6000, + }, +}; + +static const int apds9160_25ms_avail[][2] = { + { 3, 272000 }, + { 1, 77000 }, + { 0, 525000 }, + { 0, 169000 }, + { 0, 49000 }, +}; + +static const int apds9160_50ms_avail[][2] = { + { 1, 639000 }, + { 0, 538000 }, + { 0, 263000 }, + { 0, 84000 }, + { 0, 25000 }, +}; + +static const int apds9160_100ms_avail[][2] = { + { 0, 819000 }, + { 0, 269000 }, + { 0, 131000 }, + { 0, 42000 }, + { 0, 12000 }, +}; + +static const int apds9160_200ms_avail[][2] = { + { 0, 409000 }, + { 0, 135000 }, + { 0, 66000 }, + { 0, 21000 }, + { 0, 6000 }, +}; + +static const struct reg_field apds9160_reg_field_ls_en = + REG_FIELD(APDS9160_REG_CTRL, 1, 1); + +static const struct reg_field apds9160_reg_field_ps_en = + REG_FIELD(APDS9160_REG_CTRL, 0, 0); + +static const struct reg_field apds9160_reg_field_int_ps = + REG_FIELD(APDS9160_REG_INT_CFG, 0, 0); + +static const struct reg_field apds9160_reg_field_int_als = + REG_FIELD(APDS9160_REG_INT_CFG, 2, 2); + +static const struct reg_field apds9160_reg_field_ps_overflow = + REG_FIELD(APDS9160_REG_PS_DATA_MSB, 3, 3); + +static const struct reg_field apds9160_reg_field_als_rate = + REG_FIELD(APDS9160_REG_LS_MEAS_RATE, 0, 2); + +static const struct reg_field apds9160_reg_field_als_gain = + REG_FIELD(APDS9160_REG_LS_GAIN, 0, 2); + +static const struct reg_field apds9160_reg_field_ps_rate = + REG_FIELD(APDS9160_REG_PS_MEAS_RATE, 0, 2); + +static const struct reg_field apds9160_reg_field_als_res = + REG_FIELD(APDS9160_REG_LS_MEAS_RATE, 4, 6); + +static const struct reg_field apds9160_reg_field_ps_current = + REG_FIELD(APDS9160_REG_PS_LED, 0, 2); + +static const struct reg_field apds9160_reg_field_ps_gain = + REG_FIELD(APDS9160_REG_PS_MEAS_RATE, 6, 7); + +static const struct reg_field apds9160_reg_field_ps_resolution = + REG_FIELD(APDS9160_REG_PS_MEAS_RATE, 3, 4); + +struct apds9160_chip { + struct i2c_client *client; + struct regmap *regmap; + + struct regmap_field *reg_enable_ps; + struct regmap_field *reg_enable_als; + struct regmap_field *reg_int_ps; + struct regmap_field *reg_int_als; + struct regmap_field *reg_ps_overflow; + struct regmap_field *reg_als_rate; + struct regmap_field *reg_als_resolution; + struct regmap_field *reg_ps_rate; + struct regmap_field *reg_als_gain; + struct regmap_field *reg_ps_current; + struct regmap_field *reg_ps_gain; + struct regmap_field *reg_ps_resolution; + + struct mutex lock; /* protects state and config data */ + + /* State data */ + int als_int; + int ps_int; + + /* Configuration values */ + int als_itime; + int als_hwgain; + int als_scale1; + int als_scale2; + int ps_rate; + int ps_cancellation_level; + int ps_current; + int ps_gain; +}; + +static int apds9160_set_ps_rate(struct apds9160_chip *data, int val) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_ps_rate_map); idx++) { + int ret; + + if (apds9160_ps_rate_map[idx][0] != val) + continue; + + ret = regmap_field_write(data->reg_ps_rate, + apds9160_ps_rate_map[idx][1]); + if (ret) + return ret; + data->ps_rate = val; + + return ret; + } + + return -EINVAL; +} + +static int apds9160_set_ps_gain(struct apds9160_chip *data, int val) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_ps_gain_map); idx++) { + int ret; + + if (apds9160_ps_gain_map[idx][0] != val) + continue; + + ret = regmap_field_write(data->reg_ps_gain, + apds9160_ps_gain_map[idx][1]); + if (ret) + return ret; + data->ps_gain = val; + + return ret; + } + + return -EINVAL; +} + +/* + * The PS intelligent cancellation level register allows + * for an on-chip substraction of the ADC count caused by + * unwanted reflected light from PS ADC output. + */ +static int apds9160_set_ps_cancellation_level(struct apds9160_chip *data, + int val) +{ + int ret; + __le16 buf; + + if (val < 0 || val > 0xFFFF) + return -EINVAL; + + buf = cpu_to_le16(val); + ret = regmap_bulk_write(data->regmap, APDS9160_REG_PS_CAN_LEVEL_DIG_LSB, + &buf, 2); + if (ret) + return ret; + + data->ps_cancellation_level = val; + + return ret; +} + +/* + * This parameter determines the cancellation pulse duration + * in each of the PWM pulse. The cancellation is applied during the + * integration phase of the PS measurement. + * Duration is programmed in half clock cycles + * A duration value of 0 or 1 will not generate any cancellation pulse + */ +static int apds9160_set_ps_analog_cancellation(struct apds9160_chip *data, + int val) +{ + if (val < 0 || val > 63) + return -EINVAL; + + return regmap_write(data->regmap, APDS9160_REG_PS_CAN_LEVEL_ANA_DUR, + val); +} + +/* + * This parameter works in conjunction with the cancellation pulse duration + * The value determines the current used for crosstalk cancellation + * Coarse value is in steps of 60 nA + * Fine value is in steps of 2.4 nA + */ +static int apds9160_set_ps_cancellation_current(struct apds9160_chip *data, + int coarse_val, + int fine_val) +{ + int val; + + if (coarse_val < 0 || coarse_val > 4) + return -EINVAL; + + if (fine_val < 0 || fine_val > 15) + return -EINVAL; + + /* Coarse value at B4:B5 and fine value at B0:B3 */ + val = (coarse_val << 4) | fine_val; + + return regmap_write(data->regmap, APDS9160_REG_PS_CAN_LEVEL_ANA_CURRENT, + val); +} + +static int apds9160_ps_init_analog_cancellation(struct device *dev, + struct apds9160_chip *data) +{ + int ret, duration, picoamp, idx, coarse, fine; + + ret = device_property_read_u32(dev, + "ps-cancellation-duration", &duration); + if (ret || duration == 0) { + /* Don't fail since this is not required */ + return 0; + } + + ret = device_property_read_u32(dev, + "ps-cancellation-current-picoamp", &picoamp); + if (ret) + return ret; + + if (picoamp < 60000 || picoamp > 276000 || picoamp % 2400 != 0) + return dev_err_probe(dev, -EINVAL, + "Invalid cancellation current\n"); + + /* Compute required coarse and fine value from requested current */ + fine = 0; + coarse = 0; + for (idx = 60000; idx < picoamp; idx += 2400) { + if (fine == 15) { + fine = 0; + coarse++; + idx += 21600; + } else { + fine++; + } + } + + if (picoamp != idx) + dev_warn(dev, + "Invalid cancellation current %i, rounding to %i\n", + picoamp, idx); + + ret = apds9160_set_ps_analog_cancellation(data, duration); + if (ret) + return ret; + + return apds9160_set_ps_cancellation_current(data, coarse, fine); +} + +static int apds9160_set_ps_current(struct apds9160_chip *data, int val) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_ps_led_current_map); idx++) { + int ret; + + if (apds9160_ps_led_current_map[idx][0] != val) + continue; + + ret = regmap_field_write( + data->reg_ps_current, + apds9160_ps_led_current_map[idx][1]); + if (ret) + return ret; + data->ps_current = val; + + return ret; + } + + return -EINVAL; +} + +static int apds9160_set_als_gain(struct apds9160_chip *data, int gain) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_als_gain_map); idx++) { + int ret; + + if (gain != apds9160_als_gain_map[idx][0]) + continue; + + ret = regmap_field_write(data->reg_als_gain, + apds9160_als_gain_map[idx][1]); + if (ret) + return ret; + data->als_hwgain = gain; + + return ret; + } + + return -EINVAL; +} + +static int apds9160_set_als_scale(struct apds9160_chip *data, int val, int val2) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_als_scale_map); idx++) { + if (apds9160_als_scale_map[idx].itime == data->als_itime && + apds9160_als_scale_map[idx].scale1 == val && + apds9160_als_scale_map[idx].scale2 == val2) { + int ret = apds9160_set_als_gain(data, + apds9160_als_scale_map[idx].gain); + if (ret) + return ret; + data->als_scale1 = val; + data->als_scale2 = val2; + + return ret; + } + } + + return -EINVAL; +} + +static int apds9160_set_als_resolution(struct apds9160_chip *data, int val) +{ + switch (val) { + case 25: + return regmap_field_write(data->reg_als_resolution, + APDS9160_CMD_LS_RESOLUTION_25MS); + case 50: + return regmap_field_write(data->reg_als_resolution, + APDS9160_CMD_LS_RESOLUTION_50MS); + case 200: + return regmap_field_write(data->reg_als_resolution, + APDS9160_CMD_LS_RESOLUTION_200MS); + default: + return regmap_field_write(data->reg_als_resolution, + APDS9160_CMD_LS_RESOLUTION_100MS); + } +} + +static int apds9160_set_als_rate(struct apds9160_chip *data, int val) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(apds9160_als_rate_map); idx++) { + if (apds9160_als_rate_map[idx][0] != val) + continue; + + return regmap_field_write(data->reg_als_rate, + apds9160_als_rate_map[idx][1]); + } + + return -EINVAL; +} + +/* + * Setting the integration time ajusts resolution, rate, scale and gain + */ +static int apds9160_set_als_int_time(struct apds9160_chip *data, int val) +{ + int ret; + int idx; + + ret = apds9160_set_als_rate(data, val); + if (ret) + return ret; + + /* Match resolution register with rate */ + ret = apds9160_set_als_resolution(data, val); + if (ret) + return ret; + + data->als_itime = val; + + /* Set the scale minimum gain */ + for (idx = 0; idx < ARRAY_SIZE(apds9160_als_scale_map); idx++) { + if (data->als_itime != apds9160_als_scale_map[idx].itime) + continue; + + return apds9160_set_als_scale(data, + apds9160_als_scale_map[idx].scale1, + apds9160_als_scale_map[idx].scale2); + } + + return -EINVAL; +} + +static int apds9160_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + struct apds9160_chip *data = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_INT_TIME: + switch (chan->type) { + case IIO_LIGHT: + *length = ARRAY_SIZE(apds9160_als_rate_avail); + *vals = (const int *)apds9160_als_rate_avail; + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + case IIO_PROXIMITY: + *length = ARRAY_SIZE(apds9160_ps_rate_avail); + *vals = (const int *)apds9160_ps_rate_avail; + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_PROXIMITY: + *length = ARRAY_SIZE(apds9160_ps_gain_avail); + *vals = (const int *)apds9160_ps_gain_avail; + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + case IIO_LIGHT: + /* The available scales changes depending on itime */ + switch (data->als_itime) { + case 25: + *length = ARRAY_SIZE(apds9160_25ms_avail) * 2; + *vals = (const int *)apds9160_25ms_avail; + *type = IIO_VAL_INT_PLUS_MICRO; + + return IIO_AVAIL_LIST; + case 50: + *length = ARRAY_SIZE(apds9160_50ms_avail) * 2; + *vals = (const int *)apds9160_50ms_avail; + *type = IIO_VAL_INT_PLUS_MICRO; + + return IIO_AVAIL_LIST; + case 100: + *length = ARRAY_SIZE(apds9160_100ms_avail) * 2; + *vals = (const int *)apds9160_100ms_avail; + *type = IIO_VAL_INT_PLUS_MICRO; + + return IIO_AVAIL_LIST; + case 200: + *length = ARRAY_SIZE(apds9160_200ms_avail) * 2; + *vals = (const int *)apds9160_200ms_avail; + *type = IIO_VAL_INT_PLUS_MICRO; + + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } + default: + return -EINVAL; + } + case IIO_CHAN_INFO_RAW: + switch (chan->type) { + case IIO_CURRENT: + *length = ARRAY_SIZE(apds9160_ps_led_current_avail); + *vals = (const int *)apds9160_ps_led_current_avail; + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } + + default: + return -EINVAL; + } +} + +static int apds9160_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_INT_TIME: + return IIO_VAL_INT; + case IIO_CHAN_INFO_CALIBBIAS: + return IIO_VAL_INT; + case IIO_CHAN_INFO_HARDWAREGAIN: + return IIO_VAL_INT; + case IIO_CHAN_INFO_RAW: + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } +} + +static int apds9160_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + struct apds9160_chip *data = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + switch (chan->type) { + case IIO_PROXIMITY: { + __le16 buf; + + ret = regmap_bulk_read(data->regmap, chan->address, + &buf, 2); + if (ret) + return ret; + *val = le16_to_cpu(buf); + /* Remove overflow bits from result */ + *val = FIELD_GET(APDS9160_PS_DATA_MASK, *val); + + return IIO_VAL_INT; + } + case IIO_LIGHT: + case IIO_INTENSITY: { + u8 buf[3]; + + ret = regmap_bulk_read(data->regmap, chan->address, + &buf, 3); + if (ret) + return ret; + *val = get_unaligned_le24(buf); + + return IIO_VAL_INT; + } + case IIO_CURRENT: + *val = data->ps_current; + + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_HARDWAREGAIN: + switch (chan->type) { + case IIO_LIGHT: + *val = data->als_hwgain; + + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_INT_TIME: + switch (chan->type) { + case IIO_PROXIMITY: + *val = data->ps_rate; + + return IIO_VAL_INT; + case IIO_LIGHT: + *val = data->als_itime; + + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_CALIBBIAS: + switch (chan->type) { + case IIO_PROXIMITY: + *val = data->ps_cancellation_level; + + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_PROXIMITY: + *val = data->ps_gain; + + return IIO_VAL_INT; + case IIO_LIGHT: + *val = data->als_scale1; + *val2 = data->als_scale2; + + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } + default: + return -EINVAL; + } +}; + +static int apds9160_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long mask) +{ + struct apds9160_chip *data = iio_priv(indio_dev); + + guard(mutex)(&data->lock); + + switch (mask) { + case IIO_CHAN_INFO_INT_TIME: + if (val2 != 0) + return -EINVAL; + switch (chan->type) { + case IIO_PROXIMITY: + return apds9160_set_ps_rate(data, val); + case IIO_LIGHT: + return apds9160_set_als_int_time(data, val); + default: + return -EINVAL; + } + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_PROXIMITY: + return apds9160_set_ps_gain(data, val); + case IIO_LIGHT: + return apds9160_set_als_scale(data, val, val2); + default: + return -EINVAL; + } + case IIO_CHAN_INFO_CALIBBIAS: + if (val2 != 0) + return -EINVAL; + switch (chan->type) { + case IIO_PROXIMITY: + return apds9160_set_ps_cancellation_level(data, val); + default: + return -EINVAL; + } + case IIO_CHAN_INFO_RAW: + if (val2 != 0) + return -EINVAL; + switch (chan->type) { + case IIO_CURRENT: + return apds9160_set_ps_current(data, val); + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static inline int apds9160_get_thres_reg(const struct iio_chan_spec *chan, + enum iio_event_direction dir, u8 *reg) +{ + switch (dir) { + case IIO_EV_DIR_RISING: + switch (chan->type) { + case IIO_PROXIMITY: + *reg = APDS9160_REG_PS_THRES_HI_LSB; + break; + case IIO_LIGHT: + *reg = APDS9160_REG_LS_THRES_UP_LSB; + break; + default: + return -EINVAL; + } break; + case IIO_EV_DIR_FALLING: + switch (chan->type) { + case IIO_PROXIMITY: + *reg = APDS9160_REG_PS_THRES_LO_LSB; + break; + case IIO_LIGHT: + *reg = APDS9160_REG_LS_THRES_LO_LSB; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int apds9160_read_event(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int *val, int *val2) +{ + u8 reg; + int ret; + struct apds9160_chip *data = iio_priv(indio_dev); + + if (info != IIO_EV_INFO_VALUE) + return -EINVAL; + + ret = apds9160_get_thres_reg(chan, dir, ®); + if (ret < 0) + return ret; + + switch (chan->type) { + case IIO_PROXIMITY: { + __le16 buf; + + ret = regmap_bulk_read(data->regmap, reg, &buf, 2); + if (ret < 0) + return ret; + *val = le16_to_cpu(buf); + return IIO_VAL_INT; + } + case IIO_LIGHT: { + u8 buf[3]; + + ret = regmap_bulk_read(data->regmap, reg, &buf, 3); + if (ret < 0) + return ret; + *val = get_unaligned_le24(buf); + return IIO_VAL_INT; + } + default: + return -EINVAL; + } +} + +static int apds9160_write_event(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int val, int val2) +{ + u8 reg; + int ret = 0; + struct apds9160_chip *data = iio_priv(indio_dev); + + if (info != IIO_EV_INFO_VALUE) + return -EINVAL; + + ret = apds9160_get_thres_reg(chan, dir, ®); + if (ret < 0) + return ret; + + switch (chan->type) { + case IIO_PROXIMITY: { + __le16 buf; + + if (val < 0 || val > APDS9160_PS_THRES_MAX) + return -EINVAL; + + buf = cpu_to_le16(val); + return regmap_bulk_write(data->regmap, reg, &buf, 2); + } + case IIO_LIGHT: { + u8 buf[3]; + + if (val < 0 || val > APDS9160_LS_THRES_MAX) + return -EINVAL; + + put_unaligned_le24(val, buf); + return regmap_bulk_write(data->regmap, reg, &buf, 3); + } + default: + return -EINVAL; + } +} + +static int apds9160_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct apds9160_chip *data = iio_priv(indio_dev); + + switch (chan->type) { + case IIO_PROXIMITY: + return data->ps_int; + case IIO_LIGHT: + return data->als_int; + default: + return -EINVAL; + } +} + +static int apds9160_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, bool state) +{ + struct apds9160_chip *data = iio_priv(indio_dev); + int ret; + + switch (chan->type) { + case IIO_PROXIMITY: + ret = regmap_field_write(data->reg_int_ps, state); + if (ret) + return ret; + data->ps_int = state; + + return 0; + case IIO_LIGHT: + ret = regmap_field_write(data->reg_int_als, state); + if (ret) + return ret; + data->als_int = state; + + return 0; + default: + return -EINVAL; + } +} + +static irqreturn_t apds9160_irq_handler(int irq, void *private) +{ + struct iio_dev *indio_dev = private; + struct apds9160_chip *data = iio_priv(indio_dev); + int ret, status; + + /* Reading status register clears the interrupt flag */ + ret = regmap_read(data->regmap, APDS9160_REG_SR, &status); + if (ret < 0) { + dev_err_ratelimited(&data->client->dev, + "irq status reg read failed\n"); + return IRQ_HANDLED; + } + + if ((status & APDS9160_SR_LS_INT) && + (status & APDS9160_SR_LS_NEW_DATA) && data->als_int) { + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_LIGHT, 0, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_EITHER), + iio_get_time_ns(indio_dev)); + } + + if ((status & APDS9160_SR_PS_INT) && + (status & APDS9160_SR_PS_NEW_DATA) && data->ps_int) { + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 0, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_EITHER), + iio_get_time_ns(indio_dev)); + } + + return IRQ_HANDLED; +} + +static int apds9160_detect(struct apds9160_chip *chip) +{ + struct i2c_client *client = chip->client; + int ret; + u32 val; + + ret = regmap_read(chip->regmap, APDS9160_REG_ID, &val); + if (ret < 0) { + dev_err(&client->dev, "ID read failed\n"); + return ret; + } + + if (val != APDS9160_PART_ID_0) + dev_info(&client->dev, "Unknown part id %u\n", val); + + return 0; +} + +static void apds9160_disable(void *chip) +{ + struct apds9160_chip *data = chip; + int ret; + + ret = regmap_field_write(data->reg_enable_als, 0); + if (ret) + return; + + regmap_field_write(data->reg_enable_ps, 0); +} + +static int apds9160_chip_init(struct apds9160_chip *chip) +{ + int ret; + + /* Write default values to interrupt register */ + ret = regmap_field_write(chip->reg_int_ps, 0); + chip->ps_int = 0; + if (ret) + return ret; + + ret = regmap_field_write(chip->reg_int_als, 0); + chip->als_int = 0; + if (ret) + return ret; + + /* Write default values to control register */ + ret = regmap_field_write(chip->reg_enable_als, 1); + if (ret) + return ret; + + ret = regmap_field_write(chip->reg_enable_ps, 1); + if (ret) + return ret; + + /* Write other default values */ + ret = regmap_field_write(chip->reg_ps_resolution, + APDS9160_DEFAULT_PS_RESOLUTION_11BITS); + if (ret) + return ret; + + /* Write default values to configuration registers */ + ret = apds9160_set_ps_current(chip, APDS9160_DEFAULT_PS_CURRENT); + if (ret) + return ret; + + ret = apds9160_set_ps_rate(chip, APDS9160_DEFAULT_PS_RATE); + if (ret) + return ret; + + ret = apds9160_set_als_int_time(chip, APDS9160_DEFAULT_LS_RATE); + if (ret) + return ret; + + ret = apds9160_set_als_scale(chip, + apds9160_100ms_avail[0][0], + apds9160_100ms_avail[0][1]); + if (ret) + return ret; + + ret = apds9160_set_ps_gain(chip, APDS9160_DEFAULT_PS_GAIN); + if (ret) + return ret; + + ret = apds9160_set_ps_analog_cancellation( + chip, APDS9160_DEFAULT_PS_ANALOG_CANCELLATION); + if (ret) + return ret; + + ret = apds9160_set_ps_cancellation_level( + chip, APDS9160_DEFAULT_PS_CANCELLATION_LEVEL); + if (ret) + return ret; + + return devm_add_action_or_reset(&chip->client->dev, apds9160_disable, + chip); +} + +static int apds9160_regfield_init(struct apds9160_chip *data) +{ + struct device *dev = &data->client->dev; + struct regmap *regmap = data->regmap; + struct regmap_field *tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_int_als); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_int_als = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_int_ps); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_int_ps = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_ls_en); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_enable_als = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_ps_en); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_enable_ps = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, + apds9160_reg_field_ps_overflow); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_ps_overflow = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_als_rate); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_als_rate = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_als_res); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_als_resolution = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_ps_rate); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_ps_rate = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_als_gain); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_als_gain = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, + apds9160_reg_field_ps_current); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_ps_current = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, apds9160_reg_field_ps_gain); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_ps_gain = tmp; + + tmp = devm_regmap_field_alloc(dev, regmap, + apds9160_reg_field_ps_resolution); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + data->reg_ps_resolution = tmp; + + return 0; +} + +static const struct iio_info apds9160_info = { + .read_avail = apds9160_read_avail, + .read_raw = apds9160_read_raw, + .write_raw = apds9160_write_raw, + .write_raw_get_fmt = apds9160_write_raw_get_fmt, + .read_event_value = apds9160_read_event, + .write_event_value = apds9160_write_event, + .read_event_config = apds9160_read_event_config, + .write_event_config = apds9160_write_event_config, +}; + +static const struct iio_info apds9160_info_no_events = { + .read_avail = apds9160_read_avail, + .read_raw = apds9160_read_raw, + .write_raw = apds9160_write_raw, + .write_raw_get_fmt = apds9160_write_raw_get_fmt, +}; + +static int apds9160_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct apds9160_chip *chip; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*chip)); + if (!indio_dev) + return -ENOMEM; + + ret = devm_regulator_get_enable(dev, "vdd"); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable vdd supply\n"); + + indio_dev->name = "apds9160"; + indio_dev->modes = INDIO_DIRECT_MODE; + + chip = iio_priv(indio_dev); + chip->client = client; + chip->regmap = devm_regmap_init_i2c(client, &apds9160_regmap_config); + if (IS_ERR(chip->regmap)) + return dev_err_probe(dev, PTR_ERR(chip->regmap), + "regmap initialization failed.\n"); + + chip->client = client; + mutex_init(&chip->lock); + + ret = apds9160_detect(chip); + if (ret < 0) + return dev_err_probe(dev, ret, "apds9160 not found\n"); + + ret = apds9160_regfield_init(chip); + if (ret) + return ret; + + ret = apds9160_chip_init(chip); + if (ret) + return ret; + + ret = apds9160_ps_init_analog_cancellation(dev, chip); + if (ret) + return ret; + + if (client->irq > 0) { + indio_dev->info = &apds9160_info; + indio_dev->channels = apds9160_channels; + indio_dev->num_channels = ARRAY_SIZE(apds9160_channels); + ret = devm_request_threaded_irq(dev, client->irq, NULL, + apds9160_irq_handler, + IRQF_ONESHOT, "apds9160_event", + indio_dev); + if (ret) { + return dev_err_probe(dev, ret, + "request irq (%d) failed\n", + client->irq); + } + } else { + indio_dev->info = &apds9160_info_no_events; + indio_dev->channels = apds9160_channels_without_events; + indio_dev->num_channels = + ARRAY_SIZE(apds9160_channels_without_events); + } + + ret = devm_iio_device_register(dev, indio_dev); + if (ret) + return dev_err_probe(dev, ret, + "failed iio device registration\n"); + + return ret; +} + +static const struct of_device_id apds9160_of_match[] = { + { .compatible = "brcm,apds9160" }, + { } +}; +MODULE_DEVICE_TABLE(of, apds9160_of_match); + +static const struct i2c_device_id apds9160_id[] = { + { "apds9160", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, apds9160_id); + +static struct i2c_driver apds9160_driver = { + .driver = { + .name = "apds9160", + .of_match_table = apds9160_of_match, + }, + .probe = apds9160_probe, + .id_table = apds9160_id, +}; +module_i2c_driver(apds9160_driver); + +MODULE_DESCRIPTION("APDS9160 combined ALS and proximity sensor"); +MODULE_AUTHOR("Mikael Gonella-Bolduc "); +MODULE_LICENSE("GPL"); From dbd2e08ff09fd6bd51215b44474899cc1b7b7a16 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 27 Jan 2025 20:30:22 +0100 Subject: [PATCH 0114/2157] iio: gts-helper: export iio_gts_get_total_gain() Export this function in preparation for the fix in veml6030.c, where the total gain can be used to ease the calculation of the processed value of the IIO_LIGHT channel compared to acquiring the scale in NANO. Suggested-by: Matti Vaittinen Signed-off-by: Javier Carrasco Reviewed-by: Matti Vaittinen Link: https://patch.msgid.link/20250127-veml6030-scale-v3-1-4f32ba03df94@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-gts-helper.c | 11 ++++++++++- include/linux/iio/iio-gts-helper.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c index 200364b42ead..f35c36fd4a55 100644 --- a/drivers/iio/industrialio-gts-helper.c +++ b/drivers/iio/industrialio-gts-helper.c @@ -1026,7 +1026,15 @@ int iio_gts_find_gain_time_sel_for_scale(struct iio_gts *gts, int scale_int, } EXPORT_SYMBOL_NS_GPL(iio_gts_find_gain_time_sel_for_scale, "IIO_GTS_HELPER"); -static int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time) +/** + * iio_gts_get_total_gain - Fetch total gain for given HW-gain and time + * @gts: Gain time scale descriptor + * @gain: HW-gain for which the total gain is searched for + * @time: Integration time for which the total gain is searched for + * + * Return: total gain on success and -EINVAL on error. + */ +int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time) { const struct iio_itime_sel_mul *itime; @@ -1042,6 +1050,7 @@ static int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time) return gain * itime->mul; } +EXPORT_SYMBOL_NS_GPL(iio_gts_get_total_gain, "IIO_GTS_HELPER"); static int iio_gts_get_scale_linear(struct iio_gts *gts, int gain, int time, u64 *scale) diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h index e5de7a124bad..66f830ab9b49 100644 --- a/include/linux/iio/iio-gts-helper.h +++ b/include/linux/iio/iio-gts-helper.h @@ -208,5 +208,6 @@ int iio_gts_all_avail_scales(struct iio_gts *gts, const int **vals, int *type, int *length); int iio_gts_avail_scales_for_time(struct iio_gts *gts, int time, const int **vals, int *type, int *length); +int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time); #endif From 22eaca4283b216f5e1f7721e4ad0ecb3d23c6774 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 27 Jan 2025 20:30:23 +0100 Subject: [PATCH 0115/2157] iio: light: veml6030: fix scale to conform to ABI The current scale is not ABI-compliant as it is just the sensor gain instead of the value that acts as a multiplier to be applied to the raw value (there is no offset). Use the iio-gts helpers to obtain the proper scale values according to the gain and integration time to match the resolution tables from the datasheet and drop dedicated variables to store the current values of the integration time, gain and resolution. When at it, use 'scale' instead of 'gain' consistently for the get/set functions to avoid misunderstandings. Fixes: 7b779f573c48 ("iio: light: add driver for veml6030 ambient light sensor") Acked-by: Matti Vaittinen Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20250127-veml6030-scale-v3-2-4f32ba03df94@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/Kconfig | 1 + drivers/iio/light/veml6030.c | 538 +++++++++++++++-------------------- 2 files changed, 223 insertions(+), 316 deletions(-) diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 9260056f0af8..89aec08df739 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -694,6 +694,7 @@ config VEML6030 select REGMAP_I2C select IIO_BUFFER select IIO_TRIGGERED_BUFFER + select IIO_GTS_HELPER depends on I2C help Say Y here if you want to build a driver for the Vishay VEML6030 diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c index 3afd4bb1ad53..473a9c3e32a3 100644 --- a/drivers/iio/light/veml6030.c +++ b/drivers/iio/light/veml6030.c @@ -24,10 +24,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -65,6 +67,10 @@ #define VEML6035_GAIN_RF REG_FIELD(VEML6030_REG_ALS_CONF, 10, 12) +/* Maximum scales x 10000 to work with integers */ +#define VEML6030_MAX_SCALE 21504 +#define VEML6035_MAX_SCALE 4096 + enum veml6030_scan { VEML6030_SCAN_ALS, VEML6030_SCAN_WH, @@ -78,16 +84,13 @@ struct veml6030_rf { struct veml603x_chip { const char *name; - const int(*scale_vals)[][2]; - const int num_scale_vals; const struct iio_chan_spec *channels; const int num_channels; const struct reg_field gain_rf; const struct reg_field it_rf; + const int max_scale; int (*hw_init)(struct iio_dev *indio_dev, struct device *dev); int (*set_info)(struct iio_dev *indio_dev); - int (*set_als_gain)(struct iio_dev *indio_dev, int val, int val2); - int (*get_als_gain)(struct iio_dev *indio_dev, int *val, int *val2); }; /* @@ -105,40 +108,55 @@ struct veml6030_data { struct i2c_client *client; struct regmap *regmap; struct veml6030_rf rf; - int cur_resolution; - int cur_gain; - int cur_integration_time; const struct veml603x_chip *chip; + struct iio_gts gts; + }; -static const int veml6030_it_times[][2] = { - { 0, 25000 }, - { 0, 50000 }, - { 0, 100000 }, - { 0, 200000 }, - { 0, 400000 }, - { 0, 800000 }, +#define VEML6030_SEL_IT_25MS 0x0C +#define VEML6030_SEL_IT_50MS 0x08 +#define VEML6030_SEL_IT_100MS 0x00 +#define VEML6030_SEL_IT_200MS 0x01 +#define VEML6030_SEL_IT_400MS 0x02 +#define VEML6030_SEL_IT_800MS 0x03 +static const struct iio_itime_sel_mul veml6030_it_sel[] = { + GAIN_SCALE_ITIME_US(25000, VEML6030_SEL_IT_25MS, 1), + GAIN_SCALE_ITIME_US(50000, VEML6030_SEL_IT_50MS, 2), + GAIN_SCALE_ITIME_US(100000, VEML6030_SEL_IT_100MS, 4), + GAIN_SCALE_ITIME_US(200000, VEML6030_SEL_IT_200MS, 8), + GAIN_SCALE_ITIME_US(400000, VEML6030_SEL_IT_400MS, 16), + GAIN_SCALE_ITIME_US(800000, VEML6030_SEL_IT_800MS, 32), }; -/* - * Scale is 1/gain. Value 0.125 is ALS gain x (1/8), 0.25 is - * ALS gain x (1/4), 0.5 is ALS gain x (1/2), 1.0 is ALS gain x 1, - * 2.0 is ALS gain x2, and 4.0 is ALS gain x 4. +/* Gains are multiplied by 8 to work with integers. The values in the + * iio-gts tables don't need corrections because the maximum value of + * the scale refers to GAIN = x1, and the rest of the values are + * obtained from the resulting linear function. */ -static const int veml6030_scale_vals[][2] = { - { 0, 125000 }, - { 0, 250000 }, - { 1, 0 }, - { 2, 0 }, +#define VEML6030_SEL_MILLI_GAIN_X125 2 +#define VEML6030_SEL_MILLI_GAIN_X250 3 +#define VEML6030_SEL_MILLI_GAIN_X1000 0 +#define VEML6030_SEL_MILLI_GAIN_X2000 1 +static const struct iio_gain_sel_pair veml6030_gain_sel[] = { + GAIN_SCALE_GAIN(1, VEML6030_SEL_MILLI_GAIN_X125), + GAIN_SCALE_GAIN(2, VEML6030_SEL_MILLI_GAIN_X250), + GAIN_SCALE_GAIN(8, VEML6030_SEL_MILLI_GAIN_X1000), + GAIN_SCALE_GAIN(16, VEML6030_SEL_MILLI_GAIN_X2000), }; -static const int veml6035_scale_vals[][2] = { - { 0, 125000 }, - { 0, 250000 }, - { 0, 500000 }, - { 1, 0 }, - { 2, 0 }, - { 4, 0 }, +#define VEML6035_SEL_MILLI_GAIN_X125 4 +#define VEML6035_SEL_MILLI_GAIN_X250 5 +#define VEML6035_SEL_MILLI_GAIN_X500 7 +#define VEML6035_SEL_MILLI_GAIN_X1000 0 +#define VEML6035_SEL_MILLI_GAIN_X2000 1 +#define VEML6035_SEL_MILLI_GAIN_X4000 3 +static const struct iio_gain_sel_pair veml6035_gain_sel[] = { + GAIN_SCALE_GAIN(1, VEML6035_SEL_MILLI_GAIN_X125), + GAIN_SCALE_GAIN(2, VEML6035_SEL_MILLI_GAIN_X250), + GAIN_SCALE_GAIN(4, VEML6035_SEL_MILLI_GAIN_X500), + GAIN_SCALE_GAIN(8, VEML6035_SEL_MILLI_GAIN_X1000), + GAIN_SCALE_GAIN(16, VEML6035_SEL_MILLI_GAIN_X2000), + GAIN_SCALE_GAIN(32, VEML6035_SEL_MILLI_GAIN_X4000), }; /* @@ -372,104 +390,73 @@ static const struct regmap_config veml6030_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static int veml6030_get_intgrn_tm(struct iio_dev *indio_dev, - int *val, int *val2) +static int veml6030_get_it(struct veml6030_data *data, int *val, int *val2) { - int it_idx, ret; - struct veml6030_data *data = iio_priv(indio_dev); + int ret, it_idx; ret = regmap_field_read(data->rf.it, &it_idx); - if (ret) { - dev_err(&data->client->dev, - "can't read als conf register %d\n", ret); + if (ret) return ret; - } - switch (it_idx) { - case 0: - *val2 = 100000; - break; - case 1: - *val2 = 200000; - break; - case 2: - *val2 = 400000; - break; - case 3: - *val2 = 800000; - break; - case 8: - *val2 = 50000; - break; - case 12: - *val2 = 25000; - break; - default: - return -EINVAL; - } + ret = iio_gts_find_int_time_by_sel(&data->gts, it_idx); + if (ret < 0) + return ret; + *val2 = ret; *val = 0; + return IIO_VAL_INT_PLUS_MICRO; } -static int veml6030_set_intgrn_tm(struct iio_dev *indio_dev, - int val, int val2) +static int veml6030_set_it(struct iio_dev *indio_dev, int val, int val2) { - int ret, new_int_time, int_idx; struct veml6030_data *data = iio_priv(indio_dev); + int ret, gain_idx, it_idx, new_gain, prev_gain, prev_it; + bool in_range; - if (val) + if (val || !iio_gts_valid_time(&data->gts, val2)) return -EINVAL; - switch (val2) { - case 25000: - new_int_time = 0x300; - int_idx = 5; - break; - case 50000: - new_int_time = 0x200; - int_idx = 4; - break; - case 100000: - new_int_time = 0x00; - int_idx = 3; - break; - case 200000: - new_int_time = 0x40; - int_idx = 2; - break; - case 400000: - new_int_time = 0x80; - int_idx = 1; - break; - case 800000: - new_int_time = 0xC0; - int_idx = 0; - break; - default: - return -EINVAL; - } - - ret = regmap_field_write(data->rf.it, new_int_time); - if (ret) { - dev_err(&data->client->dev, - "can't update als integration time %d\n", ret); + ret = regmap_field_read(data->rf.it, &it_idx); + if (ret) return ret; - } - /* - * Cache current integration time and update resolution. For every - * increase in integration time to next level, resolution is halved - * and vice-versa. - */ - if (data->cur_integration_time < int_idx) - data->cur_resolution <<= int_idx - data->cur_integration_time; - else if (data->cur_integration_time > int_idx) - data->cur_resolution >>= data->cur_integration_time - int_idx; + ret = regmap_field_read(data->rf.gain, &gain_idx); + if (ret) + return ret; - data->cur_integration_time = int_idx; + prev_it = iio_gts_find_int_time_by_sel(&data->gts, it_idx); + if (prev_it < 0) + return prev_it; - return ret; + if (prev_it == val2) + return 0; + + prev_gain = iio_gts_find_gain_by_sel(&data->gts, gain_idx); + if (prev_gain < 0) + return prev_gain; + + ret = iio_gts_find_new_gain_by_gain_time_min(&data->gts, prev_gain, prev_it, + val2, &new_gain, &in_range); + if (ret) + return ret; + + if (!in_range) + dev_dbg(&data->client->dev, "Optimal gain out of range\n"); + + ret = iio_gts_find_sel_by_int_time(&data->gts, val2); + if (ret < 0) + return ret; + + ret = regmap_field_write(data->rf.it, ret); + if (ret) + return ret; + + ret = iio_gts_find_sel_by_gain(&data->gts, new_gain); + if (ret < 0) + return ret; + + return regmap_field_write(data->rf.gain, ret); } static int veml6030_read_persistence(struct iio_dev *indio_dev, @@ -478,7 +465,7 @@ static int veml6030_read_persistence(struct iio_dev *indio_dev, int ret, reg, period, x, y; struct veml6030_data *data = iio_priv(indio_dev); - ret = veml6030_get_intgrn_tm(indio_dev, &x, &y); + ret = veml6030_get_it(data, &x, &y); if (ret < 0) return ret; @@ -503,7 +490,7 @@ static int veml6030_write_persistence(struct iio_dev *indio_dev, int ret, period, x, y; struct veml6030_data *data = iio_priv(indio_dev); - ret = veml6030_get_intgrn_tm(indio_dev, &x, &y); + ret = veml6030_get_it(data, &x, &y); if (ret < 0) return ret; @@ -532,179 +519,31 @@ static int veml6030_write_persistence(struct iio_dev *indio_dev, return ret; } -/* - * Cache currently set gain & update resolution. For every - * increase in the gain to next level, resolution is halved - * and vice-versa. - */ -static void veml6030_update_gain_res(struct veml6030_data *data, int gain_idx) +static int veml6030_set_scale(struct iio_dev *indio_dev, int val, int val2) { - if (data->cur_gain < gain_idx) - data->cur_resolution <<= gain_idx - data->cur_gain; - else if (data->cur_gain > gain_idx) - data->cur_resolution >>= data->cur_gain - gain_idx; - - data->cur_gain = gain_idx; -} - -static int veml6030_set_als_gain(struct iio_dev *indio_dev, - int val, int val2) -{ - int ret, new_gain, gain_idx; + int ret, gain_sel, it_idx, it_sel; struct veml6030_data *data = iio_priv(indio_dev); - if (val == 0 && val2 == 125000) { - new_gain = 0x01; - gain_idx = 3; - } else if (val == 0 && val2 == 250000) { - new_gain = 0x11; - gain_idx = 2; - } else if (val == 1 && val2 == 0) { - new_gain = 0x00; - gain_idx = 1; - } else if (val == 2 && val2 == 0) { - new_gain = 0x01; - gain_idx = 0; - } else { - return -EINVAL; - } - - ret = regmap_field_write(data->rf.gain, new_gain); - if (ret) { - dev_err(&data->client->dev, - "can't set als gain %d\n", ret); + ret = regmap_field_read(data->rf.it, &it_idx); + if (ret) return ret; - } - veml6030_update_gain_res(data, gain_idx); + ret = iio_gts_find_gain_time_sel_for_scale(&data->gts, val, val2, + &gain_sel, &it_sel); + if (ret) + return ret; + + ret = regmap_field_write(data->rf.it, it_sel); + if (ret) + return ret; + + ret = regmap_field_write(data->rf.gain, gain_sel); + if (ret) + return ret; return 0; } -static int veml6035_set_als_gain(struct iio_dev *indio_dev, int val, int val2) -{ - int ret, new_gain, gain_idx; - struct veml6030_data *data = iio_priv(indio_dev); - - if (val == 0 && val2 == 125000) { - new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS); - gain_idx = 5; - } else if (val == 0 && val2 == 250000) { - new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS | - VEML6035_GAIN); - gain_idx = 4; - } else if (val == 0 && val2 == 500000) { - new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_SENS | - VEML6035_GAIN | VEML6035_DG); - gain_idx = 3; - } else if (val == 1 && val2 == 0) { - new_gain = 0x0000; - gain_idx = 2; - } else if (val == 2 && val2 == 0) { - new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_GAIN); - gain_idx = 1; - } else if (val == 4 && val2 == 0) { - new_gain = FIELD_GET(VEML6035_GAIN_M, VEML6035_GAIN | - VEML6035_DG); - gain_idx = 0; - } else { - return -EINVAL; - } - - ret = regmap_field_write(data->rf.gain, new_gain); - if (ret) { - dev_err(&data->client->dev, "can't set als gain %d\n", ret); - return ret; - } - - veml6030_update_gain_res(data, gain_idx); - - return 0; -} - -static int veml6030_get_als_gain(struct iio_dev *indio_dev, - int *val, int *val2) -{ - int gain, ret; - struct veml6030_data *data = iio_priv(indio_dev); - - ret = regmap_field_read(data->rf.gain, &gain); - if (ret) { - dev_err(&data->client->dev, - "can't read als conf register %d\n", ret); - return ret; - } - - switch (gain) { - case 0: - *val = 1; - *val2 = 0; - break; - case 1: - *val = 2; - *val2 = 0; - break; - case 2: - *val = 0; - *val2 = 125000; - break; - case 3: - *val = 0; - *val2 = 250000; - break; - default: - return -EINVAL; - } - - return IIO_VAL_INT_PLUS_MICRO; -} - -static int veml6035_get_als_gain(struct iio_dev *indio_dev, int *val, int *val2) -{ - int gain, ret; - struct veml6030_data *data = iio_priv(indio_dev); - - ret = regmap_field_read(data->rf.gain, &gain); - if (ret) { - dev_err(&data->client->dev, - "can't read als conf register %d\n", ret); - return ret; - } - - switch (gain) { - case 0: - *val = 1; - *val2 = 0; - break; - case 1: - case 2: - *val = 2; - *val2 = 0; - break; - case 3: - *val = 4; - *val2 = 0; - break; - case 4: - *val = 0; - *val2 = 125000; - break; - case 5: - case 6: - *val = 0; - *val2 = 250000; - break; - case 7: - *val = 0; - *val2 = 500000; - break; - default: - return -EINVAL; - } - - return IIO_VAL_INT_PLUS_MICRO; -} - static int veml6030_read_thresh(struct iio_dev *indio_dev, int *val, int *val2, int dir) { @@ -749,6 +588,71 @@ static int veml6030_write_thresh(struct iio_dev *indio_dev, return ret; } +static int veml6030_get_total_gain(struct veml6030_data *data) +{ + int gain, it, reg, ret; + + ret = regmap_field_read(data->rf.gain, ®); + if (ret) + return ret; + + gain = iio_gts_find_gain_by_sel(&data->gts, reg); + if (gain < 0) + return gain; + + ret = regmap_field_read(data->rf.it, ®); + if (ret) + return ret; + + it = iio_gts_find_int_time_by_sel(&data->gts, reg); + if (it < 0) + return it; + + return iio_gts_get_total_gain(&data->gts, gain, it); +} + +static int veml6030_get_scale(struct veml6030_data *data, int *val, int *val2) +{ + int gain, it, reg, ret; + + ret = regmap_field_read(data->rf.gain, ®); + if (ret) + return ret; + + gain = iio_gts_find_gain_by_sel(&data->gts, reg); + if (gain < 0) + return gain; + + ret = regmap_field_read(data->rf.it, ®); + if (ret) + return ret; + + it = iio_gts_find_int_time_by_sel(&data->gts, reg); + if (it < 0) + return it; + + ret = iio_gts_get_scale(&data->gts, gain, it, val, val2); + if (ret) + return ret; + + return IIO_VAL_INT_PLUS_NANO; +} + +static int veml6030_process_als(struct veml6030_data *data, int raw, + int *val, int *val2) +{ + int total_gain; + + total_gain = veml6030_get_total_gain(data); + if (total_gain < 0) + return total_gain; + + *val = raw * data->chip->max_scale / total_gain / 10000; + *val2 = raw * data->chip->max_scale / total_gain % 10000 * 100; + + return IIO_VAL_INT_PLUS_MICRO; +} + /* * Provide both raw as well as light reading in lux. * light (in lux) = resolution * raw reading @@ -772,11 +676,9 @@ static int veml6030_read_raw(struct iio_dev *indio_dev, dev_err(dev, "can't read als data %d\n", ret); return ret; } - if (mask == IIO_CHAN_INFO_PROCESSED) { - *val = (reg * data->cur_resolution) / 10000; - *val2 = (reg * data->cur_resolution) % 10000 * 100; - return IIO_VAL_INT_PLUS_MICRO; - } + if (mask == IIO_CHAN_INFO_PROCESSED) + return veml6030_process_als(data, reg, val, val2); + *val = reg; return IIO_VAL_INT; case IIO_INTENSITY: @@ -791,9 +693,9 @@ static int veml6030_read_raw(struct iio_dev *indio_dev, return -EINVAL; } case IIO_CHAN_INFO_INT_TIME: - return veml6030_get_intgrn_tm(indio_dev, val, val2); + return veml6030_get_it(data, val, val2); case IIO_CHAN_INFO_SCALE: - return data->chip->get_als_gain(indio_dev, val, val2); + return veml6030_get_scale(data, val, val2); default: return -EINVAL; } @@ -808,15 +710,9 @@ static int veml6030_read_avail(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_INT_TIME: - *vals = (int *)&veml6030_it_times; - *length = 2 * ARRAY_SIZE(veml6030_it_times); - *type = IIO_VAL_INT_PLUS_MICRO; - return IIO_AVAIL_LIST; + return iio_gts_avail_times(&data->gts, vals, type, length); case IIO_CHAN_INFO_SCALE: - *vals = (int *)*data->chip->scale_vals; - *length = 2 * data->chip->num_scale_vals; - *type = IIO_VAL_INT_PLUS_MICRO; - return IIO_AVAIL_LIST; + return iio_gts_all_avail_scales(&data->gts, vals, type, length); } return -EINVAL; @@ -826,13 +722,25 @@ static int veml6030_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { - struct veml6030_data *data = iio_priv(indio_dev); - switch (mask) { case IIO_CHAN_INFO_INT_TIME: - return veml6030_set_intgrn_tm(indio_dev, val, val2); + return veml6030_set_it(indio_dev, val, val2); case IIO_CHAN_INFO_SCALE: - return data->chip->set_als_gain(indio_dev, val, val2); + return veml6030_set_scale(indio_dev, val, val2); + default: + return -EINVAL; + } +} + +static int veml6030_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_INT_TIME: + return IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; } @@ -930,6 +838,7 @@ static const struct iio_info veml6030_info = { .read_raw = veml6030_read_raw, .read_avail = veml6030_read_avail, .write_raw = veml6030_write_raw, + .write_raw_get_fmt = veml6030_write_raw_get_fmt, .read_event_value = veml6030_read_event_val, .write_event_value = veml6030_write_event_val, .read_event_config = veml6030_read_interrupt_config, @@ -941,6 +850,7 @@ static const struct iio_info veml6030_info_no_irq = { .read_raw = veml6030_read_raw, .read_avail = veml6030_read_avail, .write_raw = veml6030_write_raw, + .write_raw_get_fmt = veml6030_write_raw_get_fmt, }; static irqreturn_t veml6030_event_handler(int irq, void *private) @@ -1066,6 +976,13 @@ static int veml6030_hw_init(struct iio_dev *indio_dev, struct device *dev) int ret, val; struct veml6030_data *data = iio_priv(indio_dev); + ret = devm_iio_init_iio_gts(dev, 2, 150400000, + veml6030_gain_sel, ARRAY_SIZE(veml6030_gain_sel), + veml6030_it_sel, ARRAY_SIZE(veml6030_it_sel), + &data->gts); + if (ret) + return dev_err_probe(dev, ret, "failed to init iio gts\n"); + ret = veml6030_als_shut_down(data); if (ret) return dev_err_probe(dev, ret, "can't shutdown als\n"); @@ -1101,11 +1018,6 @@ static int veml6030_hw_init(struct iio_dev *indio_dev, struct device *dev) return dev_err_probe(dev, ret, "can't clear als interrupt status\n"); - /* Cache currently active measurement parameters */ - data->cur_gain = 3; - data->cur_resolution = 5376; - data->cur_integration_time = 3; - return ret; } @@ -1121,6 +1033,13 @@ static int veml6035_hw_init(struct iio_dev *indio_dev, struct device *dev) int ret, val; struct veml6030_data *data = iio_priv(indio_dev); + ret = devm_iio_init_iio_gts(dev, 0, 409600000, + veml6035_gain_sel, ARRAY_SIZE(veml6035_gain_sel), + veml6030_it_sel, ARRAY_SIZE(veml6030_it_sel), + &data->gts); + if (ret) + return dev_err_probe(dev, ret, "failed to init iio gts\n"); + ret = veml6030_als_shut_down(data); if (ret) return dev_err_probe(dev, ret, "can't shutdown als\n"); @@ -1157,11 +1076,6 @@ static int veml6035_hw_init(struct iio_dev *indio_dev, struct device *dev) return dev_err_probe(dev, ret, "can't clear als interrupt status\n"); - /* Cache currently active measurement parameters */ - data->cur_gain = 5; - data->cur_resolution = 1024; - data->cur_integration_time = 3; - return 0; } @@ -1257,44 +1171,35 @@ static DEFINE_RUNTIME_DEV_PM_OPS(veml6030_pm_ops, veml6030_runtime_suspend, static const struct veml603x_chip veml6030_chip = { .name = "veml6030", - .scale_vals = &veml6030_scale_vals, - .num_scale_vals = ARRAY_SIZE(veml6030_scale_vals), .channels = veml6030_channels, .num_channels = ARRAY_SIZE(veml6030_channels), .gain_rf = VEML6030_GAIN_RF, .it_rf = VEML6030_IT_RF, + .max_scale = VEML6030_MAX_SCALE, .hw_init = veml6030_hw_init, .set_info = veml6030_set_info, - .set_als_gain = veml6030_set_als_gain, - .get_als_gain = veml6030_get_als_gain, }; static const struct veml603x_chip veml6035_chip = { .name = "veml6035", - .scale_vals = &veml6035_scale_vals, - .num_scale_vals = ARRAY_SIZE(veml6035_scale_vals), .channels = veml6030_channels, .num_channels = ARRAY_SIZE(veml6030_channels), .gain_rf = VEML6035_GAIN_RF, .it_rf = VEML6030_IT_RF, + .max_scale = VEML6035_MAX_SCALE, .hw_init = veml6035_hw_init, .set_info = veml6030_set_info, - .set_als_gain = veml6035_set_als_gain, - .get_als_gain = veml6035_get_als_gain, }; static const struct veml603x_chip veml7700_chip = { .name = "veml7700", - .scale_vals = &veml6030_scale_vals, - .num_scale_vals = ARRAY_SIZE(veml6030_scale_vals), .channels = veml7700_channels, .num_channels = ARRAY_SIZE(veml7700_channels), .gain_rf = VEML6030_GAIN_RF, .it_rf = VEML6030_IT_RF, + .max_scale = VEML6030_MAX_SCALE, .hw_init = veml6030_hw_init, .set_info = veml7700_set_info, - .set_als_gain = veml6030_set_als_gain, - .get_als_gain = veml6030_get_als_gain, }; static const struct of_device_id veml6030_of_match[] = { @@ -1336,3 +1241,4 @@ module_i2c_driver(veml6030_driver); MODULE_AUTHOR("Rishi Gupta "); MODULE_DESCRIPTION("VEML6030 Ambient Light Sensor"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS("IIO_GTS_HELPER"); From e8279e66a8dc8549f6bf457a4741d602958ecf07 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 27 Jan 2025 12:10:21 +0200 Subject: [PATCH 0116/2157] dt-bindings: iio: adf4371: add refin mode Add support for selecting between single-ended and differential reference input. Input frequency boundaries are change based on the mode selected (single-ended/differential). Signed-off-by: Antoniu Miclaus Acked-by: Conor Dooley Link: https://patch.msgid.link/20250127101026.5320-2-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/frequency/adf4371.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml index 1cb2adaf66f9..53d607441612 100644 --- a/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml +++ b/Documentation/devicetree/bindings/iio/frequency/adf4371.yaml @@ -30,8 +30,9 @@ properties: clock-names: description: - Must be "clkin" - maxItems: 1 + Must be "clkin" if the input reference is single ended or "clkin-diff" + if the input reference is differential. + enum: [clkin, clkin-diff] adi,mute-till-lock-en: type: boolean From e01670e31d04b743b30472bc9ec1dfd7b26af480 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 27 Jan 2025 12:10:22 +0200 Subject: [PATCH 0117/2157] iio: frequency: adf4371: add refin mode Add support for single-ended/differential reference input mode. Reviewed-by: Nuno Sa Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250127101026.5320-3-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/adf4371.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/iio/frequency/adf4371.c b/drivers/iio/frequency/adf4371.c index d752507e0c98..41155af3c4f6 100644 --- a/drivers/iio/frequency/adf4371.c +++ b/drivers/iio/frequency/adf4371.c @@ -42,6 +42,10 @@ #define ADF4371_MOD2WORD_MSK GENMASK(5, 0) #define ADF4371_MOD2WORD(x) FIELD_PREP(ADF4371_MOD2WORD_MSK, x) +/* ADF4371_REG22 */ +#define ADF4371_REFIN_MODE_MASK BIT(6) +#define ADF4371_REFIN_MODE(x) FIELD_PREP(ADF4371_REFIN_MODE_MASK, x) + /* ADF4371_REG24 */ #define ADF4371_RF_DIV_SEL_MSK GENMASK(6, 4) #define ADF4371_RF_DIV_SEL(x) FIELD_PREP(ADF4371_RF_DIV_SEL_MSK, x) @@ -70,6 +74,7 @@ #define ADF4371_MAX_FREQ_PFD 250000000UL /* Hz */ #define ADF4371_MAX_FREQ_REFIN 600000000UL /* Hz */ +#define ADF4371_MAX_FREQ_REFIN_SE 500000000UL /* Hz */ /* MOD1 is a 24-bit primary modulus with fixed value of 2^25 */ #define ADF4371_MODULUS1 33554432ULL @@ -176,6 +181,7 @@ struct adf4371_state { unsigned int mod2; unsigned int rf_div_sel; unsigned int ref_div_factor; + bool ref_diff_en; u8 buf[10] __aligned(IIO_DMA_MINALIGN); }; @@ -505,6 +511,17 @@ static int adf4371_setup(struct adf4371_state *st) ADF4371_ADDR_ASC(1) | ADF4371_ADDR_ASC_R(1)); if (ret < 0) return ret; + + if ((st->ref_diff_en && st->clkin_freq > ADF4371_MAX_FREQ_REFIN) || + (!st->ref_diff_en && st->clkin_freq > ADF4371_MAX_FREQ_REFIN_SE)) + return -EINVAL; + + ret = regmap_update_bits(st->regmap, ADF4371_REG(0x22), + ADF4371_REFIN_MODE_MASK, + ADF4371_REFIN_MODE(st->ref_diff_en)); + if (ret < 0) + return ret; + /* * Calculate and maximize PFD frequency * fPFD = REFIN × ((1 + D)/(R × (1 + T))) @@ -574,10 +591,16 @@ static int adf4371_probe(struct spi_device *spi) indio_dev->channels = st->chip_info->channels; indio_dev->num_channels = st->chip_info->num_channels; + st->ref_diff_en = false; + clkin = devm_clk_get_enabled(&spi->dev, "clkin"); - if (IS_ERR(clkin)) - return dev_err_probe(&spi->dev, PTR_ERR(clkin), - "Failed to get clkin\n"); + if (IS_ERR(clkin)) { + clkin = devm_clk_get_enabled(&spi->dev, "clkin-diff"); + if (IS_ERR(clkin)) + return dev_err_probe(&spi->dev, PTR_ERR(clkin), + "Failed to get clkin/clkin-diff\n"); + st->ref_diff_en = true; + } st->clkin_freq = clk_get_rate(clkin); From 80ce1f106a77186bc25c5c589957bb67c381e02e Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 27 Jan 2025 12:10:23 +0200 Subject: [PATCH 0118/2157] iio: frequency: adf4371: add ref doubler Add support for the reference doubler. Reviewed-by: Nuno Sa Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250127101026.5320-4-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/adf4371.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iio/frequency/adf4371.c b/drivers/iio/frequency/adf4371.c index 41155af3c4f6..9a84e81787b1 100644 --- a/drivers/iio/frequency/adf4371.c +++ b/drivers/iio/frequency/adf4371.c @@ -45,6 +45,8 @@ /* ADF4371_REG22 */ #define ADF4371_REFIN_MODE_MASK BIT(6) #define ADF4371_REFIN_MODE(x) FIELD_PREP(ADF4371_REFIN_MODE_MASK, x) +#define ADF4371_REF_DOUB_MASK BIT(5) +#define ADF4371_REF_DOUB(x) FIELD_PREP(ADF4371_REF_DOUB_MASK, x)\ /* ADF4371_REG24 */ #define ADF4371_RF_DIV_SEL_MSK GENMASK(6, 4) @@ -76,6 +78,9 @@ #define ADF4371_MAX_FREQ_REFIN 600000000UL /* Hz */ #define ADF4371_MAX_FREQ_REFIN_SE 500000000UL /* Hz */ +#define ADF4371_MIN_CLKIN_DOUB_FREQ 10000000ULL /* Hz */ +#define ADF4371_MAX_CLKIN_DOUB_FREQ 125000000ULL /* Hz */ + /* MOD1 is a 24-bit primary modulus with fixed value of 2^25 */ #define ADF4371_MODULUS1 33554432ULL /* MOD2 is the programmable, 14-bit auxiliary fractional modulus */ @@ -483,7 +488,7 @@ static const struct iio_info adf4371_info = { static int adf4371_setup(struct adf4371_state *st) { unsigned int synth_timeout = 2, timeout = 1, vco_alc_timeout = 1; - unsigned int vco_band_div, tmp; + unsigned int vco_band_div, tmp, ref_doubler_en = 0; int ret; /* Perform a software reset */ @@ -516,8 +521,14 @@ static int adf4371_setup(struct adf4371_state *st) (!st->ref_diff_en && st->clkin_freq > ADF4371_MAX_FREQ_REFIN_SE)) return -EINVAL; + if (st->clkin_freq < ADF4371_MAX_CLKIN_DOUB_FREQ && + st->clkin_freq > ADF4371_MIN_CLKIN_DOUB_FREQ) + ref_doubler_en = 1; + ret = regmap_update_bits(st->regmap, ADF4371_REG(0x22), + ADF4371_REF_DOUB_MASK | ADF4371_REFIN_MODE_MASK, + ADF4371_REF_DOUB(ref_doubler_en) | ADF4371_REFIN_MODE(st->ref_diff_en)); if (ret < 0) return ret; @@ -531,7 +542,8 @@ static int adf4371_setup(struct adf4371_state *st) */ do { st->ref_div_factor++; - st->fpfd = st->clkin_freq / st->ref_div_factor; + st->fpfd = st->clkin_freq * (1 + ref_doubler_en) / + st->ref_div_factor; } while (st->fpfd > ADF4371_MAX_FREQ_PFD); /* Calculate Timeouts */ From 34934d79965518548d33c0513a9572ae936d8c29 Mon Sep 17 00:00:00 2001 From: Guillaume Ranquet Date: Mon, 27 Jan 2025 14:59:32 +0100 Subject: [PATCH 0119/2157] iio: introduce the FAULT event type Add a new event type to describe an hardware failure. Reviewed-by: Nuno Sa Signed-off-by: Guillaume Ranquet Reviewed-by: David Lechner Link: https://patch.msgid.link/20250127-ad4111_openwire-v5-1-ef2db05c384f@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-event.c | 2 ++ include/uapi/linux/iio/types.h | 2 ++ tools/iio/iio_event_monitor.c | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index db06501b0e61..06295cfc2da8 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -232,6 +232,7 @@ static const char * const iio_ev_type_text[] = { [IIO_EV_TYPE_CHANGE] = "change", [IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced", [IIO_EV_TYPE_GESTURE] = "gesture", + [IIO_EV_TYPE_FAULT] = "fault", }; static const char * const iio_ev_dir_text[] = { @@ -240,6 +241,7 @@ static const char * const iio_ev_dir_text[] = { [IIO_EV_DIR_FALLING] = "falling", [IIO_EV_DIR_SINGLETAP] = "singletap", [IIO_EV_DIR_DOUBLETAP] = "doubletap", + [IIO_EV_DIR_FAULT_OPENWIRE] = "openwire", }; static const char * const iio_ev_info_text[] = { diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 12886d4465e4..3eb0821af7a4 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -119,6 +119,7 @@ enum iio_event_type { IIO_EV_TYPE_CHANGE, IIO_EV_TYPE_MAG_REFERENCED, IIO_EV_TYPE_GESTURE, + IIO_EV_TYPE_FAULT, }; enum iio_event_direction { @@ -128,6 +129,7 @@ enum iio_event_direction { IIO_EV_DIR_NONE, IIO_EV_DIR_SINGLETAP, IIO_EV_DIR_DOUBLETAP, + IIO_EV_DIR_FAULT_OPENWIRE, }; #endif /* _UAPI_IIO_TYPES_H_ */ diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index cccf62ea2b8f..eab7b082f19d 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -75,6 +75,7 @@ static const char * const iio_ev_type_text[] = { [IIO_EV_TYPE_CHANGE] = "change", [IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced", [IIO_EV_TYPE_GESTURE] = "gesture", + [IIO_EV_TYPE_FAULT] = "fault", }; static const char * const iio_ev_dir_text[] = { @@ -83,6 +84,7 @@ static const char * const iio_ev_dir_text[] = { [IIO_EV_DIR_FALLING] = "falling", [IIO_EV_DIR_SINGLETAP] = "singletap", [IIO_EV_DIR_DOUBLETAP] = "doubletap", + [IIO_EV_DIR_FAULT_OPENWIRE] = "openwire", }; static const char * const iio_modifier_names[] = { @@ -249,6 +251,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_EV_TYPE_MAG_ADAPTIVE: case IIO_EV_TYPE_CHANGE: case IIO_EV_TYPE_GESTURE: + case IIO_EV_TYPE_FAULT: break; default: return false; @@ -260,6 +263,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_EV_DIR_FALLING: case IIO_EV_DIR_SINGLETAP: case IIO_EV_DIR_DOUBLETAP: + case IIO_EV_DIR_FAULT_OPENWIRE: case IIO_EV_DIR_NONE: break; default: From 7530ed2aaa3f49325cad3ca80aa245897ed10e32 Mon Sep 17 00:00:00 2001 From: Guillaume Ranquet Date: Mon, 27 Jan 2025 14:59:33 +0100 Subject: [PATCH 0120/2157] iio: adc: ad7173: add openwire detection support for single conversions Some chips of the ad7173 family supports open wire detection. Generate a level fault event whenever an external source is disconnected from the system input on single conversions. Reviewed-by: Nuno Sa Signed-off-by: Guillaume Ranquet Reviewed-by: David Lechner Link: https://patch.msgid.link/20250127-ad4111_openwire-v5-2-ef2db05c384f@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 179 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 8b438c689594..54f9cc5a89f5 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -102,6 +103,7 @@ #define AD7173_GPIO_PDSW BIT(14) #define AD7173_GPIO_OP_EN2_3 BIT(13) +#define AD4111_GPIO_GP_OW_EN BIT(12) #define AD7173_GPIO_MUX_IO BIT(12) #define AD7173_GPIO_SYNC_EN BIT(11) #define AD7173_GPIO_ERR_EN BIT(10) @@ -149,6 +151,7 @@ #define AD7173_FILTER_ODR0_MASK GENMASK(5, 0) #define AD7173_MAX_CONFIGS 8 +#define AD4111_OW_DET_THRSH_MV 300 #define AD7173_MODE_CAL_INT_ZERO 0x4 /* Internal Zero-Scale Calibration */ #define AD7173_MODE_CAL_INT_FULL 0x5 /* Internal Full-Scale Calibration */ @@ -182,11 +185,15 @@ struct ad7173_device_info { bool has_int_ref; bool has_ref2; bool has_internal_fs_calibration; + bool has_openwire_det; bool higher_gpio_bits; u8 num_gpios; }; struct ad7173_channel_config { + /* Openwire detection threshold */ + unsigned int openwire_thrsh_raw; + int openwire_comp_chan; u8 cfg_slot; bool live; @@ -203,6 +210,7 @@ struct ad7173_channel { unsigned int ain; struct ad7173_channel_config cfg; u8 syscalib_mode; + bool openwire_det_en; }; struct ad7173_state { @@ -394,6 +402,76 @@ static int ad7173_calibrate_all(struct ad7173_state *st, struct iio_dev *indio_d return 0; } +/* + * Associative array of channel pairs for open wire detection + * The array is indexed by ain and gives the associated channel pair + * to perform the open wire detection with + * the channel pair [0] is for non differential and pair [1] + * is for differential inputs + */ +static int openwire_ain_to_channel_pair[][2][2] = { +/* AIN Single Differential */ + [0] = { { 0, 15 }, { 1, 2 } }, + [1] = { { 1, 2 }, { 2, 1 } }, + [2] = { { 3, 4 }, { 5, 6 } }, + [3] = { { 5, 6 }, { 6, 5 } }, + [4] = { { 7, 8 }, { 9, 10 } }, + [5] = { { 9, 10 }, { 10, 9 } }, + [6] = { { 11, 12 }, { 13, 14 } }, + [7] = { { 13, 14 }, { 14, 13 } }, +}; + +/* + * Openwire detection on ad4111 works by running the same input measurement + * on two different channels and compare if the difference between the two + * measurements exceeds a certain value (typical 300mV) + */ +static int ad4111_openwire_event(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad7173_state *st = iio_priv(indio_dev); + struct ad7173_channel *adchan = &st->channels[chan->address]; + struct ad7173_channel_config *cfg = &adchan->cfg; + int ret, val1, val2; + + ret = regmap_set_bits(st->reg_gpiocon_regmap, AD7173_REG_GPIO, + AD4111_GPIO_GP_OW_EN); + if (ret) + return ret; + + adchan->cfg.openwire_comp_chan = + openwire_ain_to_channel_pair[chan->channel][chan->differential][0]; + + ret = ad_sigma_delta_single_conversion(indio_dev, chan, &val1); + if (ret < 0) { + dev_err(&indio_dev->dev, + "Error running ad_sigma_delta single conversion: %d", ret); + goto out; + } + + adchan->cfg.openwire_comp_chan = + openwire_ain_to_channel_pair[chan->channel][chan->differential][1]; + + ret = ad_sigma_delta_single_conversion(indio_dev, chan, &val2); + if (ret < 0) { + dev_err(&indio_dev->dev, + "Error running ad_sigma_delta single conversion: %d", ret); + goto out; + } + + if (abs(val1 - val2) > cfg->openwire_thrsh_raw) + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, chan->address, + IIO_EV_TYPE_FAULT, IIO_EV_DIR_FAULT_OPENWIRE), + iio_get_time_ns(indio_dev)); + +out: + adchan->cfg.openwire_comp_chan = -1; + regmap_clear_bits(st->reg_gpiocon_regmap, AD7173_REG_GPIO, + AD4111_GPIO_GP_OW_EN); + return ret; +} + static int ad7173_mask_xlate(struct gpio_regmap *gpio, unsigned int base, unsigned int offset, unsigned int *reg, unsigned int *mask) @@ -591,6 +669,9 @@ static int ad7173_set_channel(struct ad_sigma_delta *sd, unsigned int channel) FIELD_PREP(AD7173_CH_SETUP_SEL_MASK, st->channels[channel].cfg.cfg_slot) | st->channels[channel].ain; + if (st->channels[channel].cfg.openwire_comp_chan >= 0) + channel = st->channels[channel].cfg.openwire_comp_chan; + return ad_sd_write_reg(&st->sd, AD7173_REG_CH(channel), 2, val); } @@ -639,6 +720,11 @@ static int ad7173_disable_all(struct ad_sigma_delta *sd) static int ad7173_disable_one(struct ad_sigma_delta *sd, unsigned int chan) { + struct ad7173_state *st = ad_sigma_delta_to_ad7173(sd); + + if (st->channels[chan].cfg.openwire_comp_chan >= 0) + chan = st->channels[chan].cfg.openwire_comp_chan; + return ad_sd_write_reg(sd, AD7173_REG_CH(chan), 2, 0); } @@ -690,6 +776,7 @@ static const struct ad7173_device_info ad4111_device_info = { .has_current_inputs = true, .has_int_ref = true, .has_internal_fs_calibration = true, + .has_openwire_det = true, .clock = 2 * HZ_PER_MHZ, .sinc5_data_rates = ad7173_sinc5_data_rates, .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), @@ -1000,6 +1087,12 @@ static int ad7173_read_raw(struct iio_dev *indio_dev, if (ret < 0) return ret; + if (ch->openwire_det_en) { + ret = ad4111_openwire_event(indio_dev, chan); + if (ret < 0) + return ret; + } + return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: @@ -1144,12 +1237,57 @@ static int ad7173_debug_reg_access(struct iio_dev *indio_dev, unsigned int reg, return ad_sd_write_reg(&st->sd, reg, reg_size, writeval); } +static int ad7173_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + bool state) +{ + struct ad7173_state *st = iio_priv(indio_dev); + struct ad7173_channel *adchan = &st->channels[chan->address]; + + switch (type) { + case IIO_EV_TYPE_FAULT: + adchan->openwire_det_en = state; + return 0; + default: + return -EINVAL; + } +} + +static int ad7173_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7173_state *st = iio_priv(indio_dev); + struct ad7173_channel *adchan = &st->channels[chan->address]; + + switch (type) { + case IIO_EV_TYPE_FAULT: + return adchan->openwire_det_en; + default: + return -EINVAL; + } +} + +static const struct iio_event_spec ad4111_events[] = { + { + .type = IIO_EV_TYPE_FAULT, + .dir = IIO_EV_DIR_FAULT_OPENWIRE, + .mask_separate = BIT(IIO_EV_INFO_VALUE), + .mask_shared_by_all = BIT(IIO_EV_INFO_ENABLE), + }, +}; + static const struct iio_info ad7173_info = { .read_raw = &ad7173_read_raw, .write_raw = &ad7173_write_raw, .debugfs_reg_access = &ad7173_debug_reg_access, .validate_trigger = ad_sd_validate_trigger, .update_scan_mode = ad7173_update_scan_mode, + .write_event_config = ad7173_write_event_config, + .read_event_config = ad7173_read_event_config, }; static const struct iio_scan_type ad4113_scan_type = { @@ -1353,6 +1491,37 @@ static int ad7173_validate_reference(struct ad7173_state *st, int ref_sel) return 0; } +static int ad7173_validate_openwire_ain_inputs(struct ad7173_state *st, + bool differential, + unsigned int ain0, + unsigned int ain1) +{ + /* + * If the channel is configured as differential, + * the ad4111 requires specific ains to be used together + */ + if (differential) + return (ain0 % 2) ? (ain0 - 1) == ain1 : (ain0 + 1) == ain1; + + return ain1 == AD4111_VINCOM_INPUT; +} + +static unsigned int ad7173_calc_openwire_thrsh_raw(struct ad7173_state *st, + struct iio_chan_spec *chan, + struct ad7173_channel *chan_st_priv, + unsigned int thrsh_mv) { + unsigned int thrsh_raw; + + thrsh_raw = + BIT(chan->scan_type.realbits - !!(chan_st_priv->cfg.bipolar)) + * thrsh_mv + / ad7173_get_ref_voltage_milli(st, chan_st_priv->cfg.ref_sel); + if (chan->channel < st->info->num_voltage_in_div) + thrsh_raw /= AD4111_DIVIDER_RATIO; + + return thrsh_raw; +} + static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) { struct ad7173_channel *chans_st_arr, *chan_st_priv; @@ -1400,6 +1569,7 @@ static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) chan_st_priv->cfg.bipolar = false; chan_st_priv->cfg.input_buf = st->info->has_input_buf; chan_st_priv->cfg.ref_sel = AD7173_SETUP_REF_SEL_INT_REF; + chan_st_priv->cfg.openwire_comp_chan = -1; st->adc_mode |= AD7173_ADC_MODE_REF_EN; if (st->info->data_reg_only_16bit) chan_arr[chan_index].scan_type = ad4113_scan_type; @@ -1466,6 +1636,7 @@ static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) chan->channel = ain[0]; chan_st_priv->cfg.input_buf = st->info->has_input_buf; chan_st_priv->cfg.odr = 0; + chan_st_priv->cfg.openwire_comp_chan = -1; chan_st_priv->cfg.bipolar = fwnode_property_read_bool(child, "bipolar"); if (chan_st_priv->cfg.bipolar) @@ -1480,6 +1651,14 @@ static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) chan_st_priv->cfg.input_buf = st->info->has_input_buf; chan->channel2 = ain[1]; chan_st_priv->ain = AD7173_CH_ADDRESS(ain[0], ain[1]); + if (st->info->has_openwire_det && + ad7173_validate_openwire_ain_inputs(st, chan->differential, ain[0], ain[1])) { + chan->event_spec = ad4111_events; + chan->num_event_specs = ARRAY_SIZE(ad4111_events); + chan_st_priv->cfg.openwire_thrsh_raw = + ad7173_calc_openwire_thrsh_raw(st, chan, chan_st_priv, + AD4111_OW_DET_THRSH_MV); + } } if (st->info->data_reg_only_16bit) From 7b465a0d58c19a45ddf81c90bc8ba04693de038b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Jan 2025 17:24:40 +0200 Subject: [PATCH 0121/2157] iio: light: adux1020: Drop unneeded assignment for cache_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REGCACHE_NONE is the default type of the cache when not provided. Drop unneeded explicit assignment to it. Note, it's defined to 0, and if ever be redefined, it will break literally a lot of the drivers, so it very unlikely to happen. Signed-off-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250129152546.1798306-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/adux1020.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/light/adux1020.c b/drivers/iio/light/adux1020.c index 593d614b1689..9240983a6cc4 100644 --- a/drivers/iio/light/adux1020.c +++ b/drivers/iio/light/adux1020.c @@ -118,7 +118,6 @@ static const struct regmap_config adux1020_regmap_config = { .reg_bits = 8, .val_bits = 16, .max_register = 0x6F, - .cache_type = REGCACHE_NONE, }; static const struct reg_sequence adux1020_def_conf[] = { From 23a6374c7ba6974c84753b8449299b710b28b0ff Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Jan 2025 17:24:41 +0200 Subject: [PATCH 0122/2157] iio: magnetometer: af8133j: Drop unneeded assignment for cache_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REGCACHE_NONE is the default type of the cache when not provided. Drop unneeded explicit assignment to it. Note, it's defined to 0, and if ever be redefined, it will break literally a lot of the drivers, so it very unlikely to happen. Signed-off-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250129152546.1798306-3-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/af8133j.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/magnetometer/af8133j.c b/drivers/iio/magnetometer/af8133j.c index a70bf8a3c73b..c1fc339e85b4 100644 --- a/drivers/iio/magnetometer/af8133j.c +++ b/drivers/iio/magnetometer/af8133j.c @@ -383,7 +383,6 @@ static const struct regmap_config af8133j_regmap_config = { .reg_bits = 8, .val_bits = 8, .max_register = AF8133J_REG_SWR, - .cache_type = REGCACHE_NONE, }; static void af8133j_power_down_action(void *ptr) From 5c2c07a18c7d9763bcc6e47466c6bbd40fcd61fc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Jan 2025 17:24:42 +0200 Subject: [PATCH 0123/2157] iio: pressure: zpa2326: Drop unneeded assignment for cache_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REGCACHE_NONE is the default type of the cache when not provided. Drop unneeded explicit assignment to it. Note, it's defined to 0, and if ever be redefined, it will break literally a lot of the drivers, so it very unlikely to happen. Signed-off-by: Andy Shevchenko Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250129152546.1798306-4-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/zpa2326_i2c.c | 1 - drivers/iio/pressure/zpa2326_spi.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/iio/pressure/zpa2326_i2c.c b/drivers/iio/pressure/zpa2326_i2c.c index 49a239ebdabf..a6034bf05d97 100644 --- a/drivers/iio/pressure/zpa2326_i2c.c +++ b/drivers/iio/pressure/zpa2326_i2c.c @@ -25,7 +25,6 @@ static const struct regmap_config zpa2326_regmap_i2c_config = { .precious_reg = zpa2326_isreg_precious, .max_register = ZPA2326_TEMP_OUT_H_REG, .read_flag_mask = BIT(7), - .cache_type = REGCACHE_NONE, }; static unsigned int zpa2326_i2c_hwid(const struct i2c_client *client) diff --git a/drivers/iio/pressure/zpa2326_spi.c b/drivers/iio/pressure/zpa2326_spi.c index 317270fa1c43..c678f5b96266 100644 --- a/drivers/iio/pressure/zpa2326_spi.c +++ b/drivers/iio/pressure/zpa2326_spi.c @@ -26,7 +26,6 @@ static const struct regmap_config zpa2326_regmap_spi_config = { .precious_reg = zpa2326_isreg_precious, .max_register = ZPA2326_TEMP_OUT_H_REG, .read_flag_mask = BIT(7) | BIT(6), - .cache_type = REGCACHE_NONE, }; static int zpa2326_probe_spi(struct spi_device *spi) From e903868b4ce73d1ba3663d5e0718424946cebd99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 4 Feb 2025 12:50:23 +0100 Subject: [PATCH 0124/2157] iio: adc: ad7124: Really disable all channels at probe time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If one or more of the 16 channels are enabled and the driver is not aware of that, unexpected things happen because different channels are used than intended. To prevent that, all channels should be disabled at probe time. In Commit 4be339af334c ("iio: adc: ad7124: Disable all channels at probe time") I intended do that, however only the channels that are potentially used by the driver and not all channels are disabled since then. So disable all 16 channels and not only the used ones. Also fix the same issue in the .disable_all() callback. Fixes: 4be339af334c ("iio: adc: ad7124: Disable all channels at probe time") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250204115023.265813-2-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 2fdeb3247952..6bc418d38820 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -550,11 +550,10 @@ static int ad7124_disable_one(struct ad_sigma_delta *sd, unsigned int chan) static int ad7124_disable_all(struct ad_sigma_delta *sd) { - struct ad7124_state *st = container_of(sd, struct ad7124_state, sd); int ret; int i; - for (i = 0; i < st->num_channels; i++) { + for (i = 0; i < 16; i++) { ret = ad7124_disable_one(sd, i); if (ret < 0) return ret; @@ -1017,11 +1016,10 @@ static int ad7124_setup(struct ad7124_state *st) * set all channels to this default value. */ ad7124_set_channel_odr(st, i, 10); - - /* Disable all channels to prevent unintended conversions. */ - ad_sd_write_reg(&st->sd, AD7124_CHANNEL(i), 2, 0); } + ad7124_disable_all(&st->sd); + ret = ad_sd_write_reg(&st->sd, AD7124_ADC_CONTROL, 2, st->adc_control); if (ret < 0) return dev_err_probe(dev, ret, "Failed to setup CONTROL register\n"); From fdaa9b763e3609af3efc57adcc70b77030fa6dd7 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Tue, 4 Feb 2025 12:00:39 -0300 Subject: [PATCH 0125/2157] Documentation: ABI: IIO: Add filter_type documentation A previous patch added documentation for filter_type_available attributes. However, the description for the value attribute (filter_type) was missing. Add documentation for filter_type sysfs ABI. Fixes: 01bb12922b60 ("Documentation: ABI: added filter mode doc in sysfs-bus-iio") Signed-off-by: Marcelo Schmitt Reviewed-by: David Lechner Link: https://patch.msgid.link/a8dbccac909e8d11e7d47561935a5575b1354d3a.1738680728.git.marcelo.schmitt@analog.com Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 25d366d452a5..6a2543aa36eb 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -2291,6 +2291,14 @@ Description: * "sinc3+pf3" - Sinc3 + device specific Post Filter 3. * "sinc3+pf4" - Sinc3 + device specific Post Filter 4. +What: /sys/bus/iio/devices/iio:deviceX/filter_type +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_type +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies which filter type apply to the channel. The possible + values are given by the filter_type_available attribute. + What: /sys/.../events/in_proximity_thresh_either_runningperiod KernelVersion: 6.6 Contact: linux-iio@vger.kernel.org From c7eb65a37671191e89e0308cdd82616359fcf7bc Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Tue, 4 Feb 2025 12:00:57 -0300 Subject: [PATCH 0126/2157] Documentation: ABI: IIO: Re-add sysfs-bus-iio-adc-ad4130 The ad4130 driver exports in_voltageY-voltageZ_filter_mode and in_voltage-voltage_filter_mode_available attributes to user space. A previous patch merged the documentation for those attributes with the documentation for filter_type/filter_type_available into sysfs-bus-iio. Filter mode and filter type refer to the same feature which is the digital filter applied over ADC samples. However, since datasheets use the term `filter type` and ad4130 driver is the only one using filter_mode, deprecate the filter_mode ABI in favor of filter_type and keep the docs separate to avoid confusion and intricate attribute descriptions. Fixes: 01bb12922b60 ("Documentation: ABI: added filter mode doc in sysfs-bus-iio") Signed-off-by: Marcelo Schmitt Reviewed-by: David Lechner Link: https://patch.msgid.link/c77b2d65f1115c1c394582f55944d6f685058f9c.1738680728.git.marcelo.schmitt@analog.com Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 2 +- .../ABI/testing/sysfs-bus-iio-adc-ad4130 | 20 +++++++++++++++++++ MAINTAINERS | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 6a2543aa36eb..b8838cb92d38 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -2268,7 +2268,7 @@ Description: representing the sensor unique ID number. What: /sys/bus/iio/devices/iio:deviceX/filter_type_available -What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available +What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_type_available KernelVersion: 6.1 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 new file mode 100644 index 000000000000..d3fad27421d6 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 @@ -0,0 +1,20 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns a list with the possible filter modes. + + This ABI is only kept for backwards compatibility and the values + returned are identical to filter_type_available attribute + documented in Documentation/ABI/testing/sysfs-bus-iio. Please, + use filter_type_available like ABI to provide filter options for + new drivers. + +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode +KernelVersion: 6.2 +Contact: linux-iio@vger.kernel.org +Description: + This ABI is only kept for backwards compatibility and the values + returned are identical to in_voltageY-voltageZ_filter_type + attribute documented in Documentation/ABI/testing/sysfs-bus-iio. + Please, use in_voltageY-voltageZ_filter_type for new drivers. diff --git a/MAINTAINERS b/MAINTAINERS index 311675697a82..a84fa86716e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1311,6 +1311,7 @@ M: Cosmin Tanislav L: linux-iio@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers +F: Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 F: Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml F: drivers/iio/adc/ad4130.c From 0c5d8af2a5fd5e5a9d17ad41e81501a12245bfc8 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Tue, 4 Feb 2025 12:01:14 -0300 Subject: [PATCH 0127/2157] iio: adc: ad4130: Add filter_type attributes Make filter control also available through filter_type attributes which are now standardized in main IIO ABI documentation. Suggested-by: David Lechner Signed-off-by: Marcelo Schmitt Reviewed-by: David Lechner Link: https://patch.msgid.link/61a87b288552cad9e925a9af4eb33022d14a4617.1738680728.git.marcelo.schmitt@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index de32cc9d18c5..acc241cc0a7a 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -939,9 +939,16 @@ static const struct iio_enum ad4130_filter_mode_enum = { }; static const struct iio_chan_spec_ext_info ad4130_filter_mode_ext_info[] = { + /* + * Intentional duplication of attributes to keep backwards compatibility + * while standardizing over the main IIO ABI for digital filtering. + */ IIO_ENUM("filter_mode", IIO_SEPARATE, &ad4130_filter_mode_enum), IIO_ENUM_AVAILABLE("filter_mode", IIO_SHARED_BY_TYPE, &ad4130_filter_mode_enum), + IIO_ENUM("filter_type", IIO_SEPARATE, &ad4130_filter_mode_enum), + IIO_ENUM_AVAILABLE("filter_type", IIO_SHARED_BY_TYPE, + &ad4130_filter_mode_enum), { } }; From b312d880fb462d4759396950865ec914de9d253c Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 8 Feb 2025 10:44:26 +0800 Subject: [PATCH 0128/2157] tools/power turbostat: Allow Zero return value for some RAPL registers turbostat aborted with below messages on a dual-package system, turbostat: turbostat.c:3744: rapl_counter_accumulate: Assertion `dst->unit == src->unit' failed. Aborted This is because 1. the MSR_DRAM_PERF_STATUS returns Zero for one package, and non-Zero for another package 2. probe_msr() treats Zero return value as a failure so this feature is enabled on one package, and disabled for another package. 3. turbostat aborts because the feature is invalid on some package Unlike the RAPL energy counter registers, MSR_DRAM_PERF_STATUS can return Zero value, and this should not be treated as a failure. Fix the problem by allowing Zero return value for RAPL registers other than the energy counters. Fixes: 7c6fee25bdf5 ("tools/power turbostat: Check for non-zero value when MSR probing") Reported-by: Artem Bityutskiy Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5eef95956c2f..d26008f37a2c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2211,7 +2211,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } -int probe_msr(int cpu, off_t offset) +int probe_rapl_msr(int cpu, off_t offset, int index) { ssize_t retval; unsigned long long value; @@ -2220,13 +2220,22 @@ int probe_msr(int cpu, off_t offset) retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); - /* - * Expect MSRs to accumulate some non-zero value since the system was powered on. - * Treat zero as a read failure. - */ - if (retval != sizeof(value) || value == 0) + /* if the read failed, the probe fails */ + if (retval != sizeof(value)) return 1; + /* If an Energy Status Counter MSR returns 0, the probe fails */ + switch (index) { + case RAPL_RCI_INDEX_ENERGY_PKG: + case RAPL_RCI_INDEX_ENERGY_CORES: + case RAPL_RCI_INDEX_DRAM: + case RAPL_RCI_INDEX_GFX: + case RAPL_RCI_INDEX_ENERGY_PLATFORM: + if (value == 0) + return 1; + } + + /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ return 0; } @@ -7907,7 +7916,7 @@ void rapl_perf_init(void) rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; @@ -8045,7 +8054,7 @@ void msr_perf_init_(void) cai->present = true; /* User MSR for this counter */ - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; cci->msr_mask[cai->rci_index] = cai->msr_mask; @@ -8159,7 +8168,7 @@ void cstate_perf_init_(bool soft_c1) /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit - && probe_msr(cpu, cai->msr) == 0) { + && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } From 5132681dcd96b2a8c357b6e5d93e9876924bb80b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 8 Feb 2025 12:55:42 +0200 Subject: [PATCH 0129/2157] tools/power turbostat: Fix names matching Fix the 'find_msrp_by_name()' function which returns incorrect matches for cases like this: s1 = "C1-"; find_msrp_by_name(head, s1); Inside 'find_msrp_by_name()': ... s2 = "C1" if !(strcnmp(s1, s2, len(s2))) // Incorrect match! return mp; Full strings should be match istead. Switch to 'strcmp()' to fix the problem. Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d26008f37a2c..d3af2bf307e1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9612,7 +9612,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); - if (!strncmp(name, mp->name, strlen(mp->name))) + if (!strcmp(name, mp->name)) return mp; } return NULL; From 31d43141d13aa63587f140884b1f667800ce4e1d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2025 16:57:09 +0200 Subject: [PATCH 0130/2157] dmaengine: Replace dma_request_slave_channel() by dma_request_chan() Replace dma_request_slave_channel() by dma_request_chan() as suggested since the former is deprecated. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250205145757.889247-2-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 5 ++--- include/linux/dmaengine.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 3449006cd14b..02a85d6f1bea 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1459,9 +1459,8 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) * dmatest, thus create 'struct imx_dma_data mem_data' for this case. * Please note in any other slave case, you have to setup chan->private * with 'struct imx_dma_data' in your own filter function if you want to - * request dma channel by dma_request_channel() rather than - * dma_request_slave_channel(). Othwise, 'MEMCPY in case?' will appear - * to warn you to correct your filter function. + * request DMA channel by dma_request_channel(), otherwise, 'MEMCPY in + * case?' will appear to warn you to correct your filter function. */ if (!data) { dev_dbg(sdmac->sdma->dev, "MEMCPY in case?\n"); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 346251bf1026..83cbf7197a76 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1639,8 +1639,8 @@ static inline struct dma_chan { struct dma_chan *chan; - chan = dma_request_slave_channel(dev, name); - if (chan) + chan = dma_request_chan(dev, name); + if (!IS_ERR(chan)) return chan; if (!fn || !fn_param) From 1c83d3dfa0905590408595560629627cba4f9261 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2025 16:57:10 +0200 Subject: [PATCH 0131/2157] dmaengine: Use dma_request_channel() instead of __dma_request_channel() Reduce use of internal __dma_request_channel() function in public dmaengine.h. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250205145757.889247-3-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 83cbf7197a76..a360e0330436 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1646,7 +1646,7 @@ static inline struct dma_chan if (!fn || !fn_param) return NULL; - return __dma_request_channel(&mask, fn, fn_param, NULL); + return dma_request_channel(mask, fn, fn_param); } static inline char * From 1722fb4a1307748f983c1345c4c24178d8e0be47 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2025 16:57:11 +0200 Subject: [PATCH 0132/2157] dmaengine: Add a comment on why it's okay when kasprintf() fails Add a comment in dma_request_chan() to clarify kasprintf() missing return value check and it is correct functionality. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250205145757.889247-4-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index c1357d7f3dc6..dd4224d90f07 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -854,8 +854,8 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name) found: #ifdef CONFIG_DEBUG_FS - chan->dbg_client_name = kasprintf(GFP_KERNEL, "%s:%s", dev_name(dev), - name); + chan->dbg_client_name = kasprintf(GFP_KERNEL, "%s:%s", dev_name(dev), name); + /* No functional issue if it fails, users are supposed to test before use */ #endif chan->name = kasprintf(GFP_KERNEL, "dma:%s", name); From 91d8560c15918c7d44e4f665fac829ba8057a2f3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2025 16:57:12 +0200 Subject: [PATCH 0133/2157] dmaengine: Unify checks in dma_request_chan() Use dev_fwnode() to simplify the check logic for Device Tree and ACPI in dma_request_chan(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250205145757.889247-5-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index dd4224d90f07..758fcd0546d8 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -812,15 +814,13 @@ static const struct dma_slave_map *dma_filter_match(struct dma_device *device, */ struct dma_chan *dma_request_chan(struct device *dev, const char *name) { + struct fwnode_handle *fwnode = dev_fwnode(dev); struct dma_device *d, *_d; struct dma_chan *chan = NULL; - /* If device-tree is present get slave info from here */ - if (dev->of_node) - chan = of_dma_request_slave_channel(dev->of_node, name); - - /* If device was enumerated by ACPI get slave info from here */ - if (has_acpi_companion(dev) && !chan) + if (is_of_node(fwnode)) + chan = of_dma_request_slave_channel(to_of_node(fwnode), name); + else if (is_acpi_device_node(fwnode)) chan = acpi_dma_request_slave_chan_by_name(dev, name); if (PTR_ERR(chan) == -EPROBE_DEFER) From 1e137d53e8471b45257d937e9773b61a88807fe7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2025 17:06:48 +0200 Subject: [PATCH 0134/2157] dmaengine: dw: Switch to LATE_SIMPLE_DEV_PM_OPS() SET_LATE_SYSTEM_SLEEP_PM_OPS is deprecated, replace it with LATE_SYSTEM_SLEEP_PM_OPS() and use pm_sleep_ptr() for setting the driver's pm routines. We can now remove the ifdeffery in the suspend and resume functions. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20250205150701.893083-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dw/pci.c | 8 ++------ drivers/dma/dw/platform.c | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index e8a0eb81726a..a3aae3d1c093 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -76,8 +76,6 @@ static void dw_pci_remove(struct pci_dev *pdev) dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret); } -#ifdef CONFIG_PM_SLEEP - static int dw_pci_suspend_late(struct device *dev) { struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); @@ -94,10 +92,8 @@ static int dw_pci_resume_early(struct device *dev) return do_dw_dma_enable(chip); }; -#endif /* CONFIG_PM_SLEEP */ - static const struct dev_pm_ops dw_pci_dev_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early) + LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early) }; static const struct pci_device_id dw_pci_id_table[] = { @@ -136,7 +132,7 @@ static struct pci_driver dw_pci_driver = { .probe = dw_pci_probe, .remove = dw_pci_remove, .driver = { - .pm = &dw_pci_dev_pm_ops, + .pm = pm_sleep_ptr(&dw_pci_dev_pm_ops), }, }; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 2606cf9cd429..cee56cd31a61 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -157,8 +157,6 @@ static const struct acpi_device_id dw_dma_acpi_id_table[] = { MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); #endif -#ifdef CONFIG_PM_SLEEP - static int dw_suspend_late(struct device *dev) { struct dw_dma_chip_pdata *data = dev_get_drvdata(dev); @@ -183,10 +181,8 @@ static int dw_resume_early(struct device *dev) return do_dw_dma_enable(chip); } -#endif /* CONFIG_PM_SLEEP */ - static const struct dev_pm_ops dw_dev_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early) + LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early) }; static struct platform_driver dw_driver = { @@ -195,7 +191,7 @@ static struct platform_driver dw_driver = { .shutdown = dw_shutdown, .driver = { .name = DRV_NAME, - .pm = &dw_dev_pm_ops, + .pm = pm_sleep_ptr(&dw_dev_pm_ops), .of_match_table = of_match_ptr(dw_dma_of_id_table), .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), }, From 1c4c8609d498173382913b8ef6a105f3e6ff3472 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 5 Feb 2025 10:14:55 +0100 Subject: [PATCH 0135/2157] dmaengine: fsl-edma: Add missing newlines to log messages Not all log messages have a newline at the end. So fix it. Signed-off-by: Stefan Wahren Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250205091455.4593-1-wahrenst@gmx.net Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index f989b6c9c0a9..760c9e3e374c 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -164,7 +164,7 @@ static bool fsl_edma_srcid_in_use(struct fsl_edma_engine *fsl_edma, u32 srcid) fsl_chan = &fsl_edma->chans[i]; if (fsl_chan->srcid && srcid == fsl_chan->srcid) { - dev_err(&fsl_chan->pdev->dev, "The srcid is in use, can't use!"); + dev_err(&fsl_chan->pdev->dev, "The srcid is in use, can't use!\n"); return true; } } @@ -822,7 +822,7 @@ static int fsl_edma_suspend_late(struct device *dev) spin_lock_irqsave(&fsl_chan->vchan.lock, flags); /* Make sure chan is idle or will force disable. */ if (unlikely(fsl_chan->status == DMA_IN_PROGRESS)) { - dev_warn(dev, "WARN: There is non-idle channel."); + dev_warn(dev, "WARN: There is non-idle channel.\n"); fsl_edma_disable_request(fsl_chan); fsl_edma_chan_mux(fsl_chan, 0, false); } From a54ec770396ce2b6e786acde22f4ebed8d1e9555 Mon Sep 17 00:00:00 2001 From: Durai Manickam KR Date: Mon, 3 Feb 2025 11:48:09 +0530 Subject: [PATCH 0136/2157] dt-bindings: dma: convert atmel-dma.txt to YAML Add a description, required properties, appropriate compatibles and missing properties like clocks and clock-names which are not defined in the text binding for all the SoCs that are supported by microchip. Update the text binding name `atmel-dma.txt` to `atmel,at91sam9g45-dma.yaml` for the files which reference to `atmel-dma.txt`. Drop Tudor name from maintainers. Signed-off-by: Durai Manickam KR Signed-off-by: Charan Pedumuru Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250203-test-v4-1-a9ec3eded1c7@microchip.com Signed-off-by: Vinod Koul --- .../bindings/dma/atmel,at91sam9g45-dma.yaml | 68 +++++++++++++++++++ .../devicetree/bindings/dma/atmel-dma.txt | 42 ------------ .../devicetree/bindings/misc/atmel-ssc.txt | 2 +- MAINTAINERS | 2 +- 4 files changed, 70 insertions(+), 44 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml delete mode 100644 Documentation/devicetree/bindings/dma/atmel-dma.txt diff --git a/Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml b/Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml new file mode 100644 index 000000000000..a58dc407311b --- /dev/null +++ b/Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/atmel,at91sam9g45-dma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Atmel Direct Memory Access Controller (DMA) + +maintainers: + - Ludovic Desroches + +description: + The Atmel Direct Memory Access Controller (DMAC) transfers data from a source + peripheral to a destination peripheral over one or more AMBA buses. One channel + is required for each source/destination pair. In the most basic configuration, + the DMAC has one master interface and one channel. The master interface reads + the data from a source and writes it to a destination. Two AMBA transfers are + required for each DMAC data transfer. This is also known as a dual-access transfer. + The DMAC is programmed via the APB interface. + +properties: + compatible: + enum: + - atmel,at91sam9g45-dma + - atmel,at91sam9rl-dma + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#dma-cells": + description: + Must be <2>, used to represent the number of integer cells in the dma + property of client devices. The two cells in order are + 1. The first cell represents the channel number. + 2. The second cell is 0 for RX and 1 for TX transfers. + const: 2 + + clocks: + maxItems: 1 + + clock-names: + const: dma_clk + +required: + - compatible + - reg + - interrupts + - "#dma-cells" + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + dma-controller@ffffec00 { + compatible = "atmel,at91sam9g45-dma"; + reg = <0xffffec00 0x200>; + interrupts = <21>; + #dma-cells = <2>; + clocks = <&pmc 2 20>; + clock-names = "dma_clk"; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt deleted file mode 100644 index f69bcf5a6343..000000000000 --- a/Documentation/devicetree/bindings/dma/atmel-dma.txt +++ /dev/null @@ -1,42 +0,0 @@ -* Atmel Direct Memory Access Controller (DMA) - -Required properties: -- compatible: Should be "atmel,-dma". -- reg: Should contain DMA registers location and length. -- interrupts: Should contain DMA interrupt. -- #dma-cells: Must be <2>, used to represent the number of integer cells in -the dmas property of client devices. - -Example: - -dma0: dma@ffffec00 { - compatible = "atmel,at91sam9g45-dma"; - reg = <0xffffec00 0x200>; - interrupts = <21>; - #dma-cells = <2>; -}; - -DMA clients connected to the Atmel DMA controller must use the format -described in the dma.txt file, using a three-cell specifier for each channel: -a phandle plus two integer cells. -The three cells in order are: - -1. A phandle pointing to the DMA controller. -2. The memory interface (16 most significant bits), the peripheral interface -(16 less significant bits). -3. Parameters for the at91 DMA configuration register which are device -dependent: - - bit 7-0: peripheral identifier for the hardware handshaking interface. The - identifier can be different for tx and rx. - - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP. - -Example: - -i2c0@i2c@f8010000 { - compatible = "atmel,at91sam9x5-i2c"; - reg = <0xf8010000 0x100>; - interrupts = <9 4 6>; - dmas = <&dma0 1 7>, - <&dma0 1 8>; - dma-names = "tx", "rx"; -}; diff --git a/Documentation/devicetree/bindings/misc/atmel-ssc.txt b/Documentation/devicetree/bindings/misc/atmel-ssc.txt index f9fb412642fe..b159dc2298b6 100644 --- a/Documentation/devicetree/bindings/misc/atmel-ssc.txt +++ b/Documentation/devicetree/bindings/misc/atmel-ssc.txt @@ -14,7 +14,7 @@ Required properties: Required properties for devices compatible with "atmel,at91sam9g45-ssc": - dmas: DMA specifier, consisting of a phandle to DMA controller node, the memory interface and SSC DMA channel ID (for tx and rx). - See Documentation/devicetree/bindings/dma/atmel-dma.txt for details. + See Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml for details. - dma-names: Must be "tx", "rx". Optional properties: diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..720be011b7d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15347,7 +15347,7 @@ M: Ludovic Desroches L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: dmaengine@vger.kernel.org S: Supported -F: Documentation/devicetree/bindings/dma/atmel-dma.txt +F: Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml F: drivers/dma/at_hdmac.c F: drivers/dma/at_xdmac.c F: include/dt-bindings/dma/at91.h From 753f324c4caa154e57b6dfff8fba7769dd76a0f5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 31 Jan 2025 09:35:51 -0800 Subject: [PATCH 0137/2157] MAINTAINERS: Change maintainer for IDXD Due to job transition, I am stepping down as IDXD maintainer. Vinicius will take over maintainership responsibilities moving forward. Signed-off-by: Fenghua Yu Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20250131173551.3979408-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 720be011b7d8..9bfadc45ccb2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11684,7 +11684,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git F: drivers/idle/intel_idle.c INTEL IDXD DRIVER -M: Fenghua Yu +M: Vinicius Costa Gomes R: Dave Jiang L: dmaengine@vger.kernel.org S: Supported From 8e63891831f3904047d7ad8078ff52dd454b6975 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 20:10:20 +0100 Subject: [PATCH 0138/2157] dmaengine: Use str_enable_disable-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Manivannan Sadhasivam #dw-edma Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20250114191021.854080-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 6 ++++-- drivers/dma/imx-dma.c | 3 ++- drivers/dma/pxa_dma.c | 4 ++-- drivers/dma/sun6i-dma.c | 3 ++- drivers/dma/ti/edma.c | 3 ++- drivers/dma/xilinx/xilinx_dma.c | 3 ++- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index 68236247059d..c2b88cc99e5d 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "dw-edma-core.h" #include "dw-edma-v0-core.h" @@ -746,7 +747,7 @@ static int dw_edma_channel_setup(struct dw_edma *dw, u32 wr_alloc, u32 rd_alloc) chan->ll_max -= 1; dev_vdbg(dev, "L. List:\tChannel %s[%u] max_cnt=%u\n", - chan->dir == EDMA_DIR_WRITE ? "write" : "read", + str_write_read(chan->dir == EDMA_DIR_WRITE), chan->id, chan->ll_max); if (dw->nr_irqs == 1) @@ -767,7 +768,8 @@ static int dw_edma_channel_setup(struct dw_edma *dw, u32 wr_alloc, u32 rd_alloc) memcpy(&chan->msi, &irq->msi, sizeof(chan->msi)); dev_vdbg(dev, "MSI:\t\tChannel %s[%u] addr=0x%.8x%.8x, data=0x%.8x\n", - chan->dir == EDMA_DIR_WRITE ? "write" : "read", chan->id, + str_write_read(chan->dir == EDMA_DIR_WRITE), + chan->id, chan->msi.address_hi, chan->msi.address_lo, chan->msi.data); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index a651e0995ce8..de8d7070904e 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -942,7 +943,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved( " src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__, imxdmac->channel, (unsigned long long)xt->src_start, (unsigned long long) xt->dst_start, - xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false", + str_true_false(xt->src_sgl), str_true_false(xt->dst_sgl), xt->numf, xt->frame_size); if (list_empty(&imxdmac->ld_free) || diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index e50cf3357e5e..249296389771 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -277,8 +278,7 @@ static int chan_state_show(struct seq_file *s, void *p) seq_printf(s, "\tPriority : %s\n", str_prio[(phy->idx & 0xf) / 4]); seq_printf(s, "\tUnaligned transfer bit: %s\n", - _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? - "yes" : "no"); + str_yes_no(_phy_readl_relaxed(phy, DALGN) & BIT(phy->idx))); seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 95ecb12caaa5..2215ff877bf7 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "virt-dma.h" @@ -553,7 +554,7 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) continue; dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n", - i ? "high" : "low", status); + str_high_low(i), status); writel(status, sdev->base + DMA_IRQ_STAT(i)); diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 4ece125b2ae7..b1a54655e6ce 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -2047,7 +2048,7 @@ static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels); dev_dbg(dev, "num_slots: %u\n", ecc->num_slots); dev_dbg(dev, "num_tc: %u\n", ecc->num_tc); - dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no"); + dev_dbg(dev, "chmap_exist: %s\n", str_yes_no(ecc->chmap_exist)); /* Nothing need to be done if queue priority is provided */ if (pdata->queue_priority_mapping) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 108a7287f4cd..3ad44afd0e74 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -2940,7 +2941,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, XILINX_DMA_DMASR_SG_MASK) chan->has_sg = true; dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id, - chan->has_sg ? "enabled" : "disabled"); + str_enabled_disabled(chan->has_sg)); } /* Initialize the tasklet */ From 9fc2f03e85952ee52558ab844473a8284d924a56 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 20:13:16 +0100 Subject: [PATCH 0139/2157] dmaengine: pxa: Enable compile test The PXA_DMA driver does not include any asm/mach headers, so it can be compile tested for build coverage. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250114191316.857154-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 8afea2e23360..df2d2dc00a05 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -546,7 +546,7 @@ config PL330_DMA config PXA_DMA bool "PXA DMA support" - depends on (ARCH_MMP || ARCH_PXA) + depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help From 2c17e9ea0caa5555e31e154fa1b06260b816f5cc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Jan 2025 12:13:20 +0300 Subject: [PATCH 0140/2157] dmaengine: idxd: Delete unnecessary NULL check The "saved_evl" pointer is a offset into the middle of a non-NULL struct. It can't be NULL and the check is slightly confusing. Delete the check. Signed-off-by: Dan Carpenter Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/ec38214e-0bbb-4c5a-94ff-b2b2d4c3f245@stanley.mountain Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index b946f78f85e1..fca1d2924999 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -912,8 +912,7 @@ static void idxd_device_config_restore(struct idxd_device *idxd, idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit; - if (saved_evl) - idxd->evl->size = saved_evl->size; + idxd->evl->size = saved_evl->size; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *saved_group, *group; From c8f7d65cac565cacf0420acf8b54c855dd7b4484 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 6 Dec 2024 11:34:00 +0100 Subject: [PATCH 0141/2157] phy: phy-rockchip-samsung-hdptx: annotate regmap register-callback The variant of the driver in the vendor-tree contained those handy comments in the regmap register callback. Having the different ranges describe what they are looks helpful. Signed-off-by: Heiko Stuebner Reviewed-by: Cristian Ciocaltea Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20241206103401.1780416-2-heiko@sntech.de Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index 0965b9d4f9cf..f3bac8db2d88 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -560,13 +560,13 @@ static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = { static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) { switch (reg) { - case 0x0000 ... 0x029c: - case 0x0400 ... 0x04a4: - case 0x0800 ... 0x08a4: - case 0x0c00 ... 0x0cb4: - case 0x1000 ... 0x10b4: - case 0x1400 ... 0x14b4: - case 0x1800 ... 0x18b4: + case 0x0000 ... 0x029c: /* CMN Register */ + case 0x0400 ... 0x04a4: /* Sideband Register */ + case 0x0800 ... 0x08a4: /* Lane Top Register */ + case 0x0c00 ... 0x0cb4: /* Lane 0 Register */ + case 0x1000 ... 0x10b4: /* Lane 1 Register */ + case 0x1400 ... 0x14b4: /* Lane 2 Register */ + case 0x1800 ... 0x18b4: /* Lane 3 Register */ return true; } From f08d1c08563846f9be79a4859e912c8795d690fd Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 6 Dec 2024 11:34:01 +0100 Subject: [PATCH 0142/2157] phy: phy-rockchip-samsung-hdptx: Don't use dt aliases to determine phy-id The phy needs to know its identity in the system (phy0 or phy1 on rk3588) for some actions and the driver currently contains code abusing of_alias for that. Devicetree aliases are always optional and should not be used for core device functionality, so instead keep a list of phys on a soc in the of_device_data and find the phy-id by comparing against the mapped register-base. Fixes: c4b09c562086 ("phy: phy-rockchip-samsung-hdptx: Add clock provider support") Signed-off-by: Heiko Stuebner Reviewed-by: Cristian Ciocaltea Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20241206103401.1780416-3-heiko@sntech.de Signed-off-by: Vinod Koul --- .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 50 ++++++++++++++++--- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index f3bac8db2d88..35d5b762e5c4 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -263,11 +263,22 @@ enum rk_hdptx_reset { RST_MAX }; +#define MAX_HDPTX_PHY_NUM 2 + +struct rk_hdptx_phy_cfg { + unsigned int num_phys; + unsigned int phy_ids[MAX_HDPTX_PHY_NUM]; +}; + struct rk_hdptx_phy { struct device *dev; struct regmap *regmap; struct regmap *grf; + /* PHY const config */ + const struct rk_hdptx_phy_cfg *cfgs; + int phy_id; + struct phy *phy; struct phy_config *phy_cfg; struct clk_bulk_data *clks; @@ -1007,15 +1018,14 @@ static int rk_hdptx_phy_clk_register(struct rk_hdptx_phy *hdptx) struct device *dev = hdptx->dev; const char *name, *pname; struct clk *refclk; - int ret, id; + int ret; refclk = devm_clk_get(dev, "ref"); if (IS_ERR(refclk)) return dev_err_probe(dev, PTR_ERR(refclk), "Failed to get ref clock\n"); - id = of_alias_get_id(dev->of_node, "hdptxphy"); - name = id > 0 ? "clk_hdmiphy_pixel1" : "clk_hdmiphy_pixel0"; + name = hdptx->phy_id > 0 ? "clk_hdmiphy_pixel1" : "clk_hdmiphy_pixel0"; pname = __clk_get_name(refclk); hdptx->hw.init = CLK_HW_INIT(name, pname, &hdptx_phy_clk_ops, @@ -1058,8 +1068,9 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev) struct phy_provider *phy_provider; struct device *dev = &pdev->dev; struct rk_hdptx_phy *hdptx; + struct resource *res; void __iomem *regs; - int ret; + int ret, id; hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL); if (!hdptx) @@ -1067,11 +1078,27 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev) hdptx->dev = dev; - regs = devm_platform_ioremap_resource(pdev, 0); + regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(regs)) return dev_err_probe(dev, PTR_ERR(regs), "Failed to ioremap resource\n"); + hdptx->cfgs = device_get_match_data(dev); + if (!hdptx->cfgs) + return dev_err_probe(dev, -EINVAL, "missing match data\n"); + + /* find the phy-id from the io address */ + hdptx->phy_id = -ENODEV; + for (id = 0; id < hdptx->cfgs->num_phys; id++) { + if (res->start == hdptx->cfgs->phy_ids[id]) { + hdptx->phy_id = id; + break; + } + } + + if (hdptx->phy_id < 0) + return dev_err_probe(dev, -ENODEV, "no matching device found\n"); + ret = devm_clk_bulk_get_all(dev, &hdptx->clks); if (ret < 0) return dev_err_probe(dev, ret, "Failed to get clocks\n"); @@ -1132,8 +1159,19 @@ static const struct dev_pm_ops rk_hdptx_phy_pm_ops = { rk_hdptx_phy_runtime_resume, NULL) }; +static const struct rk_hdptx_phy_cfg rk3588_hdptx_phy_cfgs = { + .num_phys = 2, + .phy_ids = { + 0xfed60000, + 0xfed70000, + }, +}; + static const struct of_device_id rk_hdptx_phy_of_match[] = { - { .compatible = "rockchip,rk3588-hdptx-phy", }, + { + .compatible = "rockchip,rk3588-hdptx-phy", + .data = &rk3588_hdptx_phy_cfgs + }, {} }; MODULE_DEVICE_TABLE(of, rk_hdptx_phy_of_match); From ad205ffc0dd0fc3f4841e6ae900037f029aa0fa8 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Tue, 31 Dec 2024 17:27:11 +0800 Subject: [PATCH 0143/2157] dt-bindings: phy: Add rk3576 hdptx phy Add compatible for the HDPTX PHY on rk3576, which is compatible with rk3588, but without rst_phy/rst_ropll/rst_lcpll. In fact, these three reset lines are also optional on the rk3588, they just used for debug, then they were removed on the rk3576 IC design. Signed-off-by: Andy Yan Reviewed-by: Rob Herring (Arm) Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241231092721.252405-1-andyshrk@163.com Signed-off-by: Vinod Koul --- .../phy/rockchip,rk3588-hdptx-phy.yaml | 62 +++++++++++++------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml index 84fe59dbcf48..7a307f45cdec 100644 --- a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml +++ b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml @@ -11,8 +11,13 @@ maintainers: properties: compatible: - enum: - - rockchip,rk3588-hdptx-phy + oneOf: + - enum: + - rockchip,rk3588-hdptx-phy + - items: + - enum: + - rockchip,rk3576-hdptx-phy + - const: rockchip,rk3588-hdptx-phy reg: maxItems: 1 @@ -34,24 +39,12 @@ properties: const: 0 resets: - items: - - description: PHY reset line - - description: APB reset line - - description: INIT reset line - - description: CMN reset line - - description: LANE reset line - - description: ROPLL reset line - - description: LCPLL reset line + minItems: 4 + maxItems: 7 reset-names: - items: - - const: phy - - const: apb - - const: init - - const: cmn - - const: lane - - const: ropll - - const: lcpll + minItems: 4 + maxItems: 7 rockchip,grf: $ref: /schemas/types.yaml#/definitions/phandle @@ -67,6 +60,39 @@ required: - reset-names - rockchip,grf +allOf: + - if: + properties: + compatible: + contains: + enum: + - rockchip,rk3576-hdptx-phy + then: + properties: + resets: + minItems: 4 + maxItems: 4 + reset-names: + items: + - const: apb + - const: init + - const: cmn + - const: lane + else: + properties: + resets: + minItems: 7 + maxItems: 7 + reset-names: + items: + - const: phy + - const: apb + - const: init + - const: cmn + - const: lane + - const: ropll + - const: lcpll + additionalProperties: false examples: From 2e1ffd4c180591e6a46c7f94a6bb187a0661141e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 3 Feb 2025 15:43:20 +0100 Subject: [PATCH 0144/2157] dt-bindings: phy: qcom,qmp-pcie: Add X1P42100 PCIe Gen4x4 PHY X1P42100 has two Gen4x4 PHYs instead of one Gen4x4 and one Gen4x8. They are mostly identical to X1E80100's Gen4x4 PHY, but there are some minor details in the programming sequences. Introduce a new compatible for this flavor of the PHY. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250203-topic-x1p4_dts-v2-1-72cd4cdc767b@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index 89391649e0b5..51eaffa45c18 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -45,6 +45,7 @@ properties: - qcom,x1e80100-qmp-gen4x2-pcie-phy - qcom,x1e80100-qmp-gen4x4-pcie-phy - qcom,x1e80100-qmp-gen4x8-pcie-phy + - qcom,x1p42100-qmp-gen4x4-pcie-phy reg: minItems: 1 @@ -124,6 +125,7 @@ allOf: enum: - qcom,sc8280xp-qmp-gen3x4-pcie-phy - qcom,x1e80100-qmp-gen4x4-pcie-phy + - qcom,x1p42100-qmp-gen4x4-pcie-phy then: properties: reg: @@ -180,6 +182,7 @@ allOf: - qcom,x1e80100-qmp-gen4x2-pcie-phy - qcom,x1e80100-qmp-gen4x4-pcie-phy - qcom,x1e80100-qmp-gen4x8-pcie-phy + - qcom,x1p42100-qmp-gen4x4-pcie-phy then: properties: clocks: From f67f8c61b7fd3f72cf716b3845211e69265d13bd Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 3 Feb 2025 15:43:21 +0100 Subject: [PATCH 0145/2157] dt-bindings: phy: qcom,qmp-pcie: Drop reset number constraints (Almost?) all QMP PHYs come with both a "full reset" ("phy") and a "retain certain registers" one ("phy_nocsr"). Drop the maxItems=1 constraint for resets and reset_names as we go ahead and straighten out the DT usage. After that's done (which will involve modifying some clock drivers etc.), we may set *min*Items to 2, bar some possible exceptions. Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250203-topic-x1p4_dts-v2-2-72cd4cdc767b@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index 51eaffa45c18..af8c4aa4f43d 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -220,12 +220,6 @@ allOf: minItems: 2 reset-names: minItems: 2 - else: - properties: - resets: - maxItems: 1 - reset-names: - maxItems: 1 - if: properties: From 0d8db251dd15d2e284f5a6a53bc2b869f3eca711 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 3 Feb 2025 15:43:22 +0100 Subject: [PATCH 0146/2157] phy: qcom: qmp-pcie: Add X1P42100 Gen4x4 PHY Add a new, common configuration for Gen4x4 V6 PHYs without an init sequence. The bootloader configures the hardware once and the OS retains that configuration by using the NOCSR reset line (which doesn't drop register state on assert) in place of the "full reset" one. Use this new configuration for X1P42100's Gen4x4 PHY. Acked-by: Dmitry Baryshkov Tested-by: Jens Glathe Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250203-topic-x1p4_dts-v2-3-72cd4cdc767b@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 018bbb300830..6726bbe4ad15 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -4156,6 +4156,21 @@ static const struct qmp_phy_cfg x1e80100_qmp_gen4x8_pciephy_cfg = { .has_nocsr_reset = true, }; +static const struct qmp_phy_cfg qmp_v6_gen4x4_pciephy_cfg = { + .lanes = 4, + + .offsets = &qmp_pcie_offsets_v6_20, + + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = pciephy_v6_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS_4_20, +}; + static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -4960,6 +4975,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,x1e80100-qmp-gen4x8-pcie-phy", .data = &x1e80100_qmp_gen4x8_pciephy_cfg, + }, { + .compatible = "qcom,x1p42100-qmp-gen4x4-pcie-phy", + .data = &qmp_v6_gen4x4_pciephy_cfg, }, { }, }; From d02dfd4ceb2e9f34caaaaf87105267e2f722f443 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 3 Feb 2025 12:54:21 -0600 Subject: [PATCH 0147/2157] phy: can-transceiver: Drop unnecessary "mux-states" property presence check It doesn't matter whether "mux-states" is not present or there is some other issue parsing it causing an error. Drop the presence check and rework the error handling to ignore anything other than deferred probe. Acked-by: Marc Kleine-Budde Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250203185421.3383805-2-robh@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/phy-can-transceiver.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index ee4ce4249698..2bec70615449 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -103,6 +103,7 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) struct phy *phy; struct gpio_desc *standby_gpio; struct gpio_desc *enable_gpio; + struct mux_state *mux_state; u32 max_bitrate = 0; int err; @@ -113,13 +114,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node); drvdata = match->data; - if (of_property_read_bool(dev->of_node, "mux-states")) { - struct mux_state *mux_state; - - mux_state = devm_mux_state_get(dev, NULL); - if (IS_ERR(mux_state)) - return dev_err_probe(&pdev->dev, PTR_ERR(mux_state), - "failed to get mux\n"); + mux_state = devm_mux_state_get(dev, NULL); + if (IS_ERR(mux_state)) { + if (PTR_ERR(mux_state) == -EPROBE_DEFER) + return PTR_ERR(mux_state); + } else { can_transceiver_phy->mux_state = mux_state; } From 88c0053baed659acd85b87dd52cbd75f3d8806be Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 31 Dec 2024 10:31:20 -0600 Subject: [PATCH 0148/2157] phy: Use (of|device)_property_present() for non-boolean properties The use of (of|device)_property_read_bool() for non-boolean properties is deprecated in favor of (of|device)_property_present() when testing for property presence. Signed-off-by: Rob Herring (Arm) Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Chunfeng Yun Link: https://lore.kernel.org/r/20241231163121.241543-1-robh@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/mediatek/phy-mtk-tphy.c | 4 ++-- drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index a496fbe3352b..644a34bd2b0b 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -1195,7 +1195,7 @@ static int phy_type_syscon_get(struct mtk_phy_instance *instance, int ret; /* type switch function is optional */ - if (!of_property_read_bool(dn, "mediatek,syscon-type")) + if (!of_property_present(dn, "mediatek,syscon-type")) return 0; ret = of_parse_phandle_with_fixed_args(dn, "mediatek,syscon-type", @@ -1258,7 +1258,7 @@ static int phy_efuse_get(struct mtk_tphy *tphy, struct mtk_phy_instance *instanc } /* software efuse is optional */ - instance->efuse_sw_en = device_property_read_bool(dev, "nvmem-cells"); + instance->efuse_sw_en = device_property_present(dev, "nvmem-cells"); if (!instance->efuse_sw_en) return 0; diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index 96f3d868a526..b5e6a864deeb 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -440,7 +440,7 @@ static int rockchip_usb2phy_extcon_register(struct rockchip_usb2phy *rphy) struct extcon_dev *edev; int ret; - if (of_property_read_bool(node, "extcon")) { + if (of_property_present(node, "extcon")) { edev = extcon_get_edev_by_phandle(rphy->dev, 0); if (IS_ERR(edev)) return dev_err_probe(rphy->dev, PTR_ERR(edev), @@ -1323,7 +1323,7 @@ static int rockchip_usb2phy_otg_port_init(struct rockchip_usb2phy *rphy, goto out; } - if (!of_property_read_bool(rphy->dev->of_node, "extcon")) { + if (!of_property_present(rphy->dev->of_node, "extcon")) { /* do initial sync of usb state */ id = property_enabled(rphy->grf, &rport->port_cfg->utmi_id); extcon_set_state_sync(rphy->edev, EXTCON_USB_HOST, !id); From 4fe7fd17fe66a5e243c1de7ff32c29fac7039b8d Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:05 -0600 Subject: [PATCH 0149/2157] iio: buffer-dmaengine: split requesting DMA channel from allocating buffer Refactor the IIO dmaengine buffer code to split requesting the DMA channel from allocating the buffer. We want to be able to add a new function where the IIO device driver manages the DMA channel, so these two actions need to be separate. To do this, calling dma_request_chan() is moved from iio_dmaengine_buffer_alloc() to iio_dmaengine_buffer_setup_ext(). A new __iio_dmaengine_buffer_setup_ext() helper function is added to simplify error unwinding and will also be used by a new function in a later patch. iio_dmaengine_buffer_free() now only frees the buffer and does not release the DMA channel. A new iio_dmaengine_buffer_teardown() function is added to unwind everything done in iio_dmaengine_buffer_setup_ext(). This keeps things more symmetrical with obvious pairs alloc/free and setup/teardown. Calling dma_get_slave_caps() in iio_dmaengine_buffer_alloc() is moved so that we can avoid any gotos for error unwinding. Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-8-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 2 +- .../buffer/industrialio-buffer-dmaengine.c | 112 ++++++++++-------- drivers/iio/dac/adi-axi-dac.c | 2 +- include/linux/iio/buffer-dmaengine.h | 2 +- 4 files changed, 68 insertions(+), 50 deletions(-) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index c7357601f0f8..a55db308baab 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -305,7 +305,7 @@ static struct iio_buffer *axi_adc_request_buffer(struct iio_backend *back, static void axi_adc_free_buffer(struct iio_backend *back, struct iio_buffer *buffer) { - iio_dmaengine_buffer_free(buffer); + iio_dmaengine_buffer_teardown(buffer); } static int axi_adc_reg_access(struct iio_backend *back, unsigned int reg, diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c index 614e1c4189a9..02847d3962fc 100644 --- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c +++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c @@ -206,39 +206,29 @@ static const struct iio_dev_attr *iio_dmaengine_buffer_attrs[] = { /** * iio_dmaengine_buffer_alloc() - Allocate new buffer which uses DMAengine - * @dev: DMA channel consumer device - * @channel: DMA channel name, typically "rx". + * @chan: DMA channel. * * This allocates a new IIO buffer which internally uses the DMAengine framework - * to perform its transfers. The parent device will be used to request the DMA - * channel. + * to perform its transfers. * * Once done using the buffer iio_dmaengine_buffer_free() should be used to * release it. */ -static struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, - const char *channel) +static struct iio_buffer *iio_dmaengine_buffer_alloc(struct dma_chan *chan) { struct dmaengine_buffer *dmaengine_buffer; unsigned int width, src_width, dest_width; struct dma_slave_caps caps; - struct dma_chan *chan; int ret; + ret = dma_get_slave_caps(chan, &caps); + if (ret < 0) + return ERR_PTR(ret); + dmaengine_buffer = kzalloc(sizeof(*dmaengine_buffer), GFP_KERNEL); if (!dmaengine_buffer) return ERR_PTR(-ENOMEM); - chan = dma_request_chan(dev, channel); - if (IS_ERR(chan)) { - ret = PTR_ERR(chan); - goto err_free; - } - - ret = dma_get_slave_caps(chan, &caps); - if (ret < 0) - goto err_release; - /* Needs to be aligned to the maximum of the minimums */ if (caps.src_addr_widths) src_width = __ffs(caps.src_addr_widths); @@ -262,12 +252,6 @@ static struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, dmaengine_buffer->queue.buffer.access = &iio_dmaengine_buffer_ops; return &dmaengine_buffer->queue.buffer; - -err_release: - dma_release_channel(chan); -err_free: - kfree(dmaengine_buffer); - return ERR_PTR(ret); } /** @@ -276,42 +260,42 @@ static struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, * * Frees a buffer previously allocated with iio_dmaengine_buffer_alloc(). */ -void iio_dmaengine_buffer_free(struct iio_buffer *buffer) +static void iio_dmaengine_buffer_free(struct iio_buffer *buffer) { struct dmaengine_buffer *dmaengine_buffer = iio_buffer_to_dmaengine_buffer(buffer); iio_dma_buffer_exit(&dmaengine_buffer->queue); - dma_release_channel(dmaengine_buffer->chan); - iio_buffer_put(buffer); } -EXPORT_SYMBOL_NS_GPL(iio_dmaengine_buffer_free, "IIO_DMAENGINE_BUFFER"); /** - * iio_dmaengine_buffer_setup_ext() - Setup a DMA buffer for an IIO device - * @dev: DMA channel consumer device - * @indio_dev: IIO device to which to attach this buffer. - * @channel: DMA channel name, typically "rx". - * @dir: Direction of buffer (in or out) + * iio_dmaengine_buffer_teardown() - Releases DMA channel and frees buffer + * @buffer: Buffer to free * - * This allocates a new IIO buffer with devm_iio_dmaengine_buffer_alloc() - * and attaches it to an IIO device with iio_device_attach_buffer(). - * It also appends the INDIO_BUFFER_HARDWARE mode to the supported modes of the - * IIO device. - * - * Once done using the buffer iio_dmaengine_buffer_free() should be used to - * release it. + * Releases the DMA channel and frees the buffer previously setup with + * iio_dmaengine_buffer_setup_ext(). */ -struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, - struct iio_dev *indio_dev, - const char *channel, - enum iio_buffer_direction dir) +void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer) +{ + struct dmaengine_buffer *dmaengine_buffer = + iio_buffer_to_dmaengine_buffer(buffer); + struct dma_chan *chan = dmaengine_buffer->chan; + + iio_dmaengine_buffer_free(buffer); + dma_release_channel(chan); +} +EXPORT_SYMBOL_NS_GPL(iio_dmaengine_buffer_teardown, "IIO_DMAENGINE_BUFFER"); + +static struct iio_buffer +*__iio_dmaengine_buffer_setup_ext(struct iio_dev *indio_dev, + struct dma_chan *chan, + enum iio_buffer_direction dir) { struct iio_buffer *buffer; int ret; - buffer = iio_dmaengine_buffer_alloc(dev, channel); + buffer = iio_dmaengine_buffer_alloc(chan); if (IS_ERR(buffer)) return ERR_CAST(buffer); @@ -327,11 +311,45 @@ struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, return buffer; } + +/** + * iio_dmaengine_buffer_setup_ext() - Setup a DMA buffer for an IIO device + * @dev: DMA channel consumer device + * @indio_dev: IIO device to which to attach this buffer. + * @channel: DMA channel name, typically "rx". + * @dir: Direction of buffer (in or out) + * + * This allocates a new IIO buffer with devm_iio_dmaengine_buffer_alloc() + * and attaches it to an IIO device with iio_device_attach_buffer(). + * It also appends the INDIO_BUFFER_HARDWARE mode to the supported modes of the + * IIO device. + * + * Once done using the buffer iio_dmaengine_buffer_teardown() should be used to + * release it. + */ +struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + const char *channel, + enum iio_buffer_direction dir) +{ + struct dma_chan *chan; + struct iio_buffer *buffer; + + chan = dma_request_chan(dev, channel); + if (IS_ERR(chan)) + return ERR_CAST(chan); + + buffer = __iio_dmaengine_buffer_setup_ext(indio_dev, chan, dir); + if (IS_ERR(buffer)) + dma_release_channel(chan); + + return buffer; +} EXPORT_SYMBOL_NS_GPL(iio_dmaengine_buffer_setup_ext, "IIO_DMAENGINE_BUFFER"); -static void __devm_iio_dmaengine_buffer_free(void *buffer) +static void devm_iio_dmaengine_buffer_teardown(void *buffer) { - iio_dmaengine_buffer_free(buffer); + iio_dmaengine_buffer_teardown(buffer); } /** @@ -357,7 +375,7 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, if (IS_ERR(buffer)) return PTR_ERR(buffer); - return devm_add_action_or_reset(dev, __devm_iio_dmaengine_buffer_free, + return devm_add_action_or_reset(dev, devm_iio_dmaengine_buffer_teardown, buffer); } EXPORT_SYMBOL_NS_GPL(devm_iio_dmaengine_buffer_setup_ext, "IIO_DMAENGINE_BUFFER"); diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index bcaf365feef4..009fb987e1af 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ b/drivers/iio/dac/adi-axi-dac.c @@ -168,7 +168,7 @@ static struct iio_buffer *axi_dac_request_buffer(struct iio_backend *back, static void axi_dac_free_buffer(struct iio_backend *back, struct iio_buffer *buffer) { - iio_dmaengine_buffer_free(buffer); + iio_dmaengine_buffer_teardown(buffer); } enum { diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 81d9a19aeb91..72a2e3fd8a5b 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -12,7 +12,7 @@ struct iio_dev; struct device; -void iio_dmaengine_buffer_free(struct iio_buffer *buffer); +void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer); struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, From 79f24971b4ffbdba9733365e298982c45338e6b1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:06 -0600 Subject: [PATCH 0150/2157] iio: buffer-dmaengine: add devm_iio_dmaengine_buffer_setup_with_handle() Add a new devm_iio_dmaengine_buffer_setup_with_handle() function to handle cases where the DMA channel is managed by the caller rather than being requested and released by the iio_dmaengine module. Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-9-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- .../buffer/industrialio-buffer-dmaengine.c | 38 +++++++++++++++++++ include/linux/iio/buffer-dmaengine.h | 5 +++ 2 files changed, 43 insertions(+) diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c index 02847d3962fc..e9d9a7d39fe1 100644 --- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c +++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c @@ -380,6 +380,44 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, } EXPORT_SYMBOL_NS_GPL(devm_iio_dmaengine_buffer_setup_ext, "IIO_DMAENGINE_BUFFER"); +static void devm_iio_dmaengine_buffer_free(void *buffer) +{ + iio_dmaengine_buffer_free(buffer); +} + +/** + * devm_iio_dmaengine_buffer_setup_with_handle() - Setup a DMA buffer for an + * IIO device + * @dev: Device for devm ownership + * @indio_dev: IIO device to which to attach this buffer. + * @chan: DMA channel + * @dir: Direction of buffer (in or out) + * + * This allocates a new IIO buffer with devm_iio_dmaengine_buffer_alloc() + * and attaches it to an IIO device with iio_device_attach_buffer(). + * It also appends the INDIO_BUFFER_HARDWARE mode to the supported modes of the + * IIO device. + * + * This is the same as devm_iio_dmaengine_buffer_setup_ext() except that the + * caller manages requesting and releasing the DMA channel handle. + */ +int devm_iio_dmaengine_buffer_setup_with_handle(struct device *dev, + struct iio_dev *indio_dev, + struct dma_chan *chan, + enum iio_buffer_direction dir) +{ + struct iio_buffer *buffer; + + buffer = __iio_dmaengine_buffer_setup_ext(indio_dev, chan, dir); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + return devm_add_action_or_reset(dev, devm_iio_dmaengine_buffer_free, + buffer); +} +EXPORT_SYMBOL_NS_GPL(devm_iio_dmaengine_buffer_setup_with_handle, + "IIO_DMAENGINE_BUFFER"); + MODULE_AUTHOR("Lars-Peter Clausen "); MODULE_DESCRIPTION("DMA buffer for the IIO framework"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 72a2e3fd8a5b..37f27545f69f 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -11,6 +11,7 @@ struct iio_dev; struct device; +struct dma_chan; void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer); struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, @@ -26,6 +27,10 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, enum iio_buffer_direction dir); +int devm_iio_dmaengine_buffer_setup_with_handle(struct device *dev, + struct iio_dev *indio_dev, + struct dma_chan *chan, + enum iio_buffer_direction dir); #define devm_iio_dmaengine_buffer_setup(dev, indio_dev, channel) \ devm_iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \ From 503d20ed8cf7c7b40ec0bd94f53c490c1d91c31b Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:07 -0600 Subject: [PATCH 0151/2157] iio: adc: ad7944: don't use storagebits for sizing Replace use of storagebits with realbits for determining the number of bytes needed for SPI transfers. When adding SPI offload support, storagebits will always be 32 rather than 16 for 16-bit 16-bit chips so we can no longer rely on storagebits being the correct size expected by the SPI framework (it always uses 4 bytes for > 16-bit xfers and 2 bytes for > 8-bit xfers). Instead, derive the correct size from realbits since it will always be correct even when SPI offloading is used. Reviewed-by: Jonathan Cameron Reviewed-vy: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-10-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7944.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad7944.c b/drivers/iio/adc/ad7944.c index 0ec9cda10f5f..abfababcea10 100644 --- a/drivers/iio/adc/ad7944.c +++ b/drivers/iio/adc/ad7944.c @@ -98,6 +98,9 @@ struct ad7944_chip_info { const struct iio_chan_spec channels[2]; }; +/* get number of bytes for SPI xfer */ +#define AD7944_SPI_BYTES(scan_type) ((scan_type).realbits > 16 ? 4 : 2) + /* * AD7944_DEFINE_CHIP_INFO - Define a chip info structure for a specific chip * @_name: The name of the chip @@ -164,7 +167,7 @@ static int ad7944_3wire_cs_mode_init_msg(struct device *dev, struct ad7944_adc * /* Then we can read the data during the acquisition phase */ xfers[2].rx_buf = &adc->sample.raw; - xfers[2].len = BITS_TO_BYTES(chan->scan_type.storagebits); + xfers[2].len = AD7944_SPI_BYTES(chan->scan_type); xfers[2].bits_per_word = chan->scan_type.realbits; spi_message_init_with_transfers(&adc->msg, xfers, 3); @@ -193,7 +196,7 @@ static int ad7944_4wire_mode_init_msg(struct device *dev, struct ad7944_adc *adc xfers[0].delay.unit = SPI_DELAY_UNIT_NSECS; xfers[1].rx_buf = &adc->sample.raw; - xfers[1].len = BITS_TO_BYTES(chan->scan_type.storagebits); + xfers[1].len = AD7944_SPI_BYTES(chan->scan_type); xfers[1].bits_per_word = chan->scan_type.realbits; spi_message_init_with_transfers(&adc->msg, xfers, 2); @@ -228,7 +231,7 @@ static int ad7944_chain_mode_init_msg(struct device *dev, struct ad7944_adc *adc xfers[0].delay.unit = SPI_DELAY_UNIT_NSECS; xfers[1].rx_buf = adc->chain_mode_buf; - xfers[1].len = BITS_TO_BYTES(chan->scan_type.storagebits) * n_chain_dev; + xfers[1].len = AD7944_SPI_BYTES(chan->scan_type) * n_chain_dev; xfers[1].bits_per_word = chan->scan_type.realbits; spi_message_init_with_transfers(&adc->msg, xfers, 2); @@ -274,12 +277,12 @@ static int ad7944_single_conversion(struct ad7944_adc *adc, return ret; if (adc->spi_mode == AD7944_SPI_MODE_CHAIN) { - if (chan->scan_type.storagebits > 16) + if (chan->scan_type.realbits > 16) *val = ((u32 *)adc->chain_mode_buf)[chan->scan_index]; else *val = ((u16 *)adc->chain_mode_buf)[chan->scan_index]; } else { - if (chan->scan_type.storagebits > 16) + if (chan->scan_type.realbits > 16) *val = adc->sample.raw.u32; else *val = adc->sample.raw.u16; @@ -409,8 +412,7 @@ static int ad7944_chain_mode_alloc(struct device *dev, /* 1 word for each voltage channel + aligned u64 for timestamp */ chain_mode_buf_size = ALIGN(n_chain_dev * - BITS_TO_BYTES(chan[0].scan_type.storagebits), sizeof(u64)) - + sizeof(u64); + AD7944_SPI_BYTES(chan[0].scan_type), sizeof(u64)) + sizeof(u64); buf = devm_kzalloc(dev, chain_mode_buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; From cbc986cda57a0488069f7c6bda7e16cd592e8157 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:08 -0600 Subject: [PATCH 0152/2157] iio: adc: ad7944: add support for SPI offload Add support for SPI offload to the ad7944 driver. This allows reading data at the max sample rate of 2.5 MSPS. Reviewed-by: Jonathan Cameron Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-11-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 + drivers/iio/adc/ad7944.c | 293 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 278 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index a3e8ac569ce4..1555920f473b 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -360,7 +360,9 @@ config AD7923 config AD7944 tristate "Analog Devices AD7944 and similar ADCs driver" depends on SPI + select SPI_OFFLOAD select IIO_BUFFER + select IIO_BUFFER_DMAENGINE select IIO_TRIGGERED_BUFFER help Say yes here to build support for Analog Devices diff --git a/drivers/iio/adc/ad7944.c b/drivers/iio/adc/ad7944.c index abfababcea10..9a7825ea5087 100644 --- a/drivers/iio/adc/ad7944.c +++ b/drivers/iio/adc/ad7944.c @@ -16,11 +16,14 @@ #include #include #include +#include #include #include +#include #include #include +#include #include #include @@ -54,6 +57,12 @@ struct ad7944_adc { enum ad7944_spi_mode spi_mode; struct spi_transfer xfers[3]; struct spi_message msg; + struct spi_transfer offload_xfers[2]; + struct spi_message offload_msg; + struct spi_offload *offload; + struct spi_offload_trigger *offload_trigger; + unsigned long offload_trigger_hz; + int sample_freq_range[3]; void *chain_mode_buf; /* Chip-specific timing specifications. */ const struct ad7944_timing_spec *timing_spec; @@ -81,6 +90,8 @@ struct ad7944_adc { /* quite time before CNV rising edge */ #define AD7944_T_QUIET_NS 20 +/* minimum CNV high time to trigger conversion */ +#define AD7944_T_CNVH_NS 10 static const struct ad7944_timing_spec ad7944_timing_spec = { .conv_ns = 420, @@ -95,7 +106,9 @@ static const struct ad7944_timing_spec ad7986_timing_spec = { struct ad7944_chip_info { const char *name; const struct ad7944_timing_spec *timing_spec; + u32 max_sample_rate_hz; const struct iio_chan_spec channels[2]; + const struct iio_chan_spec offload_channels[1]; }; /* get number of bytes for SPI xfer */ @@ -105,13 +118,15 @@ struct ad7944_chip_info { * AD7944_DEFINE_CHIP_INFO - Define a chip info structure for a specific chip * @_name: The name of the chip * @_ts: The timing specification for the chip + * @_max: The maximum sample rate in Hz * @_bits: The number of bits in the conversion result * @_diff: Whether the chip is true differential or not */ -#define AD7944_DEFINE_CHIP_INFO(_name, _ts, _bits, _diff) \ +#define AD7944_DEFINE_CHIP_INFO(_name, _ts, _max, _bits, _diff) \ static const struct ad7944_chip_info _name##_chip_info = { \ .name = #_name, \ .timing_spec = &_ts##_timing_spec, \ + .max_sample_rate_hz = _max, \ .channels = { \ { \ .type = IIO_VOLTAGE, \ @@ -129,13 +144,43 @@ static const struct ad7944_chip_info _name##_chip_info = { \ }, \ IIO_CHAN_SOFT_TIMESTAMP(1), \ }, \ + .offload_channels = { \ + { \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .differential = _diff, \ + .channel = 0, \ + .channel2 = _diff ? 1 : 0, \ + .scan_index = 0, \ + .scan_type.sign = _diff ? 's' : 'u', \ + .scan_type.realbits = _bits, \ + .scan_type.storagebits = 32, \ + .scan_type.endianness = IIO_CPU, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) \ + | BIT(IIO_CHAN_INFO_SCALE) \ + | BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .info_mask_separate_available = \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + }, \ + }, \ } +/* + * Notes on the offload channels: + * - There is no soft timestamp since everything is done in hardware. + * - There is a sampling frequency attribute added. This controls the SPI + * offload trigger. + * - The storagebits value depends on the SPI offload provider. Currently there + * is only one supported provider, namely the ADI PULSAR ADC HDL project, + * which always uses 32-bit words for data values, even for <= 16-bit ADCs. + * So the value is just hardcoded to 32 for now. + */ + /* pseudo-differential with ground sense */ -AD7944_DEFINE_CHIP_INFO(ad7944, ad7944, 14, 0); -AD7944_DEFINE_CHIP_INFO(ad7985, ad7944, 16, 0); +AD7944_DEFINE_CHIP_INFO(ad7944, ad7944, 2.5 * MEGA, 14, 0); +AD7944_DEFINE_CHIP_INFO(ad7985, ad7944, 2.5 * MEGA, 16, 0); /* fully differential */ -AD7944_DEFINE_CHIP_INFO(ad7986, ad7986, 18, 1); +AD7944_DEFINE_CHIP_INFO(ad7986, ad7986, 2 * MEGA, 18, 1); static int ad7944_3wire_cs_mode_init_msg(struct device *dev, struct ad7944_adc *adc, const struct iio_chan_spec *chan) @@ -239,6 +284,48 @@ static int ad7944_chain_mode_init_msg(struct device *dev, struct ad7944_adc *adc return devm_spi_optimize_message(dev, adc->spi, &adc->msg); } +/* + * Unlike ad7944_3wire_cs_mode_init_msg(), this creates a message that reads + * during the conversion phase instead of the acquisition phase when reading + * a sample from the ADC. This is needed to be able to read at the maximum + * sample rate. It requires the SPI controller to have offload support and a + * high enough SCLK rate to read the sample during the conversion phase. + */ +static int ad7944_3wire_cs_mode_init_offload_msg(struct device *dev, + struct ad7944_adc *adc, + const struct iio_chan_spec *chan) +{ + struct spi_transfer *xfers = adc->offload_xfers; + int ret; + + /* + * CS is tied to CNV and we need a low to high transition to start the + * conversion, so place CNV low for t_QUIET to prepare for this. + */ + xfers[0].delay.value = AD7944_T_QUIET_NS; + xfers[0].delay.unit = SPI_DELAY_UNIT_NSECS; + /* CNV has to be high for a minimum time to trigger conversion. */ + xfers[0].cs_change = 1; + xfers[0].cs_change_delay.value = AD7944_T_CNVH_NS; + xfers[0].cs_change_delay.unit = SPI_DELAY_UNIT_NSECS; + + /* Then we can read the previous sample during the conversion phase */ + xfers[1].offload_flags = SPI_OFFLOAD_XFER_RX_STREAM; + xfers[1].len = AD7944_SPI_BYTES(chan->scan_type); + xfers[1].bits_per_word = chan->scan_type.realbits; + + spi_message_init_with_transfers(&adc->offload_msg, xfers, + ARRAY_SIZE(adc->offload_xfers)); + + adc->offload_msg.offload = adc->offload; + + ret = devm_spi_optimize_message(dev, adc->spi, &adc->offload_msg); + if (ret) + return dev_err_probe(dev, ret, "failed to prepare offload msg\n"); + + return 0; +} + /** * ad7944_convert_and_acquire - Perform a single conversion and acquisition * @adc: The ADC device structure @@ -294,6 +381,23 @@ static int ad7944_single_conversion(struct ad7944_adc *adc, return IIO_VAL_INT; } +static int ad7944_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + struct ad7944_adc *adc = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = adc->sample_freq_range; + *type = IIO_VAL_INT; + return IIO_AVAIL_RANGE; + default: + return -EINVAL; + } +} + static int ad7944_read_raw(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val, int *val2, long info) @@ -326,13 +430,104 @@ static int ad7944_read_raw(struct iio_dev *indio_dev, return -EINVAL; } + case IIO_CHAN_INFO_SAMP_FREQ: + *val = adc->offload_trigger_hz; + return IIO_VAL_INT; + default: return -EINVAL; } } +static int ad7944_set_sample_freq(struct ad7944_adc *adc, int val) +{ + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = val, + }, + }; + int ret; + + ret = spi_offload_trigger_validate(adc->offload_trigger, &config); + if (ret) + return ret; + + adc->offload_trigger_hz = config.periodic.frequency_hz; + + return 0; +} + +static int ad7944_write_raw(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + int val, int val2, long info) +{ + struct ad7944_adc *adc = iio_priv(indio_dev); + + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (val < 1 || val > adc->sample_freq_range[2]) + return -EINVAL; + + return ad7944_set_sample_freq(adc, val); + default: + return -EINVAL; + } +} + +static int ad7944_write_raw_get_fmt(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT; + default: + return IIO_VAL_INT_PLUS_MICRO; + } +} + static const struct iio_info ad7944_iio_info = { + .read_avail = &ad7944_read_avail, .read_raw = &ad7944_read_raw, + .write_raw = &ad7944_write_raw, + .write_raw_get_fmt = &ad7944_write_raw_get_fmt, +}; + +static int ad7944_offload_buffer_postenable(struct iio_dev *indio_dev) +{ + struct ad7944_adc *adc = iio_priv(indio_dev); + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = adc->offload_trigger_hz, + }, + }; + int ret; + + gpiod_set_value_cansleep(adc->turbo, 1); + + ret = spi_offload_trigger_enable(adc->offload, adc->offload_trigger, + &config); + if (ret) + gpiod_set_value_cansleep(adc->turbo, 0); + + return ret; +} + +static int ad7944_offload_buffer_predisable(struct iio_dev *indio_dev) +{ + struct ad7944_adc *adc = iio_priv(indio_dev); + + spi_offload_trigger_disable(adc->offload, adc->offload_trigger); + gpiod_set_value_cansleep(adc->turbo, 0); + + return 0; +} + +static const struct iio_buffer_setup_ops ad7944_offload_buffer_setup_ops = { + .postenable = &ad7944_offload_buffer_postenable, + .predisable = &ad7944_offload_buffer_predisable, }; static irqreturn_t ad7944_trigger_handler(int irq, void *p) @@ -446,6 +641,11 @@ static const char * const ad7944_power_supplies[] = { "avdd", "dvdd", "bvdd", "vio" }; +static const struct spi_offload_config ad7944_offload_config = { + .capability_flags = SPI_OFFLOAD_CAP_TRIGGER | + SPI_OFFLOAD_CAP_RX_STREAM_DMA, +}; + static int ad7944_probe(struct spi_device *spi) { const struct ad7944_chip_info *chip_info; @@ -471,6 +671,10 @@ static int ad7944_probe(struct spi_device *spi) adc->timing_spec = chip_info->timing_spec; + adc->sample_freq_range[0] = 1; /* min */ + adc->sample_freq_range[1] = 1; /* step */ + adc->sample_freq_range[2] = chip_info->max_sample_rate_hz; /* max */ + ret = device_property_match_property_string(dev, "adi,spi-mode", ad7944_spi_modes, ARRAY_SIZE(ad7944_spi_modes)); @@ -590,20 +794,74 @@ static int ad7944_probe(struct spi_device *spi) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &ad7944_iio_info; - if (adc->spi_mode == AD7944_SPI_MODE_CHAIN) { - indio_dev->available_scan_masks = chain_scan_masks; - indio_dev->channels = chain_chan; - indio_dev->num_channels = n_chain_dev + 1; - } else { - indio_dev->channels = chip_info->channels; - indio_dev->num_channels = ARRAY_SIZE(chip_info->channels); - } + adc->offload = devm_spi_offload_get(dev, spi, &ad7944_offload_config); + ret = PTR_ERR_OR_ZERO(adc->offload); + if (ret && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to get offload\n"); - ret = devm_iio_triggered_buffer_setup(dev, indio_dev, - iio_pollfunc_store_time, - ad7944_trigger_handler, NULL); - if (ret) - return ret; + /* Fall back to low speed usage when no SPI offload available. */ + if (ret == -ENODEV) { + if (adc->spi_mode == AD7944_SPI_MODE_CHAIN) { + indio_dev->available_scan_masks = chain_scan_masks; + indio_dev->channels = chain_chan; + indio_dev->num_channels = n_chain_dev + 1; + } else { + indio_dev->channels = chip_info->channels; + indio_dev->num_channels = ARRAY_SIZE(chip_info->channels); + } + + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, + iio_pollfunc_store_time, + ad7944_trigger_handler, + NULL); + if (ret) + return ret; + } else { + struct dma_chan *rx_dma; + + if (adc->spi_mode != AD7944_SPI_MODE_SINGLE) + return dev_err_probe(dev, -EINVAL, + "offload only supported in single mode\n"); + + indio_dev->setup_ops = &ad7944_offload_buffer_setup_ops; + indio_dev->channels = chip_info->offload_channels; + indio_dev->num_channels = ARRAY_SIZE(chip_info->offload_channels); + + adc->offload_trigger = devm_spi_offload_trigger_get(dev, + adc->offload, SPI_OFFLOAD_TRIGGER_PERIODIC); + if (IS_ERR(adc->offload_trigger)) + return dev_err_probe(dev, PTR_ERR(adc->offload_trigger), + "failed to get offload trigger\n"); + + ret = ad7944_set_sample_freq(adc, 2 * MEGA); + if (ret) + return dev_err_probe(dev, ret, + "failed to init sample rate\n"); + + rx_dma = devm_spi_offload_rx_stream_request_dma_chan(dev, + adc->offload); + if (IS_ERR(rx_dma)) + return dev_err_probe(dev, PTR_ERR(rx_dma), + "failed to get offload RX DMA\n"); + + /* + * REVISIT: ideally, we would confirm that the offload RX DMA + * buffer layout is the same as what is hard-coded in + * offload_channels. Right now, the only supported offload + * is the pulsar_adc project which always uses 32-bit word + * size for data values, regardless of the SPI bits per word. + */ + + ret = devm_iio_dmaengine_buffer_setup_with_handle(dev, + indio_dev, rx_dma, IIO_BUFFER_DIRECTION_IN); + if (ret) + return ret; + + ret = ad7944_3wire_cs_mode_init_offload_msg(dev, adc, + &chip_info->offload_channels[0]); + if (ret) + return ret; + } return devm_iio_device_register(dev, indio_dev); } @@ -638,3 +896,4 @@ module_spi_driver(ad7944_driver); MODULE_AUTHOR("David Lechner "); MODULE_DESCRIPTION("Analog Devices AD7944 PulSAR ADC family driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("IIO_DMAENGINE_BUFFER"); From f06a9c36729b8de1a3891d7c04c34ab5a2c26174 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:09 -0600 Subject: [PATCH 0153/2157] doc: iio: ad7944: describe offload support Add a section to the ad7944 documentation describing how to use the driver with SPI offloading. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-12-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad7944.rst | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/iio/ad7944.rst b/Documentation/iio/ad7944.rst index 0d26e56aba88..e6dbe4d7f58c 100644 --- a/Documentation/iio/ad7944.rst +++ b/Documentation/iio/ad7944.rst @@ -46,6 +46,8 @@ CS mode, 3-wire, without busy indicator To select this mode in the device tree, set the ``adi,spi-mode`` property to ``"single"`` and omit the ``cnv-gpios`` property. +This is the only wiring configuration supported when using `SPI offload support`_. + CS mode, 4-wire, without busy indicator ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -106,7 +108,6 @@ Unimplemented features ---------------------- - ``BUSY`` indication -- ``TURBO`` mode Device attributes @@ -147,6 +148,27 @@ AD7986 is a fully-differential ADC and has the following attributes: In "chain" mode, additional chips will appear as additional voltage input channels, e.g. ``in_voltage2-voltage3_raw``. +SPI offload support +=================== + +To be able to achieve the maximum sample rate, the driver can be used with the +`AXI SPI Engine`_ to provide SPI offload support. + +.. _AXI SPI Engine: http://analogdevicesinc.github.io/hdl/projects/pulsar_adc/index.html + +When SPI offload is being used, some attributes will be different. + +* ``trigger`` directory is removed. +* ``in_voltage0_sampling_frequency`` attribute is added for setting the sample + rate. +* ``in_voltage0_sampling_frequency_available`` attribute is added for querying + the max sample rate. +* ``timestamp`` channel is removed. +* Buffer data format may be different compared to when offload is not used, + e.g. the ``in_voltage0_type`` attribute. + +If the ``turbo-gpios`` property is present in the device tree, the driver will +turn on TURBO during buffered reads and turn it off otherwise. Device buffers ============== From b7c1e069f54668461856f03696812ab7176c400d Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:10 -0600 Subject: [PATCH 0154/2157] dt-bindings: iio: adc: adi,ad4695: add SPI offload properties Add a pwms property to the adi,ad4695 binding to specify an optional PWM output connected to the CNV pin on the ADC. Also add #trigger-source-cells property to allow the BUSY output to be used as a SPI offload trigger source to indicate when a sample is ready to be read. Macros are added to adi,ad4695.h for the cell values to help with readability since they are arbitrary values. Reviewed-by: Rob Herring (Arm) Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-13-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/adi,ad4695.yaml | 13 +++++++++++++ include/dt-bindings/iio/adc/adi,ad4695.h | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml index 7d2229dee444..cbde7a0505d2 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml @@ -84,6 +84,10 @@ properties: description: The Reset Input (RESET). Should be configured GPIO_ACTIVE_LOW. maxItems: 1 + pwms: + description: PWM signal connected to the CNV pin. + maxItems: 1 + interrupts: minItems: 1 items: @@ -106,6 +110,15 @@ properties: The first cell is the GPn number: 0 to 3. The second cell takes standard GPIO flags. + '#trigger-source-cells': + description: | + First cell indicates the output signal: 0 = BUSY, 1 = ALERT. + Second cell indicates which GPn pin is used: 0, 2 or 3. + + For convenience, macros for these values are available in + dt-bindings/iio/adc/adi,ad4695.h. + const: 2 + "#address-cells": const: 1 diff --git a/include/dt-bindings/iio/adc/adi,ad4695.h b/include/dt-bindings/iio/adc/adi,ad4695.h index 9fbef542bf67..fea4525d2710 100644 --- a/include/dt-bindings/iio/adc/adi,ad4695.h +++ b/include/dt-bindings/iio/adc/adi,ad4695.h @@ -6,4 +6,11 @@ #define AD4695_COMMON_MODE_REFGND 0xFF #define AD4695_COMMON_MODE_COM 0xFE +#define AD4695_TRIGGER_EVENT_BUSY 0 +#define AD4695_TRIGGER_EVENT_ALERT 1 + +#define AD4695_TRIGGER_PIN_GP0 0 +#define AD4695_TRIGGER_PIN_GP2 2 +#define AD4695_TRIGGER_PIN_GP3 3 + #endif /* _DT_BINDINGS_ADI_AD4695_H */ From f09f140e3ea8f4c1b2990cdd72848f4083388ad8 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:11 -0600 Subject: [PATCH 0155/2157] iio: adc: ad4695: Add support for SPI offload Add support for SPI offload to the ad4695 driver. SPI offload allows sampling data at the max sample rate (500kSPS or 1MSPS). This is developed and tested against the ADI example FPGA design for this family of ADCs [1]. [1]: http://analogdevicesinc.github.io/hdl/projects/ad469x_fmc/index.html Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-14-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 + drivers/iio/adc/ad4695.c | 446 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 431 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 1555920f473b..f64b5faeb257 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -52,8 +52,10 @@ config AD4695 tristate "Analog Device AD4695 ADC Driver" depends on SPI select IIO_BUFFER + select IIO_BUFFER_DMAENGINE select IIO_TRIGGERED_BUFFER select REGMAP + select SPI_OFFLOAD help Say yes here to build support for Analog Devices AD4695 and similar analog to digital converters (ADC). diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 13cf01d35301..5c6d4f658af9 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -19,14 +19,19 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include #include #include +#include +#include #include #include @@ -66,6 +71,8 @@ #define AD4695_REG_STD_SEQ_CONFIG 0x0024 #define AD4695_REG_GPIO_CTRL 0x0026 #define AD4695_REG_GP_MODE 0x0027 +#define AD4695_REG_GP_MODE_BUSY_GP_SEL BIT(5) +#define AD4695_REG_GP_MODE_BUSY_GP_EN BIT(1) #define AD4695_REG_TEMP_CTRL 0x0029 #define AD4695_REG_TEMP_CTRL_TEMP_EN BIT(0) #define AD4695_REG_CONFIG_IN(n) (0x0030 | (n)) @@ -124,13 +131,22 @@ struct ad4695_channel_config { struct ad4695_state { struct spi_device *spi; + struct spi_offload *offload; + struct spi_offload_trigger *offload_trigger; struct regmap *regmap; struct regmap *regmap16; struct gpio_desc *reset_gpio; + /* currently PWM CNV only supported with SPI offload use */ + struct pwm_device *cnv_pwm; + /* protects against concurrent use of cnv_pwm */ + struct mutex cnv_pwm_lock; + /* offload also requires separate gpio to manually control CNV */ + struct gpio_desc *cnv_gpio; /* voltages channels plus temperature and timestamp */ struct iio_chan_spec iio_chan[AD4695_MAX_CHANNELS + 2]; struct ad4695_channel_config channels_cfg[AD4695_MAX_CHANNELS]; const struct ad4695_chip_info *chip_info; + int sample_freq_range[3]; /* Reference voltage. */ unsigned int vref_mv; /* Common mode input pin voltage. */ @@ -355,6 +371,13 @@ static const struct ad4695_chip_info ad4698_chip_info = { .num_voltage_inputs = 8, }; +static void ad4695_cnv_manual_trigger(struct ad4695_state *st) +{ + gpiod_set_value_cansleep(st->cnv_gpio, 1); + ndelay(10); + gpiod_set_value_cansleep(st->cnv_gpio, 0); +} + /** * ad4695_set_single_cycle_mode - Set the device in single cycle mode * @st: The AD4695 state @@ -460,6 +483,17 @@ static int ad4695_exit_conversion_mode(struct ad4695_state *st) */ st->cnv_cmd2 = AD4695_CMD_EXIT_CNV_MODE << 3; + if (st->cnv_gpio) { + ad4695_cnv_manual_trigger(st); + + /* + * In this case, CNV is not connected to CS, so we don't need + * the extra CS toggle to trigger the conversion and toggling + * CS would have no effect. + */ + return spi_sync_transfer(st->spi, &xfers[1], 1); + } + return spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); } @@ -687,6 +721,161 @@ static irqreturn_t ad4695_trigger_handler(int irq, void *p) return IRQ_HANDLED; } +static int ad4695_offload_buffer_postenable(struct iio_dev *indio_dev) +{ + struct ad4695_state *st = iio_priv(indio_dev); + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_DATA_READY, + }; + struct spi_transfer *xfer = &st->buf_read_xfer[0]; + struct pwm_state state; + u8 temp_chan_bit = st->chip_info->num_voltage_inputs; + u8 num_slots = 0; + u8 temp_en = 0; + unsigned int bit; + int ret; + + iio_for_each_active_channel(indio_dev, bit) { + if (bit == temp_chan_bit) { + temp_en = 1; + continue; + } + + ret = regmap_write(st->regmap, AD4695_REG_AS_SLOT(num_slots), + FIELD_PREP(AD4695_REG_AS_SLOT_INX, bit)); + if (ret) + return ret; + + num_slots++; + } + + /* + * For non-offload, we could discard data to work around this + * restriction, but with offload, that is not possible. + */ + if (num_slots < 2) { + dev_err(&st->spi->dev, + "At least two voltage channels must be enabled.\n"); + return -EINVAL; + } + + ret = regmap_update_bits(st->regmap, AD4695_REG_TEMP_CTRL, + AD4695_REG_TEMP_CTRL_TEMP_EN, + FIELD_PREP(AD4695_REG_TEMP_CTRL_TEMP_EN, + temp_en)); + if (ret) + return ret; + + /* Each BUSY event means just one sample for one channel is ready. */ + memset(xfer, 0, sizeof(*xfer)); + xfer->offload_flags = SPI_OFFLOAD_XFER_RX_STREAM; + /* Using 19 bits per word to allow for possible oversampling */ + xfer->bits_per_word = 19; + xfer->len = 4; + + spi_message_init_with_transfers(&st->buf_read_msg, xfer, 1); + st->buf_read_msg.offload = st->offload; + + ret = spi_optimize_message(st->spi, &st->buf_read_msg); + if (ret) + return ret; + + /* + * NB: technically, this is part the SPI offload trigger enable, but it + * doesn't work to call it from the offload trigger enable callback + * because it requires accessing the SPI bus. Calling it from the + * trigger enable callback could cause a deadlock. + */ + ret = regmap_set_bits(st->regmap, AD4695_REG_GP_MODE, + AD4695_REG_GP_MODE_BUSY_GP_EN); + if (ret) + goto err_unoptimize_message; + + ret = spi_offload_trigger_enable(st->offload, st->offload_trigger, + &config); + if (ret) + goto err_disable_busy_output; + + ret = ad4695_enter_advanced_sequencer_mode(st, num_slots); + if (ret) + goto err_offload_trigger_disable; + + mutex_lock(&st->cnv_pwm_lock); + pwm_get_state(st->cnv_pwm, &state); + /* + * PWM subsystem generally rounds down, so requesting 2x minimum high + * time ensures that we meet the minimum high time in any case. + */ + state.duty_cycle = AD4695_T_CNVH_NS * 2; + ret = pwm_apply_might_sleep(st->cnv_pwm, &state); + mutex_unlock(&st->cnv_pwm_lock); + if (ret) + goto err_offload_exit_conversion_mode; + + return 0; + +err_offload_exit_conversion_mode: + /* + * We have to unwind in a different order to avoid triggering offload. + * ad4695_exit_conversion_mode() triggers a conversion, so it has to be + * done after spi_offload_trigger_disable(). + */ + spi_offload_trigger_disable(st->offload, st->offload_trigger); + ad4695_exit_conversion_mode(st); + goto err_disable_busy_output; + +err_offload_trigger_disable: + spi_offload_trigger_disable(st->offload, st->offload_trigger); + +err_disable_busy_output: + regmap_clear_bits(st->regmap, AD4695_REG_GP_MODE, + AD4695_REG_GP_MODE_BUSY_GP_EN); + +err_unoptimize_message: + spi_unoptimize_message(&st->buf_read_msg); + + return ret; +} + +static int ad4695_offload_buffer_predisable(struct iio_dev *indio_dev) +{ + struct ad4695_state *st = iio_priv(indio_dev); + struct pwm_state state; + int ret; + + scoped_guard(mutex, &st->cnv_pwm_lock) { + pwm_get_state(st->cnv_pwm, &state); + state.duty_cycle = 0; + ret = pwm_apply_might_sleep(st->cnv_pwm, &state); + if (ret) + return ret; + } + + spi_offload_trigger_disable(st->offload, st->offload_trigger); + + /* + * ad4695_exit_conversion_mode() triggers a conversion, so it has to be + * done after spi_offload_trigger_disable(). + */ + ret = ad4695_exit_conversion_mode(st); + if (ret) + return ret; + + ret = regmap_clear_bits(st->regmap, AD4695_REG_GP_MODE, + AD4695_REG_GP_MODE_BUSY_GP_EN); + if (ret) + return ret; + + spi_unoptimize_message(&st->buf_read_msg); + + return 0; +} + +static const struct iio_buffer_setup_ops ad4695_offload_buffer_setup_ops = { + .postenable = ad4695_offload_buffer_postenable, + .predisable = ad4695_offload_buffer_predisable, +}; + /** * ad4695_read_one_sample - Read a single sample using single-cycle mode * @st: The AD4695 state @@ -718,6 +907,13 @@ static int ad4695_read_one_sample(struct ad4695_state *st, unsigned int address) if (ret) return ret; + /* + * If CNV is connected to CS, the previous function will have triggered + * the conversion, otherwise, we do it manually. + */ + if (st->cnv_gpio) + ad4695_cnv_manual_trigger(st); + /* * Setting the first channel to the temperature channel isn't supported * in single-cycle mode, so we have to do an extra conversion to read @@ -729,6 +925,13 @@ static int ad4695_read_one_sample(struct ad4695_state *st, unsigned int address) ret = spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); if (ret) return ret; + + /* + * If CNV is connected to CS, the previous function will have + * triggered the conversion, otherwise, we do it manually. + */ + if (st->cnv_gpio) + ad4695_cnv_manual_trigger(st); } /* Then read the result and exit conversion mode. */ @@ -842,11 +1045,34 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, default: return -EINVAL; } + case IIO_CHAN_INFO_SAMP_FREQ: { + struct pwm_state state; + + ret = pwm_get_state_hw(st->cnv_pwm, &state); + if (ret) + return ret; + + *val = DIV_ROUND_UP_ULL(NSEC_PER_SEC, state.period); + + return IIO_VAL_INT; + } default: return -EINVAL; } } +static int ad4695_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT; + default: + return IIO_VAL_INT_PLUS_MICRO; + } +} + static int ad4695_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) @@ -900,6 +1126,17 @@ static int ad4695_write_raw(struct iio_dev *indio_dev, default: return -EINVAL; } + case IIO_CHAN_INFO_SAMP_FREQ: { + struct pwm_state state; + + if (val <= 0 || val > st->chip_info->max_sample_rate) + return -EINVAL; + + guard(mutex)(&st->cnv_pwm_lock); + pwm_get_state(st->cnv_pwm, &state); + state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, val); + return pwm_apply_might_sleep(st->cnv_pwm, &state); + } default: return -EINVAL; } @@ -923,6 +1160,7 @@ static int ad4695_read_avail(struct iio_dev *indio_dev, */ S16_MIN / 4, 0, 0, MICRO / 4, S16_MAX / 4, S16_MAX % 4 * MICRO / 4 }; + struct ad4695_state *st = iio_priv(indio_dev); switch (mask) { case IIO_CHAN_INFO_CALIBSCALE: @@ -943,6 +1181,10 @@ static int ad4695_read_avail(struct iio_dev *indio_dev, default: return -EINVAL; } + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = st->sample_freq_range; + *type = IIO_VAL_INT; + return IIO_AVAIL_RANGE; default: return -EINVAL; } @@ -978,6 +1220,7 @@ static int ad4695_debugfs_reg_access(struct iio_dev *indio_dev, static const struct iio_info ad4695_info = { .read_raw = &ad4695_read_raw, + .write_raw_get_fmt = &ad4695_write_raw_get_fmt, .write_raw = &ad4695_write_raw, .read_avail = &ad4695_read_avail, .debugfs_reg_access = &ad4695_debugfs_reg_access, @@ -1091,26 +1334,166 @@ static int ad4695_parse_channel_cfg(struct ad4695_state *st) return 0; } +static bool ad4695_offload_trigger_match(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, + u64 *args, u32 nargs) +{ + if (type != SPI_OFFLOAD_TRIGGER_DATA_READY) + return false; + + /* + * Requires 2 args: + * args[0] is the trigger event. + * args[1] is the GPIO pin number. + */ + if (nargs != 2 || args[0] != AD4695_TRIGGER_EVENT_BUSY) + return false; + + return true; +} + +static int ad4695_offload_trigger_request(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, + u64 *args, u32 nargs) +{ + struct ad4695_state *st = spi_offload_trigger_get_priv(trigger); + + /* Should already be validated by match, but just in case. */ + if (nargs != 2) + return -EINVAL; + + /* DT tells us if BUSY event uses GP0 or GP3. */ + if (args[1] == AD4695_TRIGGER_PIN_GP3) + return regmap_set_bits(st->regmap, AD4695_REG_GP_MODE, + AD4695_REG_GP_MODE_BUSY_GP_SEL); + + return regmap_clear_bits(st->regmap, AD4695_REG_GP_MODE, + AD4695_REG_GP_MODE_BUSY_GP_SEL); +} + +static int +ad4695_offload_trigger_validate(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config) +{ + if (config->type != SPI_OFFLOAD_TRIGGER_DATA_READY) + return -EINVAL; + + return 0; +} + +/* + * NB: There are no enable/disable callbacks here due to requiring a SPI + * message to enable or disable the BUSY output on the ADC. + */ +static const struct spi_offload_trigger_ops ad4695_offload_trigger_ops = { + .match = ad4695_offload_trigger_match, + .request = ad4695_offload_trigger_request, + .validate = ad4695_offload_trigger_validate, +}; + +static void ad4695_pwm_disable(void *pwm) +{ + pwm_disable(pwm); +} + +static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, + struct ad4695_state *st) +{ + struct device *dev = &st->spi->dev; + struct spi_offload_trigger_info trigger_info = { + .fwnode = dev_fwnode(dev), + .ops = &ad4695_offload_trigger_ops, + .priv = st, + }; + struct pwm_state pwm_state; + struct dma_chan *rx_dma; + int ret, i; + + indio_dev->num_channels = st->chip_info->num_voltage_inputs + 1; + indio_dev->setup_ops = &ad4695_offload_buffer_setup_ops; + + if (!st->cnv_gpio) + return dev_err_probe(dev, -ENODEV, + "CNV GPIO is required for SPI offload\n"); + + ret = devm_spi_offload_trigger_register(dev, &trigger_info); + if (ret) + return dev_err_probe(dev, ret, + "failed to register offload trigger\n"); + + st->offload_trigger = devm_spi_offload_trigger_get(dev, st->offload, + SPI_OFFLOAD_TRIGGER_DATA_READY); + if (IS_ERR(st->offload_trigger)) + return dev_err_probe(dev, PTR_ERR(st->offload_trigger), + "failed to get offload trigger\n"); + + ret = devm_mutex_init(dev, &st->cnv_pwm_lock); + if (ret) + return ret; + + st->cnv_pwm = devm_pwm_get(dev, NULL); + if (IS_ERR(st->cnv_pwm)) + return dev_err_probe(dev, PTR_ERR(st->cnv_pwm), + "failed to get CNV PWM\n"); + + pwm_init_state(st->cnv_pwm, &pwm_state); + + /* If firmware didn't provide default rate, use 10kHz (arbitrary). */ + if (pwm_state.period == 0) + pwm_state.period = 100 * MILLI; + + pwm_state.enabled = true; + + ret = pwm_apply_might_sleep(st->cnv_pwm, &pwm_state); + if (ret) + return dev_err_probe(dev, ret, "failed to apply CNV PWM\n"); + + ret = devm_add_action_or_reset(dev, ad4695_pwm_disable, st->cnv_pwm); + if (ret) + return ret; + + rx_dma = devm_spi_offload_rx_stream_request_dma_chan(dev, st->offload); + if (IS_ERR(rx_dma)) + return dev_err_probe(dev, PTR_ERR(rx_dma), + "failed to get offload RX DMA\n"); + + for (i = 0; i < indio_dev->num_channels; i++) { + struct iio_chan_spec *chan = &st->iio_chan[i]; + + /* + * NB: When using offload support, all channels need to have the + * same bits_per_word because they all use the same SPI message + * for reading one sample. In order to prevent breaking + * userspace in the future when oversampling support is added, + * all channels are set read 19 bits with a shift of 3 to mask + * out the extra bits even though we currently only support 16 + * bit samples (oversampling ratio == 1). + */ + chan->scan_type.shift = 3; + chan->scan_type.storagebits = 32; + /* add sample frequency for PWM CNV trigger */ + chan->info_mask_separate |= BIT(IIO_CHAN_INFO_SAMP_FREQ); + chan->info_mask_separate_available |= BIT(IIO_CHAN_INFO_SAMP_FREQ); + } + + return devm_iio_dmaengine_buffer_setup_with_handle(dev, indio_dev, + rx_dma, IIO_BUFFER_DIRECTION_IN); +} + +static const struct spi_offload_config ad4695_spi_offload_config = { + .capability_flags = SPI_OFFLOAD_CAP_TRIGGER | + SPI_OFFLOAD_CAP_RX_STREAM_DMA, +}; + static int ad4695_probe(struct spi_device *spi) { struct device *dev = &spi->dev; struct ad4695_state *st; struct iio_dev *indio_dev; - struct gpio_desc *cnv_gpio; bool use_internal_ldo_supply; bool use_internal_ref_buffer; int ret; - cnv_gpio = devm_gpiod_get_optional(dev, "cnv", GPIOD_OUT_LOW); - if (IS_ERR(cnv_gpio)) - return dev_err_probe(dev, PTR_ERR(cnv_gpio), - "Failed to get CNV GPIO\n"); - - /* Driver currently requires CNV pin to be connected to SPI CS */ - if (cnv_gpio) - return dev_err_probe(dev, -ENODEV, - "CNV GPIO is not supported\n"); - indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); if (!indio_dev) return -ENOMEM; @@ -1122,6 +1505,10 @@ static int ad4695_probe(struct spi_device *spi) if (!st->chip_info) return -EINVAL; + st->sample_freq_range[0] = 1; /* min */ + st->sample_freq_range[1] = 1; /* step */ + st->sample_freq_range[2] = st->chip_info->max_sample_rate; /* max */ + st->regmap = devm_regmap_init(dev, &ad4695_regmap_bus, st, &ad4695_regmap_config); if (IS_ERR(st->regmap)) @@ -1134,6 +1521,11 @@ static int ad4695_probe(struct spi_device *spi) return dev_err_probe(dev, PTR_ERR(st->regmap16), "Failed to initialize regmap16\n"); + st->cnv_gpio = devm_gpiod_get_optional(dev, "cnv", GPIOD_OUT_LOW); + if (IS_ERR(st->cnv_gpio)) + return dev_err_probe(dev, PTR_ERR(st->cnv_gpio), + "Failed to get CNV GPIO\n"); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(ad4695_power_supplies), ad4695_power_supplies); @@ -1261,12 +1653,31 @@ static int ad4695_probe(struct spi_device *spi) indio_dev->channels = st->iio_chan; indio_dev->num_channels = st->chip_info->num_voltage_inputs + 2; - ret = devm_iio_triggered_buffer_setup(dev, indio_dev, - iio_pollfunc_store_time, - ad4695_trigger_handler, - &ad4695_buffer_setup_ops); - if (ret) - return ret; + st->offload = devm_spi_offload_get(dev, spi, &ad4695_spi_offload_config); + ret = PTR_ERR_OR_ZERO(st->offload); + if (ret && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to get SPI offload\n"); + + /* If no SPI offload, fall back to low speed usage. */ + if (ret == -ENODEV) { + /* Driver currently requires CNV pin to be connected to SPI CS */ + if (st->cnv_gpio) + return dev_err_probe(dev, -EINVAL, + "CNV GPIO is not supported\n"); + + indio_dev->num_channels = st->chip_info->num_voltage_inputs + 2; + + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, + iio_pollfunc_store_time, + ad4695_trigger_handler, + &ad4695_buffer_setup_ops); + if (ret) + return ret; + } else { + ret = ad4695_probe_spi_offload(indio_dev, st); + if (ret) + return ret; + } return devm_iio_device_register(dev, indio_dev); } @@ -1303,3 +1714,4 @@ MODULE_AUTHOR("Ramona Gradinariu "); MODULE_AUTHOR("David Lechner "); MODULE_DESCRIPTION("Analog Devices AD4695 ADC driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("IIO_DMAENGINE_BUFFER"); From 5031c9df4af04a815365bf1cbe6acee872384586 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:12 -0600 Subject: [PATCH 0156/2157] doc: iio: ad4695: add SPI offload support Document SPI offload support for the ad4695 driver. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-15-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad4695.rst | 68 ++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/Documentation/iio/ad4695.rst b/Documentation/iio/ad4695.rst index 9ec8bf466c15..ead0faadff4b 100644 --- a/Documentation/iio/ad4695.rst +++ b/Documentation/iio/ad4695.rst @@ -47,6 +47,36 @@ In this mode, CNV and CS are tied together and there is a single SDO line. To use this mode, in the device tree, omit the ``cnv-gpios`` and ``spi-rx-bus-width`` properties. +SPI offload wiring +^^^^^^^^^^^^^^^^^^ + +When used with a SPI offload, the supported wiring configuration is: + +.. code-block:: + + +-------------+ +-------------+ + | GP0/BUSY |-------->| TRIGGER | + | CS |<--------| CS | + | | | | + | ADC | | SPI | + | | | | + | SDI |<--------| SDO | + | SDO |-------->| SDI | + | SCLK |<--------| SCLK | + | | | | + | | +-------------+ + | CNV |<-----+--| PWM | + | | +--| GPIO | + +-------------+ +-------------+ + +In this case, both the ``cnv-gpios`` and ``pwms`` properties are required. +The ``#trigger-source-cells = <2>`` property is also required to connect back +to the SPI offload. The SPI offload will have ``trigger-sources`` property +with cells to indicate the busy signal and which GPx pin is used, e.g +``<&ad4695 AD4695_TRIGGER_EVENT_BUSY AD4695_TRIGGER_PIN_GP0>``. + +.. seealso:: `SPI offload support`_ + Channel configuration --------------------- @@ -158,6 +188,27 @@ Unimplemented features - GPIO support - CRC support +SPI offload support +=================== + +To be able to achieve the maximum sample rate, the driver can be used with the +`AXI SPI Engine`_ to provide SPI offload support. + +.. _AXI SPI Engine: http://analogdevicesinc.github.io/hdl/projects/ad469x_fmc/index.html + +.. seealso:: `SPI offload wiring`_ + +When SPI offload is being used, some attributes will be different. + +* ``trigger`` directory is removed. +* ``in_voltage0_sampling_frequency`` attributes are added for setting the sample + rate. +* ``in_voltage0_sampling_frequency_available`` attributes are added for querying + the max sample rate. +* ``timestamp`` channel is removed. +* Buffer data format may be different compared to when offload is not used, + e.g. the ``buffer0/in_voltage0_type`` attribute. + Device buffers ============== @@ -165,3 +216,20 @@ This driver supports hardware triggered buffers. This uses the "advanced sequencer" feature of the chip to trigger a burst of conversions. Also see :doc:`iio_devbuf` for more general information. + +Effective sample rate for buffered reads +---------------------------------------- + +When SPI offload is not used, the sample rate is determined by the trigger that +is manually configured in userspace. All enabled channels will be read in a +burst when the trigger is received. + +When SPI offload is used, the sample rate is configured per channel. All +channels will have the same rate, so only one ``in_voltageY_sampling_frequency`` +attribute needs to be set. Since this rate determines the delay between each +individual conversion, the effective sample rate for each sample is actually +the sum of the periods of each enabled channel in a buffered read. In other +words, it is the value of the ``in_voltageY_sampling_frequency`` attribute +divided by the number of enabled channels. So if 4 channels are enabled, with +the ``in_voltageY_sampling_frequency`` attributes set to 1 MHz, the effective +sample rate is 250 kHz. From c91c294c722e44c14a452f887b8151cd3b14fa6c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 7 Feb 2025 14:09:13 -0600 Subject: [PATCH 0157/2157] iio: dac: ad5791: sort include directives Sort includes alphabetically before we add more in a later patch. Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-16-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5791.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 034228a7c059..4f28f49071c2 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -6,21 +6,21 @@ * Copyright 2011 Analog Devices Inc. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include #define AD5791_DAC_MASK GENMASK(19, 0) From 192b669b930c16f493dde1b2a3e9b25e466fd624 Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Fri, 7 Feb 2025 14:09:14 -0600 Subject: [PATCH 0158/2157] iio: dac: ad5791: Add offload support Add SPI offload support to stream TX buffers using DMA. This allows loading samples to the DAC with a rate of 1 MSPS. Signed-off-by: Axel Haslam Reviewed-by: Jonathan Cameron Reviewed-by: Nuno Sa Signed-off-by: David Lechner Link: https://patch.msgid.link/20250207-dlech-mainline-spi-engine-offload-2-v8-17-e48a489be48c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/Kconfig | 3 + drivers/iio/dac/ad5791.c | 163 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index 5690a37267d8..4811ea973125 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -296,6 +296,9 @@ config AD5770R config AD5791 tristate "Analog Devices AD5760/AD5780/AD5781/AD5790/AD5791 DAC SPI driver" depends on SPI + select SPI_OFFLOAD + select IIO_BUFFER + select IIO_BUFFER_DMAENGINE help Say yes here to build support for Analog Devices AD5760, AD5780, AD5781, AD5790, AD5791 High Resolution Voltage Output Digital to diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 4f28f49071c2..07848be3f8d5 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -15,9 +15,12 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -64,11 +67,13 @@ * struct ad5791_chip_info - chip specific information * @name: name of the dac chip * @channel: channel specification + * @channel_offload: channel specification for offload * @get_lin_comp: function pointer to the device specific function */ struct ad5791_chip_info { const char *name; const struct iio_chan_spec channel; + const struct iio_chan_spec channel_offload; int (*get_lin_comp)(unsigned int span); }; @@ -81,6 +86,11 @@ struct ad5791_chip_info { * @gpio_clear: clear gpio * @gpio_ldac: load dac gpio * @chip_info: chip model specific constants + * @offload_msg: spi message used for offload + * @offload_xfer: spi transfer used for offload + * @offload: offload device + * @offload_trigger: offload trigger + * @offload_trigger_hz: offload sample rate * @vref_mv: actual reference voltage used * @vref_neg_mv: voltage of the negative supply * @ctrl: control register cache @@ -96,6 +106,11 @@ struct ad5791_state { struct gpio_desc *gpio_clear; struct gpio_desc *gpio_ldac; const struct ad5791_chip_info *chip_info; + struct spi_message offload_msg; + struct spi_transfer offload_xfer; + struct spi_offload *offload; + struct spi_offload_trigger *offload_trigger; + unsigned int offload_trigger_hz; unsigned short vref_mv; unsigned int vref_neg_mv; unsigned ctrl; @@ -232,6 +247,25 @@ static int ad5780_get_lin_comp(unsigned int span) return AD5780_LINCOMP_10_20; } +static int ad5791_set_sample_freq(struct ad5791_state *st, int val) +{ + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = val, + }, + }; + int ret; + + ret = spi_offload_trigger_validate(st->offload_trigger, &config); + if (ret) + return ret; + + st->offload_trigger_hz = config.periodic.frequency_hz; + + return 0; +} + static int ad5791_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, @@ -259,6 +293,9 @@ static int ad5791_read_raw(struct iio_dev *indio_dev, do_div(val64, st->vref_mv); *val = -val64; return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->offload_trigger_hz; + return IIO_VAL_INT; default: return -EINVAL; } @@ -299,6 +336,24 @@ static const struct ad5791_chip_info _name##_chip_info = { \ }, \ .ext_info = ad5791_ext_info, \ }, \ + .channel_offload = { \ + .type = IIO_VOLTAGE, \ + .output = 1, \ + .indexed = 1, \ + .address = AD5791_ADDR_DAC0, \ + .channel = 0, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OFFSET), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),\ + .scan_type = { \ + .sign = 'u', \ + .realbits = (bits), \ + .storagebits = 32, \ + .shift = (_shift), \ + }, \ + .ext_info = ad5791_ext_info, \ + }, \ } AD5791_DEFINE_CHIP_INFO(ad5760, 16, 4, ad5780_get_lin_comp); @@ -322,14 +377,106 @@ static int ad5791_write_raw(struct iio_dev *indio_dev, return ad5791_spi_write(st, chan->address, val); + case IIO_CHAN_INFO_SAMP_FREQ: + if (val < 1) + return -EINVAL; + return ad5791_set_sample_freq(st, val); default: return -EINVAL; } } +static int ad5791_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT; + default: + return IIO_VAL_INT_PLUS_MICRO; + } +} + +static int ad5791_buffer_preenable(struct iio_dev *indio_dev) +{ + struct ad5791_state *st = iio_priv(indio_dev); + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = st->offload_trigger_hz, + }, + }; + + if (st->pwr_down) + return -EINVAL; + + return spi_offload_trigger_enable(st->offload, st->offload_trigger, + &config); +} + +static int ad5791_buffer_postdisable(struct iio_dev *indio_dev) +{ + struct ad5791_state *st = iio_priv(indio_dev); + + spi_offload_trigger_disable(st->offload, st->offload_trigger); + + return 0; +} + +static const struct iio_buffer_setup_ops ad5791_buffer_setup_ops = { + .preenable = &ad5791_buffer_preenable, + .postdisable = &ad5791_buffer_postdisable, +}; + +static int ad5791_offload_setup(struct iio_dev *indio_dev) +{ + struct ad5791_state *st = iio_priv(indio_dev); + struct spi_device *spi = st->spi; + struct dma_chan *tx_dma; + int ret; + + st->offload_trigger = devm_spi_offload_trigger_get(&spi->dev, + st->offload, SPI_OFFLOAD_TRIGGER_PERIODIC); + if (IS_ERR(st->offload_trigger)) + return dev_err_probe(&spi->dev, PTR_ERR(st->offload_trigger), + "failed to get offload trigger\n"); + + ret = ad5791_set_sample_freq(st, 1 * MEGA); + if (ret) + return dev_err_probe(&spi->dev, ret, + "failed to init sample rate\n"); + + tx_dma = devm_spi_offload_tx_stream_request_dma_chan(&spi->dev, + st->offload); + if (IS_ERR(tx_dma)) + return dev_err_probe(&spi->dev, PTR_ERR(tx_dma), + "failed to get offload TX DMA\n"); + + ret = devm_iio_dmaengine_buffer_setup_with_handle(&spi->dev, + indio_dev, tx_dma, IIO_BUFFER_DIRECTION_OUT); + if (ret) + return ret; + + st->offload_xfer.len = 4; + st->offload_xfer.bits_per_word = 24; + st->offload_xfer.offload_flags = SPI_OFFLOAD_XFER_TX_STREAM; + + spi_message_init_with_transfers(&st->offload_msg, &st->offload_xfer, 1); + st->offload_msg.offload = st->offload; + + return devm_spi_optimize_message(&spi->dev, st->spi, &st->offload_msg); +} + static const struct iio_info ad5791_info = { .read_raw = &ad5791_read_raw, .write_raw = &ad5791_write_raw, + .write_raw_get_fmt = &ad5791_write_raw_get_fmt, +}; + +static const struct spi_offload_config ad5791_offload_config = { + .capability_flags = SPI_OFFLOAD_CAP_TRIGGER | + SPI_OFFLOAD_CAP_TX_STREAM_DMA, }; static int ad5791_probe(struct spi_device *spi) @@ -416,6 +563,21 @@ static int ad5791_probe(struct spi_device *spi) indio_dev->channels = &st->chip_info->channel; indio_dev->num_channels = 1; indio_dev->name = st->chip_info->name; + + st->offload = devm_spi_offload_get(&spi->dev, spi, &ad5791_offload_config); + ret = PTR_ERR_OR_ZERO(st->offload); + if (ret && ret != -ENODEV) + return dev_err_probe(&spi->dev, ret, "failed to get offload\n"); + + if (ret != -ENODEV) { + indio_dev->channels = &st->chip_info->channel_offload; + indio_dev->setup_ops = &ad5791_buffer_setup_ops; + ret = ad5791_offload_setup(indio_dev); + if (ret) + return dev_err_probe(&spi->dev, ret, + "fail to setup offload\n"); + } + return devm_iio_device_register(&spi->dev, indio_dev); } @@ -452,3 +614,4 @@ module_spi_driver(ad5791_driver); MODULE_AUTHOR("Michael Hennerich "); MODULE_DESCRIPTION("Analog Devices AD5760/AD5780/AD5781/AD5790/AD5791 DAC"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS("IIO_DMAENGINE_BUFFER"); From 67d63185db79fa5c17ddb948599b7e2f0e031003 Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Thu, 9 Jan 2025 13:47:23 -0500 Subject: [PATCH 0159/2157] iio: adc: ad4695: add offload-based oversampling support Add support for the ad4695's oversampling feature when SPI offload is available. This allows the ad4695 to set oversampling ratios on a per-channel basis, raising the effective-number-of-bits from 16 (OSR == 1) to 17 (4), 18 (16), or 19 (64) for a given sample (i.e. one full cycle through the auto-sequencer). The logic for reading and writing sampling frequency for a given channel is also adjusted based on the current oversampling ratio. The non-offload case isn't supported as there isn't a good way to trigger the CNV pin in this mode. Support could be added in the future if a use-case arises. Signed-off-by: Trevor Gamblin Reviewed-by: Nuno Sa Tested-by: David Lechner Link: https://patch.msgid.link/20250109-ad4695-oversampling-v2-1-a46ac487082c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 331 +++++++++++++++++++++++++++++++++++---- 1 file changed, 302 insertions(+), 29 deletions(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 5c6d4f658af9..3a1a6f96480f 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -79,6 +79,7 @@ #define AD4695_REG_CONFIG_IN_MODE BIT(6) #define AD4695_REG_CONFIG_IN_PAIR GENMASK(5, 4) #define AD4695_REG_CONFIG_IN_AINHIGHZ_EN BIT(3) +#define AD4695_REG_CONFIG_IN_OSR_SET GENMASK(1, 0) #define AD4695_REG_UPPER_IN(n) (0x0040 | (2 * (n))) #define AD4695_REG_LOWER_IN(n) (0x0060 | (2 * (n))) #define AD4695_REG_HYST_IN(n) (0x0080 | (2 * (n))) @@ -127,6 +128,7 @@ struct ad4695_channel_config { bool bipolar; enum ad4695_in_pair pin_pairing; unsigned int common_mode_mv; + unsigned int oversampling_ratio; }; struct ad4695_state { @@ -306,6 +308,65 @@ static const struct regmap_bus ad4695_regmap_bus = { .val_format_endian_default = REGMAP_ENDIAN_BIG, }; +enum { + AD4695_SCAN_TYPE_OSR_1, + AD4695_SCAN_TYPE_OSR_4, + AD4695_SCAN_TYPE_OSR_16, + AD4695_SCAN_TYPE_OSR_64, +}; + +static const struct iio_scan_type ad4695_scan_type_offload_u[] = { + [AD4695_SCAN_TYPE_OSR_1] = { + .sign = 'u', + .realbits = 16, + .shift = 3, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_4] = { + .sign = 'u', + .realbits = 17, + .shift = 2, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_16] = { + .sign = 'u', + .realbits = 18, + .shift = 1, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_64] = { + .sign = 'u', + .realbits = 19, + .storagebits = 32, + }, +}; + +static const struct iio_scan_type ad4695_scan_type_offload_s[] = { + [AD4695_SCAN_TYPE_OSR_1] = { + .sign = 's', + .realbits = 16, + .shift = 3, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_4] = { + .sign = 's', + .realbits = 17, + .shift = 2, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_16] = { + .sign = 's', + .realbits = 18, + .shift = 1, + .storagebits = 32, + }, + [AD4695_SCAN_TYPE_OSR_64] = { + .sign = 's', + .realbits = 19, + .storagebits = 32, + }, +}; + static const struct iio_chan_spec ad4695_channel_template = { .type = IIO_VOLTAGE, .indexed = 1, @@ -343,6 +404,10 @@ static const char * const ad4695_power_supplies[] = { "avdd", "vio" }; +static const int ad4695_oversampling_ratios[] = { + 1, 4, 16, 64, +}; + static const struct ad4695_chip_info ad4695_chip_info = { .name = "ad4695", .max_sample_rate = 500 * KILO, @@ -519,6 +584,29 @@ static int ad4695_set_ref_voltage(struct ad4695_state *st, int vref_mv) FIELD_PREP(AD4695_REG_REF_CTRL_VREF_SET, val)); } +/** + * ad4695_osr_to_regval - convert ratio to OSR register value + * @ratio: ratio to check + * + * Check if ratio is present in the list of available ratios and return + * the corresponding value that needs to be written to the register to + * select that ratio. + * + * Returns: register value (0 to 3) or -EINVAL if there is not an exact + * match + */ +static int ad4695_osr_to_regval(int ratio) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ad4695_oversampling_ratios); i++) { + if (ratio == ad4695_oversampling_ratios[i]) + return i; + } + + return -EINVAL; +} + static int ad4695_write_chn_cfg(struct ad4695_state *st, struct ad4695_channel_config *cfg) { @@ -946,10 +1034,18 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct ad4695_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; struct ad4695_channel_config *cfg = &st->channels_cfg[chan->scan_index]; - u8 realbits = chan->scan_type.realbits; + unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; unsigned int reg_val; int ret, tmp; + u8 realbits; + + scan_type = iio_get_current_scan_type(indio_dev, chan); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + realbits = scan_type->realbits; switch (mask) { case IIO_CHAN_INFO_RAW: @@ -958,7 +1054,7 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - if (chan->scan_type.sign == 's') + if (scan_type->sign == 's') *val = sign_extend32(st->raw_data, realbits - 1); else *val = st->raw_data; @@ -970,7 +1066,7 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, switch (chan->type) { case IIO_VOLTAGE: *val = st->vref_mv; - *val2 = chan->scan_type.realbits; + *val2 = realbits; return IIO_VAL_FRACTIONAL_LOG2; case IIO_TEMP: /* T_scale (°C) = raw * V_REF (mV) / (-1.8 mV/°C * 2^16) */ @@ -1031,8 +1127,26 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, tmp = sign_extend32(reg_val, 15); - *val = tmp / 4; - *val2 = abs(tmp) % 4 * MICRO / 4; + switch (cfg->oversampling_ratio) { + case 1: + *val = tmp / 4; + *val2 = abs(tmp) % 4 * MICRO / 4; + break; + case 4: + *val = tmp / 2; + *val2 = abs(tmp) % 2 * MICRO / 2; + break; + case 16: + *val = tmp; + *val2 = 0; + break; + case 64: + *val = tmp * 2; + *val2 = 0; + break; + default: + return -EINVAL; + } if (tmp < 0 && *val2) { *val *= -1; @@ -1045,6 +1159,14 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, default: return -EINVAL; } + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + switch (chan->type) { + case IIO_VOLTAGE: + *val = st->channels_cfg[chan->scan_index].oversampling_ratio; + return IIO_VAL_INT; + default: + return -EINVAL; + } case IIO_CHAN_INFO_SAMP_FREQ: { struct pwm_state state; @@ -1052,7 +1174,11 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - *val = DIV_ROUND_UP_ULL(NSEC_PER_SEC, state.period); + /* + * The effective sampling frequency for a channel is the input + * frequency divided by the channel's OSR value. + */ + *val = DIV_ROUND_UP_ULL(NSEC_PER_SEC, state.period * osr); return IIO_VAL_INT; } @@ -1073,12 +1199,69 @@ static int ad4695_write_raw_get_fmt(struct iio_dev *indio_dev, } } +static int ad4695_set_osr_val(struct ad4695_state *st, + struct iio_chan_spec const *chan, + int val) +{ + int osr = ad4695_osr_to_regval(val); + + if (osr < 0) + return osr; + + switch (chan->type) { + case IIO_VOLTAGE: + st->channels_cfg[chan->scan_index].oversampling_ratio = val; + return regmap_update_bits(st->regmap, + AD4695_REG_CONFIG_IN(chan->scan_index), + AD4695_REG_CONFIG_IN_OSR_SET, + FIELD_PREP(AD4695_REG_CONFIG_IN_OSR_SET, osr)); + default: + return -EINVAL; + } +} + +static unsigned int ad4695_get_calibbias(int val, int val2, int osr) +{ + int val_calc, scale; + + switch (osr) { + case 4: + scale = 4; + break; + case 16: + scale = 2; + break; + case 64: + scale = 1; + break; + default: + scale = 8; + break; + } + + val = clamp_t(int, val, S32_MIN / 8, S32_MAX / 8); + + /* val2 range is (-MICRO, MICRO) if val == 0, otherwise [0, MICRO) */ + if (val < 0) + val_calc = val * scale - val2 * scale / MICRO; + else if (val2 < 0) + /* if val2 < 0 then val == 0 */ + val_calc = val2 * scale / (int)MICRO; + else + val_calc = val * scale + val2 * scale / MICRO; + + val_calc /= 2; + + return clamp_t(int, val_calc, S16_MIN, S16_MAX); +} + static int ad4695_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct ad4695_state *st = iio_priv(indio_dev); unsigned int reg_val; + unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { switch (mask) { @@ -1103,23 +1286,7 @@ static int ad4695_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_CALIBBIAS: switch (chan->type) { case IIO_VOLTAGE: - if (val2 >= 0 && val > S16_MAX / 4) - reg_val = S16_MAX; - else if ((val2 < 0 ? -val : val) < S16_MIN / 4) - reg_val = S16_MIN; - else if (val2 < 0) - reg_val = clamp_t(int, - -(val * 4 + -val2 * 4 / MICRO), - S16_MIN, S16_MAX); - else if (val < 0) - reg_val = clamp_t(int, - val * 4 - val2 * 4 / MICRO, - S16_MIN, S16_MAX); - else - reg_val = clamp_t(int, - val * 4 + val2 * 4 / MICRO, - S16_MIN, S16_MAX); - + reg_val = ad4695_get_calibbias(val, val2, osr); return regmap_write(st->regmap16, AD4695_REG_OFFSET_IN(chan->scan_index), reg_val); @@ -1128,15 +1295,27 @@ static int ad4695_write_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_SAMP_FREQ: { struct pwm_state state; + /* + * Limit the maximum acceptable sample rate according to + * the channel's oversampling ratio. + */ + u64 max_osr_rate = DIV_ROUND_UP_ULL(st->chip_info->max_sample_rate, + osr); - if (val <= 0 || val > st->chip_info->max_sample_rate) + if (val <= 0 || val > max_osr_rate) return -EINVAL; guard(mutex)(&st->cnv_pwm_lock); pwm_get_state(st->cnv_pwm, &state); - state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, val); + /* + * The required sample frequency for a given OSR is the + * input frequency multiplied by it. + */ + state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, val * osr); return pwm_apply_might_sleep(st->cnv_pwm, &state); } + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return ad4695_set_osr_val(st, chan, val); default: return -EINVAL; } @@ -1149,18 +1328,40 @@ static int ad4695_read_avail(struct iio_dev *indio_dev, const int **vals, int *type, int *length, long mask) { + int ret; static const int ad4695_calibscale_available[6] = { /* Range of 0 (inclusive) to 2 (exclusive) */ 0, 15, 1, 15, U16_MAX, 15 }; - static const int ad4695_calibbias_available[6] = { + static const int ad4695_calibbias_available[4][6] = { /* * Datasheet says FSR/8 which translates to signed/4. The step - * depends on oversampling ratio which is always 1 for now. + * depends on oversampling ratio, so we need four different + * ranges to select from. */ - S16_MIN / 4, 0, 0, MICRO / 4, S16_MAX / 4, S16_MAX % 4 * MICRO / 4 + { + S16_MIN / 4, 0, + 0, MICRO / 4, + S16_MAX / 4, S16_MAX % 4 * MICRO / 4 + }, + { + S16_MIN / 2, 0, + 0, MICRO / 2, + S16_MAX / 2, S16_MAX % 2 * MICRO / 2, + }, + { + S16_MIN, 0, + 1, 0, + S16_MAX, 0, + }, + { + S16_MIN * 2, 0, + 2, 0, + S16_MAX * 2, 0, + }, }; struct ad4695_state *st = iio_priv(indio_dev); + unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; switch (mask) { case IIO_CHAN_INFO_CALIBSCALE: @@ -1175,16 +1376,36 @@ static int ad4695_read_avail(struct iio_dev *indio_dev, case IIO_CHAN_INFO_CALIBBIAS: switch (chan->type) { case IIO_VOLTAGE: - *vals = ad4695_calibbias_available; + ret = ad4695_osr_to_regval(osr); + if (ret < 0) + return ret; + /* + * Select the appropriate calibbias array based on the + * OSR value in the register. + */ + *vals = ad4695_calibbias_available[ret]; *type = IIO_VAL_INT_PLUS_MICRO; return IIO_AVAIL_RANGE; default: return -EINVAL; } case IIO_CHAN_INFO_SAMP_FREQ: + /* Max sample rate for the channel depends on OSR */ + st->sample_freq_range[2] = + DIV_ROUND_UP_ULL(st->chip_info->max_sample_rate, osr); *vals = st->sample_freq_range; *type = IIO_VAL_INT; return IIO_AVAIL_RANGE; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + switch (chan->type) { + case IIO_VOLTAGE: + *vals = ad4695_oversampling_ratios; + *length = ARRAY_SIZE(ad4695_oversampling_ratios); + *type = IIO_VAL_INT; + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } default: return -EINVAL; } @@ -1218,6 +1439,26 @@ static int ad4695_debugfs_reg_access(struct iio_dev *indio_dev, return -EINVAL; } +static int ad4695_get_current_scan_type(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad4695_state *st = iio_priv(indio_dev); + unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; + + switch (osr) { + case 1: + return AD4695_SCAN_TYPE_OSR_1; + case 4: + return AD4695_SCAN_TYPE_OSR_4; + case 16: + return AD4695_SCAN_TYPE_OSR_16; + case 64: + return AD4695_SCAN_TYPE_OSR_64; + default: + return -EINVAL; + } +} + static const struct iio_info ad4695_info = { .read_raw = &ad4695_read_raw, .write_raw_get_fmt = &ad4695_write_raw_get_fmt, @@ -1226,6 +1467,15 @@ static const struct iio_info ad4695_info = { .debugfs_reg_access = &ad4695_debugfs_reg_access, }; +static const struct iio_info ad4695_offload_info = { + .read_raw = &ad4695_read_raw, + .write_raw_get_fmt = &ad4695_write_raw_get_fmt, + .write_raw = &ad4695_write_raw, + .get_current_scan_type = &ad4695_get_current_scan_type, + .read_avail = &ad4695_read_avail, + .debugfs_reg_access = &ad4695_debugfs_reg_access, +}; + static int ad4695_parse_channel_cfg(struct ad4695_state *st) { struct device *dev = &st->spi->dev; @@ -1241,6 +1491,9 @@ static int ad4695_parse_channel_cfg(struct ad4695_state *st) chan_cfg->highz_en = true; chan_cfg->channel = i; + /* This is the default OSR after reset */ + chan_cfg->oversampling_ratio = 1; + *iio_chan = ad4695_channel_template; iio_chan->channel = i; iio_chan->scan_index = i; @@ -1409,6 +1662,7 @@ static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, struct dma_chan *rx_dma; int ret, i; + indio_dev->info = &ad4695_offload_info; indio_dev->num_channels = st->chip_info->num_voltage_inputs + 1; indio_dev->setup_ops = &ad4695_offload_buffer_setup_ops; @@ -1459,6 +1713,7 @@ static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, for (i = 0; i < indio_dev->num_channels; i++) { struct iio_chan_spec *chan = &st->iio_chan[i]; + struct ad4695_channel_config *cfg = &st->channels_cfg[i]; /* * NB: When using offload support, all channels need to have the @@ -1474,6 +1729,24 @@ static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, /* add sample frequency for PWM CNV trigger */ chan->info_mask_separate |= BIT(IIO_CHAN_INFO_SAMP_FREQ); chan->info_mask_separate_available |= BIT(IIO_CHAN_INFO_SAMP_FREQ); + + /* Add the oversampling properties only for voltage channels */ + if (chan->type != IIO_VOLTAGE) + continue; + + chan->info_mask_separate |= BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); + chan->info_mask_separate_available |= + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); + chan->has_ext_scan_type = 1; + if (cfg->bipolar) { + chan->ext_scan_type = ad4695_scan_type_offload_s; + chan->num_ext_scan_type = + ARRAY_SIZE(ad4695_scan_type_offload_s); + } else { + chan->ext_scan_type = ad4695_scan_type_offload_u; + chan->num_ext_scan_type = + ARRAY_SIZE(ad4695_scan_type_offload_u); + } } return devm_iio_dmaengine_buffer_setup_with_handle(dev, indio_dev, From c26b0854eb2b7301dee83ec6c190bc94a364e910 Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Thu, 9 Jan 2025 13:47:24 -0500 Subject: [PATCH 0160/2157] doc: iio: ad4695: describe oversampling support Add a section to the ad4695 documentation describing how to use the oversampling feature. Also add some clarification on how the oversampling ratio influences effective sample rate in the offload section. Signed-off-by: Trevor Gamblin Tested-by: David Lechner Link: https://patch.msgid.link/20250109-ad4695-oversampling-v2-2-a46ac487082c@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad4695.rst | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/Documentation/iio/ad4695.rst b/Documentation/iio/ad4695.rst index ead0faadff4b..f40593bcc37d 100644 --- a/Documentation/iio/ad4695.rst +++ b/Documentation/iio/ad4695.rst @@ -179,12 +179,38 @@ Gain/offset calibration System calibration is supported using the channel gain and offset registers via the ``calibscale`` and ``calibbias`` attributes respectively. +Oversampling +------------ + +The chip supports per-channel oversampling when SPI offload is being used, with +available oversampling ratios (OSR) of 1 (default), 4, 16, and 64. Enabling +oversampling on a channel raises the effective number of bits of sampled data to +17 (OSR == 4), 18 (16), or 19 (64), respectively. This can be set via the +``oversampling_ratio`` attribute. + +Setting the oversampling ratio for a channel also changes the sample rate for +that channel, since it requires multiple conversions per 1 sample. Specifically, +the new sampling frequency is the PWM sampling frequency divided by the +particular OSR. This is set automatically by the driver when setting the +``oversampling_ratio`` attribute. For example, if the device's current +``sampling_frequency`` is 10000 and an OSR of 4 is set on channel ``voltage0``, +the new reported sampling rate for that channel will be 2500 (ignoring PWM API +rounding), while all others will remain at 10000. Subsequently setting the +sampling frequency to a higher value on that channel will adjust the CNV trigger +period for all channels, e.g. if ``voltage0``'s sampling frequency is adjusted +from 2500 (with an OSR of 4) to 10000, the value reported by +``in_voltage0_sampling_frequency`` will be 10000, but all other channels will +now report 40000. + +For simplicity, the sampling frequency of the device should be set (considering +the highest desired OSR value to be used) first, before configuring oversampling +for specific channels. + Unimplemented features ---------------------- - Additional wiring modes - Threshold events -- Oversampling - GPIO support - CRC support @@ -233,3 +259,11 @@ words, it is the value of the ``in_voltageY_sampling_frequency`` attribute divided by the number of enabled channels. So if 4 channels are enabled, with the ``in_voltageY_sampling_frequency`` attributes set to 1 MHz, the effective sample rate is 250 kHz. + +With oversampling enabled, the effective sample rate also depends on the OSR +assigned to each channel. For example, if one of the 4 channels mentioned in the +previous case is configured with an OSR of 4, the effective sample rate for that +channel becomes (1 MHz / 4 ) = 250 kHz. The effective sample rate for all +four channels is then 1 / ( (3 / 1 MHz) + ( 1 / 250 kHz) ) ~= 142.9 kHz. Note +that in this case "sample" refers to one read of all enabled channels (i.e. one +full cycle through the auto-sequencer). From f2ae180926074842dec8809a8c568420b719342b Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:51 +0100 Subject: [PATCH 0161/2157] dt-bindings: iio: dac: adi-axi-adc: add ad7606 variant A new compatible is added to reflect the specialized version of the HDL. We use the parallel interface to write the ADC's registers, and accessing this interface requires to use ADI_AXI_REG_CONFIG_RD, ADI_AXI_REG_CONFIG_WR and ADI_AXI_REG_CONFIG_CTRL in a custom fashion. Reviewed-by: Rob Herring (Arm) Signed-off-by: Guillaume Stols Co-developed-by: Angelo Dureghello Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-1-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,axi-adc.yaml | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml b/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml index e1f450b80db2..4fa82dcf6fc9 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml @@ -17,13 +17,23 @@ description: | interface for the actual ADC, while this IP core will interface to the data-lines of the ADC and handle the streaming of data into memory via DMA. + In some cases, the AXI ADC interface is used to perform specialized + operation to a particular ADC, e.g access the physical bus through + specific registers to write ADC registers. + In this case, we use a different compatible which indicates the target + IP core's name. + The following IP is currently supported: + - AXI AD7606x: specialized version of the IP core for all the chips from + the ad7606 family. https://wiki.analog.com/resources/fpga/docs/axi_adc_ip + http://analogdevicesinc.github.io/hdl/library/axi_ad7606x/index.html properties: compatible: enum: - adi,axi-adc-10.0.a + - adi,axi-ad7606x reg: maxItems: 1 @@ -47,17 +57,48 @@ properties: '#io-backend-cells': const: 0 + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + "^adc@[0-9a-f]+$": + type: object + properties: + reg: + maxItems: 1 + additionalProperties: true + required: + - compatible + - reg + required: - compatible - dmas - reg - clocks +allOf: + - if: + properties: + compatible: + not: + contains: + const: adi,axi-ad7606x + then: + properties: + '#address-cells': false + '#size-cells': false + patternProperties: + "^adc@[0-9a-f]+$": false + additionalProperties: false examples: - | - axi-adc@44a00000 { + adc@44a00000 { compatible = "adi,axi-adc-10.0.a"; reg = <0x44a00000 0x10000>; dmas = <&rx_dma 0>; @@ -65,4 +106,31 @@ examples: clocks = <&axi_clk>; #io-backend-cells = <0>; }; + - | + #include + parallel_bus_controller@44a00000 { + compatible = "adi,axi-ad7606x"; + reg = <0x44a00000 0x10000>; + dmas = <&rx_dma 0>; + dma-names = "rx"; + clocks = <&ext_clk>; + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + compatible = "adi,ad7606b"; + reg = <0>; + pwms = <&axi_pwm_gen 0 0>; + pwm-names = "convst1"; + avcc-supply = <&adc_vref>; + vdrive-supply = <&vdd_supply>; + reset-gpios = <&gpio0 91 GPIO_ACTIVE_HIGH>; + standby-gpios = <&gpio0 90 GPIO_ACTIVE_LOW>; + adi,range-gpios = <&gpio0 89 GPIO_ACTIVE_HIGH>; + adi,oversampling-ratio-gpios = <&gpio0 88 GPIO_ACTIVE_HIGH + &gpio0 87 GPIO_ACTIVE_HIGH + &gpio0 86 GPIO_ACTIVE_HIGH>; + io-backends = <¶llel_bus_controller>; + }; + }; ... From f2a62931b39478c98f977caf299df5bc072f38e0 Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:52 +0100 Subject: [PATCH 0162/2157] iio: adc: ad7606: move the software mode configuration This is a preparation for the intoduction of the sofware functions in the iio backend version of the driver. The software mode configuration must be executed once the channels are configured, and the number of channels is known. This is not the case before iio-backend's configuration is called, and iio backend version of the driver does not have a timestamp channel. Also the sw_mode_config callback is configured during the iio-backend configuration. For clarity purpose, I moved the entire block instead of just the concerned function calls. Signed-off-by: Guillaume Stols Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-2-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index d8e3c7a43678..dde372ce7569 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1246,17 +1246,6 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, return -ERESTARTSYS; } - st->write_scale = ad7606_write_scale_hw; - st->write_os = ad7606_write_os_hw; - - ret = ad7606_sw_mode_setup(indio_dev); - if (ret) - return ret; - - ret = ad7606_chan_scales_setup(indio_dev); - if (ret) - return ret; - /* If convst pin is not defined, setup PWM. */ if (!st->gpio_convst) { st->cnvst_pwm = devm_pwm_get(dev, NULL); @@ -1334,6 +1323,17 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, return ret; } + st->write_scale = ad7606_write_scale_hw; + st->write_os = ad7606_write_os_hw; + + ret = ad7606_sw_mode_setup(indio_dev); + if (ret) + return ret; + + ret = ad7606_chan_scales_setup(indio_dev); + if (ret) + return ret; + return devm_iio_device_register(dev, indio_dev); } EXPORT_SYMBOL_NS_GPL(ad7606_probe, "IIO_AD7606"); From d2477887f6677de0675e600f1590378a5fb52909 Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:53 +0100 Subject: [PATCH 0163/2157] iio: adc: ad7606: move software functions into common file Since the register are always the same, whatever bus is used, moving the software functions into the main file avoids the code to be duplicated in both SPI and parallel version of the driver. Signed-off-by: Guillaume Stols Co-developed-by: Angelo Dureghello Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-3-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 133 +++++++++++++++++++++++++++++++--- drivers/iio/adc/ad7606.h | 37 +++++++++- drivers/iio/adc/ad7606_spi.c | 137 +---------------------------------- 3 files changed, 158 insertions(+), 149 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index dde372ce7569..b88c3e248b7e 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -85,6 +85,10 @@ static const unsigned int ad7606_oversampling_avail[7] = { 1, 2, 4, 8, 16, 32, 64, }; +static const unsigned int ad7606b_oversampling_avail[9] = { + 1, 2, 4, 8, 16, 32, 64, 128, 256, +}; + static const unsigned int ad7616_oversampling_avail[8] = { 1, 2, 4, 8, 16, 32, 64, 128, }; @@ -187,6 +191,8 @@ static int ad7608_chan_scale_setup(struct iio_dev *indio_dev, struct iio_chan_spec *chan, int ch); static int ad7609_chan_scale_setup(struct iio_dev *indio_dev, struct iio_chan_spec *chan, int ch); +static int ad7616_sw_mode_setup(struct iio_dev *indio_dev); +static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev); const struct ad7606_chip_info ad7605_4_info = { .channels = ad7605_channels, @@ -239,6 +245,7 @@ const struct ad7606_chip_info ad7606b_info = { .oversampling_avail = ad7606_oversampling_avail, .oversampling_num = ARRAY_SIZE(ad7606_oversampling_avail), .scale_setup_cb = ad7606_16bit_chan_scale_setup, + .sw_setup_cb = ad7606b_sw_mode_setup, }; EXPORT_SYMBOL_NS_GPL(ad7606b_info, "IIO_AD7606"); @@ -250,6 +257,7 @@ const struct ad7606_chip_info ad7606c_16_info = { .oversampling_avail = ad7606_oversampling_avail, .oversampling_num = ARRAY_SIZE(ad7606_oversampling_avail), .scale_setup_cb = ad7606c_16bit_chan_scale_setup, + .sw_setup_cb = ad7606b_sw_mode_setup, }; EXPORT_SYMBOL_NS_GPL(ad7606c_16_info, "IIO_AD7606"); @@ -294,6 +302,7 @@ const struct ad7606_chip_info ad7606c_18_info = { .oversampling_avail = ad7606_oversampling_avail, .oversampling_num = ARRAY_SIZE(ad7606_oversampling_avail), .scale_setup_cb = ad7606c_18bit_chan_scale_setup, + .sw_setup_cb = ad7606b_sw_mode_setup, }; EXPORT_SYMBOL_NS_GPL(ad7606c_18_info, "IIO_AD7606"); @@ -307,6 +316,7 @@ const struct ad7606_chip_info ad7616_info = { .oversampling_num = ARRAY_SIZE(ad7616_oversampling_avail), .os_req_reset = true, .scale_setup_cb = ad7606_16bit_chan_scale_setup, + .sw_setup_cb = ad7616_sw_mode_setup, }; EXPORT_SYMBOL_NS_GPL(ad7616_info, "IIO_AD7606"); @@ -1138,16 +1148,118 @@ static const struct iio_trigger_ops ad7606_trigger_ops = { .validate_device = iio_trigger_validate_own_device, }; -static int ad7606_sw_mode_setup(struct iio_dev *indio_dev) +static int ad7606_write_mask(struct ad7606_state *st, unsigned int addr, + unsigned long mask, unsigned int val) +{ + int readval; + + readval = st->bops->reg_read(st, addr); + if (readval < 0) + return readval; + + readval &= ~mask; + readval |= val; + + return st->bops->reg_write(st, addr, readval); +} + +static int ad7616_write_scale_sw(struct iio_dev *indio_dev, int ch, int val) +{ + struct ad7606_state *st = iio_priv(indio_dev); + unsigned int ch_addr, mode, ch_index; + + /* + * Ad7616 has 16 channels divided in group A and group B. + * The range of channels from A are stored in registers with address 4 + * while channels from B are stored in register with address 6. + * The last bit from channels determines if it is from group A or B + * because the order of channels in iio is 0A, 0B, 1A, 1B... + */ + ch_index = ch >> 1; + + ch_addr = AD7616_RANGE_CH_ADDR(ch_index); + + if ((ch & 0x1) == 0) /* channel A */ + ch_addr += AD7616_RANGE_CH_A_ADDR_OFF; + else /* channel B */ + ch_addr += AD7616_RANGE_CH_B_ADDR_OFF; + + /* 0b01 for 2.5v, 0b10 for 5v and 0b11 for 10v */ + mode = AD7616_RANGE_CH_MODE(ch_index, ((val + 1) & 0b11)); + + return ad7606_write_mask(st, ch_addr, AD7616_RANGE_CH_MSK(ch_index), + mode); +} + +static int ad7616_write_os_sw(struct iio_dev *indio_dev, int val) { struct ad7606_state *st = iio_priv(indio_dev); - st->sw_mode_en = st->bops->sw_mode_config && - device_property_present(st->dev, "adi,sw-mode"); - if (!st->sw_mode_en) - return 0; + return ad7606_write_mask(st, AD7616_CONFIGURATION_REGISTER, + AD7616_OS_MASK, val << 2); +} - indio_dev->info = &ad7606_info_sw_mode; +static int ad7606_write_scale_sw(struct iio_dev *indio_dev, int ch, int val) +{ + struct ad7606_state *st = iio_priv(indio_dev); + + return ad7606_write_mask(st, AD7606_RANGE_CH_ADDR(ch), + AD7606_RANGE_CH_MSK(ch), + AD7606_RANGE_CH_MODE(ch, val)); +} + +static int ad7606_write_os_sw(struct iio_dev *indio_dev, int val) +{ + struct ad7606_state *st = iio_priv(indio_dev); + + return st->bops->reg_write(st, AD7606_OS_MODE, val); +} + +static int ad7616_sw_mode_setup(struct iio_dev *indio_dev) +{ + struct ad7606_state *st = iio_priv(indio_dev); + int ret; + + /* + * Scale can be configured individually for each channel + * in software mode. + */ + + st->write_scale = ad7616_write_scale_sw; + st->write_os = &ad7616_write_os_sw; + + ret = st->bops->sw_mode_config(indio_dev); + if (ret) + return ret; + + /* Activate Burst mode and SEQEN MODE */ + return ad7606_write_mask(st, AD7616_CONFIGURATION_REGISTER, + AD7616_BURST_MODE | AD7616_SEQEN_MODE, + AD7616_BURST_MODE | AD7616_SEQEN_MODE); +} + +static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev) +{ + struct ad7606_state *st = iio_priv(indio_dev); + DECLARE_BITMAP(os, 3); + + bitmap_fill(os, 3); + /* + * Software mode is enabled when all three oversampling + * pins are set to high. If oversampling gpios are defined + * in the device tree, then they need to be set to high, + * otherwise, they must be hardwired to VDD + */ + if (st->gpio_os) { + gpiod_set_array_value(st->gpio_os->ndescs, st->gpio_os->desc, + st->gpio_os->info, os); + } + /* OS of 128 and 256 are available only in software mode */ + st->oversampling_avail = ad7606b_oversampling_avail; + st->num_os_ratios = ARRAY_SIZE(ad7606b_oversampling_avail); + + st->write_scale = ad7606_write_scale_sw; + st->write_os = &ad7606_write_os_sw; return st->bops->sw_mode_config(indio_dev); } @@ -1326,9 +1438,12 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, st->write_scale = ad7606_write_scale_hw; st->write_os = ad7606_write_os_hw; - ret = ad7606_sw_mode_setup(indio_dev); - if (ret) - return ret; + st->sw_mode_en = st->chip_info->sw_setup_cb && + device_property_present(st->dev, "adi,sw-mode"); + if (st->sw_mode_en) { + indio_dev->info = &ad7606_info_sw_mode; + st->chip_info->sw_setup_cb(indio_dev); + } ret = ad7606_chan_scales_setup(indio_dev); if (ret) diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index 8778ffe515b3..7a044b499cfe 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -10,6 +10,36 @@ #define AD760X_MAX_CHANNELS 16 +#define AD7616_CONFIGURATION_REGISTER 0x02 +#define AD7616_OS_MASK GENMASK(4, 2) +#define AD7616_BURST_MODE BIT(6) +#define AD7616_SEQEN_MODE BIT(5) +#define AD7616_RANGE_CH_A_ADDR_OFF 0x04 +#define AD7616_RANGE_CH_B_ADDR_OFF 0x06 +/* + * Range of channels from a group are stored in 2 registers. + * 0, 1, 2, 3 in a register followed by 4, 5, 6, 7 in second register. + * For channels from second group(8-15) the order is the same, only with + * an offset of 2 for register address. + */ +#define AD7616_RANGE_CH_ADDR(ch) ((ch) >> 2) +/* The range of the channel is stored in 2 bits */ +#define AD7616_RANGE_CH_MSK(ch) (0b11 << (((ch) & 0b11) * 2)) +#define AD7616_RANGE_CH_MODE(ch, mode) ((mode) << ((((ch) & 0b11)) * 2)) + +#define AD7606_CONFIGURATION_REGISTER 0x02 +#define AD7606_SINGLE_DOUT 0x00 + +/* + * Range for AD7606B channels are stored in registers starting with address 0x3. + * Each register stores range for 2 channels(4 bits per channel). + */ +#define AD7606_RANGE_CH_MSK(ch) (GENMASK(3, 0) << (4 * ((ch) & 0x1))) +#define AD7606_RANGE_CH_MODE(ch, mode) \ + ((GENMASK(3, 0) & (mode)) << (4 * ((ch) & 0x1))) +#define AD7606_RANGE_CH_ADDR(ch) (0x03 + ((ch) >> 1)) +#define AD7606_OS_MODE 0x08 + #define AD760X_CHANNEL(num, mask_sep, mask_type, mask_all, bits) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ @@ -71,6 +101,7 @@ struct ad7606_state; typedef int (*ad7606_scale_setup_cb_t)(struct iio_dev *indio_dev, struct iio_chan_spec *chan, int ch); +typedef int (*ad7606_sw_setup_cb_t)(struct iio_dev *indio_dev); /** * struct ad7606_chip_info - chip specific information @@ -80,6 +111,7 @@ typedef int (*ad7606_scale_setup_cb_t)(struct iio_dev *indio_dev, * @num_channels: number of channels * @num_adc_channels the number of channels the ADC actually inputs. * @scale_setup_cb: callback to setup the scales for each channel + * @sw_setup_cb: callback to setup the software mode if available. * @oversampling_avail pointer to the array which stores the available * oversampling ratios. * @oversampling_num number of elements stored in oversampling_avail array @@ -94,6 +126,7 @@ struct ad7606_chip_info { unsigned int num_adc_channels; unsigned int num_channels; ad7606_scale_setup_cb_t scale_setup_cb; + ad7606_sw_setup_cb_t sw_setup_cb; const unsigned int *oversampling_avail; unsigned int oversampling_num; bool os_req_reset; @@ -206,10 +239,6 @@ struct ad7606_bus_ops { int (*reg_write)(struct ad7606_state *st, unsigned int addr, unsigned int val); - int (*write_mask)(struct ad7606_state *st, - unsigned int addr, - unsigned long mask, - unsigned int val); int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); u16 (*rd_wr_cmd)(int addr, char isWriteOp); }; diff --git a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index e2c147525706..885bf0b68e77 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -15,36 +15,6 @@ #define MAX_SPI_FREQ_HZ 23500000 /* VDRIVE above 4.75 V */ -#define AD7616_CONFIGURATION_REGISTER 0x02 -#define AD7616_OS_MASK GENMASK(4, 2) -#define AD7616_BURST_MODE BIT(6) -#define AD7616_SEQEN_MODE BIT(5) -#define AD7616_RANGE_CH_A_ADDR_OFF 0x04 -#define AD7616_RANGE_CH_B_ADDR_OFF 0x06 -/* - * Range of channels from a group are stored in 2 registers. - * 0, 1, 2, 3 in a register followed by 4, 5, 6, 7 in second register. - * For channels from second group(8-15) the order is the same, only with - * an offset of 2 for register address. - */ -#define AD7616_RANGE_CH_ADDR(ch) ((ch) >> 2) -/* The range of the channel is stored in 2 bits */ -#define AD7616_RANGE_CH_MSK(ch) (0b11 << (((ch) & 0b11) * 2)) -#define AD7616_RANGE_CH_MODE(ch, mode) ((mode) << ((((ch) & 0b11)) * 2)) - -#define AD7606_CONFIGURATION_REGISTER 0x02 -#define AD7606_SINGLE_DOUT 0x00 - -/* - * Range for AD7606B channels are stored in registers starting with address 0x3. - * Each register stores range for 2 channels(4 bits per channel). - */ -#define AD7606_RANGE_CH_MSK(ch) (GENMASK(3, 0) << (4 * ((ch) & 0x1))) -#define AD7606_RANGE_CH_MODE(ch, mode) \ - ((GENMASK(3, 0) & mode) << (4 * ((ch) & 0x1))) -#define AD7606_RANGE_CH_ADDR(ch) (0x03 + ((ch) >> 1)) -#define AD7606_OS_MODE 0x08 - static const struct iio_chan_spec ad7616_sw_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(16), AD7616_CHANNEL(0), @@ -89,10 +59,6 @@ static const struct iio_chan_spec ad7606c_18_sw_channels[] = { AD7606_SW_CHANNEL(7, 18), }; -static const unsigned int ad7606B_oversampling_avail[9] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256 -}; - static u16 ad7616_spi_rd_wr_cmd(int addr, char isWriteOp) { /* @@ -194,118 +160,20 @@ static int ad7606_spi_reg_write(struct ad7606_state *st, return spi_write(spi, &st->d16[0], sizeof(st->d16[0])); } -static int ad7606_spi_write_mask(struct ad7606_state *st, - unsigned int addr, - unsigned long mask, - unsigned int val) -{ - int readval; - - readval = st->bops->reg_read(st, addr); - if (readval < 0) - return readval; - - readval &= ~mask; - readval |= val; - - return st->bops->reg_write(st, addr, readval); -} - -static int ad7616_write_scale_sw(struct iio_dev *indio_dev, int ch, int val) -{ - struct ad7606_state *st = iio_priv(indio_dev); - unsigned int ch_addr, mode, ch_index; - - - /* - * Ad7616 has 16 channels divided in group A and group B. - * The range of channels from A are stored in registers with address 4 - * while channels from B are stored in register with address 6. - * The last bit from channels determines if it is from group A or B - * because the order of channels in iio is 0A, 0B, 1A, 1B... - */ - ch_index = ch >> 1; - - ch_addr = AD7616_RANGE_CH_ADDR(ch_index); - - if ((ch & 0x1) == 0) /* channel A */ - ch_addr += AD7616_RANGE_CH_A_ADDR_OFF; - else /* channel B */ - ch_addr += AD7616_RANGE_CH_B_ADDR_OFF; - - /* 0b01 for 2.5v, 0b10 for 5v and 0b11 for 10v */ - mode = AD7616_RANGE_CH_MODE(ch_index, ((val + 1) & 0b11)); - return st->bops->write_mask(st, ch_addr, AD7616_RANGE_CH_MSK(ch_index), - mode); -} - -static int ad7616_write_os_sw(struct iio_dev *indio_dev, int val) -{ - struct ad7606_state *st = iio_priv(indio_dev); - - return st->bops->write_mask(st, AD7616_CONFIGURATION_REGISTER, - AD7616_OS_MASK, val << 2); -} - -static int ad7606_write_scale_sw(struct iio_dev *indio_dev, int ch, int val) -{ - struct ad7606_state *st = iio_priv(indio_dev); - - return ad7606_spi_write_mask(st, - AD7606_RANGE_CH_ADDR(ch), - AD7606_RANGE_CH_MSK(ch), - AD7606_RANGE_CH_MODE(ch, val)); -} - -static int ad7606_write_os_sw(struct iio_dev *indio_dev, int val) -{ - struct ad7606_state *st = iio_priv(indio_dev); - - return ad7606_spi_reg_write(st, AD7606_OS_MODE, val); -} - static int ad7616_sw_mode_config(struct iio_dev *indio_dev) { - struct ad7606_state *st = iio_priv(indio_dev); - /* * Scale can be configured individually for each channel * in software mode. */ indio_dev->channels = ad7616_sw_channels; - st->write_scale = ad7616_write_scale_sw; - st->write_os = &ad7616_write_os_sw; - - /* Activate Burst mode and SEQEN MODE */ - return st->bops->write_mask(st, - AD7616_CONFIGURATION_REGISTER, - AD7616_BURST_MODE | AD7616_SEQEN_MODE, - AD7616_BURST_MODE | AD7616_SEQEN_MODE); + return 0; } static int ad7606B_sw_mode_config(struct iio_dev *indio_dev) { struct ad7606_state *st = iio_priv(indio_dev); - DECLARE_BITMAP(os, 3); - - bitmap_fill(os, 3); - /* - * Software mode is enabled when all three oversampling - * pins are set to high. If oversampling gpios are defined - * in the device tree, then they need to be set to high, - * otherwise, they must be hardwired to VDD - */ - if (st->gpio_os) { - gpiod_set_array_value(st->gpio_os->ndescs, - st->gpio_os->desc, st->gpio_os->info, os); - } - /* OS of 128 and 256 are available only in software mode */ - st->oversampling_avail = ad7606B_oversampling_avail; - st->num_os_ratios = ARRAY_SIZE(ad7606B_oversampling_avail); - - st->write_scale = ad7606_write_scale_sw; - st->write_os = &ad7606_write_os_sw; /* Configure device spi to output on a single channel */ st->bops->reg_write(st, @@ -350,7 +218,6 @@ static const struct ad7606_bus_ops ad7616_spi_bops = { .read_block = ad7606_spi_read_block, .reg_read = ad7606_spi_reg_read, .reg_write = ad7606_spi_reg_write, - .write_mask = ad7606_spi_write_mask, .rd_wr_cmd = ad7616_spi_rd_wr_cmd, .sw_mode_config = ad7616_sw_mode_config, }; @@ -359,7 +226,6 @@ static const struct ad7606_bus_ops ad7606b_spi_bops = { .read_block = ad7606_spi_read_block, .reg_read = ad7606_spi_reg_read, .reg_write = ad7606_spi_reg_write, - .write_mask = ad7606_spi_write_mask, .rd_wr_cmd = ad7606B_spi_rd_wr_cmd, .sw_mode_config = ad7606B_sw_mode_config, }; @@ -368,7 +234,6 @@ static const struct ad7606_bus_ops ad7606c_18_spi_bops = { .read_block = ad7606_spi_read_block18to32, .reg_read = ad7606_spi_reg_read, .reg_write = ad7606_spi_reg_write, - .write_mask = ad7606_spi_write_mask, .rd_wr_cmd = ad7606B_spi_rd_wr_cmd, .sw_mode_config = ad7606c_18_sw_mode_config, }; From c4330d081775c628340cbf297619b15c47fb9b98 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 10 Feb 2025 17:10:54 +0100 Subject: [PATCH 0164/2157] iio: adc: adi-axi-adc: add struct axi_adc_info Add struct axi_adc_info to allow different axi-adc compatibles that can be added to this generic implementation. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-4-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a55db308baab..04f7d7d3890b 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -80,7 +80,16 @@ ADI_AXI_REG_CHAN_CTRL_FMT_EN | \ ADI_AXI_REG_CHAN_CTRL_ENABLE) +struct axi_adc_info { + unsigned int version; + const struct iio_backend_info *backend_info; + bool has_child_nodes; + const void *pdata; + unsigned int pdata_sz; +}; + struct adi_axi_adc_state { + const struct axi_adc_info *info; struct regmap *regmap; struct device *dev; /* lock to protect multiple accesses to the device registers */ @@ -348,7 +357,6 @@ static const struct iio_backend_info adi_axi_adc_generic = { static int adi_axi_adc_probe(struct platform_device *pdev) { - const unsigned int *expected_ver; struct adi_axi_adc_state *st; void __iomem *base; unsigned int ver; @@ -370,8 +378,8 @@ static int adi_axi_adc_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(st->regmap), "failed to init register map\n"); - expected_ver = device_get_match_data(&pdev->dev); - if (!expected_ver) + st->info = device_get_match_data(&pdev->dev); + if (!st->info) return -ENODEV; clk = devm_clk_get_enabled(&pdev->dev, NULL); @@ -391,12 +399,13 @@ static int adi_axi_adc_probe(struct platform_device *pdev) if (ret) return ret; - if (ADI_AXI_PCORE_VER_MAJOR(ver) != ADI_AXI_PCORE_VER_MAJOR(*expected_ver)) { + if (ADI_AXI_PCORE_VER_MAJOR(ver) != + ADI_AXI_PCORE_VER_MAJOR(st->info->version)) { dev_err(&pdev->dev, "Major version mismatch. Expected %d.%.2d.%c, Reported %d.%.2d.%c\n", - ADI_AXI_PCORE_VER_MAJOR(*expected_ver), - ADI_AXI_PCORE_VER_MINOR(*expected_ver), - ADI_AXI_PCORE_VER_PATCH(*expected_ver), + ADI_AXI_PCORE_VER_MAJOR(st->info->version), + ADI_AXI_PCORE_VER_MINOR(st->info->version), + ADI_AXI_PCORE_VER_PATCH(st->info->version), ADI_AXI_PCORE_VER_MAJOR(ver), ADI_AXI_PCORE_VER_MINOR(ver), ADI_AXI_PCORE_VER_PATCH(ver)); @@ -416,11 +425,14 @@ static int adi_axi_adc_probe(struct platform_device *pdev) return 0; } -static unsigned int adi_axi_adc_10_0_a_info = ADI_AXI_PCORE_VER(10, 0, 'a'); +static const struct axi_adc_info adc_generic = { + .version = ADI_AXI_PCORE_VER(10, 0, 'a'), + .backend_info = &adi_axi_adc_generic, +}; /* Match table for of_platform binding */ static const struct of_device_id adi_axi_adc_of_match[] = { - { .compatible = "adi,axi-adc-10.0.a", .data = &adi_axi_adc_10_0_a_info }, + { .compatible = "adi,axi-adc-10.0.a", .data = &adc_generic }, { /* end of list */ } }; MODULE_DEVICE_TABLE(of, adi_axi_adc_of_match); From a4ab57debde24a0ae3e02b70670352d0c49e96b9 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 10 Feb 2025 17:10:55 +0100 Subject: [PATCH 0165/2157] iio: adc: adi-axi-adc: add platform children support This is a preparation for the next commit adding support for register read and write functions on AD7606. Since sometimes a bus will be used, it has been agreed during ad3552's driver implementation that the device's driver bus is the backend, whose device node will be a child node. To provide the special callbacks for setting the register, axi-adc needs to pass them to the child device's driver through platform data. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-5-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606_bus_iface.h | 16 ++++++++ drivers/iio/adc/adi-axi-adc.c | 65 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 drivers/iio/adc/ad7606_bus_iface.h diff --git a/drivers/iio/adc/ad7606_bus_iface.h b/drivers/iio/adc/ad7606_bus_iface.h new file mode 100644 index 000000000000..f2c979a9b7f3 --- /dev/null +++ b/drivers/iio/adc/ad7606_bus_iface.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2010-2024 Analog Devices Inc. + * Copyright (c) 2025 Baylibre, SAS + */ +#ifndef __LINUX_PLATFORM_DATA_AD7606_H__ +#define __LINUX_PLATFORM_DATA_AD7606_H__ + +struct iio_backend; + +struct ad7606_platform_data { + int (*bus_reg_read)(struct iio_backend *back, u32 reg, u32 *val); + int (*bus_reg_write)(struct iio_backend *back, u32 reg, u32 val); +}; + +#endif /* __LINUX_PLATFORM_DATA_AD7606_H__ */ diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 04f7d7d3890b..182ad14e4949 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -334,6 +334,36 @@ static const struct regmap_config axi_adc_regmap_config = { .reg_stride = 4, }; +static void axi_adc_child_remove(void *data) +{ + platform_device_unregister(data); +} + +static int axi_adc_create_platform_device(struct adi_axi_adc_state *st, + struct fwnode_handle *child) +{ + struct platform_device_info pi = { + .parent = st->dev, + .name = fwnode_get_name(child), + .id = PLATFORM_DEVID_AUTO, + .fwnode = child, + .data = st->info->pdata, + .size_data = st->info->pdata_sz, + }; + struct platform_device *pdev; + int ret; + + pdev = platform_device_register_full(&pi); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + ret = devm_add_action_or_reset(st->dev, axi_adc_child_remove, pdev); + if (ret) + return ret; + + return 0; +} + static const struct iio_backend_ops adi_axi_adc_ops = { .enable = axi_adc_enable, .disable = axi_adc_disable, @@ -417,6 +447,28 @@ static int adi_axi_adc_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "failed to register iio backend\n"); + device_for_each_child_node_scoped(&pdev->dev, child) { + int val; + + if (!st->info->has_child_nodes) + return dev_err_probe(&pdev->dev, -EINVAL, + "invalid fdt axi-dac compatible."); + + /* Processing only reg 0 node */ + ret = fwnode_property_read_u32(child, "reg", &val); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "invalid reg property."); + if (val != 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "invalid node address."); + + ret = axi_adc_create_platform_device(st, child); + if (ret) + return dev_err_probe(&pdev->dev, -EINVAL, + "cannot create device."); + } + dev_info(&pdev->dev, "AXI ADC IP core (%d.%.2d.%c) probed\n", ADI_AXI_PCORE_VER_MAJOR(ver), ADI_AXI_PCORE_VER_MINOR(ver), @@ -430,6 +482,19 @@ static const struct axi_adc_info adc_generic = { .backend_info = &adi_axi_adc_generic, }; +static const struct ad7606_platform_data ad7606_pdata = { + .bus_reg_read = ad7606_bus_reg_read, + .bus_reg_write = ad7606_bus_reg_write, +}; + +static const struct axi_adc_info adc_ad7606 = { + .version = ADI_AXI_PCORE_VER(10, 0, 'a'), + .backend_info = &adi_axi_adc_generic, + .pdata = &ad7606_pdata, + .pdata_sz = sizeof(ad7606_pdata), + .has_child_nodes = true, +}; + /* Match table for of_platform binding */ static const struct of_device_id adi_axi_adc_of_match[] = { { .compatible = "adi,axi-adc-10.0.a", .data = &adc_generic }, From 79c47485e438c8ea6e08ed9b6572158500f8c4cd Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:56 +0100 Subject: [PATCH 0166/2157] iio: adc: adi-axi-adc: add support for AD7606 register writing Since we must access the bus parallel bus using a custom procedure, let's add a specialized compatible, and define specialized callbacks for writing the registers using the parallel interface. Signed-off-by: Guillaume Stols Co-developed-by: Angelo Dureghello Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-6-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 182ad14e4949..766406f45396 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -27,6 +27,7 @@ #include #include +#include "ad7606_bus_iface.h" /* * Register definitions: * https://wiki.analog.com/resources/fpga/docs/axi_adc_ip#register_map @@ -73,6 +74,12 @@ #define ADI_AXI_ADC_REG_DELAY(l) (0x0800 + (l) * 0x4) #define AXI_ADC_DELAY_CTRL_MASK GENMASK(4, 0) +#define ADI_AXI_REG_CONFIG_WR 0x0080 +#define ADI_AXI_REG_CONFIG_RD 0x0084 +#define ADI_AXI_REG_CONFIG_CTRL 0x008c +#define ADI_AXI_REG_CONFIG_CTRL_READ 0x03 +#define ADI_AXI_REG_CONFIG_CTRL_WRITE 0x01 + #define ADI_AXI_ADC_MAX_IO_NUM_LANES 15 #define ADI_AXI_REG_CHAN_CTRL_DEFAULTS \ @@ -80,6 +87,10 @@ ADI_AXI_REG_CHAN_CTRL_FMT_EN | \ ADI_AXI_REG_CHAN_CTRL_ENABLE) +#define ADI_AXI_REG_READ_BIT 0x8000 +#define ADI_AXI_REG_ADDRESS_MASK 0xff00 +#define ADI_AXI_REG_VALUE_MASK 0x00ff + struct axi_adc_info { unsigned int version; const struct iio_backend_info *backend_info; @@ -311,6 +322,75 @@ static struct iio_buffer *axi_adc_request_buffer(struct iio_backend *back, return iio_dmaengine_buffer_setup(st->dev, indio_dev, dma_name); } +static int axi_adc_raw_write(struct iio_backend *back, u32 val) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + + regmap_write(st->regmap, ADI_AXI_REG_CONFIG_WR, val); + regmap_write(st->regmap, ADI_AXI_REG_CONFIG_CTRL, + ADI_AXI_REG_CONFIG_CTRL_WRITE); + fsleep(100); + regmap_write(st->regmap, ADI_AXI_REG_CONFIG_CTRL, 0x00); + fsleep(100); + + return 0; +} + +static int axi_adc_raw_read(struct iio_backend *back, u32 *val) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + + regmap_write(st->regmap, ADI_AXI_REG_CONFIG_CTRL, + ADI_AXI_REG_CONFIG_CTRL_READ); + fsleep(100); + regmap_read(st->regmap, ADI_AXI_REG_CONFIG_RD, val); + regmap_write(st->regmap, ADI_AXI_REG_CONFIG_CTRL, 0x00); + fsleep(100); + + return 0; +} + +static int ad7606_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + int addr; + + guard(mutex)(&st->lock); + + /* + * The address is written on the highest weight byte, and the MSB set + * at 1 indicates a read operation. + */ + addr = FIELD_PREP(ADI_AXI_REG_ADDRESS_MASK, reg) | ADI_AXI_REG_READ_BIT; + axi_adc_raw_write(back, addr); + axi_adc_raw_read(back, val); + + /* Write 0x0 on the bus to get back to ADC mode */ + axi_adc_raw_write(back, 0); + + return 0; +} + +static int ad7606_bus_reg_write(struct iio_backend *back, u32 reg, u32 val) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + u32 buf; + + guard(mutex)(&st->lock); + + /* Write any register to switch to register mode */ + axi_adc_raw_write(back, 0xaf00); + + buf = FIELD_PREP(ADI_AXI_REG_ADDRESS_MASK, reg) | + FIELD_PREP(ADI_AXI_REG_VALUE_MASK, val); + axi_adc_raw_write(back, buf); + + /* Write 0x0 on the bus to get back to ADC mode */ + axi_adc_raw_write(back, 0); + + return 0; +} + static void axi_adc_free_buffer(struct iio_backend *back, struct iio_buffer *buffer) { @@ -498,6 +578,7 @@ static const struct axi_adc_info adc_ad7606 = { /* Match table for of_platform binding */ static const struct of_device_id adi_axi_adc_of_match[] = { { .compatible = "adi,axi-adc-10.0.a", .data = &adc_generic }, + { .compatible = "adi,axi-ad7606x", .data = &adc_ad7606 }, { /* end of list */ } }; MODULE_DEVICE_TABLE(of, adi_axi_adc_of_match); From 0f65f59e632d942cccffd12c36036c24eb7037eb Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 10 Feb 2025 17:10:57 +0100 Subject: [PATCH 0167/2157] iio: adc: ad7606: protect register access Protect register (and bus) access from concurrent read / write. Needed in the backend operating mode. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-7-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index b88c3e248b7e..612fe73396d7 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -862,7 +862,12 @@ static int ad7606_write_raw(struct iio_dev *indio_dev, } val = (val * MICRO) + val2; i = find_closest(val, scale_avail_uv, cs->num_scales); + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret < 0) + return ret; ret = st->write_scale(indio_dev, ch, i + cs->reg_offset); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; cs->range = i; @@ -873,7 +878,12 @@ static int ad7606_write_raw(struct iio_dev *indio_dev, return -EINVAL; i = find_closest(val, st->oversampling_avail, st->num_os_ratios); + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret < 0) + return ret; ret = st->write_os(indio_dev, i); + iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; st->oversampling = st->oversampling_avail[i]; From 5efb0a3cc6c8643a7f76409d92ea11b42f576234 Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:58 +0100 Subject: [PATCH 0168/2157] iio: adc: ad7606: change channel macros parameters Add the possibility to pass the *_available parameters to the main macro. This is a preparation to add the new channels for software mode and hardware mode in iio backend mode more easily. Signed-off-by: Guillaume Stols Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-8-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.h | 51 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index 7a044b499cfe..a35b526f3915 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -40,14 +40,19 @@ #define AD7606_RANGE_CH_ADDR(ch) (0x03 + ((ch) >> 1)) #define AD7606_OS_MODE 0x08 -#define AD760X_CHANNEL(num, mask_sep, mask_type, mask_all, bits) { \ +#define AD760X_CHANNEL(num, mask_sep, mask_type, mask_all, \ + mask_sep_avail, mask_all_avail, bits) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ .channel = num, \ .address = num, \ .info_mask_separate = mask_sep, \ + .info_mask_separate_available = \ + mask_sep_avail, \ .info_mask_shared_by_type = mask_type, \ .info_mask_shared_by_all = mask_all, \ + .info_mask_shared_by_all_available = \ + mask_all_avail, \ .scan_index = num, \ .scan_type = { \ .sign = 's', \ @@ -57,37 +62,30 @@ }, \ } -#define AD7606_SW_CHANNEL(num, bits) { \ - .type = IIO_VOLTAGE, \ - .indexed = 1, \ - .channel = num, \ - .address = num, \ - .info_mask_separate = \ - BIT(IIO_CHAN_INFO_RAW) | \ - BIT(IIO_CHAN_INFO_SCALE), \ - .info_mask_separate_available = \ - BIT(IIO_CHAN_INFO_SCALE), \ - .info_mask_shared_by_all = \ - BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ - .info_mask_shared_by_all_available = \ - BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ - .scan_index = num, \ - .scan_type = { \ - .sign = 's', \ - .realbits = (bits), \ - .storagebits = (bits) > 16 ? 32 : 16, \ - .endianness = IIO_CPU, \ - }, \ -} +#define AD7606_SW_CHANNEL(num, bits) \ + AD760X_CHANNEL(num, \ + /* mask separate */ \ + BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ + /* mask type */ \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + /* mask all */ \ + 0, \ + /* mask separate available */ \ + BIT(IIO_CHAN_INFO_SCALE), \ + /* mask all available */ \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + bits) #define AD7605_CHANNEL(num) \ AD760X_CHANNEL(num, BIT(IIO_CHAN_INFO_RAW), \ - BIT(IIO_CHAN_INFO_SCALE), 0, 16) + BIT(IIO_CHAN_INFO_SCALE), 0, 0, 0, 16) #define AD7606_CHANNEL(num, bits) \ AD760X_CHANNEL(num, BIT(IIO_CHAN_INFO_RAW), \ BIT(IIO_CHAN_INFO_SCALE), \ - BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), bits) + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + 0, 0, bits) #define AD7616_CHANNEL(num) AD7606_SW_CHANNEL(num, 16) @@ -95,7 +93,8 @@ AD760X_CHANNEL(num, 0, \ BIT(IIO_CHAN_INFO_SCALE), \ BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ - BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), 16) + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + 0, 0, 16) struct ad7606_state; From ac856912f210bcff6a1cf8cf9cb2f6a1dfe85798 Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Mon, 10 Feb 2025 17:10:59 +0100 Subject: [PATCH 0169/2157] iio: adc: ad7606: add support for writing registers when using backend Add the logic for effectively enabling the software mode for the iio-backend, i.e. enabling the software mode channel configuration and implementing the register writing functions. Signed-off-by: Guillaume Stols Co-developed-by: Angelo Dureghello Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250210-wip-bl-ad7606_add_backend_sw_mode-v4-9-160df18b1da7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.h | 15 +++++++++++ drivers/iio/adc/ad7606_par.c | 52 +++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index a35b526f3915..71a30525eaab 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -96,6 +96,21 @@ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ 0, 0, 16) +#define AD7606_BI_SW_CHANNEL(num) \ + AD760X_CHANNEL(num, \ + /* mask separate */ \ + BIT(IIO_CHAN_INFO_SCALE), \ + /* mask type */ \ + 0, \ + /* mask all */ \ + BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + /* mask separate available */ \ + BIT(IIO_CHAN_INFO_SCALE), \ + /* mask all available */ \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + 16) + struct ad7606_state; typedef int (*ad7606_scale_setup_cb_t)(struct iio_dev *indio_dev, diff --git a/drivers/iio/adc/ad7606_par.c b/drivers/iio/adc/ad7606_par.c index 64733b607aa8..335fb481bfde 100644 --- a/drivers/iio/adc/ad7606_par.c +++ b/drivers/iio/adc/ad7606_par.c @@ -19,6 +19,7 @@ #include #include "ad7606.h" +#include "ad7606_bus_iface.h" static const struct iio_chan_spec ad7606b_bi_channels[] = { AD7606_BI_CHANNEL(0), @@ -31,7 +32,19 @@ static const struct iio_chan_spec ad7606b_bi_channels[] = { AD7606_BI_CHANNEL(7), }; -static int ad7606_bi_update_scan_mode(struct iio_dev *indio_dev, const unsigned long *scan_mask) +static const struct iio_chan_spec ad7606b_bi_sw_channels[] = { + AD7606_BI_SW_CHANNEL(0), + AD7606_BI_SW_CHANNEL(1), + AD7606_BI_SW_CHANNEL(2), + AD7606_BI_SW_CHANNEL(3), + AD7606_BI_SW_CHANNEL(4), + AD7606_BI_SW_CHANNEL(5), + AD7606_BI_SW_CHANNEL(6), + AD7606_BI_SW_CHANNEL(7), +}; + +static int ad7606_par_bus_update_scan_mode(struct iio_dev *indio_dev, + const unsigned long *scan_mask) { struct ad7606_state *st = iio_priv(indio_dev); unsigned int c, ret; @@ -48,7 +61,8 @@ static int ad7606_bi_update_scan_mode(struct iio_dev *indio_dev, const unsigned return 0; } -static int ad7606_bi_setup_iio_backend(struct device *dev, struct iio_dev *indio_dev) +static int ad7606_par_bus_setup_iio_backend(struct device *dev, + struct iio_dev *indio_dev) { struct ad7606_state *st = iio_priv(indio_dev); unsigned int ret, c; @@ -86,9 +100,39 @@ static int ad7606_bi_setup_iio_backend(struct device *dev, struct iio_dev *indio return 0; } +static int ad7606_par_bus_reg_read(struct ad7606_state *st, unsigned int addr) +{ + struct ad7606_platform_data *pdata = st->dev->platform_data; + int val, ret; + + ret = pdata->bus_reg_read(st->back, addr, &val); + if (ret) + return ret; + + return val; +} + +static int ad7606_par_bus_reg_write(struct ad7606_state *st, unsigned int addr, + unsigned int val) +{ + struct ad7606_platform_data *pdata = st->dev->platform_data; + + return pdata->bus_reg_write(st->back, addr, val); +} + +static int ad7606_par_bus_sw_mode_config(struct iio_dev *indio_dev) +{ + indio_dev->channels = ad7606b_bi_sw_channels; + + return 0; +} + static const struct ad7606_bus_ops ad7606_bi_bops = { - .iio_backend_config = ad7606_bi_setup_iio_backend, - .update_scan_mode = ad7606_bi_update_scan_mode, + .iio_backend_config = ad7606_par_bus_setup_iio_backend, + .update_scan_mode = ad7606_par_bus_update_scan_mode, + .reg_read = ad7606_par_bus_reg_read, + .reg_write = ad7606_par_bus_reg_write, + .sw_mode_config = ad7606_par_bus_sw_mode_config, }; static int ad7606_par16_read_block(struct device *dev, From c6250d0eab82da7af78e8d010360c91f33cbc242 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 13:45:04 +0200 Subject: [PATCH 0170/2157] power: ip5xxx_power: Make use of i2c_get_match_data() Get matching data in one step by switching to use i2c_get_match_data(). Acked-by: Sebastian Reichel Signed-off-by: Andy Shevchenko Reviewed-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang --- drivers/power/supply/ip5xxx_power.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/ip5xxx_power.c b/drivers/power/supply/ip5xxx_power.c index c448e0ac0dfa..a293b2765771 100644 --- a/drivers/power/supply/ip5xxx_power.c +++ b/drivers/power/supply/ip5xxx_power.c @@ -828,10 +828,9 @@ static void ip5xxx_setup_regs(struct device *dev, struct ip5xxx *ip5xxx, static int ip5xxx_power_probe(struct i2c_client *client) { - const struct ip5xxx_regfield_config *fields = &ip51xx_fields; + const struct ip5xxx_regfield_config *fields; struct power_supply_config psy_cfg = {}; struct device *dev = &client->dev; - const struct of_device_id *of_id; struct power_supply *psy; struct ip5xxx *ip5xxx; @@ -843,9 +842,7 @@ static int ip5xxx_power_probe(struct i2c_client *client) if (IS_ERR(ip5xxx->regmap)) return PTR_ERR(ip5xxx->regmap); - of_id = i2c_of_match_device(dev->driver->of_match_table, client); - if (of_id) - fields = (const struct ip5xxx_regfield_config *)of_id->data; + fields = i2c_get_match_data(client) ?: &ip51xx_fields; ip5xxx_setup_regs(dev, ip5xxx, fields); psy_cfg.of_node = dev->of_node; From fbc54ae4f8d7cef3b11f70945bf8df8f952814b6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 13:45:05 +0200 Subject: [PATCH 0171/2157] i2c: Unexport i2c_of_match_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i2c_of_match_device() is not used anymore outside of I²C framework, unexport it. Signed-off-by: Andy Shevchenko Reviewed-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-of.c | 1 - drivers/i2c/i2c-core.h | 9 +++++++++ include/linux/i2c.h | 11 ----------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index a6c407d36800..02feee6c9ba9 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -157,7 +157,6 @@ const struct of_device_id return i2c_of_match_device_sysfs(matches, client); } -EXPORT_SYMBOL_GPL(i2c_of_match_device); #if IS_ENABLED(CONFIG_OF_DYNAMIC) static int of_i2c_notify(struct notifier_block *nb, unsigned long action, diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h index 36587f38dff3..4797ba88331c 100644 --- a/drivers/i2c/i2c-core.h +++ b/drivers/i2c/i2c-core.h @@ -84,8 +84,17 @@ static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { #ifdef CONFIG_OF void of_i2c_register_devices(struct i2c_adapter *adap); +const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, + struct i2c_client *client); + #else static inline void of_i2c_register_devices(struct i2c_adapter *adap) { } +static inline +const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, + struct i2c_client *client) +{ + return NULL; +} #endif extern struct notifier_block i2c_of_notifier; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 2b2af24d2a43..997e80649889 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1029,10 +1029,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); } -const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client); - int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info); @@ -1053,13 +1049,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return NULL; } -static inline const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client) -{ - return NULL; -} - static inline int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info) From e738d77f78b3ac085dfb51be414e93464abba7ec Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 5 Feb 2025 15:42:31 +0800 Subject: [PATCH 0172/2157] soundwire: cadence_master: set frame shape and divider based on actual clk freq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frame shape and curr_dr_freq could be updated by sdw_compute_bus_params(). Peripherals will set curr_dr_freq as their frequency. Managers should do the same. Then update frame shape according to the actual bus frequency. Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20250205074232.87537-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index f367670ea991..68be8ff3f02b 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1341,7 +1341,7 @@ static u32 cdns_set_initial_frame_shape(int n_rows, int n_cols) return val; } -static void cdns_init_clock_ctrl(struct sdw_cdns *cdns) +static int cdns_init_clock_ctrl(struct sdw_cdns *cdns) { struct sdw_bus *bus = &cdns->bus; struct sdw_master_prop *prop = &bus->prop; @@ -1355,14 +1355,25 @@ static void cdns_init_clock_ctrl(struct sdw_cdns *cdns) prop->default_row, prop->default_col); + if (!prop->default_frame_rate || !prop->default_row) { + dev_err(cdns->dev, "Default frame_rate %d or row %d is invalid\n", + prop->default_frame_rate, prop->default_row); + return -EINVAL; + } + /* Set clock divider */ - divider = (prop->mclk_freq / prop->max_clk_freq) - 1; + divider = (prop->mclk_freq * SDW_DOUBLE_RATE_FACTOR / + bus->params.curr_dr_freq) - 1; cdns_updatel(cdns, CDNS_MCP_CLK_CTRL0, CDNS_MCP_CLK_MCLKD_MASK, divider); cdns_updatel(cdns, CDNS_MCP_CLK_CTRL1, CDNS_MCP_CLK_MCLKD_MASK, divider); + /* Set frame shape base on the actual bus frequency. */ + prop->default_col = bus->params.curr_dr_freq / + prop->default_frame_rate / prop->default_row; + /* * Frame shape changes after initialization have to be done * with the bank switch mechanism @@ -1375,6 +1386,8 @@ static void cdns_init_clock_ctrl(struct sdw_cdns *cdns) ssp_interval = prop->default_frame_rate / SDW_CADENCE_GSYNC_HZ; cdns_writel(cdns, CDNS_MCP_SSP_CTRL0, ssp_interval); cdns_writel(cdns, CDNS_MCP_SSP_CTRL1, ssp_interval); + + return 0; } /** @@ -1408,9 +1421,12 @@ EXPORT_SYMBOL(sdw_cdns_soft_reset); */ int sdw_cdns_init(struct sdw_cdns *cdns) { + int ret; u32 val; - cdns_init_clock_ctrl(cdns); + ret = cdns_init_clock_ctrl(cdns); + if (ret) + return ret; sdw_cdns_check_self_clearing_bits(cdns, __func__, false, 0); From d38ea972da679f223ae1e761080e37dd8b582bac Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 5 Feb 2025 15:42:32 +0800 Subject: [PATCH 0173/2157] soundwire: Revert "soundwire: intel_auxdevice: start the bus at default frequency" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now, we can support more than 1 soundwire bus clock frequency. This reverts commit c326356188f1 ("soundwire: intel_auxdevice: start the bus at default frequency") Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20250205074232.87537-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel_auxdevice.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 599954d92752..dee126f6d9d5 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -222,30 +222,9 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) static int intel_prop_read(struct sdw_bus *bus) { - struct sdw_master_prop *prop; - /* Initialize with default handler to read all DisCo properties */ sdw_master_read_prop(bus); - /* - * Only one bus frequency is supported so far, filter - * frequencies reported in the DSDT - */ - prop = &bus->prop; - if (prop->clk_freq && prop->num_clk_freq > 1) { - unsigned int default_bus_frequency; - - default_bus_frequency = - prop->default_frame_rate * - prop->default_row * - prop->default_col / - SDW_DOUBLE_RATE_FACTOR; - - prop->num_clk_freq = 1; - prop->clk_freq[0] = default_bus_frequency; - prop->max_clk_freq = default_bus_frequency; - } - /* read Intel-specific properties */ sdw_master_read_intel_prop(bus); From dcc48a73eae7f791b1a6856ea1bcc4079282c88d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:36 +0530 Subject: [PATCH 0174/2157] soundwire: amd: change the soundwire wake enable/disable sequence During runtime suspend scenario, SoundWire wake should be enabled and during system level suspend scenario SoundWire wake should be disabled. Implement the SoundWire wake enable/disable sequence as per design flow for SoundWire poweroff mode. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 5a54b10daf77..9d8062378724 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -1139,6 +1139,7 @@ static int __maybe_unused amd_suspend(struct device *dev) amd_sdw_wake_enable(amd_manager, false); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { + amd_sdw_wake_enable(amd_manager, false); /* * As per hardware programming sequence on AMD platforms, * clock stop should be invoked first before powering-off @@ -1166,6 +1167,7 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) amd_sdw_wake_enable(amd_manager, true); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { + amd_sdw_wake_enable(amd_manager, true); ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; From 19427c08b818c65f579cbfc78062e1ff4c37c768 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:37 +0530 Subject: [PATCH 0175/2157] soundwire: amd: add debug log for soundwire wake event Add debug log in amd_sdw_process_wake_event() function. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-3-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 9d8062378724..7bedcec6dc08 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -849,6 +849,7 @@ static void amd_sdw_update_slave_status(u32 status_change_0to7, u32 status_chang static void amd_sdw_process_wake_event(struct amd_sdw_manager *amd_manager) { + dev_dbg(amd_manager->dev, "SoundWire Wake event reported\n"); pm_request_resume(amd_manager->dev); writel(0x00, amd_manager->acp_mmio + ACP_SW_WAKE_EN(amd_manager->instance)); writel(0x00, amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_8TO11); From 2c0ae8ef1e5edfd0e42727fba4617694f3aac2eb Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:38 +0530 Subject: [PATCH 0176/2157] soundwire: amd: add support for ACP7.0 & ACP7.1 platforms Add SoundWire support for ACP7.0 and ACP7.1 platforms. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-4-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 21 +++++++++++++++++++++ drivers/soundwire/amd_manager.h | 18 ++++++++++++++++++ include/linux/soundwire/sdw_amd.h | 2 ++ 3 files changed, 41 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 7bedcec6dc08..97164d6edbe0 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -446,6 +446,10 @@ static int amd_sdw_port_params(struct sdw_bus *bus, struct sdw_port_params *p_pa return -EINVAL; } break; + case ACP70_PCI_REV_ID: + case ACP71_PCI_REV_ID: + frame_fmt_reg = acp70_sdw_dp_reg[p_params->num].frame_fmt_reg; + break; default: return -EINVAL; } @@ -494,6 +498,14 @@ static int amd_sdw_transport_params(struct sdw_bus *bus, return -EINVAL; } break; + case ACP70_PCI_REV_ID: + case ACP71_PCI_REV_ID: + frame_fmt_reg = acp70_sdw_dp_reg[params->port_num].frame_fmt_reg; + sample_int_reg = acp70_sdw_dp_reg[params->port_num].sample_int_reg; + hctrl_dp0_reg = acp70_sdw_dp_reg[params->port_num].hctrl_dp0_reg; + offset_reg = acp70_sdw_dp_reg[params->port_num].offset_reg; + lane_ctrl_ch_en_reg = acp70_sdw_dp_reg[params->port_num].lane_ctrl_ch_en_reg; + break; default: return -EINVAL; } @@ -549,6 +561,10 @@ static int amd_sdw_port_enable(struct sdw_bus *bus, return -EINVAL; } break; + case ACP70_PCI_REV_ID: + case ACP71_PCI_REV_ID: + lane_ctrl_ch_en_reg = acp70_sdw_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; + break; default: return -EINVAL; } @@ -966,6 +982,11 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) return -EINVAL; } break; + case ACP70_PCI_REV_ID: + case ACP71_PCI_REV_ID: + amd_manager->num_dout_ports = AMD_ACP70_SDW_MAX_TX_PORTS; + amd_manager->num_din_ports = AMD_ACP70_SDW_MAX_RX_PORTS; + break; default: return -EINVAL; } diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index cc2170e4521e..30244a31c21c 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -159,8 +159,11 @@ #define AMD_ACP63_SDW0_MAX_RX_PORTS 3 #define AMD_ACP63_SDW1_MAX_TX_PORTS 1 #define AMD_ACP63_SDW1_MAX_RX_PORTS 1 +#define AMD_ACP70_SDW_MAX_TX_PORTS 3 +#define AMD_ACP70_SDW_MAX_RX_PORTS 3 #define AMD_ACP63_SDW0_MAX_DAI 6 #define AMD_ACP63_SDW1_MAX_DAI 2 +#define AMD_ACP70_SDW_MAX_DAI 6 #define AMD_SDW_SLAVE_0_ATTACHED 5 #define AMD_SDW_SSP_COUNTER_VAL 3 @@ -244,6 +247,21 @@ static struct sdw_manager_dp_reg acp63_sdw1_dp_reg[AMD_ACP63_SDW1_MAX_DAI] = { ACP_SW_AUDIO1_RX_OFFSET, ACP_SW_AUDIO1_RX_CHANNEL_ENABLE_DP0} }; +static struct sdw_manager_dp_reg acp70_sdw_dp_reg[AMD_ACP70_SDW_MAX_DAI] = { + {ACP_SW_AUDIO0_TX_FRAME_FORMAT, ACP_SW_AUDIO0_TX_SAMPLEINTERVAL, ACP_SW_AUDIO0_TX_HCTRL_DP0, + ACP_SW_AUDIO0_TX_OFFSET_DP0, ACP_SW_AUDIO0_TX_CHANNEL_ENABLE_DP0}, + {ACP_SW_AUDIO1_TX_FRAME_FORMAT, ACP_SW_AUDIO1_TX_SAMPLEINTERVAL, ACP_SW_AUDIO1_TX_HCTRL, + ACP_SW_AUDIO1_TX_OFFSET, ACP_SW_AUDIO1_TX_CHANNEL_ENABLE_DP0}, + {ACP_SW_AUDIO2_TX_FRAME_FORMAT, ACP_SW_AUDIO2_TX_SAMPLEINTERVAL, ACP_SW_AUDIO2_TX_HCTRL, + ACP_SW_AUDIO2_TX_OFFSET, ACP_SW_AUDIO2_TX_CHANNEL_ENABLE_DP0}, + {ACP_SW_AUDIO0_RX_FRAME_FORMAT, ACP_SW_AUDIO0_RX_SAMPLEINTERVAL, ACP_SW_AUDIO0_RX_HCTRL_DP0, + ACP_SW_AUDIO0_RX_OFFSET_DP0, ACP_SW_AUDIO0_RX_CHANNEL_ENABLE_DP0}, + {ACP_SW_AUDIO1_RX_FRAME_FORMAT, ACP_SW_AUDIO1_RX_SAMPLEINTERVAL, ACP_SW_AUDIO1_RX_HCTRL, + ACP_SW_AUDIO1_RX_OFFSET, ACP_SW_AUDIO1_RX_CHANNEL_ENABLE_DP0}, + {ACP_SW_AUDIO2_RX_FRAME_FORMAT, ACP_SW_AUDIO2_RX_SAMPLEINTERVAL, ACP_SW_AUDIO2_RX_HCTRL, + ACP_SW_AUDIO2_RX_OFFSET, ACP_SW_AUDIO2_RX_CHANNEL_ENABLE_DP0}, +}; + static u32 sdw_manager_reg_mask_array[AMD_SDW_MAX_MANAGER_COUNT] = { AMD_SDW0_EXT_INTR_MASK, AMD_SDW1_EXT_INTR_MASK diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 799f8578137b..6b839987f14c 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -28,6 +28,8 @@ #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 #define ACP63_PCI_REV_ID 0x63 +#define ACP70_PCI_REV_ID 0x70 +#define ACP71_PCI_REV_ID 0x71 struct acp_sdw_pdata { u16 instance; From 829c3e1cb4a3f60c5cef11963052009ea50d2941 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:39 +0530 Subject: [PATCH 0177/2157] soundwire: amd: set device power state during suspend/resume sequence Set SoundWire manager device power state during suspend and resume sequence for ACP7.0 & ACP7.1 platforms. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-5-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 58 ++++++++++++++++++++++++++++++--- drivers/soundwire/amd_manager.h | 5 +++ 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 97164d6edbe0..50d7b10b0581 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -143,6 +143,29 @@ static void amd_sdw_wake_enable(struct amd_sdw_manager *amd_manager, bool enable writel(wake_ctrl, amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11); } +static int amd_sdw_set_device_state(struct amd_sdw_manager *amd_manager, u32 target_device_state) +{ + u32 sdw_dev_state; + + sdw_dev_state = readl(amd_manager->acp_mmio + AMD_SDW_DEVICE_STATE); + switch (amd_manager->instance) { + case ACP_SDW0: + u32p_replace_bits(&sdw_dev_state, target_device_state, + AMD_SDW0_DEVICE_STATE_MASK); + break; + case ACP_SDW1: + u32p_replace_bits(&sdw_dev_state, target_device_state, + AMD_SDW1_DEVICE_STATE_MASK); + break; + default: + return -EINVAL; + } + writel(sdw_dev_state, amd_manager->acp_mmio + AMD_SDW_DEVICE_STATE); + sdw_dev_state = readl(amd_manager->acp_mmio + AMD_SDW_DEVICE_STATE); + dev_dbg(amd_manager->dev, "AMD_SDW_DEVICE_STATE:0x%x\n", sdw_dev_state); + return 0; +} + static void amd_sdw_ctl_word_prep(u32 *lower_word, u32 *upper_word, struct sdw_msg *msg, int cmd_offset) { @@ -1159,7 +1182,9 @@ static int __maybe_unused amd_suspend(struct device *dev) if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { amd_sdw_wake_enable(amd_manager, false); - return amd_sdw_clock_stop(amd_manager); + ret = amd_sdw_clock_stop(amd_manager); + if (ret) + return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { amd_sdw_wake_enable(amd_manager, false); /* @@ -1169,7 +1194,14 @@ static int __maybe_unused amd_suspend(struct device *dev) ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; - return amd_deinit_sdw_manager(amd_manager); + ret = amd_deinit_sdw_manager(amd_manager); + if (ret) + return ret; + } + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_set_device_state(amd_manager, AMD_SDW_DEVICE_STATE_D3); + if (ret) + return ret; } return 0; } @@ -1187,13 +1219,22 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { amd_sdw_wake_enable(amd_manager, true); - return amd_sdw_clock_stop(amd_manager); + ret = amd_sdw_clock_stop(amd_manager); + if (ret) + return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { amd_sdw_wake_enable(amd_manager, true); ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; - return amd_deinit_sdw_manager(amd_manager); + ret = amd_deinit_sdw_manager(amd_manager); + if (ret) + return ret; + } + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_set_device_state(amd_manager, AMD_SDW_DEVICE_STATE_D3); + if (ret) + return ret; } return 0; } @@ -1212,7 +1253,9 @@ static int __maybe_unused amd_resume_runtime(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { - return amd_sdw_clock_stop_exit(amd_manager); + ret = amd_sdw_clock_stop_exit(amd_manager); + if (ret) + return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { writel(0x00, amd_manager->acp_mmio + ACP_SW_WAKE_EN(amd_manager->instance)); val = readl(amd_manager->mmio + ACP_SW_CLK_RESUME_CTRL); @@ -1235,6 +1278,11 @@ static int __maybe_unused amd_resume_runtime(struct device *dev) return ret; amd_sdw_set_frameshape(amd_manager); } + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_set_device_state(amd_manager, AMD_SDW_DEVICE_STATE_D0); + if (ret) + return ret; + } return 0; } diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 30244a31c21c..8430f279d88e 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -194,6 +194,11 @@ #define AMD_SDW_CLK_RESUME_DONE 3 #define AMD_SDW_WAKE_STAT_MASK BIT(16) #define AMD_SDW_WAKE_INTR_MASK BIT(16) +#define AMD_SDW_DEVICE_STATE 0x1430 +#define AMD_SDW0_DEVICE_STATE_MASK GENMASK(1, 0) +#define AMD_SDW1_DEVICE_STATE_MASK GENMASK(3, 2) +#define AMD_SDW_DEVICE_STATE_D0 0 +#define AMD_SDW_DEVICE_STATE_D3 3 static u32 amd_sdw_freq_tbl[AMD_SDW_MAX_FREQ_NUM] = { AMD_SDW_DEFAULT_CLK_FREQ, From 5818ed3636b3c63eddef299223c7369de86eefee Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:40 +0530 Subject: [PATCH 0178/2157] soundwire: amd: set ACP_PME_EN during runtime suspend sequence Set ACP_PME_EN to 1 during runtime suspend sequence as per design flow for ACP7.0 & ACP7.1 platforms. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-6-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 9 +++++++++ drivers/soundwire/amd_manager.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 50d7b10b0581..0fce876dcb42 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -1211,6 +1211,7 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) struct amd_sdw_manager *amd_manager = dev_get_drvdata(dev); struct sdw_bus *bus = &amd_manager->bus; int ret; + u32 val; if (bus->prop.hw_disabled) { dev_dbg(bus->dev, "SoundWire manager %d is disabled,\n", @@ -1235,6 +1236,14 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) ret = amd_sdw_set_device_state(amd_manager, AMD_SDW_DEVICE_STATE_D3); if (ret) return ret; + if (amd_manager->wake_en_mask) { + val = readl(amd_manager->acp_mmio + ACP_PME_EN); + if (!val) { + writel(1, amd_manager->acp_mmio + ACP_PME_EN); + val = readl(amd_manager->acp_mmio + ACP_PME_EN); + dev_dbg(amd_manager->dev, "ACP_PME_EN:0x%x\n", val); + } + } } return 0; } diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 8430f279d88e..1d5e94371f81 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -199,6 +199,7 @@ #define AMD_SDW1_DEVICE_STATE_MASK GENMASK(3, 2) #define AMD_SDW_DEVICE_STATE_D0 0 #define AMD_SDW_DEVICE_STATE_D3 3 +#define ACP_PME_EN 0x0001400 static u32 amd_sdw_freq_tbl[AMD_SDW_MAX_FREQ_NUM] = { AMD_SDW_DEFAULT_CLK_FREQ, From 3df75289ddc28b46121d51d2812943b78676497b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 7 Feb 2025 12:28:41 +0530 Subject: [PATCH 0179/2157] soundwire: amd: add soundwire host wake interrupt enable/disable sequence For wake event, SoundWire host wake interrupt will be asserted based on below pre-conditions for ACP7.0 & ACP7.1 platforms. - ACP device should be in D0 state. - SoundWire manager instance should be in D3 state. - SoundWire manager device state should be set to D3. - ACP_PME_EN should be set to 1. Implement code changes to enable/disable SoundWire host wake interrupt mask during suspend and resume as per design flow for ACP7.0 & ACP7.1 platforms. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20250207065841.4718-7-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 58 +++++++++++++++++++++++++++++++++ drivers/soundwire/amd_manager.h | 2 ++ 2 files changed, 60 insertions(+) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 0fce876dcb42..dcf85f94950a 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -166,6 +166,34 @@ static int amd_sdw_set_device_state(struct amd_sdw_manager *amd_manager, u32 tar return 0; } +static int amd_sdw_host_wake_enable(struct amd_sdw_manager *amd_manager, bool enable) +{ + u32 intr_cntl1; + u32 sdw_host_wake_irq_mask; + + if (!amd_manager->wake_en_mask) + return 0; + + switch (amd_manager->instance) { + case ACP_SDW0: + sdw_host_wake_irq_mask = AMD_SDW0_HOST_WAKE_INTR_MASK; + break; + case ACP_SDW1: + sdw_host_wake_irq_mask = AMD_SDW1_HOST_WAKE_INTR_MASK; + break; + default: + return -EINVAL; + } + + intr_cntl1 = readl(amd_manager->acp_mmio + ACP_EXTERNAL_INTR_CNTL(ACP_SDW1)); + if (enable) + intr_cntl1 |= sdw_host_wake_irq_mask; + else + intr_cntl1 &= ~sdw_host_wake_irq_mask; + writel(intr_cntl1, amd_manager->acp_mmio + ACP_EXTERNAL_INTR_CNTL(ACP_SDW1)); + return 0; +} + static void amd_sdw_ctl_word_prep(u32 *lower_word, u32 *upper_word, struct sdw_msg *msg, int cmd_offset) { @@ -1182,11 +1210,21 @@ static int __maybe_unused amd_suspend(struct device *dev) if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { amd_sdw_wake_enable(amd_manager, false); + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, false); + if (ret) + return ret; + } ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { amd_sdw_wake_enable(amd_manager, false); + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, false); + if (ret) + return ret; + } /* * As per hardware programming sequence on AMD platforms, * clock stop should be invoked first before powering-off @@ -1220,11 +1258,21 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { amd_sdw_wake_enable(amd_manager, true); + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, true); + if (ret) + return ret; + } ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { amd_sdw_wake_enable(amd_manager, true); + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, true); + if (ret) + return ret; + } ret = amd_sdw_clock_stop(amd_manager); if (ret) return ret; @@ -1265,8 +1313,18 @@ static int __maybe_unused amd_resume_runtime(struct device *dev) ret = amd_sdw_clock_stop_exit(amd_manager); if (ret) return ret; + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, false); + if (ret) + return ret; + } } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { writel(0x00, amd_manager->acp_mmio + ACP_SW_WAKE_EN(amd_manager->instance)); + if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { + ret = amd_sdw_host_wake_enable(amd_manager, false); + if (ret) + return ret; + } val = readl(amd_manager->mmio + ACP_SW_CLK_RESUME_CTRL); if (val) { val |= AMD_SDW_CLK_RESUME_REQ; diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 1d5e94371f81..6cc916b0c820 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -194,6 +194,8 @@ #define AMD_SDW_CLK_RESUME_DONE 3 #define AMD_SDW_WAKE_STAT_MASK BIT(16) #define AMD_SDW_WAKE_INTR_MASK BIT(16) +#define AMD_SDW0_HOST_WAKE_INTR_MASK BIT(22) +#define AMD_SDW1_HOST_WAKE_INTR_MASK BIT(23) #define AMD_SDW_DEVICE_STATE 0x1430 #define AMD_SDW0_DEVICE_STATE_MASK GENMASK(1, 0) #define AMD_SDW1_DEVICE_STATE_MASK GENMASK(3, 2) From 836c8a2edb96d78de6f00b60d6d8e24f2ea87e57 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 21:07:26 +0100 Subject: [PATCH 0180/2157] soundwire: Use str_enable_disable-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20250114200726.969501-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 5 +++-- drivers/soundwire/debugfs.c | 3 ++- drivers/soundwire/stream.c | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 9b295fc9acd5..25120aa3bd67 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "bus.h" #include "irq.h" #include "sysfs_local.h" @@ -277,7 +278,7 @@ static int sdw_transfer_unlocked(struct sdw_bus *bus, struct sdw_msg *msg) if (ret != 0 && ret != -ENODATA) dev_err(bus->dev, "trf on Slave %d failed:%d %s addr %x count %d\n", msg->dev_num, ret, - (msg->flags & SDW_MSG_FLAG_WRITE) ? "write" : "read", + str_write_read(msg->flags & SDW_MSG_FLAG_WRITE), msg->addr, msg->len); return ret; @@ -1263,7 +1264,7 @@ int sdw_configure_dpn_intr(struct sdw_slave *slave, if (slave->bus->params.s_data_mode != SDW_PORT_DATA_MODE_NORMAL) { dev_dbg(&slave->dev, "TEST FAIL interrupt %s\n", - enable ? "on" : "off"); + str_on_off(enable)); mask |= SDW_DPN_INT_TEST_FAIL; } diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index c30f571934ee..5bf0d9552433 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "bus.h" static struct dentry *sdw_debugfs_root; @@ -153,7 +154,7 @@ static int set_command(void *data, u64 value) /* Userspace changed the hardware state behind the kernel's back */ add_taint(TAINT_USER, LOCKDEP_STILL_OK); - dev_dbg(&slave->dev, "command: %s\n", value ? "read" : "write"); + dev_dbg(&slave->dev, "command: %s\n", str_read_write(value)); cmd = value; return 0; diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index e9df503332bb..dd8de88d8b46 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "bus.h" @@ -358,7 +359,7 @@ static int sdw_enable_disable_master_ports(struct sdw_master_runtime *m_rt, } else { dev_err(bus->dev, "dpn_port_enable_ch not supported, %s failed\n", - en ? "enable" : "disable"); + str_enable_disable(en)); return -EINVAL; } From aac2f8363f773ae1f65aab140e06e2084ac6b787 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 5 Dec 2024 12:48:44 +0900 Subject: [PATCH 0181/2157] soundwire: slave: fix an OF node reference leak in soundwire slave device When initializing a soundwire slave device, an OF node is stored to the device with refcount incremented. However, the refcount is not decremented in .release(), thus call of_node_put() in sdw_slave_release(). Fixes: a2e484585ad3 ("soundwire: core: add device tree support for slave devices") Signed-off-by: Joe Hattori Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241205034844.2784964-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Vinod Koul --- drivers/soundwire/slave.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index 4869b073b11c..d2d99555ec5a 100644 --- a/drivers/soundwire/slave.c +++ b/drivers/soundwire/slave.c @@ -13,6 +13,7 @@ static void sdw_slave_release(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); + of_node_put(slave->dev.of_node); mutex_destroy(&slave->sdw_dev_lock); kfree(slave); } From 642b1ed4cd184d5c2e5814c220bb93453492644d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:01 +0000 Subject: [PATCH 0182/2157] dt-bindings: phy: samsung,usb3-drd-phy: add blank lines between DT properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In [1], Rob pointed out that we should really be separating properties with blank lines in between, which is universal style. Only where properties are booleans, empty lines are not required. Do so. Link: https://lore.kernel.org/all/20240711212359.GA3023490-robh@kernel.org/ [1] Reviewed-by: Peter Griffin Acked-by: Rob Herring (Arm) Signed-off-by: André Draszik Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-1-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- .../bindings/phy/samsung,usb3-drd-phy.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index 16321cdd4919..1f8b35917b11 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -83,14 +83,19 @@ properties: pll-supply: description: Power supply for the USB PLL. + dvdd-usb20-supply: description: DVDD power supply for the USB 2.0 phy. + vddh-usb20-supply: description: VDDh power supply for the USB 2.0 phy. + vdd33-usb20-supply: description: 3.3V power supply for the USB 2.0 phy. + vdda-usbdp-supply: description: VDDa power supply for the USB DP phy. + vddh-usbdp-supply: description: VDDh power supply for the USB DP phy. @@ -117,6 +122,7 @@ allOf: - description: Gate of control interface AXI clock - description: Gate of control interface APB clock - description: Gate of SCL APB clock + clock-names: items: - const: phy @@ -124,10 +130,13 @@ allOf: - const: ctrl_aclk - const: ctrl_pclk - const: scl_pclk + reg: minItems: 3 + reg-names: minItems: 3 + required: - reg-names - pll-supply @@ -149,6 +158,7 @@ allOf: clocks: minItems: 5 maxItems: 5 + clock-names: items: - const: phy @@ -156,8 +166,10 @@ allOf: - const: phy_utmi - const: phy_pipe - const: itp + reg: maxItems: 1 + reg-names: maxItems: 1 @@ -174,12 +186,15 @@ allOf: clocks: minItems: 2 maxItems: 2 + clock-names: items: - const: phy - const: ref + reg: maxItems: 1 + reg-names: maxItems: 1 From c38528812c2e9b05fe8b5fd1f66cf4c75835a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:02 +0000 Subject: [PATCH 0183/2157] dt-bindings: phy: samsung,usb3-drd-phy: gs101: require Type-C properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit orientation-switch is the standard declaration to inform the Type-C mux layer that a remote-endpoint is capable of processing orientation change messages. The USB PHY on gs101 needs to be configured based on the orientation of the connector. For that the DTS needs a link between the phy's port and a TCPCi, and we'll need to inform the phy driver that it should handle the orientation (register a handler). Update the schema to enforce that by requiring the orientation-switch and port properties on gs101 (only). We disallow orientation-switch on all other supported platforms, since other versions of this phy (or its system integration) don't currently support or even need it. Even though this new required gs101 property is an ABI break, the intention for the driver is to behave as before if it's missing (meaning for gs101 it will work in SS mode in one orientation only). Other platforms are not affected. Reviewed-by: Peter Griffin Signed-off-by: André Draszik Reviewed-by: Rob Herring (Arm) Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-2-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/samsung,usb3-drd-phy.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index 1f8b35917b11..27295acbba76 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -114,6 +114,8 @@ allOf: contains: const: google,gs101-usb31drd-phy then: + $ref: /schemas/usb/usb-switch.yaml# + properties: clocks: items: @@ -139,6 +141,8 @@ allOf: required: - reg-names + - orientation-switch + - port - pll-supply - dvdd-usb20-supply - vddh-usb20-supply @@ -198,7 +202,7 @@ allOf: reg-names: maxItems: 1 -additionalProperties: false +unevaluatedProperties: false examples: - | From ee064390b82329df7fd8e0c48da03a8fee7d46ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:03 +0000 Subject: [PATCH 0184/2157] phy: exynos5-usbdrd: convert to dev_err_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_err_probe() exists to simplify code. Reviewed-by: Krzysztof Kozlowski Reviewed-by: Peter Griffin Signed-off-by: André Draszik Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-3-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index c421b495eb0f..ceae4b47cece 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1725,10 +1725,9 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) reg_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "samsung,pmu-syscon"); - if (IS_ERR(reg_pmu)) { - dev_err(dev, "Failed to lookup PMU regmap\n"); - return PTR_ERR(reg_pmu); - } + if (IS_ERR(reg_pmu)) + return dev_err_probe(dev, PTR_ERR(reg_pmu), + "Failed to lookup PMU regmap\n"); /* * Exynos5420 SoC has multiple channels for USB 3.0 PHY, with @@ -1759,10 +1758,9 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) for (i = 0; i < EXYNOS5_DRDPHYS_NUM; i++) { struct phy *phy = devm_phy_create(dev, NULL, drv_data->phy_ops); - if (IS_ERR(phy)) { - dev_err(dev, "Failed to create usbdrd_phy phy\n"); - return PTR_ERR(phy); - } + if (IS_ERR(phy)) + return dev_err_probe(dev, PTR_ERR(phy), + "Failed to create usbdrd_phy phy\n"); phy_drd->phys[i].phy = phy; phy_drd->phys[i].index = i; @@ -1786,10 +1784,9 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, exynos5_usbdrd_phy_xlate); - if (IS_ERR(phy_provider)) { - dev_err(phy_drd->dev, "Failed to register phy provider\n"); - return PTR_ERR(phy_provider); - } + if (IS_ERR(phy_provider)) + return dev_err_probe(phy_drd->dev, PTR_ERR(phy_provider), + "Failed to register phy provider\n"); return 0; } From 21860f340ba76ee042e5431ff92537f89bc11476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:04 +0000 Subject: [PATCH 0185/2157] phy: exynos5-usbdrd: fix EDS distribution tuning (gs101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code's intention is to configure lane0 and lane2 tunings, but for lane2 there is a typo and it ends up tuning something else. Fix the typo, as it doesn't appear to make sense to apply different tunings for lane0 vs lane2. The same typo appears to exist in the bootloader, hence we restore the original value in the typo'd registers as well. This can be removed once / if the bootloader is updated. Note that this is incorrect in the downstream driver as well - the values had been copied from there. Reviewed-by: Peter Griffin Tested-by: Peter Griffin Signed-off-by: André Draszik Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-4-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index ceae4b47cece..2a724d362c2d 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1510,8 +1510,11 @@ static const struct exynos5_usbdrd_phy_tuning gs101_tunes_pipe3_preinit[] = { PHY_TUNING_ENTRY_PMA(0x09e0, -1, 0x00), PHY_TUNING_ENTRY_PMA(0x09e4, -1, 0x36), PHY_TUNING_ENTRY_PMA(0x1e7c, -1, 0x06), - PHY_TUNING_ENTRY_PMA(0x1e90, -1, 0x00), - PHY_TUNING_ENTRY_PMA(0x1e94, -1, 0x36), + PHY_TUNING_ENTRY_PMA(0x19e0, -1, 0x00), + PHY_TUNING_ENTRY_PMA(0x19e4, -1, 0x36), + /* fix bootloader bug */ + PHY_TUNING_ENTRY_PMA(0x1e90, -1, 0x02), + PHY_TUNING_ENTRY_PMA(0x1e94, -1, 0x0b), /* improve LVCC */ PHY_TUNING_ENTRY_PMA(0x08f0, -1, 0x30), PHY_TUNING_ENTRY_PMA(0x18f0, -1, 0x30), From 0bccdcb3eea93e087887027ff374dac5c3de36cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:05 +0000 Subject: [PATCH 0186/2157] phy: exynos5-usbdrd: gs101: configure SS lanes based on orientation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USB SS lanes need to be configured based on the connector orientation - at most two lanes will be in use for USB (and the remaining two for alternate modes like DP). For the USB link to come up in SS, the lane configuration registers have to be programmed accordingly. While we still need a way to be notified of the actual connector orientation and then reprogram the registers accordingly (at the moment the configuration happens just once during phy_init() and never again), we can prepare the code doing the configuration to take the orientation into account. Do so. Note: the mutex is needed to synchronize this with the upcoming connector orientation callback. Reviewed-by: Peter Griffin Tested-by: Peter Griffin Signed-off-by: André Draszik Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-5-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 72 +++++++++++++++++------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 2a724d362c2d..61e0de4b3d4b 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Exynos USB PHY registers */ #define EXYNOS5_FSEL_9MHZ6 0x0 @@ -209,6 +210,10 @@ #define EXYNOS9_PMA_USBDP_CMN_REG00B8 0x02e0 #define CMN_REG00B8_LANE_MUX_SEL_DP GENMASK(3, 0) +#define CMN_REG00B8_LANE_MUX_SEL_DP_LANE3 BIT(3) +#define CMN_REG00B8_LANE_MUX_SEL_DP_LANE2 BIT(2) +#define CMN_REG00B8_LANE_MUX_SEL_DP_LANE1 BIT(1) +#define CMN_REG00B8_LANE_MUX_SEL_DP_LANE0 BIT(0) #define EXYNOS9_PMA_USBDP_CMN_REG01C0 0x0700 #define CMN_REG01C0_ANA_LCPLL_LOCK_DONE BIT(7) @@ -383,11 +388,13 @@ struct exynos5_usbdrd_phy_drvdata { * @clks: clocks for register access * @core_clks: core clocks for phy (ref, pipe3, utmi+, ITP, etc. as required) * @drv_data: pointer to SoC level driver data structure + * @phy_mutex: mutex protecting phy_init/exit & TCPC callbacks * @phys: array for 'EXYNOS5_DRDPHYS_NUM' number of PHY * instances each with its 'phy' and 'phy_cfg'. * @extrefclk: frequency select settings when using 'separate * reference clocks' for SS and HS operations * @regulators: regulators for phy + * @orientation: TypeC connector orientation - normal or flipped */ struct exynos5_usbdrd_phy { struct device *dev; @@ -397,6 +404,7 @@ struct exynos5_usbdrd_phy { struct clk_bulk_data *clks; struct clk_bulk_data *core_clks; const struct exynos5_usbdrd_phy_drvdata *drv_data; + struct mutex phy_mutex; struct phy_usb_instance { struct phy *phy; u32 index; @@ -406,6 +414,8 @@ struct exynos5_usbdrd_phy { } phys[EXYNOS5_DRDPHYS_NUM]; u32 extrefclk; struct regulator_bulk_data *regulators; + + enum typec_orientation orientation; }; static inline @@ -647,22 +657,38 @@ exynos5_usbdrd_usbdp_g2_v4_pma_lane_mux_sel(struct exynos5_usbdrd_phy *phy_drd) /* lane configuration: USB on all lanes */ reg = readl(regs_base + EXYNOS9_PMA_USBDP_CMN_REG00B8); reg &= ~CMN_REG00B8_LANE_MUX_SEL_DP; + /* + * USB on lanes 0 & 1 in normal mode, or 2 & 3 if reversed, DP on the + * other ones. + */ + reg |= FIELD_PREP(CMN_REG00B8_LANE_MUX_SEL_DP, + ((phy_drd->orientation == TYPEC_ORIENTATION_NORMAL) + ? (CMN_REG00B8_LANE_MUX_SEL_DP_LANE3 + | CMN_REG00B8_LANE_MUX_SEL_DP_LANE2) + : (CMN_REG00B8_LANE_MUX_SEL_DP_LANE1 + | CMN_REG00B8_LANE_MUX_SEL_DP_LANE0))); writel(reg, regs_base + EXYNOS9_PMA_USBDP_CMN_REG00B8); - /* - * FIXME: below code supports one connector orientation only. It needs - * updating once we can receive connector events. - */ /* override of TX receiver detector and comparator: lane 1 */ reg = readl(regs_base + EXYNOS9_PMA_USBDP_TRSV_REG0413); - reg &= ~TRSV_REG0413_OVRD_LN1_TX_RXD_COMP_EN; - reg &= ~TRSV_REG0413_OVRD_LN1_TX_RXD_EN; + if (phy_drd->orientation == TYPEC_ORIENTATION_NORMAL) { + reg &= ~TRSV_REG0413_OVRD_LN1_TX_RXD_COMP_EN; + reg &= ~TRSV_REG0413_OVRD_LN1_TX_RXD_EN; + } else { + reg |= TRSV_REG0413_OVRD_LN1_TX_RXD_COMP_EN; + reg |= TRSV_REG0413_OVRD_LN1_TX_RXD_EN; + } writel(reg, regs_base + EXYNOS9_PMA_USBDP_TRSV_REG0413); /* lane 3 */ reg = readl(regs_base + EXYNOS9_PMA_USBDP_TRSV_REG0813); - reg |= TRSV_REG0813_OVRD_LN3_TX_RXD_COMP_EN; - reg |= TRSV_REG0813_OVRD_LN3_TX_RXD_EN; + if (phy_drd->orientation == TYPEC_ORIENTATION_NORMAL) { + reg |= TRSV_REG0813_OVRD_LN3_TX_RXD_COMP_EN; + reg |= TRSV_REG0813_OVRD_LN3_TX_RXD_EN; + } else { + reg &= ~TRSV_REG0813_OVRD_LN3_TX_RXD_COMP_EN; + reg &= ~TRSV_REG0813_OVRD_LN3_TX_RXD_EN; + } writel(reg, regs_base + EXYNOS9_PMA_USBDP_TRSV_REG0813); } @@ -700,21 +726,18 @@ exynos5_usbdrd_usbdp_g2_v4_pma_check_cdr_lock(struct exynos5_usbdrd_phy *phy_drd int err; err = readl_poll_timeout( - phy_drd->reg_pma + EXYNOS9_PMA_USBDP_TRSV_REG03C3, - reg, (reg & locked) == locked, sleep_us, timeout_us); - if (!err) - return; - - dev_err(phy_drd->dev, - "timed out waiting for CDR lock (l0): %#.8x, retrying\n", reg); - - /* based on cable orientation, this might be on the other phy port */ - err = readl_poll_timeout( - phy_drd->reg_pma + EXYNOS9_PMA_USBDP_TRSV_REG07C3, + /* lane depends on cable orientation */ + (phy_drd->reg_pma + + ((phy_drd->orientation == TYPEC_ORIENTATION_NORMAL) + ? EXYNOS9_PMA_USBDP_TRSV_REG03C3 + : EXYNOS9_PMA_USBDP_TRSV_REG07C3)), reg, (reg & locked) == locked, sleep_us, timeout_us); if (err) dev_err(phy_drd->dev, - "timed out waiting for CDR lock (l2): %#.8x\n", reg); + "timed out waiting for CDR(l%d) lock: %#.8x\n", + ((phy_drd->orientation == TYPEC_ORIENTATION_NORMAL) + ? 0 + : 2), reg); } static void exynos5_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd) @@ -1184,7 +1207,8 @@ static int exynos850_usbdrd_phy_init(struct phy *phy) return ret; /* UTMI or PIPE3 specific init */ - inst->phy_cfg->phy_init(phy_drd); + scoped_guard(mutex, &phy_drd->phy_mutex) + inst->phy_cfg->phy_init(phy_drd); clk_bulk_disable_unprepare(phy_drd->drv_data->n_clks, phy_drd->clks); @@ -1203,6 +1227,8 @@ static int exynos850_usbdrd_phy_exit(struct phy *phy) if (ret) return ret; + guard(mutex)(&phy_drd->phy_mutex); + /* Set PHY clock and control HS PHY */ reg = readl(regs_base + EXYNOS850_DRD_UTMI); reg &= ~(UTMI_DP_PULLDOWN | UTMI_DM_PULLDOWN); @@ -1698,6 +1724,10 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) return -EINVAL; phy_drd->drv_data = drv_data; + ret = devm_mutex_init(dev, &phy_drd->phy_mutex); + if (ret) + return ret; + if (of_property_present(dev->of_node, "reg-names")) { void __iomem *reg; From 09dc674295a388e71192430b6f9c3c5cb0eb47da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:06 +0000 Subject: [PATCH 0187/2157] phy: exynos5-usbdrd: subscribe to orientation notifier if required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gs101's SS phy needs to be configured differently based on the connector orientation, as the SS link can only be established if the mux is configured correctly. The code to handle programming of the mux is in place already, this commit now adds the missing pieces to subscribe to the Type-C orientation switch event. Note that for this all to work we rely on the USB controller re-initialising us. It should invoke our .exit() upon cable unplug, and during cable plug we'll receive the orientation event after which we expect our .init() to be called. Above reinitialisation happens if the DWC3 controller can enter runtime suspend automatically. For the DWC3 driver, this is an opt-in: echo auto > /sys/devices/.../11110000.usb/power/control Once done, things work as long as the UDC is not bound as otherwise it stays busy because it doesn't cancel / stop outstanding TRBs. For now we have to manually unbind the UDC in that case: echo "" > sys/kernel/config/usb_gadget/.../UDC Note that if the orientation-switch property is missing from the DT, the code will behave as before this commit (meaning for gs101 it will work in SS mode in one orientation only). Other platforms are not affected either way. Signed-off-by: André Draszik Tested-by: Will McVicker Reviewed-by: Peter Griffin Tested-by: Peter Griffin Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-6-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/Kconfig | 1 + drivers/phy/samsung/phy-exynos5-usbdrd.c | 56 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/drivers/phy/samsung/Kconfig b/drivers/phy/samsung/Kconfig index e2330b0894d6..7fba571c0e2b 100644 --- a/drivers/phy/samsung/Kconfig +++ b/drivers/phy/samsung/Kconfig @@ -81,6 +81,7 @@ config PHY_EXYNOS5_USBDRD tristate "Exynos5 SoC series USB DRD PHY driver" depends on (ARCH_EXYNOS && OF) || COMPILE_TEST depends on HAS_IOMEM + depends on TYPEC || (TYPEC=n && COMPILE_TEST) depends on USB_DWC3_EXYNOS select GENERIC_PHY select MFD_SYSCON diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 61e0de4b3d4b..8fc15847cfd8 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -24,6 +24,7 @@ #include #include #include +#include /* Exynos USB PHY registers */ #define EXYNOS5_FSEL_9MHZ6 0x0 @@ -394,6 +395,7 @@ struct exynos5_usbdrd_phy_drvdata { * @extrefclk: frequency select settings when using 'separate * reference clocks' for SS and HS operations * @regulators: regulators for phy + * @sw: TypeC orientation switch handle * @orientation: TypeC connector orientation - normal or flipped */ struct exynos5_usbdrd_phy { @@ -415,6 +417,7 @@ struct exynos5_usbdrd_phy { u32 extrefclk; struct regulator_bulk_data *regulators; + struct typec_switch_dev *sw; enum typec_orientation orientation; }; @@ -1397,6 +1400,55 @@ static int exynos5_usbdrd_phy_clk_handle(struct exynos5_usbdrd_phy *phy_drd) return 0; } +static int exynos5_usbdrd_orien_sw_set(struct typec_switch_dev *sw, + enum typec_orientation orientation) +{ + struct exynos5_usbdrd_phy *phy_drd = typec_switch_get_drvdata(sw); + + scoped_guard(mutex, &phy_drd->phy_mutex) + phy_drd->orientation = orientation; + + return 0; +} + +static void exynos5_usbdrd_orien_switch_unregister(void *data) +{ + struct exynos5_usbdrd_phy *phy_drd = data; + + typec_switch_unregister(phy_drd->sw); +} + +static int exynos5_usbdrd_setup_notifiers(struct exynos5_usbdrd_phy *phy_drd) +{ + int ret; + + if (!IS_ENABLED(CONFIG_TYPEC)) + return 0; + + if (device_property_present(phy_drd->dev, "orientation-switch")) { + struct typec_switch_desc sw_desc = { }; + + sw_desc.drvdata = phy_drd; + sw_desc.fwnode = dev_fwnode(phy_drd->dev); + sw_desc.set = exynos5_usbdrd_orien_sw_set; + + phy_drd->sw = typec_switch_register(phy_drd->dev, &sw_desc); + if (IS_ERR(phy_drd->sw)) + return dev_err_probe(phy_drd->dev, + PTR_ERR(phy_drd->sw), + "Failed to register TypeC orientation switch\n"); + + ret = devm_add_action_or_reset(phy_drd->dev, + exynos5_usbdrd_orien_switch_unregister, + phy_drd); + if (ret) + return dev_err_probe(phy_drd->dev, ret, + "Failed to register TypeC orientation devm action\n"); + } + + return 0; +} + static const struct exynos5_usbdrd_phy_config phy_cfg_exynos5[] = { { .id = EXYNOS5_DRDPHY_UTMI, @@ -1786,6 +1838,10 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to get regulators\n"); + ret = exynos5_usbdrd_setup_notifiers(phy_drd); + if (ret) + return ret; + dev_vdbg(dev, "Creating usbdrd_phy phy\n"); for (i = 0; i < EXYNOS5_DRDPHYS_NUM; i++) { From f4fb9c4d7f94dabef4abf2209cf840dd1c9ca11e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 6 Dec 2024 16:31:07 +0000 Subject: [PATCH 0188/2157] phy: exynos5-usbdrd: allow DWC3 runtime suspend with UDC bound (E850+) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make USB runtime suspend work when a UDC has been bound, the phy needs to inform the USBDRD controller (DWC3) that Vbus and bvalid are gone, so that it can in turn raise the respective gadget interrupt with event == DWC3_DEVICE_EVENT_DISCONNECT, which will cause the USB stack to clean up, allowing DWC3 to enter runtime suspend. On e850 and gs101 this isn't working, as the respective signals are not directly connected, and instead this driver uses override bits in the PHY IP to set those signals. It currently forcefully sets them to 'on', so the above mentioned interrupt will not be raised, preventing runtime suspend. To detect that state, update this driver to act on the TCPC's orientation signal - when orientation == NONE, Vbus is gone and we can clear the respective bits. Similarly, for other orientation values we re-enable them. This makes runtime suspend work on platforms with a TCPC (like Pixel6), while keeping compatibility with platforms without (e850-96). With runtime suspend working, USB-C cable orientation detection now also fully works on such platforms, and the link comes up as Superspeed as expected irrespective of the cable orientation and whether UDC / gadget are configured and active. Signed-off-by: André Draszik Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241206-gs101-phy-lanes-orientation-phy-v4-7-f5961268b149@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 48 ++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 8fc15847cfd8..bac1dc927b26 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1137,13 +1137,15 @@ static void exynos850_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd) reg |= LINKCTRL_BUS_FILTER_BYPASS(0xf); writel(reg, regs_base + EXYNOS850_DRD_LINKCTRL); - reg = readl(regs_base + EXYNOS850_DRD_UTMI); - reg |= UTMI_FORCE_BVALID | UTMI_FORCE_VBUSVALID; - writel(reg, regs_base + EXYNOS850_DRD_UTMI); + if (!phy_drd->sw) { + reg = readl(regs_base + EXYNOS850_DRD_UTMI); + reg |= UTMI_FORCE_BVALID | UTMI_FORCE_VBUSVALID; + writel(reg, regs_base + EXYNOS850_DRD_UTMI); - reg = readl(regs_base + EXYNOS850_DRD_HSP); - reg |= HSP_VBUSVLDEXT | HSP_VBUSVLDEXTSEL; - writel(reg, regs_base + EXYNOS850_DRD_HSP); + reg = readl(regs_base + EXYNOS850_DRD_HSP); + reg |= HSP_VBUSVLDEXT | HSP_VBUSVLDEXTSEL; + writel(reg, regs_base + EXYNOS850_DRD_HSP); + } reg = readl(regs_base + EXYNOS850_DRD_SSPPLLCTL); reg &= ~SSPPLLCTL_FSEL; @@ -1404,9 +1406,41 @@ static int exynos5_usbdrd_orien_sw_set(struct typec_switch_dev *sw, enum typec_orientation orientation) { struct exynos5_usbdrd_phy *phy_drd = typec_switch_get_drvdata(sw); + int ret; + + ret = clk_bulk_prepare_enable(phy_drd->drv_data->n_clks, phy_drd->clks); + if (ret) { + dev_err(phy_drd->dev, "Failed to enable PHY clocks(s)\n"); + return ret; + } + + scoped_guard(mutex, &phy_drd->phy_mutex) { + void __iomem * const regs_base = phy_drd->reg_phy; + unsigned int reg; + + if (orientation == TYPEC_ORIENTATION_NONE) { + reg = readl(regs_base + EXYNOS850_DRD_UTMI); + reg &= ~(UTMI_FORCE_VBUSVALID | UTMI_FORCE_BVALID); + writel(reg, regs_base + EXYNOS850_DRD_UTMI); + + reg = readl(regs_base + EXYNOS850_DRD_HSP); + reg |= HSP_VBUSVLDEXTSEL; + reg &= ~HSP_VBUSVLDEXT; + writel(reg, regs_base + EXYNOS850_DRD_HSP); + } else { + reg = readl(regs_base + EXYNOS850_DRD_UTMI); + reg |= UTMI_FORCE_VBUSVALID | UTMI_FORCE_BVALID; + writel(reg, regs_base + EXYNOS850_DRD_UTMI); + + reg = readl(regs_base + EXYNOS850_DRD_HSP); + reg |= HSP_VBUSVLDEXTSEL | HSP_VBUSVLDEXT; + writel(reg, regs_base + EXYNOS850_DRD_HSP); + } - scoped_guard(mutex, &phy_drd->phy_mutex) phy_drd->orientation = orientation; + } + + clk_bulk_disable(phy_drd->drv_data->n_clks, phy_drd->clks); return 0; } From 13c1eb1b4bd169f820188c62acaa1f96677284b1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Jan 2025 19:54:07 +0100 Subject: [PATCH 0189/2157] phy: stih407-usb: Use syscon_regmap_lookup_by_phandle_args Use syscon_regmap_lookup_by_phandle_args() which is a wrapper over syscon_regmap_lookup_by_phandle() combined with getting the syscon argument. Except simpler code this annotates within one line that given phandle has arguments, so grepping for code would be easier. There is also no real benefit in printing errors on missing syscon argument, because this is done just too late: runtime check on static/build-time data. Dtschema and Devicetree bindings offer the static/build-time check for this already. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Patrice Chotard Link: https://lore.kernel.org/r/20250111185407.183855-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stih407-usb.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/phy/st/phy-stih407-usb.c b/drivers/phy/st/phy-stih407-usb.c index a4ae2cca7f63..ebb1d0858aa3 100644 --- a/drivers/phy/st/phy-stih407-usb.c +++ b/drivers/phy/st/phy-stih407-usb.c @@ -18,8 +18,8 @@ #include #include -#define PHYPARAM_REG 1 -#define PHYCTRL_REG 2 +#define PHYPARAM_REG 0 +#define PHYCTRL_REG 1 /* Default PHY_SEL and REFCLKSEL configuration */ #define STIH407_USB_PICOPHY_CTRL_PORT_CONF 0x6 @@ -91,8 +91,8 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct phy_provider *phy_provider; + unsigned int syscon_args[2]; struct phy *phy; - int ret; phy_dev = devm_kzalloc(dev, sizeof(*phy_dev), GFP_KERNEL); if (!phy_dev) @@ -116,25 +116,15 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) /* Reset port by default: only deassert it in phy init */ reset_control_assert(phy_dev->rstport); - phy_dev->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscfg"); + phy_dev->regmap = syscon_regmap_lookup_by_phandle_args(np, "st,syscfg", + 2, syscon_args); if (IS_ERR(phy_dev->regmap)) { dev_err(dev, "No syscfg phandle specified\n"); return PTR_ERR(phy_dev->regmap); } - ret = of_property_read_u32_index(np, "st,syscfg", PHYPARAM_REG, - &phy_dev->param); - if (ret) { - dev_err(dev, "can't get phyparam offset (%d)\n", ret); - return ret; - } - - ret = of_property_read_u32_index(np, "st,syscfg", PHYCTRL_REG, - &phy_dev->ctrl); - if (ret) { - dev_err(dev, "can't get phyctrl offset (%d)\n", ret); - return ret; - } + phy_dev->param = syscon_args[PHYPARAM_REG]; + phy_dev->ctrl = syscon_args[PHYCTRL_REG]; phy = devm_phy_create(dev, NULL, &stih407_usb2_picophy_data); if (IS_ERR(phy)) { From d58c04e305afbaa9dda7969151f06c4efe2c98b0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 9 Feb 2025 14:31:45 +0200 Subject: [PATCH 0190/2157] phy: core: don't require set_mode() callback for phy_get_mode() to work As reported by Damon Ding, the phy_get_mode() call doesn't work as expected unless the PHY driver has a .set_mode() call. This prompts PHY drivers to have empty stubs for .set_mode() for the sake of being able to get the mode. Make .set_mode() callback truly optional and update PHY's mode even if it there is none. Cc: Damon Ding Link: https://lore.kernel.org/r/96f8310f-93f1-4bcb-8637-137e1159ff83@rock-chips.com Tested-by: Damon Ding Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250209-phy-fix-set-moe-v2-1-76e248503856@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/phy-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 8dfdce605a90..067316dfcd83 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -405,13 +405,14 @@ EXPORT_SYMBOL_GPL(phy_power_off); int phy_set_mode_ext(struct phy *phy, enum phy_mode mode, int submode) { - int ret; + int ret = 0; - if (!phy || !phy->ops->set_mode) + if (!phy) return 0; mutex_lock(&phy->mutex); - ret = phy->ops->set_mode(phy, mode, submode); + if (phy->ops->set_mode) + ret = phy->ops->set_mode(phy, mode, submode); if (!ret) phy->attrs.mode = mode; mutex_unlock(&phy->mutex); From 279950205dde78bc0e3ca73a8dfee6842f0e1edc Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Sat, 8 Feb 2025 11:44:01 +0800 Subject: [PATCH 0191/2157] phy: freescale: fsl-samsung-hdmi: Use helper function devm_clk_get_enabled() Since commit 7ef9651e9792 ("clk: Provide new devm_clk helpers for prepared and enabled clocks"), devm_clk_get() and clk_prepare_enable() can now be replaced by devm_clk_get_enabled() when driver enables the clocks for the whole lifetime of the device. Moreover, it is no longer necessary to unprepare and disable the clocks explicitly. Signed-off-by: Pei Xiao Link: https://lore.kernel.org/r/tencent_9087BCE04E38E6AA5C4B2252B82FA99C2009@qq.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-samsung-hdmi.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c index 45004f598e4d..f0f2ed414db5 100644 --- a/drivers/phy/freescale/phy-fsl-samsung-hdmi.c +++ b/drivers/phy/freescale/phy-fsl-samsung-hdmi.c @@ -659,7 +659,7 @@ static int fsl_samsung_hdmi_phy_probe(struct platform_device *pdev) if (IS_ERR(phy->regs)) return PTR_ERR(phy->regs); - phy->apbclk = devm_clk_get(phy->dev, "apb"); + phy->apbclk = devm_clk_get_enabled(phy->dev, "apb"); if (IS_ERR(phy->apbclk)) return dev_err_probe(phy->dev, PTR_ERR(phy->apbclk), "failed to get apb clk\n"); @@ -669,12 +669,6 @@ static int fsl_samsung_hdmi_phy_probe(struct platform_device *pdev) return dev_err_probe(phy->dev, PTR_ERR(phy->refclk), "failed to get ref clk\n"); - ret = clk_prepare_enable(phy->apbclk); - if (ret) { - dev_err(phy->dev, "failed to enable apbclk\n"); - return ret; - } - pm_runtime_get_noresume(phy->dev); pm_runtime_set_active(phy->dev); pm_runtime_enable(phy->dev); @@ -690,8 +684,6 @@ static int fsl_samsung_hdmi_phy_probe(struct platform_device *pdev) return 0; register_clk_failed: - clk_disable_unprepare(phy->apbclk); - return ret; } From 2947c8065e9efdd3b6434d2817dc8896234a3fc0 Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Wed, 5 Feb 2025 18:51:54 +0800 Subject: [PATCH 0192/2157] phy: phy-rockchip-samsung-hdptx: Swap the definitions of LCPLL_REF and ROPLL_REF According to the datasheet, setting the dig_clk_sel bit of CMN_REG(0097) to 1'b1 selects LCPLL as the reference clock, while setting it to 1'b0 selects the ROPLL. Signed-off-by: Damon Ding Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250205105157.580060-2-damon.ding@rock-chips.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index 35d5b762e5c4..dfcc37b1c8a6 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -94,8 +94,8 @@ #define LCPLL_ALONE_MODE BIT(1) /* CMN_REG(0097) */ #define DIG_CLK_SEL BIT(1) -#define ROPLL_REF BIT(1) -#define LCPLL_REF 0 +#define LCPLL_REF BIT(1) +#define ROPLL_REF 0 /* CMN_REG(0099) */ #define CMN_ROPLL_ALONE_MODE BIT(2) #define ROPLL_ALONE_MODE BIT(2) From f706024107204cb0b640bac35ea47e7b91b8c71f Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Wed, 5 Feb 2025 18:51:55 +0800 Subject: [PATCH 0193/2157] phy: phy-rockchip-samsung-hdptx: Supplement some register names with their full version Complete the register names of CMN_REG(0081) and CMN_REG(0087) to their full version, and it can help to better match the datasheet. Signed-off-by: Damon Ding Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250205105157.580060-3-damon.ding@rock-chips.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index dfcc37b1c8a6..77b817ac9059 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -82,14 +82,14 @@ #define ROPLL_SSC_EN BIT(0) /* CMN_REG(0081) */ #define OVRD_PLL_CD_CLK_EN BIT(8) -#define PLL_CD_HSCLK_EAST_EN BIT(0) +#define ANA_PLL_CD_HSCLK_EAST_EN BIT(0) /* CMN_REG(0086) */ #define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) #define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) #define PLL_PCG_CLK_EN BIT(0) /* CMN_REG(0087) */ -#define PLL_FRL_MODE_EN BIT(3) -#define PLL_TX_HS_CLK_EN BIT(2) +#define ANA_PLL_FRL_MODE_EN BIT(3) +#define ANA_PLL_TX_HS_CLK_EN BIT(2) /* CMN_REG(0089) */ #define LCPLL_ALONE_MODE BIT(1) /* CMN_REG(0097) */ From 2dc8224e3758c5d6387786ea1d74d2d510149b1a Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Wed, 5 Feb 2025 18:51:56 +0800 Subject: [PATCH 0194/2157] phy: phy-rockchip-samsung-hdptx: Add the '_MASK' suffix to all registers Adding the '_MASK' suffix to all registers in order to ensures consistency in the naming convention for register macros throughout the file. Signed-off-by: Damon Ding Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250205105157.580060-4-damon.ding@rock-chips.com Signed-off-by: Vinod Koul --- .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index 77b817ac9059..c3c8bd23beae 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -50,60 +50,60 @@ #define LCPLL_PI_EN_MASK BIT(5) #define LCPLL_100M_CLK_EN_MASK BIT(0) /* CMN_REG(0025) */ -#define LCPLL_PMS_IQDIV_RSTN BIT(4) +#define LCPLL_PMS_IQDIV_RSTN_MASK BIT(4) /* CMN_REG(0028) */ -#define LCPLL_SDC_FRAC_EN BIT(2) -#define LCPLL_SDC_FRAC_RSTN BIT(0) +#define LCPLL_SDC_FRAC_EN_MASK BIT(2) +#define LCPLL_SDC_FRAC_RSTN_MASK BIT(0) /* CMN_REG(002d) */ #define LCPLL_SDC_N_MASK GENMASK(3, 1) /* CMN_REG(002e) */ #define LCPLL_SDC_NUMBERATOR_MASK GENMASK(5, 0) /* CMN_REG(002f) */ #define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) -#define LCPLL_SDC_NDIV_RSTN BIT(0) +#define LCPLL_SDC_NDIV_RSTN_MASK BIT(0) /* CMN_REG(003d) */ -#define ROPLL_LCVCO_EN BIT(4) +#define ROPLL_LCVCO_EN_MASK BIT(4) /* CMN_REG(004e) */ -#define ROPLL_PI_EN BIT(5) +#define ROPLL_PI_EN_MASK BIT(5) /* CMN_REG(005c) */ -#define ROPLL_PMS_IQDIV_RSTN BIT(5) +#define ROPLL_PMS_IQDIV_RSTN_MASK BIT(5) /* CMN_REG(005e) */ #define ROPLL_SDM_EN_MASK BIT(6) -#define ROPLL_SDM_FRAC_EN_RBR BIT(3) -#define ROPLL_SDM_FRAC_EN_HBR BIT(2) -#define ROPLL_SDM_FRAC_EN_HBR2 BIT(1) -#define ROPLL_SDM_FRAC_EN_HBR3 BIT(0) +#define ROPLL_SDC_FRAC_EN_RBR_MASK BIT(3) +#define ROPLL_SDC_FRAC_EN_HBR_MASK BIT(2) +#define ROPLL_SDC_FRAC_EN_HBR2_MASK BIT(1) +#define ROPLL_SDM_FRAC_EN_HBR3_MASK BIT(0) /* CMN_REG(0064) */ #define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) /* CMN_REG(0069) */ #define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) /* CMN_REG(0074) */ -#define ROPLL_SDC_NDIV_RSTN BIT(2) -#define ROPLL_SSC_EN BIT(0) +#define ROPLL_SDC_NDIV_RSTN_MASK BIT(2) +#define ROPLL_SSC_EN_MASK BIT(0) /* CMN_REG(0081) */ -#define OVRD_PLL_CD_CLK_EN BIT(8) -#define ANA_PLL_CD_HSCLK_EAST_EN BIT(0) +#define OVRD_PLL_CD_CLK_EN_MASK BIT(8) +#define ANA_PLL_CD_HSCLK_EAST_EN_MASK BIT(0) /* CMN_REG(0086) */ #define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) #define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) -#define PLL_PCG_CLK_EN BIT(0) +#define PLL_PCG_CLK_EN_MASK BIT(0) /* CMN_REG(0087) */ -#define ANA_PLL_FRL_MODE_EN BIT(3) -#define ANA_PLL_TX_HS_CLK_EN BIT(2) +#define ANA_PLL_FRL_MODE_EN_MASK BIT(3) +#define ANA_PLL_TX_HS_CLK_EN_MASK BIT(2) /* CMN_REG(0089) */ -#define LCPLL_ALONE_MODE BIT(1) +#define LCPLL_ALONE_MODE_MASK BIT(1) /* CMN_REG(0097) */ -#define DIG_CLK_SEL BIT(1) +#define DIG_CLK_SEL_MASK BIT(1) #define LCPLL_REF BIT(1) #define ROPLL_REF 0 /* CMN_REG(0099) */ -#define CMN_ROPLL_ALONE_MODE BIT(2) +#define CMN_ROPLL_ALONE_MODE_MASK BIT(2) #define ROPLL_ALONE_MODE BIT(2) /* CMN_REG(009a) */ -#define HS_SPEED_SEL BIT(0) +#define HS_SPEED_SEL_MASK BIT(0) #define DIV_10_CLOCK BIT(0) /* CMN_REG(009b) */ -#define IS_SPEED_SEL BIT(4) +#define LS_SPEED_SEL_MASK BIT(4) #define LINK_SYMBOL_CLOCK BIT(4) #define LINK_SYMBOL_CLOCK1_2 0 @@ -161,36 +161,36 @@ #define SB_READY_MASK BIT(4) /* LNTOP_REG(0200) */ -#define PROTOCOL_SEL BIT(2) +#define PROTOCOL_SEL_MASK BIT(2) #define HDMI_MODE BIT(2) #define HDMI_TMDS_FRL_SEL BIT(1) /* LNTOP_REG(0206) */ -#define DATA_BUS_SEL BIT(0) +#define DATA_BUS_WIDTH_SEL_MASK BIT(0) #define DATA_BUS_36_40 BIT(0) /* LNTOP_REG(0207) */ -#define LANE_EN 0xf +#define LANE_EN_MASK 0xf #define ALL_LANE_EN 0xf /* LANE_REG(0312) */ -#define LN0_TX_SER_RATE_SEL_RBR BIT(5) -#define LN0_TX_SER_RATE_SEL_HBR BIT(4) -#define LN0_TX_SER_RATE_SEL_HBR2 BIT(3) -#define LN0_TX_SER_RATE_SEL_HBR3 BIT(2) +#define LN0_TX_SER_RATE_SEL_RBR_MASK BIT(5) +#define LN0_TX_SER_RATE_SEL_HBR_MASK BIT(4) +#define LN0_TX_SER_RATE_SEL_HBR2_MASK BIT(3) +#define LN0_TX_SER_RATE_SEL_HBR3_MASK BIT(2) /* LANE_REG(0412) */ -#define LN1_TX_SER_RATE_SEL_RBR BIT(5) -#define LN1_TX_SER_RATE_SEL_HBR BIT(4) -#define LN1_TX_SER_RATE_SEL_HBR2 BIT(3) -#define LN1_TX_SER_RATE_SEL_HBR3 BIT(2) +#define LN1_TX_SER_RATE_SEL_RBR_MASK BIT(5) +#define LN1_TX_SER_RATE_SEL_HBR_MASK BIT(4) +#define LN1_TX_SER_RATE_SEL_HBR2_MASK BIT(3) +#define LN1_TX_SER_RATE_SEL_HBR3_MASK BIT(2) /* LANE_REG(0512) */ -#define LN2_TX_SER_RATE_SEL_RBR BIT(5) -#define LN2_TX_SER_RATE_SEL_HBR BIT(4) -#define LN2_TX_SER_RATE_SEL_HBR2 BIT(3) -#define LN2_TX_SER_RATE_SEL_HBR3 BIT(2) +#define LN2_TX_SER_RATE_SEL_RBR_MASK BIT(5) +#define LN2_TX_SER_RATE_SEL_HBR_MASK BIT(4) +#define LN2_TX_SER_RATE_SEL_HBR2_MASK BIT(3) +#define LN2_TX_SER_RATE_SEL_HBR3_MASK BIT(2) /* LANE_REG(0612) */ -#define LN3_TX_SER_RATE_SEL_RBR BIT(5) -#define LN3_TX_SER_RATE_SEL_HBR BIT(4) -#define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) -#define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) +#define LN3_TX_SER_RATE_SEL_RBR_MASK BIT(5) +#define LN3_TX_SER_RATE_SEL_HBR_MASK BIT(4) +#define LN3_TX_SER_RATE_SEL_HBR2_MASK BIT(3) +#define LN3_TX_SER_RATE_SEL_HBR3_MASK BIT(2) #define HDMI20_MAX_RATE 600000000 @@ -824,8 +824,8 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK, FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv)); - regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN, - PLL_PCG_CLK_EN); + regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN_MASK, + FIELD_PREP(PLL_PCG_CLK_EN_MASK, 0x1)); return rk_hdptx_post_enable_pll(hdptx); } From 8f831f272b4c89aa13b45bd010c2c18ad97a3f1b Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Wed, 5 Feb 2025 18:51:57 +0800 Subject: [PATCH 0195/2157] phy: phy-rockchip-samsung-hdptx: Add eDP mode support for RK3588 The PHY is based on a Samsung IP block that supports HDMI 2.1, and eDP 1.4b. RK3588 integrates the Analogix eDP 1.3 TX controller IP and the HDMI/eDP TX Combo PHY to support eDP display. Add basic support for RBR/HBR/HBR2 link rates, and the voltage swing and pre-emphasis configurations of each link rate are set according to the eDP 1.3 requirements. Signed-off-by: Damon Ding Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250205105157.580060-5-damon.ding@rock-chips.com Signed-off-by: Vinod Koul --- .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 879 +++++++++++++++++- 1 file changed, 869 insertions(+), 10 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index c3c8bd23beae..f88369864c50 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -25,6 +25,7 @@ #define HDPTX_I_PLL_EN BIT(7) #define HDPTX_I_BIAS_EN BIT(6) #define HDPTX_I_BGR_EN BIT(5) +#define HDPTX_MODE_SEL BIT(0) #define GRF_HDPTX_STATUS 0x80 #define HDPTX_O_PLL_LOCK_DONE BIT(3) #define HDPTX_O_PHY_CLK_RDY BIT(2) @@ -44,6 +45,7 @@ #define LANE_REG(n) HDTPX_REG(n, 0300, 062d) /* CMN_REG(0008) */ +#define OVRD_LCPLL_EN_MASK BIT(7) #define LCPLL_EN_MASK BIT(6) #define LCPLL_LCVCO_MODE_EN_MASK BIT(4) /* CMN_REG(001e) */ @@ -61,28 +63,88 @@ /* CMN_REG(002f) */ #define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) #define LCPLL_SDC_NDIV_RSTN_MASK BIT(0) +/* CMN_REG(003c) */ +#define ANA_LCPLL_RESERVED7_MASK BIT(7) /* CMN_REG(003d) */ +#define OVRD_ROPLL_EN_MASK BIT(7) +#define ROPLL_EN_MASK BIT(6) #define ROPLL_LCVCO_EN_MASK BIT(4) +/* CMN_REG(0046) */ +#define ROPLL_ANA_CPP_CTRL_COARSE_MASK GENMASK(7, 4) +#define ROPLL_ANA_CPP_CTRL_FINE_MASK GENMASK(3, 0) +/* CMN_REG(0047) */ +#define ROPLL_ANA_LPF_C_SEL_COARSE_MASK GENMASK(5, 3) +#define ROPLL_ANA_LPF_C_SEL_FINE_MASK GENMASK(2, 0) /* CMN_REG(004e) */ #define ROPLL_PI_EN_MASK BIT(5) +/* CMN_REG(0051) */ +#define ROPLL_PMS_MDIV_MASK GENMASK(7, 0) +/* CMN_REG(0055) */ +#define ROPLL_PMS_MDIV_AFC_MASK GENMASK(7, 0) +/* CMN_REG(0059) */ +#define ANA_ROPLL_PMS_PDIV_MASK GENMASK(7, 4) +#define ANA_ROPLL_PMS_REFDIV_MASK GENMASK(3, 0) +/* CMN_REG(005a) */ +#define ROPLL_PMS_SDIV_RBR_MASK GENMASK(7, 4) +#define ROPLL_PMS_SDIV_HBR_MASK GENMASK(3, 0) +/* CMN_REG(005b) */ +#define ROPLL_PMS_SDIV_HBR2_MASK GENMASK(7, 4) /* CMN_REG(005c) */ #define ROPLL_PMS_IQDIV_RSTN_MASK BIT(5) /* CMN_REG(005e) */ #define ROPLL_SDM_EN_MASK BIT(6) +#define OVRD_ROPLL_SDM_RSTN_MASK BIT(5) +#define ROPLL_SDM_RSTN_MASK BIT(4) #define ROPLL_SDC_FRAC_EN_RBR_MASK BIT(3) #define ROPLL_SDC_FRAC_EN_HBR_MASK BIT(2) #define ROPLL_SDC_FRAC_EN_HBR2_MASK BIT(1) #define ROPLL_SDM_FRAC_EN_HBR3_MASK BIT(0) +/* CMN_REG(005f) */ +#define OVRD_ROPLL_SDC_RSTN_MASK BIT(5) +#define ROPLL_SDC_RSTN_MASK BIT(4) +/* CMN_REG(0060) */ +#define ROPLL_SDM_DENOMINATOR_MASK GENMASK(7, 0) /* CMN_REG(0064) */ #define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) +#define ROPLL_SDM_NUM_SIGN_HBR_MASK BIT(2) +#define ROPLL_SDM_NUM_SIGN_HBR2_MASK BIT(1) +/* CMN_REG(0065) */ +#define ROPLL_SDM_NUM_MASK GENMASK(7, 0) /* CMN_REG(0069) */ #define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) +/* CMN_REG(006a) */ +#define ROPLL_SDC_N_HBR_MASK GENMASK(5, 3) +#define ROPLL_SDC_N_HBR2_MASK GENMASK(2, 0) +/* CMN_REG(006b) */ +#define ROPLL_SDC_N_HBR3_MASK GENMASK(3, 1) +/* CMN_REG(006c) */ +#define ROPLL_SDC_NUM_MASK GENMASK(5, 0) +/* cmn_reg0070 */ +#define ROPLL_SDC_DENO_MASK GENMASK(5, 0) /* CMN_REG(0074) */ +#define OVRD_ROPLL_SDC_NDIV_RSTN_MASK BIT(3) #define ROPLL_SDC_NDIV_RSTN_MASK BIT(2) +#define OVRD_ROPLL_SSC_EN_MASK BIT(1) #define ROPLL_SSC_EN_MASK BIT(0) +/* CMN_REG(0075) */ +#define ANA_ROPLL_SSC_FM_DEVIATION_MASK GENMASK(5, 0) +/* CMN_REG(0076) */ +#define ANA_ROPLL_SSC_FM_FREQ_MASK GENMASK(6, 2) +/* CMN_REG(0077) */ +#define ANA_ROPLL_SSC_CLK_DIV_SEL_MASK GENMASK(6, 3) /* CMN_REG(0081) */ #define OVRD_PLL_CD_CLK_EN_MASK BIT(8) +#define ANA_PLL_CD_TX_SER_RATE_SEL_MASK BIT(3) +#define ANA_PLL_CD_HSCLK_WEST_EN_MASK BIT(1) #define ANA_PLL_CD_HSCLK_EAST_EN_MASK BIT(0) +/* CMN_REG(0082) */ +#define ANA_PLL_CD_VREG_GAIN_CTRL_MASK GENMASK(3, 0) +/* CMN_REG(0083) */ +#define ANA_PLL_CD_VREG_ICTRL_MASK GENMASK(6, 5) +/* CMN_REG(0084) */ +#define PLL_LCRO_CLK_SEL_MASK BIT(5) +/* CMN_REG(0085) */ +#define ANA_PLL_SYNC_LOSS_DET_MODE_MASK GENMASK(1, 0) /* CMN_REG(0086) */ #define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) #define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) @@ -92,11 +154,14 @@ #define ANA_PLL_TX_HS_CLK_EN_MASK BIT(2) /* CMN_REG(0089) */ #define LCPLL_ALONE_MODE_MASK BIT(1) +/* CMN_REG(0095) */ +#define DP_TX_LINK_BW_MASK GENMASK(1, 0) /* CMN_REG(0097) */ #define DIG_CLK_SEL_MASK BIT(1) #define LCPLL_REF BIT(1) #define ROPLL_REF 0 /* CMN_REG(0099) */ +#define SSC_EN_MASK GENMASK(7, 6) #define CMN_ROPLL_ALONE_MODE_MASK BIT(2) #define ROPLL_ALONE_MODE BIT(2) /* CMN_REG(009a) */ @@ -118,6 +183,8 @@ /* SB_REG(0104) */ #define OVRD_SB_EN_MASK BIT(5) #define SB_EN_MASK BIT(4) +#define OVRD_SB_AUX_EN_MASK BIT(1) +#define SB_AUX_EN_MASK BIT(0) /* SB_REG(0105) */ #define OVRD_SB_EARC_CMDC_EN_MASK BIT(6) #define SB_EARC_CMDC_EN_MASK BIT(5) @@ -126,6 +193,8 @@ #define ANA_SB_TX_LLVL_PROG_MASK GENMASK(6, 4) /* SB_REG(0109) */ #define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0) +/* SB_REG(010d) */ +#define ANA_SB_DMRX_LPBK_DATA_MASK BIT(4) /* SB_REG(010f) */ #define OVRD_SB_VREG_EN_MASK BIT(7) #define SB_VREG_EN_MASK BIT(6) @@ -133,6 +202,7 @@ #define SB_VREG_LPF_BYPASS_MASK BIT(4) #define ANA_SB_VREG_GAIN_CTRL_MASK GENMASK(3, 0) /* SB_REG(0110) */ +#define ANA_SB_VREG_OUT_SEL_MASK BIT(1) #define ANA_SB_VREG_REF_SEL_MASK BIT(0) /* SB_REG(0113) */ #define SB_RX_RCAL_OPT_CODE_MASK GENMASK(5, 4) @@ -147,13 +217,24 @@ #define AFC_RSTN_DELAY_TIME_MASK GENMASK(6, 4) /* SB_REG(0117) */ #define FAST_PULSE_TIME_MASK GENMASK(3, 0) +/* SB_REG(0118) */ +#define SB_TG_EARC_DMRX_RECVRD_CLK_CNT_MASK GENMASK(7, 0) +/* SB_REG(011a) */ +#define SB_TG_CNT_RUN_NO_7_0_MASK GENMASK(7, 0) /* SB_REG(011b) */ #define SB_EARC_SIG_DET_BYPASS_MASK BIT(4) #define SB_AFC_TOL_MASK GENMASK(3, 0) +/* SB_REG(011c) */ +#define SB_AFC_STB_NUM_MASK GENMASK(3, 0) +/* SB_REG(011d) */ +#define SB_TG_OSC_CNT_MIN_MASK GENMASK(7, 0) +/* SB_REG(011e) */ +#define SB_TG_OSC_CNT_MAX_MASK GENMASK(7, 0) /* SB_REG(011f) */ #define SB_PWM_AFC_CTRL_MASK GENMASK(7, 2) #define SB_RCAL_RSTN_MASK BIT(1) /* SB_REG(0120) */ +#define SB_AUX_EN_IN_MASK BIT(7) #define SB_EARC_EN_MASK BIT(1) #define SB_EARC_AFC_EN_MASK BIT(2) /* SB_REG(0123) */ @@ -165,27 +246,74 @@ #define HDMI_MODE BIT(2) #define HDMI_TMDS_FRL_SEL BIT(1) /* LNTOP_REG(0206) */ +#define DATA_BUS_WIDTH_MASK GENMASK(2, 1) #define DATA_BUS_WIDTH_SEL_MASK BIT(0) #define DATA_BUS_36_40 BIT(0) /* LNTOP_REG(0207) */ #define LANE_EN_MASK 0xf #define ALL_LANE_EN 0xf +/* LANE_REG(0301) */ +#define OVRD_LN_TX_DRV_EI_EN_MASK BIT(7) +#define LN_TX_DRV_EI_EN_MASK BIT(6) +/* LANE_REG(0303) */ +#define OVRD_LN_TX_DRV_LVL_CTRL_MASK BIT(5) +#define LN_TX_DRV_LVL_CTRL_MASK GENMASK(4, 0) +/* LANE_REG(0304) */ +#define OVRD_LN_TX_DRV_POST_LVL_CTRL_MASK BIT(4) +#define LN_TX_DRV_POST_LVL_CTRL_MASK GENMASK(3, 0) +/* LANE_REG(0305) */ +#define OVRD_LN_TX_DRV_PRE_LVL_CTRL_MASK BIT(6) +#define LN_TX_DRV_PRE_LVL_CTRL_MASK GENMASK(5, 2) +/* LANE_REG(0306) */ +#define LN_ANA_TX_DRV_IDRV_IDN_CTRL_MASK GENMASK(7, 5) +#define LN_ANA_TX_DRV_IDRV_IUP_CTRL_MASK GENMASK(4, 2) +#define LN_ANA_TX_DRV_ACCDRV_EN_MASK BIT(0) +/* LANE_REG(0307) */ +#define LN_ANA_TX_DRV_ACCDRV_POL_SEL_MASK BIT(6) +#define LN_ANA_TX_DRV_ACCDRV_CTRL_MASK GENMASK(5, 3) +/* LANE_REG(030a) */ +#define LN_ANA_TX_JEQ_EN_MASK BIT(4) +#define LN_TX_JEQ_EVEN_CTRL_RBR_MASK GENMASK(3, 0) +/* LANE_REG(030b) */ +#define LN_TX_JEQ_EVEN_CTRL_HBR_MASK GENMASK(7, 4) +#define LN_TX_JEQ_EVEN_CTRL_HBR2_MASK GENMASK(3, 0) +/* LANE_REG(030c) */ +#define LN_TX_JEQ_ODD_CTRL_RBR_MASK GENMASK(3, 0) +/* LANE_REG(030d) */ +#define LN_TX_JEQ_ODD_CTRL_HBR_MASK GENMASK(7, 4) +#define LN_TX_JEQ_ODD_CTRL_HBR2_MASK GENMASK(3, 0) +/* LANE_REG(0310) */ +#define LN_ANA_TX_SYNC_LOSS_DET_MODE_MASK GENMASK(1, 0) +/* LANE_REG(0311) */ +#define LN_TX_SER_40BIT_EN_RBR_MASK BIT(3) +#define LN_TX_SER_40BIT_EN_HBR_MASK BIT(2) +#define LN_TX_SER_40BIT_EN_HBR2_MASK BIT(1) /* LANE_REG(0312) */ #define LN0_TX_SER_RATE_SEL_RBR_MASK BIT(5) #define LN0_TX_SER_RATE_SEL_HBR_MASK BIT(4) #define LN0_TX_SER_RATE_SEL_HBR2_MASK BIT(3) #define LN0_TX_SER_RATE_SEL_HBR3_MASK BIT(2) +/* LANE_REG(0316) */ +#define LN_ANA_TX_SER_VREG_GAIN_CTRL_MASK GENMASK(3, 0) +/* LANE_REG(031B) */ +#define LN_ANA_TX_RESERVED_MASK GENMASK(7, 0) +/* LANE_REG(031e) */ +#define LN_POLARITY_INV_MASK BIT(2) +#define LN_LANE_MODE_MASK BIT(1) + /* LANE_REG(0412) */ #define LN1_TX_SER_RATE_SEL_RBR_MASK BIT(5) #define LN1_TX_SER_RATE_SEL_HBR_MASK BIT(4) #define LN1_TX_SER_RATE_SEL_HBR2_MASK BIT(3) #define LN1_TX_SER_RATE_SEL_HBR3_MASK BIT(2) + /* LANE_REG(0512) */ #define LN2_TX_SER_RATE_SEL_RBR_MASK BIT(5) #define LN2_TX_SER_RATE_SEL_HBR_MASK BIT(4) #define LN2_TX_SER_RATE_SEL_HBR2_MASK BIT(3) #define LN2_TX_SER_RATE_SEL_HBR3_MASK BIT(2) + /* LANE_REG(0612) */ #define LN3_TX_SER_RATE_SEL_RBR_MASK BIT(5) #define LN3_TX_SER_RATE_SEL_HBR_MASK BIT(4) @@ -194,6 +322,12 @@ #define HDMI20_MAX_RATE 600000000 +enum dp_link_rate { + DP_BW_RBR, + DP_BW_HBR, + DP_BW_HBR2, +}; + struct lcpll_config { u32 bit_rate; u8 lcvco_mode_en; @@ -255,6 +389,19 @@ struct ropll_config { u8 cd_tx_ser_rate_sel; }; +struct tx_drv_ctrl { + u8 tx_drv_lvl_ctrl; + u8 tx_drv_post_lvl_ctrl; + u8 ana_tx_drv_idrv_idn_ctrl; + u8 ana_tx_drv_idrv_iup_ctrl; + u8 ana_tx_drv_accdrv_en; + u8 ana_tx_drv_accdrv_ctrl; + u8 tx_drv_pre_lvl_ctrl; + u8 ana_tx_jeq_en; + u8 tx_jeq_even_ctrl; + u8 tx_jeq_odd_ctrl; +}; + enum rk_hdptx_reset { RST_APB = 0, RST_INIT, @@ -290,6 +437,10 @@ struct rk_hdptx_phy { unsigned long rate; atomic_t usage_count; + + /* used for dp mode */ + unsigned int link_rate; + unsigned int lanes; }; static const struct ropll_config ropll_tmds_cfg[] = { @@ -568,6 +719,90 @@ static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = { REG_SEQ0(LANE_REG(0606), 0x1c), }; +static struct tx_drv_ctrl tx_drv_ctrl_rbr[4][4] = { + /* voltage swing 0, pre-emphasis 0->3 */ + { + { 0x2, 0x0, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0x4, 0x3, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0x7, 0x6, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xd, 0xc, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 1, pre-emphasis 0->2 */ + { + { 0x4, 0x0, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0x9, 0x5, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xc, 0x8, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 2, pre-emphasis 0->1 */ + { + { 0x8, 0x0, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0xc, 0x5, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 3, pre-emphasis 0 */ + { + { 0xb, 0x0, 0x7, 0x7, 0x1, 0x4, 0x1, 0x1, 0x7, 0x7 }, + } +}; + +static struct tx_drv_ctrl tx_drv_ctrl_hbr[4][4] = { + /* voltage swing 0, pre-emphasis 0->3 */ + { + { 0x2, 0x0, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0x5, 0x4, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0x9, 0x8, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xd, 0xc, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 1, pre-emphasis 0->2 */ + { + { 0x6, 0x1, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0xa, 0x6, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xc, 0x8, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 2, pre-emphasis 0->1 */ + { + { 0x9, 0x1, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0xd, 0x6, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 3, pre-emphasis 0 */ + { + { 0xc, 0x1, 0x7, 0x7, 0x1, 0x4, 0x1, 0x1, 0x7, 0x7 }, + } +}; + +static struct tx_drv_ctrl tx_drv_ctrl_hbr2[4][4] = { + /* voltage swing 0, pre-emphasis 0->3 */ + { + { 0x2, 0x1, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0x5, 0x4, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0x9, 0x8, 0x4, 0x6, 0x1, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xd, 0xc, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 1, pre-emphasis 0->2 */ + { + { 0x6, 0x1, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0xb, 0x7, 0x4, 0x6, 0x0, 0x4, 0x0, 0x1, 0x7, 0x7 }, + { 0xd, 0x9, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 2, pre-emphasis 0->1 */ + { + { 0x8, 0x1, 0x4, 0x6, 0x0, 0x4, 0x1, 0x1, 0x7, 0x7 }, + { 0xc, 0x6, 0x7, 0x7, 0x1, 0x7, 0x0, 0x1, 0x7, 0x7 }, + }, + + /* voltage swing 3, pre-emphasis 0 */ + { + { 0xb, 0x0, 0x7, 0x7, 0x1, 0x4, 0x1, 0x1, 0x7, 0x7 }, + } +}; + static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -854,9 +1089,45 @@ static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, return rk_hdptx_post_enable_lane(hdptx); } +static void rk_hdptx_dp_reset(struct rk_hdptx_phy *hdptx) +{ + reset_control_assert(hdptx->rsts[RST_LANE].rstc); + reset_control_assert(hdptx->rsts[RST_CMN].rstc); + reset_control_assert(hdptx->rsts[RST_INIT].rstc); + + reset_control_assert(hdptx->rsts[RST_APB].rstc); + udelay(10); + reset_control_deassert(hdptx->rsts[RST_APB].rstc); + + regmap_update_bits(hdptx->regmap, LANE_REG(0301), + OVRD_LN_TX_DRV_EI_EN_MASK | LN_TX_DRV_EI_EN_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_EI_EN_MASK, 1) | + FIELD_PREP(LN_TX_DRV_EI_EN_MASK, 0)); + regmap_update_bits(hdptx->regmap, LANE_REG(0401), + OVRD_LN_TX_DRV_EI_EN_MASK | LN_TX_DRV_EI_EN_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_EI_EN_MASK, 1) | + FIELD_PREP(LN_TX_DRV_EI_EN_MASK, 0)); + regmap_update_bits(hdptx->regmap, LANE_REG(0501), + OVRD_LN_TX_DRV_EI_EN_MASK | LN_TX_DRV_EI_EN_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_EI_EN_MASK, 1) | + FIELD_PREP(LN_TX_DRV_EI_EN_MASK, 0)); + regmap_update_bits(hdptx->regmap, LANE_REG(0601), + OVRD_LN_TX_DRV_EI_EN_MASK | LN_TX_DRV_EI_EN_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_EI_EN_MASK, 1) | + FIELD_PREP(LN_TX_DRV_EI_EN_MASK, 0)); + + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_PLL_EN << 16 | FIELD_PREP(HDPTX_I_PLL_EN, 0x0)); + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_BIAS_EN << 16 | FIELD_PREP(HDPTX_I_BIAS_EN, 0x0)); + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_BGR_EN << 16 | FIELD_PREP(HDPTX_I_BGR_EN, 0x0)); +} + static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx, unsigned int rate) { + enum phy_mode mode = phy_get_mode(hdptx->phy); u32 status; int ret; @@ -870,10 +1141,14 @@ static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx, if (status & HDPTX_O_PLL_LOCK_DONE) dev_warn(hdptx->dev, "PLL locked by unknown consumer!\n"); - if (rate) { - ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate); - if (ret) - goto dec_usage; + if (mode == PHY_MODE_DP) { + rk_hdptx_dp_reset(hdptx); + } else { + if (rate) { + ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate); + if (ret) + goto dec_usage; + } } return 0; @@ -885,6 +1160,7 @@ static int rk_hdptx_phy_consumer_get(struct rk_hdptx_phy *hdptx, static int rk_hdptx_phy_consumer_put(struct rk_hdptx_phy *hdptx, bool force) { + enum phy_mode mode = phy_get_mode(hdptx->phy); u32 status; int ret; @@ -898,8 +1174,12 @@ static int rk_hdptx_phy_consumer_put(struct rk_hdptx_phy *hdptx, bool force) } else { ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &status); if (!ret) { - if (status & HDPTX_O_PLL_LOCK_DONE) - rk_hdptx_phy_disable(hdptx); + if (status & HDPTX_O_PLL_LOCK_DONE) { + if (mode == PHY_MODE_DP) + rk_hdptx_dp_reset(hdptx); + else + rk_hdptx_phy_disable(hdptx); + } return 0; } else if (force) { return 0; @@ -910,11 +1190,262 @@ static int rk_hdptx_phy_consumer_put(struct rk_hdptx_phy *hdptx, bool force) return ret; } +static void rk_hdptx_dp_pll_init(struct rk_hdptx_phy *hdptx) +{ + regmap_update_bits(hdptx->regmap, CMN_REG(003c), ANA_LCPLL_RESERVED7_MASK, + FIELD_PREP(ANA_LCPLL_RESERVED7_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0046), + ROPLL_ANA_CPP_CTRL_COARSE_MASK | ROPLL_ANA_CPP_CTRL_FINE_MASK, + FIELD_PREP(ROPLL_ANA_CPP_CTRL_COARSE_MASK, 0xe) | + FIELD_PREP(ROPLL_ANA_CPP_CTRL_FINE_MASK, 0xe)); + regmap_update_bits(hdptx->regmap, CMN_REG(0047), + ROPLL_ANA_LPF_C_SEL_COARSE_MASK | + ROPLL_ANA_LPF_C_SEL_FINE_MASK, + FIELD_PREP(ROPLL_ANA_LPF_C_SEL_COARSE_MASK, 0x4) | + FIELD_PREP(ROPLL_ANA_LPF_C_SEL_FINE_MASK, 0x4)); + + regmap_write(hdptx->regmap, CMN_REG(0051), FIELD_PREP(ROPLL_PMS_MDIV_MASK, 0x87)); + regmap_write(hdptx->regmap, CMN_REG(0052), FIELD_PREP(ROPLL_PMS_MDIV_MASK, 0x71)); + regmap_write(hdptx->regmap, CMN_REG(0053), FIELD_PREP(ROPLL_PMS_MDIV_MASK, 0x71)); + + regmap_write(hdptx->regmap, CMN_REG(0055), + FIELD_PREP(ROPLL_PMS_MDIV_AFC_MASK, 0x87)); + regmap_write(hdptx->regmap, CMN_REG(0056), + FIELD_PREP(ROPLL_PMS_MDIV_AFC_MASK, 0x71)); + regmap_write(hdptx->regmap, CMN_REG(0057), + FIELD_PREP(ROPLL_PMS_MDIV_AFC_MASK, 0x71)); + + regmap_write(hdptx->regmap, CMN_REG(0059), + FIELD_PREP(ANA_ROPLL_PMS_PDIV_MASK, 0x1) | + FIELD_PREP(ANA_ROPLL_PMS_REFDIV_MASK, 0x1)); + regmap_write(hdptx->regmap, CMN_REG(005a), + FIELD_PREP(ROPLL_PMS_SDIV_RBR_MASK, 0x3) | + FIELD_PREP(ROPLL_PMS_SDIV_HBR_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(005b), ROPLL_PMS_SDIV_HBR2_MASK, + FIELD_PREP(ROPLL_PMS_SDIV_HBR2_MASK, 0x0)); + + regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK, + FIELD_PREP(ROPLL_SDM_EN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(005e), + OVRD_ROPLL_SDM_RSTN_MASK | ROPLL_SDM_RSTN_MASK, + FIELD_PREP(OVRD_ROPLL_SDM_RSTN_MASK, 0x1) | + FIELD_PREP(ROPLL_SDM_RSTN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDC_FRAC_EN_RBR_MASK, + FIELD_PREP(ROPLL_SDC_FRAC_EN_RBR_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDC_FRAC_EN_HBR_MASK, + FIELD_PREP(ROPLL_SDC_FRAC_EN_HBR_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDC_FRAC_EN_HBR2_MASK, + FIELD_PREP(ROPLL_SDC_FRAC_EN_HBR2_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(005f), + OVRD_ROPLL_SDC_RSTN_MASK | ROPLL_SDC_RSTN_MASK, + FIELD_PREP(OVRD_ROPLL_SDC_RSTN_MASK, 0x1) | + FIELD_PREP(ROPLL_SDC_RSTN_MASK, 0x1)); + regmap_write(hdptx->regmap, CMN_REG(0060), + FIELD_PREP(ROPLL_SDM_DENOMINATOR_MASK, 0x21)); + regmap_write(hdptx->regmap, CMN_REG(0061), + FIELD_PREP(ROPLL_SDM_DENOMINATOR_MASK, 0x27)); + regmap_write(hdptx->regmap, CMN_REG(0062), + FIELD_PREP(ROPLL_SDM_DENOMINATOR_MASK, 0x27)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0064), + ROPLL_SDM_NUM_SIGN_RBR_MASK | + ROPLL_SDM_NUM_SIGN_HBR_MASK | + ROPLL_SDM_NUM_SIGN_HBR2_MASK, + FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, 0x0) | + FIELD_PREP(ROPLL_SDM_NUM_SIGN_HBR_MASK, 0x1) | + FIELD_PREP(ROPLL_SDM_NUM_SIGN_HBR2_MASK, 0x1)); + regmap_write(hdptx->regmap, CMN_REG(0065), + FIELD_PREP(ROPLL_SDM_NUM_MASK, 0x0)); + regmap_write(hdptx->regmap, CMN_REG(0066), + FIELD_PREP(ROPLL_SDM_NUM_MASK, 0xd)); + regmap_write(hdptx->regmap, CMN_REG(0067), + FIELD_PREP(ROPLL_SDM_NUM_MASK, 0xd)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK, + FIELD_PREP(ROPLL_SDC_N_RBR_MASK, 0x2)); + + regmap_update_bits(hdptx->regmap, CMN_REG(006a), + ROPLL_SDC_N_HBR_MASK | ROPLL_SDC_N_HBR2_MASK, + FIELD_PREP(ROPLL_SDC_N_HBR_MASK, 0x1) | + FIELD_PREP(ROPLL_SDC_N_HBR2_MASK, 0x1)); + + regmap_write(hdptx->regmap, CMN_REG(006c), + FIELD_PREP(ROPLL_SDC_NUM_MASK, 0x3)); + regmap_write(hdptx->regmap, CMN_REG(006d), + FIELD_PREP(ROPLL_SDC_NUM_MASK, 0x7)); + regmap_write(hdptx->regmap, CMN_REG(006e), + FIELD_PREP(ROPLL_SDC_NUM_MASK, 0x7)); + + regmap_write(hdptx->regmap, CMN_REG(0070), + FIELD_PREP(ROPLL_SDC_DENO_MASK, 0x8)); + regmap_write(hdptx->regmap, CMN_REG(0071), + FIELD_PREP(ROPLL_SDC_DENO_MASK, 0x18)); + regmap_write(hdptx->regmap, CMN_REG(0072), + FIELD_PREP(ROPLL_SDC_DENO_MASK, 0x18)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0074), + OVRD_ROPLL_SDC_NDIV_RSTN_MASK | ROPLL_SDC_NDIV_RSTN_MASK, + FIELD_PREP(OVRD_ROPLL_SDC_NDIV_RSTN_MASK, 0x1) | + FIELD_PREP(ROPLL_SDC_NDIV_RSTN_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0077), ANA_ROPLL_SSC_CLK_DIV_SEL_MASK, + FIELD_PREP(ANA_ROPLL_SSC_CLK_DIV_SEL_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0081), ANA_PLL_CD_TX_SER_RATE_SEL_MASK, + FIELD_PREP(ANA_PLL_CD_TX_SER_RATE_SEL_MASK, 0x0)); + regmap_update_bits(hdptx->regmap, CMN_REG(0081), + ANA_PLL_CD_HSCLK_EAST_EN_MASK | ANA_PLL_CD_HSCLK_WEST_EN_MASK, + FIELD_PREP(ANA_PLL_CD_HSCLK_EAST_EN_MASK, 0x1) | + FIELD_PREP(ANA_PLL_CD_HSCLK_WEST_EN_MASK, 0x0)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0082), ANA_PLL_CD_VREG_GAIN_CTRL_MASK, + FIELD_PREP(ANA_PLL_CD_VREG_GAIN_CTRL_MASK, 0x4)); + regmap_update_bits(hdptx->regmap, CMN_REG(0083), ANA_PLL_CD_VREG_ICTRL_MASK, + FIELD_PREP(ANA_PLL_CD_VREG_ICTRL_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(0084), PLL_LCRO_CLK_SEL_MASK, + FIELD_PREP(PLL_LCRO_CLK_SEL_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(0085), ANA_PLL_SYNC_LOSS_DET_MODE_MASK, + FIELD_PREP(ANA_PLL_SYNC_LOSS_DET_MODE_MASK, 0x3)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0087), ANA_PLL_TX_HS_CLK_EN_MASK, + FIELD_PREP(ANA_PLL_TX_HS_CLK_EN_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0097), DIG_CLK_SEL_MASK, + FIELD_PREP(DIG_CLK_SEL_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0099), CMN_ROPLL_ALONE_MODE_MASK, + FIELD_PREP(CMN_ROPLL_ALONE_MODE_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(009a), HS_SPEED_SEL_MASK, + FIELD_PREP(HS_SPEED_SEL_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, CMN_REG(009b), LS_SPEED_SEL_MASK, + FIELD_PREP(LS_SPEED_SEL_MASK, 0x1)); +} + +static int rk_hdptx_dp_aux_init(struct rk_hdptx_phy *hdptx) +{ + u32 status; + int ret; + + regmap_update_bits(hdptx->regmap, SB_REG(0102), ANA_SB_RXTERM_OFFSP_MASK, + FIELD_PREP(ANA_SB_RXTERM_OFFSP_MASK, 0x3)); + regmap_update_bits(hdptx->regmap, SB_REG(0103), ANA_SB_RXTERM_OFFSN_MASK, + FIELD_PREP(ANA_SB_RXTERM_OFFSN_MASK, 0x3)); + regmap_update_bits(hdptx->regmap, SB_REG(0104), SB_AUX_EN_MASK, + FIELD_PREP(SB_AUX_EN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, SB_REG(0105), ANA_SB_TX_HLVL_PROG_MASK, + FIELD_PREP(ANA_SB_TX_HLVL_PROG_MASK, 0x7)); + regmap_update_bits(hdptx->regmap, SB_REG(0106), ANA_SB_TX_LLVL_PROG_MASK, + FIELD_PREP(ANA_SB_TX_LLVL_PROG_MASK, 0x7)); + + regmap_update_bits(hdptx->regmap, SB_REG(010d), ANA_SB_DMRX_LPBK_DATA_MASK, + FIELD_PREP(ANA_SB_DMRX_LPBK_DATA_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, SB_REG(010f), ANA_SB_VREG_GAIN_CTRL_MASK, + FIELD_PREP(ANA_SB_VREG_GAIN_CTRL_MASK, 0x0)); + regmap_update_bits(hdptx->regmap, SB_REG(0110), + ANA_SB_VREG_OUT_SEL_MASK | ANA_SB_VREG_REF_SEL_MASK, + FIELD_PREP(ANA_SB_VREG_OUT_SEL_MASK, 0x1) | + FIELD_PREP(ANA_SB_VREG_REF_SEL_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, SB_REG(0113), + SB_RX_RCAL_OPT_CODE_MASK | SB_RX_RTERM_CTRL_MASK, + FIELD_PREP(SB_RX_RCAL_OPT_CODE_MASK, 0x1) | + FIELD_PREP(SB_RX_RTERM_CTRL_MASK, 0x3)); + regmap_update_bits(hdptx->regmap, SB_REG(0114), + SB_TG_SB_EN_DELAY_TIME_MASK | SB_TG_RXTERM_EN_DELAY_TIME_MASK, + FIELD_PREP(SB_TG_SB_EN_DELAY_TIME_MASK, 0x2) | + FIELD_PREP(SB_TG_RXTERM_EN_DELAY_TIME_MASK, 0x2)); + regmap_update_bits(hdptx->regmap, SB_REG(0115), + SB_READY_DELAY_TIME_MASK | SB_TG_OSC_EN_DELAY_TIME_MASK, + FIELD_PREP(SB_READY_DELAY_TIME_MASK, 0x2) | + FIELD_PREP(SB_TG_OSC_EN_DELAY_TIME_MASK, 0x2)); + regmap_update_bits(hdptx->regmap, SB_REG(0116), + AFC_RSTN_DELAY_TIME_MASK, + FIELD_PREP(AFC_RSTN_DELAY_TIME_MASK, 0x2)); + regmap_update_bits(hdptx->regmap, SB_REG(0117), + FAST_PULSE_TIME_MASK, + FIELD_PREP(FAST_PULSE_TIME_MASK, 0x4)); + regmap_update_bits(hdptx->regmap, SB_REG(0118), + SB_TG_EARC_DMRX_RECVRD_CLK_CNT_MASK, + FIELD_PREP(SB_TG_EARC_DMRX_RECVRD_CLK_CNT_MASK, 0xa)); + + regmap_update_bits(hdptx->regmap, SB_REG(011a), SB_TG_CNT_RUN_NO_7_0_MASK, + FIELD_PREP(SB_TG_CNT_RUN_NO_7_0_MASK, 0x3)); + regmap_update_bits(hdptx->regmap, SB_REG(011b), + SB_EARC_SIG_DET_BYPASS_MASK | SB_AFC_TOL_MASK, + FIELD_PREP(SB_EARC_SIG_DET_BYPASS_MASK, 0x1) | + FIELD_PREP(SB_AFC_TOL_MASK, 0x3)); + regmap_update_bits(hdptx->regmap, SB_REG(011c), SB_AFC_STB_NUM_MASK, + FIELD_PREP(SB_AFC_STB_NUM_MASK, 0x4)); + regmap_update_bits(hdptx->regmap, SB_REG(011d), SB_TG_OSC_CNT_MIN_MASK, + FIELD_PREP(SB_TG_OSC_CNT_MIN_MASK, 0x67)); + regmap_update_bits(hdptx->regmap, SB_REG(011e), SB_TG_OSC_CNT_MAX_MASK, + FIELD_PREP(SB_TG_OSC_CNT_MAX_MASK, 0x6a)); + regmap_update_bits(hdptx->regmap, SB_REG(011f), SB_PWM_AFC_CTRL_MASK, + FIELD_PREP(SB_PWM_AFC_CTRL_MASK, 0x5)); + regmap_update_bits(hdptx->regmap, SB_REG(011f), SB_RCAL_RSTN_MASK, + FIELD_PREP(SB_RCAL_RSTN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, SB_REG(0120), SB_AUX_EN_IN_MASK, + FIELD_PREP(SB_AUX_EN_IN_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, SB_REG(0102), OVRD_SB_RXTERM_EN_MASK, + FIELD_PREP(OVRD_SB_RXTERM_EN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, SB_REG(0103), OVRD_SB_RX_RESCAL_DONE_MASK, + FIELD_PREP(OVRD_SB_RX_RESCAL_DONE_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, SB_REG(0104), OVRD_SB_EN_MASK, + FIELD_PREP(OVRD_SB_EN_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, SB_REG(0104), OVRD_SB_AUX_EN_MASK, + FIELD_PREP(OVRD_SB_AUX_EN_MASK, 0x1)); + + regmap_update_bits(hdptx->regmap, SB_REG(010f), OVRD_SB_VREG_EN_MASK, + FIELD_PREP(OVRD_SB_VREG_EN_MASK, 0x1)); + + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_BGR_EN << 16 | FIELD_PREP(HDPTX_I_BGR_EN, 0x1)); + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_BIAS_EN << 16 | FIELD_PREP(HDPTX_I_BIAS_EN, 0x1)); + usleep_range(20, 25); + + reset_control_deassert(hdptx->rsts[RST_INIT].rstc); + usleep_range(20, 25); + reset_control_deassert(hdptx->rsts[RST_CMN].rstc); + usleep_range(20, 25); + + regmap_update_bits(hdptx->regmap, SB_REG(0103), OVRD_SB_RX_RESCAL_DONE_MASK, + FIELD_PREP(OVRD_SB_RX_RESCAL_DONE_MASK, 0x1)); + usleep_range(100, 110); + regmap_update_bits(hdptx->regmap, SB_REG(0104), SB_EN_MASK, + FIELD_PREP(SB_EN_MASK, 0x1)); + usleep_range(100, 110); + regmap_update_bits(hdptx->regmap, SB_REG(0102), SB_RXTERM_EN_MASK, + FIELD_PREP(SB_RXTERM_EN_MASK, 0x1)); + usleep_range(20, 25); + regmap_update_bits(hdptx->regmap, SB_REG(010f), SB_VREG_EN_MASK, + FIELD_PREP(SB_VREG_EN_MASK, 0x1)); + usleep_range(20, 25); + regmap_update_bits(hdptx->regmap, SB_REG(0104), SB_AUX_EN_MASK, + FIELD_PREP(SB_AUX_EN_MASK, 0x1)); + usleep_range(100, 110); + + ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, + status, FIELD_GET(HDPTX_O_SB_RDY, status), + 50, 1000); + if (ret) { + dev_err(hdptx->dev, "Failed to get phy sb ready: %d\n", ret); + return ret; + } + + return 0; +} + static int rk_hdptx_phy_power_on(struct phy *phy) { struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); int bus_width = phy_get_bus_width(hdptx->phy); - int ret; + enum phy_mode mode = phy_get_mode(phy); + int ret, lane; /* * FIXME: Temporary workaround to pass pixel_clk_rate @@ -930,9 +1461,37 @@ static int rk_hdptx_phy_power_on(struct phy *phy) if (ret) return ret; - ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate); - if (ret) - rk_hdptx_phy_consumer_put(hdptx, true); + if (mode == PHY_MODE_DP) { + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_MODE_SEL << 16 | FIELD_PREP(HDPTX_MODE_SEL, 0x1)); + + for (lane = 0; lane < 4; lane++) { + regmap_update_bits(hdptx->regmap, LANE_REG(031e) + 0x400 * lane, + LN_POLARITY_INV_MASK | LN_LANE_MODE_MASK, + FIELD_PREP(LN_POLARITY_INV_MASK, 0) | + FIELD_PREP(LN_LANE_MODE_MASK, 1)); + } + + regmap_update_bits(hdptx->regmap, LNTOP_REG(0200), PROTOCOL_SEL_MASK, + FIELD_PREP(PROTOCOL_SEL_MASK, 0x0)); + regmap_update_bits(hdptx->regmap, LNTOP_REG(0206), DATA_BUS_WIDTH_MASK, + FIELD_PREP(DATA_BUS_WIDTH_MASK, 0x1)); + regmap_update_bits(hdptx->regmap, LNTOP_REG(0206), DATA_BUS_WIDTH_SEL_MASK, + FIELD_PREP(DATA_BUS_WIDTH_SEL_MASK, 0x0)); + + rk_hdptx_dp_pll_init(hdptx); + + ret = rk_hdptx_dp_aux_init(hdptx); + if (ret) + rk_hdptx_phy_consumer_put(hdptx, true); + } else { + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_MODE_SEL << 16 | FIELD_PREP(HDPTX_MODE_SEL, 0x0)); + + ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate); + if (ret) + rk_hdptx_phy_consumer_put(hdptx, true); + } return ret; } @@ -944,9 +1503,308 @@ static int rk_hdptx_phy_power_off(struct phy *phy) return rk_hdptx_phy_consumer_put(hdptx, false); } +static int rk_hdptx_phy_verify_config(struct rk_hdptx_phy *hdptx, + struct phy_configure_opts_dp *dp) +{ + int i; + + if (dp->set_rate) { + switch (dp->link_rate) { + case 1620: + case 2700: + case 5400: + break; + default: + return -EINVAL; + } + } + + if (dp->set_lanes) { + switch (dp->lanes) { + case 1: + case 2: + case 4: + break; + default: + return -EINVAL; + } + } + + if (dp->set_voltages) { + for (i = 0; i < hdptx->lanes; i++) { + if (dp->voltage[i] > 3 || dp->pre[i] > 3) + return -EINVAL; + + if (dp->voltage[i] + dp->pre[i] > 3) + return -EINVAL; + } + } + + return 0; +} + +static int rk_hdptx_phy_set_rate(struct rk_hdptx_phy *hdptx, + struct phy_configure_opts_dp *dp) +{ + u32 bw, status; + int ret; + + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_PLL_EN << 16 | FIELD_PREP(HDPTX_I_PLL_EN, 0x0)); + + switch (dp->link_rate) { + case 1620: + bw = DP_BW_RBR; + break; + case 2700: + bw = DP_BW_HBR; + break; + case 5400: + bw = DP_BW_HBR2; + break; + default: + return -EINVAL; + } + hdptx->link_rate = dp->link_rate; + + regmap_update_bits(hdptx->regmap, CMN_REG(0008), OVRD_LCPLL_EN_MASK | LCPLL_EN_MASK, + FIELD_PREP(OVRD_LCPLL_EN_MASK, 0x1) | + FIELD_PREP(LCPLL_EN_MASK, 0x0)); + + regmap_update_bits(hdptx->regmap, CMN_REG(003d), OVRD_ROPLL_EN_MASK | ROPLL_EN_MASK, + FIELD_PREP(OVRD_ROPLL_EN_MASK, 0x1) | + FIELD_PREP(ROPLL_EN_MASK, 0x1)); + + if (dp->ssc) { + regmap_update_bits(hdptx->regmap, CMN_REG(0074), + OVRD_ROPLL_SSC_EN_MASK | ROPLL_SSC_EN_MASK, + FIELD_PREP(OVRD_ROPLL_SSC_EN_MASK, 0x1) | + FIELD_PREP(ROPLL_SSC_EN_MASK, 0x1)); + regmap_write(hdptx->regmap, CMN_REG(0075), + FIELD_PREP(ANA_ROPLL_SSC_FM_DEVIATION_MASK, 0xc)); + regmap_update_bits(hdptx->regmap, CMN_REG(0076), + ANA_ROPLL_SSC_FM_FREQ_MASK, + FIELD_PREP(ANA_ROPLL_SSC_FM_FREQ_MASK, 0x1f)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0099), SSC_EN_MASK, + FIELD_PREP(SSC_EN_MASK, 0x2)); + } else { + regmap_update_bits(hdptx->regmap, CMN_REG(0074), + OVRD_ROPLL_SSC_EN_MASK | ROPLL_SSC_EN_MASK, + FIELD_PREP(OVRD_ROPLL_SSC_EN_MASK, 0x1) | + FIELD_PREP(ROPLL_SSC_EN_MASK, 0x0)); + regmap_write(hdptx->regmap, CMN_REG(0075), + FIELD_PREP(ANA_ROPLL_SSC_FM_DEVIATION_MASK, 0x20)); + regmap_update_bits(hdptx->regmap, CMN_REG(0076), + ANA_ROPLL_SSC_FM_FREQ_MASK, + FIELD_PREP(ANA_ROPLL_SSC_FM_FREQ_MASK, 0xc)); + + regmap_update_bits(hdptx->regmap, CMN_REG(0099), SSC_EN_MASK, + FIELD_PREP(SSC_EN_MASK, 0x0)); + } + + regmap_update_bits(hdptx->regmap, CMN_REG(0095), DP_TX_LINK_BW_MASK, + FIELD_PREP(DP_TX_LINK_BW_MASK, bw)); + + regmap_write(hdptx->grf, GRF_HDPTX_CON0, + HDPTX_I_PLL_EN << 16 | FIELD_PREP(HDPTX_I_PLL_EN, 0x1)); + + ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, + status, FIELD_GET(HDPTX_O_PLL_LOCK_DONE, status), + 50, 1000); + if (ret) { + dev_err(hdptx->dev, "Failed to get phy pll lock: %d\n", ret); + return ret; + } + + return 0; +} + +static int rk_hdptx_phy_set_lanes(struct rk_hdptx_phy *hdptx, + struct phy_configure_opts_dp *dp) +{ + hdptx->lanes = dp->lanes; + + regmap_update_bits(hdptx->regmap, LNTOP_REG(0207), LANE_EN_MASK, + FIELD_PREP(LANE_EN_MASK, GENMASK(hdptx->lanes - 1, 0))); + + return 0; +} + +static void rk_hdptx_phy_set_voltage(struct rk_hdptx_phy *hdptx, + struct phy_configure_opts_dp *dp, + u8 lane) +{ + const struct tx_drv_ctrl *ctrl; + u32 offset = lane * 0x400; + + switch (hdptx->link_rate) { + case 1620: + ctrl = &tx_drv_ctrl_rbr[dp->voltage[lane]][dp->pre[lane]]; + regmap_update_bits(hdptx->regmap, LANE_REG(030a) + offset, + LN_TX_JEQ_EVEN_CTRL_RBR_MASK, + FIELD_PREP(LN_TX_JEQ_EVEN_CTRL_RBR_MASK, + ctrl->tx_jeq_even_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(030c) + offset, + LN_TX_JEQ_ODD_CTRL_RBR_MASK, + FIELD_PREP(LN_TX_JEQ_ODD_CTRL_RBR_MASK, + ctrl->tx_jeq_odd_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0311) + offset, + LN_TX_SER_40BIT_EN_RBR_MASK, + FIELD_PREP(LN_TX_SER_40BIT_EN_RBR_MASK, 0x1)); + break; + case 2700: + ctrl = &tx_drv_ctrl_hbr[dp->voltage[lane]][dp->pre[lane]]; + regmap_update_bits(hdptx->regmap, LANE_REG(030b) + offset, + LN_TX_JEQ_EVEN_CTRL_HBR_MASK, + FIELD_PREP(LN_TX_JEQ_EVEN_CTRL_HBR_MASK, + ctrl->tx_jeq_even_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(030d) + offset, + LN_TX_JEQ_ODD_CTRL_HBR_MASK, + FIELD_PREP(LN_TX_JEQ_ODD_CTRL_HBR_MASK, + ctrl->tx_jeq_odd_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0311) + offset, + LN_TX_SER_40BIT_EN_HBR_MASK, + FIELD_PREP(LN_TX_SER_40BIT_EN_HBR_MASK, 0x1)); + break; + case 5400: + default: + ctrl = &tx_drv_ctrl_hbr2[dp->voltage[lane]][dp->pre[lane]]; + regmap_update_bits(hdptx->regmap, LANE_REG(030b) + offset, + LN_TX_JEQ_EVEN_CTRL_HBR2_MASK, + FIELD_PREP(LN_TX_JEQ_EVEN_CTRL_HBR2_MASK, + ctrl->tx_jeq_even_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(030d) + offset, + LN_TX_JEQ_ODD_CTRL_HBR2_MASK, + FIELD_PREP(LN_TX_JEQ_ODD_CTRL_HBR2_MASK, + ctrl->tx_jeq_odd_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0311) + offset, + LN_TX_SER_40BIT_EN_HBR2_MASK, + FIELD_PREP(LN_TX_SER_40BIT_EN_HBR2_MASK, 0x1)); + break; + } + + regmap_update_bits(hdptx->regmap, LANE_REG(0303) + offset, + OVRD_LN_TX_DRV_LVL_CTRL_MASK | LN_TX_DRV_LVL_CTRL_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_LVL_CTRL_MASK, 0x1) | + FIELD_PREP(LN_TX_DRV_LVL_CTRL_MASK, + ctrl->tx_drv_lvl_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0304) + offset, + OVRD_LN_TX_DRV_POST_LVL_CTRL_MASK | + LN_TX_DRV_POST_LVL_CTRL_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_POST_LVL_CTRL_MASK, 0x1) | + FIELD_PREP(LN_TX_DRV_POST_LVL_CTRL_MASK, + ctrl->tx_drv_post_lvl_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0305) + offset, + OVRD_LN_TX_DRV_PRE_LVL_CTRL_MASK | + LN_TX_DRV_PRE_LVL_CTRL_MASK, + FIELD_PREP(OVRD_LN_TX_DRV_PRE_LVL_CTRL_MASK, 0x1) | + FIELD_PREP(LN_TX_DRV_PRE_LVL_CTRL_MASK, + ctrl->tx_drv_pre_lvl_ctrl)); + regmap_update_bits(hdptx->regmap, LANE_REG(0306) + offset, + LN_ANA_TX_DRV_IDRV_IDN_CTRL_MASK | + LN_ANA_TX_DRV_IDRV_IUP_CTRL_MASK | + LN_ANA_TX_DRV_ACCDRV_EN_MASK, + FIELD_PREP(LN_ANA_TX_DRV_IDRV_IDN_CTRL_MASK, + ctrl->ana_tx_drv_idrv_idn_ctrl) | + FIELD_PREP(LN_ANA_TX_DRV_IDRV_IUP_CTRL_MASK, + ctrl->ana_tx_drv_idrv_iup_ctrl) | + FIELD_PREP(LN_ANA_TX_DRV_ACCDRV_EN_MASK, + ctrl->ana_tx_drv_accdrv_en)); + regmap_update_bits(hdptx->regmap, LANE_REG(0307) + offset, + LN_ANA_TX_DRV_ACCDRV_POL_SEL_MASK | + LN_ANA_TX_DRV_ACCDRV_CTRL_MASK, + FIELD_PREP(LN_ANA_TX_DRV_ACCDRV_POL_SEL_MASK, 0x1) | + FIELD_PREP(LN_ANA_TX_DRV_ACCDRV_CTRL_MASK, + ctrl->ana_tx_drv_accdrv_ctrl)); + + regmap_update_bits(hdptx->regmap, LANE_REG(030a) + offset, + LN_ANA_TX_JEQ_EN_MASK, + FIELD_PREP(LN_ANA_TX_JEQ_EN_MASK, ctrl->ana_tx_jeq_en)); + + regmap_update_bits(hdptx->regmap, LANE_REG(0310) + offset, + LN_ANA_TX_SYNC_LOSS_DET_MODE_MASK, + FIELD_PREP(LN_ANA_TX_SYNC_LOSS_DET_MODE_MASK, 0x3)); + + regmap_update_bits(hdptx->regmap, LANE_REG(0316) + offset, + LN_ANA_TX_SER_VREG_GAIN_CTRL_MASK, + FIELD_PREP(LN_ANA_TX_SER_VREG_GAIN_CTRL_MASK, 0x2)); + + regmap_update_bits(hdptx->regmap, LANE_REG(031b) + offset, + LN_ANA_TX_RESERVED_MASK, + FIELD_PREP(LN_ANA_TX_RESERVED_MASK, 0x1)); +} + +static int rk_hdptx_phy_set_voltages(struct rk_hdptx_phy *hdptx, + struct phy_configure_opts_dp *dp) +{ + u8 lane; + u32 status; + int ret; + + for (lane = 0; lane < hdptx->lanes; lane++) + rk_hdptx_phy_set_voltage(hdptx, dp, lane); + + reset_control_deassert(hdptx->rsts[RST_LANE].rstc); + + ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, + status, FIELD_GET(HDPTX_O_PHY_RDY, status), + 50, 5000); + if (ret) { + dev_err(hdptx->dev, "Failed to get phy ready: %d\n", ret); + return ret; + } + + return 0; +} + +static int rk_hdptx_phy_configure(struct phy *phy, union phy_configure_opts *opts) +{ + struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); + enum phy_mode mode = phy_get_mode(phy); + int ret; + + if (mode != PHY_MODE_DP) + return 0; + + ret = rk_hdptx_phy_verify_config(hdptx, &opts->dp); + if (ret) { + dev_err(hdptx->dev, "invalid params for phy configure\n"); + return ret; + } + + if (opts->dp.set_rate) { + ret = rk_hdptx_phy_set_rate(hdptx, &opts->dp); + if (ret) { + dev_err(hdptx->dev, "failed to set rate: %d\n", ret); + return ret; + } + } + + if (opts->dp.set_lanes) { + ret = rk_hdptx_phy_set_lanes(hdptx, &opts->dp); + if (ret) { + dev_err(hdptx->dev, "failed to set lanes: %d\n", ret); + return ret; + } + } + + if (opts->dp.set_voltages) { + ret = rk_hdptx_phy_set_voltages(hdptx, &opts->dp); + if (ret) { + dev_err(hdptx->dev, "failed to set voltages: %d\n", + ret); + return ret; + } + } + + return 0; +} + static const struct phy_ops rk_hdptx_phy_ops = { .power_on = rk_hdptx_phy_power_on, .power_off = rk_hdptx_phy_power_off, + .configure = rk_hdptx_phy_configure, .owner = THIS_MODULE, }; @@ -1188,5 +2046,6 @@ module_platform_driver(rk_hdptx_phy_driver); MODULE_AUTHOR("Algea Cao "); MODULE_AUTHOR("Cristian Ciocaltea "); +MODULE_AUTHOR("Damon Ding "); MODULE_DESCRIPTION("Samsung HDMI/eDP Transmitter Combo PHY Driver"); MODULE_LICENSE("GPL"); From 0ee54dcfe76760a65d86437f66df7f93b8b81903 Mon Sep 17 00:00:00 2001 From: Sowon Na Date: Thu, 26 Dec 2024 12:11:36 +0900 Subject: [PATCH 0196/2157] dt-bindings: phy: Add ExynosAutov920 UFS PHY bindings Add samsung,exynosautov920-ufs-phy compatible for ExynosAuto v920 SoC. Signed-off-by: Sowon Na Acked-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20241226031142.1764652-2-sowon.na@samsung.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml index f402e31bf58d..d70ffeb6e824 100644 --- a/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml @@ -18,6 +18,7 @@ properties: - google,gs101-ufs-phy - samsung,exynos7-ufs-phy - samsung,exynosautov9-ufs-phy + - samsung,exynosautov920-ufs-phy - tesla,fsd-ufs-phy reg: From d2317767723b63d28e3b93da92760b7934935536 Mon Sep 17 00:00:00 2001 From: Sowon Na Date: Thu, 26 Dec 2024 12:11:37 +0900 Subject: [PATCH 0197/2157] phy: samsung-ufs: support ExynosAutov920 ufs phy driver Add support for ExynosAutov920 ufs phy driver. Signed-off-by: Sowon Na Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20241226031142.1764652-3-sowon.na@samsung.com Signed-off-by: Vinod Koul --- drivers/phy/samsung/Makefile | 1 + drivers/phy/samsung/phy-exynosautov920-ufs.c | 168 +++++++++++++++++++ drivers/phy/samsung/phy-samsung-ufs.c | 9 +- drivers/phy/samsung/phy-samsung-ufs.h | 4 + 4 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 drivers/phy/samsung/phy-exynosautov920-ufs.c diff --git a/drivers/phy/samsung/Makefile b/drivers/phy/samsung/Makefile index fea1f96d0e43..342682638a87 100644 --- a/drivers/phy/samsung/Makefile +++ b/drivers/phy/samsung/Makefile @@ -7,6 +7,7 @@ phy-exynos-ufs-y += phy-gs101-ufs.o phy-exynos-ufs-y += phy-samsung-ufs.o phy-exynos-ufs-y += phy-exynos7-ufs.o phy-exynos-ufs-y += phy-exynosautov9-ufs.o +phy-exynos-ufs-y += phy-exynosautov920-ufs.o phy-exynos-ufs-y += phy-fsd-ufs.o obj-$(CONFIG_PHY_SAMSUNG_USB2) += phy-exynos-usb2.o phy-exynos-usb2-y += phy-samsung-usb2.o diff --git a/drivers/phy/samsung/phy-exynosautov920-ufs.c b/drivers/phy/samsung/phy-exynosautov920-ufs.c new file mode 100644 index 000000000000..21ef79c42f95 --- /dev/null +++ b/drivers/phy/samsung/phy-exynosautov920-ufs.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * UFS PHY driver data for Samsung ExynosAuto v920 SoC + * + * Copyright (C) 2024 Samsung Electronics Co., Ltd. + */ + +#include "phy-samsung-ufs.h" + +#define EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL 0x708 +#define EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL_MASK 0x1 +#define EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL_EN BIT(0) +#define EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CDR_LOCK_STATUS 0x5e + +#define EXYNOSAUTOV920_CDR_LOCK_OFFSET 0xce4 + +#define PHY_EXYNOSAUTOV920_LANE_OFFSET 0x200 +#define PHY_TRSV_REG_CFG_AUTOV920(o, v, d) \ + PHY_TRSV_REG_CFG_OFFSET(o, v, d, PHY_EXYNOSAUTOV920_LANE_OFFSET) + +/* Calibration for phy initialization */ +static const struct samsung_ufs_phy_cfg exynosautov920_pre_init_cfg[] = { + PHY_COMN_REG_CFG(0x29, 0x22, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x43, 0x10, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x3c, 0x14, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x46, 0x48, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x04, 0x95, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x06, 0x30, PWR_MODE_ANY), + + PHY_TRSV_REG_CFG_AUTOV920(0x200, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x201, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x202, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x203, 0x0a, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x204, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x205, 0x10, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x207, 0x0c, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2e1, 0xc0, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x22d, 0xf8, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x234, 0x60, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x238, 0x13, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x239, 0x48, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23a, 0x01, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23b, 0x29, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23c, 0x2a, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23d, 0x01, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23e, 0x14, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x23f, 0x13, PWR_MODE_ANY), + + PHY_TRSV_REG_CFG_AUTOV920(0x240, 0x4a, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x243, 0x40, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x244, 0x02, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x25d, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x25e, 0x3f, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x25f, 0xff, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x26f, 0xf0, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x273, 0x33, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x274, 0x50, PWR_MODE_ANY), + + PHY_TRSV_REG_CFG_AUTOV920(0x284, 0x02, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x285, 0x02, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2a2, 0x04, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x27d, 0x01, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2fa, 0x01, PWR_MODE_ANY), + + PHY_TRSV_REG_CFG_AUTOV920(0x286, 0x03, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x287, 0x03, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x288, 0x03, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x289, 0x03, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2b3, 0x04, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2b6, 0x0b, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2b7, 0x0b, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2b8, 0x0b, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2b9, 0x0b, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2ba, 0x0b, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2bb, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2bc, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2bd, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x2be, 0x06, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x34b, 0x01, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x34c, 0x24, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x34d, 0x23, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x34e, 0x45, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x34f, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x350, 0x31, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x351, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x352, 0x02, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x353, 0x00, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x354, 0x01, PWR_MODE_ANY), + + PHY_COMN_REG_CFG(0x43, 0x18, PWR_MODE_ANY), + PHY_COMN_REG_CFG(0x43, 0x00, PWR_MODE_ANY), + + END_UFS_PHY_CFG, +}; + +/* Calibration for HS mode series A/B */ +static const struct samsung_ufs_phy_cfg exynosautov920_pre_pwr_hs_cfg[] = { + PHY_TRSV_REG_CFG_AUTOV920(0x369, 0x11, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x246, 0x03, PWR_MODE_ANY), + + END_UFS_PHY_CFG, +}; + +static const struct samsung_ufs_phy_cfg exynosautov920_post_pwr_hs_cfg[] = { + END_UFS_PHY_CFG, +}; + +#define DELAY_IN_US 40 +#define RETRY_CNT 100 +#define EXYNOSAUTOV920_CDR_LOCK_MASK 0x8 + +int exynosautov920_ufs_phy_wait_cdr_lock(struct phy *phy, u8 lane) +{ + struct samsung_ufs_phy *ufs_phy = get_samsung_ufs_phy(phy); + u32 reg, i; + + struct samsung_ufs_phy_cfg cfg[4] = { + PHY_TRSV_REG_CFG_AUTOV920(0x222, 0x10, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x222, 0x18, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV920(0x246, 0x01, PWR_MODE_ANY), + END_UFS_PHY_CFG, + }; + + for (i = 0; i < RETRY_CNT; i++) { + udelay(DELAY_IN_US); + + reg = readl(ufs_phy->reg_pma + EXYNOSAUTOV920_CDR_LOCK_OFFSET + + (PHY_APB_ADDR(PHY_EXYNOSAUTOV920_LANE_OFFSET) * lane)); + + if ((reg & EXYNOSAUTOV920_CDR_LOCK_MASK) + == EXYNOSAUTOV920_CDR_LOCK_MASK) { + samsung_ufs_phy_config(ufs_phy, &cfg[2], lane); + return 0; + } + + udelay(DELAY_IN_US); + + /* Disable and enable CDR */ + samsung_ufs_phy_config(ufs_phy, &cfg[0], lane); + samsung_ufs_phy_config(ufs_phy, &cfg[1], lane); + } + + dev_err(ufs_phy->dev, "failed to get phy cdr lock\n"); + return -ETIMEDOUT; +} + +static const struct samsung_ufs_phy_cfg *exynosautov920_ufs_phy_cfgs[CFG_TAG_MAX] = { + [CFG_PRE_INIT] = exynosautov920_pre_init_cfg, + [CFG_PRE_PWR_HS] = exynosautov920_pre_pwr_hs_cfg, + [CFG_POST_PWR_HS] = exynosautov920_post_pwr_hs_cfg, +}; + +static const char * const exynosautov920_ufs_phy_clks[] = { + "ref_clk", +}; + +const struct samsung_ufs_phy_drvdata exynosautov920_ufs_phy = { + .cfgs = exynosautov920_ufs_phy_cfgs, + .isol = { + .offset = EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL, + .mask = EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL_MASK, + .en = EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CTRL_EN, + }, + .clk_list = exynosautov920_ufs_phy_clks, + .num_clks = ARRAY_SIZE(exynosautov920_ufs_phy_clks), + .cdr_lock_status_offset = EXYNOSAUTOV920_EMBEDDED_COMBO_PHY_CDR_LOCK_STATUS, + .wait_for_cdr = exynosautov920_ufs_phy_wait_cdr_lock, +}; diff --git a/drivers/phy/samsung/phy-samsung-ufs.c b/drivers/phy/samsung/phy-samsung-ufs.c index 8e9ccd39f97e..f3cbe6b17b23 100644 --- a/drivers/phy/samsung/phy-samsung-ufs.c +++ b/drivers/phy/samsung/phy-samsung-ufs.c @@ -28,9 +28,9 @@ #define PHY_DEF_LANE_CNT 1 -static void samsung_ufs_phy_config(struct samsung_ufs_phy *phy, - const struct samsung_ufs_phy_cfg *cfg, - u8 lane) +void samsung_ufs_phy_config(struct samsung_ufs_phy *phy, + const struct samsung_ufs_phy_cfg *cfg, + u8 lane) { enum {LANE_0, LANE_1}; /* lane index */ @@ -323,6 +323,9 @@ static const struct of_device_id samsung_ufs_phy_match[] = { }, { .compatible = "samsung,exynosautov9-ufs-phy", .data = &exynosautov9_ufs_phy, + }, { + .compatible = "samsung,exynosautov920-ufs-phy", + .data = &exynosautov920_ufs_phy, }, { .compatible = "tesla,fsd-ufs-phy", .data = &fsd_ufs_phy, diff --git a/drivers/phy/samsung/phy-samsung-ufs.h b/drivers/phy/samsung/phy-samsung-ufs.h index 9b7deef6e10f..a28f148081d1 100644 --- a/drivers/phy/samsung/phy-samsung-ufs.h +++ b/drivers/phy/samsung/phy-samsung-ufs.h @@ -143,9 +143,13 @@ static inline void samsung_ufs_phy_ctrl_isol( } int samsung_ufs_phy_wait_for_lock_acq(struct phy *phy, u8 lane); +int exynosautov920_ufs_phy_wait_cdr_lock(struct phy *phy, u8 lane); +void samsung_ufs_phy_config(struct samsung_ufs_phy *phy, + const struct samsung_ufs_phy_cfg *cfg, u8 lane); extern const struct samsung_ufs_phy_drvdata exynos7_ufs_phy; extern const struct samsung_ufs_phy_drvdata exynosautov9_ufs_phy; +extern const struct samsung_ufs_phy_drvdata exynosautov920_ufs_phy; extern const struct samsung_ufs_phy_drvdata fsd_ufs_phy; extern const struct samsung_ufs_phy_drvdata tensor_gs101_ufs_phy; From dcba69711fff8af4370cb4c00460db0bf47c7093 Mon Sep 17 00:00:00 2001 From: Jameson Thies Date: Tue, 4 Feb 2025 02:45:58 +0000 Subject: [PATCH 0198/2157] platform/chrome: add PD_EVENT_INIT bit definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update cros_ec_commands.h to include a definition for PD_EVENT_INIT. On platforms supporting UCSI, this host event type is sent when the PPM initializes. Signed-off-by: Jameson Thies Reviewed-by: Benson Leung Acked-by: Tzung-Bi Shih Reviewed-by: Łukasz Bartosik Link: https://lore.kernel.org/r/20250204024600.4138776-2-jthies@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/cros_ec_commands.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ecf290a0c98f..1f4e4f2b89bb 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5046,6 +5046,7 @@ struct ec_response_pd_status { #define PD_EVENT_DATA_SWAP BIT(3) #define PD_EVENT_TYPEC BIT(4) #define PD_EVENT_PPM BIT(5) +#define PD_EVENT_INIT BIT(6) struct ec_response_host_event_status { uint32_t status; /* PD MCU host event status */ From 7f7283183c62411ea50730b3d0728fedd9aceb21 Mon Sep 17 00:00:00 2001 From: Jameson Thies Date: Tue, 4 Feb 2025 02:45:59 +0000 Subject: [PATCH 0199/2157] usb: typec: ucsi: resume work after EC init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A manual EC sysjump will restart the PPM and break communication with the UCSI driver by disabling notifications in the initial PPM state. Update cros_ec_ucsi to listen for PPM init events and treat them as a system resume to re-establish communication with the PPM (ChromeOS EC). Signed-off-by: Jameson Thies Reviewed-by: Łukasz Bartosik Reviewed-by: Benson Leung Link: https://lore.kernel.org/r/20250204024600.4138776-3-jthies@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/cros_ec_ucsi.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c index 744f0709a40e..4ec1c6d22310 100644 --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c @@ -206,12 +206,19 @@ static int cros_ucsi_event(struct notifier_block *nb, { struct cros_ucsi_data *udata = container_of(nb, struct cros_ucsi_data, nb); - if (!(host_event & PD_EVENT_PPM)) - return NOTIFY_OK; + if (host_event & PD_EVENT_INIT) { + /* Late init event received from ChromeOS EC. Treat this as a + * system resume to re-enable communication with the PPM. + */ + dev_dbg(udata->dev, "Late PD init received\n"); + ucsi_resume(udata->ucsi); + } - dev_dbg(udata->dev, "UCSI notification received\n"); - flush_work(&udata->work); - schedule_work(&udata->work); + if (host_event & PD_EVENT_PPM) { + dev_dbg(udata->dev, "UCSI notification received\n"); + flush_work(&udata->work); + schedule_work(&udata->work); + } return NOTIFY_OK; } From 8bc8a32a280410f68eae99e1ec623ac87586cb1f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 21:28:01 +0200 Subject: [PATCH 0200/2157] dt-bindings: usb: dwc3: Add a property to reserve endpoints Some of the endpoints may be reserved by hardware for different purposes, e.g., tracing control and output. This is the case, for instance, on Intel Merrifield and Moorefield platforms that reserve a few and USB driver may not use them for the regular transfers. Add the respective bindings. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250212193116.2487289-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/snps,dwc3-common.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml b/Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml index c956053fd036..71249b6ba616 100644 --- a/Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml +++ b/Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml @@ -65,6 +65,17 @@ properties: mode. type: boolean + snps,reserved-endpoints: + description: + Reserve endpoints for other needs, e.g, for tracing control and output. + When set, the driver will avoid using them for the regular USB transfers. + $ref: /schemas/types.yaml#/definitions/uint8-array + minItems: 1 + maxItems: 30 + items: + minimum: 2 + maximum: 31 + snps,dis-start-transfer-quirk: description: When set, disable isoc START TRANSFER command failure SW work-around From eafba0205426091354f050381c32ad1567c35844 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 21:28:02 +0200 Subject: [PATCH 0201/2157] usb: dwc3: gadget: Refactor loop to avoid NULL endpoints Prepare the gadget driver to handle the reserved endpoints that will be not allocated at the initialisation time. While at it, add a warning where the NULL endpoint should never happen. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250212193116.2487289-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d27af65eb08a..73cebb7d90c2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -547,6 +547,7 @@ static int dwc3_gadget_set_xfer_resource(struct dwc3_ep *dep) int dwc3_gadget_start_config(struct dwc3 *dwc, unsigned int resource_index) { struct dwc3_gadget_ep_cmd_params params; + struct dwc3_ep *dep; u32 cmd; int i; int ret; @@ -563,8 +564,13 @@ int dwc3_gadget_start_config(struct dwc3 *dwc, unsigned int resource_index) return ret; /* Reset resource allocation flags */ - for (i = resource_index; i < dwc->num_eps && dwc->eps[i]; i++) - dwc->eps[i]->flags &= ~DWC3_EP_RESOURCE_ALLOCATED; + for (i = resource_index; i < dwc->num_eps; i++) { + dep = dwc->eps[i]; + if (!dep) + continue; + + dep->flags &= ~DWC3_EP_RESOURCE_ALLOCATED; + } return 0; } @@ -751,9 +757,11 @@ void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc) dwc->last_fifo_depth = fifo_depth; /* Clear existing TXFIFO for all IN eps except ep0 */ - for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM); - num += 2) { + for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM); num += 2) { dep = dwc->eps[num]; + if (!dep) + continue; + /* Don't change TXFRAMNUM on usb31 version */ size = DWC3_IP_IS(DWC3) ? 0 : dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1)) & @@ -3669,6 +3677,8 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) { dep = dwc->eps[i]; + if (!dep) + continue; if (!(dep->flags & DWC3_EP_ENABLED)) continue; @@ -3818,6 +3828,10 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, u8 epnum = event->endpoint_number; dep = dwc->eps[epnum]; + if (!dep) { + dev_warn(dwc->dev, "spurious event, endpoint %u is not allocated\n", epnum); + return; + } if (!(dep->flags & DWC3_EP_ENABLED)) { if ((epnum > 1) && !(dep->flags & DWC3_EP_TRANSFER_STARTED)) From 5425191f85fea2e10248c699d64382999cb78c34 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 21:28:03 +0200 Subject: [PATCH 0202/2157] usb: dwc3: gadget: Add support for snps,reserved-endpoints property The snps,reserved-endpoints property lists the reserved endpoints that shouldn't be used for normal transfers. Add support for that to the driver. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250212193116.2487289-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 41 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 73cebb7d90c2..f3ad8434366e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3403,14 +3403,53 @@ static int dwc3_gadget_init_endpoint(struct dwc3 *dwc, u8 epnum) return 0; } +static int dwc3_gadget_get_reserved_endpoints(struct dwc3 *dwc, const char *propname, + u8 *eps, u8 num) +{ + u8 count; + int ret; + + if (!device_property_present(dwc->dev, propname)) + return 0; + + ret = device_property_count_u8(dwc->dev, propname); + if (ret < 0) + return ret; + count = ret; + + ret = device_property_read_u8_array(dwc->dev, propname, eps, min(num, count)); + if (ret) + return ret; + + return count; +} + static int dwc3_gadget_init_endpoints(struct dwc3 *dwc, u8 total) { + const char *propname = "snps,reserved-endpoints"; u8 epnum; + u8 reserved_eps[DWC3_ENDPOINTS_NUM]; + u8 count; + u8 num; + int ret; INIT_LIST_HEAD(&dwc->gadget->ep_list); + ret = dwc3_gadget_get_reserved_endpoints(dwc, propname, + reserved_eps, ARRAY_SIZE(reserved_eps)); + if (ret < 0) { + dev_err(dwc->dev, "failed to read %s\n", propname); + return ret; + } + count = ret; + for (epnum = 0; epnum < total; epnum++) { - int ret; + for (num = 0; num < count; num++) { + if (epnum == reserved_eps[num]) + break; + } + if (num < count) + continue; ret = dwc3_gadget_init_endpoint(dwc, epnum); if (ret) From 461f24bff86808ee5fbfe74751a825f8a7ab24e0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 21:28:04 +0200 Subject: [PATCH 0203/2157] usb: dwc3: gadget: Avoid using reserved endpoints on Intel Merrifield MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel Merrifield SoC uses these endpoints for tracing and they cannot be re-allocated if being used because the side band flow control signals are hard wired to certain endpoints: • 1 High BW Bulk IN (IN#1) (RTIT) • 1 1KB BW Bulk IN (IN#8) + 1 1KB BW Bulk OUT (Run Control) (OUT#8) In device mode, since RTIT (EP#1) and EXI/RunControl (EP#8) uses External Buffer Control (EBC) mode, these endpoints are to be mapped to EBC mode (to be done by EXI target driver). Additionally TRB for RTIT and EXI are maintained in STM (System Trace Module) unit and the EXI target driver will as well configure the TRB location for EP #1 IN and EP#8 (IN and OUT). Since STM/PTI and EXI hardware blocks manage these endpoints and interface to OTG3 controller through EBC interface, there is no need to enable any events (such as XferComplete etc) for these end points. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250212193116.2487289-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 052852f80146..54a4ee2b90b7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -148,11 +148,21 @@ static const struct property_entry dwc3_pci_intel_byt_properties[] = { {} }; +/* + * Intel Merrifield SoC uses these endpoints for tracing and they cannot + * be re-allocated if being used because the side band flow control signals + * are hard wired to certain endpoints: + * - 1 High BW Bulk IN (IN#1) (RTIT) + * - 1 1KB BW Bulk IN (IN#8) + 1 1KB BW Bulk OUT (Run Control) (OUT#8) + */ +static const u8 dwc3_pci_mrfld_reserved_endpoints[] = { 3, 16, 17 }; + static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("dr_mode", "otg"), PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_U8_ARRAY("snps,reserved-endpoints", dwc3_pci_mrfld_reserved_endpoints), PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"), PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), {} From 07959ad5775f0fbbb8de9eb230d1b364b9800893 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 Feb 2025 21:29:13 +0100 Subject: [PATCH 0204/2157] USB: dwc3: Use syscon_regmap_lookup_by_phandle_args Use syscon_regmap_lookup_by_phandle_args() which is a wrapper over syscon_regmap_lookup_by_phandle() combined with getting the syscon argument. Except simpler code this annotates within one line that given phandle has arguments, so grepping for code would be easier. Signed-off-by: Krzysztof Kozlowski Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250212202913.23443-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-am62.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index c158364bc03e..9db8f3ca493d 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -153,11 +153,11 @@ static int phy_syscon_pll_refclk(struct dwc3_am62 *am62) { struct device *dev = am62->dev; struct device_node *node = dev->of_node; - struct of_phandle_args args; struct regmap *syscon; int ret; - syscon = syscon_regmap_lookup_by_phandle(node, "ti,syscon-phy-pll-refclk"); + syscon = syscon_regmap_lookup_by_phandle_args(node, "ti,syscon-phy-pll-refclk", + 1, &am62->offset); if (IS_ERR(syscon)) { dev_err(dev, "unable to get ti,syscon-phy-pll-refclk regmap\n"); return PTR_ERR(syscon); @@ -165,14 +165,6 @@ static int phy_syscon_pll_refclk(struct dwc3_am62 *am62) am62->syscon = syscon; - ret = of_parse_phandle_with_fixed_args(node, "ti,syscon-phy-pll-refclk", 1, - 0, &args); - if (ret) - return ret; - - of_node_put(args.np); - am62->offset = args.args[0]; - /* Core voltage. PHY_CORE_VOLTAGE bit Recommended to be 0 always */ ret = regmap_update_bits(am62->syscon, am62->offset, PHY_CORE_VOLTAGE_MASK, 0); if (ret) { From d73ddefaf97805dd2c549f2301785edb0a7a2147 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 6 Feb 2025 11:28:27 +0200 Subject: [PATCH 0205/2157] dt-bindings: usb: Add Parade PS8830 Type-C retimer bindings The Parade PS8830 is a USB4, DisplayPort and Thunderbolt 4 retimer, controlled over I2C. It usually sits between a USB/DisplayPort PHY and the Type-C connector, and provides orientation and altmode handling. Currently, it is found on all boards featuring the Qualcomm Snapdragon X Elite SoCs. Document bindings for its new driver. Future-proof the schema for the PS8833 variant, which seems to be similar to PS8830. Signed-off-by: Abel Vesa Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20250206-x1e80100-ps8830-v6-1-60b1e49cfa8d@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/parade,ps8830.yaml | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 Documentation/devicetree/bindings/usb/parade,ps8830.yaml diff --git a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml new file mode 100644 index 000000000000..935d57f5d26f --- /dev/null +++ b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/parade,ps8830.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Parade PS883x USB and DisplayPort Retimer + +maintainers: + - Abel Vesa + +properties: + compatible: + enum: + - parade,ps8830 + + reg: + maxItems: 1 + + clocks: + items: + - description: XO Clock + + reset-gpios: + maxItems: 1 + + vdd-supply: + description: power supply (1.07V) + + vdd33-supply: + description: power supply (3.3V) + + vdd33-cap-supply: + description: power supply (3.3V) + + vddar-supply: + description: power supply (1.07V) + + vddat-supply: + description: power supply (1.07V) + + vddio-supply: + description: power supply (1.2V or 1.8V) + + orientation-switch: true + retimer-switch: true + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: Super Speed (SS) Output endpoint to the Type-C connector + + port@1: + $ref: /schemas/graph.yaml#/$defs/port-base + description: Super Speed (SS) Input endpoint from the Super-Speed PHY + unevaluatedProperties: false + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: + Sideband Use (SBU) AUX lines endpoint to the Type-C connector for the purpose of + handling altmode muxing and orientation switching. + +required: + - compatible + - reg + - clocks + - reset-gpios + - vdd-supply + - vdd33-supply + - vdd33-cap-supply + - vddat-supply + - vddio-supply + - orientation-switch + - retimer-switch + +allOf: + - $ref: usb-switch.yaml# + +additionalProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + typec-mux@8 { + compatible = "parade,ps8830"; + reg = <0x8>; + + clocks = <&clk_rtmr_xo>; + + vdd-supply = <&vreg_rtmr_1p15>; + vdd33-supply = <&vreg_rtmr_3p3>; + vdd33-cap-supply = <&vreg_rtmr_3p3>; + vddar-supply = <&vreg_rtmr_1p15>; + vddat-supply = <&vreg_rtmr_1p15>; + vddio-supply = <&vreg_rtmr_1p8>; + + reset-gpios = <&tlmm 10 GPIO_ACTIVE_LOW>; + + retimer-switch; + orientation-switch; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + endpoint { + remote-endpoint = <&typec_con_ss>; + }; + }; + + port@1 { + reg = <1>; + + endpoint { + remote-endpoint = <&usb_phy_ss>; + }; + }; + + port@2 { + reg = <2>; + + endpoint { + remote-endpoint = <&typec_dp_aux>; + }; + }; + }; + }; + }; +... From 257a087c8b5206e046048de6053fc8b3fa1af814 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 6 Feb 2025 11:28:28 +0200 Subject: [PATCH 0206/2157] usb: typec: Add support for Parade PS8830 Type-C Retimer The Parade PS8830 is a USB4, DisplayPort and Thunderbolt 4 retimer, controlled over I2C. It usually sits between a USB/DisplayPort PHY and the Type-C connector, and provides orientation and altmode handling. The boards that use this retimer are the ones featuring the Qualcomm Snapdragon X Elite SoCs. Add a driver with support for the following modes: - DisplayPort 4-lanes - DisplayPort 2-lanes + USB3 - USB3 There is another variant of this retimer which is called PS8833. It seems to be really similar to the PS8830, so future-proof this driver by naming it ps883x. Signed-off-by: Abel Vesa Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250206-x1e80100-ps8830-v6-2-60b1e49cfa8d@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/Kconfig | 10 + drivers/usb/typec/mux/Makefile | 1 + drivers/usb/typec/mux/ps883x.c | 437 +++++++++++++++++++++++++++++++++ 3 files changed, 448 insertions(+) create mode 100644 drivers/usb/typec/mux/ps883x.c diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig index 67381b4ef4f6..6dd8f961b593 100644 --- a/drivers/usb/typec/mux/Kconfig +++ b/drivers/usb/typec/mux/Kconfig @@ -56,6 +56,16 @@ config TYPEC_MUX_NB7VPQ904M Say Y or M if your system has a On Semiconductor NB7VPQ904M Type-C redriver chip found on some devices with a Type-C port. +config TYPEC_MUX_PS883X + tristate "Parade PS883x Type-C retimer driver" + depends on I2C + depends on DRM || DRM=n + select DRM_AUX_BRIDGE if DRM_BRIDGE && OF + select REGMAP_I2C + help + Say Y or M if your system has a Parade PS883x Type-C retimer chip + found on some devices with a Type-C port. + config TYPEC_MUX_PTN36502 tristate "NXP PTN36502 Type-C redriver driver" depends on I2C diff --git a/drivers/usb/typec/mux/Makefile b/drivers/usb/typec/mux/Makefile index 60879446da93..b4f599eb5053 100644 --- a/drivers/usb/typec/mux/Makefile +++ b/drivers/usb/typec/mux/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_TYPEC_MUX_PI3USB30532) += pi3usb30532.o obj-$(CONFIG_TYPEC_MUX_INTEL_PMC) += intel_pmc_mux.o obj-$(CONFIG_TYPEC_MUX_IT5205) += it5205.o obj-$(CONFIG_TYPEC_MUX_NB7VPQ904M) += nb7vpq904m.o +obj-$(CONFIG_TYPEC_MUX_PS883X) += ps883x.o obj-$(CONFIG_TYPEC_MUX_PTN36502) += ptn36502.o obj-$(CONFIG_TYPEC_MUX_TUSB1046) += tusb1046.o obj-$(CONFIG_TYPEC_MUX_WCD939X_USBSS) += wcd939x-usbss.o diff --git a/drivers/usb/typec/mux/ps883x.c b/drivers/usb/typec/mux/ps883x.c new file mode 100644 index 000000000000..10e407ab6b7f --- /dev/null +++ b/drivers/usb/typec/mux/ps883x.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Parade ps883x usb retimer driver + * + * Copyright (C) 2024 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REG_USB_PORT_CONN_STATUS_0 0x00 + +#define CONN_STATUS_0_CONNECTION_PRESENT BIT(0) +#define CONN_STATUS_0_ORIENTATION_REVERSED BIT(1) +#define CONN_STATUS_0_USB_3_1_CONNECTED BIT(5) + +#define REG_USB_PORT_CONN_STATUS_1 0x01 + +#define CONN_STATUS_1_DP_CONNECTED BIT(0) +#define CONN_STATUS_1_DP_SINK_REQUESTED BIT(1) +#define CONN_STATUS_1_DP_PIN_ASSIGNMENT_C_D BIT(2) +#define CONN_STATUS_1_DP_HPD_LEVEL BIT(7) + +#define REG_USB_PORT_CONN_STATUS_2 0x02 + +struct ps883x_retimer { + struct i2c_client *client; + struct gpio_desc *reset_gpio; + struct regmap *regmap; + struct typec_switch_dev *sw; + struct typec_retimer *retimer; + struct clk *xo_clk; + struct regulator *vdd_supply; + struct regulator *vdd33_supply; + struct regulator *vdd33_cap_supply; + struct regulator *vddat_supply; + struct regulator *vddar_supply; + struct regulator *vddio_supply; + + struct typec_switch *typec_switch; + struct typec_mux *typec_mux; + + struct mutex lock; /* protect non-concurrent retimer & switch */ + + enum typec_orientation orientation; + unsigned long mode; + unsigned int svid; +}; + +static void ps883x_configure(struct ps883x_retimer *retimer, int cfg0, + int cfg1, int cfg2) +{ + regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, cfg0); + regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_1, cfg1); + regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_2, cfg2); +} + +static int ps883x_set(struct ps883x_retimer *retimer) +{ + int cfg0 = CONN_STATUS_0_CONNECTION_PRESENT; + int cfg1 = 0x00; + int cfg2 = 0x00; + + if (retimer->orientation == TYPEC_ORIENTATION_NONE || + retimer->mode == TYPEC_STATE_SAFE) { + ps883x_configure(retimer, cfg0, cfg1, cfg2); + return 0; + } + + if (retimer->mode != TYPEC_STATE_USB && retimer->svid != USB_TYPEC_DP_SID) + return -EINVAL; + + if (retimer->orientation == TYPEC_ORIENTATION_REVERSE) + cfg0 |= CONN_STATUS_0_ORIENTATION_REVERSED; + + switch (retimer->mode) { + case TYPEC_STATE_USB: + cfg0 |= CONN_STATUS_0_USB_3_1_CONNECTED; + break; + + case TYPEC_DP_STATE_C: + cfg1 = CONN_STATUS_1_DP_CONNECTED | + CONN_STATUS_1_DP_SINK_REQUESTED | + CONN_STATUS_1_DP_PIN_ASSIGNMENT_C_D | + CONN_STATUS_1_DP_HPD_LEVEL; + break; + + case TYPEC_DP_STATE_D: + cfg0 |= CONN_STATUS_0_USB_3_1_CONNECTED; + cfg1 = CONN_STATUS_1_DP_CONNECTED | + CONN_STATUS_1_DP_SINK_REQUESTED | + CONN_STATUS_1_DP_PIN_ASSIGNMENT_C_D | + CONN_STATUS_1_DP_HPD_LEVEL; + break; + + case TYPEC_DP_STATE_E: + cfg1 = CONN_STATUS_1_DP_CONNECTED | + CONN_STATUS_1_DP_HPD_LEVEL; + break; + + default: + return -EOPNOTSUPP; + } + + ps883x_configure(retimer, cfg0, cfg1, cfg2); + + return 0; +} + +static int ps883x_sw_set(struct typec_switch_dev *sw, + enum typec_orientation orientation) +{ + struct ps883x_retimer *retimer = typec_switch_get_drvdata(sw); + int ret = 0; + + ret = typec_switch_set(retimer->typec_switch, orientation); + if (ret) + return ret; + + mutex_lock(&retimer->lock); + + if (retimer->orientation != orientation) { + retimer->orientation = orientation; + + ret = ps883x_set(retimer); + } + + mutex_unlock(&retimer->lock); + + return ret; +} + +static int ps883x_retimer_set(struct typec_retimer *rtmr, + struct typec_retimer_state *state) +{ + struct ps883x_retimer *retimer = typec_retimer_get_drvdata(rtmr); + struct typec_mux_state mux_state; + int ret = 0; + + mutex_lock(&retimer->lock); + + if (state->mode != retimer->mode) { + retimer->mode = state->mode; + + if (state->alt) + retimer->svid = state->alt->svid; + else + retimer->svid = 0; + + ret = ps883x_set(retimer); + } + + mutex_unlock(&retimer->lock); + + if (ret) + return ret; + + mux_state.alt = state->alt; + mux_state.data = state->data; + mux_state.mode = state->mode; + + return typec_mux_set(retimer->typec_mux, &mux_state); +} + +static int ps883x_enable_vregs(struct ps883x_retimer *retimer) +{ + struct device *dev = &retimer->client->dev; + int ret; + + ret = regulator_enable(retimer->vdd33_supply); + if (ret) { + dev_err(dev, "cannot enable VDD 3.3V regulator: %d\n", ret); + return ret; + } + + ret = regulator_enable(retimer->vdd33_cap_supply); + if (ret) { + dev_err(dev, "cannot enable VDD 3.3V CAP regulator: %d\n", ret); + goto err_vdd33_disable; + } + + usleep_range(4000, 10000); + + ret = regulator_enable(retimer->vdd_supply); + if (ret) { + dev_err(dev, "cannot enable VDD regulator: %d\n", ret); + goto err_vdd33_cap_disable; + } + + ret = regulator_enable(retimer->vddar_supply); + if (ret) { + dev_err(dev, "cannot enable VDD AR regulator: %d\n", ret); + goto err_vdd_disable; + } + + ret = regulator_enable(retimer->vddat_supply); + if (ret) { + dev_err(dev, "cannot enable VDD AT regulator: %d\n", ret); + goto err_vddar_disable; + } + + ret = regulator_enable(retimer->vddio_supply); + if (ret) { + dev_err(dev, "cannot enable VDD IO regulator: %d\n", ret); + goto err_vddat_disable; + } + + return 0; + +err_vddat_disable: + regulator_disable(retimer->vddat_supply); +err_vddar_disable: + regulator_disable(retimer->vddar_supply); +err_vdd_disable: + regulator_disable(retimer->vdd_supply); +err_vdd33_cap_disable: + regulator_disable(retimer->vdd33_cap_supply); +err_vdd33_disable: + regulator_disable(retimer->vdd33_supply); + + return ret; +} + +static void ps883x_disable_vregs(struct ps883x_retimer *retimer) +{ + regulator_disable(retimer->vddio_supply); + regulator_disable(retimer->vddat_supply); + regulator_disable(retimer->vddar_supply); + regulator_disable(retimer->vdd_supply); + regulator_disable(retimer->vdd33_cap_supply); + regulator_disable(retimer->vdd33_supply); +} + +static int ps883x_get_vregs(struct ps883x_retimer *retimer) +{ + struct device *dev = &retimer->client->dev; + + retimer->vdd_supply = devm_regulator_get(dev, "vdd"); + if (IS_ERR(retimer->vdd_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vdd_supply), + "failed to get VDD\n"); + + retimer->vdd33_supply = devm_regulator_get(dev, "vdd33"); + if (IS_ERR(retimer->vdd33_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vdd33_supply), + "failed to get VDD 3.3V\n"); + + retimer->vdd33_cap_supply = devm_regulator_get(dev, "vdd33-cap"); + if (IS_ERR(retimer->vdd33_cap_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vdd33_cap_supply), + "failed to get VDD CAP 3.3V\n"); + + retimer->vddat_supply = devm_regulator_get(dev, "vddat"); + if (IS_ERR(retimer->vddat_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vddat_supply), + "failed to get VDD AT\n"); + + retimer->vddar_supply = devm_regulator_get(dev, "vddar"); + if (IS_ERR(retimer->vddar_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vddar_supply), + "failed to get VDD AR\n"); + + retimer->vddio_supply = devm_regulator_get(dev, "vddio"); + if (IS_ERR(retimer->vddio_supply)) + return dev_err_probe(dev, PTR_ERR(retimer->vddio_supply), + "failed to get VDD IO\n"); + + return 0; +} + +static const struct regmap_config ps883x_retimer_regmap = { + .max_register = 0x1f, + .reg_bits = 8, + .val_bits = 8, +}; + +static int ps883x_retimer_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct typec_switch_desc sw_desc = { }; + struct typec_retimer_desc rtmr_desc = { }; + struct ps883x_retimer *retimer; + int ret; + + retimer = devm_kzalloc(dev, sizeof(*retimer), GFP_KERNEL); + if (!retimer) + return -ENOMEM; + + retimer->client = client; + + mutex_init(&retimer->lock); + + retimer->regmap = devm_regmap_init_i2c(client, &ps883x_retimer_regmap); + if (IS_ERR(retimer->regmap)) + return dev_err_probe(dev, PTR_ERR(retimer->regmap), + "failed to allocate register map\n"); + + ret = ps883x_get_vregs(retimer); + if (ret) + return ret; + + retimer->xo_clk = devm_clk_get(dev, NULL); + if (IS_ERR(retimer->xo_clk)) + return dev_err_probe(dev, PTR_ERR(retimer->xo_clk), + "failed to get xo clock\n"); + + retimer->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_ASIS); + if (IS_ERR(retimer->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(retimer->reset_gpio), + "failed to get reset gpio\n"); + + retimer->typec_switch = typec_switch_get(dev); + if (IS_ERR(retimer->typec_switch)) + return dev_err_probe(dev, PTR_ERR(retimer->typec_switch), + "failed to acquire orientation-switch\n"); + + retimer->typec_mux = typec_mux_get(dev); + if (IS_ERR(retimer->typec_mux)) { + ret = dev_err_probe(dev, PTR_ERR(retimer->typec_mux), + "failed to acquire mode-mux\n"); + goto err_switch_put; + } + + ret = drm_aux_bridge_register(dev); + if (ret) + goto err_mux_put; + + ret = ps883x_enable_vregs(retimer); + if (ret) + goto err_mux_put; + + ret = clk_prepare_enable(retimer->xo_clk); + if (ret) { + dev_err(dev, "failed to enable XO: %d\n", ret); + goto err_vregs_disable; + } + + sw_desc.drvdata = retimer; + sw_desc.fwnode = dev_fwnode(dev); + sw_desc.set = ps883x_sw_set; + + retimer->sw = typec_switch_register(dev, &sw_desc); + if (IS_ERR(retimer->sw)) { + ret = PTR_ERR(retimer->sw); + dev_err(dev, "failed to register typec switch: %d\n", ret); + goto err_clk_disable; + } + + rtmr_desc.drvdata = retimer; + rtmr_desc.fwnode = dev_fwnode(dev); + rtmr_desc.set = ps883x_retimer_set; + + retimer->retimer = typec_retimer_register(dev, &rtmr_desc); + if (IS_ERR(retimer->retimer)) { + ret = PTR_ERR(retimer->retimer); + dev_err(dev, "failed to register typec retimer: %d\n", ret); + goto err_switch_unregister; + } + + /* skip resetting if already configured */ + if (regmap_test_bits(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, + CONN_STATUS_0_CONNECTION_PRESENT) == 1) + return gpiod_direction_output(retimer->reset_gpio, 0); + + gpiod_direction_output(retimer->reset_gpio, 1); + + /* VDD IO supply enable to reset release delay */ + usleep_range(4000, 14000); + + gpiod_set_value(retimer->reset_gpio, 0); + + /* firmware initialization delay */ + msleep(60); + + return 0; + +err_switch_unregister: + typec_switch_unregister(retimer->sw); +err_vregs_disable: + ps883x_disable_vregs(retimer); +err_clk_disable: + clk_disable_unprepare(retimer->xo_clk); +err_mux_put: + typec_mux_put(retimer->typec_mux); +err_switch_put: + typec_switch_put(retimer->typec_switch); + + return ret; +} + +static void ps883x_retimer_remove(struct i2c_client *client) +{ + struct ps883x_retimer *retimer = i2c_get_clientdata(client); + + typec_retimer_unregister(retimer->retimer); + typec_switch_unregister(retimer->sw); + + gpiod_set_value(retimer->reset_gpio, 1); + + clk_disable_unprepare(retimer->xo_clk); + + ps883x_disable_vregs(retimer); + + typec_mux_put(retimer->typec_mux); + typec_switch_put(retimer->typec_switch); +} + +static const struct of_device_id ps883x_retimer_of_table[] = { + { .compatible = "parade,ps8830" }, + { } +}; +MODULE_DEVICE_TABLE(of, ps883x_retimer_of_table); + +static struct i2c_driver ps883x_retimer_driver = { + .driver = { + .name = "ps883x_retimer", + .of_match_table = ps883x_retimer_of_table, + }, + .probe = ps883x_retimer_probe, + .remove = ps883x_retimer_remove, +}; + +module_i2c_driver(ps883x_retimer_driver); + +MODULE_DESCRIPTION("Parade ps883x Type-C Retimer driver"); +MODULE_LICENSE("GPL"); From b5bbace353ad654d8a562dbfea88de7d694e44a7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 5 Feb 2025 10:10:07 +0100 Subject: [PATCH 0207/2157] tty: serial: fsl_lpuart: Make interrupt name distinct SoCs like the i.MX93 have several lpuart interfaces, but fsl_lpuart uses the driver name to request the IRQ. This makes it hard to identify interfaces from outputs like /proc/interrupts . So use the dev_name() for requesting instead. Signed-off-by: Stefan Wahren Reviewed-by: Peng Fan Link: https://lore.kernel.org/r/20250205091007.4528-1-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c91b9d9818cd..91d02c55c470 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2954,7 +2954,7 @@ static int lpuart_probe(struct platform_device *pdev) goto failed_attach_port; ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0, - DRIVER_NAME, sport); + dev_name(&pdev->dev), sport); if (ret) goto failed_irq_request; From 1f0cfc68ad7a4c1b05e95a425b779997fafd975d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Sun, 9 Feb 2025 23:13:40 +0100 Subject: [PATCH 0208/2157] dt-bindings: serial: Allow fsl,ns16550 with broken FIFOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing on a Freescale MPC8314E board, I noticed that changing the UART compatible string from ns16550 to ns16550a breaks the output, suggesting that the FIFOs don't work correctly. To accommodate this fact, move the definition of fsl,ns16550 to the section of 8250.yaml that allows broken FIFOs. Signed-off-by: J. Neuschäfer Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250209-uartfifo-v1-1-501a510a5f07@posteo.net Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 0bde2379e864..dc0d52920575 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -77,7 +77,6 @@ properties: - altr,16550-FIFO64 - altr,16550-FIFO128 - fsl,16550-FIFO64 - - fsl,ns16550 - andestech,uart16550 - nxp,lpc1850-uart - opencores,uart16550-rtlsvn105 @@ -86,6 +85,7 @@ properties: - items: - enum: - ns16750 + - fsl,ns16550 - cavium,octeon-3860-uart - xlnx,xps-uart16550-2.00.b - ralink,rt2880-uart From 22a6984c5b5df8eab864d7f3e8b94d5a554d31ab Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 7 Feb 2025 13:33:13 +0200 Subject: [PATCH 0209/2157] serial: sh-sci: Update the suspend/resume support The Renesas RZ/G3S supports a power saving mode where power to most of the SoC components is turned off. When returning from this power saving mode, SoC components need to be re-configured. The SCIFs on the Renesas RZ/G3S need to be re-configured as well when returning from this power saving mode. The sh-sci code already configures the SCIF clocks, power domain and registers by calling uart_resume_port() in sci_resume(). On suspend path the SCIF UART ports are suspended accordingly (by calling uart_suspend_port() in sci_suspend()). The only missing setting is the reset signal. For this assert/de-assert the reset signal on driver suspend/resume. In case the no_console_suspend is specified by the user, the registers need to be saved on suspend path and restore on resume path. To do this the sci_console_save()/sci_console_restore() functions were added. There is no need to cache/restore the status or FIFO registers. Only the control registers. The registers that will be saved/restored on suspend/resume are specified by the struct sci_suspend_regs data structure. Signed-off-by: Claudiu Beznea Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250207113313.545432-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 71 +++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index b1ea48f38248..4bc637f9d649 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -104,6 +104,15 @@ struct plat_sci_reg { u8 offset, size; }; +struct sci_suspend_regs { + u16 scsmr; + u16 scscr; + u16 scfcr; + u16 scsptr; + u8 scbrr; + u8 semr; +}; + struct sci_port_params { const struct plat_sci_reg regs[SCIx_NR_REGS]; unsigned int fifosize; @@ -134,6 +143,8 @@ struct sci_port { struct dma_chan *chan_tx; struct dma_chan *chan_rx; + struct reset_control *rstc; + #ifdef CONFIG_SERIAL_SH_SCI_DMA struct dma_chan *chan_tx_saved; struct dma_chan *chan_rx_saved; @@ -153,6 +164,7 @@ struct sci_port { int rx_trigger; struct timer_list rx_fifo_timer; int rx_fifo_timeout; + struct sci_suspend_regs suspend_regs; u16 hscif_tot; bool has_rtscts; @@ -3374,6 +3386,7 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev, } sp = &sci_ports[id]; + sp->rstc = rstc; *dev_id = id; p->type = SCI_OF_TYPE(data); @@ -3546,13 +3559,57 @@ static int sci_probe(struct platform_device *dev) return 0; } +static void sci_console_save(struct sci_port *s) +{ + struct sci_suspend_regs *regs = &s->suspend_regs; + struct uart_port *port = &s->port; + + if (sci_getreg(port, SCSMR)->size) + regs->scsmr = sci_serial_in(port, SCSMR); + if (sci_getreg(port, SCSCR)->size) + regs->scscr = sci_serial_in(port, SCSCR); + if (sci_getreg(port, SCFCR)->size) + regs->scfcr = sci_serial_in(port, SCFCR); + if (sci_getreg(port, SCSPTR)->size) + regs->scsptr = sci_serial_in(port, SCSPTR); + if (sci_getreg(port, SCBRR)->size) + regs->scbrr = sci_serial_in(port, SCBRR); + if (sci_getreg(port, SEMR)->size) + regs->semr = sci_serial_in(port, SEMR); +} + +static void sci_console_restore(struct sci_port *s) +{ + struct sci_suspend_regs *regs = &s->suspend_regs; + struct uart_port *port = &s->port; + + if (sci_getreg(port, SCSMR)->size) + sci_serial_out(port, SCSMR, regs->scsmr); + if (sci_getreg(port, SCSCR)->size) + sci_serial_out(port, SCSCR, regs->scscr); + if (sci_getreg(port, SCFCR)->size) + sci_serial_out(port, SCFCR, regs->scfcr); + if (sci_getreg(port, SCSPTR)->size) + sci_serial_out(port, SCSPTR, regs->scsptr); + if (sci_getreg(port, SCBRR)->size) + sci_serial_out(port, SCBRR, regs->scbrr); + if (sci_getreg(port, SEMR)->size) + sci_serial_out(port, SEMR, regs->semr); +} + static __maybe_unused int sci_suspend(struct device *dev) { struct sci_port *sport = dev_get_drvdata(dev); - if (sport) + if (sport) { uart_suspend_port(&sci_uart_driver, &sport->port); + if (!console_suspend_enabled && uart_console(&sport->port)) + sci_console_save(sport); + else + return reset_control_assert(sport->rstc); + } + return 0; } @@ -3560,8 +3617,18 @@ static __maybe_unused int sci_resume(struct device *dev) { struct sci_port *sport = dev_get_drvdata(dev); - if (sport) + if (sport) { + if (!console_suspend_enabled && uart_console(&sport->port)) { + sci_console_restore(sport); + } else { + int ret = reset_control_deassert(sport->rstc); + + if (ret) + return ret; + } + uart_resume_port(&sci_uart_driver, &sport->port); + } return 0; } From c213375e32830418188743c5b8d75e5dfbdc0bc2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 13:59:07 +0200 Subject: [PATCH 0210/2157] serial: 8250_dw: Call dw8250_quirks() conditionally Not all the cases provide the driver data, that is represented by an object instance of struct dw8250_platform_data. Id est the change missed the case when the driver is instantiated via board files as pure platform driver. Fix this by calling dw8250_quirks() conditionally. This will require splitting dw8250_setup_dma_filter() out of dw8250_quirks(). Also make sure IRQ handler won't crash, it also requires driver data to be present. Fixes: bfd3d4a40f39 ("serial: 8250_dw: Drop unneeded NULL checks in dw8250_quirks()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502121529.f7e65d49-lkp@intel.com Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250212115952.2312444-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 36 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index ac3987513564..af24ec25d976 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -421,6 +421,18 @@ static bool dw8250_idma_filter(struct dma_chan *chan, void *param) return param == chan->device->dev; } +static void dw8250_setup_dma_filter(struct uart_port *p, struct dw8250_data *data) +{ + /* Platforms with iDMA 64-bit */ + if (platform_get_resource_byname(to_platform_device(p->dev), IORESOURCE_MEM, "lpss_priv")) { + data->data.dma.rx_param = p->dev->parent; + data->data.dma.tx_param = p->dev->parent; + data->data.dma.fn = dw8250_idma_filter; + } else { + data->data.dma.fn = dw8250_fallback_dma_filter; + } +} + static u32 dw8250_rzn1_get_dmacr_burst(int max_burst) { if (max_burst >= 8) @@ -491,14 +503,6 @@ static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) p->serial_in = dw8250_serial_in32; data->uart_16550_compatible = true; } - - /* Platforms with iDMA 64-bit */ - if (platform_get_resource_byname(to_platform_device(p->dev), - IORESOURCE_MEM, "lpss_priv")) { - data->data.dma.rx_param = p->dev->parent; - data->data.dma.tx_param = p->dev->parent; - data->data.dma.fn = dw8250_idma_filter; - } } static void dw8250_reset_control_assert(void *data) @@ -520,7 +524,6 @@ static int dw8250_probe(struct platform_device *pdev) return dev_err_probe(dev, -EINVAL, "no registers defined\n"); spin_lock_init(&p->lock); - p->handle_irq = dw8250_handle_irq; p->pm = dw8250_do_pm; p->type = PORT_8250; p->flags = UPF_FIXED_PORT; @@ -532,13 +535,8 @@ static int dw8250_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - data->data.dma.fn = dw8250_fallback_dma_filter; - data->pdata = device_get_match_data(p->dev); p->private_data = &data->data; - data->uart_16550_compatible = device_property_read_bool(dev, - "snps,uart-16550-compatible"); - p->mapbase = regs->start; p->mapsize = resource_size(regs); @@ -626,11 +624,19 @@ static int dw8250_probe(struct platform_device *pdev) if (err) return err; - dw8250_quirks(p, data); + data->uart_16550_compatible = device_property_read_bool(dev, "snps,uart-16550-compatible"); + + data->pdata = device_get_match_data(p->dev); + if (data->pdata) + dw8250_quirks(p, data); /* If the Busy Functionality is not implemented, don't handle it */ if (data->uart_16550_compatible) p->handle_irq = NULL; + else if (data->pdata) + p->handle_irq = dw8250_handle_irq; + + dw8250_setup_dma_filter(p, data); if (!data->skip_autocfg) dw8250_setup_port(p); From c08b0f2c317223fa702616dfa77f10a92b7b34b2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Jan 2025 08:25:50 -0800 Subject: [PATCH 0211/2157] Revert "tty/serial: Add kgdb_nmi driver" This reverts commit 0c57dfcc6c1d037243c2f8fbf62eab3633326ec0. The functionality was supoosed to be used by a later patch in the series that never landed [1]. Drop it. NOTE: part of functionality was already reverted by commit 39d0be87438a ("serial: kgdb_nmi: Remove unused knock code"). Also note that this revert is not a clean revert given code changes that have happened in the meantime. It's obvious that nobody is using this code since the two exposed functions (kgdb_register_nmi_console() and kgdb_unregister_nmi_console()) are both no-ops if "arch_kgdb_ops.enable_nmi" is not defined. No architectures define it. [1] https://lore.kernel.org/lkml/1348522080-32629-9-git-send-email-anton.vorontsov@linaro.org/ Signed-off-by: Douglas Anderson Acked-by: Andy Shevchenko Link: URL [1] Link: https://lore.kernel.org/r/20250129082535.1.Ia095eac1ae357f87d23e7af2206741f5d40788f1@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 19 --- drivers/tty/serial/Makefile | 1 - drivers/tty/serial/kgdb_nmi.c | 280 ---------------------------------- drivers/tty/serial/kgdboc.c | 8 - include/linux/kgdb.h | 8 - 5 files changed, 316 deletions(-) delete mode 100644 drivers/tty/serial/kgdb_nmi.c diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 976dae3bb1bb..f21cad6aaa47 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -179,25 +179,6 @@ config SERIAL_ATMEL_TTYAT Say Y if you have an external 8250/16C550 UART. If unsure, say N. -config SERIAL_KGDB_NMI - bool "Serial console over KGDB NMI debugger port" - depends on KGDB_SERIAL_CONSOLE - help - This special driver allows you to temporary use NMI debugger port - as a normal console (assuming that the port is attached to KGDB). - - Unlike KDB's disable_nmi command, with this driver you are always - able to go back to the debugger using KGDB escape sequence ($3#33). - This is because this console driver processes the input in NMI - context, and thus is able to intercept the magic sequence. - - Note that since the console interprets input and uses polling - communication methods, for things like PPP you still must fully - detach debugger port from the KGDB NMI (i.e. disable_nmi), and - use raw console. - - If unsure, say N. - config SERIAL_MESON tristate "Meson serial port support" depends on ARCH_MESON || COMPILE_TEST diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 6ff74f0a9530..3019260d9120 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -96,6 +96,5 @@ obj-$(CONFIG_SERIAL_ZS) += zs.o # GPIOLIB helpers for modem control lines obj-$(CONFIG_SERIAL_MCTRL_GPIO) += serial_mctrl_gpio.o -obj-$(CONFIG_SERIAL_KGDB_NMI) += kgdb_nmi.o obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o obj-$(CONFIG_SERIAL_NUVOTON_MA35D1) += ma35d1_serial.o diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c deleted file mode 100644 index 2833708e369f..000000000000 --- a/drivers/tty/serial/kgdb_nmi.c +++ /dev/null @@ -1,280 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KGDB NMI serial console - * - * Copyright 2010 Google, Inc. - * Arve Hjønnevåg - * Colin Cross - * Copyright 2012 Linaro Ltd. - * Anton Vorontsov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static atomic_t kgdb_nmi_num_readers = ATOMIC_INIT(0); - -static int kgdb_nmi_console_setup(struct console *co, char *options) -{ - arch_kgdb_ops.enable_nmi(1); - - /* The NMI console uses the dbg_io_ops to issue console messages. To - * avoid duplicate messages during kdb sessions we must inform kdb's - * I/O utilities that messages sent to the console will automatically - * be displayed on the dbg_io. - */ - dbg_io_ops->cons = co; - - return 0; -} - -static void kgdb_nmi_console_write(struct console *co, const char *s, uint c) -{ - int i; - - for (i = 0; i < c; i++) - dbg_io_ops->write_char(s[i]); -} - -static struct tty_driver *kgdb_nmi_tty_driver; - -static struct tty_driver *kgdb_nmi_console_device(struct console *co, int *idx) -{ - *idx = co->index; - return kgdb_nmi_tty_driver; -} - -static struct console kgdb_nmi_console = { - .name = "ttyNMI", - .setup = kgdb_nmi_console_setup, - .write = kgdb_nmi_console_write, - .device = kgdb_nmi_console_device, - .flags = CON_PRINTBUFFER | CON_ANYTIME, - .index = -1, -}; - -/* - * This is usually the maximum rate on debug ports. We make fifo large enough - * to make copy-pasting to the terminal usable. - */ -#define KGDB_NMI_BAUD 115200 -#define KGDB_NMI_FIFO_SIZE roundup_pow_of_two(KGDB_NMI_BAUD / 8 / HZ) - -struct kgdb_nmi_tty_priv { - struct tty_port port; - struct timer_list timer; - STRUCT_KFIFO(char, KGDB_NMI_FIFO_SIZE) fifo; -}; - -static struct tty_port *kgdb_nmi_port; - -/* - * The tasklet is cheap, it does not cause wakeups when reschedules itself, - * instead it waits for the next tick. - */ -static void kgdb_nmi_tty_receiver(struct timer_list *t) -{ - struct kgdb_nmi_tty_priv *priv = from_timer(priv, t, timer); - char ch; - - priv->timer.expires = jiffies + (HZ/100); - add_timer(&priv->timer); - - if (likely(!atomic_read(&kgdb_nmi_num_readers) || - !kfifo_len(&priv->fifo))) - return; - - while (kfifo_out(&priv->fifo, &ch, 1)) - tty_insert_flip_char(&priv->port, ch, TTY_NORMAL); - tty_flip_buffer_push(&priv->port); -} - -static int kgdb_nmi_tty_activate(struct tty_port *port, struct tty_struct *tty) -{ - struct kgdb_nmi_tty_priv *priv = - container_of(port, struct kgdb_nmi_tty_priv, port); - - kgdb_nmi_port = port; - priv->timer.expires = jiffies + (HZ/100); - add_timer(&priv->timer); - - return 0; -} - -static void kgdb_nmi_tty_shutdown(struct tty_port *port) -{ - struct kgdb_nmi_tty_priv *priv = - container_of(port, struct kgdb_nmi_tty_priv, port); - - del_timer(&priv->timer); - kgdb_nmi_port = NULL; -} - -static const struct tty_port_operations kgdb_nmi_tty_port_ops = { - .activate = kgdb_nmi_tty_activate, - .shutdown = kgdb_nmi_tty_shutdown, -}; - -static int kgdb_nmi_tty_install(struct tty_driver *drv, struct tty_struct *tty) -{ - struct kgdb_nmi_tty_priv *priv; - int ret; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - INIT_KFIFO(priv->fifo); - timer_setup(&priv->timer, kgdb_nmi_tty_receiver, 0); - tty_port_init(&priv->port); - priv->port.ops = &kgdb_nmi_tty_port_ops; - tty->driver_data = priv; - - ret = tty_port_install(&priv->port, drv, tty); - if (ret) { - pr_err("%s: can't install tty port: %d\n", __func__, ret); - goto err; - } - return 0; -err: - tty_port_destroy(&priv->port); - kfree(priv); - return ret; -} - -static void kgdb_nmi_tty_cleanup(struct tty_struct *tty) -{ - struct kgdb_nmi_tty_priv *priv = tty->driver_data; - - tty->driver_data = NULL; - tty_port_destroy(&priv->port); - kfree(priv); -} - -static int kgdb_nmi_tty_open(struct tty_struct *tty, struct file *file) -{ - struct kgdb_nmi_tty_priv *priv = tty->driver_data; - unsigned int mode = file->f_flags & O_ACCMODE; - int ret; - - ret = tty_port_open(&priv->port, tty, file); - if (!ret && (mode == O_RDONLY || mode == O_RDWR)) - atomic_inc(&kgdb_nmi_num_readers); - - return ret; -} - -static void kgdb_nmi_tty_close(struct tty_struct *tty, struct file *file) -{ - struct kgdb_nmi_tty_priv *priv = tty->driver_data; - unsigned int mode = file->f_flags & O_ACCMODE; - - if (mode == O_RDONLY || mode == O_RDWR) - atomic_dec(&kgdb_nmi_num_readers); - - tty_port_close(&priv->port, tty, file); -} - -static void kgdb_nmi_tty_hangup(struct tty_struct *tty) -{ - struct kgdb_nmi_tty_priv *priv = tty->driver_data; - - tty_port_hangup(&priv->port); -} - -static unsigned int kgdb_nmi_tty_write_room(struct tty_struct *tty) -{ - /* Actually, we can handle any amount as we use polled writes. */ - return 2048; -} - -static ssize_t kgdb_nmi_tty_write(struct tty_struct *tty, const u8 *buf, - size_t c) -{ - int i; - - for (i = 0; i < c; i++) - dbg_io_ops->write_char(buf[i]); - return c; -} - -static const struct tty_operations kgdb_nmi_tty_ops = { - .open = kgdb_nmi_tty_open, - .close = kgdb_nmi_tty_close, - .install = kgdb_nmi_tty_install, - .cleanup = kgdb_nmi_tty_cleanup, - .hangup = kgdb_nmi_tty_hangup, - .write_room = kgdb_nmi_tty_write_room, - .write = kgdb_nmi_tty_write, -}; - -int kgdb_register_nmi_console(void) -{ - int ret; - - if (!arch_kgdb_ops.enable_nmi) - return 0; - - kgdb_nmi_tty_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW); - if (IS_ERR(kgdb_nmi_tty_driver)) { - pr_err("%s: cannot allocate tty\n", __func__); - return PTR_ERR(kgdb_nmi_tty_driver); - } - kgdb_nmi_tty_driver->driver_name = "ttyNMI"; - kgdb_nmi_tty_driver->name = "ttyNMI"; - kgdb_nmi_tty_driver->num = 1; - kgdb_nmi_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; - kgdb_nmi_tty_driver->subtype = SERIAL_TYPE_NORMAL; - kgdb_nmi_tty_driver->init_termios = tty_std_termios; - tty_termios_encode_baud_rate(&kgdb_nmi_tty_driver->init_termios, - KGDB_NMI_BAUD, KGDB_NMI_BAUD); - tty_set_operations(kgdb_nmi_tty_driver, &kgdb_nmi_tty_ops); - - ret = tty_register_driver(kgdb_nmi_tty_driver); - if (ret) { - pr_err("%s: can't register tty driver: %d\n", __func__, ret); - goto err_drv_reg; - } - - register_console(&kgdb_nmi_console); - - return 0; -err_drv_reg: - tty_driver_kref_put(kgdb_nmi_tty_driver); - return ret; -} -EXPORT_SYMBOL_GPL(kgdb_register_nmi_console); - -int kgdb_unregister_nmi_console(void) -{ - int ret; - - if (!arch_kgdb_ops.enable_nmi) - return 0; - arch_kgdb_ops.enable_nmi(0); - - ret = unregister_console(&kgdb_nmi_console); - if (ret) - return ret; - - tty_unregister_driver(kgdb_nmi_tty_driver); - tty_driver_kref_put(kgdb_nmi_tty_driver); - - return 0; -} -EXPORT_SYMBOL_GPL(kgdb_unregister_nmi_console); diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 58ea1e1391ce..85f6c5a76e0f 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -186,8 +186,6 @@ static void cleanup_kgdboc(void) if (configured != 1) return; - if (kgdb_unregister_nmi_console()) - return; kgdboc_unregister_kbd(); kgdb_unregister_io_module(&kgdboc_io_ops); } @@ -250,16 +248,10 @@ static int configure_kgdboc(void) if (err) goto noconfig; - err = kgdb_register_nmi_console(); - if (err) - goto nmi_con_failed; - configured = 1; return 0; -nmi_con_failed: - kgdb_unregister_io_module(&kgdboc_io_ops); noconfig: kgdboc_unregister_kbd(); configured = 0; diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 51ef131e66b7..14739952698b 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -306,14 +306,6 @@ extern const struct kgdb_arch arch_kgdb_ops; extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); -#ifdef CONFIG_SERIAL_KGDB_NMI -extern int kgdb_register_nmi_console(void); -extern int kgdb_unregister_nmi_console(void); -#else -static inline int kgdb_register_nmi_console(void) { return 0; } -static inline int kgdb_unregister_nmi_console(void) { return 0; } -#endif - extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); extern struct kgdb_io *dbg_io_ops; From dbb1f9c03bad116ce771d0b3335ca55b1387cf78 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Jan 2025 08:25:51 -0800 Subject: [PATCH 0212/2157] Revert "kdb: Implement disable_nmi command" This reverts commit ad394f66fa57ae66014cb74f337e2820bac4c417. No architectures ever implemented `enable_nmi` since the later patches in the series adding it never landed. It's been a long time. Drop it. NOTE: this is not a clean revert due to changes in the file in the meantime. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250129082535.2.Ib91bfb95bdcf77591257a84063fdeb5b4dce65b1@changeid Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_main.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 5f4be507d79f..3a5408b54570 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -2119,32 +2118,6 @@ static int kdb_dmesg(int argc, const char **argv) return 0; } #endif /* CONFIG_PRINTK */ - -/* Make sure we balance enable/disable calls, must disable first. */ -static atomic_t kdb_nmi_disabled; - -static int kdb_disable_nmi(int argc, const char *argv[]) -{ - if (atomic_read(&kdb_nmi_disabled)) - return 0; - atomic_set(&kdb_nmi_disabled, 1); - arch_kgdb_ops.enable_nmi(0); - return 0; -} - -static int kdb_param_enable_nmi(const char *val, const struct kernel_param *kp) -{ - if (!atomic_add_unless(&kdb_nmi_disabled, -1, 0)) - return -EINVAL; - arch_kgdb_ops.enable_nmi(1); - return 0; -} - -static const struct kernel_param_ops kdb_param_ops_enable_nmi = { - .set = kdb_param_enable_nmi, -}; -module_param_cb(enable_nmi, &kdb_param_ops_enable_nmi, NULL, 0600); - /* * kdb_cpu - This function implements the 'cpu' command. * cpu [] @@ -2836,20 +2809,10 @@ static kdbtab_t maintab[] = { }, }; -static kdbtab_t nmicmd = { - .name = "disable_nmi", - .func = kdb_disable_nmi, - .usage = "", - .help = "Disable NMI entry to KDB", - .flags = KDB_ENABLE_ALWAYS_SAFE, -}; - /* Initialize the kdb command table. */ static void __init kdb_inittab(void) { kdb_register_table(maintab, ARRAY_SIZE(maintab)); - if (arch_kgdb_ops.enable_nmi) - kdb_register_table(&nmicmd, 1); } /* Execute any commands defined in kdb_cmds. */ From a029a219385cfdf9c43fb1ef9eb49d173773388f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Jan 2025 08:25:52 -0800 Subject: [PATCH 0213/2157] Revert "kernel/debug: Mask KGDB NMI upon entry" This reverts commit 5a14fead07bcf4e0acc877a8d9e1d1f40a441153. No architectures ever implemented `enable_nmi` since the later patches in the series adding it never landed. It's been a long time. Drop it. NOTE: this is not a clean revert due to changes in the file in the meantime. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250129082535.3.I2254953cd852f31f354456689d68b2d910de3fbe@changeid Signed-off-by: Greg Kroah-Hartman --- include/linux/kgdb.h | 3 --- kernel/debug/debug_core.c | 14 +++----------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 14739952698b..5eebbe7a3545 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -257,7 +257,6 @@ extern void kgdb_arch_late(void); * hardware breakpoints. * @correct_hw_break: Allow an architecture to specify how to correct the * hardware debug registers. - * @enable_nmi: Manage NMI-triggered entry to KGDB */ struct kgdb_arch { unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE]; @@ -270,8 +269,6 @@ struct kgdb_arch { void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); - - void (*enable_nmi)(bool on); }; /** diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ce1bb2301c06..0b9495187fba 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -837,10 +837,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) { struct kgdb_state kgdb_var; struct kgdb_state *ks = &kgdb_var; - int ret = 0; - - if (arch_kgdb_ops.enable_nmi) - arch_kgdb_ops.enable_nmi(0); /* * Avoid entering the debugger if we were triggered due to an oops * but panic_timeout indicates the system should automatically @@ -858,15 +854,11 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) ks->linux_regs = regs; if (kgdb_reenter_check(ks)) - goto out; /* Ouch, double exception ! */ + return 0; /* Ouch, double exception ! */ if (kgdb_info[ks->cpu].enter_kgdb != 0) - goto out; + return 0; - ret = kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); -out: - if (arch_kgdb_ops.enable_nmi) - arch_kgdb_ops.enable_nmi(1); - return ret; + return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); } NOKPROBE_SYMBOL(kgdb_handle_exception); From 5b28371f5f77922cf948fee6f448c0eca023e961 Mon Sep 17 00:00:00 2001 From: Kartik Rajput Date: Thu, 13 Feb 2025 18:26:11 +0530 Subject: [PATCH 0214/2157] dt-bindings: serial: Add bindings for nvidia,tegra264-utc The Tegra UTC (UART Trace Controller) allows multiple clients within the Tegra SoC to share a physical UART interface. It supports up to 16 clients. Each client operates as an independent UART endpoint with a dedicated interrupt and 128-character TX/RX FIFOs. Add device tree binding documentation for the Tegra UTC client. Signed-off-by: Kartik Rajput Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250213125612.4705-2-kkartik@nvidia.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/nvidia,tegra264-utc.yaml | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml diff --git a/Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml b/Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml new file mode 100644 index 000000000000..572cc574da64 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/nvidia,tegra264-utc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra UTC (UART Trace Controller) client + +maintainers: + - Kartik Rajput + - Thierry Reding + - Jonathan Hunter + +description: + Represents a client interface of the Tegra UTC (UART Trace Controller). The + Tegra UTC allows multiple clients within the Tegra SoC to share a physical + UART interface. It supports up to 16 clients. Each client operates as an + independent UART endpoint with a dedicated interrupt and 128-character TX/RX + FIFOs. + + The Tegra UTC clients use 8-N-1 configuration and operates on a baudrate + configured by the bootloader at the controller level. + +allOf: + - $ref: serial.yaml# + +properties: + compatible: + const: nvidia,tegra264-utc + + reg: + items: + - description: TX region. + - description: RX region. + + reg-names: + items: + - const: tx + - const: rx + + interrupts: + maxItems: 1 + + tx-threshold: + minimum: 1 + maximum: 128 + + rx-threshold: + minimum: 1 + maximum: 128 + +required: + - compatible + - reg + - reg-names + - interrupts + - tx-threshold + - rx-threshold + +additionalProperties: false + +examples: + - | + #include + + tegra_utc: serial@c4e0000 { + compatible = "nvidia,tegra264-utc"; + reg = <0xc4e0000 0x8000>, <0xc4e8000 0x8000>; + reg-names = "tx", "rx"; + interrupts = ; + tx-threshold = <4>; + rx-threshold = <4>; + }; From eb07e3a946796b682b12897e080d856bc6d63c3d Mon Sep 17 00:00:00 2001 From: Kartik Rajput Date: Thu, 13 Feb 2025 18:26:12 +0530 Subject: [PATCH 0215/2157] serial: tegra-utc: Add driver for Tegra UART Trace Controller (UTC) The Tegra264 SoC supports the UART Trace Controller (UTC), which allows multiple firmware clients (up to 16) to share a single physical UART. Each client is provided with its own interrupt and has access to a 128-character wide FIFO for both transmit (TX) and receive (RX) operations. Add tegra-utc driver to support Tegra UART Trace Controller (UTC) client. Signed-off-by: Kartik Rajput Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213125612.4705-3-kkartik@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 23 ++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/tegra-utc.c | 625 +++++++++++++++++++++++++++++++++ 3 files changed, 649 insertions(+) create mode 100644 drivers/tty/serial/tegra-utc.c diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index f21cad6aaa47..79a8186d3361 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -287,6 +287,29 @@ config SERIAL_TEGRA_TCU_CONSOLE If unsure, say Y. +config SERIAL_TEGRA_UTC + tristate "NVIDIA Tegra UART Trace Controller" + depends on ARCH_TEGRA || COMPILE_TEST + select SERIAL_CORE + help + Support for Tegra UTC (UART Trace controller) client serial port. + + UTC is a HW based serial port that allows multiplexing multiple data + streams of up to 16 UTC clients into a single hardware serial port. + +config SERIAL_TEGRA_UTC_CONSOLE + bool "Support for console on a Tegra UTC serial port" + depends on SERIAL_TEGRA_UTC + select SERIAL_CORE_CONSOLE + default SERIAL_TEGRA_UTC + help + If you say Y here, it will be possible to use a Tegra UTC client as + the system console (the system console is the device which receives + all kernel messages and warnings and which allows logins in single + user mode). + + If unsure, say Y. + config SERIAL_MAX3100 tristate "MAX3100/3110/3111/3222 support" depends on SPI diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 3019260d9120..d58d9f719889 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_SERIAL_STM32) += stm32-usart.o obj-$(CONFIG_SERIAL_SUNPLUS) += sunplus-uart.o obj-$(CONFIG_SERIAL_TEGRA) += serial-tegra.o obj-$(CONFIG_SERIAL_TEGRA_TCU) += tegra-tcu.o +obj-$(CONFIG_SERIAL_TEGRA_UTC) += tegra-utc.o obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o diff --git a/drivers/tty/serial/tegra-utc.c b/drivers/tty/serial/tegra-utc.c new file mode 100644 index 000000000000..39b14fe813c9 --- /dev/null +++ b/drivers/tty/serial/tegra-utc.c @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// NVIDIA Tegra UTC (UART Trace Controller) driver. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TEGRA_UTC_ENABLE 0x000 +#define TEGRA_UTC_ENABLE_CLIENT_ENABLE BIT(0) + +#define TEGRA_UTC_FIFO_THRESHOLD 0x008 + +#define TEGRA_UTC_COMMAND 0x00c +#define TEGRA_UTC_COMMAND_RESET BIT(0) +#define TEGRA_UTC_COMMAND_FLUSH BIT(1) + +#define TEGRA_UTC_DATA 0x020 + +#define TEGRA_UTC_FIFO_STATUS 0x100 +#define TEGRA_UTC_FIFO_EMPTY BIT(0) +#define TEGRA_UTC_FIFO_FULL BIT(1) +#define TEGRA_UTC_FIFO_REQ BIT(2) +#define TEGRA_UTC_FIFO_OVERFLOW BIT(3) +#define TEGRA_UTC_FIFO_TIMEOUT BIT(4) + +#define TEGRA_UTC_FIFO_OCCUPANCY 0x104 + +#define TEGRA_UTC_INTR_STATUS 0x108 +#define TEGRA_UTC_INTR_SET 0x10c +#define TEGRA_UTC_INTR_MASK 0x110 +#define TEGRA_UTC_INTR_CLEAR 0x114 +#define TEGRA_UTC_INTR_EMPTY BIT(0) +#define TEGRA_UTC_INTR_FULL BIT(1) +#define TEGRA_UTC_INTR_REQ BIT(2) +#define TEGRA_UTC_INTR_OVERFLOW BIT(3) +#define TEGRA_UTC_INTR_TIMEOUT BIT(4) + +#define TEGRA_UTC_UART_NR 16 + +#define TEGRA_UTC_INTR_COMMON (TEGRA_UTC_INTR_REQ | TEGRA_UTC_INTR_FULL | TEGRA_UTC_INTR_EMPTY) + +struct tegra_utc_port { +#if IS_ENABLED(CONFIG_SERIAL_TEGRA_UTC_CONSOLE) + struct console console; +#endif + struct uart_port port; + + void __iomem *rx_base; + void __iomem *tx_base; + + u32 tx_irqmask; + u32 rx_irqmask; + + unsigned int fifosize; + u32 tx_threshold; + u32 rx_threshold; +}; + +static u32 tegra_utc_rx_readl(struct tegra_utc_port *tup, unsigned int offset) +{ + void __iomem *addr = tup->rx_base + offset; + + return readl_relaxed(addr); +} + +static void tegra_utc_rx_writel(struct tegra_utc_port *tup, u32 val, unsigned int offset) +{ + void __iomem *addr = tup->rx_base + offset; + + writel_relaxed(val, addr); +} + +static u32 tegra_utc_tx_readl(struct tegra_utc_port *tup, unsigned int offset) +{ + void __iomem *addr = tup->tx_base + offset; + + return readl_relaxed(addr); +} + +static void tegra_utc_tx_writel(struct tegra_utc_port *tup, u32 val, unsigned int offset) +{ + void __iomem *addr = tup->tx_base + offset; + + writel_relaxed(val, addr); +} + +static void tegra_utc_enable_tx_irq(struct tegra_utc_port *tup) +{ + tup->tx_irqmask = TEGRA_UTC_INTR_REQ; + + tegra_utc_tx_writel(tup, tup->tx_irqmask, TEGRA_UTC_INTR_MASK); + tegra_utc_tx_writel(tup, tup->tx_irqmask, TEGRA_UTC_INTR_SET); +} + +static void tegra_utc_disable_tx_irq(struct tegra_utc_port *tup) +{ + tup->tx_irqmask = 0x0; + + tegra_utc_tx_writel(tup, tup->tx_irqmask, TEGRA_UTC_INTR_MASK); + tegra_utc_tx_writel(tup, tup->tx_irqmask, TEGRA_UTC_INTR_SET); +} + +static void tegra_utc_stop_tx(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + tegra_utc_disable_tx_irq(tup); +} + +static void tegra_utc_init_tx(struct tegra_utc_port *tup) +{ + /* Disable TX. */ + tegra_utc_tx_writel(tup, 0x0, TEGRA_UTC_ENABLE); + + /* Update the FIFO Threshold. */ + tegra_utc_tx_writel(tup, tup->tx_threshold, TEGRA_UTC_FIFO_THRESHOLD); + + /* Clear and mask all the interrupts. */ + tegra_utc_tx_writel(tup, TEGRA_UTC_INTR_COMMON, TEGRA_UTC_INTR_CLEAR); + tegra_utc_disable_tx_irq(tup); + + /* Enable TX. */ + tegra_utc_tx_writel(tup, TEGRA_UTC_ENABLE_CLIENT_ENABLE, TEGRA_UTC_ENABLE); +} + +static void tegra_utc_init_rx(struct tegra_utc_port *tup) +{ + tup->rx_irqmask = TEGRA_UTC_INTR_REQ | TEGRA_UTC_INTR_TIMEOUT; + + tegra_utc_rx_writel(tup, TEGRA_UTC_COMMAND_RESET, TEGRA_UTC_COMMAND); + tegra_utc_rx_writel(tup, tup->rx_threshold, TEGRA_UTC_FIFO_THRESHOLD); + + /* Clear all the pending interrupts. */ + tegra_utc_rx_writel(tup, TEGRA_UTC_INTR_TIMEOUT | TEGRA_UTC_INTR_OVERFLOW | + TEGRA_UTC_INTR_COMMON, TEGRA_UTC_INTR_CLEAR); + tegra_utc_rx_writel(tup, tup->rx_irqmask, TEGRA_UTC_INTR_MASK); + tegra_utc_rx_writel(tup, tup->rx_irqmask, TEGRA_UTC_INTR_SET); + + /* Enable RX. */ + tegra_utc_rx_writel(tup, TEGRA_UTC_ENABLE_CLIENT_ENABLE, TEGRA_UTC_ENABLE); +} + +static bool tegra_utc_tx_chars(struct tegra_utc_port *tup) +{ + struct uart_port *port = &tup->port; + unsigned int pending; + u8 c; + + pending = uart_port_tx(port, c, + !(tegra_utc_tx_readl(tup, TEGRA_UTC_FIFO_STATUS) & TEGRA_UTC_FIFO_FULL), + tegra_utc_tx_writel(tup, c, TEGRA_UTC_DATA)); + + return pending; +} + +static void tegra_utc_rx_chars(struct tegra_utc_port *tup) +{ + struct tty_port *port = &tup->port.state->port; + unsigned int max_chars = 256; + u32 status; + int sysrq; + u32 ch; + + while (max_chars--) { + status = tegra_utc_rx_readl(tup, TEGRA_UTC_FIFO_STATUS); + if (status & TEGRA_UTC_FIFO_EMPTY) + break; + + ch = tegra_utc_rx_readl(tup, TEGRA_UTC_DATA); + tup->port.icount.rx++; + + if (status & TEGRA_UTC_FIFO_OVERFLOW) + tup->port.icount.overrun++; + + uart_port_unlock(&tup->port); + sysrq = uart_handle_sysrq_char(&tup->port, ch); + uart_port_lock(&tup->port); + + if (!sysrq) + tty_insert_flip_char(port, ch, TTY_NORMAL); + } + + tty_flip_buffer_push(port); +} + +static irqreturn_t tegra_utc_isr(int irq, void *dev_id) +{ + struct tegra_utc_port *tup = dev_id; + unsigned int handled = 0; + u32 status; + + uart_port_lock(&tup->port); + + /* Process RX_REQ and RX_TIMEOUT interrupts. */ + do { + status = tegra_utc_rx_readl(tup, TEGRA_UTC_INTR_STATUS) & tup->rx_irqmask; + if (status) { + tegra_utc_rx_writel(tup, tup->rx_irqmask, TEGRA_UTC_INTR_CLEAR); + tegra_utc_rx_chars(tup); + handled = 1; + } + } while (status); + + /* Process TX_REQ interrupt. */ + do { + status = tegra_utc_tx_readl(tup, TEGRA_UTC_INTR_STATUS) & tup->tx_irqmask; + if (status) { + tegra_utc_tx_writel(tup, tup->tx_irqmask, TEGRA_UTC_INTR_CLEAR); + tegra_utc_tx_chars(tup); + handled = 1; + } + } while (status); + + uart_port_unlock(&tup->port); + + return IRQ_RETVAL(handled); +} + +static unsigned int tegra_utc_tx_empty(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + return tegra_utc_tx_readl(tup, TEGRA_UTC_FIFO_OCCUPANCY) ? 0 : TIOCSER_TEMT; +} + +static void tegra_utc_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ +} + +static unsigned int tegra_utc_get_mctrl(struct uart_port *port) +{ + return 0; +} + +static void tegra_utc_start_tx(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + if (tegra_utc_tx_chars(tup)) + tegra_utc_enable_tx_irq(tup); +} + +static void tegra_utc_stop_rx(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + tup->rx_irqmask = 0x0; + tegra_utc_rx_writel(tup, tup->rx_irqmask, TEGRA_UTC_INTR_MASK); + tegra_utc_rx_writel(tup, tup->rx_irqmask, TEGRA_UTC_INTR_SET); +} + +static void tegra_utc_hw_init(struct tegra_utc_port *tup) +{ + tegra_utc_init_tx(tup); + tegra_utc_init_rx(tup); +} + +static int tegra_utc_startup(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + int ret; + + tegra_utc_hw_init(tup); + + /* Interrupt is dedicated to this UTC client. */ + ret = request_irq(port->irq, tegra_utc_isr, 0, dev_name(port->dev), tup); + if (ret < 0) + dev_err(port->dev, "failed to register interrupt handler\n"); + + return ret; +} + +static void tegra_utc_shutdown(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + tegra_utc_rx_writel(tup, 0x0, TEGRA_UTC_ENABLE); + free_irq(port->irq, tup); +} + +static void tegra_utc_set_termios(struct uart_port *port, struct ktermios *termios, + const struct ktermios *old) +{ + /* The Tegra UTC clients supports only 8-N-1 configuration without HW flow control */ + termios->c_cflag &= ~(CSIZE | CSTOPB | PARENB | PARODD); + termios->c_cflag &= ~(CMSPAR | CRTSCTS); + termios->c_cflag |= CS8 | CLOCAL; +} + +#ifdef CONFIG_CONSOLE_POLL + +static int tegra_utc_poll_init(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + tegra_utc_hw_init(tup); + return 0; +} + +static int tegra_utc_get_poll_char(struct uart_port *port) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + if (tegra_utc_rx_readl(tup, TEGRA_UTC_FIFO_STATUS) & TEGRA_UTC_FIFO_EMPTY) + return NO_POLL_CHAR; + + return tegra_utc_rx_readl(tup, TEGRA_UTC_DATA); +} + +static void tegra_utc_put_poll_char(struct uart_port *port, unsigned char ch) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + u32 val; + + read_poll_timeout_atomic(tegra_utc_tx_readl, val, !(val & TEGRA_UTC_FIFO_FULL), + 0, USEC_PER_SEC, false, tup, TEGRA_UTC_FIFO_STATUS); + + tegra_utc_tx_writel(tup, ch, TEGRA_UTC_DATA); +} + +#endif + +static const struct uart_ops tegra_utc_uart_ops = { + .tx_empty = tegra_utc_tx_empty, + .set_mctrl = tegra_utc_set_mctrl, + .get_mctrl = tegra_utc_get_mctrl, + .stop_tx = tegra_utc_stop_tx, + .start_tx = tegra_utc_start_tx, + .stop_rx = tegra_utc_stop_rx, + .startup = tegra_utc_startup, + .shutdown = tegra_utc_shutdown, + .set_termios = tegra_utc_set_termios, +#ifdef CONFIG_CONSOLE_POLL + .poll_init = tegra_utc_poll_init, + .poll_get_char = tegra_utc_get_poll_char, + .poll_put_char = tegra_utc_put_poll_char, +#endif +}; + +#if IS_ENABLED(CONFIG_SERIAL_TEGRA_UTC_CONSOLE) +#define TEGRA_UTC_DEFAULT_FIFO_THRESHOLD 4 +#define TEGRA_UTC_EARLYCON_MAX_BURST_SIZE 128 + +static void tegra_utc_putc(struct uart_port *port, unsigned char c) +{ + writel(c, port->membase + TEGRA_UTC_DATA); +} + +static void tegra_utc_early_write(struct console *con, const char *s, unsigned int n) +{ + struct earlycon_device *dev = con->data; + + while (n) { + u32 burst_size = TEGRA_UTC_EARLYCON_MAX_BURST_SIZE; + + burst_size -= readl(dev->port.membase + TEGRA_UTC_FIFO_OCCUPANCY); + if (n < burst_size) + burst_size = n; + + uart_console_write(&dev->port, s, burst_size, tegra_utc_putc); + + n -= burst_size; + s += burst_size; + } +} + +static int __init tegra_utc_early_console_setup(struct earlycon_device *device, const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + /* Configure TX */ + writel(TEGRA_UTC_COMMAND_FLUSH | TEGRA_UTC_COMMAND_RESET, + device->port.membase + TEGRA_UTC_COMMAND); + writel(TEGRA_UTC_DEFAULT_FIFO_THRESHOLD, device->port.membase + TEGRA_UTC_FIFO_THRESHOLD); + + /* Clear and mask all the interrupts. */ + writel(TEGRA_UTC_INTR_COMMON, device->port.membase + TEGRA_UTC_INTR_CLEAR); + + writel(0x0, device->port.membase + TEGRA_UTC_INTR_MASK); + writel(0x0, device->port.membase + TEGRA_UTC_INTR_SET); + + /* Enable TX. */ + writel(TEGRA_UTC_ENABLE_CLIENT_ENABLE, device->port.membase + TEGRA_UTC_ENABLE); + + device->con->write = tegra_utc_early_write; + + return 0; +} +OF_EARLYCON_DECLARE(tegra_utc, "nvidia,tegra264-utc", tegra_utc_early_console_setup); + +static void tegra_utc_console_putchar(struct uart_port *port, unsigned char ch) +{ + struct tegra_utc_port *tup = container_of(port, struct tegra_utc_port, port); + + tegra_utc_tx_writel(tup, ch, TEGRA_UTC_DATA); +} + +static void tegra_utc_console_write_atomic(struct console *cons, struct nbcon_write_context *wctxt) +{ + struct tegra_utc_port *tup = container_of(cons, struct tegra_utc_port, console); + unsigned int len; + char *outbuf; + + if (!nbcon_enter_unsafe(wctxt)) + return; + + outbuf = wctxt->outbuf; + len = wctxt->len; + + while (len) { + u32 burst_size = tup->fifosize; + + burst_size -= tegra_utc_tx_readl(tup, TEGRA_UTC_FIFO_OCCUPANCY); + if (len < burst_size) + burst_size = len; + + uart_console_write(&tup->port, outbuf, burst_size, tegra_utc_console_putchar); + + outbuf += burst_size; + len -= burst_size; + }; + + nbcon_exit_unsafe(wctxt); +} + +static void tegra_utc_console_write_thread(struct console *cons, struct nbcon_write_context *wctxt) +{ + struct tegra_utc_port *tup = container_of(cons, struct tegra_utc_port, console); + unsigned int len = READ_ONCE(wctxt->len); + unsigned int i; + u32 val; + + for (i = 0; i < len; i++) { + if (!nbcon_enter_unsafe(wctxt)) + break; + + read_poll_timeout_atomic(tegra_utc_tx_readl, val, !(val & TEGRA_UTC_FIFO_FULL), + 0, USEC_PER_SEC, false, tup, TEGRA_UTC_FIFO_STATUS); + uart_console_write(&tup->port, wctxt->outbuf + i, 1, tegra_utc_console_putchar); + + if (!nbcon_exit_unsafe(wctxt)) + break; + } +} + +static void tegra_utc_console_device_lock(struct console *cons, unsigned long *flags) +{ + struct tegra_utc_port *tup = container_of(cons, struct tegra_utc_port, console); + struct uart_port *port = &tup->port; + + __uart_port_lock_irqsave(port, flags); +} + +static void tegra_utc_console_device_unlock(struct console *cons, unsigned long flags) +{ + struct tegra_utc_port *tup = container_of(cons, struct tegra_utc_port, console); + struct uart_port *port = &tup->port; + + __uart_port_unlock_irqrestore(port, flags); +} + +static int tegra_utc_console_setup(struct console *cons, char *options) +{ + struct tegra_utc_port *tup = container_of(cons, struct tegra_utc_port, console); + + tegra_utc_init_tx(tup); + + return 0; +} +#endif + +static struct uart_driver tegra_utc_driver = { + .driver_name = "tegra-utc", + .dev_name = "ttyUTC", + .nr = TEGRA_UTC_UART_NR, +}; + +static int tegra_utc_setup_port(struct device *dev, struct tegra_utc_port *tup) +{ + tup->port.dev = dev; + tup->port.fifosize = tup->fifosize; + tup->port.flags = UPF_BOOT_AUTOCONF; + tup->port.iotype = UPIO_MEM; + tup->port.ops = &tegra_utc_uart_ops; + tup->port.type = PORT_TEGRA_TCU; + tup->port.private_data = tup; + +#if IS_ENABLED(CONFIG_SERIAL_TEGRA_UTC_CONSOLE) + strscpy(tup->console.name, "ttyUTC", sizeof(tup->console.name)); + tup->console.write_atomic = tegra_utc_console_write_atomic; + tup->console.write_thread = tegra_utc_console_write_thread; + tup->console.device_lock = tegra_utc_console_device_lock; + tup->console.device_unlock = tegra_utc_console_device_unlock; + tup->console.device = uart_console_device; + tup->console.setup = tegra_utc_console_setup; + tup->console.flags = CON_PRINTBUFFER | CON_NBCON; + tup->console.data = &tegra_utc_driver; +#endif + + return uart_read_port_properties(&tup->port); +} + +static int tegra_utc_register_port(struct tegra_utc_port *tup) +{ + int ret; + + ret = uart_add_one_port(&tegra_utc_driver, &tup->port); + if (ret) + return ret; + +#if IS_ENABLED(CONFIG_SERIAL_TEGRA_UTC_CONSOLE) + register_console(&tup->console); +#endif + + return 0; +} + +static int tegra_utc_probe(struct platform_device *pdev) +{ + const unsigned int *soc_fifosize; + struct device *dev = &pdev->dev; + struct tegra_utc_port *tup; + int ret; + + tup = devm_kzalloc(dev, sizeof(*tup), GFP_KERNEL); + if (!tup) + return -ENOMEM; + + ret = device_property_read_u32(dev, "tx-threshold", &tup->tx_threshold); + if (ret) + return dev_err_probe(dev, ret, "missing %s property\n", "tx-threshold"); + + ret = device_property_read_u32(dev, "rx-threshold", &tup->rx_threshold); + if (ret) + return dev_err_probe(dev, ret, "missing %s property\n", "rx-threshold"); + + soc_fifosize = device_get_match_data(dev); + tup->fifosize = *soc_fifosize; + + tup->tx_base = devm_platform_ioremap_resource_byname(pdev, "tx"); + if (IS_ERR(tup->tx_base)) + return PTR_ERR(tup->tx_base); + + tup->rx_base = devm_platform_ioremap_resource_byname(pdev, "rx"); + if (IS_ERR(tup->rx_base)) + return PTR_ERR(tup->rx_base); + + ret = tegra_utc_setup_port(dev, tup); + if (ret) + dev_err_probe(dev, ret, "failed to setup uart port\n"); + + platform_set_drvdata(pdev, tup); + + return tegra_utc_register_port(tup); +} + +static void tegra_utc_remove(struct platform_device *pdev) +{ + struct tegra_utc_port *tup = platform_get_drvdata(pdev); + +#if IS_ENABLED(CONFIG_SERIAL_TEGRA_UTC_CONSOLE) + unregister_console(&tup->console); +#endif + uart_remove_one_port(&tegra_utc_driver, &tup->port); +} + +static const unsigned int tegra264_utc_soc = 128; + +static const struct of_device_id tegra_utc_of_match[] = { + { .compatible = "nvidia,tegra264-utc", .data = &tegra264_utc_soc }, + {} +}; +MODULE_DEVICE_TABLE(of, tegra_utc_of_match); + +static struct platform_driver tegra_utc_platform_driver = { + .probe = tegra_utc_probe, + .remove = tegra_utc_remove, + .driver = { + .name = "tegra-utc", + .of_match_table = tegra_utc_of_match, + }, +}; + +static int __init tegra_utc_init(void) +{ + int ret; + + ret = uart_register_driver(&tegra_utc_driver); + if (ret) + return ret; + + ret = platform_driver_register(&tegra_utc_platform_driver); + if (ret) + uart_unregister_driver(&tegra_utc_driver); + + return ret; +} +module_init(tegra_utc_init); + +static void __exit tegra_utc_exit(void) +{ + platform_driver_unregister(&tegra_utc_platform_driver); + uart_unregister_driver(&tegra_utc_driver); +} +module_exit(tegra_utc_exit); + +MODULE_AUTHOR("Kartik Rajput "); +MODULE_DESCRIPTION("Tegra UART Trace Controller"); +MODULE_LICENSE("GPL"); From b58f0f86fd6156d7b084257f5c91ceaf7d760927 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 4 Dec 2024 13:09:05 +0800 Subject: [PATCH 0216/2157] phy: fsl-imx8mq-usb: add tca function driver for imx95 The i.MX95 USB3 phy has a Type-C Assist block (TCA). This block consists two functional blocks (XBar assist and VBus assist) and one system access interface using APB. The primary functionality of XBar assist is: - switching lane for flip - moving unused lanes into lower power states. This info can be get from: i.MX95 RM Chapter 163.3.8 Type-C assist (TCA) block. This will add support for TCA block to achieve lane switching and tca lower power functionality. Signed-off-by: Xu Yang Reviewed-by: Jun Li Link: https://lore.kernel.org/r/20241204050907.1081781-1-xu.yang_2@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/Kconfig | 1 + drivers/phy/freescale/phy-fsl-imx8mq-usb.c | 240 +++++++++++++++++++++ 2 files changed, 241 insertions(+) diff --git a/drivers/phy/freescale/Kconfig b/drivers/phy/freescale/Kconfig index dcd9acff6d01..81f53564ee15 100644 --- a/drivers/phy/freescale/Kconfig +++ b/drivers/phy/freescale/Kconfig @@ -5,6 +5,7 @@ if (ARCH_MXC && ARM64) || COMPILE_TEST config PHY_FSL_IMX8MQ_USB tristate "Freescale i.MX8M USB3 PHY" depends on OF && HAS_IOMEM + depends on TYPEC || TYPEC=n select GENERIC_PHY default ARCH_MXC && ARM64 diff --git a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c index adc6394626ce..a974ef94de9a 100644 --- a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c +++ b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c @@ -10,6 +10,7 @@ #include #include #include +#include #define PHY_CTRL0 0x0 #define PHY_CTRL0_REF_SSP_EN BIT(2) @@ -50,11 +51,66 @@ #define PHY_TUNE_DEFAULT 0xffffffff +#define TCA_CLK_RST 0x00 +#define TCA_CLK_RST_SW BIT(9) +#define TCA_CLK_RST_REF_CLK_EN BIT(1) +#define TCA_CLK_RST_SUSPEND_CLK_EN BIT(0) + +#define TCA_INTR_EN 0x04 +#define TCA_INTR_STS 0x08 + +#define TCA_GCFG 0x10 +#define TCA_GCFG_ROLE_HSTDEV BIT(4) +#define TCA_GCFG_OP_MODE GENMASK(1, 0) +#define TCA_GCFG_OP_MODE_SYSMODE 0 +#define TCA_GCFG_OP_MODE_SYNCMODE 1 + +#define TCA_TCPC 0x14 +#define TCA_TCPC_VALID BIT(4) +#define TCA_TCPC_LOW_POWER_EN BIT(3) +#define TCA_TCPC_ORIENTATION_NORMAL BIT(2) +#define TCA_TCPC_MUX_CONTRL GENMASK(1, 0) +#define TCA_TCPC_MUX_CONTRL_NO_CONN 0 +#define TCA_TCPC_MUX_CONTRL_USB_CONN 1 + +#define TCA_SYSMODE_CFG 0x18 +#define TCA_SYSMODE_TCPC_DISABLE BIT(3) +#define TCA_SYSMODE_TCPC_FLIP BIT(2) + +#define TCA_CTRLSYNCMODE_CFG0 0x20 +#define TCA_CTRLSYNCMODE_CFG1 0x20 + +#define TCA_PSTATE 0x30 +#define TCA_PSTATE_CM_STS BIT(4) +#define TCA_PSTATE_TX_STS BIT(3) +#define TCA_PSTATE_RX_PLL_STS BIT(2) +#define TCA_PSTATE_PIPE0_POWER_DOWN GENMASK(1, 0) + +#define TCA_GEN_STATUS 0x34 +#define TCA_GEN_DEV_POR BIT(12) +#define TCA_GEN_REF_CLK_SEL BIT(8) +#define TCA_GEN_TYPEC_FLIP_INVERT BIT(4) +#define TCA_GEN_PHY_TYPEC_DISABLE BIT(3) +#define TCA_GEN_PHY_TYPEC_FLIP BIT(2) + +#define TCA_VBUS_CTRL 0x40 +#define TCA_VBUS_STATUS 0x44 + +#define TCA_INFO 0xfc + +struct tca_blk { + struct typec_switch_dev *sw; + void __iomem *base; + struct mutex mutex; + enum typec_orientation orientation; +}; + struct imx8mq_usb_phy { struct phy *phy; struct clk *clk; void __iomem *base; struct regulator *vbus; + struct tca_blk *tca; u32 pcs_tx_swing_full; u32 pcs_tx_deemph_3p5db; u32 tx_vref_tune; @@ -64,6 +120,172 @@ struct imx8mq_usb_phy { u32 comp_dis_tune; }; + +static void tca_blk_orientation_set(struct tca_blk *tca, + enum typec_orientation orientation); + +#ifdef CONFIG_TYPEC + +static int tca_blk_typec_switch_set(struct typec_switch_dev *sw, + enum typec_orientation orientation) +{ + struct imx8mq_usb_phy *imx_phy = typec_switch_get_drvdata(sw); + struct tca_blk *tca = imx_phy->tca; + int ret; + + if (tca->orientation == orientation) + return 0; + + ret = clk_prepare_enable(imx_phy->clk); + if (ret) + return ret; + + tca_blk_orientation_set(tca, orientation); + clk_disable_unprepare(imx_phy->clk); + + return 0; +} + +static struct typec_switch_dev *tca_blk_get_typec_switch(struct platform_device *pdev, + struct imx8mq_usb_phy *imx_phy) +{ + struct device *dev = &pdev->dev; + struct typec_switch_dev *sw; + struct typec_switch_desc sw_desc = { }; + + sw_desc.drvdata = imx_phy; + sw_desc.fwnode = dev->fwnode; + sw_desc.set = tca_blk_typec_switch_set; + sw_desc.name = NULL; + + sw = typec_switch_register(dev, &sw_desc); + if (IS_ERR(sw)) { + dev_err(dev, "Error register tca orientation switch: %ld", + PTR_ERR(sw)); + return NULL; + } + + return sw; +} + +static void tca_blk_put_typec_switch(struct typec_switch_dev *sw) +{ + typec_switch_unregister(sw); +} + +#else + +static struct typec_switch_dev *tca_blk_get_typec_switch(struct platform_device *pdev, + struct imx8mq_usb_phy *imx_phy) +{ + return NULL; +} + +static void tca_blk_put_typec_switch(struct typec_switch_dev *sw) {} + +#endif /* CONFIG_TYPEC */ + +static void tca_blk_orientation_set(struct tca_blk *tca, + enum typec_orientation orientation) +{ + u32 val; + + mutex_lock(&tca->mutex); + + if (orientation == TYPEC_ORIENTATION_NONE) { + /* + * use Controller Synced Mode for TCA low power enable and + * put PHY to USB safe state. + */ + val = FIELD_PREP(TCA_GCFG_OP_MODE, TCA_GCFG_OP_MODE_SYNCMODE); + writel(val, tca->base + TCA_GCFG); + + val = TCA_TCPC_VALID | TCA_TCPC_LOW_POWER_EN; + writel(val, tca->base + TCA_TCPC); + + goto out; + } + + /* use System Configuration Mode for TCA mux control. */ + val = FIELD_PREP(TCA_GCFG_OP_MODE, TCA_GCFG_OP_MODE_SYSMODE); + writel(val, tca->base + TCA_GCFG); + + /* Disable TCA module */ + val = readl(tca->base + TCA_SYSMODE_CFG); + val |= TCA_SYSMODE_TCPC_DISABLE; + writel(val, tca->base + TCA_SYSMODE_CFG); + + if (orientation == TYPEC_ORIENTATION_REVERSE) + val |= TCA_SYSMODE_TCPC_FLIP; + else if (orientation == TYPEC_ORIENTATION_NORMAL) + val &= ~TCA_SYSMODE_TCPC_FLIP; + + writel(val, tca->base + TCA_SYSMODE_CFG); + + /* Enable TCA module */ + val &= ~TCA_SYSMODE_TCPC_DISABLE; + writel(val, tca->base + TCA_SYSMODE_CFG); + +out: + tca->orientation = orientation; + mutex_unlock(&tca->mutex); +} + +static void tca_blk_init(struct tca_blk *tca) +{ + u32 val; + + /* reset XBar block */ + val = readl(tca->base + TCA_CLK_RST); + val &= ~TCA_CLK_RST_SW; + writel(val, tca->base + TCA_CLK_RST); + + udelay(100); + + /* clear reset */ + val |= TCA_CLK_RST_SW; + writel(val, tca->base + TCA_CLK_RST); + + tca_blk_orientation_set(tca, tca->orientation); +} + +static struct tca_blk *imx95_usb_phy_get_tca(struct platform_device *pdev, + struct imx8mq_usb_phy *imx_phy) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct tca_blk *tca; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return NULL; + + tca = devm_kzalloc(dev, sizeof(*tca), GFP_KERNEL); + if (!tca) + return ERR_PTR(-ENOMEM); + + tca->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tca->base)) + return ERR_CAST(tca->base); + + mutex_init(&tca->mutex); + + tca->orientation = TYPEC_ORIENTATION_NORMAL; + tca->sw = tca_blk_get_typec_switch(pdev, imx_phy); + + return tca; +} + +static void imx95_usb_phy_put_tca(struct imx8mq_usb_phy *imx_phy) +{ + struct tca_blk *tca = imx_phy->tca; + + if (!tca) + return; + + tca_blk_put_typec_switch(tca->sw); +} + static u32 phy_tx_vref_tune_from_property(u32 percent) { percent = clamp(percent, 94U, 124U); @@ -315,6 +537,9 @@ static int imx8mp_usb_phy_init(struct phy *phy) imx8m_phy_tune(imx_phy); + if (imx_phy->tca) + tca_blk_init(imx_phy->tca); + return 0; } @@ -359,6 +584,8 @@ static const struct of_device_id imx8mq_usb_phy_of_match[] = { .data = &imx8mq_usb_phy_ops,}, {.compatible = "fsl,imx8mp-usb-phy", .data = &imx8mp_usb_phy_ops,}, + {.compatible = "fsl,imx95-usb-phy", + .data = &imx8mp_usb_phy_ops,}, { } }; MODULE_DEVICE_TABLE(of, imx8mq_usb_phy_of_match); @@ -398,6 +625,11 @@ static int imx8mq_usb_phy_probe(struct platform_device *pdev) phy_set_drvdata(imx_phy->phy, imx_phy); + imx_phy->tca = imx95_usb_phy_get_tca(pdev, imx_phy); + if (IS_ERR(imx_phy->tca)) + return dev_err_probe(dev, PTR_ERR(imx_phy->tca), + "failed to get tca\n"); + imx8m_get_phy_tuning_data(imx_phy); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); @@ -405,8 +637,16 @@ static int imx8mq_usb_phy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } +static void imx8mq_usb_phy_remove(struct platform_device *pdev) +{ + struct imx8mq_usb_phy *imx_phy = platform_get_drvdata(pdev); + + imx95_usb_phy_put_tca(imx_phy); +} + static struct platform_driver imx8mq_usb_phy_driver = { .probe = imx8mq_usb_phy_probe, + .remove = imx8mq_usb_phy_remove, .driver = { .name = "imx8mq-usb-phy", .of_match_table = imx8mq_usb_phy_of_match, From 7dff18535b93ea1ce6dbaf36b7ae670f04113d08 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 20 Jan 2025 15:35:03 +0100 Subject: [PATCH 0217/2157] phy: PHY_LAN966X_SERDES should depend on SOC_LAN966 || MCHP_LAN966X_PCI The Microchip LAN966X SerDes PHY is only present on Microchip LAN966x SoCs. However, when used as a PCI endpoint, all peripherals of the LAN966x SoC can be accessed by the PCI host. Hence add dependencies on SOC_LAN966 and MCHP_LAN966X_PCI, to prevent asking the user about this driver when configuring a kernel without Microchip LAN966x SoC and PCIe support. Signed-off-by: Geert Uytterhoeven Acked-by: Herve Codina Link: https://lore.kernel.org/r/369233dfded88ff6fb342e03794fe31985d84d82.1737383314.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/microchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/microchip/Kconfig b/drivers/phy/microchip/Kconfig index 38039ed0754c..2f0045e874ac 100644 --- a/drivers/phy/microchip/Kconfig +++ b/drivers/phy/microchip/Kconfig @@ -15,6 +15,7 @@ config PHY_SPARX5_SERDES config PHY_LAN966X_SERDES tristate "SerDes PHY driver for Microchip LAN966X" select GENERIC_PHY + depends on SOC_LAN966 || MCHP_LAN966X_PCI || COMPILE_TEST depends on OF depends on MFD_SYSCON help From 400188ae361a9d9a72a47a6cedaf2d2efcc84aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:18 +0100 Subject: [PATCH 0218/2157] kernfs: Acquire kernfs_rwsem in kernfs_notify_workfn(). kernfs_notify_workfn() dereferences kernfs_node::name and passes it later to fsnotify(). If the node is renamed then the previously observed name pointer becomes invalid. Acquire kernfs_root::kernfs_rwsem to block renames of the node. Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-2-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 0eb320617d7b..c4ffa8dc89eb 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -911,6 +911,7 @@ static void kernfs_notify_workfn(struct work_struct *work) /* kick fsnotify */ down_read(&root->kernfs_supers_rwsem); + down_read(&root->kernfs_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; @@ -947,6 +948,7 @@ static void kernfs_notify_workfn(struct work_struct *work) iput(inode); } + up_read(&root->kernfs_rwsem); up_read(&root->kernfs_supers_rwsem); kernfs_put(kn); goto repeat; From 122ab92dee80582c39740609a627198dd5b6b595 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:19 +0100 Subject: [PATCH 0219/2157] kernfs: Acquire kernfs_rwsem in kernfs_get_parent_dentry(). kernfs_get_parent_dentry() passes kernfs_node::parent to kernfs_get_inode(). Acquire kernfs_root::kernfs_rwsem to ensure kernfs_node::parent isn't replaced during the operation. Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-3-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 1358c21837f1..b9b16e97bff1 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -145,7 +145,9 @@ static struct dentry *kernfs_fh_to_parent(struct super_block *sb, static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); + struct kernfs_root *root = kernfs_root(kn); + guard(rwsem_read)(&root->kernfs_rwsem); return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); } From 5b2fabf7fe8f745ff214ff003e6067b64f172271 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:20 +0100 Subject: [PATCH 0220/2157] kernfs: Acquire kernfs_rwsem in kernfs_node_dentry(). kernfs_node_dentry() passes kernfs_node::name to lookup_positive_unlocked(). Acquire kernfs_root::kernfs_rwsem to ensure the node is not renamed during the operation. Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-4-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index b9b16e97bff1..4a0ff08d589c 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -209,6 +209,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, { struct dentry *dentry; struct kernfs_node *knparent; + struct kernfs_root *root; BUG_ON(sb->s_op != &kernfs_sops); @@ -218,6 +219,9 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, if (!kn->parent) return dentry; + root = kernfs_root(kn); + guard(rwsem_read)(&root->kernfs_rwsem); + knparent = find_next_ancestor(kn, NULL); if (WARN_ON(!knparent)) { dput(dentry); From 9aab10a0249eab4ec77c6a5e4f66442610c12a09 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:21 +0100 Subject: [PATCH 0221/2157] kernfs: Don't re-lock kernfs_root::kernfs_rwsem in kernfs_fop_readdir(). The readdir operation iterates over all entries and invokes dir_emit() for every entry passing kernfs_node::name as argument. Since the name argument can change, and become invalid, the kernfs_root::kernfs_rwsem lock should not be dropped to prevent renames during the operation. The lock drop around dir_emit() has been initially introduced in commit 1e5289c97bba2 ("sysfs: Cache the last sysfs_dirent to improve readdir scalability v2") to avoid holding a global lock during a page fault. The lock drop is wrong since the support of renames and not a big burden since the lock is no longer global. Don't re-acquire kernfs_root::kernfs_rwsem while copying the name to the userpace buffer. Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-5-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5f0f8b95f44c..43fbada67838 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1869,10 +1869,10 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) file->private_data = pos; kernfs_get(pos); - up_read(&root->kernfs_rwsem); - if (!dir_emit(ctx, name, len, ino, type)) + if (!dir_emit(ctx, name, len, ino, type)) { + up_read(&root->kernfs_rwsem); return 0; - down_read(&root->kernfs_rwsem); + } } up_read(&root->kernfs_rwsem); file->private_data = NULL; From 633488947ef66b194377411322dc9e12aab79b65 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:22 +0100 Subject: [PATCH 0222/2157] kernfs: Use RCU to access kernfs_node::parent. kernfs_rename_lock is used to obtain stable kernfs_node::{name|parent} pointer. This is a preparation to access kernfs_node::parent under RCU and ensure that the pointer remains stable under the RCU lifetime guarantees. For a complete path, as it is done in kernfs_path_from_node(), the kernfs_rename_lock is still required in order to obtain a stable parent relationship while computing the relevant node depth. This must not change while the nodes are inspected in order to build the path. If the kernfs user never moves the nodes (changes the parent) then the kernfs_rename_lock is not required and the RCU guarantees are sufficient. This "restriction" can be set with KERNFS_ROOT_INVARIANT_PARENT. Otherwise the lock is required. Rename kernfs_node::parent to kernfs_node::__parent to denote the RCU access and use RCU accessor while accessing the node. Make cgroup use KERNFS_ROOT_INVARIANT_PARENT since the parent here can not change. Acked-by: Tejun Heo Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-6-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 65 +++++++++---- fs/kernfs/dir.c | 96 ++++++++++++------- fs/kernfs/kernfs-internal.h | 32 ++++++- fs/kernfs/mount.c | 10 +- fs/kernfs/symlink.c | 23 ++--- fs/sysfs/file.c | 24 +++-- include/linux/kernfs.h | 10 +- kernel/cgroup/cgroup-v1.c | 2 +- kernel/cgroup/cgroup.c | 24 ++++- .../selftests/bpf/progs/profiler.inc.h | 2 +- 10 files changed, 195 insertions(+), 93 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6419e04d8a7b..55dcdeea1a1b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -956,10 +956,20 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } +static void *rdt_kn_parent_priv(struct kernfs_node *kn) +{ + /* + * The parent pointer is only valid within RCU section since it can be + * replaced. + */ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%u\n", s->num_closid); return 0; @@ -968,7 +978,7 @@ static int rdt_num_closids_show(struct kernfs_open_file *of, static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->default_ctrl); @@ -978,7 +988,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); @@ -988,7 +998,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); @@ -1012,7 +1022,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. @@ -1094,7 +1104,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); @@ -1104,7 +1114,7 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, static int rdt_num_rmids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%d\n", r->num_rmid); @@ -1114,7 +1124,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, static int rdt_mon_features_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; list_for_each_entry(mevt, &r->evt_list, list) { @@ -1129,7 +1139,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); @@ -1139,7 +1149,7 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); @@ -1157,7 +1167,7 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) @@ -1222,7 +1232,7 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); @@ -1634,7 +1644,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid static int mbm_total_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); @@ -1644,7 +1654,7 @@ static int mbm_total_bytes_config_show(struct kernfs_open_file *of, static int mbm_local_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); @@ -1750,7 +1760,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -1776,7 +1786,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -2440,12 +2450,13 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) * resource. "info" and its subdirectories don't * have rdtgroup structures, so return NULL here. */ - if (kn == kn_info || kn->parent == kn_info) + if (kn == kn_info || + rcu_access_pointer(kn->__parent) == kn_info) return NULL; else return kn->priv; } else { - return kn->parent->priv; + return rdt_kn_parent_priv(kn); } } @@ -3771,9 +3782,18 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) return 0; } +static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) +{ + /* + * Valid within the RCU section it was obtained or while rdtgroup_mutex + * is held. + */ + return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); +} + static int rdtgroup_rmdir(struct kernfs_node *kn) { - struct kernfs_node *parent_kn = kn->parent; + struct kernfs_node *parent_kn; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; int ret = 0; @@ -3786,6 +3806,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = -EPERM; goto out; } + parent_kn = rdt_kn_parent(kn); /* * If the rdtgroup is a ctrl_mon group and parent directory @@ -3854,6 +3875,7 @@ static void mongrp_reparent(struct rdtgroup *rdtgrp, static int rdtgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { + struct kernfs_node *kn_parent; struct rdtgroup *new_prdtgrp; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; @@ -3888,8 +3910,9 @@ static int rdtgroup_rename(struct kernfs_node *kn, goto out; } - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || - !is_mon_groups(kn->parent, kn->name)) { + kn_parent = rdt_kn_parent(kn); + if (rdtgrp->type != RDTMON_GROUP || !kn_parent || + !is_mon_groups(kn_parent, kn->name)) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 43fbada67838..1d370c497e8a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -17,7 +17,7 @@ #include "kernfs-internal.h" -static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ +DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ /* * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to * call pr_cont() while holding rename_lock. Because sometimes pr_cont() @@ -56,7 +56,7 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) if (!kn) return strscpy(buf, "(null)", buflen); - return strscpy(buf, kn->parent ? kn->name : "/", buflen); + return strscpy(buf, rcu_access_pointer(kn->__parent) ? kn->name : "/", buflen); } /* kernfs_node_depth - compute depth from @from to @to */ @@ -64,9 +64,9 @@ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { size_t depth = 0; - while (to->parent && to != from) { + while (rcu_dereference(to->__parent) && to != from) { depth++; - to = to->parent; + to = rcu_dereference(to->__parent); } return depth; } @@ -84,18 +84,18 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, db = kernfs_depth(rb->kn, b); while (da > db) { - a = a->parent; + a = rcu_dereference(a->__parent); da--; } while (db > da) { - b = b->parent; + b = rcu_dereference(b->__parent); db--; } /* worst case b and a will be the same at root */ while (b != a) { - b = b->parent; - a = a->parent; + b = rcu_dereference(b->__parent); + a = rcu_dereference(a->__parent); } return a; @@ -168,8 +168,9 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { + for (kn = kn_to, j = 0; j < i; j++) - kn = kn->parent; + kn = rcu_dereference(kn->__parent); len += scnprintf(buf + len, buflen - len, "/%s", kn->name); } @@ -226,6 +227,7 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, unsigned long flags; int ret; + guard(rcu)(); read_lock_irqsave(&kernfs_rename_lock, flags); ret = kernfs_path_from_node_locked(to, from, buf, buflen); read_unlock_irqrestore(&kernfs_rename_lock, flags); @@ -295,7 +297,7 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) unsigned long flags; read_lock_irqsave(&kernfs_rename_lock, flags); - parent = kn->parent; + parent = kernfs_parent(kn); kernfs_get(parent); read_unlock_irqrestore(&kernfs_rename_lock, flags); @@ -360,8 +362,12 @@ static int kernfs_sd_compare(const struct kernfs_node *left, */ static int kernfs_link_sibling(struct kernfs_node *kn) { - struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; + struct kernfs_node *kn_parent; + struct rb_node **node; + + kn_parent = kernfs_parent(kn); + node = &kn_parent->dir.children.rb_node; while (*node) { struct kernfs_node *pos; @@ -380,13 +386,13 @@ static int kernfs_link_sibling(struct kernfs_node *kn) /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); - rb_insert_color(&kn->rb, &kn->parent->dir.children); + rb_insert_color(&kn->rb, &kn_parent->dir.children); /* successfully added, account subdir number */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs++; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); return 0; @@ -407,16 +413,19 @@ static int kernfs_link_sibling(struct kernfs_node *kn) */ static bool kernfs_unlink_sibling(struct kernfs_node *kn) { + struct kernfs_node *kn_parent; + if (RB_EMPTY_NODE(&kn->rb)) return false; + kn_parent = kernfs_parent(kn); down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs--; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs--; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); - rb_erase(&kn->rb, &kn->parent->dir.children); + rb_erase(&kn->rb, &kn_parent->dir.children); RB_CLEAR_NODE(&kn->rb); return true; } @@ -562,7 +571,7 @@ void kernfs_put(struct kernfs_node *kn) * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ - parent = kn->parent; + parent = kernfs_parent(kn); WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", @@ -701,7 +710,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); - kn->parent = parent; + rcu_assign_pointer(kn->__parent, parent); } return kn; } @@ -769,13 +778,14 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, */ int kernfs_add_one(struct kernfs_node *kn) { - struct kernfs_node *parent = kn->parent; - struct kernfs_root *root = kernfs_root(parent); + struct kernfs_root *root = kernfs_root(kn); struct kernfs_iattrs *ps_iattr; + struct kernfs_node *parent; bool has_ns; int ret; down_write(&root->kernfs_rwsem); + parent = kernfs_parent(kn); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); @@ -949,6 +959,11 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, return kn; } +unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ + return kernfs_root(kn)->flags; +} + /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy @@ -1112,7 +1127,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { - struct kernfs_node *kn; + struct kernfs_node *kn, *parent; struct kernfs_root *root; if (flags & LOOKUP_RCU) @@ -1163,8 +1178,9 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, if (!kernfs_active(kn)) goto out_bad; + parent = kernfs_parent(kn); /* The kernfs node has been moved? */ - if (kernfs_dentry_node(dentry->d_parent) != kn->parent) + if (kernfs_dentry_node(dentry->d_parent) != parent) goto out_bad; /* The kernfs node has been renamed */ @@ -1172,7 +1188,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, goto out_bad; /* The kernfs node has been moved to a different namespace */ - if (kn->parent && kernfs_ns_enabled(kn->parent) && + if (parent && kernfs_ns_enabled(parent) && kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; @@ -1365,7 +1381,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return kernfs_leftmost_descendant(rb_to_kn(rbn)); /* no sibling left, visit parent */ - return pos->parent; + return kernfs_parent(pos); } static void kernfs_activate_one(struct kernfs_node *kn) @@ -1377,7 +1393,7 @@ static void kernfs_activate_one(struct kernfs_node *kn) if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) return; - WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb)); + WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb)); WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); @@ -1447,7 +1463,7 @@ void kernfs_show(struct kernfs_node *kn, bool show) static void __kernfs_remove(struct kernfs_node *kn) { - struct kernfs_node *pos; + struct kernfs_node *pos, *parent; /* Short-circuit if non-root @kn has already finished removal. */ if (!kn) @@ -1459,7 +1475,7 @@ static void __kernfs_remove(struct kernfs_node *kn) * This is for kernfs_remove_self() which plays with active ref * after removal. */ - if (kn->parent && RB_EMPTY_NODE(&kn->rb)) + if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kn->name); @@ -1485,14 +1501,14 @@ static void __kernfs_remove(struct kernfs_node *kn) kernfs_get(pos); kernfs_drain(pos); - + parent = kernfs_parent(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it * to decide who's responsible for cleanups. */ - if (!pos->parent || kernfs_unlink_sibling(pos)) { + if (!parent || kernfs_unlink_sibling(pos)) { struct kernfs_iattrs *ps_iattr = - pos->parent ? pos->parent->iattr : NULL; + parent ? parent->iattr : NULL; /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); @@ -1722,7 +1738,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, int error; /* can't move or rename root */ - if (!kn->parent) + if (!rcu_access_pointer(kn->__parent)) return -EINVAL; root = kernfs_root(kn); @@ -1733,8 +1749,15 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; + old_parent = kernfs_parent(kn); + if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) { + error = -EINVAL; + if (WARN_ON_ONCE(old_parent != new_parent)) + goto out; + } + error = 0; - if ((kn->parent == new_parent) && (kn->ns == new_ns) && + if ((old_parent == new_parent) && (kn->ns == new_ns) && (strcmp(kn->name, new_name) == 0)) goto out; /* nothing to rename */ @@ -1761,8 +1784,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, /* rename_lock protects ->parent and ->name accessors */ write_lock_irq(&kernfs_rename_lock); - old_parent = kn->parent; - kn->parent = new_parent; + old_parent = kernfs_parent(kn); + rcu_assign_pointer(kn->__parent, new_parent); kn->ns = new_ns; if (new_name) { @@ -1795,7 +1818,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, { if (pos) { int valid = kernfs_active(pos) && - pos->parent == parent && hash == pos->hash; + rcu_access_pointer(pos->__parent) == parent && + hash == pos->hash; kernfs_put(pos); if (!valid) pos = NULL; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index b42ee6547cdc..c43bee18b79f 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -19,6 +19,8 @@ #include #include +extern rwlock_t kernfs_rename_lock; + struct kernfs_iattrs { kuid_t ia_uid; kgid_t ia_gid; @@ -64,11 +66,14 @@ struct kernfs_root { * * Return: the kernfs_root @kn belongs to. */ -static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) +static inline struct kernfs_root *kernfs_root(const struct kernfs_node *kn) { + const struct kernfs_node *knp; /* if parent exists, it's always a dir; otherwise, @sd is a dir */ - if (kn->parent) - kn = kn->parent; + guard(rcu)(); + knp = rcu_dereference(kn->__parent); + if (knp) + kn = knp; return kn->dir.root; } @@ -97,6 +102,27 @@ struct kernfs_super_info { }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) +static inline bool kernfs_root_is_locked(const struct kernfs_node *kn) +{ + return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem); +} + +static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn) +{ + /* + * The kernfs_node::__parent remains valid within a RCU section. The kn + * can be reparented (and renamed) which changes the entry. This can be + * avoided by locking kernfs_root::kernfs_rwsem or kernfs_rename_lock. + * Both locks can be used to obtain a reference on __parent. Once the + * reference count reaches 0 then the node is about to be freed + * and can not be renamed (or become a different parent) anymore. + */ + return rcu_dereference_check(kn->__parent, + kernfs_root_is_locked(kn) || + lockdep_is_held(&kernfs_rename_lock) || + !atomic_read(&kn->count)); +} + static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) { if (d_really_is_negative(dentry)) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 4a0ff08d589c..2252b16e6ef0 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -148,7 +148,7 @@ static struct dentry *kernfs_get_parent_dentry(struct dentry *child) struct kernfs_root *root = kernfs_root(kn); guard(rwsem_read)(&root->kernfs_rwsem); - return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); + return d_obtain_alias(kernfs_get_inode(child->d_sb, kernfs_parent(kn))); } static const struct export_operations kernfs_export_ops = { @@ -188,10 +188,10 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, return NULL; } - while (child->parent != parent) { - if (!child->parent) + while (kernfs_parent(child) != parent) { + child = kernfs_parent(child); + if (!child) return NULL; - child = child->parent; } return child; @@ -216,7 +216,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ - if (!kn->parent) + if (!rcu_access_pointer(kn->__parent)) return dentry; root = kernfs_root(kn); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 45371a70caa7..05c62ca93c53 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -62,10 +62,10 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* go up to the root, stop at the base */ base = parent; - while (base->parent) { - kn = target->parent; - while (kn->parent && base != kn) - kn = kn->parent; + while (kernfs_parent(base)) { + kn = kernfs_parent(target); + while (kernfs_parent(kn) && base != kn) + kn = kernfs_parent(kn); if (base == kn) break; @@ -75,14 +75,14 @@ static int kernfs_get_target_path(struct kernfs_node *parent, strcpy(s, "../"); s += 3; - base = base->parent; + base = kernfs_parent(base); } /* determine end of target string for reverse fillup */ kn = target; - while (kn->parent && kn != base) { + while (kernfs_parent(kn) && kn != base) { len += strlen(kn->name) + 1; - kn = kn->parent; + kn = kernfs_parent(kn); } /* check limits */ @@ -94,7 +94,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* reverse fillup of target string from target to base */ kn = target; - while (kn->parent && kn != base) { + while (kernfs_parent(kn) && kn != base) { int slen = strlen(kn->name); len -= slen; @@ -102,7 +102,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (len) s[--len] = '/'; - kn = kn->parent; + kn = kernfs_parent(kn); } return 0; @@ -111,12 +111,13 @@ static int kernfs_get_target_path(struct kernfs_node *parent, static int kernfs_getlink(struct inode *inode, char *path) { struct kernfs_node *kn = inode->i_private; - struct kernfs_node *parent = kn->parent; + struct kernfs_node *parent; struct kernfs_node *target = kn->symlink.target_kn; - struct kernfs_root *root = kernfs_root(parent); + struct kernfs_root *root = kernfs_root(kn); int error; down_read(&root->kernfs_rwsem); + parent = kernfs_parent(kn); error = kernfs_get_target_path(parent, target, path); up_read(&root->kernfs_rwsem); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 6931308876c4..c3d3b079aedd 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -19,13 +19,19 @@ #include "sysfs.h" +static struct kobject *sysfs_file_kobj(struct kernfs_node *kn) +{ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + /* * Determine ktype->sysfs_ops for the given kernfs_node. This function * must be called while holding an active reference. */ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) { - struct kobject *kobj = kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(kn); if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); @@ -40,7 +46,7 @@ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) static int sysfs_kf_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); const struct sysfs_ops *ops = sysfs_file_ops(of->kn); ssize_t count; char *buf; @@ -78,7 +84,7 @@ static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (!count) @@ -105,7 +111,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); ssize_t len; /* @@ -131,7 +137,7 @@ static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); if (!count) return 0; @@ -144,7 +150,7 @@ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (size) { @@ -168,7 +174,7 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, struct vm_area_struct *vma) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); return battr->mmap(of->file, kobj, battr, vma); } @@ -177,7 +183,7 @@ static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset, int whence) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); if (battr->llseek) return battr->llseek(of->file, kobj, battr, offset, whence); @@ -494,7 +500,7 @@ EXPORT_SYMBOL_GPL(sysfs_break_active_protection); */ void sysfs_unbreak_active_protection(struct kernfs_node *kn) { - struct kobject *kobj = kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(kn); kernfs_unbreak_active_protection(kn); kernfs_put(kn); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 87c79d076d6d..5dda9a268e44 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -147,6 +147,11 @@ enum kernfs_root_flag { * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, + + /* + * Renames must not change the parent node. + */ + KERNFS_ROOT_INVARIANT_PARENT = 0x0010, }; /* type-specific structures for kernfs_node union members */ @@ -199,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. */ - struct kernfs_node *parent; const char *name; + struct kernfs_node __rcu *__parent; struct rb_node rb; @@ -416,6 +421,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); +unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -514,6 +520,8 @@ kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } +static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ return 0; } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index e28d5f0d20ed..c9752eb607ec 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -844,7 +844,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent if (kernfs_type(kn) != KERNFS_DIR) return -ENOTDIR; - if (kn->parent != new_parent) + if (rcu_access_pointer(kn->__parent) != new_parent) return -EIO; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9061bd55436..71819e58d70c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -633,9 +633,22 @@ int cgroup_task_count(const struct cgroup *cgrp) return count; } +static struct cgroup *kn_priv(struct kernfs_node *kn) +{ + struct kernfs_node *parent; + /* + * The parent can not be replaced due to KERNFS_ROOT_INVARIANT_PARENT. + * Therefore it is always safe to dereference this pointer outside of a + * RCU section. + */ + parent = rcu_dereference_check(kn->__parent, + kernfs_root_flags(kn) & KERNFS_ROOT_INVARIANT_PARENT); + return parent->priv; +} + struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { - struct cgroup *cgrp = of->kn->parent->priv; + struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); /* @@ -1612,7 +1625,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else - cgrp = kn->parent->priv; + cgrp = kn_priv(kn); cgroup_unlock(); @@ -1644,7 +1657,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline) if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else - cgrp = kn->parent->priv; + cgrp = kn_priv(kn); /* * We're gonna grab cgroup_mutex which nests outside kernfs @@ -2118,7 +2131,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) root->kf_root = kernfs_create_root(kf_sops, KERNFS_ROOT_CREATE_DEACTIVATED | KERNFS_ROOT_SUPPORT_EXPORTOP | - KERNFS_ROOT_SUPPORT_USER_XATTR, + KERNFS_ROOT_SUPPORT_USER_XATTR | + KERNFS_ROOT_INVARIANT_PARENT, root_cgrp); if (IS_ERR(root->kf_root)) { ret = PTR_ERR(root->kf_root); @@ -4119,7 +4133,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup_file_ctx *ctx = of->priv; - struct cgroup *cgrp = of->kn->parent->priv; + struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 8bd1ebd7d6af..813143b4985d 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } - cgroup_node = BPF_CORE_READ(cgroup_node, parent); + cgroup_node = BPF_CORE_READ(cgroup_node, __parent); } return payload; } From 741c10b096bc4dd79cd9f215b6ef173bb953e75c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:23 +0100 Subject: [PATCH 0223/2157] kernfs: Use RCU to access kernfs_node::name. Using RCU lifetime rules to access kernfs_node::name can avoid the trouble with kernfs_rename_lock in kernfs_name() and kernfs_path_from_node() if the fs was created with KERNFS_ROOT_INVARIANT_PARENT. This is usefull as it allows to implement kernfs_path_from_node() only with RCU protection and avoiding kernfs_rename_lock. The lock is only required if the __parent node can be changed and the function requires an unchanged hierarchy while it iterates from the node to its parent. The change is needed to allow the lookup of the node's path (kernfs_path_from_node()) from context which runs always with disabled preemption and or interrutps even on PREEMPT_RT. The problem is that kernfs_rename_lock becomes a sleeping lock on PREEMPT_RT. I went through all ::name users and added the required access for the lookup with a few extensions: - rdtgroup_pseudo_lock_create() drops all locks and then uses the name later on. resctrl supports rename with different parents. Here I made a temporal copy of the name while it is used outside of the lock. - kernfs_rename_ns() accepts NULL as new_parent. This simplifies sysfs_move_dir_ns() where it can set NULL in order to reuse the current name. - kernfs_rename_ns() is only using kernfs_rename_lock if the parents are different. All users use either kernfs_rwsem (for stable path view) or just RCU for the lookup. The ::name uses always RCU free. Use RCU lifetime guarantees to access kernfs_node::name. Suggested-by: Tejun Heo Acked-by: Tejun Heo Reported-by: syzbot+6ea37e2e6ffccf41a7e6@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/67251dc6.050a0220.529b6.015e.GAE@google.com/ Reported-by: Hillf Danton Closes: https://lore.kernel.org/20241102001224.2789-1-hdanton@sina.com Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-7-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/internal.h | 5 + arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 14 ++- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 10 +- fs/kernfs/dir.c | 113 ++++++++++++---------- fs/kernfs/file.c | 4 +- fs/kernfs/kernfs-internal.h | 5 + fs/kernfs/mount.c | 5 +- fs/kernfs/symlink.c | 7 +- fs/sysfs/dir.c | 2 +- include/linux/kernfs.h | 4 +- security/selinux/hooks.c | 7 +- 11 files changed, 105 insertions(+), 71 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 20c898f09b7e..dd5d6b4bfcc2 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -507,6 +507,11 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, extern struct mutex rdtgroup_mutex; +static inline const char *rdt_kn_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex)); +} + extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 42cc162f7fc9..7a2db7fa4108 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -52,7 +52,8 @@ static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) rdtgrp = dev_get_drvdata(dev); if (mode) *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); + guard(mutex)(&rdtgroup_mutex); + return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn)); } static const struct class pseudo_lock_class = { @@ -1293,6 +1294,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) struct task_struct *thread; unsigned int new_minor; struct device *dev; + char *kn_name __free(kfree) = NULL; int ret; ret = pseudo_lock_region_alloc(plr); @@ -1304,6 +1306,11 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) ret = -EINVAL; goto out_region; } + kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL); + if (!kn_name) { + ret = -ENOMEM; + goto out_cstates; + } plr->thread_done = 0; @@ -1348,8 +1355,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) mutex_unlock(&rdtgroup_mutex); if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); + plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl); if (!IS_ERR_OR_NULL(plr->debugfs_dir)) debugfs_create_file("pseudo_lock_measure", 0200, plr->debugfs_dir, rdtgrp, @@ -1358,7 +1364,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) dev = device_create(&pseudo_lock_class, NULL, MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); + rdtgrp, "%s", kn_name); mutex_lock(&rdtgroup_mutex); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 55dcdeea1a1b..10afc4eaa467 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -916,14 +916,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, continue; seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", - rdtg->kn->name); + rdt_kn_name(rdtg->kn)); seq_puts(s, "mon:"); list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, mon.crdtgrp_list) { if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, crg->mon.rmid)) continue; - seq_printf(s, "%s", crg->kn->name); + seq_printf(s, "%s", rdt_kn_name(crg->kn)); break; } seq_putc(s, '\n'); @@ -3675,7 +3675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, */ static bool is_mon_groups(struct kernfs_node *kn, const char *name) { - return (!strcmp(kn->name, "mon_groups") && + return (!strcmp(rdt_kn_name(kn), "mon_groups") && strcmp(name, "mon_groups")); } @@ -3824,7 +3824,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && - is_mon_groups(parent_kn, kn->name)) { + is_mon_groups(parent_kn, rdt_kn_name(kn))) { ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; @@ -3912,7 +3912,7 @@ static int rdtgroup_rename(struct kernfs_node *kn, kn_parent = rdt_kn_parent(kn); if (rdtgrp->type != RDTMON_GROUP || !kn_parent || - !is_mon_groups(kn_parent, kn->name)) { + !is_mon_groups(kn_parent, rdt_kn_name(kn))) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 1d370c497e8a..c5a578c46759 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -51,14 +51,6 @@ static bool kernfs_lockdep(struct kernfs_node *kn) #endif } -static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) -{ - if (!kn) - return strscpy(buf, "(null)", buflen); - - return strscpy(buf, rcu_access_pointer(kn->__parent) ? kn->name : "/", buflen); -} - /* kernfs_node_depth - compute depth from @from to @to */ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { @@ -168,11 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { + const char *name; for (kn = kn_to, j = 0; j < i; j++) kn = rcu_dereference(kn->__parent); - len += scnprintf(buf + len, buflen - len, "/%s", kn->name); + name = rcu_dereference(kn->name); + len += scnprintf(buf + len, buflen - len, "/%s", name); } return len; @@ -196,13 +190,18 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, */ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_node *kn_parent; - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_name_locked(kn, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + if (!kn) + return strscpy(buf, "(null)", buflen); + + guard(rcu)(); + /* + * KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and + * the parent is either existing or not. + */ + kn_parent = rcu_dereference(kn->__parent); + return strscpy(buf, kn_parent ? rcu_dereference(kn->name) : "/", buflen); } /** @@ -224,14 +223,17 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_root *root; guard(rcu)(); - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_path_from_node_locked(to, from, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + if (to) { + root = kernfs_root(to); + if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) { + guard(read_lock_irqsave)(&kernfs_rename_lock); + return kernfs_path_from_node_locked(to, from, buf, buflen); + } + } + return kernfs_path_from_node_locked(to, from, buf, buflen); } EXPORT_SYMBOL_GPL(kernfs_path_from_node); @@ -338,13 +340,13 @@ static int kernfs_name_compare(unsigned int hash, const char *name, return -1; if (ns > kn->ns) return 1; - return strcmp(name, kn->name); + return strcmp(name, kernfs_rcu_name(kn)); } static int kernfs_sd_compare(const struct kernfs_node *left, const struct kernfs_node *right) { - return kernfs_name_compare(left->hash, left->name, left->ns, right); + return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right); } /** @@ -542,7 +544,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu) { struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu); - kfree_const(kn->name); + /* If the whole node goes away, then name can't be used outside */ + kfree_const(rcu_access_pointer(kn->name)); if (kn->iattr) { simple_xattrs_free(&kn->iattr->xattrs, NULL); @@ -575,7 +578,8 @@ void kernfs_put(struct kernfs_node *kn) WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", - parent ? parent->name : "", kn->name, atomic_read(&kn->active)); + parent ? rcu_dereference(parent->name) : "", + rcu_dereference(kn->name), atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); @@ -652,7 +656,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); - kn->name = name; + rcu_assign_pointer(kn->name, name); kn->mode = mode; kn->flags = flags; @@ -790,7 +794,8 @@ int kernfs_add_one(struct kernfs_node *kn) ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name)) + has_ns ? "required" : "invalid", + kernfs_rcu_name(parent), kernfs_rcu_name(kn))) goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) @@ -800,7 +805,7 @@ int kernfs_add_one(struct kernfs_node *kn) if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) goto out_unlock; - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns); ret = kernfs_link_sibling(kn); if (ret) @@ -856,7 +861,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, name); + has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name); return NULL; } @@ -1135,8 +1140,6 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, /* Negative hashed dentry? */ if (d_really_is_negative(dentry)) { - struct kernfs_node *parent; - /* If the kernfs parent node has changed discard and * proceed to ->lookup. * @@ -1184,7 +1187,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, goto out_bad; /* The kernfs node has been renamed */ - if (strcmp(dentry->d_name.name, kn->name) != 0) + if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0) goto out_bad; /* The kernfs node has been moved to a different namespace */ @@ -1478,7 +1481,7 @@ static void __kernfs_remove(struct kernfs_node *kn) if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; - pr_debug("kernfs %s: removing\n", kn->name); + pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn)); /* prevent new usage by marking all nodes removing and deactivating */ pos = NULL; @@ -1734,7 +1737,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, { struct kernfs_node *old_parent; struct kernfs_root *root; - const char *old_name = NULL; + const char *old_name; int error; /* can't move or rename root */ @@ -1757,8 +1760,11 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, } error = 0; + old_name = kernfs_rcu_name(kn); + if (!new_name) + new_name = old_name; if ((old_parent == new_parent) && (kn->ns == new_ns) && - (strcmp(kn->name, new_name) == 0)) + (strcmp(old_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; @@ -1766,7 +1772,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, goto out; /* rename kernfs_node */ - if (strcmp(kn->name, new_name) != 0) { + if (strcmp(old_name, new_name) != 0) { error = -ENOMEM; new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) @@ -1779,27 +1785,32 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, * Move to the appropriate place in the appropriate directories rbtree. */ kernfs_unlink_sibling(kn); - kernfs_get(new_parent); - /* rename_lock protects ->parent and ->name accessors */ - write_lock_irq(&kernfs_rename_lock); + /* rename_lock protects ->parent accessors */ + if (old_parent != new_parent) { + kernfs_get(new_parent); + write_lock_irq(&kernfs_rename_lock); - old_parent = kernfs_parent(kn); - rcu_assign_pointer(kn->__parent, new_parent); + rcu_assign_pointer(kn->__parent, new_parent); - kn->ns = new_ns; - if (new_name) { - old_name = kn->name; - kn->name = new_name; + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); + + write_unlock_irq(&kernfs_rename_lock); + kernfs_put(old_parent); + } else { + /* name assignment is RCU protected, parent is the same */ + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); } - write_unlock_irq(&kernfs_rename_lock); - - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns); kernfs_link_sibling(kn); - kernfs_put(old_parent); - kfree_const(old_name); + if (new_name && !is_kernel_rodata((unsigned long)old_name)) + kfree_rcu_mightsleep(old_name); error = 0; out: @@ -1884,7 +1895,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { - const char *name = pos->name; + const char *name = kernfs_rcu_name(pos); unsigned int type = fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index c4ffa8dc89eb..66fe8fe41f06 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -915,6 +915,7 @@ static void kernfs_notify_workfn(struct work_struct *work) list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; + const char *kn_name; struct inode *inode; struct qstr name; @@ -928,7 +929,8 @@ static void kernfs_notify_workfn(struct work_struct *work) if (!inode) continue; - name = QSTR(kn->name); + kn_name = kernfs_rcu_name(kn); + name = QSTR(kn_name); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index c43bee18b79f..40a2a9cd819d 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -107,6 +107,11 @@ static inline bool kernfs_root_is_locked(const struct kernfs_node *kn) return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem); } +static inline const char *kernfs_rcu_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, kernfs_root_is_locked(kn)); +} + static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn) { /* diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 2252b16e6ef0..d1f512b7bf86 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -231,6 +231,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, do { struct dentry *dtmp; struct kernfs_node *kntmp; + const char *name; if (kn == knparent) return dentry; @@ -239,8 +240,8 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, dput(dentry); return ERR_PTR(-EINVAL); } - dtmp = lookup_positive_unlocked(kntmp->name, dentry, - strlen(kntmp->name)); + name = rcu_dereference(kntmp->name); + dtmp = lookup_positive_unlocked(name, dentry, strlen(name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 05c62ca93c53..0bd8a2143723 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -81,7 +81,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* determine end of target string for reverse fillup */ kn = target; while (kernfs_parent(kn) && kn != base) { - len += strlen(kn->name) + 1; + len += strlen(kernfs_rcu_name(kn)) + 1; kn = kernfs_parent(kn); } @@ -95,10 +95,11 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* reverse fillup of target string from target to base */ kn = target; while (kernfs_parent(kn) && kn != base) { - int slen = strlen(kn->name); + const char *name = kernfs_rcu_name(kn); + int slen = strlen(name); len -= slen; - memcpy(s + len, kn->name, slen); + memcpy(s + len, name, slen); if (len) s[--len] = '/'; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4df2afa551dc..94e12efd92f2 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -123,7 +123,7 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, new_parent = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : sysfs_root_kn; - return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); + return kernfs_rename_ns(kn, new_parent, NULL, new_ns); } /** diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5dda9a268e44..b5a5f32fdfd1 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -204,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. */ - const char *name; struct kernfs_node __rcu *__parent; + const char __rcu *name; struct rb_node rb; @@ -400,7 +400,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, +int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7b867dfec88b..7dee9616147d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3584,10 +3584,13 @@ static int selinux_kernfs_init_security(struct kernfs_node *kn_dir, newsid = tsec->create_sid; } else { u16 secclass = inode_mode_to_security_class(kn->mode); + const char *kn_name; struct qstr q; - q.name = kn->name; - q.hash_len = hashlen_string(kn_dir, kn->name); + /* kn is fresh, can't be renamed, name goes not away */ + kn_name = rcu_dereference_check(kn->name, true); + q.name = kn_name; + q.hash_len = hashlen_string(kn_dir, kn_name); rc = security_transition_sid(tsec->sid, parent_sid, secclass, &q, From 4018ab42636cbea1bcf4fd69afc31d51cd905ba0 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:47 +0200 Subject: [PATCH 0224/2157] iio: backend: add API for interface get Add backend support for obtaining the interface type used. Reviewed-by: Nuno Sa Reviewed-by: David Lechner Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-2-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-backend.c | 24 ++++++++++++++++++++++++ include/linux/iio/backend.h | 11 +++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c index 363281272035..8bf3d570da1b 100644 --- a/drivers/iio/industrialio-backend.c +++ b/drivers/iio/industrialio-backend.c @@ -636,6 +636,30 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, } EXPORT_SYMBOL_NS_GPL(iio_backend_ext_info_set, "IIO_BACKEND"); +/** + * iio_backend_interface_type_get - get the interface type used. + * @back: Backend device + * @type: Interface type + * + * RETURNS: + * 0 on success, negative error number on failure. + */ +int iio_backend_interface_type_get(struct iio_backend *back, + enum iio_backend_interface_type *type) +{ + int ret; + + ret = iio_backend_op_call(back, interface_type_get, type); + if (ret) + return ret; + + if (*type >= IIO_BACKEND_INTERFACE_MAX) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(iio_backend_interface_type_get, "IIO_BACKEND"); + /** * iio_backend_extend_chan_spec - Extend an IIO channel * @back: Backend device diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 10be00f3b120..a0ea6c29d7ba 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -70,6 +70,12 @@ enum iio_backend_sample_trigger { IIO_BACKEND_SAMPLE_TRIGGER_MAX }; +enum iio_backend_interface_type { + IIO_BACKEND_INTERFACE_SERIAL_LVDS, + IIO_BACKEND_INTERFACE_SERIAL_CMOS, + IIO_BACKEND_INTERFACE_MAX +}; + /** * struct iio_backend_ops - operations structure for an iio_backend * @enable: Enable backend. @@ -88,6 +94,7 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @interface_type_get: Interface type. * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. @@ -128,6 +135,8 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*interface_type_get)(struct iio_backend *back, + enum iio_backend_interface_type *type); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -186,6 +195,8 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); +int iio_backend_interface_type_get(struct iio_backend *back, + enum iio_backend_interface_type *type); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); From fc3fdb835eebb2ba88c6fdbf2c8b9eae1ceab106 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:48 +0200 Subject: [PATCH 0225/2157] iio: backend: add support for data size set Add backend support for setting the data size used. This setting can be adjusted within the IP cores interfacing devices. Reviewed-by: Nuno Sa Reviewed-by: David Lechner Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-3-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-backend.c | 21 +++++++++++++++++++++ include/linux/iio/backend.h | 3 +++ 2 files changed, 24 insertions(+) diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c index 8bf3d570da1b..2088afa7a55c 100644 --- a/drivers/iio/industrialio-backend.c +++ b/drivers/iio/industrialio-backend.c @@ -660,6 +660,27 @@ int iio_backend_interface_type_get(struct iio_backend *back, } EXPORT_SYMBOL_NS_GPL(iio_backend_interface_type_get, "IIO_BACKEND"); +/** + * iio_backend_data_size_set - set the data width/size in the data bus. + * @back: Backend device + * @size: Size in bits + * + * Some frontend devices can dynamically control the word/data size on the + * interface/data bus. Hence, the backend device needs to be aware of it so + * data can be correctly transferred. + * + * Return: + * 0 on success, negative error number on failure. + */ +int iio_backend_data_size_set(struct iio_backend *back, unsigned int size) +{ + if (!size) + return -EINVAL; + + return iio_backend_op_call(back, data_size_set, size); +} +EXPORT_SYMBOL_NS_GPL(iio_backend_data_size_set, "IIO_BACKEND"); + /** * iio_backend_extend_chan_spec - Extend an IIO channel * @back: Backend device diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index a0ea6c29d7ba..9ae861a21472 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -95,6 +95,7 @@ enum iio_backend_interface_type { * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. * @interface_type_get: Interface type. + * @data_size_set: Data size. * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. @@ -137,6 +138,7 @@ struct iio_backend_ops { const struct iio_chan_spec *chan, char *buf); int (*interface_type_get)(struct iio_backend *back, enum iio_backend_interface_type *type); + int (*data_size_set)(struct iio_backend *back, unsigned int size); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -197,6 +199,7 @@ ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); int iio_backend_interface_type_get(struct iio_backend *back, enum iio_backend_interface_type *type); +int iio_backend_data_size_set(struct iio_backend *back, unsigned int size); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); From 22894e0be908791e3df011cdfac02589c2f340bd Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:49 +0200 Subject: [PATCH 0226/2157] iio: backend: add API for oversampling Add backend support for setting oversampling ratio. Reviewed-by: David Lechner Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-4-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-backend.c | 15 +++++++++++++++ include/linux/iio/backend.h | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c index 2088afa7a55c..d4ad36f54090 100644 --- a/drivers/iio/industrialio-backend.c +++ b/drivers/iio/industrialio-backend.c @@ -681,6 +681,21 @@ int iio_backend_data_size_set(struct iio_backend *back, unsigned int size) } EXPORT_SYMBOL_NS_GPL(iio_backend_data_size_set, "IIO_BACKEND"); +/** + * iio_backend_oversampling_ratio_set - set the oversampling ratio + * @back: Backend device + * @ratio: The oversampling ratio - value 1 corresponds to no oversampling. + * + * Return: + * 0 on success, negative error number on failure. + */ +int iio_backend_oversampling_ratio_set(struct iio_backend *back, + unsigned int ratio) +{ + return iio_backend_op_call(back, oversampling_ratio_set, ratio); +} +EXPORT_SYMBOL_NS_GPL(iio_backend_oversampling_ratio_set, "IIO_BACKEND"); + /** * iio_backend_extend_chan_spec - Extend an IIO channel * @back: Backend device diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 9ae861a21472..e45b7dfbec35 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -96,6 +96,7 @@ enum iio_backend_interface_type { * @ext_info_get: Extended info getter. * @interface_type_get: Interface type. * @data_size_set: Data size. + * @oversampling_ratio_set: Set Oversampling ratio. * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. @@ -139,6 +140,8 @@ struct iio_backend_ops { int (*interface_type_get)(struct iio_backend *back, enum iio_backend_interface_type *type); int (*data_size_set)(struct iio_backend *back, unsigned int size); + int (*oversampling_ratio_set)(struct iio_backend *back, + unsigned int ratio); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -200,6 +203,8 @@ ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, int iio_backend_interface_type_get(struct iio_backend *back, enum iio_backend_interface_type *type); int iio_backend_data_size_set(struct iio_backend *back, unsigned int size); +int iio_backend_oversampling_ratio_set(struct iio_backend *back, + unsigned int ratio); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); From fc9156c04566b2f98b339c9d8f4994f34ada9cde Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:50 +0200 Subject: [PATCH 0227/2157] iio: adc: adi-axi-adc: add interface type Add support for getting the interface (CMOS or LVDS) used by the AXI ADC IP. Reviewed-by: Nuno Sa Reviewed-by: David Lechner Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-5-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 766406f45396..fb3ff605db05 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -40,6 +40,9 @@ #define ADI_AXI_REG_RSTN_MMCM_RSTN BIT(1) #define ADI_AXI_REG_RSTN_RSTN BIT(0) +#define ADI_AXI_ADC_REG_CONFIG 0x000c +#define ADI_AXI_ADC_REG_CONFIG_CMOS_OR_LVDS_N BIT(7) + #define ADI_AXI_ADC_REG_CTRL 0x0044 #define ADI_AXI_ADC_CTRL_DDR_EDGESEL_MASK BIT(1) @@ -310,6 +313,25 @@ static int axi_adc_chan_disable(struct iio_backend *back, unsigned int chan) ADI_AXI_REG_CHAN_CTRL_ENABLE); } +static int axi_adc_interface_type_get(struct iio_backend *back, + enum iio_backend_interface_type *type) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + unsigned int val; + int ret; + + ret = regmap_read(st->regmap, ADI_AXI_ADC_REG_CONFIG, &val); + if (ret) + return ret; + + if (val & ADI_AXI_ADC_REG_CONFIG_CMOS_OR_LVDS_N) + *type = IIO_BACKEND_INTERFACE_SERIAL_CMOS; + else + *type = IIO_BACKEND_INTERFACE_SERIAL_LVDS; + + return 0; +} + static struct iio_buffer *axi_adc_request_buffer(struct iio_backend *back, struct iio_dev *indio_dev) { @@ -456,6 +478,7 @@ static const struct iio_backend_ops adi_axi_adc_ops = { .iodelay_set = axi_adc_iodelays_set, .test_pattern_set = axi_adc_test_pattern_set, .chan_status = axi_adc_chan_status, + .interface_type_get = axi_adc_interface_type_get, .debugfs_reg_access = iio_backend_debugfs_ptr(axi_adc_reg_access), .debugfs_print_chan_status = iio_backend_debugfs_ptr(axi_adc_debugfs_print_chan_status), }; From 4831509fd5a718c06bd3ca13dc4d6f628d116778 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:51 +0200 Subject: [PATCH 0228/2157] dt-bindings: iio: adc: add ad485x axi variant Add a new compatible and related bindings for the fpga-based AD485x AXI IP core, a variant of the generic AXI ADC IP. The AXI AD485x IP is a very similar HDL (fpga) variant of the generic AXI ADC IP, intended to control ad485x family. Although this is not preferred, the wildcard naming is used to match the published firmware under the same name. Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-6-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml b/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml index 4fa82dcf6fc9..cf74f84d6103 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml @@ -27,6 +27,7 @@ description: | the ad7606 family. https://wiki.analog.com/resources/fpga/docs/axi_adc_ip + https://analogdevicesinc.github.io/hdl/library/axi_ad485x/index.html http://analogdevicesinc.github.io/hdl/library/axi_ad7606x/index.html properties: @@ -34,6 +35,7 @@ properties: enum: - adi,axi-adc-10.0.a - adi,axi-ad7606x + - adi,axi-ad485x reg: maxItems: 1 From 7a794e3a0dc5b1df525bf72260b641d70e337784 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:52 +0200 Subject: [PATCH 0229/2157] iio: adc: adi-axi-adc: set data format Add support for selecting the data format within the AXI ADC ip. Add separate complatible string for the custom AD485X IP and implement the necessary changes. Reviewed-by: Nuno Sa Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-7-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 78 ++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index fb3ff605db05..ed6a46bdbd80 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -46,6 +46,12 @@ #define ADI_AXI_ADC_REG_CTRL 0x0044 #define ADI_AXI_ADC_CTRL_DDR_EDGESEL_MASK BIT(1) +#define ADI_AXI_ADC_REG_CNTRL_3 0x004c +#define AXI_AD485X_CNTRL_3_PACKET_FORMAT_MSK GENMASK(1, 0) +#define AXI_AD485X_PACKET_FORMAT_20BIT 0x0 +#define AXI_AD485X_PACKET_FORMAT_24BIT 0x1 +#define AXI_AD485X_PACKET_FORMAT_32BIT 0x2 + #define ADI_AXI_ADC_REG_DRP_STATUS 0x0074 #define ADI_AXI_ADC_DRP_LOCKED BIT(17) @@ -332,6 +338,47 @@ static int axi_adc_interface_type_get(struct iio_backend *back, return 0; } +static int axi_adc_ad485x_data_size_set(struct iio_backend *back, + unsigned int size) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + unsigned int val; + + switch (size) { + /* + * There are two different variants of the AXI AXI_AD485X IP block, a + * 16-bit and a 20-bit variant. + * The 0x0 value (AXI_AD485X_PACKET_FORMAT_20BIT) is corresponding also + * to the 16-bit variant of the IP block. + */ + case 16: + case 20: + val = AXI_AD485X_PACKET_FORMAT_20BIT; + break; + case 24: + val = AXI_AD485X_PACKET_FORMAT_24BIT; + break; + /* + * The 0x2 (AXI_AD485X_PACKET_FORMAT_32BIT) corresponds only to the + * 20-bit variant of the IP block. Setting this value properly is + * ensured by the upper layers of the drivers calling the axi-adc + * functions. + * Also, for 16-bit IP block, the 0x2 (AXI_AD485X_PACKET_FORMAT_32BIT) + * value is handled as maximum size available which is 24-bit for this + * configuration. + */ + case 32: + val = AXI_AD485X_PACKET_FORMAT_32BIT; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(st->regmap, ADI_AXI_ADC_REG_CNTRL_3, + AXI_AD485X_CNTRL_3_PACKET_FORMAT_MSK, + FIELD_PREP(AXI_AD485X_CNTRL_3_PACKET_FORMAT_MSK, val)); +} + static struct iio_buffer *axi_adc_request_buffer(struct iio_backend *back, struct iio_dev *indio_dev) { @@ -488,6 +535,29 @@ static const struct iio_backend_info adi_axi_adc_generic = { .ops = &adi_axi_adc_ops, }; +static const struct iio_backend_ops adi_ad485x_ops = { + .enable = axi_adc_enable, + .disable = axi_adc_disable, + .data_format_set = axi_adc_data_format_set, + .chan_enable = axi_adc_chan_enable, + .chan_disable = axi_adc_chan_disable, + .request_buffer = axi_adc_request_buffer, + .free_buffer = axi_adc_free_buffer, + .data_sample_trigger = axi_adc_data_sample_trigger, + .iodelay_set = axi_adc_iodelays_set, + .chan_status = axi_adc_chan_status, + .interface_type_get = axi_adc_interface_type_get, + .data_size_set = axi_adc_ad485x_data_size_set, + .debugfs_reg_access = iio_backend_debugfs_ptr(axi_adc_reg_access), + .debugfs_print_chan_status = + iio_backend_debugfs_ptr(axi_adc_debugfs_print_chan_status), +}; + +static const struct iio_backend_info axi_ad485x = { + .name = "axi-ad485x", + .ops = &adi_ad485x_ops, +}; + static int adi_axi_adc_probe(struct platform_device *pdev) { struct adi_axi_adc_state *st; @@ -545,7 +615,7 @@ static int adi_axi_adc_probe(struct platform_device *pdev) return -ENODEV; } - ret = devm_iio_backend_register(&pdev->dev, &adi_axi_adc_generic, st); + ret = devm_iio_backend_register(&pdev->dev, st->info->backend_info, st); if (ret) return dev_err_probe(&pdev->dev, ret, "failed to register iio backend\n"); @@ -585,6 +655,11 @@ static const struct axi_adc_info adc_generic = { .backend_info = &adi_axi_adc_generic, }; +static const struct axi_adc_info adi_axi_ad485x = { + .version = ADI_AXI_PCORE_VER(10, 0, 'a'), + .backend_info = &axi_ad485x, +}; + static const struct ad7606_platform_data ad7606_pdata = { .bus_reg_read = ad7606_bus_reg_read, .bus_reg_write = ad7606_bus_reg_write, @@ -601,6 +676,7 @@ static const struct axi_adc_info adc_ad7606 = { /* Match table for of_platform binding */ static const struct of_device_id adi_axi_adc_of_match[] = { { .compatible = "adi,axi-adc-10.0.a", .data = &adc_generic }, + { .compatible = "adi,axi-ad485x", .data = &adi_axi_ad485x }, { .compatible = "adi,axi-ad7606x", .data = &adc_ad7606 }, { /* end of list */ } }; From 208a94c8884d3a0525807d798883f568135db604 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:53 +0200 Subject: [PATCH 0230/2157] iio: adc: adi-axi-adc: add oversampling Add support for enabling/disabling oversampling. Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-8-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index ed6a46bdbd80..61ab7dce43be 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -47,6 +47,7 @@ #define ADI_AXI_ADC_CTRL_DDR_EDGESEL_MASK BIT(1) #define ADI_AXI_ADC_REG_CNTRL_3 0x004c +#define AXI_AD485X_CNTRL_3_OS_EN_MSK BIT(2) #define AXI_AD485X_CNTRL_3_PACKET_FORMAT_MSK GENMASK(1, 0) #define AXI_AD485X_PACKET_FORMAT_20BIT 0x0 #define AXI_AD485X_PACKET_FORMAT_24BIT 0x1 @@ -379,6 +380,28 @@ static int axi_adc_ad485x_data_size_set(struct iio_backend *back, FIELD_PREP(AXI_AD485X_CNTRL_3_PACKET_FORMAT_MSK, val)); } +static int axi_adc_ad485x_oversampling_ratio_set(struct iio_backend *back, + unsigned int ratio) +{ + struct adi_axi_adc_state *st = iio_backend_get_priv(back); + + /* The current state of the function enables or disables the + * oversampling in REG_CNTRL_3 register. A ratio equal to 1 implies no + * oversampling, while a value greater than 1 implies oversampling being + * enabled. + */ + switch (ratio) { + case 0: + return -EINVAL; + case 1: + return regmap_clear_bits(st->regmap, ADI_AXI_ADC_REG_CNTRL_3, + AXI_AD485X_CNTRL_3_OS_EN_MSK); + default: + return regmap_set_bits(st->regmap, ADI_AXI_ADC_REG_CNTRL_3, + AXI_AD485X_CNTRL_3_OS_EN_MSK); + } +} + static struct iio_buffer *axi_adc_request_buffer(struct iio_backend *back, struct iio_dev *indio_dev) { @@ -548,6 +571,7 @@ static const struct iio_backend_ops adi_ad485x_ops = { .chan_status = axi_adc_chan_status, .interface_type_get = axi_adc_interface_type_get, .data_size_set = axi_adc_ad485x_data_size_set, + .oversampling_ratio_set = axi_adc_ad485x_oversampling_ratio_set, .debugfs_reg_access = iio_backend_debugfs_ptr(axi_adc_reg_access), .debugfs_print_chan_status = iio_backend_debugfs_ptr(axi_adc_debugfs_print_chan_status), From e04b1b0c6769d048f0befd75748daf58cf026b50 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:54 +0200 Subject: [PATCH 0231/2157] dt-bindings: iio: adc: add ad4851 Add devicetree bindings for ad485x family. Reviewed-by: Conor Dooley Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-9-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,ad4851.yaml | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml new file mode 100644 index 000000000000..c6676d91b4e6 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml @@ -0,0 +1,153 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2024 Analog Devices Inc. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/adi,ad4851.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD485X family + +maintainers: + - Sergiu Cuciurean + - Dragos Bogdan + - Antoniu Miclaus + +description: | + Analog Devices AD485X fully buffered, 8-channel simultaneous sampling, + 16/20-bit, 1 MSPS data acquisition system (DAS) with differential, wide + common-mode range inputs. + + https://www.analog.com/media/en/technical-documentation/data-sheets/ad4855.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/ad4856.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/ad4857.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/ad4858.pdf + +$ref: /schemas/spi/spi-peripheral-props.yaml# + +properties: + compatible: + enum: + - adi,ad4851 + - adi,ad4852 + - adi,ad4853 + - adi,ad4854 + - adi,ad4855 + - adi,ad4856 + - adi,ad4857 + - adi,ad4858 + - adi,ad4858i + + reg: + maxItems: 1 + + vcc-supply: true + + vee-supply: true + + vdd-supply: true + + vddh-supply: true + + vddl-supply: true + + vio-supply: true + + vrefbuf-supply: true + + vrefio-supply: true + + pwms: + description: PWM connected to the CNV pin. + maxItems: 1 + + io-backends: + maxItems: 1 + + pd-gpios: + maxItems: 1 + + spi-max-frequency: + maximum: 25000000 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + "^channel(@[0-7])?$": + $ref: adc.yaml + type: object + description: Represents the channels which are connected to the ADC. + + properties: + reg: + description: + The channel number, as specified in the datasheet (from 0 to 7). + minimum: 0 + maximum: 7 + + diff-channels: + description: + Each channel can be configured as a bipolar differential channel. + The ADC uses the same positive and negative inputs for this. + This property must be specified as 'reg' (or the channel number) for + both positive and negative inputs (i.e. diff-channels = ). + Since the configuration is bipolar differential, the 'bipolar' + property is required. + items: + minimum: 0 + maximum: 7 + + bipolar: true + + required: + - reg + + additionalProperties: false + +required: + - compatible + - reg + - vcc-supply + - vee-supply + - vdd-supply + - vio-supply + - pwms + +unevaluatedProperties: false + +examples: + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0{ + #address-cells = <1>; + #size-cells = <0>; + compatible = "adi,ad4858"; + reg = <0>; + spi-max-frequency = <10000000>; + vcc-supply = <&vcc>; + vdd-supply = <&vdd>; + vee-supply = <&vee>; + vddh-supply = <&vddh>; + vddl-supply = <&vddl>; + vio-supply = <&vio>; + pwms = <&pwm_gen 0 0>; + io-backends = <&iio_backend>; + + channel@0 { + reg = <0>; + diff-channels = <0 0>; + bipolar; + }; + + channel@1 { + reg = <1>; + }; + }; + }; +... From 6250803fe2ec92be32a4df1c3a39c4a460d5bd58 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 14 Feb 2025 15:19:55 +0200 Subject: [PATCH 0232/2157] iio: adc: ad4851: add ad485x driver Add support for the AD485X a fully buffered, 8-channel simultaneous sampling, 16/20-bit, 1 MSPS data acquisition system (DAS) with differential, wide common-mode range inputs. Signed-off-by: Antoniu Miclaus Link: https://patch.msgid.link/20250214131955.31973-10-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 14 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ad4851.c | 1315 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 1330 insertions(+) create mode 100644 drivers/iio/adc/ad4851.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index f64b5faeb257..d6e8d5165ccc 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -63,6 +63,20 @@ config AD4695 To compile this driver as a module, choose M here: the module will be called ad4695. +config AD4851 + tristate "Analog Device AD4851 DAS Driver" + depends on SPI + depends on PWM + select REGMAP_SPI + select IIO_BACKEND + help + Say yes here to build support for Analog Devices AD4851, AD4852, + AD4853, AD4854, AD4855, AD4856, AD4857, AD4858, AD4858I high speed + data acquisition system (DAS). + + To compile this driver as a module, choose M here: the module will be + called ad4851. + config AD7091R tristate diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index ee19afba62b7..e4d8ba12f841 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o obj-$(CONFIG_AD4000) += ad4000.o obj-$(CONFIG_AD4130) += ad4130.o obj-$(CONFIG_AD4695) += ad4695.o +obj-$(CONFIG_AD4851) += ad4851.o obj-$(CONFIG_AD7091R) += ad7091r-base.o obj-$(CONFIG_AD7091R5) += ad7091r5.o obj-$(CONFIG_AD7091R8) += ad7091r8.o diff --git a/drivers/iio/adc/ad4851.c b/drivers/iio/adc/ad4851.c new file mode 100644 index 000000000000..1ad37084355e --- /dev/null +++ b/drivers/iio/adc/ad4851.c @@ -0,0 +1,1315 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Analog Devices AD4851 DAS driver + * + * Copyright 2024 Analog Devices Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define AD4851_REG_INTERFACE_CONFIG_A 0x00 +#define AD4851_REG_INTERFACE_CONFIG_B 0x01 +#define AD4851_REG_PRODUCT_ID_L 0x04 +#define AD4851_REG_PRODUCT_ID_H 0x05 +#define AD4851_REG_DEVICE_CTRL 0x25 +#define AD4851_REG_PACKET 0x26 +#define AD4851_REG_OVERSAMPLE 0x27 + +#define AD4851_REG_CH_CONFIG_BASE 0x2A +#define AD4851_REG_CHX_SOFTSPAN(ch) ((0x12 * (ch)) + AD4851_REG_CH_CONFIG_BASE) +#define AD4851_REG_CHX_OFFSET(ch) (AD4851_REG_CHX_SOFTSPAN(ch) + 0x01) +#define AD4851_REG_CHX_OFFSET_LSB(ch) AD4851_REG_CHX_OFFSET(ch) +#define AD4851_REG_CHX_OFFSET_MID(ch) (AD4851_REG_CHX_OFFSET_LSB(ch) + 0x01) +#define AD4851_REG_CHX_OFFSET_MSB(ch) (AD4851_REG_CHX_OFFSET_MID(ch) + 0x01) +#define AD4851_REG_CHX_GAIN(ch) (AD4851_REG_CHX_OFFSET(ch) + 0x03) +#define AD4851_REG_CHX_GAIN_LSB(ch) AD4851_REG_CHX_GAIN(ch) +#define AD4851_REG_CHX_GAIN_MSB(ch) (AD4851_REG_CHX_GAIN(ch) + 0x01) +#define AD4851_REG_CHX_PHASE(ch) (AD4851_REG_CHX_GAIN(ch) + 0x02) +#define AD4851_REG_CHX_PHASE_LSB(ch) AD4851_REG_CHX_PHASE(ch) +#define AD4851_REG_CHX_PHASE_MSB(ch) (AD4851_REG_CHX_PHASE_LSB(ch) + 0x01) + +#define AD4851_REG_TESTPAT_0(c) (0x38 + (c) * 0x12) +#define AD4851_REG_TESTPAT_1(c) (0x39 + (c) * 0x12) +#define AD4851_REG_TESTPAT_2(c) (0x3A + (c) * 0x12) +#define AD4851_REG_TESTPAT_3(c) (0x3B + (c) * 0x12) + +#define AD4851_SW_RESET (BIT(7) | BIT(0)) +#define AD4851_SDO_ENABLE BIT(4) +#define AD4851_SINGLE_INSTRUCTION BIT(7) +#define AD4851_REFBUF BIT(2) +#define AD4851_REFSEL BIT(1) +#define AD4851_ECHO_CLOCK_MODE BIT(0) + +#define AD4851_PACKET_FORMAT_0 0 +#define AD4851_PACKET_FORMAT_1 1 +#define AD4851_PACKET_FORMAT_MASK GENMASK(1, 0) + +#define AD4851_OS_EN_MSK BIT(7) +#define AD4851_OS_RATIO_MSK GENMASK(3, 0) + +#define AD4851_TEST_PAT BIT(2) + +#define AD4858_PACKET_SIZE_20 0 +#define AD4858_PACKET_SIZE_24 1 +#define AD4858_PACKET_SIZE_32 2 + +#define AD4857_PACKET_SIZE_16 0 +#define AD4857_PACKET_SIZE_24 1 + +#define AD4851_TESTPAT_0_DEFAULT 0x2A +#define AD4851_TESTPAT_1_DEFAULT 0x3C +#define AD4851_TESTPAT_2_DEFAULT 0xCE +#define AD4851_TESTPAT_3_DEFAULT(c) (0x0A + (0x10 * (c))) + +#define AD4851_SOFTSPAN_0V_2V5 0 +#define AD4851_SOFTSPAN_N2V5_2V5 1 +#define AD4851_SOFTSPAN_0V_5V 2 +#define AD4851_SOFTSPAN_N5V_5V 3 +#define AD4851_SOFTSPAN_0V_6V25 4 +#define AD4851_SOFTSPAN_N6V25_6V25 5 +#define AD4851_SOFTSPAN_0V_10V 6 +#define AD4851_SOFTSPAN_N10V_10V 7 +#define AD4851_SOFTSPAN_0V_12V5 8 +#define AD4851_SOFTSPAN_N12V5_12V5 9 +#define AD4851_SOFTSPAN_0V_20V 10 +#define AD4851_SOFTSPAN_N20V_20V 11 +#define AD4851_SOFTSPAN_0V_25V 12 +#define AD4851_SOFTSPAN_N25V_25V 13 +#define AD4851_SOFTSPAN_0V_40V 14 +#define AD4851_SOFTSPAN_N40V_40V 15 + +#define AD4851_MAX_LANES 8 +#define AD4851_MAX_IODELAY 32 + +#define AD4851_T_CNVH_NS 40 +#define AD4851_T_CNVH_NS_MARGIN 10 + +#define AD4841_MAX_SCALE_AVAIL 8 + +#define AD4851_MAX_CH_NR 8 +#define AD4851_CH_START 0 + +struct ad4851_scale { + unsigned int scale_val; + u8 reg_val; +}; + +static const struct ad4851_scale ad4851_scale_table_unipolar[] = { + { 2500, 0x0 }, + { 5000, 0x2 }, + { 6250, 0x4 }, + { 10000, 0x6 }, + { 12500, 0x8 }, + { 20000, 0xA }, + { 25000, 0xC }, + { 40000, 0xE }, +}; + +static const struct ad4851_scale ad4851_scale_table_bipolar[] = { + { 5000, 0x1 }, + { 10000, 0x3 }, + { 12500, 0x5 }, + { 20000, 0x7 }, + { 25000, 0x9 }, + { 40000, 0xB }, + { 50000, 0xD }, + { 80000, 0xF }, +}; + +static const unsigned int ad4851_scale_avail_unipolar[] = { + 2500, + 5000, + 6250, + 10000, + 12500, + 20000, + 25000, + 40000, +}; + +static const unsigned int ad4851_scale_avail_bipolar[] = { + 5000, + 10000, + 12500, + 20000, + 25000, + 40000, + 50000, + 80000, +}; + +struct ad4851_chip_info { + const char *name; + unsigned int product_id; + int num_scales; + unsigned long max_sample_rate_hz; + unsigned int resolution; + unsigned int max_channels; + int (*parse_channels)(struct iio_dev *indio_dev); +}; + +enum { + AD4851_SCAN_TYPE_NORMAL, + AD4851_SCAN_TYPE_RESOLUTION_BOOST, +}; + +struct ad4851_state { + struct spi_device *spi; + struct pwm_device *cnv; + struct iio_backend *back; + /* + * Synchronize access to members the of driver state, and ensure + * atomicity of consecutive regmap operations. + */ + struct mutex lock; + struct regmap *regmap; + const struct ad4851_chip_info *info; + struct gpio_desc *pd_gpio; + bool resolution_boost_enabled; + unsigned long cnv_trigger_rate_hz; + unsigned int osr; + bool vrefbuf_en; + bool vrefio_en; + bool bipolar_ch[AD4851_MAX_CH_NR]; + unsigned int scales_unipolar[AD4841_MAX_SCALE_AVAIL][2]; + unsigned int scales_bipolar[AD4841_MAX_SCALE_AVAIL][2]; +}; + +static int ad4851_reg_access(struct iio_dev *indio_dev, + unsigned int reg, + unsigned int writeval, + unsigned int *readval) +{ + struct ad4851_state *st = iio_priv(indio_dev); + + if (readval) + return regmap_read(st->regmap, reg, readval); + + return regmap_write(st->regmap, reg, writeval); +} + +static int ad4851_set_sampling_freq(struct ad4851_state *st, unsigned int freq) +{ + struct pwm_state cnv_state = { + .duty_cycle = AD4851_T_CNVH_NS + AD4851_T_CNVH_NS_MARGIN, + .enabled = true, + }; + int ret; + + freq = clamp(freq, 1, st->info->max_sample_rate_hz); + + cnv_state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, freq); + + ret = pwm_apply_might_sleep(st->cnv, &cnv_state); + if (ret) + return ret; + + st->cnv_trigger_rate_hz = freq; + + return 0; +} + +static const int ad4851_oversampling_ratios[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, + 65536, +}; + +static int ad4851_osr_to_regval(unsigned int ratio) +{ + int i; + + for (i = 1; i < ARRAY_SIZE(ad4851_oversampling_ratios); i++) + if (ratio == ad4851_oversampling_ratios[i]) + return i - 1; + + return -EINVAL; +} + +static int __ad4851_get_scale(struct iio_dev *indio_dev, int scale_tbl, + unsigned int *val, unsigned int *val2) +{ + const struct iio_scan_type *scan_type; + unsigned int tmp; + + scan_type = iio_get_current_scan_type(indio_dev, &indio_dev->channels[0]); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + tmp = ((u64)scale_tbl * MICRO) >> scan_type->realbits; + *val = tmp / MICRO; + *val2 = tmp % MICRO; + + return 0; +} + +static int ad4851_scale_fill(struct iio_dev *indio_dev) +{ + struct ad4851_state *st = iio_priv(indio_dev); + unsigned int i, val1, val2; + int ret; + + for (i = 0; i < ARRAY_SIZE(ad4851_scale_avail_unipolar); i++) { + ret = __ad4851_get_scale(indio_dev, + ad4851_scale_avail_unipolar[i], + &val1, &val2); + if (ret) + return ret; + + st->scales_unipolar[i][0] = val1; + st->scales_unipolar[i][1] = val2; + } + + for (i = 0; i < ARRAY_SIZE(ad4851_scale_avail_bipolar); i++) { + ret = __ad4851_get_scale(indio_dev, + ad4851_scale_avail_bipolar[i], + &val1, &val2); + if (ret) + return ret; + + st->scales_bipolar[i][0] = val1; + st->scales_bipolar[i][1] = val2; + } + + return 0; +} + +static int ad4851_set_oversampling_ratio(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + unsigned int osr) +{ + struct ad4851_state *st = iio_priv(indio_dev); + int val, ret; + + guard(mutex)(&st->lock); + + if (osr == 1) { + ret = regmap_clear_bits(st->regmap, AD4851_REG_OVERSAMPLE, + AD4851_OS_EN_MSK); + if (ret) + return ret; + } else { + val = ad4851_osr_to_regval(osr); + if (val < 0) + return -EINVAL; + + ret = regmap_update_bits(st->regmap, AD4851_REG_OVERSAMPLE, + AD4851_OS_EN_MSK | + AD4851_OS_RATIO_MSK, + FIELD_PREP(AD4851_OS_EN_MSK, 1) | + FIELD_PREP(AD4851_OS_RATIO_MSK, val)); + if (ret) + return ret; + } + + ret = iio_backend_oversampling_ratio_set(st->back, osr); + if (ret) + return ret; + + switch (st->info->resolution) { + case 20: + switch (osr) { + case 0: + return -EINVAL; + case 1: + val = 20; + break; + default: + val = 24; + break; + } + break; + case 16: + val = 16; + break; + default: + return -EINVAL; + } + + ret = iio_backend_data_size_set(st->back, val); + if (ret) + return ret; + + if (osr == 1 || st->info->resolution == 16) { + ret = regmap_clear_bits(st->regmap, AD4851_REG_PACKET, + AD4851_PACKET_FORMAT_MASK); + if (ret) + return ret; + + st->resolution_boost_enabled = false; + } else { + ret = regmap_update_bits(st->regmap, AD4851_REG_PACKET, + AD4851_PACKET_FORMAT_MASK, + FIELD_PREP(AD4851_PACKET_FORMAT_MASK, 1)); + if (ret) + return ret; + + st->resolution_boost_enabled = true; + } + + if (st->osr != osr) { + ret = ad4851_scale_fill(indio_dev); + if (ret) + return ret; + + st->osr = osr; + } + + return 0; +} + +static int ad4851_get_oversampling_ratio(struct ad4851_state *st, unsigned int *val) +{ + unsigned int osr; + int ret; + + guard(mutex)(&st->lock); + + ret = regmap_read(st->regmap, AD4851_REG_OVERSAMPLE, &osr); + if (ret) + return ret; + + if (!FIELD_GET(AD4851_OS_EN_MSK, osr)) + *val = 1; + else + *val = ad4851_oversampling_ratios[FIELD_GET(AD4851_OS_RATIO_MSK, osr) + 1]; + + st->osr = *val; + + return IIO_VAL_INT; +} + +static void ad4851_pwm_disable(void *data) +{ + pwm_disable(data); +} + +static int ad4851_setup(struct ad4851_state *st) +{ + unsigned int product_id; + int ret; + + if (st->pd_gpio) { + /* To initiate a global reset, bring the PD pin high twice */ + gpiod_set_value(st->pd_gpio, 1); + fsleep(1); + gpiod_set_value(st->pd_gpio, 0); + fsleep(1); + gpiod_set_value(st->pd_gpio, 1); + fsleep(1); + gpiod_set_value(st->pd_gpio, 0); + fsleep(1000); + } else { + ret = regmap_set_bits(st->regmap, AD4851_REG_INTERFACE_CONFIG_A, + AD4851_SW_RESET); + if (ret) + return ret; + } + + if (st->vrefbuf_en) { + ret = regmap_set_bits(st->regmap, AD4851_REG_DEVICE_CTRL, + AD4851_REFBUF); + if (ret) + return ret; + } + + if (st->vrefio_en) { + ret = regmap_set_bits(st->regmap, AD4851_REG_DEVICE_CTRL, + AD4851_REFSEL); + if (ret) + return ret; + } + + ret = regmap_write(st->regmap, AD4851_REG_INTERFACE_CONFIG_B, + AD4851_SINGLE_INSTRUCTION); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_INTERFACE_CONFIG_A, + AD4851_SDO_ENABLE); + if (ret) + return ret; + + ret = regmap_read(st->regmap, AD4851_REG_PRODUCT_ID_L, &product_id); + if (ret) + return ret; + + if (product_id != st->info->product_id) + dev_info(&st->spi->dev, "Unknown product ID: 0x%02X\n", + product_id); + + ret = regmap_set_bits(st->regmap, AD4851_REG_DEVICE_CTRL, + AD4851_ECHO_CLOCK_MODE); + if (ret) + return ret; + + return regmap_write(st->regmap, AD4851_REG_PACKET, 0); +} + +/* + * Find the longest consecutive sequence of false values from field + * and return starting index. + */ +static int ad4851_find_opt(const unsigned long *field, unsigned int start, + unsigned int nbits, unsigned int *val) +{ + unsigned int bit = start, end, start_cnt, cnt = 0; + + for_each_clear_bitrange_from(bit, end, field, start + nbits) { + if (end - bit > cnt) { + cnt = end - bit; + start_cnt = bit - start; + } + } + + if (!cnt) + return -ENOENT; + + *val = start_cnt; + + return cnt; +} + +static int ad4851_calibrate(struct iio_dev *indio_dev) +{ + struct ad4851_state *st = iio_priv(indio_dev); + unsigned int opt_delay, num_lanes, delay, i, s, c; + enum iio_backend_interface_type interface_type; + DECLARE_BITMAP(pn_status, AD4851_MAX_LANES * AD4851_MAX_IODELAY); + bool status; + int ret; + + ret = iio_backend_interface_type_get(st->back, &interface_type); + if (ret) + return ret; + + switch (interface_type) { + case IIO_BACKEND_INTERFACE_SERIAL_CMOS: + num_lanes = indio_dev->num_channels; + break; + case IIO_BACKEND_INTERFACE_SERIAL_LVDS: + num_lanes = 1; + break; + default: + return -EINVAL; + } + + if (st->info->resolution == 16) { + ret = iio_backend_data_size_set(st->back, 24); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_PACKET, + AD4851_TEST_PAT | AD4857_PACKET_SIZE_24); + if (ret) + return ret; + } else { + ret = iio_backend_data_size_set(st->back, 32); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_PACKET, + AD4851_TEST_PAT | AD4858_PACKET_SIZE_32); + if (ret) + return ret; + } + + for (i = 0; i < indio_dev->num_channels; i++) { + ret = regmap_write(st->regmap, AD4851_REG_TESTPAT_0(i), + AD4851_TESTPAT_0_DEFAULT); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_TESTPAT_1(i), + AD4851_TESTPAT_1_DEFAULT); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_TESTPAT_2(i), + AD4851_TESTPAT_2_DEFAULT); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_TESTPAT_3(i), + AD4851_TESTPAT_3_DEFAULT(i)); + if (ret) + return ret; + + ret = iio_backend_chan_enable(st->back, + indio_dev->channels[i].channel); + if (ret) + return ret; + } + + for (i = 0; i < num_lanes; i++) { + for (delay = 0; delay < AD4851_MAX_IODELAY; delay++) { + ret = iio_backend_iodelay_set(st->back, i, delay); + if (ret) + return ret; + + ret = iio_backend_chan_status(st->back, i, &status); + if (ret) + return ret; + + __assign_bit(i * AD4851_MAX_IODELAY + delay, pn_status, + status); + } + } + + for (i = 0; i < num_lanes; i++) { + c = ad4851_find_opt(pn_status, i * AD4851_MAX_IODELAY, + AD4851_MAX_IODELAY, &s); + if (c < 0) + return c; + + opt_delay = s + c / 2; + ret = iio_backend_iodelay_set(st->back, i, opt_delay); + if (ret) + return ret; + } + + for (i = 0; i < indio_dev->num_channels; i++) { + ret = iio_backend_chan_disable(st->back, i); + if (ret) + return ret; + } + + ret = iio_backend_data_size_set(st->back, 20); + if (ret) + return ret; + + return regmap_write(st->regmap, AD4851_REG_PACKET, 0); +} + +static int ad4851_get_calibscale(struct ad4851_state *st, int ch, int *val, int *val2) +{ + unsigned int reg_val; + int gain; + int ret; + + guard(mutex)(&st->lock); + + ret = regmap_read(st->regmap, AD4851_REG_CHX_GAIN_MSB(ch), ®_val); + if (ret) + return ret; + + gain = reg_val << 8; + + ret = regmap_read(st->regmap, AD4851_REG_CHX_GAIN_LSB(ch), ®_val); + if (ret) + return ret; + + gain |= reg_val; + + *val = gain; + *val2 = 15; + + return IIO_VAL_FRACTIONAL_LOG2; +} + +static int ad4851_set_calibscale(struct ad4851_state *st, int ch, int val, + int val2) +{ + u64 gain; + u8 buf[2]; + int ret; + + if (val < 0 || val2 < 0) + return -EINVAL; + + gain = val * MICRO + val2; + gain = DIV_U64_ROUND_CLOSEST(gain * 32768, MICRO); + + put_unaligned_be16(gain, buf); + + guard(mutex)(&st->lock); + + ret = regmap_write(st->regmap, AD4851_REG_CHX_GAIN_MSB(ch), buf[0]); + if (ret) + return ret; + + return regmap_write(st->regmap, AD4851_REG_CHX_GAIN_LSB(ch), buf[1]); +} + +static int ad4851_get_calibbias(struct ad4851_state *st, int ch, int *val) +{ + unsigned int lsb, mid, msb; + int ret; + + guard(mutex)(&st->lock); + /* + * After testing, the bulk_write operations doesn't work as expected + * here since the cs needs to be raised after each byte transaction. + */ + ret = regmap_read(st->regmap, AD4851_REG_CHX_OFFSET_MSB(ch), &msb); + if (ret) + return ret; + + ret = regmap_read(st->regmap, AD4851_REG_CHX_OFFSET_MID(ch), &mid); + if (ret) + return ret; + + ret = regmap_read(st->regmap, AD4851_REG_CHX_OFFSET_LSB(ch), &lsb); + if (ret) + return ret; + + if (st->info->resolution == 16) { + *val = msb << 8; + *val |= mid; + *val = sign_extend32(*val, 15); + } else { + *val = msb << 12; + *val |= mid << 4; + *val |= lsb >> 4; + *val = sign_extend32(*val, 19); + } + + return IIO_VAL_INT; +} + +static int ad4851_set_calibbias(struct ad4851_state *st, int ch, int val) +{ + u8 buf[3]; + int ret; + + if (val < 0) + return -EINVAL; + + if (st->info->resolution == 16) + put_unaligned_be16(val, buf); + else + put_unaligned_be24(val << 4, buf); + + guard(mutex)(&st->lock); + /* + * After testing, the bulk_write operations doesn't work as expected + * here since the cs needs to be raised after each byte transaction. + */ + ret = regmap_write(st->regmap, AD4851_REG_CHX_OFFSET_LSB(ch), buf[2]); + if (ret) + return ret; + + ret = regmap_write(st->regmap, AD4851_REG_CHX_OFFSET_MID(ch), buf[1]); + if (ret) + return ret; + + return regmap_write(st->regmap, AD4851_REG_CHX_OFFSET_MSB(ch), buf[0]); +} + +static int ad4851_set_scale(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, int val, int val2) +{ + struct ad4851_state *st = iio_priv(indio_dev); + unsigned int scale_val[2]; + unsigned int i; + const struct ad4851_scale *scale_table; + size_t table_size; + int ret; + + if (st->bipolar_ch[chan->channel]) { + scale_table = ad4851_scale_table_bipolar; + table_size = ARRAY_SIZE(ad4851_scale_table_bipolar); + } else { + scale_table = ad4851_scale_table_unipolar; + table_size = ARRAY_SIZE(ad4851_scale_table_unipolar); + } + + for (i = 0; i < table_size; i++) { + ret = __ad4851_get_scale(indio_dev, scale_table[i].scale_val, + &scale_val[0], &scale_val[1]); + if (ret) + return ret; + + if (scale_val[0] != val || scale_val[1] != val2) + continue; + + return regmap_write(st->regmap, + AD4851_REG_CHX_SOFTSPAN(chan->channel), + scale_table[i].reg_val); + } + + return -EINVAL; +} + +static int ad4851_get_scale(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, int *val, + int *val2) +{ + struct ad4851_state *st = iio_priv(indio_dev); + const struct ad4851_scale *scale_table; + size_t table_size; + u32 softspan_val; + int i, ret; + + if (st->bipolar_ch[chan->channel]) { + scale_table = ad4851_scale_table_bipolar; + table_size = ARRAY_SIZE(ad4851_scale_table_bipolar); + } else { + scale_table = ad4851_scale_table_unipolar; + table_size = ARRAY_SIZE(ad4851_scale_table_unipolar); + } + + ret = regmap_read(st->regmap, AD4851_REG_CHX_SOFTSPAN(chan->channel), + &softspan_val); + if (ret) + return ret; + + for (i = 0; i < table_size; i++) { + if (softspan_val == scale_table[i].reg_val) + break; + } + + if (i == table_size) + return -EIO; + + ret = __ad4851_get_scale(indio_dev, scale_table[i].scale_val, val, + val2); + if (ret) + return ret; + + return IIO_VAL_INT_PLUS_MICRO; +} + +static int ad4851_read_raw(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + int *val, int *val2, long info) +{ + struct ad4851_state *st = iio_priv(indio_dev); + + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->cnv_trigger_rate_hz; + *val2 = st->osr; + return IIO_VAL_FRACTIONAL; + case IIO_CHAN_INFO_CALIBSCALE: + return ad4851_get_calibscale(st, chan->channel, val, val2); + case IIO_CHAN_INFO_SCALE: + return ad4851_get_scale(indio_dev, chan, val, val2); + case IIO_CHAN_INFO_CALIBBIAS: + return ad4851_get_calibbias(st, chan->channel, val); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return ad4851_get_oversampling_ratio(st, val); + default: + return -EINVAL; + } +} + +static int ad4851_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long info) +{ + struct ad4851_state *st = iio_priv(indio_dev); + + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (val < 0 || val2 < 0) + return -EINVAL; + return ad4851_set_sampling_freq(st, val * st->osr + val2 * st->osr / MICRO); + case IIO_CHAN_INFO_SCALE: + return ad4851_set_scale(indio_dev, chan, val, val2); + case IIO_CHAN_INFO_CALIBSCALE: + return ad4851_set_calibscale(st, chan->channel, val, val2); + case IIO_CHAN_INFO_CALIBBIAS: + return ad4851_set_calibbias(st, chan->channel, val); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return ad4851_set_oversampling_ratio(indio_dev, chan, val); + default: + return -EINVAL; + } +} + +static int ad4851_update_scan_mode(struct iio_dev *indio_dev, + const unsigned long *scan_mask) +{ + struct ad4851_state *st = iio_priv(indio_dev); + unsigned int c; + int ret; + + for (c = 0; c < indio_dev->num_channels; c++) { + if (test_bit(c, scan_mask)) + ret = iio_backend_chan_enable(st->back, c); + else + ret = iio_backend_chan_disable(st->back, c); + if (ret) + return ret; + } + + return 0; +} + +static int ad4851_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + struct ad4851_state *st = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + if (st->bipolar_ch[chan->channel]) { + *vals = (const int *)st->scales_bipolar; + *type = IIO_VAL_INT_PLUS_MICRO; + /* Values are stored in a 2D matrix */ + *length = ARRAY_SIZE(ad4851_scale_avail_bipolar) * 2; + } else { + *vals = (const int *)st->scales_unipolar; + *type = IIO_VAL_INT_PLUS_MICRO; + /* Values are stored in a 2D matrix */ + *length = ARRAY_SIZE(ad4851_scale_avail_unipolar) * 2; + } + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + *vals = ad4851_oversampling_ratios; + *length = ARRAY_SIZE(ad4851_oversampling_ratios); + *type = IIO_VAL_INT; + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } +} + +static const struct iio_scan_type ad4851_scan_type_20_u[] = { + [AD4851_SCAN_TYPE_NORMAL] = { + .sign = 'u', + .realbits = 20, + .storagebits = 32, + }, + [AD4851_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + }, +}; + +static const struct iio_scan_type ad4851_scan_type_20_b[] = { + [AD4851_SCAN_TYPE_NORMAL] = { + .sign = 's', + .realbits = 20, + .storagebits = 32, + }, + [AD4851_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 's', + .realbits = 24, + .storagebits = 32, + }, +}; + +static int ad4851_get_current_scan_type(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad4851_state *st = iio_priv(indio_dev); + + return st->resolution_boost_enabled ? AD4851_SCAN_TYPE_RESOLUTION_BOOST + : AD4851_SCAN_TYPE_NORMAL; +} + +#define AD4851_IIO_CHANNEL \ + .type = IIO_VOLTAGE, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_CALIBSCALE) | \ + BIT(IIO_CHAN_INFO_CALIBBIAS) | \ + BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_separate_available = BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .info_mask_shared_by_all_available = \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .indexed = 1 + +/* + * In case of AD4858_IIO_CHANNEL the scan_type is handled dynamically during the + * parse_channels function. + */ +#define AD4858_IIO_CHANNEL \ +{ \ + AD4851_IIO_CHANNEL \ +} + +#define AD4857_IIO_CHANNEL \ +{ \ + AD4851_IIO_CHANNEL, \ + .scan_type = { \ + .sign = 'u', \ + .realbits = 16, \ + .storagebits = 16, \ + }, \ +} + +static int ad4851_parse_channels_common(struct iio_dev *indio_dev, + struct iio_chan_spec **chans, + const struct iio_chan_spec ad4851_chan) +{ + struct ad4851_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + struct iio_chan_spec *channels, *chan_start; + unsigned int num_channels, reg; + unsigned int index = 0; + int ret; + + num_channels = device_get_child_node_count(dev); + if (num_channels > AD4851_MAX_CH_NR) + return dev_err_probe(dev, -EINVAL, "Too many channels: %u\n", + num_channels); + + channels = devm_kcalloc(dev, num_channels, sizeof(*channels), GFP_KERNEL); + if (!channels) + return -ENOMEM; + + chan_start = channels; + + device_for_each_child_node_scoped(dev, child) { + ret = fwnode_property_read_u32(child, "reg", ®); + if (ret) + return dev_err_probe(dev, ret, + "Missing channel number\n"); + if (reg >= AD4851_MAX_CH_NR) + return dev_err_probe(dev, -EINVAL, + "Invalid channel number\n"); + *channels = ad4851_chan; + channels->scan_index = index++; + channels->channel = reg; + + if (fwnode_property_present(child, "diff-channels")) { + channels->channel2 = reg + st->info->max_channels; + channels->differential = 1; + } + + st->bipolar_ch[reg] = fwnode_property_read_bool(child, "bipolar"); + + if (st->bipolar_ch[reg]) { + channels->scan_type.sign = 's'; + } else { + ret = regmap_write(st->regmap, AD4851_REG_CHX_SOFTSPAN(reg), + AD4851_SOFTSPAN_0V_40V); + if (ret) + return ret; + } + + channels++; + } + + *chans = chan_start; + + return num_channels; +} + +static int ad4857_parse_channels(struct iio_dev *indio_dev) +{ + struct iio_chan_spec *ad4851_channels; + const struct iio_chan_spec ad4851_chan = AD4857_IIO_CHANNEL; + int ret; + + ret = ad4851_parse_channels_common(indio_dev, &ad4851_channels, + ad4851_chan); + if (ret < 0) + return ret; + + indio_dev->channels = ad4851_channels; + indio_dev->num_channels = ret; + + return 0; +} + +static int ad4858_parse_channels(struct iio_dev *indio_dev) +{ + struct ad4851_state *st = iio_priv(indio_dev); + struct device *dev = &st->spi->dev; + struct iio_chan_spec *ad4851_channels; + const struct iio_chan_spec ad4851_chan = AD4858_IIO_CHANNEL; + int ret; + + ret = ad4851_parse_channels_common(indio_dev, &ad4851_channels, + ad4851_chan); + if (ret < 0) + return ret; + + device_for_each_child_node_scoped(dev, child) { + ad4851_channels->has_ext_scan_type = 1; + if (fwnode_property_read_bool(child, "bipolar")) { + ad4851_channels->ext_scan_type = ad4851_scan_type_20_b; + ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_b); + } else { + ad4851_channels->ext_scan_type = ad4851_scan_type_20_u; + ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_u); + } + ad4851_channels++; + } + + indio_dev->channels = ad4851_channels; + indio_dev->num_channels = ret; + + return 0; +} + +/* + * parse_channels() function handles the rest of the channel related attributes + * that are usually are stored in the chip info structure. + */ +static const struct ad4851_chip_info ad4851_info = { + .name = "ad4851", + .product_id = 0x67, + .max_sample_rate_hz = 250 * KILO, + .resolution = 16, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4857_parse_channels, +}; + +static const struct ad4851_chip_info ad4852_info = { + .name = "ad4852", + .product_id = 0x66, + .max_sample_rate_hz = 250 * KILO, + .resolution = 20, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4858_parse_channels, +}; + +static const struct ad4851_chip_info ad4853_info = { + .name = "ad4853", + .product_id = 0x65, + .max_sample_rate_hz = 1 * MEGA, + .resolution = 16, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4857_parse_channels, +}; + +static const struct ad4851_chip_info ad4854_info = { + .name = "ad4854", + .product_id = 0x64, + .max_sample_rate_hz = 1 * MEGA, + .resolution = 20, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4858_parse_channels, +}; + +static const struct ad4851_chip_info ad4855_info = { + .name = "ad4855", + .product_id = 0x63, + .max_sample_rate_hz = 250 * KILO, + .resolution = 16, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4857_parse_channels, +}; + +static const struct ad4851_chip_info ad4856_info = { + .name = "ad4856", + .product_id = 0x62, + .max_sample_rate_hz = 250 * KILO, + .resolution = 20, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4858_parse_channels, +}; + +static const struct ad4851_chip_info ad4857_info = { + .name = "ad4857", + .product_id = 0x61, + .max_sample_rate_hz = 1 * MEGA, + .resolution = 16, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4857_parse_channels, +}; + +static const struct ad4851_chip_info ad4858_info = { + .name = "ad4858", + .product_id = 0x60, + .max_sample_rate_hz = 1 * MEGA, + .resolution = 20, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4858_parse_channels, +}; + +static const struct ad4851_chip_info ad4858i_info = { + .name = "ad4858i", + .product_id = 0x6F, + .max_sample_rate_hz = 1 * MEGA, + .resolution = 20, + .max_channels = AD4851_MAX_CH_NR, + .parse_channels = ad4858_parse_channels, +}; + +static const struct iio_info ad4851_iio_info = { + .debugfs_reg_access = ad4851_reg_access, + .read_raw = ad4851_read_raw, + .write_raw = ad4851_write_raw, + .update_scan_mode = ad4851_update_scan_mode, + .get_current_scan_type = ad4851_get_current_scan_type, + .read_avail = ad4851_read_avail, +}; + +static const struct regmap_config regmap_config = { + .reg_bits = 16, + .val_bits = 8, + .read_flag_mask = BIT(7), +}; + +static const char * const ad4851_power_supplies[] = { + "vcc", "vdd", "vee", "vio", +}; + +static int ad4851_probe(struct spi_device *spi) +{ + struct iio_dev *indio_dev; + struct device *dev = &spi->dev; + struct ad4851_state *st; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + st->spi = spi; + + ret = devm_mutex_init(dev, &st->lock); + if (ret) + return ret; + + ret = devm_regulator_bulk_get_enable(dev, + ARRAY_SIZE(ad4851_power_supplies), + ad4851_power_supplies); + if (ret) + return dev_err_probe(dev, ret, + "failed to get and enable supplies\n"); + + ret = devm_regulator_get_enable_optional(dev, "vddh"); + if (ret < 0 && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to enable vddh voltage\n"); + + ret = devm_regulator_get_enable_optional(dev, "vddl"); + if (ret < 0 && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to enable vddl voltage\n"); + + ret = devm_regulator_get_enable_optional(dev, "vrefbuf"); + if (ret < 0 && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to enable vrefbuf voltage\n"); + + st->vrefbuf_en = ret != -ENODEV; + + ret = devm_regulator_get_enable_optional(dev, "vrefio"); + if (ret < 0 && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to enable vrefio voltage\n"); + + st->vrefio_en = ret != -ENODEV; + + st->pd_gpio = devm_gpiod_get_optional(dev, "pd", GPIOD_OUT_LOW); + if (IS_ERR(st->pd_gpio)) + return dev_err_probe(dev, PTR_ERR(st->pd_gpio), + "Error on requesting pd GPIO\n"); + + st->cnv = devm_pwm_get(dev, NULL); + if (IS_ERR(st->cnv)) + return dev_err_probe(dev, PTR_ERR(st->cnv), + "Error on requesting pwm\n"); + + st->info = spi_get_device_match_data(spi); + if (!st->info) + return -ENODEV; + + st->regmap = devm_regmap_init_spi(spi, ®map_config); + if (IS_ERR(st->regmap)) + return PTR_ERR(st->regmap); + + ret = ad4851_set_sampling_freq(st, HZ_PER_MHZ); + if (ret) + return ret; + + ret = devm_add_action_or_reset(&st->spi->dev, ad4851_pwm_disable, + st->cnv); + if (ret) + return ret; + + ret = ad4851_setup(st); + if (ret) + return ret; + + indio_dev->name = st->info->name; + indio_dev->info = &ad4851_iio_info; + indio_dev->modes = INDIO_DIRECT_MODE; + + ret = st->info->parse_channels(indio_dev); + if (ret) + return ret; + + ret = ad4851_scale_fill(indio_dev); + if (ret) + return ret; + + st->back = devm_iio_backend_get(dev, NULL); + if (IS_ERR(st->back)) + return PTR_ERR(st->back); + + ret = devm_iio_backend_request_buffer(dev, st->back, indio_dev); + if (ret) + return ret; + + ret = devm_iio_backend_enable(dev, st->back); + if (ret) + return ret; + + ret = ad4851_calibrate(indio_dev); + if (ret) + return ret; + + return devm_iio_device_register(dev, indio_dev); +} + +static const struct of_device_id ad4851_of_match[] = { + { .compatible = "adi,ad4851", .data = &ad4851_info, }, + { .compatible = "adi,ad4852", .data = &ad4852_info, }, + { .compatible = "adi,ad4853", .data = &ad4853_info, }, + { .compatible = "adi,ad4854", .data = &ad4854_info, }, + { .compatible = "adi,ad4855", .data = &ad4855_info, }, + { .compatible = "adi,ad4856", .data = &ad4856_info, }, + { .compatible = "adi,ad4857", .data = &ad4857_info, }, + { .compatible = "adi,ad4858", .data = &ad4858_info, }, + { .compatible = "adi,ad4858i", .data = &ad4858i_info, }, + { } +}; + +static const struct spi_device_id ad4851_spi_id[] = { + { "ad4851", (kernel_ulong_t)&ad4851_info }, + { "ad4852", (kernel_ulong_t)&ad4852_info }, + { "ad4853", (kernel_ulong_t)&ad4853_info }, + { "ad4854", (kernel_ulong_t)&ad4854_info }, + { "ad4855", (kernel_ulong_t)&ad4855_info }, + { "ad4856", (kernel_ulong_t)&ad4856_info }, + { "ad4857", (kernel_ulong_t)&ad4857_info }, + { "ad4858", (kernel_ulong_t)&ad4858_info }, + { "ad4858i", (kernel_ulong_t)&ad4858i_info }, + { } +}; +MODULE_DEVICE_TABLE(spi, ad4851_spi_id); + +static struct spi_driver ad4851_driver = { + .probe = ad4851_probe, + .driver = { + .name = "ad4851", + .of_match_table = ad4851_of_match, + }, + .id_table = ad4851_spi_id, +}; +module_spi_driver(ad4851_driver); + +MODULE_AUTHOR("Sergiu Cuciurean "); +MODULE_AUTHOR("Dragos Bogdan "); +MODULE_AUTHOR("Antoniu Miclaus "); +MODULE_DESCRIPTION("Analog Devices AD4851 DAS driver"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("IIO_BACKEND"); From 8de148c0189ec9764035e4521bf85f16dc1fad0b Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:31 +0100 Subject: [PATCH 0233/2157] dt-bindings: iio: adc: add ADI ad4030, ad4630 and ad4632 This adds a binding specification for the Analog Devices Inc. AD4030, AD4630 and AD4632 families of ADCs. - ad4030-24 is a 1 channel SAR ADC with 24 bits of precision and a sampling rate of 2M samples per second - ad4032-24 is a 1 channel SAR ADC with 24 bits of precision and a sampling rate of 500K samples per second - ad4630-16 is a 2 channels SAR ADC with 16 bits of precision and a sampling rate of 2M samples per second - ad4630-24 is a 2 channels SAR ADC with 24 bits of precision and a sampling rate of 2M samples per second - ad4632-16 is a 2 channels SAR ADC with 16 bits of precision and a sampling rate of 500K samples per second - ad4632-24 is a 2 channels SAR ADC with 24 bits of precision and a sampling rate of 500K samples per second Reviewed-by: Conor Dooley Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-1-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,ad4030.yaml | 110 ++++++++++++++++++ MAINTAINERS | 9 ++ 2 files changed, 119 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml new file mode 100644 index 000000000000..54e7349317b7 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2024 Analog Devices Inc. +# Copyright 2024 BayLibre, SAS. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/adi,ad4030.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD4030 and AD4630 ADC families + +maintainers: + - Michael Hennerich + - Nuno Sa + +description: | + Analog Devices AD4030 single channel and AD4630/AD4632 dual channel precision + SAR ADC families + + * https://www.analog.com/media/en/technical-documentation/data-sheets/ad4030-24-4032-24.pdf + * https://www.analog.com/media/en/technical-documentation/data-sheets/ad4630-24_ad4632-24.pdf + * https://www.analog.com/media/en/technical-documentation/data-sheets/ad4630-16-4632-16.pdf + +properties: + compatible: + enum: + - adi,ad4030-24 + - adi,ad4032-24 + - adi,ad4630-16 + - adi,ad4630-24 + - adi,ad4632-16 + - adi,ad4632-24 + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 102040816 + + spi-rx-bus-width: + enum: [1, 2, 4] + + vdd-5v-supply: true + vdd-1v8-supply: true + vio-supply: true + + ref-supply: + description: + Optional External unbuffered reference. Used when refin-supply is not + connected. + + refin-supply: + description: + Internal buffered Reference. Used when ref-supply is not connected. + + cnv-gpios: + description: + The Convert Input (CNV). It initiates the sampling conversions. + maxItems: 1 + + reset-gpios: + description: + The Reset Input (/RST). Used for asynchronous device reset. + maxItems: 1 + + interrupts: + description: + The BUSY pin is used to signal that the conversions results are available + to be transferred when in SPI Clocking Mode. This nodes should be + connected to an interrupt that is triggered when the BUSY line goes low. + maxItems: 1 + + interrupt-names: + const: busy + +required: + - compatible + - reg + - vdd-5v-supply + - vdd-1v8-supply + - vio-supply + - cnv-gpios + +oneOf: + - required: + - ref-supply + - required: + - refin-supply + +unevaluatedProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + compatible = "adi,ad4030-24"; + reg = <0>; + spi-max-frequency = <80000000>; + vdd-5v-supply = <&supply_5V>; + vdd-1v8-supply = <&supply_1_8V>; + vio-supply = <&supply_1_8V>; + ref-supply = <&supply_5V>; + cnv-gpios = <&gpio0 0 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index fa7a4f36843e..fa698f88f06b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1306,6 +1306,15 @@ F: Documentation/devicetree/bindings/iio/adc/adi,ad4000.yaml F: Documentation/iio/ad4000.rst F: drivers/iio/adc/ad4000.c +AD4030 ADC DRIVER (AD4030-24/AD4630-16/AD4630-24/AD4632-16/AD4632-24) +M: Michael Hennerich +M: Nuno Sá +R: Esteban Blanc +L: linux-iio@vger.kernel.org +S: Supported +W: https://ez.analog.com/linux-software-drivers +F: Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml + ANALOG DEVICES INC AD4130 DRIVER M: Cosmin Tanislav L: linux-iio@vger.kernel.org From 0cb8b324852f3e3d91dd4611879b4e1233c1f683 Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:32 +0100 Subject: [PATCH 0234/2157] iio: adc: ad4030: add driver for ad4030-24 This adds a new driver for the Analog Devices INC. AD4030-24 ADC. The driver implements basic support for the AD4030-24 1 channel differential ADC with hardware gain and offset control. Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-2-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 + drivers/iio/adc/Kconfig | 14 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ad4030.c | 922 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 938 insertions(+) create mode 100644 drivers/iio/adc/ad4030.c diff --git a/MAINTAINERS b/MAINTAINERS index fa698f88f06b..594ac38ad15d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1314,6 +1314,7 @@ L: linux-iio@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml +F: drivers/iio/adc/ad4030.c ANALOG DEVICES INC AD4130 DRIVER M: Cosmin Tanislav diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index d6e8d5165ccc..b1bfeed66315 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -33,6 +33,20 @@ config AD4000 To compile this driver as a module, choose M here: the module will be called ad4000. +config AD4030 + tristate "Analog Devices AD4030 ADC Driver" + depends on SPI + depends on GPIOLIB + select REGMAP + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER + help + Say yes here to build support for Analog Devices AD4030 and AD4630 high speed + SPI analog to digital converters (ADC). + + To compile this driver as a module, choose M here: the module will be + called ad4030. + config AD4130 tristate "Analog Device AD4130 ADC Driver" depends on SPI diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index e4d8ba12f841..ed41d9a4054e 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o obj-$(CONFIG_AD4000) += ad4000.o +obj-$(CONFIG_AD4030) += ad4030.o obj-$(CONFIG_AD4130) += ad4130.o obj-$(CONFIG_AD4695) += ad4695.o obj-$(CONFIG_AD4851) += ad4851.o diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c new file mode 100644 index 000000000000..8188d44bdd66 --- /dev/null +++ b/drivers/iio/adc/ad4030.c @@ -0,0 +1,922 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Analog Devices AD4030 and AD4630 ADC family driver. + * + * Copyright 2024 Analog Devices, Inc. + * Copyright 2024 BayLibre, SAS + * + * based on code from: + * Analog Devices, Inc. + * Sergiu Cuciurean + * Nuno Sa + * Marcelo Schmitt + * Liviu Adace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AD4030_REG_INTERFACE_CONFIG_A 0x00 +#define AD4030_REG_INTERFACE_CONFIG_A_SW_RESET (BIT(0) | BIT(7)) +#define AD4030_REG_INTERFACE_CONFIG_B 0x01 +#define AD4030_REG_DEVICE_CONFIG 0x02 +#define AD4030_REG_CHIP_TYPE 0x03 +#define AD4030_REG_PRODUCT_ID_L 0x04 +#define AD4030_REG_PRODUCT_ID_H 0x05 +#define AD4030_REG_CHIP_GRADE 0x06 +#define AD4030_REG_CHIP_GRADE_AD4030_24_GRADE 0x10 +#define AD4030_REG_CHIP_GRADE_MASK_CHIP_GRADE GENMASK(7, 3) +#define AD4030_REG_SCRATCH_PAD 0x0A +#define AD4030_REG_SPI_REVISION 0x0B +#define AD4030_REG_VENDOR_L 0x0C +#define AD4030_REG_VENDOR_H 0x0D +#define AD4030_REG_STREAM_MODE 0x0E +#define AD4030_REG_INTERFACE_CONFIG_C 0x10 +#define AD4030_REG_INTERFACE_STATUS_A 0x11 +#define AD4030_REG_EXIT_CFG_MODE 0x14 +#define AD4030_REG_EXIT_CFG_MODE_EXIT_MSK BIT(0) +#define AD4030_REG_AVG 0x15 +#define AD4030_REG_AVG_MASK_AVG_SYNC BIT(7) +#define AD4030_REG_AVG_MASK_AVG_VAL GENMASK(4, 0) +#define AD4030_REG_OFFSET_X0_0 0x16 +#define AD4030_REG_OFFSET_X0_1 0x17 +#define AD4030_REG_OFFSET_X0_2 0x18 +#define AD4030_REG_OFFSET_X1_0 0x19 +#define AD4030_REG_OFFSET_X1_1 0x1A +#define AD4030_REG_OFFSET_X1_2 0x1B +#define AD4030_REG_OFFSET_BYTES_NB 3 +#define AD4030_REG_OFFSET_CHAN(ch) \ + (AD4030_REG_OFFSET_X0_2 + (AD4030_REG_OFFSET_BYTES_NB * (ch))) +#define AD4030_REG_GAIN_X0_LSB 0x1C +#define AD4030_REG_GAIN_X0_MSB 0x1D +#define AD4030_REG_GAIN_X1_LSB 0x1E +#define AD4030_REG_GAIN_X1_MSB 0x1F +#define AD4030_REG_GAIN_MAX_GAIN 1999970 +#define AD4030_REG_GAIN_BYTES_NB 2 +#define AD4030_REG_GAIN_CHAN(ch) \ + (AD4030_REG_GAIN_X0_MSB + (AD4030_REG_GAIN_BYTES_NB * (ch))) +#define AD4030_REG_MODES 0x20 +#define AD4030_REG_MODES_MASK_OUT_DATA_MODE GENMASK(2, 0) +#define AD4030_REG_MODES_MASK_LANE_MODE GENMASK(7, 6) +#define AD4030_REG_OSCILATOR 0x21 +#define AD4030_REG_IO 0x22 +#define AD4030_REG_IO_MASK_IO2X BIT(1) +#define AD4030_REG_PAT0 0x23 +#define AD4030_REG_PAT1 0x24 +#define AD4030_REG_PAT2 0x25 +#define AD4030_REG_PAT3 0x26 +#define AD4030_REG_DIG_DIAG 0x34 +#define AD4030_REG_DIG_ERR 0x35 + +/* Sequence starting with "1 0 1" to enable reg access */ +#define AD4030_REG_ACCESS 0xA0 + +#define AD4030_MAX_IIO_SAMPLE_SIZE_BUFFERED BITS_TO_BYTES(64) +#define AD4030_MAX_HARDWARE_CHANNEL_NB 2 +#define AD4030_MAX_IIO_CHANNEL_NB 5 +#define AD4030_SINGLE_COMMON_BYTE_CHANNELS_MASK 0b10 +#define AD4030_GAIN_MIDLE_POINT 0x8000 +/* + * This accounts for 1 sample per channel plus one s64 for the timestamp, + * aligned on a s64 boundary + */ +#define AD4030_MAXIMUM_RX_BUFFER_SIZE \ + (ALIGN(AD4030_MAX_IIO_SAMPLE_SIZE_BUFFERED * \ + AD4030_MAX_HARDWARE_CHANNEL_NB, \ + sizeof(s64)) + sizeof(s64)) + +#define AD4030_VREF_MIN_UV (4096 * MILLI) +#define AD4030_VREF_MAX_UV (5000 * MILLI) +#define AD4030_VIO_THRESHOLD_UV (1400 * MILLI) +#define AD4030_SPI_MAX_XFER_LEN 8 +#define AD4030_SPI_MAX_REG_XFER_SPEED (80 * MEGA) +#define AD4030_TCNVH_NS 10 +#define AD4030_TCNVL_NS 20 +#define AD4030_TCYC_NS 500 +#define AD4030_TCYC_ADJUSTED_NS (AD4030_TCYC_NS - AD4030_TCNVL_NS) +#define AD4030_TRESET_PW_NS 50 + +enum ad4030_out_mode { + AD4030_OUT_DATA_MD_DIFF, + AD4030_OUT_DATA_MD_16_DIFF_8_COM, + AD4030_OUT_DATA_MD_24_DIFF_8_COM, + AD4030_OUT_DATA_MD_30_AVERAGED_DIFF, + AD4030_OUT_DATA_MD_32_PATTERN, +}; + +struct ad4030_chip_info { + const char *name; + const unsigned long *available_masks; + const struct iio_chan_spec channels[AD4030_MAX_IIO_CHANNEL_NB]; + u8 grade; + u8 precision_bits; + /* Number of hardware channels */ + int num_voltage_inputs; + unsigned int tcyc_ns; +}; + +struct ad4030_state { + struct spi_device *spi; + struct regmap *regmap; + const struct ad4030_chip_info *chip; + struct gpio_desc *cnv_gpio; + int vref_uv; + int vio_uv; + int offset_avail[3]; + enum ad4030_out_mode mode; + + /* + * DMA (thus cache coherency maintenance) requires the transfer buffers + * to live in their own cache lines. + */ + u8 tx_data[AD4030_SPI_MAX_XFER_LEN] __aligned(IIO_DMA_MINALIGN); + union { + u8 raw[AD4030_MAXIMUM_RX_BUFFER_SIZE]; + struct { + s32 diff; + u8 common; + }; + } rx_data; +}; + +/* + * For a chip with 2 hardware channel this will be used to create 2 common-mode + * channels: + * - voltage4 + * - voltage5 + * As the common-mode channels are after the differential ones, we compute the + * channel number like this: + * - _idx is the scan_index (the order in the output buffer) + * - _ch is the hardware channel number this common-mode channel is related + * - _idx - _ch gives us the number of channel in the chip + * - _idx - _ch * 2 is the starting number of the common-mode channels, since + * for each differential channel there is a common-mode channel + * - _idx - _ch * 2 + _ch gives the channel number for this specific common-mode + * channel + */ +#define AD4030_CHAN_CMO(_idx, _ch) { \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .address = (_ch), \ + .channel = ((_idx) - (_ch)) * 2 + (_ch), \ + .scan_index = (_idx), \ + .scan_type = { \ + .sign = 'u', \ + .storagebits = 8, \ + .realbits = 8, \ + .endianness = IIO_BE, \ + }, \ +} + +/* + * For a chip with 2 hardware channel this will be used to create 2 differential + * channels: + * - voltage0-voltage1 + * - voltage2-voltage3 + */ +#define AD4030_CHAN_DIFF(_idx, _storage, _real, _shift) { \ + .info_mask_separate = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_CALIBSCALE) | \ + BIT(IIO_CHAN_INFO_CALIBBIAS) | \ + BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_separate_available = BIT(IIO_CHAN_INFO_CALIBBIAS) | \ + BIT(IIO_CHAN_INFO_CALIBSCALE), \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .address = (_idx), \ + .channel = (_idx) * 2, \ + .channel2 = (_idx) * 2 + 1, \ + .scan_index = (_idx), \ + .differential = true, \ + .scan_type = { \ + .sign = 's', \ + .storagebits = _storage, \ + .realbits = _real, \ + .shift = _shift, \ + .endianness = IIO_BE, \ + }, \ +} + +static int ad4030_enter_config_mode(struct ad4030_state *st) +{ + st->tx_data[0] = AD4030_REG_ACCESS; + + struct spi_transfer xfer = { + .tx_buf = st->tx_data, + .bits_per_word = 8, + .len = 1, + .speed_hz = AD4030_SPI_MAX_REG_XFER_SPEED, + }; + + return spi_sync_transfer(st->spi, &xfer, 1); +} + +static int ad4030_exit_config_mode(struct ad4030_state *st) +{ + st->tx_data[0] = 0; + st->tx_data[1] = AD4030_REG_EXIT_CFG_MODE; + st->tx_data[2] = AD4030_REG_EXIT_CFG_MODE_EXIT_MSK; + + struct spi_transfer xfer = { + .tx_buf = st->tx_data, + .bits_per_word = 8, + .len = 3, + .speed_hz = AD4030_SPI_MAX_REG_XFER_SPEED, + }; + + return spi_sync_transfer(st->spi, &xfer, 1); +} + +static int ad4030_spi_read(void *context, const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + int ret; + struct ad4030_state *st = context; + struct spi_transfer xfer = { + .tx_buf = st->tx_data, + .rx_buf = st->rx_data.raw, + .bits_per_word = 8, + .len = reg_size + val_size, + .speed_hz = AD4030_SPI_MAX_REG_XFER_SPEED, + }; + + if (xfer.len > sizeof(st->tx_data) || + xfer.len > sizeof(st->rx_data.raw)) + return -EINVAL; + + ret = ad4030_enter_config_mode(st); + if (ret) + return ret; + + memset(st->tx_data, 0, sizeof(st->tx_data)); + memcpy(st->tx_data, reg, reg_size); + + ret = spi_sync_transfer(st->spi, &xfer, 1); + if (ret) + return ret; + + memcpy(val, &st->rx_data.raw[reg_size], val_size); + + return ad4030_exit_config_mode(st); +} + +static int ad4030_spi_write(void *context, const void *data, size_t count) +{ + int ret; + struct ad4030_state *st = context; + bool is_reset = count >= 3 && + ((u8 *)data)[0] == 0 && + ((u8 *)data)[1] == 0 && + ((u8 *)data)[2] == 0x81; + struct spi_transfer xfer = { + .tx_buf = st->tx_data, + .bits_per_word = 8, + .len = count, + .speed_hz = AD4030_SPI_MAX_REG_XFER_SPEED, + }; + + if (count > sizeof(st->tx_data)) + return -EINVAL; + + ret = ad4030_enter_config_mode(st); + if (ret) + return ret; + + memcpy(st->tx_data, data, count); + + ret = spi_sync_transfer(st->spi, &xfer, 1); + if (ret) + return ret; + + /* + * From datasheet: "After a [...] reset, no SPI commands or conversions + * can be started for 750us" + * After a reset we are in conversion mode, no need to exit config mode + */ + if (is_reset) { + fsleep(750); + return 0; + } + + return ad4030_exit_config_mode(st); +} + +static const struct regmap_bus ad4030_regmap_bus = { + .read = ad4030_spi_read, + .write = ad4030_spi_write, + .reg_format_endian_default = REGMAP_ENDIAN_BIG, +}; + +static const struct regmap_range ad4030_regmap_rd_range[] = { + regmap_reg_range(AD4030_REG_INTERFACE_CONFIG_A, AD4030_REG_CHIP_GRADE), + regmap_reg_range(AD4030_REG_SCRATCH_PAD, AD4030_REG_STREAM_MODE), + regmap_reg_range(AD4030_REG_INTERFACE_CONFIG_C, + AD4030_REG_INTERFACE_STATUS_A), + regmap_reg_range(AD4030_REG_EXIT_CFG_MODE, AD4030_REG_PAT3), + regmap_reg_range(AD4030_REG_DIG_DIAG, AD4030_REG_DIG_ERR), +}; + +static const struct regmap_range ad4030_regmap_wr_range[] = { + regmap_reg_range(AD4030_REG_CHIP_TYPE, AD4030_REG_CHIP_GRADE), + regmap_reg_range(AD4030_REG_SPI_REVISION, AD4030_REG_VENDOR_H), +}; + +static const struct regmap_access_table ad4030_regmap_rd_table = { + .yes_ranges = ad4030_regmap_rd_range, + .n_yes_ranges = ARRAY_SIZE(ad4030_regmap_rd_range), +}; + +static const struct regmap_access_table ad4030_regmap_wr_table = { + .no_ranges = ad4030_regmap_wr_range, + .n_no_ranges = ARRAY_SIZE(ad4030_regmap_wr_range), +}; + +static const struct regmap_config ad4030_regmap_config = { + .reg_bits = 16, + .val_bits = 8, + .read_flag_mask = 0x80, + .rd_table = &ad4030_regmap_rd_table, + .wr_table = &ad4030_regmap_wr_table, + .max_register = AD4030_REG_DIG_ERR, +}; + +static int ad4030_get_chan_scale(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + if (chan->differential) { + *val = (st->vref_uv * 2) / MILLI; + *val2 = chan->scan_type.realbits; + return IIO_VAL_FRACTIONAL_LOG2; + } + + *val = st->vref_uv / MILLI; + *val2 = chan->scan_type.realbits; + return IIO_VAL_FRACTIONAL_LOG2; +} + +static int ad4030_get_chan_calibscale(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2) +{ + struct ad4030_state *st = iio_priv(indio_dev); + u16 gain; + int ret; + + ret = regmap_bulk_read(st->regmap, AD4030_REG_GAIN_CHAN(chan->address), + st->rx_data.raw, AD4030_REG_GAIN_BYTES_NB); + if (ret) + return ret; + + gain = get_unaligned_be16(st->rx_data.raw); + + /* From datasheet: multiplied output = input × gain word/0x8000 */ + *val = gain / AD4030_GAIN_MIDLE_POINT; + *val2 = mul_u64_u32_div(gain % AD4030_GAIN_MIDLE_POINT, NANO, + AD4030_GAIN_MIDLE_POINT); + + return IIO_VAL_INT_PLUS_NANO; +} + +/* Returns the offset where 1 LSB = (VREF/2^precision_bits - 1)/gain */ +static int ad4030_get_chan_calibbias(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val) +{ + struct ad4030_state *st = iio_priv(indio_dev); + int ret; + + ret = regmap_bulk_read(st->regmap, + AD4030_REG_OFFSET_CHAN(chan->address), + st->rx_data.raw, AD4030_REG_OFFSET_BYTES_NB); + if (ret) + return ret; + + switch (st->chip->precision_bits) { + case 16: + *val = sign_extend32(get_unaligned_be16(st->rx_data.raw), 15); + return IIO_VAL_INT; + + case 24: + *val = sign_extend32(get_unaligned_be24(st->rx_data.raw), 23); + return IIO_VAL_INT; + + default: + return -EINVAL; + } +} + +static int ad4030_set_chan_calibscale(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int gain_int, + int gain_frac) +{ + struct ad4030_state *st = iio_priv(indio_dev); + u64 gain; + + if (gain_int < 0 || gain_frac < 0) + return -EINVAL; + + gain = mul_u32_u32(gain_int, MICRO) + gain_frac; + + if (gain > AD4030_REG_GAIN_MAX_GAIN) + return -EINVAL; + + put_unaligned_be16(DIV_ROUND_CLOSEST_ULL(gain * AD4030_GAIN_MIDLE_POINT, + MICRO), + st->tx_data); + + return regmap_bulk_write(st->regmap, + AD4030_REG_GAIN_CHAN(chan->address), + st->tx_data, AD4030_REG_GAIN_BYTES_NB); +} + +static int ad4030_set_chan_calibbias(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int offset) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + if (offset < st->offset_avail[0] || offset > st->offset_avail[2]) + return -EINVAL; + + st->tx_data[2] = 0; + + switch (st->chip->precision_bits) { + case 16: + put_unaligned_be16(offset, st->tx_data); + break; + + case 24: + put_unaligned_be24(offset, st->tx_data); + break; + + default: + return -EINVAL; + } + + return regmap_bulk_write(st->regmap, + AD4030_REG_OFFSET_CHAN(chan->address), + st->tx_data, AD4030_REG_OFFSET_BYTES_NB); +} + +static bool ad4030_is_common_byte_asked(struct ad4030_state *st, + unsigned int mask) +{ + return mask & AD4030_SINGLE_COMMON_BYTE_CHANNELS_MASK; +} + +static int ad4030_set_mode(struct iio_dev *indio_dev, unsigned long mask) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + if (ad4030_is_common_byte_asked(st, mask)) + st->mode = AD4030_OUT_DATA_MD_24_DIFF_8_COM; + else + st->mode = AD4030_OUT_DATA_MD_DIFF; + + return regmap_update_bits(st->regmap, AD4030_REG_MODES, + AD4030_REG_MODES_MASK_OUT_DATA_MODE, + st->mode); +} + +static int ad4030_conversion(struct iio_dev *indio_dev) +{ + struct ad4030_state *st = iio_priv(indio_dev); + const struct iio_scan_type scan_type = indio_dev->channels->scan_type; + unsigned char diff_realbytes = BITS_TO_BYTES(scan_type.realbits); + unsigned int bytes_to_read; + int ret; + + /* Number of bytes for one differential channel */ + bytes_to_read = diff_realbytes; + /* Add one byte if we are using a differential + common byte mode */ + bytes_to_read += (st->mode == AD4030_OUT_DATA_MD_24_DIFF_8_COM || + st->mode == AD4030_OUT_DATA_MD_16_DIFF_8_COM) ? 1 : 0; + /* Mulitiply by the number of hardware channels */ + bytes_to_read *= st->chip->num_voltage_inputs; + + gpiod_set_value_cansleep(st->cnv_gpio, 1); + ndelay(AD4030_TCNVH_NS); + gpiod_set_value_cansleep(st->cnv_gpio, 0); + ndelay(st->chip->tcyc_ns); + + ret = spi_read(st->spi, st->rx_data.raw, bytes_to_read); + if (ret) + return ret; + + if (st->mode != AD4030_OUT_DATA_MD_24_DIFF_8_COM) + return 0; + + st->rx_data.common = st->rx_data.raw[diff_realbytes]; + + return 0; +} + +static int ad4030_single_conversion(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, int *val) +{ + struct ad4030_state *st = iio_priv(indio_dev); + int ret; + + ret = ad4030_set_mode(indio_dev, BIT(chan->scan_index)); + if (ret) + return ret; + + ret = ad4030_conversion(indio_dev); + if (ret) + return ret; + + if (chan->differential) + *val = st->rx_data.diff; + else + *val = st->rx_data.common; + + return IIO_VAL_INT; +} + +static irqreturn_t ad4030_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct ad4030_state *st = iio_priv(indio_dev); + int ret; + + ret = ad4030_conversion(indio_dev); + if (ret) + goto out; + + iio_push_to_buffers_with_timestamp(indio_dev, st->rx_data.raw, + pf->timestamp); + +out: + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; +} + +static const int ad4030_gain_avail[3][2] = { + { 0, 0 }, + { 0, 30518 }, + { 1, 999969482 }, +}; + +static int ad4030_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *channel, + const int **vals, int *type, + int *length, long mask) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_CALIBBIAS: + *vals = st->offset_avail; + *type = IIO_VAL_INT; + return IIO_AVAIL_RANGE; + + case IIO_CHAN_INFO_CALIBSCALE: + *vals = (void *)ad4030_gain_avail; + *type = IIO_VAL_INT_PLUS_NANO; + return IIO_AVAIL_RANGE; + + default: + return -EINVAL; + } +} + +static int ad4030_read_raw_dispatch(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long info) +{ + switch (info) { + case IIO_CHAN_INFO_RAW: + return ad4030_single_conversion(indio_dev, chan, val); + + case IIO_CHAN_INFO_CALIBSCALE: + return ad4030_get_chan_calibscale(indio_dev, chan, val, val2); + + case IIO_CHAN_INFO_CALIBBIAS: + return ad4030_get_chan_calibbias(indio_dev, chan, val); + + default: + return -EINVAL; + } +} + +static int ad4030_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long info) +{ + int ret; + + if (info == IIO_CHAN_INFO_SCALE) + return ad4030_get_chan_scale(indio_dev, chan, val, val2); + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad4030_read_raw_dispatch(indio_dev, chan, val, val2, info); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad4030_write_raw_dispatch(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long info) +{ + switch (info) { + case IIO_CHAN_INFO_CALIBSCALE: + return ad4030_set_chan_calibscale(indio_dev, chan, val, val2); + + case IIO_CHAN_INFO_CALIBBIAS: + if (val2 != 0) + return -EINVAL; + return ad4030_set_chan_calibbias(indio_dev, chan, val); + + default: + return -EINVAL; + } +} + +static int ad4030_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long info) +{ + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = ad4030_write_raw_dispatch(indio_dev, chan, val, val2, info); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad4030_reg_access(struct iio_dev *indio_dev, unsigned int reg, + unsigned int writeval, unsigned int *readval) +{ + const struct ad4030_state *st = iio_priv(indio_dev); + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + if (readval) + ret = regmap_read(st->regmap, reg, readval); + else + ret = regmap_write(st->regmap, reg, writeval); + + iio_device_release_direct_mode(indio_dev); + + return ret; +} + +static int ad4030_read_label(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + char *label) +{ + if (chan->differential) + return sprintf(label, "differential%lu\n", chan->address); + return sprintf(label, "common-mode%lu\n", chan->address); +} + +static const struct iio_info ad4030_iio_info = { + .read_avail = ad4030_read_avail, + .read_raw = ad4030_read_raw, + .write_raw = ad4030_write_raw, + .debugfs_reg_access = ad4030_reg_access, + .read_label = ad4030_read_label, +}; + +static int ad4030_buffer_preenable(struct iio_dev *indio_dev) +{ + return ad4030_set_mode(indio_dev, *indio_dev->active_scan_mask); +} + +static const struct iio_buffer_setup_ops ad4030_buffer_setup_ops = { + .preenable = ad4030_buffer_preenable, +}; + +static int ad4030_regulators_get(struct ad4030_state *st) +{ + struct device *dev = &st->spi->dev; + static const char * const ids[] = { "vdd-5v", "vdd-1v8" }; + int ret; + + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(ids), ids); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable regulators\n"); + + st->vio_uv = devm_regulator_get_enable_read_voltage(dev, "vio"); + if (st->vio_uv < 0) + return dev_err_probe(dev, st->vio_uv, + "Failed to enable and read vio voltage\n"); + + st->vref_uv = devm_regulator_get_enable_read_voltage(dev, "ref"); + if (st->vref_uv < 0) { + if (st->vref_uv != -ENODEV) + return dev_err_probe(dev, st->vref_uv, + "Failed to read ref voltage\n"); + + /* if not using optional REF, the REFIN must be used */ + st->vref_uv = devm_regulator_get_enable_read_voltage(dev, + "refin"); + if (st->vref_uv < 0) + return dev_err_probe(dev, st->vref_uv, + "Failed to read refin voltage\n"); + } + + return 0; +} + +static int ad4030_reset(struct ad4030_state *st) +{ + struct device *dev = &st->spi->dev; + struct gpio_desc *reset; + + reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(reset)) + return dev_err_probe(dev, PTR_ERR(reset), + "Failed to get reset GPIO\n"); + + if (reset) { + ndelay(50); + gpiod_set_value_cansleep(reset, 0); + return 0; + } + + return regmap_write(st->regmap, AD4030_REG_INTERFACE_CONFIG_A, + AD4030_REG_INTERFACE_CONFIG_A_SW_RESET); +} + +static int ad4030_detect_chip_info(const struct ad4030_state *st) +{ + unsigned int grade; + int ret; + + ret = regmap_read(st->regmap, AD4030_REG_CHIP_GRADE, &grade); + if (ret) + return ret; + + grade = FIELD_GET(AD4030_REG_CHIP_GRADE_MASK_CHIP_GRADE, grade); + if (grade != st->chip->grade) + dev_warn(&st->spi->dev, "Unknown grade(0x%x) for %s\n", grade, + st->chip->name); + + return 0; +} + +static int ad4030_config(struct ad4030_state *st) +{ + st->offset_avail[0] = (int)BIT(st->chip->precision_bits - 1) * -1; + st->offset_avail[1] = 1; + st->offset_avail[2] = BIT(st->chip->precision_bits - 1) - 1; + + if (st->vio_uv < AD4030_VIO_THRESHOLD_UV) + return regmap_write(st->regmap, AD4030_REG_IO, + AD4030_REG_IO_MASK_IO2X); + + return 0; +} + +static int ad4030_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct iio_dev *indio_dev; + struct ad4030_state *st; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + st->spi = spi; + + st->regmap = devm_regmap_init(dev, &ad4030_regmap_bus, st, + &ad4030_regmap_config); + if (IS_ERR(st->regmap)) + dev_err_probe(dev, PTR_ERR(st->regmap), + "Failed to initialize regmap\n"); + + st->chip = spi_get_device_match_data(spi); + if (!st->chip) + return -EINVAL; + + ret = ad4030_regulators_get(st); + if (ret) + return ret; + + /* + * From datasheet: "Perform a reset no sooner than 3ms after the power + * supplies are valid and stable" + */ + fsleep(3000); + + ret = ad4030_reset(st); + if (ret) + return ret; + + ret = ad4030_detect_chip_info(st); + if (ret) + return ret; + + ret = ad4030_config(st); + if (ret) + return ret; + + st->cnv_gpio = devm_gpiod_get(dev, "cnv", GPIOD_OUT_LOW); + if (IS_ERR(st->cnv_gpio)) + return dev_err_probe(dev, PTR_ERR(st->cnv_gpio), + "Failed to get cnv gpio\n"); + + /* + * One hardware channel is split in two software channels when using + * common byte mode. Add one more channel for the timestamp. + */ + indio_dev->num_channels = 2 * st->chip->num_voltage_inputs + 1; + indio_dev->name = st->chip->name; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &ad4030_iio_info; + indio_dev->channels = st->chip->channels; + indio_dev->available_scan_masks = st->chip->available_masks; + + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, + iio_pollfunc_store_time, + ad4030_trigger_handler, + &ad4030_buffer_setup_ops); + if (ret) + return dev_err_probe(dev, ret, + "Failed to setup triggered buffer\n"); + + return devm_iio_device_register(dev, indio_dev); +} + +static const unsigned long ad4030_channel_masks[] = { + /* Differential only */ + BIT(0), + /* Differential and common-mode voltage */ + GENMASK(1, 0), + 0, +}; + +static const struct ad4030_chip_info ad4030_24_chip_info = { + .name = "ad4030-24", + .available_masks = ad4030_channel_masks, + .channels = { + AD4030_CHAN_DIFF(0, 32, 24, 8), + AD4030_CHAN_CMO(1, 0), + IIO_CHAN_SOFT_TIMESTAMP(2), + }, + .grade = AD4030_REG_CHIP_GRADE_AD4030_24_GRADE, + .precision_bits = 24, + .num_voltage_inputs = 1, + .tcyc_ns = AD4030_TCYC_ADJUSTED_NS, +}; + +static const struct spi_device_id ad4030_id_table[] = { + { "ad4030-24", (kernel_ulong_t)&ad4030_24_chip_info }, + { } +}; +MODULE_DEVICE_TABLE(spi, ad4030_id_table); + +static const struct of_device_id ad4030_of_match[] = { + { .compatible = "adi,ad4030-24", .data = &ad4030_24_chip_info }, + { } +}; +MODULE_DEVICE_TABLE(of, ad4030_of_match); + +static struct spi_driver ad4030_driver = { + .driver = { + .name = "ad4030", + .of_match_table = ad4030_of_match, + }, + .probe = ad4030_probe, + .id_table = ad4030_id_table, +}; +module_spi_driver(ad4030_driver); + +MODULE_AUTHOR("Esteban Blanc "); +MODULE_DESCRIPTION("Analog Devices AD4630 ADC family driver"); +MODULE_LICENSE("GPL"); From 949abd1ca5a4342d9fb8c774035222e65dd1cfe4 Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:33 +0100 Subject: [PATCH 0235/2157] iio: adc: ad4030: add averaging support This add support for the averaging mode of AD4030 using oversampling IIO attribute Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-3-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4030.c | 136 ++++++++++++++++++++++++++++++++++----- 1 file changed, 119 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c index 8188d44bdd66..d026535e5d9b 100644 --- a/drivers/iio/adc/ad4030.c +++ b/drivers/iio/adc/ad4030.c @@ -112,6 +112,11 @@ enum ad4030_out_mode { AD4030_OUT_DATA_MD_32_PATTERN, }; +enum { + AD4030_SCAN_TYPE_NORMAL, + AD4030_SCAN_TYPE_AVG, +}; + struct ad4030_chip_info { const char *name; const unsigned long *available_masks; @@ -127,10 +132,12 @@ struct ad4030_state { struct spi_device *spi; struct regmap *regmap; const struct ad4030_chip_info *chip; + const struct iio_scan_type *current_scan_type; struct gpio_desc *cnv_gpio; int vref_uv; int vio_uv; int offset_avail[3]; + unsigned int avg_log2; enum ad4030_out_mode mode; /* @@ -184,7 +191,11 @@ struct ad4030_state { * - voltage0-voltage1 * - voltage2-voltage3 */ -#define AD4030_CHAN_DIFF(_idx, _storage, _real, _shift) { \ +#define AD4030_CHAN_DIFF(_idx, _scan_type) { \ + .info_mask_shared_by_all = \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .info_mask_shared_by_all_available = \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ .info_mask_separate = BIT(IIO_CHAN_INFO_SCALE) | \ BIT(IIO_CHAN_INFO_CALIBSCALE) | \ BIT(IIO_CHAN_INFO_CALIBBIAS) | \ @@ -198,15 +209,17 @@ struct ad4030_state { .channel2 = (_idx) * 2 + 1, \ .scan_index = (_idx), \ .differential = true, \ - .scan_type = { \ - .sign = 's', \ - .storagebits = _storage, \ - .realbits = _real, \ - .shift = _shift, \ - .endianness = IIO_BE, \ - }, \ + .has_ext_scan_type = 1, \ + .ext_scan_type = _scan_type, \ + .num_ext_scan_type = ARRAY_SIZE(_scan_type), \ } +static const int ad4030_average_modes[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, + 65536, +}; + static int ad4030_enter_config_mode(struct ad4030_state *st) { st->tx_data[0] = AD4030_REG_ACCESS; @@ -356,10 +369,13 @@ static int ad4030_get_chan_scale(struct iio_dev *indio_dev, int *val2) { struct ad4030_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; if (chan->differential) { + scan_type = iio_get_current_scan_type(indio_dev, + st->chip->channels); *val = (st->vref_uv * 2) / MILLI; - *val2 = chan->scan_type.realbits; + *val2 = scan_type->realbits; return IIO_VAL_FRACTIONAL_LOG2; } @@ -474,6 +490,27 @@ static int ad4030_set_chan_calibbias(struct iio_dev *indio_dev, st->tx_data, AD4030_REG_OFFSET_BYTES_NB); } +static int ad4030_set_avg_frame_len(struct iio_dev *dev, int avg_val) +{ + struct ad4030_state *st = iio_priv(dev); + unsigned int avg_log2 = ilog2(avg_val); + unsigned int last_avg_idx = ARRAY_SIZE(ad4030_average_modes) - 1; + int ret; + + if (avg_val < 0 || avg_val > ad4030_average_modes[last_avg_idx]) + return -EINVAL; + + ret = regmap_write(st->regmap, AD4030_REG_AVG, + AD4030_REG_AVG_MASK_AVG_SYNC | + FIELD_PREP(AD4030_REG_AVG_MASK_AVG_VAL, avg_log2)); + if (ret) + return ret; + + st->avg_log2 = avg_log2; + + return 0; +} + static bool ad4030_is_common_byte_asked(struct ad4030_state *st, unsigned int mask) { @@ -484,11 +521,18 @@ static int ad4030_set_mode(struct iio_dev *indio_dev, unsigned long mask) { struct ad4030_state *st = iio_priv(indio_dev); - if (ad4030_is_common_byte_asked(st, mask)) + if (st->avg_log2 > 0) + st->mode = AD4030_OUT_DATA_MD_30_AVERAGED_DIFF; + else if (ad4030_is_common_byte_asked(st, mask)) st->mode = AD4030_OUT_DATA_MD_24_DIFF_8_COM; else st->mode = AD4030_OUT_DATA_MD_DIFF; + st->current_scan_type = iio_get_current_scan_type(indio_dev, + st->chip->channels); + if (IS_ERR(st->current_scan_type)) + return PTR_ERR(st->current_scan_type); + return regmap_update_bits(st->regmap, AD4030_REG_MODES, AD4030_REG_MODES_MASK_OUT_DATA_MODE, st->mode); @@ -497,9 +541,11 @@ static int ad4030_set_mode(struct iio_dev *indio_dev, unsigned long mask) static int ad4030_conversion(struct iio_dev *indio_dev) { struct ad4030_state *st = iio_priv(indio_dev); - const struct iio_scan_type scan_type = indio_dev->channels->scan_type; - unsigned char diff_realbytes = BITS_TO_BYTES(scan_type.realbits); + unsigned char diff_realbytes = + BITS_TO_BYTES(st->current_scan_type->realbits); unsigned int bytes_to_read; + unsigned long cnv_nb = BIT(st->avg_log2); + unsigned int i; int ret; /* Number of bytes for one differential channel */ @@ -510,10 +556,12 @@ static int ad4030_conversion(struct iio_dev *indio_dev) /* Mulitiply by the number of hardware channels */ bytes_to_read *= st->chip->num_voltage_inputs; - gpiod_set_value_cansleep(st->cnv_gpio, 1); - ndelay(AD4030_TCNVH_NS); - gpiod_set_value_cansleep(st->cnv_gpio, 0); - ndelay(st->chip->tcyc_ns); + for (i = 0; i < cnv_nb; i++) { + gpiod_set_value_cansleep(st->cnv_gpio, 1); + ndelay(AD4030_TCNVH_NS); + gpiod_set_value_cansleep(st->cnv_gpio, 0); + ndelay(st->chip->tcyc_ns); + } ret = spi_read(st->spi, st->rx_data.raw, bytes_to_read); if (ret) @@ -593,6 +641,12 @@ static int ad4030_read_avail(struct iio_dev *indio_dev, *type = IIO_VAL_INT_PLUS_NANO; return IIO_AVAIL_RANGE; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + *vals = ad4030_average_modes; + *type = IIO_VAL_INT; + *length = ARRAY_SIZE(ad4030_average_modes); + return IIO_AVAIL_LIST; + default: return -EINVAL; } @@ -602,6 +656,8 @@ static int ad4030_read_raw_dispatch(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long info) { + struct ad4030_state *st = iio_priv(indio_dev); + switch (info) { case IIO_CHAN_INFO_RAW: return ad4030_single_conversion(indio_dev, chan, val); @@ -612,6 +668,10 @@ static int ad4030_read_raw_dispatch(struct iio_dev *indio_dev, case IIO_CHAN_INFO_CALIBBIAS: return ad4030_get_chan_calibbias(indio_dev, chan, val); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + *val = BIT(st->avg_log2); + return IIO_VAL_INT; + default: return -EINVAL; } @@ -650,6 +710,9 @@ static int ad4030_write_raw_dispatch(struct iio_dev *indio_dev, return -EINVAL; return ad4030_set_chan_calibbias(indio_dev, chan, val); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return ad4030_set_avg_frame_len(indio_dev, val); + default: return -EINVAL; } @@ -701,12 +764,21 @@ static int ad4030_read_label(struct iio_dev *indio_dev, return sprintf(label, "common-mode%lu\n", chan->address); } +static int ad4030_get_current_scan_type(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + return st->avg_log2 ? AD4030_SCAN_TYPE_AVG : AD4030_SCAN_TYPE_NORMAL; +} + static const struct iio_info ad4030_iio_info = { .read_avail = ad4030_read_avail, .read_raw = ad4030_read_raw, .write_raw = ad4030_write_raw, .debugfs_reg_access = ad4030_reg_access, .read_label = ad4030_read_label, + .get_current_scan_type = ad4030_get_current_scan_type, }; static int ad4030_buffer_preenable(struct iio_dev *indio_dev) @@ -714,8 +786,21 @@ static int ad4030_buffer_preenable(struct iio_dev *indio_dev) return ad4030_set_mode(indio_dev, *indio_dev->active_scan_mask); } +static bool ad4030_validate_scan_mask(struct iio_dev *indio_dev, + const unsigned long *scan_mask) +{ + struct ad4030_state *st = iio_priv(indio_dev); + + /* Asking for both common channels and averaging */ + if (st->avg_log2 && ad4030_is_common_byte_asked(st, *scan_mask)) + return false; + + return true; +} + static const struct iio_buffer_setup_ops ad4030_buffer_setup_ops = { .preenable = ad4030_buffer_preenable, + .validate_scan_mask = ad4030_validate_scan_mask, }; static int ad4030_regulators_get(struct ad4030_state *st) @@ -881,11 +966,28 @@ static const unsigned long ad4030_channel_masks[] = { 0, }; +static const struct iio_scan_type ad4030_24_scan_types[] = { + [AD4030_SCAN_TYPE_NORMAL] = { + .sign = 's', + .storagebits = 32, + .realbits = 24, + .shift = 8, + .endianness = IIO_BE, + }, + [AD4030_SCAN_TYPE_AVG] = { + .sign = 's', + .storagebits = 32, + .realbits = 30, + .shift = 2, + .endianness = IIO_BE, + }, +}; + static const struct ad4030_chip_info ad4030_24_chip_info = { .name = "ad4030-24", .available_masks = ad4030_channel_masks, .channels = { - AD4030_CHAN_DIFF(0, 32, 24, 8), + AD4030_CHAN_DIFF(0, ad4030_24_scan_types), AD4030_CHAN_CMO(1, 0), IIO_CHAN_SOFT_TIMESTAMP(2), }, From c8ed843c4860a2dea6b4b9396a87c7d68470e177 Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:34 +0100 Subject: [PATCH 0236/2157] iio: adc: ad4030: add support for ad4630-24 and ad4630-16 AD4630-24 and AD4630-16 are 2 channels ADCs. Both channels are interleaved bit per bit on SDO line. Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-4-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4030.c | 188 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 178 insertions(+), 10 deletions(-) diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c index d026535e5d9b..0fdf01d6d7f3 100644 --- a/drivers/iio/adc/ad4030.c +++ b/drivers/iio/adc/ad4030.c @@ -33,6 +33,8 @@ #define AD4030_REG_PRODUCT_ID_H 0x05 #define AD4030_REG_CHIP_GRADE 0x06 #define AD4030_REG_CHIP_GRADE_AD4030_24_GRADE 0x10 +#define AD4030_REG_CHIP_GRADE_AD4630_16_GRADE 0x03 +#define AD4030_REG_CHIP_GRADE_AD4630_24_GRADE 0x00 #define AD4030_REG_CHIP_GRADE_MASK_CHIP_GRADE GENMASK(7, 3) #define AD4030_REG_SCRATCH_PAD 0x0A #define AD4030_REG_SPI_REVISION 0x0B @@ -83,6 +85,7 @@ #define AD4030_MAX_HARDWARE_CHANNEL_NB 2 #define AD4030_MAX_IIO_CHANNEL_NB 5 #define AD4030_SINGLE_COMMON_BYTE_CHANNELS_MASK 0b10 +#define AD4030_DUAL_COMMON_BYTE_CHANNELS_MASK 0b1100 #define AD4030_GAIN_MIDLE_POINT 0x8000 /* * This accounts for 1 sample per channel plus one s64 for the timestamp, @@ -112,6 +115,13 @@ enum ad4030_out_mode { AD4030_OUT_DATA_MD_32_PATTERN, }; +enum { + AD4030_LANE_MD_1_PER_CH, + AD4030_LANE_MD_2_PER_CH, + AD4030_LANE_MD_4_PER_CH, + AD4030_LANE_MD_INTERLEAVED, +}; + enum { AD4030_SCAN_TYPE_NORMAL, AD4030_SCAN_TYPE_AVG, @@ -150,7 +160,11 @@ struct ad4030_state { struct { s32 diff; u8 common; - }; + } single; + struct { + s32 diff[2]; + u8 common[2]; + } dual; } rx_data; }; @@ -514,19 +528,33 @@ static int ad4030_set_avg_frame_len(struct iio_dev *dev, int avg_val) static bool ad4030_is_common_byte_asked(struct ad4030_state *st, unsigned int mask) { - return mask & AD4030_SINGLE_COMMON_BYTE_CHANNELS_MASK; + return mask & (st->chip->num_voltage_inputs == 1 ? + AD4030_SINGLE_COMMON_BYTE_CHANNELS_MASK : + AD4030_DUAL_COMMON_BYTE_CHANNELS_MASK); } static int ad4030_set_mode(struct iio_dev *indio_dev, unsigned long mask) { struct ad4030_state *st = iio_priv(indio_dev); - if (st->avg_log2 > 0) + if (st->avg_log2 > 0) { st->mode = AD4030_OUT_DATA_MD_30_AVERAGED_DIFF; - else if (ad4030_is_common_byte_asked(st, mask)) - st->mode = AD4030_OUT_DATA_MD_24_DIFF_8_COM; - else + } else if (ad4030_is_common_byte_asked(st, mask)) { + switch (st->chip->precision_bits) { + case 16: + st->mode = AD4030_OUT_DATA_MD_16_DIFF_8_COM; + break; + + case 24: + st->mode = AD4030_OUT_DATA_MD_24_DIFF_8_COM; + break; + + default: + return -EINVAL; + } + } else { st->mode = AD4030_OUT_DATA_MD_DIFF; + } st->current_scan_type = iio_get_current_scan_type(indio_dev, st->chip->channels); @@ -538,11 +566,52 @@ static int ad4030_set_mode(struct iio_dev *indio_dev, unsigned long mask) st->mode); } +/* + * Descramble 2 32bits numbers out of a 64bits. The bits are interleaved: + * 1 bit for first number, 1 bit for the second, and so on... + */ +static void ad4030_extract_interleaved(u8 *src, u32 *ch0, u32 *ch1) +{ + u8 h0, h1, l0, l1; + u32 out0, out1; + u8 *out0_raw = (u8 *)&out0; + u8 *out1_raw = (u8 *)&out1; + + for (int i = 0; i < 4; i++) { + h0 = src[i * 2]; + l1 = src[i * 2 + 1]; + h1 = h0 << 1; + l0 = l1 >> 1; + + h0 &= 0xAA; + l0 &= 0x55; + h1 &= 0xAA; + l1 &= 0x55; + + h0 = (h0 | h0 << 001) & 0xCC; + h1 = (h1 | h1 << 001) & 0xCC; + l0 = (l0 | l0 >> 001) & 0x33; + l1 = (l1 | l1 >> 001) & 0x33; + h0 = (h0 | h0 << 002) & 0xF0; + h1 = (h1 | h1 << 002) & 0xF0; + l0 = (l0 | l0 >> 002) & 0x0F; + l1 = (l1 | l1 >> 002) & 0x0F; + + out0_raw[i] = h0 | l0; + out1_raw[i] = h1 | l1; + } + + *ch0 = out0; + *ch1 = out1; +} + static int ad4030_conversion(struct iio_dev *indio_dev) { struct ad4030_state *st = iio_priv(indio_dev); unsigned char diff_realbytes = BITS_TO_BYTES(st->current_scan_type->realbits); + unsigned char diff_storagebytes = + BITS_TO_BYTES(st->current_scan_type->storagebits); unsigned int bytes_to_read; unsigned long cnv_nb = BIT(st->avg_log2); unsigned int i; @@ -567,10 +636,23 @@ static int ad4030_conversion(struct iio_dev *indio_dev) if (ret) return ret; - if (st->mode != AD4030_OUT_DATA_MD_24_DIFF_8_COM) + if (st->chip->num_voltage_inputs == 2) + ad4030_extract_interleaved(st->rx_data.raw, + &st->rx_data.dual.diff[0], + &st->rx_data.dual.diff[1]); + + if (st->mode != AD4030_OUT_DATA_MD_16_DIFF_8_COM && + st->mode != AD4030_OUT_DATA_MD_24_DIFF_8_COM) return 0; - st->rx_data.common = st->rx_data.raw[diff_realbytes]; + if (st->chip->num_voltage_inputs == 1) { + st->rx_data.single.common = st->rx_data.raw[diff_realbytes]; + return 0; + } + + for (i = 0; i < st->chip->num_voltage_inputs; i++) + st->rx_data.dual.common[i] = + st->rx_data.raw[diff_storagebytes * i + diff_realbytes]; return 0; } @@ -585,14 +667,25 @@ static int ad4030_single_conversion(struct iio_dev *indio_dev, if (ret) return ret; + st->current_scan_type = iio_get_current_scan_type(indio_dev, + st->chip->channels); + if (IS_ERR(st->current_scan_type)) + return PTR_ERR(st->current_scan_type); + ret = ad4030_conversion(indio_dev); if (ret) return ret; if (chan->differential) - *val = st->rx_data.diff; + if (st->chip->num_voltage_inputs == 1) + *val = st->rx_data.single.diff; + else + *val = st->rx_data.dual.diff[chan->address]; else - *val = st->rx_data.common; + if (st->chip->num_voltage_inputs == 1) + *val = st->rx_data.single.common; + else + *val = st->rx_data.dual.common[chan->address]; return IIO_VAL_INT; } @@ -874,10 +967,24 @@ static int ad4030_detect_chip_info(const struct ad4030_state *st) static int ad4030_config(struct ad4030_state *st) { + int ret; + u8 reg_modes; + st->offset_avail[0] = (int)BIT(st->chip->precision_bits - 1) * -1; st->offset_avail[1] = 1; st->offset_avail[2] = BIT(st->chip->precision_bits - 1) - 1; + if (st->chip->num_voltage_inputs > 1) + reg_modes = FIELD_PREP(AD4030_REG_MODES_MASK_LANE_MODE, + AD4030_LANE_MD_INTERLEAVED); + else + reg_modes = FIELD_PREP(AD4030_REG_MODES_MASK_LANE_MODE, + AD4030_LANE_MD_1_PER_CH); + + ret = regmap_write(st->regmap, AD4030_REG_MODES, reg_modes); + if (ret) + return ret; + if (st->vio_uv < AD4030_VIO_THRESHOLD_UV) return regmap_write(st->regmap, AD4030_REG_IO, AD4030_REG_IO_MASK_IO2X); @@ -966,6 +1073,14 @@ static const unsigned long ad4030_channel_masks[] = { 0, }; +static const unsigned long ad4630_channel_masks[] = { + /* Differential only */ + BIT(1) | BIT(0), + /* Differential with common byte */ + GENMASK(3, 0), + 0, +}; + static const struct iio_scan_type ad4030_24_scan_types[] = { [AD4030_SCAN_TYPE_NORMAL] = { .sign = 's', @@ -983,6 +1098,23 @@ static const struct iio_scan_type ad4030_24_scan_types[] = { }, }; +static const struct iio_scan_type ad4030_16_scan_types[] = { + [AD4030_SCAN_TYPE_NORMAL] = { + .sign = 's', + .storagebits = 32, + .realbits = 16, + .shift = 16, + .endianness = IIO_BE, + }, + [AD4030_SCAN_TYPE_AVG] = { + .sign = 's', + .storagebits = 32, + .realbits = 30, + .shift = 2, + .endianness = IIO_BE, + } +}; + static const struct ad4030_chip_info ad4030_24_chip_info = { .name = "ad4030-24", .available_masks = ad4030_channel_masks, @@ -997,14 +1129,50 @@ static const struct ad4030_chip_info ad4030_24_chip_info = { .tcyc_ns = AD4030_TCYC_ADJUSTED_NS, }; +static const struct ad4030_chip_info ad4630_16_chip_info = { + .name = "ad4630-16", + .available_masks = ad4630_channel_masks, + .channels = { + AD4030_CHAN_DIFF(0, ad4030_16_scan_types), + AD4030_CHAN_DIFF(1, ad4030_16_scan_types), + AD4030_CHAN_CMO(2, 0), + AD4030_CHAN_CMO(3, 1), + IIO_CHAN_SOFT_TIMESTAMP(4), + }, + .grade = AD4030_REG_CHIP_GRADE_AD4630_16_GRADE, + .precision_bits = 16, + .num_voltage_inputs = 2, + .tcyc_ns = AD4030_TCYC_ADJUSTED_NS, +}; + +static const struct ad4030_chip_info ad4630_24_chip_info = { + .name = "ad4630-24", + .available_masks = ad4630_channel_masks, + .channels = { + AD4030_CHAN_DIFF(0, ad4030_24_scan_types), + AD4030_CHAN_DIFF(1, ad4030_24_scan_types), + AD4030_CHAN_CMO(2, 0), + AD4030_CHAN_CMO(3, 1), + IIO_CHAN_SOFT_TIMESTAMP(4), + }, + .grade = AD4030_REG_CHIP_GRADE_AD4630_24_GRADE, + .precision_bits = 24, + .num_voltage_inputs = 2, + .tcyc_ns = AD4030_TCYC_ADJUSTED_NS, +}; + static const struct spi_device_id ad4030_id_table[] = { { "ad4030-24", (kernel_ulong_t)&ad4030_24_chip_info }, + { "ad4630-16", (kernel_ulong_t)&ad4630_16_chip_info }, + { "ad4630-24", (kernel_ulong_t)&ad4630_24_chip_info }, { } }; MODULE_DEVICE_TABLE(spi, ad4030_id_table); static const struct of_device_id ad4030_of_match[] = { { .compatible = "adi,ad4030-24", .data = &ad4030_24_chip_info }, + { .compatible = "adi,ad4630-16", .data = &ad4630_16_chip_info }, + { .compatible = "adi,ad4630-24", .data = &ad4630_24_chip_info }, { } }; MODULE_DEVICE_TABLE(of, ad4030_of_match); From ec25cf6f1ee31c8ac75ca8bf37addb26cd3924de Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:35 +0100 Subject: [PATCH 0237/2157] iio: adc: ad4030: add support for ad4632-16 and ad4632-24 AD4632-24 and AD4632-16 are 2 channels ADCs. Both channels are interleaved bit per bit on SDO line. Both of them do not have evaluation board. As such, the support added here can't be tested. Support is provided as best effort until someone get their hands on one. Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-5-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4030.c | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c index 0fdf01d6d7f3..ab5497c8ea1e 100644 --- a/drivers/iio/adc/ad4030.c +++ b/drivers/iio/adc/ad4030.c @@ -35,6 +35,8 @@ #define AD4030_REG_CHIP_GRADE_AD4030_24_GRADE 0x10 #define AD4030_REG_CHIP_GRADE_AD4630_16_GRADE 0x03 #define AD4030_REG_CHIP_GRADE_AD4630_24_GRADE 0x00 +#define AD4030_REG_CHIP_GRADE_AD4632_16_GRADE 0x05 +#define AD4030_REG_CHIP_GRADE_AD4632_24_GRADE 0x02 #define AD4030_REG_CHIP_GRADE_MASK_CHIP_GRADE GENMASK(7, 3) #define AD4030_REG_SCRATCH_PAD 0x0A #define AD4030_REG_SPI_REVISION 0x0B @@ -106,6 +108,9 @@ #define AD4030_TCYC_NS 500 #define AD4030_TCYC_ADJUSTED_NS (AD4030_TCYC_NS - AD4030_TCNVL_NS) #define AD4030_TRESET_PW_NS 50 +#define AD4632_TCYC_NS 2000 +#define AD4632_TCYC_ADJUSTED_NS (AD4632_TCYC_NS - AD4030_TCNVL_NS) +#define AD4030_TRESET_COM_DELAY_MS 750 enum ad4030_out_mode { AD4030_OUT_DATA_MD_DIFF, @@ -1161,10 +1166,44 @@ static const struct ad4030_chip_info ad4630_24_chip_info = { .tcyc_ns = AD4030_TCYC_ADJUSTED_NS, }; +static const struct ad4030_chip_info ad4632_16_chip_info = { + .name = "ad4632-16", + .available_masks = ad4630_channel_masks, + .channels = { + AD4030_CHAN_DIFF(0, ad4030_16_scan_types), + AD4030_CHAN_DIFF(1, ad4030_16_scan_types), + AD4030_CHAN_CMO(2, 0), + AD4030_CHAN_CMO(3, 1), + IIO_CHAN_SOFT_TIMESTAMP(4), + }, + .grade = AD4030_REG_CHIP_GRADE_AD4632_16_GRADE, + .precision_bits = 16, + .num_voltage_inputs = 2, + .tcyc_ns = AD4632_TCYC_ADJUSTED_NS, +}; + +static const struct ad4030_chip_info ad4632_24_chip_info = { + .name = "ad4632-24", + .available_masks = ad4630_channel_masks, + .channels = { + AD4030_CHAN_DIFF(0, ad4030_24_scan_types), + AD4030_CHAN_DIFF(1, ad4030_24_scan_types), + AD4030_CHAN_CMO(2, 0), + AD4030_CHAN_CMO(3, 1), + IIO_CHAN_SOFT_TIMESTAMP(4), + }, + .grade = AD4030_REG_CHIP_GRADE_AD4632_24_GRADE, + .precision_bits = 24, + .num_voltage_inputs = 2, + .tcyc_ns = AD4632_TCYC_ADJUSTED_NS, +}; + static const struct spi_device_id ad4030_id_table[] = { { "ad4030-24", (kernel_ulong_t)&ad4030_24_chip_info }, { "ad4630-16", (kernel_ulong_t)&ad4630_16_chip_info }, { "ad4630-24", (kernel_ulong_t)&ad4630_24_chip_info }, + { "ad4632-16", (kernel_ulong_t)&ad4632_16_chip_info }, + { "ad4632-24", (kernel_ulong_t)&ad4632_24_chip_info }, { } }; MODULE_DEVICE_TABLE(spi, ad4030_id_table); @@ -1173,6 +1212,8 @@ static const struct of_device_id ad4030_of_match[] = { { .compatible = "adi,ad4030-24", .data = &ad4030_24_chip_info }, { .compatible = "adi,ad4630-16", .data = &ad4630_16_chip_info }, { .compatible = "adi,ad4630-24", .data = &ad4630_24_chip_info }, + { .compatible = "adi,ad4632-16", .data = &ad4632_16_chip_info }, + { .compatible = "adi,ad4632-24", .data = &ad4632_24_chip_info }, { } }; MODULE_DEVICE_TABLE(of, ad4030_of_match); From b29050e8b3b9ef771d66a70b32e48fa0e587c3ce Mon Sep 17 00:00:00 2001 From: Esteban Blanc Date: Fri, 14 Feb 2025 13:22:36 +0100 Subject: [PATCH 0238/2157] docs: iio: ad4030: add documentation This adds a new page to document how to use the ad4030 ADC driver Signed-off-by: Esteban Blanc Link: https://patch.msgid.link/20250214-eblanc-ad4630_v1-v4-6-135dd66cab6a@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad4030.rst | 180 +++++++++++++++++++++++++++++++++++ Documentation/iio/index.rst | 1 + MAINTAINERS | 1 + 3 files changed, 182 insertions(+) create mode 100644 Documentation/iio/ad4030.rst diff --git a/Documentation/iio/ad4030.rst b/Documentation/iio/ad4030.rst new file mode 100644 index 000000000000..b57424b650a8 --- /dev/null +++ b/Documentation/iio/ad4030.rst @@ -0,0 +1,180 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +============= +AD4030 driver +============= + +ADC driver for Analog Devices Inc. AD4030 and similar devices. The module name +is ``ad4030``. + + +Supported devices +================= + +The following chips are supported by this driver: + +* `AD4030-24 `_ +* `AD4032-24 `_ +* `AD4630-16 `_ +* `AD4630-24 `_ +* `AD4632-16 `_ +* `AD4632-24 `_ + +IIO channels +============ + +Each "hardware" channel as described in the datasheet is split in 2 IIO +channels: + +- One channel for the differential data +- One channel for the common byte. + +The possible IIO channels depending on the numbers of "hardware" channel are: + ++------------------------------------+------------------------------------+ +| 1 channel ADC | 2 channels ADC | ++====================================+====================================+ +| - voltage0-voltage1 (differential) | - voltage0-voltage1 (differential) | +| - voltage2 (common-mode) | - voltage2-voltage3 (differential) | +| | - voltage4 (common-mode) | +| | - voltage5 (common-mode) | ++------------------------------------+------------------------------------+ + +Labels +------ + +For ease of use, the IIO channels provide a label. For a differential channel, +the label is ``differentialN`` where ``N`` is the "hardware" channel id. For a +common-mode channel, the label is ``common-modeN`` where ``N`` is the +"hardware" channel id. + +The possible labels are: + ++-----------------+-----------------+ +| 1 channel ADC | 2 channels ADC | ++=================+=================+ +| - differential0 | - differential0 | +| - common-mode0 | - differential1 | +| | - common-mode0 | +| | - common-mode1 | ++-----------------+-----------------+ + +Supported features +================== + +SPI wiring modes +---------------- + +The driver currently supports the following SPI wiring configurations: + +One lane mode +^^^^^^^^^^^^^ + +In this mode, each channel has its own SDO line to send the conversion results. +At the moment this mode can only be used on AD4030 which has one channel so only +one SDO line is used. + +.. code-block:: + + +-------------+ +-------------+ + | ADC | | HOST | + | | | | + | CNV |<--------| CNV | + | CS |<--------| CS | + | SDI |<--------| SDO | + | SDO0 |-------->| SDI | + | SCLK |<--------| SCLK | + +-------------+ +-------------+ + +Interleaved mode +^^^^^^^^^^^^^^^^ + +In this mode, both channels conversion results are bit interleaved one SDO line. +As such the wiring is the same as `One lane mode`_. + +SPI Clock mode +-------------- + +Only the SPI clocking mode is supported. + +Output modes +------------ + +There are more exposed IIO channels than channels as describe in the devices +datasheet. This is due to the `Differential data + common-mode`_ encoding +2 types of information in one conversion result. As such a "device" channel +provides 2 IIO channels, one for the differential data and one for the common +byte. + +Differential data +^^^^^^^^^^^^^^^^^ + +This mode is selected when: + +- Only differential channels are enabled in a buffered read +- Oversampling attribute is set to 1 + +Differential data + common-mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This mode is selected when: + +- Differential and common-mode channels are enabled in a buffered read +- Oversampling attribute is set to 1 + +For the 24-bits chips, this mode is also available with 16-bits differential +data but is not selectable yet. + +Averaged differential data +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This mode is selected when: + +- Only differential channels are selected enabled in a buffered read +- Oversampling attribute is greater than 1 + +Digital Gain and Offset +----------------------- + +Each differential data channel has a 16-bits unsigned configurable hardware +gain applied to it. By default it's equal to 1. Note that applying gain can +cause numerical saturation. + +Each differential data channel has a signed configurable hardware offset. +For the ADCs ending in ``-24``, the gain is encoded on 24-bits. +Likewise, the ADCs ending in ``-16`` have a gain encoded on 16-bits. Note that +applying an offset can cause numerical saturation. + +The final differential data returned by the ADC is computed by first applying +the gain, then the offset. + +The gain is controlled by the ``calibscale`` IIO attribute while the offset is +controlled by the ``calibbias`` attribute. + +Reference voltage +----------------- + +The chip supports an external reference voltage via the ``REF`` input or an +internal buffered reference voltage via the ``REFIN`` input. The driver looks +at the device tree to determine which is being used. If ``ref-supply`` is +present, then the external reference voltage is used and the internal buffer is +disabled. If ``refin-supply`` is present, then the internal buffered reference +voltage is used. + +Reset +----- + +Both hardware and software reset are supported. The driver looks first at the +device tree to see if the ``reset-gpio`` is populated. +If not present, the driver will fallback to a software reset by wiring to the +device's registers. + +Unimplemented features +---------------------- + +- ``BUSY`` indication +- Additional wiring modes +- Additional clock modes +- Differential data 16-bits + common-mode for 24-bits chips +- Overrange events +- Test patterns diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst index 5710f5b9e958..2d334be2b7f2 100644 --- a/Documentation/iio/index.rst +++ b/Documentation/iio/index.rst @@ -19,6 +19,7 @@ Industrial I/O Kernel Drivers :maxdepth: 1 ad4000 + ad4030 ad4695 ad7380 ad7606 diff --git a/MAINTAINERS b/MAINTAINERS index 594ac38ad15d..bd04375ab4a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1314,6 +1314,7 @@ L: linux-iio@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml +F: Documentation/iio/ad4030.rst F: drivers/iio/adc/ad4030.c ANALOG DEVICES INC AD4130 DRIVER From f3255b0e1c0a8c9e6841781d7bac4599cc41d47f Mon Sep 17 00:00:00 2001 From: Tobias Sperling Date: Thu, 13 Feb 2025 16:58:57 +0100 Subject: [PATCH 0239/2157] dt-bindings: iio: adc: Introduce ADS7138 Add documentation for the driver of ADS7128 and ADS7138 12-bit, 8-channel analog-to-digital converters. These ADCs have a wide operating range and a wide feature set. Communication is based on the I2C interface. ADS7128 differs in the addition of further hardware features, like a root-mean-square (RMS) and a zero-crossing-detect (ZCD) module. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Tobias Sperling Link: https://patch.msgid.link/20250213-adc_ml-v4-1-66b68f8fdb8c@softing.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/ti,ads7138.yaml | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml b/Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml new file mode 100644 index 000000000000..a51893e207d4 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/ti,ads7138.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments ADS7128/ADS7138 analog-to-digital converter (ADC) + +maintainers: + - Tobias Sperling + +description: | + The ADS7128 and ADS7138 chips are 12-bit, 8 channel analog-to-digital + converters (ADC) with build-in digital window comparator (DWC), using the + I2C interface. + ADS7128 differs in the addition of further hardware features, like a + root-mean-square (RMS) and a zero-crossing-detect (ZCD) module. + + Datasheets: + https://www.ti.com/product/ADS7128 + https://www.ti.com/product/ADS7138 + +properties: + compatible: + enum: + - ti,ads7128 + - ti,ads7138 + + reg: + maxItems: 1 + + avdd-supply: + description: + The regulator used as analog supply voltage as well as reference voltage. + + interrupts: + description: + Interrupt on ALERT pin, triggers on low level. + maxItems: 1 + +required: + - compatible + - reg + - avdd-supply + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + adc@10 { + compatible = "ti,ads7138"; + reg = <0x10>; + avdd-supply = <®_stb_3v3>; + interrupt-parent = <&gpio2>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; + }; + }; +... From 024b08fee342843c30a0bf0f5109047f7f576422 Mon Sep 17 00:00:00 2001 From: Tobias Sperling Date: Thu, 13 Feb 2025 16:58:58 +0100 Subject: [PATCH 0240/2157] iio: adc: Add driver for ADS7128 / ADS7138 Add driver for ADS7128 and ADS7138 12-bit, 8-channel analog-to-digital converters. These ADCs have a wide operating range and a wide feature set. Communication is based on the I2C interface. ADS7128 differs in the addition of further hardware features, like a root-mean-square (RMS) and a zero-crossing-detect (ZCD) module. Signed-off-by: Tobias Sperling Link: https://patch.msgid.link/20250213-adc_ml-v4-2-66b68f8fdb8c@softing.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 10 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ti-ads7138.c | 749 +++++++++++++++++++++++++++++++++++ 3 files changed, 760 insertions(+) create mode 100644 drivers/iio/adc/ti-ads7138.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index b1bfeed66315..27413516216c 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -1499,6 +1499,16 @@ config TI_ADS1119 This driver can also be built as a module. If so, the module will be called ti-ads1119. +config TI_ADS7138 + tristate "Texas Instruments ADS7128 and ADS7138 ADC driver" + depends on I2C + help + If you say yes here you get support for Texas Instruments ADS7128 and + ADS7138 8-channel A/D converters with 12-bit resolution. + + This driver can also be built as a module. If so, the module will be + called ti-ads7138. + config TI_ADS7924 tristate "Texas Instruments ADS7924 ADC" depends on I2C diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index ed41d9a4054e..9f26d5eca822 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -135,6 +135,7 @@ obj-$(CONFIG_TI_ADS1119) += ti-ads1119.o obj-$(CONFIG_TI_ADS124S08) += ti-ads124s08.o obj-$(CONFIG_TI_ADS1298) += ti-ads1298.o obj-$(CONFIG_TI_ADS131E08) += ti-ads131e08.o +obj-$(CONFIG_TI_ADS7138) += ti-ads7138.o obj-$(CONFIG_TI_ADS7924) += ti-ads7924.o obj-$(CONFIG_TI_ADS7950) += ti-ads7950.o obj-$(CONFIG_TI_ADS8344) += ti-ads8344.o diff --git a/drivers/iio/adc/ti-ads7138.c b/drivers/iio/adc/ti-ads7138.c new file mode 100644 index 000000000000..ee5c1b8e3a8e --- /dev/null +++ b/drivers/iio/adc/ti-ads7138.c @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ADS7138 - Texas Instruments Analog-to-Digital Converter + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Always assume 16 bits resolution as HW registers are aligned like that and + * with enabled oversampling/averaging it actually corresponds to 16 bits. + */ +#define ADS7138_RES_BITS 16 + +/* ADS7138 operation codes */ +#define ADS7138_OPCODE_SINGLE_WRITE 0x08 +#define ADS7138_OPCODE_SET_BIT 0x18 +#define ADS7138_OPCODE_CLEAR_BIT 0x20 +#define ADS7138_OPCODE_BLOCK_WRITE 0x28 +#define ADS7138_OPCODE_BLOCK_READ 0x30 + +/* ADS7138 registers */ +#define ADS7138_REG_GENERAL_CFG 0x01 +#define ADS7138_REG_OSR_CFG 0x03 +#define ADS7138_REG_OPMODE_CFG 0x04 +#define ADS7138_REG_SEQUENCE_CFG 0x10 +#define ADS7138_REG_AUTO_SEQ_CH_SEL 0x12 +#define ADS7138_REG_ALERT_CH_SEL 0x14 +#define ADS7138_REG_EVENT_FLAG 0x18 +#define ADS7138_REG_EVENT_HIGH_FLAG 0x1A +#define ADS7138_REG_EVENT_LOW_FLAG 0x1C +#define ADS7138_REG_HIGH_TH_HYS_CH(x) ((x) * 4 + 0x20) +#define ADS7138_REG_LOW_TH_CNT_CH(x) ((x) * 4 + 0x22) +#define ADS7138_REG_MAX_LSB_CH(x) ((x) * 2 + 0x60) +#define ADS7138_REG_MIN_LSB_CH(x) ((x) * 2 + 0x80) +#define ADS7138_REG_RECENT_LSB_CH(x) ((x) * 2 + 0xA0) + +#define ADS7138_GENERAL_CFG_RST BIT(0) +#define ADS7138_GENERAL_CFG_DWC_EN BIT(4) +#define ADS7138_GENERAL_CFG_STATS_EN BIT(5) +#define ADS7138_OSR_CFG_MASK GENMASK(2, 0) +#define ADS7138_OPMODE_CFG_CONV_MODE BIT(5) +#define ADS7138_OPMODE_CFG_FREQ_MASK GENMASK(4, 0) +#define ADS7138_SEQUENCE_CFG_SEQ_MODE BIT(0) +#define ADS7138_SEQUENCE_CFG_SEQ_START BIT(4) +#define ADS7138_THRESHOLD_LSB_MASK GENMASK(7, 4) + +enum ads7138_modes { + ADS7138_MODE_MANUAL, + ADS7138_MODE_AUTO, +}; + +struct ads7138_chip_data { + const char *name; + const int channel_num; +}; + +struct ads7138_data { + /* Protects RMW access to the I2C interface */ + struct mutex lock; + struct i2c_client *client; + struct regulator *vref_regu; + const struct ads7138_chip_data *chip_data; +}; + +/* + * 2D array of available sampling frequencies and the corresponding register + * values. Structured like this to be easily usable in read_avail function. + */ +static const int ads7138_samp_freqs_bits[2][26] = { + { + 163, 244, 326, 488, 651, 977, 1302, 1953, + 2604, 3906, 5208, 7813, 10417, 15625, 20833, 31250, + 41667, 62500, 83333, 125000, 166667, 250000, 333333, 500000, + 666667, 1000000 + }, { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + /* Here is a hole, due to duplicate frequencies */ + 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, + 0x01, 0x00 + } +}; + +static const int ads7138_oversampling_ratios[] = { + 1, 2, 4, 8, 16, 32, 64, 128 +}; + +static int ads7138_i2c_write_block(const struct i2c_client *client, u8 reg, + u8 *values, u8 length) +{ + int ret; + int len = length + 2; /* "+ 2" for OPCODE and reg */ + + u8 *buf __free(kfree) = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = ADS7138_OPCODE_BLOCK_WRITE; + buf[1] = reg; + memcpy(&buf[2], values, length); + + ret = i2c_master_send(client, buf, len); + if (ret < 0) + return ret; + if (ret != len) + return -EIO; + + return 0; +} + +static int ads7138_i2c_write_with_opcode(const struct i2c_client *client, + u8 reg, u8 regval, u8 opcode) +{ + u8 buf[3] = { opcode, reg, regval }; + int ret; + + ret = i2c_master_send(client, buf, ARRAY_SIZE(buf)); + if (ret < 0) + return ret; + if (ret != ARRAY_SIZE(buf)) + return -EIO; + + return 0; +} + +static int ads7138_i2c_write(const struct i2c_client *client, u8 reg, u8 value) +{ + return ads7138_i2c_write_with_opcode(client, reg, value, + ADS7138_OPCODE_SINGLE_WRITE); +} + +static int ads7138_i2c_set_bit(const struct i2c_client *client, u8 reg, u8 bits) +{ + return ads7138_i2c_write_with_opcode(client, reg, bits, + ADS7138_OPCODE_SET_BIT); +} + +static int ads7138_i2c_clear_bit(const struct i2c_client *client, u8 reg, u8 bits) +{ + return ads7138_i2c_write_with_opcode(client, reg, bits, + ADS7138_OPCODE_CLEAR_BIT); +} + +static int ads7138_i2c_read_block(const struct i2c_client *client, u8 reg, + u8 *out_values, u8 length) +{ + u8 buf[2] = { ADS7138_OPCODE_BLOCK_READ, reg }; + int ret; + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .len = ARRAY_SIZE(buf), + .buf = buf, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = length, + .buf = out_values, + }, + }; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + return ret; + if (ret != ARRAY_SIZE(msgs)) + return -EIO; + + return 0; +} + +static int ads7138_i2c_read(const struct i2c_client *client, u8 reg) +{ + u8 value; + int ret; + + ret = ads7138_i2c_read_block(client, reg, &value, sizeof(value)); + if (ret) + return ret; + return value; +} + +static int ads7138_freq_to_bits(int freq) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ads7138_samp_freqs_bits[0]); i++) + if (freq == ads7138_samp_freqs_bits[0][i]) + return ads7138_samp_freqs_bits[1][i]; + + return -EINVAL; +} + +static int ads7138_bits_to_freq(int bits) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ads7138_samp_freqs_bits[1]); i++) + if (bits == ads7138_samp_freqs_bits[1][i]) + return ads7138_samp_freqs_bits[0][i]; + + return -EINVAL; +} + +static int ads7138_osr_to_bits(int osr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ads7138_oversampling_ratios); i++) + if (osr == ads7138_oversampling_ratios[i]) + return i; + + return -EINVAL; +} + +static int ads7138_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + struct ads7138_data *data = iio_priv(indio_dev); + int ret, vref, bits; + u8 values[2]; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = ads7138_i2c_read_block(data->client, + ADS7138_REG_RECENT_LSB_CH(chan->channel), + values, ARRAY_SIZE(values)); + if (ret) + return ret; + + *val = get_unaligned_le16(values); + return IIO_VAL_INT; + case IIO_CHAN_INFO_PEAK: + ret = ads7138_i2c_read_block(data->client, + ADS7138_REG_MAX_LSB_CH(chan->channel), + values, ARRAY_SIZE(values)); + if (ret) + return ret; + + *val = get_unaligned_le16(values); + return IIO_VAL_INT; + case IIO_CHAN_INFO_TROUGH: + ret = ads7138_i2c_read_block(data->client, + ADS7138_REG_MIN_LSB_CH(chan->channel), + values, ARRAY_SIZE(values)); + if (ret) + return ret; + + *val = get_unaligned_le16(values); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = ads7138_i2c_read(data->client, ADS7138_REG_OPMODE_CFG); + if (ret < 0) + return ret; + + bits = FIELD_GET(ADS7138_OPMODE_CFG_FREQ_MASK, ret); + *val = ads7138_bits_to_freq(bits); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + vref = regulator_get_voltage(data->vref_regu); + if (vref < 0) + return vref; + *val = vref / 1000; + *val2 = ADS7138_RES_BITS; + return IIO_VAL_FRACTIONAL_LOG2; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + ret = ads7138_i2c_read(data->client, ADS7138_REG_OSR_CFG); + if (ret < 0) + return ret; + + bits = FIELD_GET(ADS7138_OSR_CFG_MASK, ret); + *val = ads7138_oversampling_ratios[bits]; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ads7138_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long mask) +{ + struct ads7138_data *data = iio_priv(indio_dev); + int bits, ret; + u8 value; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: { + bits = ads7138_freq_to_bits(val); + if (bits < 0) + return bits; + + guard(mutex)(&data->lock); + ret = ads7138_i2c_read(data->client, ADS7138_REG_OPMODE_CFG); + if (ret < 0) + return ret; + + value = ret & ~ADS7138_OPMODE_CFG_FREQ_MASK; + value |= FIELD_PREP(ADS7138_OPMODE_CFG_FREQ_MASK, bits); + return ads7138_i2c_write(data->client, ADS7138_REG_OPMODE_CFG, + value); + } + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + bits = ads7138_osr_to_bits(val); + if (bits < 0) + return bits; + + return ads7138_i2c_write(data->client, ADS7138_REG_OSR_CFG, + bits); + default: + return -EINVAL; + } +} + +static int ads7138_read_event(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int *val, int *val2) +{ + struct ads7138_data *data = iio_priv(indio_dev); + u8 reg, values[2]; + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + reg = (dir == IIO_EV_DIR_RISING) ? + ADS7138_REG_HIGH_TH_HYS_CH(chan->channel) : + ADS7138_REG_LOW_TH_CNT_CH(chan->channel); + ret = ads7138_i2c_read_block(data->client, reg, values, + ARRAY_SIZE(values)); + if (ret) + return ret; + + *val = ((values[1] << 4) | (values[0] >> 4)); + return IIO_VAL_INT; + case IIO_EV_INFO_HYSTERESIS: + ret = ads7138_i2c_read(data->client, + ADS7138_REG_HIGH_TH_HYS_CH(chan->channel)); + if (ret < 0) + return ret; + + *val = ret & ~ADS7138_THRESHOLD_LSB_MASK; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ads7138_write_event(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int val, int val2) +{ + struct ads7138_data *data = iio_priv(indio_dev); + u8 reg, values[2]; + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: { + if (val >= BIT(12) || val < 0) + return -EINVAL; + + reg = (dir == IIO_EV_DIR_RISING) ? + ADS7138_REG_HIGH_TH_HYS_CH(chan->channel) : + ADS7138_REG_LOW_TH_CNT_CH(chan->channel); + + guard(mutex)(&data->lock); + ret = ads7138_i2c_read(data->client, reg); + if (ret < 0) + return ret; + + values[0] = ret & ~ADS7138_THRESHOLD_LSB_MASK; + values[0] |= FIELD_PREP(ADS7138_THRESHOLD_LSB_MASK, val); + values[1] = (val >> 4); + return ads7138_i2c_write_block(data->client, reg, values, + ARRAY_SIZE(values)); + } + case IIO_EV_INFO_HYSTERESIS: { + if (val >= BIT(4) || val < 0) + return -EINVAL; + + reg = ADS7138_REG_HIGH_TH_HYS_CH(chan->channel); + + guard(mutex)(&data->lock); + ret = ads7138_i2c_read(data->client, reg); + if (ret < 0) + return ret; + + values[0] = val & ~ADS7138_THRESHOLD_LSB_MASK; + values[0] |= FIELD_PREP(ADS7138_THRESHOLD_LSB_MASK, ret >> 4); + return ads7138_i2c_write(data->client, reg, values[0]); + } + default: + return -EINVAL; + } +} + +static int ads7138_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ads7138_data *data = iio_priv(indio_dev); + int ret; + + if (dir != IIO_EV_DIR_EITHER) + return -EINVAL; + + ret = ads7138_i2c_read(data->client, ADS7138_REG_ALERT_CH_SEL); + if (ret < 0) + return ret; + + return (ret & BIT(chan->channel)) ? 1 : 0; +} + +static int ads7138_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, bool state) +{ + struct ads7138_data *data = iio_priv(indio_dev); + + if (dir != IIO_EV_DIR_EITHER) + return -EINVAL; + + if (state) + return ads7138_i2c_set_bit(data->client, + ADS7138_REG_ALERT_CH_SEL, + BIT(chan->channel)); + else + return ads7138_i2c_clear_bit(data->client, + ADS7138_REG_ALERT_CH_SEL, + BIT(chan->channel)); +} + +static int ads7138_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = ads7138_samp_freqs_bits[0]; + *length = ARRAY_SIZE(ads7138_samp_freqs_bits[0]); + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + *vals = ads7138_oversampling_ratios; + *length = ARRAY_SIZE(ads7138_oversampling_ratios); + *type = IIO_VAL_INT; + + return IIO_AVAIL_LIST; + default: + return -EINVAL; + } +} + +static const struct iio_info ti_ads7138_info = { + .read_raw = &ads7138_read_raw, + .read_avail = &ads7138_read_avail, + .write_raw = &ads7138_write_raw, + .read_event_value = &ads7138_read_event, + .write_event_value = &ads7138_write_event, + .read_event_config = &ads7138_read_event_config, + .write_event_config = &ads7138_write_event_config, +}; + +static const struct iio_event_spec ads7138_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) + }, { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE), + }, { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS) | + BIT(IIO_EV_INFO_ENABLE), + }, +}; + +#define ADS7138_V_CHAN(_chan) { \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .channel = _chan, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_PEAK) | \ + BIT(IIO_CHAN_INFO_TROUGH), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .info_mask_shared_by_type_available = \ + BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ + .datasheet_name = "AIN"#_chan, \ + .event_spec = ads7138_events, \ + .num_event_specs = ARRAY_SIZE(ads7138_events), \ +} + +static const struct iio_chan_spec ads7138_channels[] = { + ADS7138_V_CHAN(0), + ADS7138_V_CHAN(1), + ADS7138_V_CHAN(2), + ADS7138_V_CHAN(3), + ADS7138_V_CHAN(4), + ADS7138_V_CHAN(5), + ADS7138_V_CHAN(6), + ADS7138_V_CHAN(7), +}; + +static irqreturn_t ads7138_event_handler(int irq, void *priv) +{ + struct iio_dev *indio_dev = priv; + struct ads7138_data *data = iio_priv(indio_dev); + struct device *dev = &data->client->dev; + u8 i, events_high, events_low; + u64 code; + int ret; + + /* Check if interrupt was trigger by us */ + ret = ads7138_i2c_read(data->client, ADS7138_REG_EVENT_FLAG); + if (ret <= 0) + return IRQ_NONE; + + ret = ads7138_i2c_read(data->client, ADS7138_REG_EVENT_HIGH_FLAG); + if (ret < 0) { + dev_warn(dev, "Failed to read event high flags: %d\n", ret); + return IRQ_HANDLED; + } + events_high = ret; + + ret = ads7138_i2c_read(data->client, ADS7138_REG_EVENT_LOW_FLAG); + if (ret < 0) { + dev_warn(dev, "Failed to read event low flags: %d\n", ret); + return IRQ_HANDLED; + } + events_low = ret; + + for (i = 0; i < data->chip_data->channel_num; i++) { + if (events_high & BIT(i)) { + code = IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, i, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_RISING); + iio_push_event(indio_dev, code, + iio_get_time_ns(indio_dev)); + } + if (events_low & BIT(i)) { + code = IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, i, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_FALLING); + iio_push_event(indio_dev, code, + iio_get_time_ns(indio_dev)); + } + } + + /* Try to clear all interrupt flags */ + ret = ads7138_i2c_write(data->client, ADS7138_REG_EVENT_HIGH_FLAG, 0xFF); + if (ret) + dev_warn(dev, "Failed to clear event high flags: %d\n", ret); + + ret = ads7138_i2c_write(data->client, ADS7138_REG_EVENT_LOW_FLAG, 0xFF); + if (ret) + dev_warn(dev, "Failed to clear event low flags: %d\n", ret); + + return IRQ_HANDLED; +} + +static int ads7138_set_conv_mode(struct ads7138_data *data, + enum ads7138_modes mode) +{ + if (mode == ADS7138_MODE_AUTO) + return ads7138_i2c_set_bit(data->client, ADS7138_REG_OPMODE_CFG, + ADS7138_OPMODE_CFG_CONV_MODE); + return ads7138_i2c_clear_bit(data->client, ADS7138_REG_OPMODE_CFG, + ADS7138_OPMODE_CFG_CONV_MODE); +} + +static int ads7138_init_hw(struct ads7138_data *data) +{ + struct device *dev = &data->client->dev; + int ret; + + data->vref_regu = devm_regulator_get(dev, "avdd"); + if (IS_ERR(data->vref_regu)) + return dev_err_probe(dev, PTR_ERR(data->vref_regu), + "Failed to get avdd regulator\n"); + + ret = regulator_get_voltage(data->vref_regu); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to get avdd voltage\n"); + + /* Reset the chip to get a defined starting configuration */ + ret = ads7138_i2c_set_bit(data->client, ADS7138_REG_GENERAL_CFG, + ADS7138_GENERAL_CFG_RST); + if (ret) + return ret; + + ret = ads7138_set_conv_mode(data, ADS7138_MODE_AUTO); + if (ret) + return ret; + + /* Enable statistics and digital window comparator */ + ret = ads7138_i2c_set_bit(data->client, ADS7138_REG_GENERAL_CFG, + ADS7138_GENERAL_CFG_STATS_EN | + ADS7138_GENERAL_CFG_DWC_EN); + if (ret) + return ret; + + /* Enable all channels for auto sequencing */ + ret = ads7138_i2c_set_bit(data->client, ADS7138_REG_AUTO_SEQ_CH_SEL, 0xFF); + if (ret) + return ret; + + /* Set auto sequence mode and start sequencing */ + return ads7138_i2c_set_bit(data->client, ADS7138_REG_SEQUENCE_CFG, + ADS7138_SEQUENCE_CFG_SEQ_START | + ADS7138_SEQUENCE_CFG_SEQ_MODE); +} + +static int ads7138_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct iio_dev *indio_dev; + struct ads7138_data *data; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + data = iio_priv(indio_dev); + data->client = client; + data->chip_data = i2c_get_match_data(client); + if (!data->chip_data) + return -ENODEV; + + ret = devm_mutex_init(dev, &data->lock); + if (ret) + return ret; + + indio_dev->name = data->chip_data->name; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = ads7138_channels; + indio_dev->num_channels = ARRAY_SIZE(ads7138_channels); + indio_dev->info = &ti_ads7138_info; + + i2c_set_clientdata(client, indio_dev); + + if (client->irq > 0) { + ret = devm_request_threaded_irq(dev, client->irq, + NULL, ads7138_event_handler, + IRQF_TRIGGER_LOW | + IRQF_ONESHOT | IRQF_SHARED, + client->name, indio_dev); + if (ret) + return ret; + } + + ret = ads7138_init_hw(data); + if (ret) + return dev_err_probe(dev, ret, "Failed to initialize device\n"); + + ret = devm_iio_device_register(dev, indio_dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to register iio device\n"); + + return 0; +} + +static int ads7138_runtime_suspend(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct ads7138_data *data = iio_priv(indio_dev); + + return ads7138_set_conv_mode(data, ADS7138_MODE_MANUAL); +} + +static int ads7138_runtime_resume(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct ads7138_data *data = iio_priv(indio_dev); + + return ads7138_set_conv_mode(data, ADS7138_MODE_AUTO); +} + +static DEFINE_RUNTIME_DEV_PM_OPS(ads7138_pm_ops, + ads7138_runtime_suspend, + ads7138_runtime_resume, + NULL); + +static const struct ads7138_chip_data ads7128_data = { + .name = "ads7128", + .channel_num = 8, +}; + +static const struct ads7138_chip_data ads7138_data = { + .name = "ads7138", + .channel_num = 8, +}; + +static const struct of_device_id ads7138_of_match[] = { + { .compatible = "ti,ads7128", .data = &ads7128_data }, + { .compatible = "ti,ads7138", .data = &ads7138_data }, + { } +}; +MODULE_DEVICE_TABLE(of, ads7138_of_match); + +static const struct i2c_device_id ads7138_device_ids[] = { + { "ads7128", (kernel_ulong_t)&ads7128_data }, + { "ads7138", (kernel_ulong_t)&ads7138_data }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ads7138_device_ids); + +static struct i2c_driver ads7138_driver = { + .driver = { + .name = "ads7138", + .of_match_table = ads7138_of_match, + .pm = pm_ptr(&ads7138_pm_ops), + }, + .id_table = ads7138_device_ids, + .probe = ads7138_probe, +}; +module_i2c_driver(ads7138_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tobias Sperling "); +MODULE_DESCRIPTION("Driver for TI ADS7138 ADCs"); From 7a2dd31359b0761c48ae44d376d2f083fc41f0ef Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 12 Feb 2025 02:52:20 -0500 Subject: [PATCH 0241/2157] iio: accel: msa311: convert to use maple tree register cache The maple tree register cache is based on a much more modern data structure than the rbtree cache and makes optimisation choices which are probably more appropriate for modern systems than those made by the rbtree cache. Signed-off-by: Bo Liu Link: https://patch.msgid.link/20250212075223.4164-2-liubo03@inspur.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/msa311.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/msa311.c b/drivers/iio/accel/msa311.c index e7fb860f3233..fe4c32a9558a 100644 --- a/drivers/iio/accel/msa311.c +++ b/drivers/iio/accel/msa311.c @@ -332,7 +332,7 @@ static const struct regmap_config msa311_regmap_config = { .wr_table = &msa311_writeable_table, .rd_table = &msa311_readable_table, .volatile_table = &msa311_volatile_table, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, }; #define MSA311_GENMASK(field) ({ \ From 58e9fe259750bdcab9b2b9aeffd48296f748c3cc Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 12 Feb 2025 02:52:21 -0500 Subject: [PATCH 0242/2157] iio: accel: bma400: convert to use maple tree register cache The maple tree register cache is based on a much more modern data structure than the rbtree cache and makes optimisation choices which are probably more appropriate for modern systems than those made by the rbtree cache. Signed-off-by: Bo Liu Link: https://patch.msgid.link/20250212075223.4164-3-liubo03@inspur.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma400_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index ae806ed60271..23f5e1ce9cc4 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -190,7 +190,7 @@ const struct regmap_config bma400_regmap_config = { .reg_bits = 8, .val_bits = 8, .max_register = BMA400_CMD_REG, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, .writeable_reg = bma400_is_writable_reg, .volatile_reg = bma400_is_volatile_reg, }; From 7ed9db68c37590e9740d05332d8b73b9bc3e98e5 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 12 Feb 2025 02:52:22 -0500 Subject: [PATCH 0243/2157] iio: accel: bmi088: convert to use maple tree register cache The maple tree register cache is based on a much more modern data structure than the rbtree cache and makes optimisation choices which are probably more appropriate for modern systems than those made by the rbtree cache. Signed-off-by: Bo Liu Link: https://patch.msgid.link/20250212075223.4164-4-liubo03@inspur.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmi088-accel-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bmi088-accel-core.c b/drivers/iio/accel/bmi088-accel-core.c index 9206fbdbf520..36e5d06ffd33 100644 --- a/drivers/iio/accel/bmi088-accel-core.c +++ b/drivers/iio/accel/bmi088-accel-core.c @@ -145,7 +145,7 @@ const struct regmap_config bmi088_regmap_conf = { .val_bits = 8, .max_register = 0x7E, .volatile_table = &bmi088_volatile_table, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, }; EXPORT_SYMBOL_NS_GPL(bmi088_regmap_conf, "IIO_BMI088"); From 9d7d7bfb45c5d8cc6cc158061e9e078889ead848 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 12 Feb 2025 02:52:23 -0500 Subject: [PATCH 0244/2157] iio: accel: kx022a: convert to use maple tree register cache The maple tree register cache is based on a much more modern data structure than the rbtree cache and makes optimisation choices which are probably more appropriate for modern systems than those made by the rbtree cache. Signed-off-by: Bo Liu Acked-by: Matti Vaittinen Link: https://patch.msgid.link/20250212075223.4164-5-liubo03@inspur.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kionix-kx022a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index 5aeb3b951ac5..3a56ab00791a 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -149,7 +149,7 @@ static const struct regmap_config kx022a_regmap_config = { .rd_noinc_table = &kx022a_nir_regs, .precious_table = &kx022a_precious_regs, .max_register = KX022A_MAX_REGISTER, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, }; /* Regmap configs kx132 */ @@ -260,7 +260,7 @@ static const struct regmap_config kx132_regmap_config = { .rd_noinc_table = &kx132_nir_regs, .precious_table = &kx132_precious_regs, .max_register = KX132_MAX_REGISTER, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_MAPLE, }; struct kx022a_data { From 6a7713ec5337d3a535a1e35a7a7a71020436770e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 11 Feb 2025 17:02:29 +0100 Subject: [PATCH 0245/2157] USB: serial: mos7840: drop unused defines Drop some defines that have never (really) been used, some of which are duplicates (the read and write requests) and others which are just bogus (the ioctl). Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7840.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index ca3da79afd23..93710b762893 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -66,29 +66,16 @@ #define MOS_WDR_TIMEOUT 5000 /* default urb timeout */ -#define MOS_PORT1 0x0200 -#define MOS_PORT2 0x0300 -#define MOS_VENREG 0x0000 -#define MOS_MAX_PORT 0x02 -#define MOS_WRITE 0x0E -#define MOS_READ 0x0D - /* Requests */ #define MCS_RD_RTYPE 0xC0 #define MCS_WR_RTYPE 0x40 #define MCS_RDREQ 0x0D #define MCS_WRREQ 0x0E -#define MCS_CTRL_TIMEOUT 500 #define VENDOR_READ_LENGTH (0x01) -#define MAX_NAME_LEN 64 - #define ZLP_REG1 0x3A /* Zero_Flag_Reg1 58 */ #define ZLP_REG5 0x3E /* Zero_Flag_Reg5 62 */ -/* For higher baud Rates use TIOCEXBAUD */ -#define TIOCEXBAUD 0x5462 - /* * Vendor id and device id defines * From a8b8a126c8573b392432b0d1b23314bda59acbfc Mon Sep 17 00:00:00 2001 From: Ed Tsai Date: Sun, 16 Feb 2025 22:42:21 +0800 Subject: [PATCH 0246/2157] dm: Enable inline crypto passthrough for striped target Added DM_TARGET_PASSES_CRYPTO feature to the striped target to utilize the hardware encryption of the underlying storage devices, preventing fallback to the crypto API. Signed-off-by: Ed Tsai Signed-off-by: Mikulas Patocka --- drivers/md/dm-stripe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 3786ac67cefe..a1b7535c508a 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -467,7 +467,7 @@ static struct target_type stripe_target = { .name = "striped", .version = {1, 7, 0}, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT | - DM_TARGET_ATOMIC_WRITES, + DM_TARGET_ATOMIC_WRITES | DM_TARGET_PASSES_CRYPTO, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, From d795e38df4b7ebac1072bbf7d8a5500c1ea83332 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:05:58 +0000 Subject: [PATCH 0247/2157] iio: core: Rework claim and release of direct mode to work with sparse. Initial thought was to do something similar to __cond_lock() do_iio_device_claim_direct_mode(iio_dev) ? : ({ __acquire(iio_dev); 0; }) + Appropriate static inline iio_device_release_direct_mode() However with that, sparse generates false positives. E.g. drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c:1811:17: warning: context imbalance in 'st_lsm6dsx_read_raw' - unexpected unlock So instead, this patch rethinks the return type and makes it more 'conditional lock like' (which is part of what is going on under the hood anyway) and return a boolean - true for successfully acquired, false for did not acquire. To allow a migration path given the rework is now non trivial, take a leaf out of the naming of the conditional guard we currently have for IIO device direct mode and drop the _mode postfix from the new functions giving iio_device_claim_direct() and iio_device_release_direct() Whilst the kernel supports __cond_acquires() upstream sparse does not yet do so. Hence rely on sparse expanding a static inline wrapper to explicitly see whether __acquire() is called. Note that even with the solution here, sparse sometimes gives false positives. However in the few cases seen they were complex code structures that benefited from simplification anyway. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-2-jic23@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 56161e02f002..5ed03e36178f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* IIO TODO LIST */ @@ -662,6 +663,31 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); +/* + * Helper functions that allow claim and release of direct mode + * in a fashion that doesn't generate many false positives from sparse. + * Note this must remain static inline in the header so that sparse + * can see the __acquire() marking. Revisit when sparse supports + * __cond_acquires() + */ +static inline bool iio_device_claim_direct(struct iio_dev *indio_dev) +{ + int ret = iio_device_claim_direct_mode(indio_dev); + + if (ret) + return false; + + __acquire(iio_dev); + + return true; +} + +static inline void iio_device_release_direct(struct iio_dev *indio_dev) +{ + iio_device_release_direct_mode(indio_dev); + __release(indio_dev); +} + /* * This autocleanup logic is normally used via * iio_device_claim_direct_scoped(). From 5feb5532870fbced5d6f450b8061a33f461b88ca Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:05:59 +0000 Subject: [PATCH 0248/2157] iio: chemical: scd30: Use guard(mutex) to allow early returns Auto cleanup based release of the lock allows for simpler code flow in a few functions with large multiplexing style switch statements and no common operations following the switch. Suggested-by: David Lechner Cc: Tomasz Duszynski Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-3-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/scd30_core.c | 63 ++++++++++++++----------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c index d613c54cb28d..23ba46f7ca32 100644 --- a/drivers/iio/chemical/scd30_core.c +++ b/drivers/iio/chemical/scd30_core.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Tomasz Duszynski */ #include +#include #include #include #include @@ -198,112 +199,104 @@ static int scd30_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const int *val, int *val2, long mask) { struct scd30_state *state = iio_priv(indio_dev); - int ret = -EINVAL; + int ret; u16 tmp; - mutex_lock(&state->lock); + guard(mutex)(&state->lock); switch (mask) { case IIO_CHAN_INFO_RAW: case IIO_CHAN_INFO_PROCESSED: if (chan->output) { *val = state->pressure_comp; - ret = IIO_VAL_INT; - break; + return IIO_VAL_INT; } ret = iio_device_claim_direct_mode(indio_dev); if (ret) - break; + return ret; ret = scd30_read(state); if (ret) { iio_device_release_direct_mode(indio_dev); - break; + return ret; } *val = state->meas[chan->address]; iio_device_release_direct_mode(indio_dev); - ret = IIO_VAL_INT; - break; + return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; *val2 = 1; - ret = IIO_VAL_INT_PLUS_MICRO; - break; + return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_SAMP_FREQ: ret = scd30_command_read(state, CMD_MEAS_INTERVAL, &tmp); if (ret) - break; + return ret; *val = 0; *val2 = 1000000000 / tmp; - ret = IIO_VAL_INT_PLUS_NANO; - break; + return IIO_VAL_INT_PLUS_NANO; case IIO_CHAN_INFO_CALIBBIAS: ret = scd30_command_read(state, CMD_TEMP_OFFSET, &tmp); if (ret) - break; + return ret; *val = tmp; - ret = IIO_VAL_INT; - break; + return IIO_VAL_INT; + default: + return -EINVAL; } - mutex_unlock(&state->lock); - - return ret; } static int scd30_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct scd30_state *state = iio_priv(indio_dev); - int ret = -EINVAL; + int ret; - mutex_lock(&state->lock); + guard(mutex)(&state->lock); switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: if (val) - break; + return -EINVAL; val = 1000000000 / val2; if (val < SCD30_MEAS_INTERVAL_MIN_S || val > SCD30_MEAS_INTERVAL_MAX_S) - break; + return -EINVAL; ret = scd30_command_write(state, CMD_MEAS_INTERVAL, val); if (ret) - break; + return ret; state->meas_interval = val; - break; + return 0; case IIO_CHAN_INFO_RAW: switch (chan->type) { case IIO_PRESSURE: if (val < SCD30_PRESSURE_COMP_MIN_MBAR || val > SCD30_PRESSURE_COMP_MAX_MBAR) - break; + return -EINVAL; ret = scd30_command_write(state, CMD_START_MEAS, val); if (ret) - break; + return ret; state->pressure_comp = val; - break; + return 0; default: - break; + return -EINVAL; } - break; case IIO_CHAN_INFO_CALIBBIAS: if (val < 0 || val > SCD30_TEMP_OFFSET_MAX) - break; + return -EINVAL; /* * Manufacturer does not explicitly specify min/max sensible * values hence check is omitted for simplicity. */ - ret = scd30_command_write(state, CMD_TEMP_OFFSET / 10, val); + return scd30_command_write(state, CMD_TEMP_OFFSET / 10, val); + default: + return -EINVAL; } - mutex_unlock(&state->lock); - - return ret; } static int scd30_write_raw_get_fmt(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, From 403f0f9b3609b88fa2beb774893867a238f51a64 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:00 +0000 Subject: [PATCH 0249/2157] iio: chemical: scd30: Switch to sparse friendly claim/release_direct() This driver caused a false positive with __cond_lock() style solution but is fine with the simple boolean return approach now used. Cc: Tomasz Duszynski Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-4-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/scd30_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c index 23ba46f7ca32..3fed6b63710f 100644 --- a/drivers/iio/chemical/scd30_core.c +++ b/drivers/iio/chemical/scd30_core.c @@ -211,18 +211,17 @@ static int scd30_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const return IIO_VAL_INT; } - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = scd30_read(state); if (ret) { - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } *val = state->meas[chan->address]; - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; From bcbd26d8662b75b5e602ccf71544d8bea5dded6b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:01 +0000 Subject: [PATCH 0250/2157] iio: temperature: tmp006: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Antoni Pokusinski Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-5-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tmp006.c | 33 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/iio/temperature/tmp006.c b/drivers/iio/temperature/tmp006.c index 1998047a1f24..b5c94b7492f5 100644 --- a/drivers/iio/temperature/tmp006.c +++ b/drivers/iio/temperature/tmp006.c @@ -85,19 +85,25 @@ static int tmp006_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_RAW: if (channel->type == IIO_VOLTAGE) { /* LSB is 156.25 nV */ - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = tmp006_read_measurement(data, TMP006_VOBJECT); - if (ret < 0) - return ret; - } + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = tmp006_read_measurement(data, TMP006_VOBJECT); + iio_device_release_direct(indio_dev); + if (ret < 0) + return ret; + *val = sign_extend32(ret, 15); } else if (channel->type == IIO_TEMP) { /* LSB is 0.03125 degrees Celsius */ - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = tmp006_read_measurement(data, TMP006_TAMBIENT); - if (ret < 0) - return ret; - } + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = tmp006_read_measurement(data, TMP006_TAMBIENT); + iio_device_release_direct(indio_dev); + if (ret < 0) + return ret; + *val = sign_extend32(ret, 15) >> TMP006_TAMBIENT_SHIFT; } else { break; @@ -142,9 +148,8 @@ static int tmp006_write_raw(struct iio_dev *indio_dev, for (i = 0; i < ARRAY_SIZE(tmp006_freqs); i++) if ((val == tmp006_freqs[i][0]) && (val2 == tmp006_freqs[i][1])) { - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; data->config &= ~TMP006_CONFIG_CR_MASK; data->config |= i << TMP006_CONFIG_CR_SHIFT; @@ -153,7 +158,7 @@ static int tmp006_write_raw(struct iio_dev *indio_dev, TMP006_CONFIG, data->config); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } return -EINVAL; From f238f1efc2ae9027ee0a3a2a27ef7d137cd3c973 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:02 +0000 Subject: [PATCH 0251/2157] iio: proximity: sx9310: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: Gwendal Grignou Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-6-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9310.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iio/proximity/sx9310.c b/drivers/iio/proximity/sx9310.c index 0d7f0518d4fb..b60707eba39d 100644 --- a/drivers/iio/proximity/sx9310.c +++ b/drivers/iio/proximity/sx9310.c @@ -337,19 +337,26 @@ static int sx9310_read_raw(struct iio_dev *indio_dev, int *val2, long mask) { struct sx_common_data *data = iio_priv(indio_dev); + int ret; if (chan->type != IIO_PROXIMITY) return -EINVAL; switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx_common_read_proximity(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx_common_read_proximity(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_HARDWAREGAIN: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx9310_read_gain(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx9310_read_gain(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SAMP_FREQ: return sx9310_read_samp_freq(data, val, val2); default: From ec59a125ea251280ad529a1cd29f8daab477e5ae Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:03 +0000 Subject: [PATCH 0252/2157] iio: proximity: sx9324: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context Reviewed-by: Gwendal Grignou Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-7-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9324.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index f7819dd2775c..73d972416c01 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -429,16 +429,23 @@ static int sx9324_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct sx_common_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx_common_read_proximity(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx_common_read_proximity(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_HARDWAREGAIN: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx9324_read_gain(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx9324_read_gain(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SAMP_FREQ: return sx9324_read_samp_freq(data, val, val2); default: From 7b7f7e6ee01bf93d5121805292ef810e1148727c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:04 +0000 Subject: [PATCH 0253/2157] iio: proximity: sx9360: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context Reviewed-by: Gwendal Grignou Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-8-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9360.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iio/proximity/sx9360.c b/drivers/iio/proximity/sx9360.c index a6ff16e33c1e..4448988d4e7e 100644 --- a/drivers/iio/proximity/sx9360.c +++ b/drivers/iio/proximity/sx9360.c @@ -321,16 +321,23 @@ static int sx9360_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct sx_common_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx_common_read_proximity(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx_common_read_proximity(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_HARDWAREGAIN: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return sx9360_read_gain(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = sx9360_read_gain(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SAMP_FREQ: return sx9360_read_samp_freq(data, val, val2); default: From 7c00c85a635bd458cfa07e8e59f9b467abc44777 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:05 +0000 Subject: [PATCH 0254/2157] iio: accel: adxl367: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context In some cases there is a convenient wrapper function to which the handling can be moved. Do that instead of introducing another layer of wrappers. In others an outer wrapper is added which claims direct mode, runs the original function with the scoped claim logic removed, releases direct mode and then checks for errors. Cc: Cosmin Tanislav Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-9-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 194 ++++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 88 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index a48ac0d7bd96..add4053e7a02 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -477,45 +477,42 @@ static int adxl367_set_fifo_watermark(struct adxl367_state *st, static int adxl367_set_range(struct iio_dev *indio_dev, enum adxl367_range range) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct adxl367_state *st = iio_priv(indio_dev); - int ret; + struct adxl367_state *st = iio_priv(indio_dev); + int ret; - guard(mutex)(&st->lock); + guard(mutex)(&st->lock); - ret = adxl367_set_measure_en(st, false); - if (ret) - return ret; + ret = adxl367_set_measure_en(st, false); + if (ret) + return ret; - ret = regmap_update_bits(st->regmap, ADXL367_REG_FILTER_CTL, - ADXL367_FILTER_CTL_RANGE_MASK, - FIELD_PREP(ADXL367_FILTER_CTL_RANGE_MASK, - range)); - if (ret) - return ret; + ret = regmap_update_bits(st->regmap, ADXL367_REG_FILTER_CTL, + ADXL367_FILTER_CTL_RANGE_MASK, + FIELD_PREP(ADXL367_FILTER_CTL_RANGE_MASK, + range)); + if (ret) + return ret; - adxl367_scale_act_thresholds(st, st->range, range); + adxl367_scale_act_thresholds(st, st->range, range); - /* Activity thresholds depend on range */ - ret = _adxl367_set_act_threshold(st, ADXL367_ACTIVITY, - st->act_threshold); - if (ret) - return ret; + /* Activity thresholds depend on range */ + ret = _adxl367_set_act_threshold(st, ADXL367_ACTIVITY, + st->act_threshold); + if (ret) + return ret; - ret = _adxl367_set_act_threshold(st, ADXL367_INACTIVITY, - st->inact_threshold); - if (ret) - return ret; + ret = _adxl367_set_act_threshold(st, ADXL367_INACTIVITY, + st->inact_threshold); + if (ret) + return ret; - ret = adxl367_set_measure_en(st, true); - if (ret) - return ret; + ret = adxl367_set_measure_en(st, true); + if (ret) + return ret; - st->range = range; + st->range = range; - return 0; - } - unreachable(); + return 0; } static int adxl367_time_ms_to_samples(struct adxl367_state *st, unsigned int ms) @@ -620,23 +617,20 @@ static int _adxl367_set_odr(struct adxl367_state *st, enum adxl367_odr odr) static int adxl367_set_odr(struct iio_dev *indio_dev, enum adxl367_odr odr) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct adxl367_state *st = iio_priv(indio_dev); - int ret; + struct adxl367_state *st = iio_priv(indio_dev); + int ret; - guard(mutex)(&st->lock); + guard(mutex)(&st->lock); - ret = adxl367_set_measure_en(st, false); - if (ret) - return ret; + ret = adxl367_set_measure_en(st, false); + if (ret) + return ret; - ret = _adxl367_set_odr(st, odr); - if (ret) - return ret; + ret = _adxl367_set_odr(st, odr); + if (ret) + return ret; - return adxl367_set_measure_en(st, true); - } - unreachable(); + return adxl367_set_measure_en(st, true); } static int adxl367_set_temp_adc_en(struct adxl367_state *st, unsigned int reg, @@ -725,32 +719,29 @@ static int adxl367_read_sample(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct adxl367_state *st = iio_priv(indio_dev); - u16 sample; - int ret; + struct adxl367_state *st = iio_priv(indio_dev); + u16 sample; + int ret; - guard(mutex)(&st->lock); + guard(mutex)(&st->lock); - ret = adxl367_set_temp_adc_reg_en(st, chan->address, true); - if (ret) - return ret; + ret = adxl367_set_temp_adc_reg_en(st, chan->address, true); + if (ret) + return ret; - ret = regmap_bulk_read(st->regmap, chan->address, &st->sample_buf, - sizeof(st->sample_buf)); - if (ret) - return ret; + ret = regmap_bulk_read(st->regmap, chan->address, &st->sample_buf, + sizeof(st->sample_buf)); + if (ret) + return ret; - sample = FIELD_GET(ADXL367_DATA_MASK, be16_to_cpu(st->sample_buf)); - *val = sign_extend32(sample, chan->scan_type.realbits - 1); + sample = FIELD_GET(ADXL367_DATA_MASK, be16_to_cpu(st->sample_buf)); + *val = sign_extend32(sample, chan->scan_type.realbits - 1); - ret = adxl367_set_temp_adc_reg_en(st, chan->address, false); - if (ret) - return ret; + ret = adxl367_set_temp_adc_reg_en(st, chan->address, false); + if (ret) + return ret; - return IIO_VAL_INT; - } - unreachable(); + return IIO_VAL_INT; } static int adxl367_get_status(struct adxl367_state *st, u8 *status, @@ -852,10 +843,15 @@ static int adxl367_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long info) { struct adxl367_state *st = iio_priv(indio_dev); + int ret; switch (info) { case IIO_CHAN_INFO_RAW: - return adxl367_read_sample(indio_dev, chan, val); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = adxl367_read_sample(indio_dev, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_ACCEL: { @@ -912,7 +908,12 @@ static int adxl367_write_raw(struct iio_dev *indio_dev, if (ret) return ret; - return adxl367_set_odr(indio_dev, odr); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = adxl367_set_odr(indio_dev, odr); + iio_device_release_direct(indio_dev); + return ret; } case IIO_CHAN_INFO_SCALE: { enum adxl367_range range; @@ -921,7 +922,12 @@ static int adxl367_write_raw(struct iio_dev *indio_dev, if (ret) return ret; - return adxl367_set_range(indio_dev, range); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = adxl367_set_range(indio_dev, range); + iio_device_release_direct(indio_dev); + return ret; } default: return -EINVAL; @@ -1069,13 +1075,15 @@ static int adxl367_read_event_config(struct iio_dev *indio_dev, } } -static int adxl367_write_event_config(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - enum iio_event_type type, - enum iio_event_direction dir, - bool state) +static int __adxl367_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + bool state) { + struct adxl367_state *st = iio_priv(indio_dev); enum adxl367_activity_type act; + int ret; switch (dir) { case IIO_EV_DIR_RISING: @@ -1088,28 +1096,38 @@ static int adxl367_write_event_config(struct iio_dev *indio_dev, return -EINVAL; } - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct adxl367_state *st = iio_priv(indio_dev); - int ret; + guard(mutex)(&st->lock); - guard(mutex)(&st->lock); + ret = adxl367_set_measure_en(st, false); + if (ret) + return ret; - ret = adxl367_set_measure_en(st, false); - if (ret) - return ret; + ret = adxl367_set_act_interrupt_en(st, act, state); + if (ret) + return ret; - ret = adxl367_set_act_interrupt_en(st, act, state); - if (ret) - return ret; + ret = adxl367_set_act_en(st, act, state ? ADCL367_ACT_REF_ENABLED + : ADXL367_ACT_DISABLED); + if (ret) + return ret; - ret = adxl367_set_act_en(st, act, state ? ADCL367_ACT_REF_ENABLED - : ADXL367_ACT_DISABLED); - if (ret) - return ret; + return adxl367_set_measure_en(st, true); +} - return adxl367_set_measure_en(st, true); - } - unreachable(); +static int adxl367_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + bool state) +{ + int ret; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = __adxl367_write_event_config(indio_dev, chan, type, dir, state); + iio_device_release_direct(indio_dev); + return ret; } static ssize_t adxl367_get_fifo_enabled(struct device *dev, From e48c56d1503c091f6b235d3a8d5f8fff4778ec6b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:06 +0000 Subject: [PATCH 0255/2157] iio: adc: ad4000: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: Marcelo Schmitt Tested-by: Marcelo Schmitt Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-10-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4000.c | 64 ++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/iio/adc/ad4000.c b/drivers/iio/adc/ad4000.c index 1d556a842a68..4fe8dee48da9 100644 --- a/drivers/iio/adc/ad4000.c +++ b/drivers/iio/adc/ad4000.c @@ -535,12 +535,16 @@ static int ad4000_read_raw(struct iio_dev *indio_dev, int *val2, long info) { struct ad4000_state *st = iio_priv(indio_dev); + int ret; switch (info) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ad4000_single_conversion(indio_dev, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = ad4000_single_conversion(indio_dev, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: *val = st->scale_tbl[st->span_comp][0]; *val2 = st->scale_tbl[st->span_comp][1]; @@ -585,36 +589,46 @@ static int ad4000_write_raw_get_fmt(struct iio_dev *indio_dev, } } -static int ad4000_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, int val, int val2, - long mask) +static int __ad4000_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val2) { struct ad4000_state *st = iio_priv(indio_dev); unsigned int reg_val; bool span_comp_en; int ret; + guard(mutex)(&st->lock); + + ret = ad4000_read_reg(st, ®_val); + if (ret < 0) + return ret; + + span_comp_en = val2 == st->scale_tbl[1][1]; + reg_val &= ~AD4000_CFG_SPAN_COMP; + reg_val |= FIELD_PREP(AD4000_CFG_SPAN_COMP, span_comp_en); + + ret = ad4000_write_reg(st, reg_val); + if (ret < 0) + return ret; + + st->span_comp = span_comp_en; + return 0; +} + +static int ad4000_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int ret; + switch (mask) { case IIO_CHAN_INFO_SCALE: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - guard(mutex)(&st->lock); - - ret = ad4000_read_reg(st, ®_val); - if (ret < 0) - return ret; - - span_comp_en = val2 == st->scale_tbl[1][1]; - reg_val &= ~AD4000_CFG_SPAN_COMP; - reg_val |= FIELD_PREP(AD4000_CFG_SPAN_COMP, span_comp_en); - - ret = ad4000_write_reg(st, reg_val); - if (ret < 0) - return ret; - - st->span_comp = span_comp_en; - return 0; - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = __ad4000_write_raw(indio_dev, chan, val2); + iio_device_release_direct(indio_dev); + return ret; default: return -EINVAL; } From b70fb3c1951e8ed03a10780322e9a77b4a3c1a66 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:07 +0000 Subject: [PATCH 0256/2157] iio: adc: ad4130: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Cosmin Tanislav Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-11-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index acc241cc0a7a..061eeb9b1f8d 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1067,13 +1067,11 @@ static int _ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel, static int ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel, int *val) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - struct ad4130_state *st = iio_priv(indio_dev); + struct ad4130_state *st = iio_priv(indio_dev); - guard(mutex)(&st->lock); - return _ad4130_read_sample(indio_dev, channel, val); - } - unreachable(); + guard(mutex)(&st->lock); + + return _ad4130_read_sample(indio_dev, channel, val); } static int ad4130_read_raw(struct iio_dev *indio_dev, @@ -1083,10 +1081,16 @@ static int ad4130_read_raw(struct iio_dev *indio_dev, struct ad4130_state *st = iio_priv(indio_dev); unsigned int channel = chan->scan_index; struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup; + int ret; switch (info) { case IIO_CHAN_INFO_RAW: - return ad4130_read_sample(indio_dev, channel, val); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = ad4130_read_sample(indio_dev, channel, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: { guard(mutex)(&st->lock); *val = st->scale_tbls[setup_info->ref_sel][setup_info->pga][0]; From 20a57c2714a839c55f8d7e813432f4aa8c1dedbc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:08 +0000 Subject: [PATCH 0257/2157] iio: adc: ad4695: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. In some cases code is factored out to utility functions that can do a direct return with the claim and release around the call. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-12-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 293 +++++++++++++++++++++------------------ 1 file changed, 159 insertions(+), 134 deletions(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 3a1a6f96480f..9dbf326b6273 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -1029,6 +1029,25 @@ static int ad4695_read_one_sample(struct ad4695_state *st, unsigned int address) return spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); } +static int __ad4695_read_info_raw(struct ad4695_state *st, + struct iio_chan_spec const *chan, + int *val) +{ + u8 realbits = chan->scan_type.realbits; + int ret; + + ret = ad4695_read_one_sample(st, chan->address); + if (ret) + return ret; + + if (chan->scan_type.sign == 's') + *val = sign_extend32(st->raw_data, realbits - 1); + else + *val = st->raw_data; + + return IIO_VAL_INT; +} + static int ad4695_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -1049,19 +1068,12 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = ad4695_read_one_sample(st, chan->address); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; - if (scan_type->sign == 's') - *val = sign_extend32(st->raw_data, realbits - 1); - else - *val = st->raw_data; - - return IIO_VAL_INT; - } - unreachable(); + ret = __ad4695_read_info_raw(st, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_VOLTAGE: @@ -1099,63 +1111,62 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_CALIBSCALE: switch (chan->type) { case IIO_VOLTAGE: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = regmap_read(st->regmap16, - AD4695_REG_GAIN_IN(chan->scan_index), - ®_val); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = regmap_read(st->regmap16, + AD4695_REG_GAIN_IN(chan->scan_index), + ®_val); + iio_device_release_direct(indio_dev); + if (ret) + return ret; + *val = reg_val; + *val2 = 15; - *val = reg_val; - *val2 = 15; - - return IIO_VAL_FRACTIONAL_LOG2; - } - unreachable(); + return IIO_VAL_FRACTIONAL_LOG2; default: return -EINVAL; } case IIO_CHAN_INFO_CALIBBIAS: - switch (chan->type) { - case IIO_VOLTAGE: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = regmap_read(st->regmap16, - AD4695_REG_OFFSET_IN(chan->scan_index), - ®_val); - if (ret) - return ret; + switch (chan->type) + case IIO_VOLTAGE: { + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = regmap_read(st->regmap16, + AD4695_REG_OFFSET_IN(chan->scan_index), + ®_val); + iio_device_release_direct(indio_dev); + if (ret) + return ret; - tmp = sign_extend32(reg_val, 15); + tmp = sign_extend32(reg_val, 15); - switch (cfg->oversampling_ratio) { - case 1: - *val = tmp / 4; - *val2 = abs(tmp) % 4 * MICRO / 4; - break; - case 4: - *val = tmp / 2; - *val2 = abs(tmp) % 2 * MICRO / 2; - break; - case 16: - *val = tmp; - *val2 = 0; - break; - case 64: - *val = tmp * 2; - *val2 = 0; - break; - default: - return -EINVAL; - } - - if (tmp < 0 && *val2) { - *val *= -1; - *val2 *= -1; - } - - return IIO_VAL_INT_PLUS_MICRO; + switch (osr) { + case 1: + *val = tmp / 4; + *val2 = abs(tmp) % 4 * MICRO / 4; + break; + case 4: + *val = tmp / 2; + *val2 = abs(tmp) % 2 * MICRO / 2; + break; + case 16: + *val = tmp; + *val2 = 0; + break; + case 64: + *val = tmp * 2; + *val2 = 0; + break; + default: + return -EINVAL; } - unreachable(); + + if (tmp < 0 && *val2) { + *val *= -1; + *val2 *= -1; + } + + return IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; } @@ -1255,72 +1266,83 @@ static unsigned int ad4695_get_calibbias(int val, int val2, int osr) return clamp_t(int, val_calc, S16_MIN, S16_MAX); } -static int ad4695_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, int val2, long mask) +static int __ad4695_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) { struct ad4695_state *st = iio_priv(indio_dev); unsigned int reg_val; unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - switch (mask) { - case IIO_CHAN_INFO_CALIBSCALE: - switch (chan->type) { - case IIO_VOLTAGE: - if (val < 0 || val2 < 0) - reg_val = 0; - else if (val > 1) - reg_val = U16_MAX; - else - reg_val = (val * (1 << 16) + - mul_u64_u32_div(val2, 1 << 16, - MICRO)) / 2; + switch (mask) { + case IIO_CHAN_INFO_CALIBSCALE: + switch (chan->type) { + case IIO_VOLTAGE: + if (val < 0 || val2 < 0) + reg_val = 0; + else if (val > 1) + reg_val = U16_MAX; + else + reg_val = (val * (1 << 16) + + mul_u64_u32_div(val2, 1 << 16, + MICRO)) / 2; - return regmap_write(st->regmap16, - AD4695_REG_GAIN_IN(chan->scan_index), - reg_val); - default: - return -EINVAL; - } - case IIO_CHAN_INFO_CALIBBIAS: - switch (chan->type) { - case IIO_VOLTAGE: - reg_val = ad4695_get_calibbias(val, val2, osr); - return regmap_write(st->regmap16, - AD4695_REG_OFFSET_IN(chan->scan_index), - reg_val); - default: - return -EINVAL; - } - case IIO_CHAN_INFO_SAMP_FREQ: { - struct pwm_state state; - /* - * Limit the maximum acceptable sample rate according to - * the channel's oversampling ratio. - */ - u64 max_osr_rate = DIV_ROUND_UP_ULL(st->chip_info->max_sample_rate, - osr); - - if (val <= 0 || val > max_osr_rate) - return -EINVAL; - - guard(mutex)(&st->cnv_pwm_lock); - pwm_get_state(st->cnv_pwm, &state); - /* - * The required sample frequency for a given OSR is the - * input frequency multiplied by it. - */ - state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, val * osr); - return pwm_apply_might_sleep(st->cnv_pwm, &state); - } - case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - return ad4695_set_osr_val(st, chan, val); + return regmap_write(st->regmap16, + AD4695_REG_GAIN_IN(chan->scan_index), + reg_val); default: return -EINVAL; } + case IIO_CHAN_INFO_CALIBBIAS: + switch (chan->type) { + case IIO_VOLTAGE: + reg_val = ad4695_get_calibbias(val, val2, osr); + return regmap_write(st->regmap16, + AD4695_REG_OFFSET_IN(chan->scan_index), + reg_val); + default: + return -EINVAL; + } + case IIO_CHAN_INFO_SAMP_FREQ: { + struct pwm_state state; + /* + * Limit the maximum acceptable sample rate according to + * the channel's oversampling ratio. + */ + u64 max_osr_rate = DIV_ROUND_UP_ULL(st->chip_info->max_sample_rate, + osr); + + if (val <= 0 || val > max_osr_rate) + return -EINVAL; + + guard(mutex)(&st->cnv_pwm_lock); + pwm_get_state(st->cnv_pwm, &state); + /* + * The required sample frequency for a given OSR is the + * input frequency multiplied by it. + */ + state.period = DIV_ROUND_UP_ULL(NSEC_PER_SEC, val * osr); + return pwm_apply_might_sleep(st->cnv_pwm, &state); } - unreachable(); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return ad4695_set_osr_val(st, chan, val); + default: + return -EINVAL; + } +} + +static int ad4695_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int ret; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = __ad4695_write_raw(indio_dev, chan, val, val2, mask); + iio_device_release_direct(indio_dev); + + return ret; } static int ad4695_read_avail(struct iio_dev *indio_dev, @@ -1417,26 +1439,29 @@ static int ad4695_debugfs_reg_access(struct iio_dev *indio_dev, unsigned int *readval) { struct ad4695_state *st = iio_priv(indio_dev); + int ret = -EINVAL; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - if (readval) { - if (regmap_check_range_table(st->regmap, reg, - &ad4695_regmap_rd_table)) - return regmap_read(st->regmap, reg, readval); - if (regmap_check_range_table(st->regmap16, reg, - &ad4695_regmap16_rd_table)) - return regmap_read(st->regmap16, reg, readval); - } else { - if (regmap_check_range_table(st->regmap, reg, - &ad4695_regmap_wr_table)) - return regmap_write(st->regmap, reg, writeval); - if (regmap_check_range_table(st->regmap16, reg, - &ad4695_regmap16_wr_table)) - return regmap_write(st->regmap16, reg, writeval); - } + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + if (readval) { + if (regmap_check_range_table(st->regmap, reg, + &ad4695_regmap_rd_table)) + ret = regmap_read(st->regmap, reg, readval); + if (regmap_check_range_table(st->regmap16, reg, + &ad4695_regmap16_rd_table)) + ret = regmap_read(st->regmap16, reg, readval); + } else { + if (regmap_check_range_table(st->regmap, reg, + &ad4695_regmap_wr_table)) + ret = regmap_write(st->regmap, reg, writeval); + if (regmap_check_range_table(st->regmap16, reg, + &ad4695_regmap16_wr_table)) + ret = regmap_write(st->regmap16, reg, writeval); } + iio_device_release_direct(indio_dev); - return -EINVAL; + return ret; } static int ad4695_get_current_scan_type(const struct iio_dev *indio_dev, From 8a1812d040c0059378fe8687d68a8b6a90493de1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:09 +0000 Subject: [PATCH 0258/2157] iio: adc: ad7606: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Guillaume Stols Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-13-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 612fe73396d7..1e8b03a518b8 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -762,13 +762,13 @@ static int ad7606_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = ad7606_scan_direct(indio_dev, chan->address, val); - if (ret < 0) - return ret; - return IIO_VAL_INT; - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ad7606_scan_direct(indio_dev, chan->address, val); + iio_device_release_direct(indio_dev); + if (ret < 0) + return ret; + return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: if (st->sw_mode_en) ch = chan->address; From 48a24fd21d118a4ee91f91ccd8ab142034096518 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:10 +0000 Subject: [PATCH 0259/2157] iio: adc: ad7625: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: Trevor Gamblin Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-14-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7625.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7625.c b/drivers/iio/adc/ad7625.c index adb4e25dd7ea..0466c0c7eae4 100644 --- a/drivers/iio/adc/ad7625.c +++ b/drivers/iio/adc/ad7625.c @@ -248,12 +248,15 @@ static int ad7625_write_raw(struct iio_dev *indio_dev, int val, int val2, long info) { struct ad7625_state *st = iio_priv(indio_dev); + int ret; switch (info) { case IIO_CHAN_INFO_SAMP_FREQ: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ad7625_set_sampling_freq(st, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ad7625_set_sampling_freq(st, val); + iio_device_release_direct(indio_dev); + return ret; default: return -EINVAL; } From 82a0760c109f3aa3314f44af229005e655a85527 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:11 +0000 Subject: [PATCH 0260/2157] iio: adc: ad7779: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Ramona Alexandra Nechita Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-15-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7779.c | 103 ++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 40 deletions(-) diff --git a/drivers/iio/adc/ad7779.c b/drivers/iio/adc/ad7779.c index 2537dab69a35..a5d87faa5e12 100644 --- a/drivers/iio/adc/ad7779.c +++ b/drivers/iio/adc/ad7779.c @@ -467,59 +467,82 @@ static int ad7779_set_calibbias(struct ad7779_state *st, int channel, int val) calibbias[2]); } -static int ad7779_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, int *val, - int *val2, long mask) +static int __ad7779_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) { struct ad7779_state *st = iio_priv(indio_dev); int ret; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - switch (mask) { - case IIO_CHAN_INFO_CALIBSCALE: - ret = ad7779_get_calibscale(st, chan->channel); - if (ret < 0) - return ret; - *val = ret; - *val2 = GAIN_REL; - return IIO_VAL_FRACTIONAL; - case IIO_CHAN_INFO_CALIBBIAS: - ret = ad7779_get_calibbias(st, chan->channel); - if (ret < 0) - return ret; - *val = ret; - return IIO_VAL_INT; - case IIO_CHAN_INFO_SAMP_FREQ: - *val = st->sampling_freq; - if (*val < 0) - return -EINVAL; - return IIO_VAL_INT; - default: + switch (mask) { + case IIO_CHAN_INFO_CALIBSCALE: + ret = ad7779_get_calibscale(st, chan->channel); + if (ret < 0) + return ret; + *val = ret; + *val2 = GAIN_REL; + return IIO_VAL_FRACTIONAL; + case IIO_CHAN_INFO_CALIBBIAS: + ret = ad7779_get_calibbias(st, chan->channel); + if (ret < 0) + return ret; + *val = ret; + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->sampling_freq; + if (*val < 0) return -EINVAL; - } + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7779_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + int ret; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = __ad7779_read_raw(indio_dev, chan, val, val2, mask); + iio_device_release_direct(indio_dev); + return ret; +} + +static int __ad7779_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, + long mask) +{ + struct ad7779_state *st = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_CALIBSCALE: + return ad7779_set_calibscale(st, chan->channel, val2); + case IIO_CHAN_INFO_CALIBBIAS: + return ad7779_set_calibbias(st, chan->channel, val); + case IIO_CHAN_INFO_SAMP_FREQ: + return ad7779_set_sampling_frequency(st, val); + default: + return -EINVAL; } - unreachable(); } static int ad7779_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { - struct ad7779_state *st = iio_priv(indio_dev); + int ret; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - switch (mask) { - case IIO_CHAN_INFO_CALIBSCALE: - return ad7779_set_calibscale(st, chan->channel, val2); - case IIO_CHAN_INFO_CALIBBIAS: - return ad7779_set_calibbias(st, chan->channel, val); - case IIO_CHAN_INFO_SAMP_FREQ: - return ad7779_set_sampling_frequency(st, val); - default: - return -EINVAL; - } - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = __ad7779_write_raw(indio_dev, chan, val, val2, mask); + iio_device_release_direct(indio_dev); + return ret; } static int ad7779_buffer_preenable(struct iio_dev *indio_dev) From 7b22c000308a3d8dc24da736e08b98892ae246bc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:12 +0000 Subject: [PATCH 0261/2157] iio: adc: ad9467: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Also use guard() to simplify mutex unlock paths. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-16-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad9467.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c index f30119b42ba0..f7a9f46ea0dc 100644 --- a/drivers/iio/adc/ad9467.c +++ b/drivers/iio/adc/ad9467.c @@ -813,6 +813,18 @@ static int ad9467_read_raw(struct iio_dev *indio_dev, } } +static int __ad9467_update_clock(struct ad9467_state *st, long r_clk) +{ + int ret; + + ret = clk_set_rate(st->clk, r_clk); + if (ret) + return ret; + + guard(mutex)(&st->lock); + return ad9467_calibrate(st); +} + static int ad9467_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) @@ -842,14 +854,11 @@ static int ad9467_write_raw(struct iio_dev *indio_dev, if (sample_rate == r_clk) return 0; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = clk_set_rate(st->clk, r_clk); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; - guard(mutex)(&st->lock); - ret = ad9467_calibrate(st); - } + ret = __ad9467_update_clock(st, r_clk); + iio_device_release_direct(indio_dev); return ret; default: return -EINVAL; From 5fd89f430d9e97b06ca242a59ca69819f3e6a85d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:13 +0000 Subject: [PATCH 0262/2157] iio: adc: max1363: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-17-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1363.c | 163 ++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 75 deletions(-) diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c index e8d731bc34e0..35717ec082ce 100644 --- a/drivers/iio/adc/max1363.c +++ b/drivers/iio/adc/max1363.c @@ -364,55 +364,52 @@ static int max1363_read_single_chan(struct iio_dev *indio_dev, int *val, long m) { - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - s32 data; - u8 rxbuf[2]; - struct max1363_state *st = iio_priv(indio_dev); - struct i2c_client *client = st->client; + s32 data; + u8 rxbuf[2]; + struct max1363_state *st = iio_priv(indio_dev); + struct i2c_client *client = st->client; - guard(mutex)(&st->lock); + guard(mutex)(&st->lock); - /* - * If monitor mode is enabled, the method for reading a single - * channel will have to be rather different and has not yet - * been implemented. - * - * Also, cannot read directly if buffered capture enabled. - */ - if (st->monitor_on) - return -EBUSY; + /* + * If monitor mode is enabled, the method for reading a single + * channel will have to be rather different and has not yet + * been implemented. + * + * Also, cannot read directly if buffered capture enabled. + */ + if (st->monitor_on) + return -EBUSY; - /* Check to see if current scan mode is correct */ - if (st->current_mode != &max1363_mode_table[chan->address]) { - int ret; + /* Check to see if current scan mode is correct */ + if (st->current_mode != &max1363_mode_table[chan->address]) { + int ret; - /* Update scan mode if needed */ - st->current_mode = &max1363_mode_table[chan->address]; - ret = max1363_set_scan_mode(st); - if (ret < 0) - return ret; - } - if (st->chip_info->bits != 8) { - /* Get reading */ - data = st->recv(client, rxbuf, 2); - if (data < 0) - return data; - - data = get_unaligned_be16(rxbuf) & - ((1 << st->chip_info->bits) - 1); - } else { - /* Get reading */ - data = st->recv(client, rxbuf, 1); - if (data < 0) - return data; - - data = rxbuf[0]; - } - *val = data; - - return 0; + /* Update scan mode if needed */ + st->current_mode = &max1363_mode_table[chan->address]; + ret = max1363_set_scan_mode(st); + if (ret < 0) + return ret; } - unreachable(); + if (st->chip_info->bits != 8) { + /* Get reading */ + data = st->recv(client, rxbuf, 2); + if (data < 0) + return data; + + data = get_unaligned_be16(rxbuf) & + ((1 << st->chip_info->bits) - 1); + } else { + /* Get reading */ + data = st->recv(client, rxbuf, 1); + if (data < 0) + return data; + + data = rxbuf[0]; + } + *val = data; + + return 0; } static int max1363_read_raw(struct iio_dev *indio_dev, @@ -426,7 +423,11 @@ static int max1363_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = max1363_read_single_chan(indio_dev, chan, val, m); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; return IIO_VAL_INT; @@ -947,46 +948,58 @@ static inline int __max1363_check_event_mask(int thismask, int checkmask) return ret; } +static int __max1363_write_event_config(struct max1363_state *st, + const struct iio_chan_spec *chan, + enum iio_event_direction dir, bool state) +{ + int number = chan->channel; + u16 unifiedmask; + int ret; + + guard(mutex)(&st->lock); + + unifiedmask = st->mask_low | st->mask_high; + if (dir == IIO_EV_DIR_FALLING) { + + if (state == 0) + st->mask_low &= ~(1 << number); + else { + ret = __max1363_check_event_mask((1 << number), + unifiedmask); + if (ret) + return ret; + st->mask_low |= (1 << number); + } + } else { + if (state == 0) + st->mask_high &= ~(1 << number); + else { + ret = __max1363_check_event_mask((1 << number), + unifiedmask); + if (ret) + return ret; + st->mask_high |= (1 << number); + } + } + + return 0; + +} static int max1363_write_event_config(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, bool state) { struct max1363_state *st = iio_priv(indio_dev); + int ret; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - int number = chan->channel; - u16 unifiedmask; - int ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; - guard(mutex)(&st->lock); - - unifiedmask = st->mask_low | st->mask_high; - if (dir == IIO_EV_DIR_FALLING) { - - if (state == 0) - st->mask_low &= ~(1 << number); - else { - ret = __max1363_check_event_mask((1 << number), - unifiedmask); - if (ret) - return ret; - st->mask_low |= (1 << number); - } - } else { - if (state == 0) - st->mask_high &= ~(1 << number); - else { - ret = __max1363_check_event_mask((1 << number), - unifiedmask); - if (ret) - return ret; - st->mask_high |= (1 << number); - } - } - } + ret = __max1363_write_event_config(st, chan, dir, state); + iio_device_release_direct(indio_dev); max1363_monitor_mode_update(st, !!(st->mask_high | st->mask_low)); - return 0; + return ret; } /* From 27ac40b6275d223def7c7102efd7d61ccab2b07a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:14 +0000 Subject: [PATCH 0263/2157] iio: adc: rtq6056: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context Reviewed-by: ChiYuan Huang Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-18-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rtq6056.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/iio/adc/rtq6056.c b/drivers/iio/adc/rtq6056.c index 337bc8b31b2c..54239df61d86 100644 --- a/drivers/iio/adc/rtq6056.c +++ b/drivers/iio/adc/rtq6056.c @@ -514,26 +514,37 @@ static int rtq6056_adc_read_avail(struct iio_dev *indio_dev, } } -static int rtq6056_adc_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, int val, - int val2, long mask) +static int __rtq6056_adc_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + long mask) { struct rtq6056_priv *priv = iio_priv(indio_dev); const struct richtek_dev_data *devdata = priv->devdata; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - switch (mask) { - case IIO_CHAN_INFO_SAMP_FREQ: - if (devdata->fixed_samp_freq) - return -EINVAL; - return rtq6056_adc_set_samp_freq(priv, chan, val); - case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - return devdata->set_average(priv, val); - default: + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (devdata->fixed_samp_freq) return -EINVAL; - } + return rtq6056_adc_set_samp_freq(priv, chan, val); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + return devdata->set_average(priv, val); + default: + return -EINVAL; } - unreachable(); +} + +static int rtq6056_adc_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long mask) +{ + int ret; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = __rtq6056_adc_write_raw(indio_dev, chan, val, mask); + iio_device_release_direct(indio_dev); + return ret; } static const char *rtq6056_channel_labels[RTQ6056_MAX_CHANNEL] = { @@ -590,9 +601,8 @@ static ssize_t shunt_resistor_store(struct device *dev, struct rtq6056_priv *priv = iio_priv(indio_dev); int val, val_fract, ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = iio_str_to_fixpoint(buf, 100000, &val, &val_fract); if (ret) @@ -601,7 +611,7 @@ static ssize_t shunt_resistor_store(struct device *dev, ret = rtq6056_set_shunt_resistor(priv, val * 1000000 + val_fract); out_store: - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret ?: len; } From dc100956600c59e7ce802cf93f56804ebffb641d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:15 +0000 Subject: [PATCH 0264/2157] iio: adc: ti-adc161s626: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-19-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-adc161s626.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ti-adc161s626.c b/drivers/iio/adc/ti-adc161s626.c index 474e733fb8e0..28aa6b80160c 100644 --- a/drivers/iio/adc/ti-adc161s626.c +++ b/drivers/iio/adc/ti-adc161s626.c @@ -137,13 +137,13 @@ static int ti_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = ti_adc_read_measurement(data, chan, val); - if (ret) - return ret; - return IIO_VAL_INT; - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ti_adc_read_measurement(data, chan, val); + iio_device_release_direct(indio_dev); + if (ret) + return ret; + return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: ret = regulator_get_voltage(data->ref); if (ret < 0) From 69deb972f9aadaf33edea89e4d9a53c84fe3aa18 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:16 +0000 Subject: [PATCH 0265/2157] iio: adc: ti-ads1119: Stop using iio_device_claim_direct_scoped() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: João Paulo Gonçalves Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-20-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads1119.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ti-ads1119.c b/drivers/iio/adc/ti-ads1119.c index de019b3faa48..f120e7e21cff 100644 --- a/drivers/iio/adc/ti-ads1119.c +++ b/drivers/iio/adc/ti-ads1119.c @@ -336,19 +336,24 @@ static int ads1119_read_raw(struct iio_dev *indio_dev, { struct ads1119_state *st = iio_priv(indio_dev); unsigned int index = chan->address; + int ret; if (index >= st->num_channels_cfg) return -EINVAL; switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ads1119_single_conversion(st, chan, val, false); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ads1119_single_conversion(st, chan, val, false); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_OFFSET: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ads1119_single_conversion(st, chan, val, true); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ads1119_single_conversion(st, chan, val, true); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_uV / 1000; *val /= st->channels_cfg[index].gain; From e4c569742b600dad560bbf62dd4d4f53cd7a19c7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:17 +0000 Subject: [PATCH 0266/2157] iio: addac: ad74413r: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Includes moving a mutex lock into a function rather than around it to simplify the error handling. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-21-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index daea2bde7acf..f14d12b03da6 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -826,6 +826,8 @@ static int _ad74413r_get_single_adc_result(struct ad74413r_state *st, unsigned int uval; int ret; + guard(mutex)(&st->lock); + reinit_completion(&st->adc_data_completion); ret = ad74413r_set_adc_channel_enable(st, channel, true); @@ -865,12 +867,14 @@ static int ad74413r_get_single_adc_result(struct iio_dev *indio_dev, unsigned int channel, int *val) { struct ad74413r_state *st = iio_priv(indio_dev); + int ret; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - guard(mutex)(&st->lock); - return _ad74413r_get_single_adc_result(st, channel, val); - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = _ad74413r_get_single_adc_result(st, channel, val); + iio_device_release_direct(indio_dev); + return ret; } static void ad74413r_adc_to_resistance_result(int adc_result, int *val) From 5e802eed70b140dd267811f881c345c2eda6cd0d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:18 +0000 Subject: [PATCH 0267/2157] iio: chemical: ens160: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: Gustavo Silva Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-22-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/ens160_core.c | 36 +++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/iio/chemical/ens160_core.c b/drivers/iio/chemical/ens160_core.c index 48d5ad2075b6..152f81ff57e3 100644 --- a/drivers/iio/chemical/ens160_core.c +++ b/drivers/iio/chemical/ens160_core.c @@ -100,25 +100,35 @@ static const struct iio_chan_spec ens160_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(2), }; -static int ens160_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, int *val2, long mask) +static int __ens160_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val) { struct ens160_data *data = iio_priv(indio_dev); int ret; + guard(mutex)(&data->mutex); + ret = regmap_bulk_read(data->regmap, chan->address, + &data->buf, sizeof(data->buf)); + if (ret) + return ret; + *val = le16_to_cpu(data->buf); + return IIO_VAL_INT; +} + +static int ens160_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + int ret; + switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - guard(mutex)(&data->mutex); - ret = regmap_bulk_read(data->regmap, chan->address, - &data->buf, sizeof(data->buf)); - if (ret) - return ret; - *val = le16_to_cpu(data->buf); - return IIO_VAL_INT; - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = __ens160_read_raw(indio_dev, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: switch (chan->channel2) { case IIO_MOD_CO2: From 798fa301e19ffc8fc23e5d8c9ea2078e5bc9796a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:19 +0000 Subject: [PATCH 0268/2157] iio: dac: ad3552r-hs: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Angelo Dureghello Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-23-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r-hs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iio/dac/ad3552r-hs.c b/drivers/iio/dac/ad3552r-hs.c index c1dae58c1975..cd8dabb60c55 100644 --- a/drivers/iio/dac/ad3552r-hs.c +++ b/drivers/iio/dac/ad3552r-hs.c @@ -129,16 +129,19 @@ static int ad3552r_hs_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct ad3552r_hs_state *st = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; /* For RAW accesses, stay always in simple-spi. */ - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - return st->data->bus_reg_write(st->back, - AD3552R_REG_ADDR_CH_DAC_16B(chan->channel), - val, 2); - } - unreachable(); + ret = st->data->bus_reg_write(st->back, + AD3552R_REG_ADDR_CH_DAC_16B(chan->channel), + val, 2); + + iio_device_release_direct(indio_dev); + return ret; default: return -EINVAL; } From 41a316c8e531bc14d84918d29f3cac04caa2f003 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:20 +0000 Subject: [PATCH 0269/2157] iio: dac: ad8460: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Mariel Tinaco Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-24-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad8460.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/iio/dac/ad8460.c b/drivers/iio/dac/ad8460.c index 535ee3105af6..6e45686902dd 100644 --- a/drivers/iio/dac/ad8460.c +++ b/drivers/iio/dac/ad8460.c @@ -264,9 +264,12 @@ static ssize_t ad8460_write_toggle_en(struct iio_dev *indio_dev, uintptr_t priva if (ret) return ret; - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ad8460_enable_apg_mode(state, toggle_en); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = ad8460_enable_apg_mode(state, toggle_en); + iio_device_release_direct(indio_dev); + return ret; } static ssize_t ad8460_read_powerdown(struct iio_dev *indio_dev, uintptr_t private, @@ -421,14 +424,17 @@ static int ad8460_write_raw(struct iio_dev *indio_dev, long mask) { struct ad8460_state *state = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: switch (chan->type) { case IIO_VOLTAGE: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return ad8460_set_sample(state, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = ad8460_set_sample(state, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CURRENT: return regmap_write(state->regmap, AD8460_CTRL_REG(0x04), FIELD_PREP(AD8460_QUIESCENT_CURRENT_MSK, val)); From 73dad3ec96ae3f2da947ff0396c2910bd73f00a2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:21 +0000 Subject: [PATCH 0270/2157] iio: dummy: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Introduce two new utility functions to allow for direct returns with claim and release of direct mode in the caller. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-25-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/dummy/iio_simple_dummy.c | 119 ++++++++++++++++----------- 1 file changed, 70 insertions(+), 49 deletions(-) diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c index 09efacaf8f78..8575d4a08963 100644 --- a/drivers/iio/dummy/iio_simple_dummy.c +++ b/drivers/iio/dummy/iio_simple_dummy.c @@ -267,6 +267,65 @@ static const struct iio_chan_spec iio_dummy_channels[] = { }, }; +static int __iio_dummy_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val) +{ + struct iio_dummy_state *st = iio_priv(indio_dev); + + guard(mutex)(&st->lock); + switch (chan->type) { + case IIO_VOLTAGE: + if (chan->output) { + /* Set integer part to cached value */ + *val = st->dac_val; + return IIO_VAL_INT; + } else if (chan->differential) { + if (chan->channel == 1) + *val = st->differential_adc_val[0]; + else + *val = st->differential_adc_val[1]; + return IIO_VAL_INT; + } else { + *val = st->single_ended_adc_val; + return IIO_VAL_INT; + } + + case IIO_ACCEL: + *val = st->accel_val; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int __iio_dummy_read_processed(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val) +{ + struct iio_dummy_state *st = iio_priv(indio_dev); + + guard(mutex)(&st->lock); + switch (chan->type) { + case IIO_STEPS: + *val = st->steps; + return IIO_VAL_INT; + case IIO_ACTIVITY: + switch (chan->channel2) { + case IIO_MOD_RUNNING: + *val = st->activity_running; + return IIO_VAL_INT; + case IIO_MOD_WALKING: + *val = st->activity_walking; + return IIO_VAL_INT; + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + /** * iio_dummy_read_raw() - data read function. * @indio_dev: the struct iio_dev associated with this device instance @@ -283,59 +342,21 @@ static int iio_dummy_read_raw(struct iio_dev *indio_dev, long mask) { struct iio_dummy_state *st = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: /* magic value - channel value read */ - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - guard(mutex)(&st->lock); - switch (chan->type) { - case IIO_VOLTAGE: - if (chan->output) { - /* Set integer part to cached value */ - *val = st->dac_val; - return IIO_VAL_INT; - } else if (chan->differential) { - if (chan->channel == 1) - *val = st->differential_adc_val[0]; - else - *val = st->differential_adc_val[1]; - return IIO_VAL_INT; - } else { - *val = st->single_ended_adc_val; - return IIO_VAL_INT; - } - - case IIO_ACCEL: - *val = st->accel_val; - return IIO_VAL_INT; - default: - return -EINVAL; - } - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = __iio_dummy_read_raw(indio_dev, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_PROCESSED: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - guard(mutex)(&st->lock); - switch (chan->type) { - case IIO_STEPS: - *val = st->steps; - return IIO_VAL_INT; - case IIO_ACTIVITY: - switch (chan->channel2) { - case IIO_MOD_RUNNING: - *val = st->activity_running; - return IIO_VAL_INT; - case IIO_MOD_WALKING: - *val = st->activity_walking; - return IIO_VAL_INT; - default: - return -EINVAL; - } - default: - return -EINVAL; - } - } - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = __iio_dummy_read_processed(indio_dev, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_OFFSET: /* only single ended adc -> 7 */ *val = 7; From 0bee1bf85a9ec3d4db855eb0bb9ee4cba5dfbe13 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:22 +0000 Subject: [PATCH 0271/2157] iio: imu: bmi323: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Cc: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-26-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi323/bmi323_core.c | 44 ++++++++++++++++------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c index 7f386c5e58b4..fc54d464a3ae 100644 --- a/drivers/iio/imu/bmi323/bmi323_core.c +++ b/drivers/iio/imu/bmi323/bmi323_core.c @@ -1702,26 +1702,30 @@ static int bmi323_write_raw(struct iio_dev *indio_dev, int val2, long mask) { struct bmi323_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return bmi323_set_odr(data, - bmi323_iio_to_sensor(chan->type), - val, val2); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi323_set_odr(data, bmi323_iio_to_sensor(chan->type), + val, val2); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return bmi323_set_scale(data, - bmi323_iio_to_sensor(chan->type), - val, val2); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi323_set_scale(data, bmi323_iio_to_sensor(chan->type), + val, val2); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) - return bmi323_set_average(data, - bmi323_iio_to_sensor(chan->type), - val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi323_set_average(data, bmi323_iio_to_sensor(chan->type), + val); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_ENABLE: return bmi323_enable_steps(data, val); case IIO_CHAN_INFO_PROCESSED: { @@ -1747,6 +1751,7 @@ static int bmi323_read_raw(struct iio_dev *indio_dev, int *val2, long mask) { struct bmi323_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_PROCESSED: @@ -1755,10 +1760,11 @@ static int bmi323_read_raw(struct iio_dev *indio_dev, switch (chan->type) { case IIO_ACCEL: case IIO_ANGL_VEL: - iio_device_claim_direct_scoped(return -EBUSY, - indio_dev) - return bmi323_read_axis(data, chan, val); - unreachable(); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi323_read_axis(data, chan, val); + iio_device_release_direct(indio_dev); + return ret; case IIO_TEMP: return bmi323_get_temp_data(data, val); default: From 668d7167fc78f5fe59f0aad1e4f2705c8c9bd6cb Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:23 +0000 Subject: [PATCH 0272/2157] iio: light: bh1745: Stop using iio_device_claim_direct_scoped() This complex cleanup.h use case of conditional guards has proved to be more trouble that it is worth in terms of false positive compiler warnings and hard to read code. Move directly to the new claim/release_direct() that allow sparse to check for unbalanced context. Reviewed-by: Mudit Sharma Reviewed-by: David Lechner Link: https://patch.msgid.link/20250209180624.701140-27-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/light/bh1745.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iio/light/bh1745.c b/drivers/iio/light/bh1745.c index 3b4056be54a0..56ab5fe90ff9 100644 --- a/drivers/iio/light/bh1745.c +++ b/drivers/iio/light/bh1745.c @@ -426,16 +426,16 @@ static int bh1745_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - iio_device_claim_direct_scoped(return -EBUSY, indio_dev) { - ret = regmap_bulk_read(data->regmap, chan->address, - &value, 2); - if (ret) - return ret; - *val = value; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; - return IIO_VAL_INT; - } - unreachable(); + ret = regmap_bulk_read(data->regmap, chan->address, &value, 2); + iio_device_release_direct(indio_dev); + if (ret) + return ret; + *val = value; + + return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: { guard(mutex)(&data->lock); From 4c571885898c5c98934d086f2ab11b5e27e4f41f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 9 Feb 2025 18:06:24 +0000 Subject: [PATCH 0273/2157] iio: Drop iio_device_claim_direct_scoped() and related infrastructure Scoped conditional automated cleanup turned out to be harder to work with than expected. Despite several attempts to find a better solution non have surfaced. As such rip it out of the IIO code. Reviewed-by: David Lechner Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20250209180624.701140-28-jic23@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 5ed03e36178f..07a0e8132e88 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -688,32 +687,6 @@ static inline void iio_device_release_direct(struct iio_dev *indio_dev) __release(indio_dev); } -/* - * This autocleanup logic is normally used via - * iio_device_claim_direct_scoped(). - */ -DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), - iio_device_release_direct_mode(_T)) - -DEFINE_GUARD_COND(iio_claim_direct, _try, ({ - struct iio_dev *dev; - int d = iio_device_claim_direct_mode(_T); - - if (d < 0) - dev = NULL; - else - dev = _T; - dev; - })) - -/** - * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. - * @fail: What to do on failure to claim device. - * @iio_dev: Pointer to the IIO devices structure - */ -#define iio_device_claim_direct_scoped(fail, iio_dev) \ - scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) - int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); From f23209e9758a27c828606251f0895a2bbcb58235 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 10 Feb 2025 16:33:33 -0600 Subject: [PATCH 0274/2157] iio: adc: ad7606: use gpiod_multi_set_value_cansleep Reduce verbosity by using gpiod_multi_set_value_cansleep() instead of gpiod_set_array_value(). These are not called in an atomic context, so changing to the cansleep variant is fine. Also drop unnecessary braces while we are at it. Reviewed-by: Linus Walleij Signed-off-by: David Lechner Link: https://patch.msgid.link/20250210-gpio-set-array-helper-v3-7-d6a673674da8@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1e8b03a518b8..87908cc51e48 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -828,8 +828,7 @@ static int ad7606_write_os_hw(struct iio_dev *indio_dev, int val) values[0] = val & GENMASK(2, 0); - gpiod_set_array_value(st->gpio_os->ndescs, st->gpio_os->desc, - st->gpio_os->info, values); + gpiod_multi_set_value_cansleep(st->gpio_os, values); /* AD7616 requires a reset to update value */ if (st->chip_info->os_req_reset) @@ -1260,10 +1259,9 @@ static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev) * in the device tree, then they need to be set to high, * otherwise, they must be hardwired to VDD */ - if (st->gpio_os) { - gpiod_set_array_value(st->gpio_os->ndescs, st->gpio_os->desc, - st->gpio_os->info, os); - } + if (st->gpio_os) + gpiod_multi_set_value_cansleep(st->gpio_os, os); + /* OS of 128 and 256 are available only in software mode */ st->oversampling_avail = ad7606b_oversampling_avail; st->num_os_ratios = ARRAY_SIZE(ad7606b_oversampling_avail); From a927e72925c7a8aa2a1cf330c09ce13f4e5b1120 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 10 Feb 2025 16:33:34 -0600 Subject: [PATCH 0275/2157] iio: amplifiers: hmc425a: use gpiod_multi_set_value_cansleep Reduce verbosity by using gpiod_multi_set_value_cansleep() instead of gpiod_set_array_value_cansleep(). Passing NULL as the 3rd argument to gpiod_set_array_value_cansleep() only needs to be done if the array was constructed manually, which is not the case here. This change effectively replaces that argument with st->gpios->array_info. The possible side effect of this change is that it could make setting the GPIOs more efficient. Reviewed-by: Linus Walleij Signed-off-by: David Lechner Link: https://patch.msgid.link/20250210-gpio-set-array-helper-v3-8-d6a673674da8@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/amplifiers/hmc425a.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/amplifiers/hmc425a.c b/drivers/iio/amplifiers/hmc425a.c index 2ee4c0d70281..d9a359e1388a 100644 --- a/drivers/iio/amplifiers/hmc425a.c +++ b/drivers/iio/amplifiers/hmc425a.c @@ -161,8 +161,7 @@ static int hmc425a_write(struct iio_dev *indio_dev, u32 value) values[0] = value; - gpiod_set_array_value_cansleep(st->gpios->ndescs, st->gpios->desc, - NULL, values); + gpiod_multi_set_value_cansleep(st->gpios, values); return 0; } From 76ce6e6e5c4918844f939262b5d440e04fe5009a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 10 Feb 2025 16:33:35 -0600 Subject: [PATCH 0276/2157] iio: resolver: ad2s1210: use gpiod_multi_set_value_cansleep Reduce verbosity by using gpiod_multi_set_value_cansleep() instead of gpiod_set_array_value(). These are not called in an atomic context, so changing to the cansleep variant is fine. Reviewed-by: Linus Walleij Signed-off-by: David Lechner Link: https://patch.msgid.link/20250210-gpio-set-array-helper-v3-9-d6a673674da8@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/resolver/ad2s1210.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/iio/resolver/ad2s1210.c b/drivers/iio/resolver/ad2s1210.c index b681129a99b6..7f18df790157 100644 --- a/drivers/iio/resolver/ad2s1210.c +++ b/drivers/iio/resolver/ad2s1210.c @@ -182,8 +182,7 @@ static int ad2s1210_set_mode(struct ad2s1210_state *st, enum ad2s1210_mode mode) bitmap[0] = mode; - return gpiod_set_array_value(gpios->ndescs, gpios->desc, gpios->info, - bitmap); + return gpiod_multi_set_value_cansleep(gpios, bitmap); } /* @@ -1473,10 +1472,7 @@ static int ad2s1210_setup_gpios(struct ad2s1210_state *st) bitmap[0] = st->resolution; - ret = gpiod_set_array_value(resolution_gpios->ndescs, - resolution_gpios->desc, - resolution_gpios->info, - bitmap); + ret = gpiod_multi_set_value_cansleep(resolution_gpios, bitmap); if (ret < 0) return dev_err_probe(dev, ret, "failed to set resolution gpios\n"); From 1bd2aad57da95f7f2d2bb52f7ad15c0f4993a685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Mon, 17 Feb 2025 07:21:53 +0100 Subject: [PATCH 0277/2157] serial: mctrl_gpio: split disable_ms into sync and no_sync APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following splat has been observed on a SAMA5D27 platform using atmel_serial: BUG: sleeping function called from invalid context at kernel/irq/manage.c:738 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 27, name: kworker/u5:0 preempt_count: 1, expected: 0 INFO: lockdep is turned off. irq event stamp: 0 hardirqs last enabled at (0): [<00000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x1c4c/0x7bec softirqs last enabled at (0): [] copy_process+0x1ca0/0x7bec softirqs last disabled at (0): [<00000000>] 0x0 CPU: 0 UID: 0 PID: 27 Comm: kworker/u5:0 Not tainted 6.13.0-rc7+ #74 Hardware name: Atmel SAMA5 Workqueue: hci0 hci_power_on [bluetooth] Call trace: unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x44/0x70 dump_stack_lvl from __might_resched+0x38c/0x598 __might_resched from disable_irq+0x1c/0x48 disable_irq from mctrl_gpio_disable_ms+0x74/0xc0 mctrl_gpio_disable_ms from atmel_disable_ms.part.0+0x80/0x1f4 atmel_disable_ms.part.0 from atmel_set_termios+0x764/0x11e8 atmel_set_termios from uart_change_line_settings+0x15c/0x994 uart_change_line_settings from uart_set_termios+0x2b0/0x668 uart_set_termios from tty_set_termios+0x600/0x8ec tty_set_termios from ttyport_set_flow_control+0x188/0x1e0 ttyport_set_flow_control from wilc_setup+0xd0/0x524 [hci_wilc] wilc_setup [hci_wilc] from hci_dev_open_sync+0x330/0x203c [bluetooth] hci_dev_open_sync [bluetooth] from hci_dev_do_open+0x40/0xb0 [bluetooth] hci_dev_do_open [bluetooth] from hci_power_on+0x12c/0x664 [bluetooth] hci_power_on [bluetooth] from process_one_work+0x998/0x1a38 process_one_work from worker_thread+0x6e0/0xfb4 worker_thread from kthread+0x3d4/0x484 kthread from ret_from_fork+0x14/0x28 This warning is emitted when trying to toggle, at the highest level, some flow control (with serdev_device_set_flow_control) in a device driver. At the lowest level, the atmel_serial driver is using serial_mctrl_gpio lib to enable/disable the corresponding IRQs accordingly. The warning emitted by CONFIG_DEBUG_ATOMIC_SLEEP is due to disable_irq (called in mctrl_gpio_disable_ms) being possibly called in some atomic context (some tty drivers perform modem lines configuration in regions protected by port lock). Split mctrl_gpio_disable_ms into two differents APIs, a non-blocking one and a blocking one. Replace mctrl_gpio_disable_ms calls with the relevant version depending on whether the call is protected by some port lock. Suggested-by: Jiri Slaby Signed-off-by: Alexis Lothoré Acked-by: Richard Genoud Link: https://lore.kernel.org/r/20250217-atomic_sleep_mctrl_serial_gpio-v3-1-59324b313eef@bootlin.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/serial/driver.rst | 2 +- drivers/tty/serial/8250/8250_port.c | 2 +- drivers/tty/serial/atmel_serial.c | 2 +- drivers/tty/serial/imx.c | 2 +- drivers/tty/serial/serial_mctrl_gpio.c | 34 +++++++++++++++++----- drivers/tty/serial/serial_mctrl_gpio.h | 17 +++++++++-- drivers/tty/serial/sh-sci.c | 2 +- drivers/tty/serial/stm32-usart.c | 2 +- 8 files changed, 47 insertions(+), 16 deletions(-) diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst index df353211fc6b..fa1ebfcd4472 100644 --- a/Documentation/driver-api/serial/driver.rst +++ b/Documentation/driver-api/serial/driver.rst @@ -103,4 +103,4 @@ Some helpers are provided in order to set/get modem control lines via GPIO. .. kernel-doc:: drivers/tty/serial/serial_mctrl_gpio.c :identifiers: mctrl_gpio_init mctrl_gpio_to_gpiod mctrl_gpio_set mctrl_gpio_get mctrl_gpio_enable_ms - mctrl_gpio_disable_ms + mctrl_gpio_disable_ms_sync mctrl_gpio_disable_ms_no_sync diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 442967a6cd52..886e40f680d4 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1680,7 +1680,7 @@ static void serial8250_disable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; - mctrl_gpio_disable_ms(up->gpios); + mctrl_gpio_disable_ms_no_sync(up->gpios); up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index f44f9d20a974..8918fbd4bddd 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -700,7 +700,7 @@ static void atmel_disable_ms(struct uart_port *port) atmel_port->ms_irq_enabled = false; - mctrl_gpio_disable_ms(atmel_port->gpios); + mctrl_gpio_disable_ms_no_sync(atmel_port->gpios); if (!mctrl_gpio_to_gpiod(atmel_port->gpios, UART_GPIO_CTS)) idr |= ATMEL_US_CTSIC; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 9c59ec128bb4..cfeb3f8cf45e 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1608,7 +1608,7 @@ static void imx_uart_shutdown(struct uart_port *port) imx_uart_dma_exit(sport); } - mctrl_gpio_disable_ms(sport->gpios); + mctrl_gpio_disable_ms_sync(sport->gpios); uart_port_lock_irqsave(&sport->port, &flags); ucr2 = imx_uart_readl(sport, UCR2); diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c index 85c172ad1de9..7b02c5ca4afd 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -296,11 +296,7 @@ void mctrl_gpio_enable_ms(struct mctrl_gpios *gpios) } EXPORT_SYMBOL_GPL(mctrl_gpio_enable_ms); -/** - * mctrl_gpio_disable_ms - disable irqs and handling of changes to the ms lines - * @gpios: gpios to disable - */ -void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios) +static void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios, bool sync) { enum mctrl_gpio_idx i; @@ -316,10 +312,34 @@ void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios) if (!gpios->irq[i]) continue; - disable_irq(gpios->irq[i]); + if (sync) + disable_irq(gpios->irq[i]); + else + disable_irq_nosync(gpios->irq[i]); } } -EXPORT_SYMBOL_GPL(mctrl_gpio_disable_ms); + +/** + * mctrl_gpio_disable_ms_sync - disable irqs and handling of changes to the ms + * lines, and wait for any pending IRQ to be processed + * @gpios: gpios to disable + */ +void mctrl_gpio_disable_ms_sync(struct mctrl_gpios *gpios) +{ + mctrl_gpio_disable_ms(gpios, true); +} +EXPORT_SYMBOL_GPL(mctrl_gpio_disable_ms_sync); + +/** + * mctrl_gpio_disable_ms_no_sync - disable irqs and handling of changes to the + * ms lines, and return immediately + * @gpios: gpios to disable + */ +void mctrl_gpio_disable_ms_no_sync(struct mctrl_gpios *gpios) +{ + mctrl_gpio_disable_ms(gpios, false); +} +EXPORT_SYMBOL_GPL(mctrl_gpio_disable_ms_no_sync); void mctrl_gpio_enable_irq_wake(struct mctrl_gpios *gpios) { diff --git a/drivers/tty/serial/serial_mctrl_gpio.h b/drivers/tty/serial/serial_mctrl_gpio.h index ec4170084766..961e4ba0c6f8 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.h +++ b/drivers/tty/serial/serial_mctrl_gpio.h @@ -80,9 +80,16 @@ struct mctrl_gpios *mctrl_gpio_init_noauto(struct device *dev, void mctrl_gpio_enable_ms(struct mctrl_gpios *gpios); /* - * Disable gpio interrupts to report status line changes. + * Disable gpio interrupts to report status line changes, and block until + * any corresponding IRQ is processed */ -void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios); +void mctrl_gpio_disable_ms_sync(struct mctrl_gpios *gpios); + +/* + * Disable gpio interrupts to report status line changes, and return + * immediately + */ +void mctrl_gpio_disable_ms_no_sync(struct mctrl_gpios *gpios); /* * Enable gpio wakeup interrupts to enable wake up source. @@ -136,7 +143,11 @@ static inline void mctrl_gpio_enable_ms(struct mctrl_gpios *gpios) { } -static inline void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios) +static inline void mctrl_gpio_disable_ms_sync(struct mctrl_gpios *gpios) +{ +} + +static inline void mctrl_gpio_disable_ms_no_sync(struct mctrl_gpios *gpios) { } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 4bc637f9d649..e0ead0147bfe 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2310,7 +2310,7 @@ static void sci_shutdown(struct uart_port *port) dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); s->autorts = false; - mctrl_gpio_disable_ms(to_sci_port(port)->gpios); + mctrl_gpio_disable_ms_sync(to_sci_port(port)->gpios); uart_port_lock_irqsave(port, &flags); sci_stop_rx(port); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 1ec5d8c3aef8..4c97965ec43b 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -944,7 +944,7 @@ static void stm32_usart_enable_ms(struct uart_port *port) static void stm32_usart_disable_ms(struct uart_port *port) { - mctrl_gpio_disable_ms(to_stm32_port(port)->gpios); + mctrl_gpio_disable_ms_sync(to_stm32_port(port)->gpios); } /* Transmit stop */ From af7ac64ebd6f3ecf7560c8b299f31e8669ea4cd5 Mon Sep 17 00:00:00 2001 From: Catalin Popescu Date: Thu, 13 Feb 2025 10:43:36 +0100 Subject: [PATCH 0278/2157] dt-bindings: usb: microchip,usb2514: add support for vdda Microchip hub USB2514 has one 3V3 digital power supply and one 3V3 analog power supply. Add support for analog power supply vdda. Signed-off-by: Catalin Popescu Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250213094338.1611389-1-catalin.popescu@leica-geosystems.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/microchip,usb2514.yaml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index b14e6f37b298..d0479beee30b 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -9,9 +9,6 @@ title: Microchip USB2514 Hub Controller maintainers: - Fabio Estevam -allOf: - - $ref: usb-device.yaml# - properties: compatible: enum: @@ -28,6 +25,9 @@ properties: vdd-supply: description: 3.3V power supply. + vdda-supply: + description: 3.3V analog power supply. + clocks: description: External 24MHz clock connected to the CLKIN pin. maxItems: 1 @@ -43,6 +43,18 @@ patternProperties: $ref: /schemas/usb/usb-device.yaml additionalProperties: true +allOf: + - $ref: usb-device.yaml# + - if: + not: + properties: + compatible: + contains: + const: usb424,2514 + then: + properties: + vdda-supply: false + unevaluatedProperties: false examples: @@ -60,6 +72,7 @@ examples: clocks = <&clks IMX6QDL_CLK_CKO>; reset-gpios = <&gpio7 12 GPIO_ACTIVE_LOW>; vdd-supply = <®_3v3_hub>; + vdda-supply = <®_3v3a_hub>; #address-cells = <1>; #size-cells = <0>; From 233840bbdf7ca482645a934b3db8c41476d7391f Mon Sep 17 00:00:00 2001 From: Catalin Popescu Date: Thu, 13 Feb 2025 10:43:37 +0100 Subject: [PATCH 0279/2157] dt-bindings: usb: microchip,usb2514: add support for USB2512/USB2513 Extend support to Microchip hubs USB2512 & USB2513 which are identical to USB2514 but offer less downstream ports (i.e. respectively 2 and 3 ports). Signed-off-by: Catalin Popescu Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250213094338.1611389-2-catalin.popescu@leica-geosystems.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/microchip,usb2514.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index d0479beee30b..4e3901efed3f 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -11,11 +11,17 @@ maintainers: properties: compatible: - enum: - - usb424,2412 - - usb424,2417 - - usb424,2514 - - usb424,2517 + oneOf: + - enum: + - usb424,2412 + - usb424,2417 + - usb424,2514 + - usb424,2517 + - items: + - enum: + - usb424,2512 + - usb424,2513 + - const: usb424,2514 reg: true From 673655f7944faa377744e707e5c7f46be0a0bc7f Mon Sep 17 00:00:00 2001 From: Catalin Popescu Date: Thu, 13 Feb 2025 10:43:38 +0100 Subject: [PATCH 0280/2157] usb: misc: onboard_dev: add vdda support for Microchip USB2514 This hub is powered by digital and analog 3V3 power rails, so provide the possibility to use different regulators for digital (vdd) and analog (vdda) power rails. Signed-off-by: Catalin Popescu Link: https://lore.kernel.org/r/20250213094338.1611389-3-catalin.popescu@leica-geosystems.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_dev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/onboard_usb_dev.h b/drivers/usb/misc/onboard_usb_dev.h index 317b3eb99c02..933797a7e084 100644 --- a/drivers/usb/misc/onboard_usb_dev.h +++ b/drivers/usb/misc/onboard_usb_dev.h @@ -23,6 +23,13 @@ static const struct onboard_dev_pdata microchip_usb424_data = { .is_hub = true, }; +static const struct onboard_dev_pdata microchip_usb2514_data = { + .reset_us = 1, + .num_supplies = 2, + .supply_names = { "vdd", "vdda" }, + .is_hub = true, +}; + static const struct onboard_dev_pdata microchip_usb5744_data = { .reset_us = 0, .power_on_delay_us = 10000, @@ -96,7 +103,7 @@ static const struct onboard_dev_pdata xmos_xvf3500_data = { static const struct of_device_id onboard_dev_match[] = { { .compatible = "usb424,2412", .data = µchip_usb424_data, }, - { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2514", .data = µchip_usb2514_data, }, { .compatible = "usb424,2517", .data = µchip_usb424_data, }, { .compatible = "usb424,2744", .data = µchip_usb5744_data, }, { .compatible = "usb424,5744", .data = µchip_usb5744_data, }, From 2ded07a8a21bfb6e48e49d293ae96a9705751feb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Feb 2025 12:42:35 +0100 Subject: [PATCH 0281/2157] dt-bindings: usb: usb-device: Replace free-form 'reg' with constraints Replace free-form text of 'reg' property with proper constraints so incorrect values can be actually reported. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250214114235.49476-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-device.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb-device.yaml b/Documentation/devicetree/bindings/usb/usb-device.yaml index da890ee60ce6..c67695681033 100644 --- a/Documentation/devicetree/bindings/usb/usb-device.yaml +++ b/Documentation/devicetree/bindings/usb/usb-device.yaml @@ -39,8 +39,10 @@ properties: reg: description: the number of the USB hub port or the USB host-controller - port to which this device is attached. The range is 1-255. - maxItems: 1 + port to which this device is attached. + items: + - minimum: 1 + maximum: 255 "#address-cells": description: should be 1 for hub nodes with device nodes, From fe54c948d38e6c2426ada456a0c00714e87b3312 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Sun, 16 Feb 2025 00:16:07 +0000 Subject: [PATCH 0282/2157] USB: docs: Fix typo in aspeed-lpc.yaml Correct 'Tehchnology' to 'Technology' in the copyright line. Signed-off-by: Suraj Patil Link: https://lore.kernel.org/r/20250216001609.106616-1-surajpatil522@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml b/Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml index 5dfe77aca167..d88854e60b7f 100644 --- a/Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml +++ b/Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -# # Copyright (c) 2021 Aspeed Tehchnology Inc. +# # Copyright (c) 2021 Aspeed Technology Inc. %YAML 1.2 --- $id: http://devicetree.org/schemas/mfd/aspeed-lpc.yaml# From 7b2328c5a0091e4b85b59f9e758b8d2cd1cbefcc Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Sun, 16 Feb 2025 00:16:08 +0000 Subject: [PATCH 0283/2157] docs: Fix typo in usb/CREDITS Correct 'Implementors' to 'Implementers'. Signed-off-by: Suraj Patil Link: https://lore.kernel.org/r/20250216001609.106616-2-surajpatil522@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/CREDITS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/usb/CREDITS b/Documentation/usb/CREDITS index 81ea3eb29e96..ce6450a6ed7c 100644 --- a/Documentation/usb/CREDITS +++ b/Documentation/usb/CREDITS @@ -161,7 +161,7 @@ THANKS file in Inaky's driver): - The people at the linux-usb mailing list, for reading so many messages :) Ok, no more kidding; for all your advises! - - All the people at the USB Implementors Forum for their + - All the people at the USB Implementers Forum for their help and assistance. - Nathan Myers , for his advice! (hope you From 3975e68cf31f670c1350710fa2d256556a5432b2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 17 Feb 2025 14:41:30 +0100 Subject: [PATCH 0284/2157] usb: dwc2: gadget: Introduce register restore flags dwc2_restore_device_registers() use a single boolean to decide about the register restoring behavior. So replace this with a flags parameter, which can be extended later. No functional change intended. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250217134132.36786-2-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 6 ++++-- drivers/usb/dwc2/gadget.c | 12 +++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 2bd74f3033ed..48f4b639ca2f 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1127,6 +1127,8 @@ struct dwc2_hsotg { #define DWC2_FS_IOT_ID 0x55310000 #define DWC2_HS_IOT_ID 0x55320000 +#define DWC2_RESTORE_DCTL BIT(0) + #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) union dwc2_hcd_internal_flags { u32 d32; @@ -1420,7 +1422,7 @@ int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode); #define dwc2_is_device_connected(hsotg) (hsotg->connected) #define dwc2_is_device_enabled(hsotg) (hsotg->enabled) int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg); -int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup); +int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, unsigned int flags); int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg); int dwc2_gadget_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, int reset); @@ -1459,7 +1461,7 @@ static inline int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, static inline int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg) { return 0; } static inline int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, - int remote_wakeup) + unsigned int flags) { return 0; } static inline int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg) { return 0; } diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index bd4c788f03bc..4fb7bac50559 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5204,11 +5204,11 @@ int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg) * if controller power were disabled. * * @hsotg: Programming view of the DWC_otg controller - * @remote_wakeup: Indicates whether resume is initiated by Device or Host. + * @flags: Defines which registers should be restored. * * Return: 0 if successful, negative error code otherwise */ -int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup) +int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, unsigned int flags) { struct dwc2_dregs_backup *dr; int i; @@ -5224,7 +5224,7 @@ int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup) } dr->valid = false; - if (!remote_wakeup) + if (flags & DWC2_RESTORE_DCTL) dwc2_writel(hsotg, dr->dctl, DCTL); dwc2_writel(hsotg, dr->daintmsk, DAINTMSK); @@ -5415,6 +5415,7 @@ int dwc2_gadget_exit_hibernation(struct dwc2_hsotg *hsotg, u32 gpwrdn; u32 dctl; int ret = 0; + unsigned int flags = 0; struct dwc2_gregs_backup *gr; struct dwc2_dregs_backup *dr; @@ -5477,6 +5478,7 @@ int dwc2_gadget_exit_hibernation(struct dwc2_hsotg *hsotg, dctl = dwc2_readl(hsotg, DCTL); dctl |= DCTL_PWRONPRGDONE; dwc2_writel(hsotg, dctl, DCTL); + flags |= DWC2_RESTORE_DCTL; } /* Wait for interrupts which must be cleared */ mdelay(2); @@ -5492,7 +5494,7 @@ int dwc2_gadget_exit_hibernation(struct dwc2_hsotg *hsotg, } /* Restore device registers */ - ret = dwc2_restore_device_registers(hsotg, rem_wakeup); + ret = dwc2_restore_device_registers(hsotg, flags); if (ret) { dev_err(hsotg->dev, "%s: failed to restore device registers\n", __func__); @@ -5620,7 +5622,7 @@ int dwc2_gadget_exit_partial_power_down(struct dwc2_hsotg *hsotg, /* Restore DCFG */ dwc2_writel(hsotg, dr->dcfg, DCFG); - ret = dwc2_restore_device_registers(hsotg, 0); + ret = dwc2_restore_device_registers(hsotg, DWC2_RESTORE_DCTL); if (ret) { dev_err(hsotg->dev, "%s: failed to restore device registers\n", __func__); From 8b7a1b3da2e290bcbe8024519c86ddf1aa2096e8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 17 Feb 2025 14:41:31 +0100 Subject: [PATCH 0285/2157] usb: dwc2: Refactor backup/restore of registers The DWC2 runtime PM code reuses similar patterns to backup and restore the registers. So consolidate them in USB mode specific variants. This also has the advantage it is reusable for further PM improvements. Special care is taken for DCFG register during device mode restore. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250217134132.36786-3-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 15 ++++++ drivers/usb/dwc2/gadget.c | 108 +++++++++++++++++++------------------- drivers/usb/dwc2/hcd.c | 99 +++++++++++++++++----------------- 3 files changed, 121 insertions(+), 101 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 48f4b639ca2f..265791fbe87f 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1128,6 +1128,7 @@ struct dwc2_hsotg { #define DWC2_HS_IOT_ID 0x55320000 #define DWC2_RESTORE_DCTL BIT(0) +#define DWC2_RESTORE_DCFG BIT(1) #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) union dwc2_hcd_internal_flags { @@ -1437,6 +1438,9 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg); int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg); void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg); void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg); +int dwc2_gadget_backup_critical_registers(struct dwc2_hsotg *hsotg); +int dwc2_gadget_restore_critical_registers(struct dwc2_hsotg *hsotg, + unsigned int flags); static inline void dwc2_clear_fifo_map(struct dwc2_hsotg *hsotg) { hsotg->fifo_map = 0; } #else @@ -1484,6 +1488,11 @@ static inline int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg) { return 0; } static inline void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg) {} static inline void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg) {} +static inline int dwc2_gadget_backup_critical_registers(struct dwc2_hsotg *hsotg) +{ return 0; } +static inline int dwc2_gadget_restore_critical_registers(struct dwc2_hsotg *hsotg, + unsigned int flags) +{ return 0; } static inline void dwc2_clear_fifo_map(struct dwc2_hsotg *hsotg) {} #endif @@ -1507,6 +1516,8 @@ int dwc2_host_exit_partial_power_down(struct dwc2_hsotg *hsotg, void dwc2_host_enter_clock_gating(struct dwc2_hsotg *hsotg); void dwc2_host_exit_clock_gating(struct dwc2_hsotg *hsotg, int rem_wakeup); bool dwc2_host_can_poweroff_phy(struct dwc2_hsotg *dwc2); +int dwc2_host_backup_critical_registers(struct dwc2_hsotg *hsotg); +int dwc2_host_restore_critical_registers(struct dwc2_hsotg *hsotg); static inline void dwc2_host_schedule_phy_reset(struct dwc2_hsotg *hsotg) { schedule_work(&hsotg->phy_reset_work); } #else @@ -1546,6 +1557,10 @@ static inline void dwc2_host_exit_clock_gating(struct dwc2_hsotg *hsotg, int rem_wakeup) {} static inline bool dwc2_host_can_poweroff_phy(struct dwc2_hsotg *dwc2) { return false; } +static inline int dwc2_host_backup_critical_registers(struct dwc2_hsotg *hsotg) +{ return 0; } +static inline int dwc2_host_restore_critical_registers(struct dwc2_hsotg *hsotg) +{ return 0; } static inline void dwc2_host_schedule_phy_reset(struct dwc2_hsotg *hsotg) {} #endif diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4fb7bac50559..300ea4969f0c 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5224,6 +5224,9 @@ int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, unsigned int flags) } dr->valid = false; + if (flags & DWC2_RESTORE_DCFG) + dwc2_writel(hsotg, dr->dcfg, DCFG); + if (flags & DWC2_RESTORE_DCTL) dwc2_writel(hsotg, dr->dctl, DCTL); @@ -5310,6 +5313,49 @@ void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "GREFCLK=0x%08x\n", dwc2_readl(hsotg, GREFCLK)); } +int dwc2_gadget_backup_critical_registers(struct dwc2_hsotg *hsotg) +{ + int ret; + + /* Backup all registers */ + ret = dwc2_backup_global_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to backup global registers\n", + __func__); + return ret; + } + + ret = dwc2_backup_device_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to backup device registers\n", + __func__); + return ret; + } + + return 0; +} + +int dwc2_gadget_restore_critical_registers(struct dwc2_hsotg *hsotg, + unsigned int flags) +{ + int ret; + + ret = dwc2_restore_global_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to restore registers\n", + __func__); + return ret; + } + ret = dwc2_restore_device_registers(hsotg, flags); + if (ret) { + dev_err(hsotg->dev, "%s: failed to restore device registers\n", + __func__); + return ret; + } + + return 0; +} + /** * dwc2_gadget_enter_hibernation() - Put controller in Hibernation. * @@ -5327,18 +5373,9 @@ int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg) /* Change to L2(suspend) state */ hsotg->lx_state = DWC2_L2; dev_dbg(hsotg->dev, "Start of hibernation completed\n"); - ret = dwc2_backup_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup global registers\n", - __func__); + ret = dwc2_gadget_backup_critical_registers(hsotg); + if (ret) return ret; - } - ret = dwc2_backup_device_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup device registers\n", - __func__); - return ret; - } gpwrdn = GPWRDN_PWRDNRSTN; udelay(10); @@ -5486,20 +5523,9 @@ int dwc2_gadget_exit_hibernation(struct dwc2_hsotg *hsotg, dwc2_writel(hsotg, 0xffffffff, GINTSTS); /* Restore global registers */ - ret = dwc2_restore_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore registers\n", - __func__); + ret = dwc2_gadget_restore_critical_registers(hsotg, flags); + if (ret) return ret; - } - - /* Restore device registers */ - ret = dwc2_restore_device_registers(hsotg, flags); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore device registers\n", - __func__); - return ret; - } if (rem_wakeup) { mdelay(10); @@ -5533,19 +5559,9 @@ int dwc2_gadget_enter_partial_power_down(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "Entering device partial power down started.\n"); /* Backup all registers */ - ret = dwc2_backup_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup global registers\n", - __func__); + ret = dwc2_gadget_backup_critical_registers(hsotg); + if (ret) return ret; - } - - ret = dwc2_backup_device_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup device registers\n", - __func__); - return ret; - } /* * Clear any pending interrupts since dwc2 will not be able to @@ -5592,11 +5608,8 @@ int dwc2_gadget_exit_partial_power_down(struct dwc2_hsotg *hsotg, { u32 pcgcctl; u32 dctl; - struct dwc2_dregs_backup *dr; int ret = 0; - dr = &hsotg->dr_backup; - dev_dbg(hsotg->dev, "Exiting device partial Power Down started.\n"); pcgcctl = dwc2_readl(hsotg, PCGCTL); @@ -5613,21 +5626,10 @@ int dwc2_gadget_exit_partial_power_down(struct dwc2_hsotg *hsotg, udelay(100); if (restore) { - ret = dwc2_restore_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore registers\n", - __func__); + ret = dwc2_gadget_restore_critical_registers(hsotg, DWC2_RESTORE_DCTL | + DWC2_RESTORE_DCFG); + if (ret) return ret; - } - /* Restore DCFG */ - dwc2_writel(hsotg, dr->dcfg, DCFG); - - ret = dwc2_restore_device_registers(hsotg, DWC2_RESTORE_DCTL); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore device registers\n", - __func__); - return ret; - } } /* Set the Power-On Programming done bit */ diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 8c3941ecaaf5..869245238d6c 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5474,6 +5474,49 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) return 0; } +int dwc2_host_backup_critical_registers(struct dwc2_hsotg *hsotg) +{ + int ret; + + /* Backup all registers */ + ret = dwc2_backup_global_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to backup global registers\n", + __func__); + return ret; + } + + ret = dwc2_backup_host_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to backup host registers\n", + __func__); + return ret; + } + + return 0; +} + +int dwc2_host_restore_critical_registers(struct dwc2_hsotg *hsotg) +{ + int ret; + + ret = dwc2_restore_global_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to restore registers\n", + __func__); + return ret; + } + + ret = dwc2_restore_host_registers(hsotg); + if (ret) { + dev_err(hsotg->dev, "%s: failed to restore host registers\n", + __func__); + return ret; + } + + return 0; +} + /** * dwc2_host_enter_hibernation() - Put controller in Hibernation. * @@ -5489,18 +5532,9 @@ int dwc2_host_enter_hibernation(struct dwc2_hsotg *hsotg) u32 gpwrdn; dev_dbg(hsotg->dev, "Preparing host for hibernation\n"); - ret = dwc2_backup_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup global registers\n", - __func__); + ret = dwc2_host_backup_critical_registers(hsotg); + if (ret) return ret; - } - ret = dwc2_backup_host_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup host registers\n", - __func__); - return ret; - } /* Enter USB Suspend Mode */ hprt0 = dwc2_readl(hsotg, HPRT0); @@ -5694,20 +5728,9 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, 0xffffffff, GINTSTS); /* Restore global registers */ - ret = dwc2_restore_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore registers\n", - __func__); + ret = dwc2_host_restore_critical_registers(hsotg); + if (ret) return ret; - } - - /* Restore host registers */ - ret = dwc2_restore_host_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore host registers\n", - __func__); - return ret; - } if (rem_wakeup) { dwc2_hcd_rem_wakeup(hsotg); @@ -5774,19 +5797,9 @@ int dwc2_host_enter_partial_power_down(struct dwc2_hsotg *hsotg) dev_warn(hsotg->dev, "Suspend wasn't generated\n"); /* Backup all registers */ - ret = dwc2_backup_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup global registers\n", - __func__); + ret = dwc2_host_backup_critical_registers(hsotg); + if (ret) return ret; - } - - ret = dwc2_backup_host_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to backup host registers\n", - __func__); - return ret; - } /* * Clear any pending interrupts since dwc2 will not be able to @@ -5855,19 +5868,9 @@ int dwc2_host_exit_partial_power_down(struct dwc2_hsotg *hsotg, udelay(100); if (restore) { - ret = dwc2_restore_global_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore registers\n", - __func__); + ret = dwc2_host_restore_critical_registers(hsotg); + if (ret) return ret; - } - - ret = dwc2_restore_host_registers(hsotg); - if (ret) { - dev_err(hsotg->dev, "%s: failed to restore host registers\n", - __func__); - return ret; - } } /* Drive resume signaling and exit suspend mode on the port. */ From ba6e518d136b25b340ddedb97a79db5642e4ed7f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 17 Feb 2025 14:41:32 +0100 Subject: [PATCH 0286/2157] usb: dwc2: Implement recovery after PM domain off According to the dt-bindings there are some platforms, which have a dedicated USB power domain for DWC2 IP core supply. If the power domain is switched off during system suspend then all USB register will lose their settings. Use GUSBCFG_TOUTCAL as a canary to detect that the power domain has been powered off during suspend. Since the GOTGCTL_CURMODE_HOST doesn't match on all platform with the current mode, additionally backup GINTSTS. This works reliable to decide which registers should be restored. Signed-off-by: Stefan Wahren Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20250217134132.36786-4-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.c | 1 + drivers/usb/dwc2/core.h | 2 ++ drivers/usb/dwc2/platform.c | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c index 9919ab725d54..c3d24312db0f 100644 --- a/drivers/usb/dwc2/core.c +++ b/drivers/usb/dwc2/core.c @@ -43,6 +43,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg) /* Backup global regs */ gr = &hsotg->gr_backup; + gr->gintsts = dwc2_readl(hsotg, GINTSTS); gr->gotgctl = dwc2_readl(hsotg, GOTGCTL); gr->gintmsk = dwc2_readl(hsotg, GINTMSK); gr->gahbcfg = dwc2_readl(hsotg, GAHBCFG); diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 265791fbe87f..34127b890b2a 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -667,6 +667,7 @@ struct dwc2_hw_params { /** * struct dwc2_gregs_backup - Holds global registers state before * entering partial power down + * @gintsts: Backup of GINTSTS register * @gotgctl: Backup of GOTGCTL register * @gintmsk: Backup of GINTMSK register * @gahbcfg: Backup of GAHBCFG register @@ -683,6 +684,7 @@ struct dwc2_hw_params { * @valid: True if registers values backuped. */ struct dwc2_gregs_backup { + u32 gintsts; u32 gotgctl; u32 gintmsk; u32 gahbcfg; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 91c80a92d9b8..12b4dc07d08a 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -685,6 +685,14 @@ static int __maybe_unused dwc2_suspend(struct device *dev) regulator_disable(dwc2->usb33d); } + if (is_device_mode) + ret = dwc2_gadget_backup_critical_registers(dwc2); + else + ret = dwc2_host_backup_critical_registers(dwc2); + + if (ret) + return ret; + if (dwc2->ll_hw_enabled && (is_device_mode || dwc2_host_can_poweroff_phy(dwc2))) { ret = __dwc2_lowlevel_hw_disable(dwc2); @@ -694,6 +702,24 @@ static int __maybe_unused dwc2_suspend(struct device *dev) return ret; } +static int dwc2_restore_critical_registers(struct dwc2_hsotg *hsotg) +{ + struct dwc2_gregs_backup *gr; + + gr = &hsotg->gr_backup; + + if (!gr->valid) { + dev_err(hsotg->dev, "No valid register backup, failed to restore\n"); + return -EINVAL; + } + + if (gr->gintsts & GINTSTS_CURMODE_HOST) + return dwc2_host_restore_critical_registers(hsotg); + + return dwc2_gadget_restore_critical_registers(hsotg, DWC2_RESTORE_DCTL | + DWC2_RESTORE_DCFG); +} + static int __maybe_unused dwc2_resume(struct device *dev) { struct dwc2_hsotg *dwc2 = dev_get_drvdata(dev); @@ -706,6 +732,18 @@ static int __maybe_unused dwc2_resume(struct device *dev) } dwc2->phy_off_for_suspend = false; + /* + * During suspend it's possible that the power domain for the + * DWC2 controller is disabled and all register values get lost. + * In case the GUSBCFG register is not initialized, it's clear the + * registers must be restored. + */ + if (!(dwc2_readl(dwc2, GUSBCFG) & GUSBCFG_TOUTCAL_MASK)) { + ret = dwc2_restore_critical_registers(dwc2); + if (ret) + return ret; + } + if (dwc2->params.activate_stm_id_vb_detection) { unsigned long flags; u32 ggpio, gotgctl; From 834d1cb7ecf3f2812fc3c8cbe870cf2ad192f68e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 18 Feb 2025 09:22:43 +0100 Subject: [PATCH 0287/2157] usb: typec: ps883x: fix probe error handling Fix the probe error handling to avoid unbalanced clock disable or leaving regulators on after probe failure. Note that the active-low reset pin should also be asserted to avoid leaking current after disabling the regulators. Fixes: 257a087c8b52 ("usb: typec: Add support for Parade PS8830 Type-C Retimer") Cc: Abel Vesa Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250218082243.9318-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/ps883x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/mux/ps883x.c b/drivers/usb/typec/mux/ps883x.c index 10e407ab6b7f..ef086989231f 100644 --- a/drivers/usb/typec/mux/ps883x.c +++ b/drivers/usb/typec/mux/ps883x.c @@ -387,10 +387,11 @@ static int ps883x_retimer_probe(struct i2c_client *client) err_switch_unregister: typec_switch_unregister(retimer->sw); -err_vregs_disable: - ps883x_disable_vregs(retimer); err_clk_disable: clk_disable_unprepare(retimer->xo_clk); +err_vregs_disable: + gpiod_set_value(retimer->reset_gpio, 1); + ps883x_disable_vregs(retimer); err_mux_put: typec_mux_put(retimer->typec_mux); err_switch_put: From 9f9de3e02d7f6f99ce6f4be92c28537706634ae9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 18 Feb 2025 16:29:31 +0100 Subject: [PATCH 0288/2157] usb: typec: ps883x: fix registration race Make sure that the retimer is fully setup before registering it to avoid having consumers try to access it while it is being reset. Fixes: 257a087c8b52 ("usb: typec: Add support for Parade PS8830 Type-C Retimer") Cc: Abel Vesa Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250218152933.22992-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/ps883x.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/usb/typec/mux/ps883x.c b/drivers/usb/typec/mux/ps883x.c index ef086989231f..274de7abe585 100644 --- a/drivers/usb/typec/mux/ps883x.c +++ b/drivers/usb/typec/mux/ps883x.c @@ -346,6 +346,22 @@ static int ps883x_retimer_probe(struct i2c_client *client) goto err_vregs_disable; } + /* skip resetting if already configured */ + if (regmap_test_bits(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, + CONN_STATUS_0_CONNECTION_PRESENT) == 1) { + gpiod_direction_output(retimer->reset_gpio, 0); + } else { + gpiod_direction_output(retimer->reset_gpio, 1); + + /* VDD IO supply enable to reset release delay */ + usleep_range(4000, 14000); + + gpiod_set_value(retimer->reset_gpio, 0); + + /* firmware initialization delay */ + msleep(60); + } + sw_desc.drvdata = retimer; sw_desc.fwnode = dev_fwnode(dev); sw_desc.set = ps883x_sw_set; @@ -368,21 +384,6 @@ static int ps883x_retimer_probe(struct i2c_client *client) goto err_switch_unregister; } - /* skip resetting if already configured */ - if (regmap_test_bits(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, - CONN_STATUS_0_CONNECTION_PRESENT) == 1) - return gpiod_direction_output(retimer->reset_gpio, 0); - - gpiod_direction_output(retimer->reset_gpio, 1); - - /* VDD IO supply enable to reset release delay */ - usleep_range(4000, 14000); - - gpiod_set_value(retimer->reset_gpio, 0); - - /* firmware initialization delay */ - msleep(60); - return 0; err_switch_unregister: From 9e7968c4424875fe9ce87c993f2f75784a2989cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 18 Feb 2025 16:29:32 +0100 Subject: [PATCH 0289/2157] usb: typec: ps883x: fix missing accessibility check Make sure that the retimer is accessible before registering to avoid having later consumer calls fail to configure it, something which, for example, can lead to a hotplugged display not being recognised: [drm:msm_dp_panel_read_sink_caps [msm]] *ERROR* read dpcd failed -110 Fixes: 257a087c8b52 ("usb: typec: Add support for Parade PS8830 Type-C Retimer") Cc: Abel Vesa Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250218152933.22992-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/ps883x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/mux/ps883x.c b/drivers/usb/typec/mux/ps883x.c index 274de7abe585..f8b47187f4cf 100644 --- a/drivers/usb/typec/mux/ps883x.c +++ b/drivers/usb/typec/mux/ps883x.c @@ -291,6 +291,7 @@ static int ps883x_retimer_probe(struct i2c_client *client) struct typec_switch_desc sw_desc = { }; struct typec_retimer_desc rtmr_desc = { }; struct ps883x_retimer *retimer; + unsigned int val; int ret; retimer = devm_kzalloc(dev, sizeof(*retimer), GFP_KERNEL); @@ -360,6 +361,16 @@ static int ps883x_retimer_probe(struct i2c_client *client) /* firmware initialization delay */ msleep(60); + + /* make sure device is accessible */ + ret = regmap_read(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, + &val); + if (ret) { + dev_err(dev, "failed to read conn_status_0: %d\n", ret); + if (ret == -ENXIO) + ret = -EIO; + goto err_clk_disable; + } } sw_desc.drvdata = retimer; From 21b1aea451b2790b17e0c8893fba914aeb6eed68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 18 Feb 2025 16:29:33 +0100 Subject: [PATCH 0290/2157] usb: typec: ps883x: fix configuration error handling Propagate errors to the consumers when configuring the retimer so that they can act on any failures as intended, for example: ps883x_retimer 2-0008: failed to write conn_status_0: -5 pmic_glink_altmode.pmic_glink_altmode pmic_glink.altmode.0: failed to setup retimer to DP: -5 Fixes: 257a087c8b52 ("usb: typec: Add support for Parade PS8830 Type-C Retimer") Cc: Abel Vesa Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250218152933.22992-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/ps883x.c | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/usb/typec/mux/ps883x.c b/drivers/usb/typec/mux/ps883x.c index f8b47187f4cf..ad59babf7cce 100644 --- a/drivers/usb/typec/mux/ps883x.c +++ b/drivers/usb/typec/mux/ps883x.c @@ -58,12 +58,31 @@ struct ps883x_retimer { unsigned int svid; }; -static void ps883x_configure(struct ps883x_retimer *retimer, int cfg0, - int cfg1, int cfg2) +static int ps883x_configure(struct ps883x_retimer *retimer, int cfg0, + int cfg1, int cfg2) { - regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, cfg0); - regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_1, cfg1); - regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_2, cfg2); + struct device *dev = &retimer->client->dev; + int ret; + + ret = regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_0, cfg0); + if (ret) { + dev_err(dev, "failed to write conn_status_0: %d\n", ret); + return ret; + } + + ret = regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_1, cfg1); + if (ret) { + dev_err(dev, "failed to write conn_status_1: %d\n", ret); + return ret; + } + + ret = regmap_write(retimer->regmap, REG_USB_PORT_CONN_STATUS_2, cfg2); + if (ret) { + dev_err(dev, "failed to write conn_status_2: %d\n", ret); + return ret; + } + + return 0; } static int ps883x_set(struct ps883x_retimer *retimer) @@ -74,8 +93,7 @@ static int ps883x_set(struct ps883x_retimer *retimer) if (retimer->orientation == TYPEC_ORIENTATION_NONE || retimer->mode == TYPEC_STATE_SAFE) { - ps883x_configure(retimer, cfg0, cfg1, cfg2); - return 0; + return ps883x_configure(retimer, cfg0, cfg1, cfg2); } if (retimer->mode != TYPEC_STATE_USB && retimer->svid != USB_TYPEC_DP_SID) @@ -113,9 +131,7 @@ static int ps883x_set(struct ps883x_retimer *retimer) return -EOPNOTSUPP; } - ps883x_configure(retimer, cfg0, cfg1, cfg2); - - return 0; + return ps883x_configure(retimer, cfg0, cfg1, cfg2); } static int ps883x_sw_set(struct typec_switch_dev *sw, From 0a86e49acfbb4f97ba86f05eb250f4c65c8bec0d Mon Sep 17 00:00:00 2001 From: Igor Belwon Date: Mon, 17 Feb 2025 20:44:04 +0100 Subject: [PATCH 0291/2157] dt-bindings: usb: samsung,exynos-dwc3 Add exynos990 compatible Add a compatible for the exynos990-dwusb3 node. It's compatible with the exynos850 variant when using the highspeed mode. Signed-off-by: Igor Belwon Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250217-exynos990-bindings-usb3-v2-1-3b3f0809f4fb@mentallysanemainliners.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/samsung,exynos-dwc3.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml index 2b3430cebe99..f11e767a8abe 100644 --- a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml @@ -11,12 +11,16 @@ maintainers: properties: compatible: - enum: - - google,gs101-dwusb3 - - samsung,exynos5250-dwusb3 - - samsung,exynos5433-dwusb3 - - samsung,exynos7-dwusb3 - - samsung,exynos850-dwusb3 + oneOf: + - enum: + - google,gs101-dwusb3 + - samsung,exynos5250-dwusb3 + - samsung,exynos5433-dwusb3 + - samsung,exynos7-dwusb3 + - samsung,exynos850-dwusb3 + - items: + - const: samsung,exynos990-dwusb3 + - const: samsung,exynos850-dwusb3 '#address-cells': const: 1 From c1baf6528bcfd6a86842093ff3f8ff8caf309c12 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Thu, 13 Feb 2025 11:31:12 +0100 Subject: [PATCH 0292/2157] staging: gpib: Fix cb7210 pcmcia Oops The pcmcia_driver struct was still only using the old .name initialization in the drv field. This led to a NULL pointer deref Oops in strcmp called from pcmcia_register_driver. Initialize the pcmcia_driver struct name field. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502131453.cb6d2e4a-lkp@intel.com Fixes: e9dc69956d4d ("staging: gpib: Add Computer Boards GPIB driver") Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250213103112.4415-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/cb7210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 381c508f62eb..0d601b6a00fb 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -1347,8 +1347,8 @@ static struct pcmcia_device_id cb_pcmcia_ids[] = { MODULE_DEVICE_TABLE(pcmcia, cb_pcmcia_ids); static struct pcmcia_driver cb_gpib_cs_driver = { + .name = "cb_gpib_cs", .owner = THIS_MODULE, - .drv = { .name = "cb_gpib_cs", }, .id_table = cb_pcmcia_ids, .probe = cb_gpib_probe, .remove = cb_gpib_remove, From 453b733ce316568559c98bbe2e69ef0de7fd10b8 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Tue, 4 Feb 2025 18:42:54 +0100 Subject: [PATCH 0293/2157] staging: gpib: Remove dependencies on !X86_PAE The dependecies on !X86_PAE were introduced to fix an i386 build issue due to drivers casting resource_type_t to void * commit 48e8a8160dba ("staging: gpib: Fix i386 build issue") Subsequently commit baf8855c9160 ("staging: gpib: fix address space mixup") properly resolved these issues by fixing the code in the drivers. Delete the lines "depends on !X86_PAE" Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250204174254.16576-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/staging/gpib/Kconfig b/drivers/staging/gpib/Kconfig index 2bf11df122e2..aa01538d5beb 100644 --- a/drivers/staging/gpib/Kconfig +++ b/drivers/staging/gpib/Kconfig @@ -50,7 +50,6 @@ config GPIB_CEC_PCI tristate "CEC PCI board" depends on PCI depends on HAS_IOPORT - depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help @@ -64,7 +63,6 @@ config GPIB_NI_PCI_ISA tristate "NI PCI/ISA compatible boards" depends on ISA_BUS || PCI || PCMCIA depends on HAS_IOPORT - depends on !X86_PAE depends on PCMCIA || !PCMCIA depends on HAS_IOPORT_MAP select GPIB_COMMON @@ -90,7 +88,6 @@ config GPIB_CB7210 tristate "Measurement Computing compatible boards" depends on HAS_IOPORT depends on ISA_BUS || PCI || PCMCIA - depends on !X86_PAE depends on PCMCIA || !PCMCIA select GPIB_COMMON select GPIB_NEC7210 @@ -181,7 +178,6 @@ config GPIB_INES depends on PCI || ISA_BUS || PCMCIA depends on PCMCIA || !PCMCIA depends on HAS_IOPORT - depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help From ce88577c6120c8e4bd4ac8b9cac3c2d3cdad7060 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:52 +0100 Subject: [PATCH 0294/2157] staging: gpib: agilent pci console messaging cleanup Remove dev_err messages on interrupted or timed-out reads and writes. User land code can figure out what went wrong with the errno return. Return -ENODEV instead of -1 on attach failure when no board is found. Delete commented out console messages. Use module name in pr_xxx and dev_xxx messages. Remove const char * definition of driver_name and extern definition in .h file. Use DRV_NAME defined as KBUILD_MODNAME instead. Remove driver_name parameter and agilent_82350b string prefix in dev_xxx messages. Use DRV_NAME instead of driver_name in pci_request_regions. Use DRV_NAME instead of hard coded string in the pci_driver struct. Change select dev_info's to dev_dbg. Change pr_err to dev_err where possible. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82350b/agilent_82350b.c | 118 ++++++------------ .../gpib/agilent_82350b/agilent_82350b.h | 3 - 2 files changed, 40 insertions(+), 81 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 5c62ec24fced..f83e1f321561 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -4,6 +4,10 @@ * copyright : (C) 2002, 2004 by Frank Mori Hess * ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "agilent_82350b.h" #include #include @@ -54,9 +58,6 @@ static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_ retval = tms9914_read(board, tms_priv, buffer, 1, end, &num_bytes); *bytes_read += num_bytes; - if (retval < 0) - dev_err(board->gpib_dev, "%s: tms9914_read failed retval=%i\n", - driver_name, retval); if (retval < 0 || *end) return retval; ++buffer; @@ -89,7 +90,6 @@ static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_ test_bit(DEV_CLEAR_BN, &tms_priv->state) || test_bit(TIMO_NUM, &board->status)); if (retval) { - dev_dbg(board->gpib_dev, "%s: read wait interrupted\n", driver_name); retval = -ERESTARTSYS; break; } @@ -103,13 +103,10 @@ static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_ *end = 1; } if (test_bit(TIMO_NUM, &board->status)) { - dev_err(board->gpib_dev, "%s: read timed out\n", driver_name); retval = -ETIMEDOUT; break; } if (test_bit(DEV_CLEAR_BN, &tms_priv->state)) { - dev_err(board->gpib_dev, "%s: device clear interrupted read\n", - driver_name); retval = -EINTR; break; } @@ -139,18 +136,12 @@ static int translate_wait_return_value(gpib_board_t *board, int retval) struct agilent_82350b_priv *a_priv = board->private_data; struct tms9914_priv *tms_priv = &a_priv->tms9914_priv; - if (retval) { - dev_err(board->gpib_dev, "%s: write wait interrupted\n", driver_name); + if (retval) return -ERESTARTSYS; - } - if (test_bit(TIMO_NUM, &board->status)) { - dev_err(board->gpib_dev, "%s: write timed out\n", driver_name); + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; - } - if (test_bit(DEV_CLEAR_BN, &tms_priv->state)) { - dev_err(board->gpib_dev, "%s: device clear interrupted write\n", driver_name); + if (test_bit(DEV_CLEAR_BN, &tms_priv->state)) return -EINTR; - } return 0; } @@ -176,10 +167,8 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size event_status = read_and_clear_event_status(board); - //pr_info("ag_ac_wr: event status 0x%x tms state 0x%lx\n", event_status, tms_priv->state); - #ifdef EXPERIMENTAL - pr_info("ag_ac_wr: wait for previous BO to complete if any\n"); + // wait for previous BO to complete if any retval = wait_event_interruptible(board->wait, test_bit(DEV_CLEAR_BN, &tms_priv->state) || test_bit(WRITE_READY_BN, &tms_priv->state) || @@ -190,14 +179,11 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size return retval; #endif - //pr_info("ag_ac_wr: sending first byte\n"); retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); *bytes_written += num_bytes; if (retval < 0) return retval; - //pr_info("ag_ac_wr: %ld bytes eoi %d tms state 0x%lx\n",length, send_eoi, tms_priv->state); - write_byte(tms_priv, tms_priv->imr0_bits & ~HR_BOIE, IMR0); for (i = 1; i < fifotransferlength;) { clear_bit(WRITE_READY_BN, &tms_priv->state); @@ -210,13 +196,8 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size } writeb(ENABLE_TI_TO_SRAM, a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG); - //pr_info("ag_ac_wr: send block: %d bytes tms 0x%lx\n", block_size, - // tms_priv->state); - - if (agilent_82350b_fifo_is_halted(a_priv)) { + if (agilent_82350b_fifo_is_halted(a_priv)) writeb(RESTART_STREAM_BIT, a_priv->gpib_base + STREAM_STATUS_REG); - // pr_info("ag_ac_wr: needed restart\n"); - } retval = wait_event_interruptible(board->wait, ((event_status = @@ -226,7 +207,6 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size test_bit(TIMO_NUM, &board->status)); writeb(0, a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG); num_bytes = block_size - read_transfer_counter(a_priv); - //pr_info("ag_ac_wr: sent %ld bytes tms 0x%lx\n", num_bytes, tms_priv->state); *bytes_written += num_bytes; retval = translate_wait_return_value(board, retval); @@ -238,9 +218,6 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size return retval; if (send_eoi) { - //pr_info("ag_ac_wr: sending last byte with eoi byte no: %d\n", - // fifotransferlength+1); - retval = agilent_82350b_write(board, buffer + fifotransferlength, 1, send_eoi, &num_bytes); *bytes_written += num_bytes; @@ -284,7 +261,6 @@ static irqreturn_t agilent_82350b_interrupt(int irq, void *arg) tms9914_interrupt_have_status(board, &a_priv->tms9914_priv, tms9914_status1, tms9914_status2); } -//pr_info("event_status=0x%x s1 %x s2 %x\n", event_status,tms9914_status1,tms9914_status2); //write-clear status bits if (event_status & (BUFFER_END_STATUS_BIT | TERM_COUNT_STATUS_BIT)) { writeb(event_status & (BUFFER_END_STATUS_BIT | TERM_COUNT_STATUS_BIT), @@ -298,8 +274,6 @@ static irqreturn_t agilent_82350b_interrupt(int irq, void *arg) static void agilent_82350b_detach(gpib_board_t *board); -const char *driver_name = "agilent_82350b"; - static int read_transfer_counter(struct agilent_82350b_priv *a_priv) { int lo, mid, value; @@ -536,11 +510,10 @@ static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t * return 0; // need to programme borg if (!config->init_data || config->init_data_length != firmware_length) { - dev_err(board->gpib_dev, "%s: the 82350A board requires firmware after powering on.\n", - driver_name); + dev_err(board->gpib_dev, "the 82350A board requires firmware after powering on.\n"); return -EIO; } - dev_info(board->gpib_dev, "%s: Loading firmware...\n", driver_name); + dev_dbg(board->gpib_dev, "Loading firmware...\n"); // tickle the borg writel(plx_cntrl_static_bits | PLX9050_USER3_DATA_BIT, @@ -559,7 +532,7 @@ static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t * usleep_range(10, 20); } if (j == timeout) { - dev_err(board->gpib_dev, "%s: timed out loading firmware.\n", driver_name); + dev_err(board->gpib_dev, "timed out loading firmware.\n"); return -ETIMEDOUT; } writeb(firmware_data[i], a_priv->gpib_base + CONFIG_DATA_REG); @@ -570,11 +543,10 @@ static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t * usleep_range(10, 20); } if (j == timeout) { - dev_err(board->gpib_dev, "%s: timed out waiting for firmware load to complete.\n", - driver_name); + dev_err(board->gpib_dev, "timed out waiting for firmware load to complete.\n"); return -ETIMEDOUT; } - dev_info(board->gpib_dev, "%s: ...done.\n", driver_name); + dev_dbg(board->gpib_dev, " ...done.\n"); return 0; } @@ -596,15 +568,14 @@ static int test_sram(gpib_board_t *board) unsigned int read_value = readb(a_priv->sram_base + i); if ((i & byte_mask) != read_value) { - dev_err(board->gpib_dev, "%s: SRAM test failed at %d wanted %d got %d\n", - driver_name, i, (i & byte_mask), read_value); + dev_err(board->gpib_dev, "SRAM test failed at %d wanted %d got %d\n", + i, (i & byte_mask), read_value); return -EIO; } if (need_resched()) schedule(); } - dev_info(board->gpib_dev, "%s: SRAM test passed 0x%x bytes checked\n", - driver_name, sram_length); + dev_dbg(board->gpib_dev, "SRAM test passed 0x%x bytes checked\n", sram_length); return 0; } @@ -632,14 +603,14 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c PCI_DEVICE_ID_82350B, NULL); if (a_priv->pci_device) { a_priv->model = MODEL_82350B; - dev_info(board->gpib_dev, "%s: Agilent 82350B board found\n", driver_name); + dev_dbg(board->gpib_dev, "Agilent 82350B board found\n"); } else { a_priv->pci_device = gpib_pci_get_device(config, PCI_VENDOR_ID_AGILENT, PCI_DEVICE_ID_82351A, NULL); if (a_priv->pci_device) { a_priv->model = MODEL_82351A; - dev_info(board->gpib_dev, "%s: Agilent 82351B board found\n", driver_name); + dev_dbg(board->gpib_dev, "Agilent 82351B board found\n"); } else { a_priv->pci_device = gpib_pci_get_subsys(config, PCI_VENDOR_ID_PLX, @@ -649,46 +620,40 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c a_priv->pci_device); if (a_priv->pci_device) { a_priv->model = MODEL_82350A; - dev_info(board->gpib_dev, "%s: HP/Agilent 82350A board found\n", - driver_name); + dev_dbg(board->gpib_dev, "HP/Agilent 82350A board found\n"); } else { - dev_err(board->gpib_dev, "%s: no 82350/82351 board found\n", - driver_name); + dev_err(board->gpib_dev, "no 82350/82351 board found\n"); return -ENODEV; } } } if (pci_enable_device(a_priv->pci_device)) { - dev_err(board->gpib_dev, "%s: error enabling pci device\n", driver_name); + dev_err(board->gpib_dev, "error enabling pci device\n"); return -EIO; } - if (pci_request_regions(a_priv->pci_device, driver_name)) - return -EIO; + if (pci_request_regions(a_priv->pci_device, DRV_NAME)) + return -ENOMEM; switch (a_priv->model) { case MODEL_82350A: a_priv->plx_base = ioremap(pci_resource_start(a_priv->pci_device, PLX_MEM_REGION), pci_resource_len(a_priv->pci_device, PLX_MEM_REGION)); - dev_dbg(board->gpib_dev, "%s: plx base address remapped to 0x%p\n", - driver_name, a_priv->plx_base); + dev_dbg(board->gpib_dev, "plx base address remapped to 0x%p\n", a_priv->plx_base); a_priv->gpib_base = ioremap(pci_resource_start(a_priv->pci_device, GPIB_82350A_REGION), pci_resource_len(a_priv->pci_device, GPIB_82350A_REGION)); - dev_dbg(board->gpib_dev, "%s: gpib base address remapped to 0x%p\n", - driver_name, a_priv->gpib_base); + dev_dbg(board->gpib_dev, "chip base address remapped to 0x%p\n", a_priv->gpib_base); tms_priv->mmiobase = a_priv->gpib_base + TMS9914_BASE_REG; a_priv->sram_base = ioremap(pci_resource_start(a_priv->pci_device, SRAM_82350A_REGION), pci_resource_len(a_priv->pci_device, SRAM_82350A_REGION)); - dev_dbg(board->gpib_dev, "%s: sram base address remapped to 0x%p\n", - driver_name, a_priv->sram_base); + dev_dbg(board->gpib_dev, "sram base address remapped to 0x%p\n", a_priv->sram_base); a_priv->borg_base = ioremap(pci_resource_start(a_priv->pci_device, BORG_82350A_REGION), pci_resource_len(a_priv->pci_device, BORG_82350A_REGION)); - dev_dbg(board->gpib_dev, "%s: borg base address remapped to 0x%p\n", - driver_name, a_priv->borg_base); + dev_dbg(board->gpib_dev, "borg base address remapped to 0x%p\n", a_priv->borg_base); retval = init_82350a_hardware(board, config); if (retval < 0) @@ -698,21 +663,18 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c case MODEL_82351A: a_priv->gpib_base = ioremap(pci_resource_start(a_priv->pci_device, GPIB_REGION), pci_resource_len(a_priv->pci_device, GPIB_REGION)); - dev_dbg(board->gpib_dev, "%s: gpib base address remapped to 0x%p\n", - driver_name, a_priv->gpib_base); + dev_dbg(board->gpib_dev, "chip base address remapped to 0x%p\n", a_priv->gpib_base); tms_priv->mmiobase = a_priv->gpib_base + TMS9914_BASE_REG; a_priv->sram_base = ioremap(pci_resource_start(a_priv->pci_device, SRAM_REGION), pci_resource_len(a_priv->pci_device, SRAM_REGION)); - dev_dbg(board->gpib_dev, "%s: sram base address remapped to 0x%p\n", - driver_name, a_priv->sram_base); + dev_dbg(board->gpib_dev, "sram base address remapped to 0x%p\n", a_priv->sram_base); a_priv->misc_base = ioremap(pci_resource_start(a_priv->pci_device, MISC_REGION), pci_resource_len(a_priv->pci_device, MISC_REGION)); - dev_dbg(board->gpib_dev, "%s: misc base address remapped to 0x%p\n", - driver_name, a_priv->misc_base); + dev_dbg(board->gpib_dev, "misc base address remapped to 0x%p\n", a_priv->misc_base); break; default: - pr_err("%s: invalid board\n", driver_name); - return -1; + dev_err(board->gpib_dev, "invalid board\n"); + return -ENODEV; } retval = test_sram(board); @@ -720,12 +682,12 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c return retval; if (request_irq(a_priv->pci_device->irq, agilent_82350b_interrupt, - IRQF_SHARED, driver_name, board)) { - pr_err("%s: can't request IRQ %d\n", driver_name, a_priv->pci_device->irq); + IRQF_SHARED, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to obtain irq %d\n", a_priv->pci_device->irq); return -EIO; } a_priv->irq = a_priv->pci_device->irq; - dev_dbg(board->gpib_dev, "%s: IRQ %d\n", driver_name, a_priv->irq); + dev_dbg(board->gpib_dev, " IRQ %d\n", a_priv->irq); writeb(0, a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG); a_priv->card_mode_bits = ENABLE_PCI_IRQ_BIT; @@ -873,7 +835,7 @@ static const struct pci_device_id agilent_82350b_pci_table[] = { MODULE_DEVICE_TABLE(pci, agilent_82350b_pci_table); static struct pci_driver agilent_82350b_pci_driver = { - .name = "agilent_82350b", + .name = DRV_NAME, .id_table = agilent_82350b_pci_table, .probe = &agilent_82350b_pci_probe }; @@ -884,19 +846,19 @@ static int __init agilent_82350b_init_module(void) result = pci_register_driver(&agilent_82350b_pci_driver); if (result) { - pr_err("agilent_82350b: pci_register_driver failed: error = %d\n", result); + pr_err("pci_register_driver failed: error = %d\n", result); return result; } result = gpib_register_driver(&agilent_82350b_unaccel_interface, THIS_MODULE); if (result) { - pr_err("agilent_82350b: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_unaccel; } result = gpib_register_driver(&agilent_82350b_interface, THIS_MODULE); if (result) { - pr_err("agilent_82350b: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_interface; } diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.h b/drivers/staging/gpib/agilent_82350b/agilent_82350b.h index 8b96ad12647e..1573230c619d 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.h +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.h @@ -57,9 +57,6 @@ struct agilent_82350b_priv { bool using_fifos; }; -// driver name -extern const char *driver_name; - //registers enum agilent_82350b_gpib_registers From 50a6ed0494bc082227c38f427b40731bca9bdf15 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:53 +0100 Subject: [PATCH 0295/2157] staging: gpib: agilent usb console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string in usb_driver struct. Remove __func__ parameter in dev_dbg messages as this can be enabled with dynamic debug. Remove __func__ parameter in dev_err messages as they are not needed. The module name is sufficient. Change pr_info to dev_dbg where needed and remove the rest where possible. Remove test and error messages for buffer over run in write and read_registers as these are just trying to catch bugs in the driver. Remove agilent_82357a string prefix in error messages. Return -EIO for too many reads in read_registers instead of continuing after printing an error message. Change pr_warn to dev_warn. Remove warning on calls for unsupported functionality. Remove test and message for buffer overflow in agilent_82357_init and agilent_82357a_go_idle which are just checking for a driver bug. Use actual indeces in the array instead of i and then incrementing i so that the code is clear and there is no need to check for overflow or print a message. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82357a/agilent_82357a.c | 359 +++++++----------- 1 file changed, 143 insertions(+), 216 deletions(-) diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 69f0e490d401..7ebebe00dc48 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -7,6 +7,10 @@ #define _GNU_SOURCE +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include #include #include @@ -79,14 +83,12 @@ static int agilent_82357a_send_bulk_msg(struct agilent_82357a_priv *a_priv, void retval = usb_submit_urb(a_priv->bulk_urb, GFP_KERNEL); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to submit bulk out urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit bulk out urb, retval=%i\n", retval); mutex_unlock(&a_priv->bulk_alloc_lock); goto cleanup; } mutex_unlock(&a_priv->bulk_alloc_lock); if (down_interruptible(&context->complete)) { - dev_err(&usb_dev->dev, "%s: interrupted\n", __func__); retval = -ERESTARTSYS; goto cleanup; } @@ -149,14 +151,12 @@ static int agilent_82357a_receive_bulk_msg(struct agilent_82357a_priv *a_priv, v retval = usb_submit_urb(a_priv->bulk_urb, GFP_KERNEL); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to submit bulk out urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit bulk in urb, retval=%i\n", retval); mutex_unlock(&a_priv->bulk_alloc_lock); goto cleanup; } mutex_unlock(&a_priv->bulk_alloc_lock); if (down_interruptible(&context->complete)) { - dev_err(&usb_dev->dev, "%s: interrupted\n", __func__); retval = -ERESTARTSYS; goto cleanup; } @@ -205,7 +205,6 @@ static int agilent_82357a_receive_control_msg(struct agilent_82357a_priv *a_priv static void agilent_82357a_dump_raw_block(const u8 *raw_data, int length) { - pr_info("hex block dump\n"); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 8, 1, raw_data, length, true); } @@ -225,7 +224,7 @@ static int agilent_82357a_write_registers(struct agilent_82357a_priv *a_priv, static const int max_writes = 31; if (num_writes > max_writes) { - dev_err(&usb_dev->dev, "%s: bug! num_writes=%i too large\n", __func__, num_writes); + dev_err(&usb_dev->dev, "bug! num_writes=%i too large\n", num_writes); return -EIO; } out_data_length = num_writes * bytes_per_write + header_length; @@ -239,8 +238,7 @@ static int agilent_82357a_write_registers(struct agilent_82357a_priv *a_priv, out_data[i++] = writes[j].address; out_data[i++] = writes[j].value; } - if (i > out_data_length) - dev_err(&usb_dev->dev, "%s: bug! buffer overrun\n", __func__); + retval = mutex_lock_interruptible(&a_priv->bulk_transfer_lock); if (retval) { kfree(out_data); @@ -249,8 +247,8 @@ static int agilent_82357a_write_registers(struct agilent_82357a_priv *a_priv, retval = agilent_82357a_send_bulk_msg(a_priv, out_data, i, &bytes_written, 1000); kfree(out_data); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); mutex_unlock(&a_priv->bulk_transfer_lock); return retval; } @@ -265,20 +263,19 @@ static int agilent_82357a_write_registers(struct agilent_82357a_priv *a_priv, mutex_unlock(&a_priv->bulk_transfer_lock); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); agilent_82357a_dump_raw_block(in_data, bytes_read); kfree(in_data); return -EIO; } if (in_data[0] != (0xff & ~DATA_PIPE_CMD_WR_REGS)) { - dev_err(&usb_dev->dev, "%s: error, bulk command=0x%x != ~DATA_PIPE_CMD_WR_REGS\n", - __func__, in_data[0]); + dev_err(&usb_dev->dev, "bulk command=0x%x != ~DATA_PIPE_CMD_WR_REGS\n", in_data[0]); return -EIO; } if (in_data[1]) { - dev_err(&usb_dev->dev, "%s: nonzero error code 0x%x in DATA_PIPE_CMD_WR_REGS response\n", - __func__, in_data[1]); + dev_err(&usb_dev->dev, "nonzero error code 0x%x in DATA_PIPE_CMD_WR_REGS response\n", + in_data[1]); return -EIO; } kfree(in_data); @@ -299,9 +296,10 @@ static int agilent_82357a_read_registers(struct agilent_82357a_priv *a_priv, static const int header_length = 2; static const int max_reads = 62; - if (num_reads > max_reads) - dev_err(&usb_dev->dev, "%s: bug! num_reads=%i too large\n", __func__, num_reads); - + if (num_reads > max_reads) { + dev_err(&usb_dev->dev, "bug! num_reads=%i too large\n", num_reads); + return -EIO; + } out_data_length = num_reads + header_length; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -311,8 +309,7 @@ static int agilent_82357a_read_registers(struct agilent_82357a_priv *a_priv, out_data[i++] = num_reads; for (j = 0; j < num_reads; j++) out_data[i++] = reads[j].address; - if (i > out_data_length) - dev_err(&usb_dev->dev, "%s: bug! buffer overrun\n", __func__); + if (blocking) { retval = mutex_lock_interruptible(&a_priv->bulk_transfer_lock); if (retval) { @@ -329,8 +326,8 @@ static int agilent_82357a_read_registers(struct agilent_82357a_priv *a_priv, retval = agilent_82357a_send_bulk_msg(a_priv, out_data, i, &bytes_written, 1000); kfree(out_data); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); mutex_unlock(&a_priv->bulk_transfer_lock); return retval; } @@ -345,21 +342,20 @@ static int agilent_82357a_read_registers(struct agilent_82357a_priv *a_priv, mutex_unlock(&a_priv->bulk_transfer_lock); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); agilent_82357a_dump_raw_block(in_data, bytes_read); kfree(in_data); return -EIO; } i = 0; if (in_data[i++] != (0xff & ~DATA_PIPE_CMD_RD_REGS)) { - dev_err(&usb_dev->dev, "%s: error, bulk command=0x%x != ~DATA_PIPE_CMD_RD_REGS\n", - __func__, in_data[0]); + dev_err(&usb_dev->dev, "bulk command=0x%x != ~DATA_PIPE_CMD_RD_REGS\n", in_data[0]); return -EIO; } if (in_data[i++]) { - dev_err(&usb_dev->dev, "%s: nonzero error code 0x%x in DATA_PIPE_CMD_RD_REGS response\n", - __func__, in_data[1]); + dev_err(&usb_dev->dev, "nonzero error code 0x%x in DATA_PIPE_CMD_RD_REGS response\n", + in_data[1]); return -EIO; } for (j = 0; j < num_reads; j++) @@ -390,14 +386,13 @@ static int agilent_82357a_abort(struct agilent_82357a_priv *a_priv, int flush) wIndex, status_data, status_data_len, 100); if (receive_control_retval < 0) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_receive_control_msg() returned %i\n", - __func__, receive_control_retval); + dev_err(&usb_dev->dev, "82357a_receive_control_msg() returned %i\n", + receive_control_retval); retval = -EIO; goto cleanup; } if (status_data[0] != (~XFER_ABORT & 0xff)) { - dev_err(&usb_dev->dev, "%s: error, major code=0x%x != ~XFER_ABORT\n", - __func__, status_data[0]); + dev_err(&usb_dev->dev, "major code=0x%x != ~XFER_ABORT\n", status_data[0]); retval = -EIO; goto cleanup; } @@ -413,8 +408,7 @@ static int agilent_82357a_abort(struct agilent_82357a_priv *a_priv, int flush) fallthrough; case UGP_ERR_FLUSHING_ALREADY: default: - dev_err(&usb_dev->dev, "%s: abort returned error code=0x%x\n", - __func__, status_data[1]); + dev_err(&usb_dev->dev, "abort returned error code=0x%x\n", status_data[1]); retval = -EIO; break; } @@ -469,8 +463,8 @@ static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t leng retval = agilent_82357a_send_bulk_msg(a_priv, out_data, i, &bytes_written, msec_timeout); kfree(out_data); if (retval || bytes_written != i) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); mutex_unlock(&a_priv->bulk_transfer_lock); if (retval < 0) return retval; @@ -501,19 +495,19 @@ static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t leng &extra_bytes_read, 100); bytes_read += extra_bytes_read; if (extra_bytes_retval) { - dev_err(&usb_dev->dev, "%s: extra_bytes_retval=%i, bytes_read=%i\n", - __func__, extra_bytes_retval, bytes_read); + dev_err(&usb_dev->dev, "extra_bytes_retval=%i, bytes_read=%i\n", + extra_bytes_retval, bytes_read); agilent_82357a_abort(a_priv, 0); } } else if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); agilent_82357a_abort(a_priv, 0); } mutex_unlock(&a_priv->bulk_transfer_lock); if (bytes_read > length + 1) { bytes_read = length + 1; - pr_warn("%s: bytes_read > length? truncating", __func__); + dev_warn(&usb_dev->dev, "bytes_read > length? truncating"); } if (bytes_read >= 1) { @@ -584,8 +578,8 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer kfree(out_data); if (retval || raw_bytes_written != i) { agilent_82357a_abort(a_priv, 0); - dev_err(&usb_dev->dev, "%s: agilent_82357a_send_bulk_msg returned %i, raw_bytes_written=%i, i=%i\n", - __func__, retval, raw_bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, raw_bytes_written=%i, i=%i\n", + retval, raw_bytes_written, i); mutex_unlock(&a_priv->bulk_transfer_lock); if (retval < 0) return retval; @@ -597,7 +591,7 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer &a_priv->interrupt_flags) || test_bit(TIMO_NUM, &board->status)); if (retval) { - dev_err(&usb_dev->dev, "%s: wait write complete interrupted\n", __func__); + dev_dbg(&usb_dev->dev, "wait write complete interrupted\n"); agilent_82357a_abort(a_priv, 0); mutex_unlock(&a_priv->bulk_transfer_lock); return -ERESTARTSYS; @@ -614,8 +608,7 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer read_reg.address = BSR; retval = agilent_82357a_read_registers(a_priv, &read_reg, 1, 1); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return -ETIMEDOUT; } @@ -632,8 +625,7 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer read_reg.address = ADSR; retval = agilent_82357a_read_registers(a_priv, &read_reg, 1, 1); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return -ETIMEDOUT; } adsr = read_reg.value; @@ -659,8 +651,7 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer 100); mutex_unlock(&a_priv->bulk_transfer_lock); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_receive_control_msg() returned %i\n", - __func__, retval); + dev_err(&usb_dev->dev, "receive_control_msg() returned %i\n", retval); kfree(status_data); return -EIO; } @@ -699,8 +690,7 @@ int agilent_82357a_take_control_internal(gpib_board_t *board, int synchronous) write.value = AUX_TCA; retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return retval; } @@ -741,8 +731,7 @@ static int agilent_82357a_go_to_standby(gpib_board_t *board) write.value = AUX_GTS; retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return 0; } @@ -771,8 +760,7 @@ static void agilent_82357a_request_system_control(gpib_board_t *board, int reque ++i; retval = agilent_82357a_write_registers(a_priv, writes, i); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return;// retval; } @@ -791,8 +779,7 @@ static void agilent_82357a_interface_clear(gpib_board_t *board, int assert) } retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); } static void agilent_82357a_remote_enable(gpib_board_t *board, int enable) @@ -808,8 +795,7 @@ static void agilent_82357a_remote_enable(gpib_board_t *board, int enable) write.value |= AUX_CS; retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); a_priv->ren_state = enable; return;// 0; } @@ -818,10 +804,9 @@ static int agilent_82357a_enable_eos(gpib_board_t *board, uint8_t eos_byte, int { struct agilent_82357a_priv *a_priv = board->private_data; - if (compare_8_bits == 0) { - pr_warn("%s: hardware only supports 8-bit EOS compare", __func__); + if (compare_8_bits == 0) return -EOPNOTSUPP; - } + a_priv->eos_char = eos_byte; a_priv->eos_mode = REOS | BIN; return 0; @@ -850,8 +835,7 @@ static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned i retval = agilent_82357a_read_registers(a_priv, &address_status, 1, 0); if (retval) { if (retval != -EAGAIN) - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return board->status; } // check for remote/local @@ -883,8 +867,7 @@ static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned i retval = agilent_82357a_read_registers(a_priv, &bus_status, 1, 0); if (retval) { if (retval != -EAGAIN) - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return board->status; } if (bus_status.value & BSR_SRQ_BIT) @@ -907,8 +890,7 @@ static int agilent_82357a_primary_address(gpib_board_t *board, unsigned int addr write.value = address & ADDRESS_MASK; retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return retval; } return retval; @@ -917,8 +899,8 @@ static int agilent_82357a_primary_address(gpib_board_t *board, unsigned int addr static int agilent_82357a_secondary_address(gpib_board_t *board, unsigned int address, int enable) { if (enable) - pr_warn("%s: warning: assigning a secondary address not supported\n", __func__); - return -EOPNOTSUPP; + return -EOPNOTSUPP; + return 0; } static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) @@ -936,16 +918,14 @@ static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) writes[1].value = a_priv->hw_control_bits & ~NOT_PARALLEL_POLL; retval = agilent_82357a_write_registers(a_priv, writes, 2); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return retval; } udelay(2); //silly, since usb write will take way longer read.address = CPTR; retval = agilent_82357a_read_registers(a_priv, &read, 1, 1); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return retval; } *result = read.value; @@ -956,8 +936,7 @@ static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) writes[1].value = AUX_RPP; retval = agilent_82357a_write_registers(a_priv, writes, 2); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return retval; } return 0; @@ -1005,8 +984,7 @@ static int agilent_82357a_line_status(const gpib_board_t *board) retval = agilent_82357a_read_registers(a_priv, &bus_status, 1, 0); if (retval) { if (retval != -EAGAIN) - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return retval; } if (bus_status.value & BSR_REN_BIT) @@ -1055,8 +1033,7 @@ static unsigned int agilent_82357a_t1_delay(gpib_board_t *board, unsigned int na write.value = nanosec_to_fast_talker_bits(&nanosec); retval = agilent_82357a_write_registers(a_priv, &write, 1); if (retval) - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return nanosec; } @@ -1081,7 +1058,7 @@ static void agilent_82357a_interrupt_complete(struct urb *urb) default: /* other error, resubmit */ retval = usb_submit_urb(a_priv->interrupt_urb, GFP_ATOMIC); if (retval) - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb\n", __func__); + dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); return; } @@ -1097,7 +1074,7 @@ static void agilent_82357a_interrupt_complete(struct urb *urb) retval = usb_submit_urb(a_priv->interrupt_urb, GFP_ATOMIC); if (retval) - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb\n", __func__); + dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); } static int agilent_82357a_setup_urbs(gpib_board_t *board) @@ -1133,8 +1110,7 @@ static int agilent_82357a_setup_urbs(gpib_board_t *board) if (retval) { usb_free_urb(a_priv->interrupt_urb); a_priv->interrupt_urb = NULL; - dev_err(&usb_dev->dev, "%s: failed to submit first interrupt urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit first interrupt urb, retval=%i\n", retval); goto setup_exit; } mutex_unlock(&a_priv->interrupt_alloc_lock); @@ -1184,108 +1160,78 @@ static void agilent_82357a_free_private(gpib_board_t *board) { kfree(board->private_data); board->private_data = NULL; - } +#define INIT_NUM_REG_WRITES 18 static int agilent_82357a_init(gpib_board_t *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); struct agilent_82357a_register_pairlet hw_control; - struct agilent_82357a_register_pairlet writes[0x20]; + struct agilent_82357a_register_pairlet writes[INIT_NUM_REG_WRITES]; int retval; - int i; unsigned int nanosec; - i = 0; - writes[i].address = LED_CONTROL; - writes[i].value = FAIL_LED_ON; - ++i; - writes[i].address = RESET_TO_POWERUP; - writes[i].value = RESET_SPACEBALL; - ++i; - retval = agilent_82357a_write_registers(a_priv, writes, i); + writes[0].address = LED_CONTROL; + writes[0].value = FAIL_LED_ON; + writes[1].address = RESET_TO_POWERUP; + writes[1].value = RESET_SPACEBALL; + retval = agilent_82357a_write_registers(a_priv, writes, 2); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return -EIO; } set_current_state(TASK_INTERRUPTIBLE); if (schedule_timeout(usec_to_jiffies(2000))) return -ERESTARTSYS; - i = 0; - writes[i].address = AUXCR; - writes[i].value = AUX_NBAF; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_HLDE; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_TON; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_LON; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_RSV2; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_INVAL; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_RPP; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_STDL; - ++i; - writes[i].address = AUXCR; - writes[i].value = AUX_VSTDL; - ++i; - writes[i].address = FAST_TALKER_T1; + writes[0].address = AUXCR; + writes[0].value = AUX_NBAF; + writes[1].address = AUXCR; + writes[1].value = AUX_HLDE; + writes[2].address = AUXCR; + writes[2].value = AUX_TON; + writes[3].address = AUXCR; + writes[3].value = AUX_LON; + writes[4].address = AUXCR; + writes[4].value = AUX_RSV2; + writes[5].address = AUXCR; + writes[5].value = AUX_INVAL; + writes[6].address = AUXCR; + writes[6].value = AUX_RPP; + writes[7].address = AUXCR; + writes[7].value = AUX_STDL; + writes[8].address = AUXCR; + writes[8].value = AUX_VSTDL; + writes[9].address = FAST_TALKER_T1; nanosec = board->t1_nano_sec; - writes[i].value = nanosec_to_fast_talker_bits(&nanosec); + writes[9].value = nanosec_to_fast_talker_bits(&nanosec); board->t1_nano_sec = nanosec; - ++i; - writes[i].address = ADR; - writes[i].value = board->pad & ADDRESS_MASK; - ++i; - writes[i].address = PPR; - writes[i].value = 0; - ++i; - writes[i].address = SPMR; - writes[i].value = 0; - ++i; - writes[i].address = PROTOCOL_CONTROL; - writes[i].value = WRITE_COMPLETE_INTERRUPT_EN; - ++i; - writes[i].address = IMR0; - writes[i].value = HR_BOIE | HR_BIIE; - ++i; - writes[i].address = IMR1; - writes[i].value = HR_SRQIE; - ++i; + writes[10].address = ADR; + writes[10].value = board->pad & ADDRESS_MASK; + writes[11].address = PPR; + writes[11].value = 0; + writes[12].address = SPMR; + writes[12].value = 0; + writes[13].address = PROTOCOL_CONTROL; + writes[13].value = WRITE_COMPLETE_INTERRUPT_EN; + writes[14].address = IMR0; + writes[14].value = HR_BOIE | HR_BIIE; + writes[15].address = IMR1; + writes[15].value = HR_SRQIE; // turn off reset state - writes[i].address = AUXCR; - writes[i].value = AUX_CHIP_RESET; - ++i; - writes[i].address = LED_CONTROL; - writes[i].value = FIRMWARE_LED_CONTROL; - ++i; - if (i > ARRAY_SIZE(writes)) { - dev_err(&usb_dev->dev, "%s: bug! writes[] overflow\n", __func__); - return -EFAULT; - } - retval = agilent_82357a_write_registers(a_priv, writes, i); + writes[16].address = AUXCR; + writes[16].value = AUX_CHIP_RESET; + writes[17].address = LED_CONTROL; + writes[17].value = FIRMWARE_LED_CONTROL; + retval = agilent_82357a_write_registers(a_priv, writes, INIT_NUM_REG_WRITES); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return -EIO; } hw_control.address = HW_CONTROL; retval = agilent_82357a_read_registers(a_priv, &hw_control, 1, 1); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_read_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "read_registers() returned error\n"); return -EIO; } a_priv->hw_control_bits = (hw_control.value & ~0x7) | NOT_TI_RESET | NOT_PARALLEL_POLL; @@ -1336,7 +1282,7 @@ static int agilent_82357a_attach(gpib_board_t *board, const gpib_board_config_t } if (i == MAX_NUM_82357A_INTERFACES) { dev_err(board->gpib_dev, - "No Agilent 82357 gpib adapters found, have you loaded its firmware?\n"); + "No supported adapters found, have you loaded its firmware?\n"); retval = -ENODEV; goto attach_fail; } @@ -1372,8 +1318,7 @@ static int agilent_82357a_attach(gpib_board_t *board, const gpib_board_config_t goto attach_fail; } - dev_info(&usb_dev->dev, - "bus %d dev num %d attached to gpib minor %d, agilent usb interface %i\n", + dev_info(&usb_dev->dev, "bus %d dev num %d attached to gpib%d, interface %i\n", usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); mutex_unlock(&agilent_82357a_hotplug_lock); return retval; @@ -1390,37 +1335,24 @@ static int agilent_82357a_go_idle(gpib_board_t *board) struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); struct agilent_82357a_register_pairlet writes[0x20]; int retval; - int i; - i = 0; // turn on tms9914 reset state - writes[i].address = AUXCR; - writes[i].value = AUX_CS | AUX_CHIP_RESET; - ++i; + writes[0].address = AUXCR; + writes[0].value = AUX_CS | AUX_CHIP_RESET; a_priv->hw_control_bits &= ~NOT_TI_RESET; - writes[i].address = HW_CONTROL; - writes[i].value = a_priv->hw_control_bits; - ++i; - writes[i].address = PROTOCOL_CONTROL; - writes[i].value = 0; - ++i; - writes[i].address = IMR0; - writes[i].value = 0; - ++i; - writes[i].address = IMR1; - writes[i].value = 0; - ++i; - writes[i].address = LED_CONTROL; - writes[i].value = 0; - ++i; - if (i > ARRAY_SIZE(writes)) { - dev_err(&usb_dev->dev, "%s: bug! writes[] overflow\n", __func__); - return -EFAULT; - } - retval = agilent_82357a_write_registers(a_priv, writes, i); + writes[1].address = HW_CONTROL; + writes[1].value = a_priv->hw_control_bits; + writes[2].address = PROTOCOL_CONTROL; + writes[2].value = 0; + writes[3].address = IMR0; + writes[3].value = 0; + writes[4].address = IMR1; + writes[4].value = 0; + writes[5].address = LED_CONTROL; + writes[5].value = 0; + retval = agilent_82357a_write_registers(a_priv, writes, 6); if (retval) { - dev_err(&usb_dev->dev, "%s: agilent_82357a_write_registers() returned error\n", - __func__); + dev_err(&usb_dev->dev, "write_registers() returned error\n"); return -EIO; } return 0; @@ -1445,7 +1377,6 @@ static void agilent_82357a_detach(gpib_board_t *board) agilent_82357a_release_urbs(a_priv); agilent_82357a_free_private(board); } - dev_info(board->gpib_dev, "%s: detached\n", __func__); mutex_unlock(&agilent_82357a_hotplug_lock); } @@ -1510,8 +1441,7 @@ static int agilent_82357a_driver_probe(struct usb_interface *interface, if (i == MAX_NUM_82357A_INTERFACES) { usb_put_dev(usb_dev); mutex_unlock(&agilent_82357a_hotplug_lock); - dev_err(&usb_dev->dev, "%s: out of space in agilent_82357a_driver_interfaces[]\n", - __func__); + dev_err(&usb_dev->dev, "out of space in agilent_82357a_driver_interfaces[]\n"); return -1; } path = kmalloc(path_length, GFP_KERNEL); @@ -1552,13 +1482,12 @@ static void agilent_82357a_driver_disconnect(struct usb_interface *interface) mutex_unlock(&a_priv->control_alloc_lock); } } - dev_dbg(&usb_dev->dev, "nulled agilent_82357a_driver_interfaces[%i]\n", i); agilent_82357a_driver_interfaces[i] = NULL; break; } } if (i == MAX_NUM_82357A_INTERFACES) - dev_err(&usb_dev->dev, "unable to find interface in agilent_82357a_driver_interfaces[]? bug?\n"); + dev_err(&usb_dev->dev, "unable to find interface - bug?\n"); usb_put_dev(usb_dev); mutex_unlock(&agilent_82357a_hotplug_lock); @@ -1583,18 +1512,18 @@ static int agilent_82357a_driver_suspend(struct usb_interface *interface, pm_mes agilent_82357a_abort(a_priv, 0); retval = agilent_82357a_go_idle(board); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to go idle, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to go idle, retval=%i\n", + retval); mutex_unlock(&agilent_82357a_hotplug_lock); return retval; } mutex_lock(&a_priv->interrupt_alloc_lock); agilent_82357a_cleanup_urbs(a_priv); mutex_unlock(&a_priv->interrupt_alloc_lock); - dev_info(&usb_dev->dev, - "bus %d dev num %d gpib minor %d, agilent usb interface %i suspended\n", - usb_dev->bus->busnum, usb_dev->devnum, - board->minor, i); + dev_dbg(&usb_dev->dev, + "bus %d dev num %d gpib %d, interface %i suspended\n", + usb_dev->bus->busnum, usb_dev->devnum, + board->minor, i); } } break; @@ -1631,8 +1560,8 @@ static int agilent_82357a_driver_resume(struct usb_interface *interface) mutex_lock(&a_priv->interrupt_alloc_lock); retval = usb_submit_urb(a_priv->interrupt_urb, GFP_KERNEL); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to resubmit interrupt urb in resume, retval=%i\n", + retval); mutex_unlock(&a_priv->interrupt_alloc_lock); mutex_unlock(&agilent_82357a_hotplug_lock); return retval; @@ -1655,9 +1584,9 @@ static int agilent_82357a_driver_resume(struct usb_interface *interface) // assert/unassert REN agilent_82357a_remote_enable(board, a_priv->ren_state); - dev_info(&usb_dev->dev, - "bus %d dev num %d gpib minor %d, agilent usb interface %i resumed\n", - usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); + dev_dbg(&usb_dev->dev, + "bus %d dev num %d gpib%d, interface %i resumed\n", + usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); } resume_exit: @@ -1667,7 +1596,7 @@ static int agilent_82357a_driver_resume(struct usb_interface *interface) } static struct usb_driver agilent_82357a_bus_driver = { - .name = "agilent_82357a_gpib", + .name = DRV_NAME, .probe = agilent_82357a_driver_probe, .disconnect = agilent_82357a_driver_disconnect, .suspend = agilent_82357a_driver_suspend, @@ -1680,19 +1609,18 @@ static int __init agilent_82357a_init_module(void) int i; int ret; - pr_info("agilent_82357a_gpib driver loading"); for (i = 0; i < MAX_NUM_82357A_INTERFACES; ++i) agilent_82357a_driver_interfaces[i] = NULL; ret = usb_register(&agilent_82357a_bus_driver); if (ret) { - pr_err("agilent_82357a: usb_register failed: error = %d\n", ret); + pr_err("usb_register failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&agilent_82357a_gpib_interface, THIS_MODULE); if (ret) { - pr_err("agilent_82357a: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); usb_deregister(&agilent_82357a_bus_driver); return ret; } @@ -1702,7 +1630,6 @@ static int __init agilent_82357a_init_module(void) static void __exit agilent_82357a_exit_module(void) { - pr_info("agilent_82357a_gpib driver unloading"); gpib_unregister_driver(&agilent_82357a_gpib_interface); usb_deregister(&agilent_82357a_bus_driver); } From 5d445a4395522652892b1d6d5d9600193d57276a Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:55 +0100 Subject: [PATCH 0296/2157] staging: gpib: cec_gpib console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "cec_gpib" in pci_driver struct. Remove "cec_gpib:" string prefix in messages since module name printing is enabled. Change pr_err to dev_err where possible. Return consistent error codes with error messages: -EBUSY when resources are busy -ENODEV when device is not present -EIO for others. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-5-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cec/cec_gpib.c | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/staging/gpib/cec/cec_gpib.c b/drivers/staging/gpib/cec/cec_gpib.c index 18933223711e..8f2b4b46a446 100644 --- a/drivers/staging/gpib/cec/cec_gpib.c +++ b/drivers/staging/gpib/cec/cec_gpib.c @@ -4,6 +4,10 @@ * copyright : (C) 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "cec.h" #include #include @@ -284,31 +288,29 @@ static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config break; } if (!cec_priv->pci_device) { - pr_err("gpib: no cec PCI board found\n"); - return -1; + dev_err(board->gpib_dev, "no cec PCI board found\n"); + return -ENODEV; } if (pci_enable_device(cec_priv->pci_device)) { - pr_err("error enabling pci device\n"); - return -1; + dev_err(board->gpib_dev, "error enabling pci device\n"); + return -EIO; } if (pci_request_regions(cec_priv->pci_device, "cec-gpib")) - return -1; + return -EBUSY; cec_priv->plx_iobase = pci_resource_start(cec_priv->pci_device, 1); - pr_info(" plx9050 base address 0x%lx\n", cec_priv->plx_iobase); - nec_priv->iobase = pci_resource_start(cec_priv->pci_device, 3); - pr_info(" nec7210 base address 0x%x\n", nec_priv->iobase); + nec_priv->iobase = pci_resource_start(cec_priv->pci_device, 3); isr_flags |= IRQF_SHARED; - if (request_irq(cec_priv->pci_device->irq, cec_interrupt, isr_flags, "pci-gpib", board)) { - pr_err("gpib: can't request IRQ %d\n", cec_priv->pci_device->irq); - return -1; + if (request_irq(cec_priv->pci_device->irq, cec_interrupt, isr_flags, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to obtain IRQ %d\n", cec_priv->pci_device->irq); + return -EBUSY; } cec_priv->irq = cec_priv->pci_device->irq; if (gpib_request_pseudo_irq(board, cec_interrupt)) { - pr_err("cec: failed to allocate pseudo irq\n"); + dev_err(board->gpib_dev, "failed to allocate pseudo irq\n"); return -1; } cec_init(cec_priv, board); @@ -355,7 +357,7 @@ static const struct pci_device_id cec_pci_table[] = { MODULE_DEVICE_TABLE(pci, cec_pci_table); static struct pci_driver cec_pci_driver = { - .name = "cec_gpib", + .name = DRV_NAME, .id_table = cec_pci_table, .probe = &cec_pci_probe }; @@ -366,13 +368,13 @@ static int __init cec_init_module(void) result = pci_register_driver(&cec_pci_driver); if (result) { - pr_err("cec_gpib: pci_register_driver failed: error = %d\n", result); + pr_err("pci_register_driver failed: error = %d\n", result); return result; } result = gpib_register_driver(&cec_pci_interface, THIS_MODULE); if (result) { - pr_err("cec_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); return result; } From 141765729ea7c83404d25096526662f2d6ddce36 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:56 +0100 Subject: [PATCH 0297/2157] staging: gpib: common core console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Change pr_err to dev_err wherever possible. Remove dev_dbg messages on entry to some functions. Remove error messages where userland can figure out what went wrong through errno. Remove __func__ and pid parameters in dev_dbg messages as these can be enabled by dynamic debug. Remove minor and "gpib" from dev_err and dev_dbg messages since this information is printed by the dev name. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-6-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/common/gpib_os.c | 127 ++++++++++---------------- drivers/staging/gpib/common/iblib.c | 109 ++++++++-------------- 2 files changed, 85 insertions(+), 151 deletions(-) diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c index 4901e660242e..301c8a1a62c2 100644 --- a/drivers/staging/gpib/common/gpib_os.c +++ b/drivers/staging/gpib/common/gpib_os.c @@ -5,6 +5,9 @@ *************************************************************************** */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt + #include "ibsys.h" #include #include @@ -90,7 +93,7 @@ void os_start_timer(gpib_board_t *board, unsigned int usec_timeout) /* Starts the timeout task */ { if (timer_pending(&board->timer)) { - pr_err("gpib: bug! timer already running?\n"); + dev_err(board->gpib_dev, "bug! timer already running?\n"); return; } clear_bit(TIMO_NUM, &board->status); @@ -140,7 +143,7 @@ static void pseudo_irq_handler(struct timer_list *t) int gpib_request_pseudo_irq(gpib_board_t *board, irqreturn_t (*handler)(int, void *)) { if (timer_pending(&board->pseudo_irq.timer) || board->pseudo_irq.handler) { - pr_err("gpib: only one pseudo interrupt per board allowed\n"); + dev_err(board->gpib_dev, "only one pseudo interrupt per board allowed\n"); return -1; } @@ -260,8 +263,6 @@ int get_serial_poll_byte(gpib_board_t *board, unsigned int pad, int sad, unsigne { gpib_status_queue_t *device; - dev_dbg(board->gpib_dev, "%s:()\n", __func__); - device = get_gpib_status_queue(board, pad, sad); if (num_status_bytes(device)) return pop_status_byte(board, device, poll_byte); @@ -273,7 +274,6 @@ int autopoll_all_devices(gpib_board_t *board) { int retval; - dev_dbg(board->gpib_dev, "entering %s()\n", __func__); if (mutex_lock_interruptible(&board->user_mutex)) return -ERESTARTSYS; if (mutex_lock_interruptible(&board->big_gpib_mutex)) { @@ -290,7 +290,7 @@ int autopoll_all_devices(gpib_board_t *board) return retval; } - dev_dbg(board->gpib_dev, "%s complete\n", __func__); + dev_dbg(board->gpib_dev, "complete\n"); /* need to wake wait queue in case someone is * waiting on RQS */ @@ -308,8 +308,6 @@ static int setup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) size_t bytes_written; int ret; - dev_dbg(board->gpib_dev, "entering %s()\n", __func__); - os_start_timer(board, usec_timeout); ret = ibcac(board, 1, 1); if (ret < 0) { @@ -326,7 +324,7 @@ static int setup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) ret = board->interface->command(board, cmd_string, i, &bytes_written); if (ret < 0 || bytes_written < i) { - pr_err("gpib: failed to setup serial poll\n"); + dev_dbg(board->gpib_dev, "failed to setup serial poll\n"); os_remove_timer(board); return -EIO; } @@ -344,7 +342,7 @@ static int read_serial_poll_byte(gpib_board_t *board, unsigned int pad, int i; size_t nbytes; - dev_dbg(board->gpib_dev, "entering %s(), pad=%i sad=%i\n", __func__, pad, sad); + dev_dbg(board->gpib_dev, "entering pad=%i sad=%i\n", pad, sad); os_start_timer(board, usec_timeout); ret = ibcac(board, 1, 1); @@ -361,7 +359,7 @@ static int read_serial_poll_byte(gpib_board_t *board, unsigned int pad, ret = board->interface->command(board, cmd_string, i, &nbytes); if (ret < 0 || nbytes < i) { - pr_err("gpib: failed to setup serial poll\n"); + dev_err(board->gpib_dev, "failed to setup serial poll\n"); os_remove_timer(board); return -EIO; } @@ -371,7 +369,7 @@ static int read_serial_poll_byte(gpib_board_t *board, unsigned int pad, // read poll result ret = board->interface->read(board, result, 1, &end_flag, &nbytes); if (ret < 0 || nbytes < 1) { - pr_err("gpib: serial poll failed\n"); + dev_err(board->gpib_dev, "serial poll failed\n"); os_remove_timer(board); return -EIO; } @@ -386,8 +384,6 @@ static int cleanup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) int ret; size_t bytes_written; - dev_dbg(board->gpib_dev, "entering %s()\n", __func__); - os_start_timer(board, usec_timeout); ret = ibcac(board, 1, 1); if (ret < 0) { @@ -399,7 +395,7 @@ static int cleanup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) cmd_string[1] = UNT; ret = board->interface->command(board, cmd_string, 2, &bytes_written); if (ret < 0 || bytes_written < 2) { - pr_err("gpib: failed to disable serial poll\n"); + dev_err(board->gpib_dev, "failed to disable serial poll\n"); os_remove_timer(board); return -EIO; } @@ -435,8 +431,6 @@ int serial_poll_all(gpib_board_t *board, unsigned int usec_timeout) u8 result; unsigned int num_bytes = 0; - dev_dbg(board->gpib_dev, "entering %s()\n", __func__); - head = &board->device_list; if (head->next == head) return 0; @@ -482,12 +476,12 @@ int dvrsp(gpib_board_t *board, unsigned int pad, int sad, int retval; if ((status & CIC) == 0) { - pr_err("gpib: not CIC during serial poll\n"); + dev_err(board->gpib_dev, "not CIC during serial poll\n"); return -1; } if (pad > MAX_GPIB_PRIMARY_ADDRESS || sad > MAX_GPIB_SECONDARY_ADDRESS || sad < -1) { - pr_err("gpib: bad address for serial poll"); + dev_err(board->gpib_dev, "bad address for serial poll"); return -1; } @@ -544,20 +538,16 @@ int ibopen(struct inode *inode, struct file *filep) priv = filep->private_data; init_gpib_file_private((gpib_file_private_t *)filep->private_data); - dev_dbg(board->gpib_dev, "pid %i, gpib: opening minor %d\n", current->pid, minor); - if (board->use_count == 0) { int retval; retval = request_module("gpib%i", minor); - if (retval) { - dev_dbg(board->gpib_dev, "pid %i, gpib: request module returned %i\n", - current->pid, retval); - } + if (retval) + dev_dbg(board->gpib_dev, "request module returned %i\n", retval); } if (board->interface) { if (!try_module_get(board->provider_module)) { - pr_err("gpib: try_module_get() failed\n"); + dev_err(board->gpib_dev, "try_module_get() failed\n"); return -EIO; } board->use_count++; @@ -580,21 +570,19 @@ int ibclose(struct inode *inode, struct file *filep) board = &board_array[minor]; - dev_dbg(board->gpib_dev, "pid %i, closing minor %d\n", current->pid, minor); - if (priv) { desc = handle_to_descriptor(priv, 0); if (desc) { if (desc->autopoll_enabled) { - dev_dbg(board->gpib_dev, "pid %i, decrementing autospollers\n", - current->pid); + dev_dbg(board->gpib_dev, "decrementing autospollers\n"); if (board->autospollers > 0) board->autospollers--; else - pr_err("gpib: Attempt to decrement zero autospollers\n"); + dev_err(board->gpib_dev, + "Attempt to decrement zero autospollers\n"); } } else { - pr_err("gpib: Unexpected null gpib_descriptor\n"); + dev_err(board->gpib_dev, "Unexpected null gpib_descriptor\n"); } cleanup_open_devices(priv, board); @@ -630,8 +618,8 @@ long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (mutex_lock_interruptible(&board->big_gpib_mutex)) return -ERESTARTSYS; - dev_dbg(board->gpib_dev, "pid %i, ioctl %d, interface=%s, use=%d, onl=%d\n", - current->pid, cmd & 0xff, + dev_dbg(board->gpib_dev, "ioctl %d, interface=%s, use=%d, onl=%d\n", + cmd & 0xff, board->interface ? board->interface->name : "", board->use_count, board->online); @@ -647,13 +635,13 @@ long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) break; } if (!board->interface) { - pr_err("gpib: no gpib board configured on /dev/gpib%i\n", minor); + dev_err(board->gpib_dev, "no gpib board configured\n"); retval = -ENODEV; goto done; } if (file_priv->got_module == 0) { if (!try_module_get(board->provider_module)) { - pr_err("gpib: try_module_get() failed\n"); + dev_err(board->gpib_dev, "try_module_get() failed\n"); retval = -EIO; goto done; } @@ -699,8 +687,6 @@ long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) } if (!board->online) { - pr_err("gpib: ioctl %i invalid for offline board\n", - cmd & 0xff); retval = -EINVAL; goto done; } @@ -737,8 +723,6 @@ long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) spin_lock(&board->locking_pid_spinlock); if (current->pid != board->locking_pid) { spin_unlock(&board->locking_pid_spinlock); - pr_err("gpib: need to hold board lock to perform ioctl %i\n", - cmd & 0xff); retval = -EPERM; goto done; } @@ -830,10 +814,8 @@ static int board_type_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (board->online) { - pr_err("gpib: can't change board type while board is online.\n"); + if (board->online) return -EBUSY; - } retval = copy_from_user(&cmd, (void __user *)arg, sizeof(board_type_ioctl_t)); if (retval) @@ -1140,8 +1122,8 @@ static int increment_open_device_count(gpib_board_t *board, struct list_head *he for (list_ptr = head->next; list_ptr != head; list_ptr = list_ptr->next) { device = list_entry(list_ptr, gpib_status_queue_t, list); if (gpib_address_equal(device->pad, device->sad, pad, sad)) { - dev_dbg(board->gpib_dev, "pid %i, incrementing open count for pad %i, sad %i\n", - current->pid, device->pad, device->sad); + dev_dbg(board->gpib_dev, "incrementing open count for pad %i, sad %i\n", + device->pad, device->sad); device->reference_count++; return 0; } @@ -1158,8 +1140,7 @@ static int increment_open_device_count(gpib_board_t *board, struct list_head *he list_add(&device->list, head); - dev_dbg(board->gpib_dev, "pid %i, opened pad %i, sad %i\n", - current->pid, device->pad, device->sad); + dev_dbg(board->gpib_dev, "opened pad %i, sad %i\n", device->pad, device->sad); return 0; } @@ -1173,23 +1154,23 @@ static int subtract_open_device_count(gpib_board_t *board, struct list_head *hea for (list_ptr = head->next; list_ptr != head; list_ptr = list_ptr->next) { device = list_entry(list_ptr, gpib_status_queue_t, list); if (gpib_address_equal(device->pad, device->sad, pad, sad)) { - dev_dbg(board->gpib_dev, "pid %i, decrementing open count for pad %i, sad %i\n", - current->pid, device->pad, device->sad); + dev_dbg(board->gpib_dev, "decrementing open count for pad %i, sad %i\n", + device->pad, device->sad); if (count > device->reference_count) { - pr_err("gpib: bug! in %s()\n", __func__); + dev_err(board->gpib_dev, "bug! in %s()\n", __func__); return -EINVAL; } device->reference_count -= count; if (device->reference_count == 0) { - dev_dbg(board->gpib_dev, "pid %i, closing pad %i, sad %i\n", - current->pid, device->pad, device->sad); + dev_dbg(board->gpib_dev, "closing pad %i, sad %i\n", + device->pad, device->sad); list_del(list_ptr); kfree(device); } return 0; } } - pr_err("gpib: bug! tried to close address that was never opened!\n"); + dev_err(board->gpib_dev, "bug! tried to close address that was never opened!\n"); return -EINVAL; } @@ -1306,8 +1287,6 @@ static int serial_poll_ioctl(gpib_board_t *board, unsigned long arg) serial_poll_ioctl_t serial_cmd; int retval; - dev_dbg(board->gpib_dev, "pid %i, entering %s()\n", current->pid, __func__); - retval = copy_from_user(&serial_cmd, (void __user *)arg, sizeof(serial_cmd)); if (retval) return -EFAULT; @@ -1639,11 +1618,12 @@ static int autospoll_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, board->autospollers--; retval = 0; } else { - pr_err("gpib: tried to set number of autospollers negative\n"); + dev_err(board->gpib_dev, + "tried to set number of autospollers negative\n"); retval = -EINVAL; } } else { - pr_err("gpib: autopoll disable requested before enable\n"); + dev_err(board->gpib_dev, "autopoll disable requested before enable\n"); retval = -EINVAL; } } @@ -1661,10 +1641,8 @@ static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, if (lock_mutex) { retval = mutex_lock_interruptible(&board->user_mutex); - if (retval) { - pr_warn("gpib: ioctl interrupted while waiting on lock\n"); + if (retval) return -ERESTARTSYS; - } spin_lock(&board->locking_pid_spinlock); board->locking_pid = current->pid; @@ -1672,13 +1650,12 @@ static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, atomic_set(&file_priv->holding_mutex, 1); - dev_dbg(board->gpib_dev, "pid %i, locked board %d mutex\n", - current->pid, board->minor); + dev_dbg(board->gpib_dev, "locked board mutex\n"); } else { spin_lock(&board->locking_pid_spinlock); if (current->pid != board->locking_pid) { - pr_err("gpib: bug! pid %i tried to release mutex held by pid %i\n", - current->pid, board->locking_pid); + dev_err(board->gpib_dev, "bug! pid %i tried to release mutex held by pid %i\n", + current->pid, board->locking_pid); spin_unlock(&board->locking_pid_spinlock); return -EPERM; } @@ -1688,8 +1665,7 @@ static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, atomic_set(&file_priv->holding_mutex, 0); mutex_unlock(&board->user_mutex); - dev_dbg(board->gpib_dev, "pid %i, unlocked board %i mutex\n", - current->pid, board->minor); + dev_dbg(board->gpib_dev, "unlocked board mutex\n"); } return 0; } @@ -1704,7 +1680,7 @@ static int timeout_ioctl(gpib_board_t *board, unsigned long arg) return -EFAULT; board->usec_timeout = timeout; - dev_dbg(board->gpib_dev, "pid %i, timeout set to %i usec\n", current->pid, timeout); + dev_dbg(board->gpib_dev, "timeout set to %i usec\n", timeout); return 0; } @@ -1744,10 +1720,8 @@ static int set_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg) if (retval) return -EFAULT; - if (!board->interface->local_parallel_poll_mode) { - pr_warn("gpib: local/remote parallel poll mode not supported by driver."); - return -EIO; - } + if (!board->interface->local_parallel_poll_mode) + return -ENOENT; board->local_ppoll_mode = cmd != 0; board->interface->local_parallel_poll_mode(board, board->local_ppoll_mode); @@ -1887,7 +1861,7 @@ static int push_gpib_event_nolock(gpib_board_t *board, short event_type) event = kmalloc(sizeof(gpib_event_t), GFP_ATOMIC); if (!event) { queue->dropped_event = 1; - pr_err("gpib: failed to allocate memory for event\n"); + dev_err(board->gpib_dev, "failed to allocate memory for event\n"); return -ENOMEM; } @@ -2007,10 +1981,8 @@ static int t1_delay_ioctl(gpib_board_t *board, unsigned long arg) unsigned int delay; int retval; - if (!board->interface->t1_delay) { - pr_warn("gpib: t1 delay not implemented in driver!\n"); - return -EIO; - } + if (!board->interface->t1_delay) + return -ENOENT; retval = copy_from_user(&cmd, (void __user *)arg, sizeof(cmd)); if (retval) @@ -2087,7 +2059,6 @@ void gpib_unregister_driver(gpib_interface_t *interface) kfree(entry); } } - pr_info("gpib: unregistered %s interface\n", interface->name); } EXPORT_SYMBOL(gpib_unregister_driver); @@ -2184,7 +2155,7 @@ static int __init gpib_common_init_module(void) { int i; - pr_info("Linux-GPIB core driver\n"); + pr_info("GPIB core driver\n"); init_board_array(board_array, GPIB_MAX_NUM_BOARDS); if (register_chrdev(GPIB_CODE, "gpib", &ib_fops)) { pr_err("gpib: can't get major %d\n", GPIB_CODE); diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c index 5f6fa135f505..fd2874c2fff4 100644 --- a/drivers/staging/gpib/common/iblib.c +++ b/drivers/staging/gpib/common/iblib.c @@ -4,6 +4,8 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define dev_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "ibsys.h" #include #include @@ -24,10 +26,8 @@ int ibcac(gpib_board_t *board, int sync, int fallback_to_async) int status = ibstatus(board); int retval; - if ((status & CIC) == 0) { - pr_err("gpib: not CIC during %s()\n", __func__); - return -1; - } + if ((status & CIC) == 0) + return -EINVAL; if (status & ATN) return 0; @@ -76,13 +76,6 @@ static int check_for_command_acceptors(gpib_board_t *board) if (lines < 0) return lines; - if (lines & ValidATN) { - if ((lines & BusATN) == 0) { - pr_err("gpib: ATN not asserted in %s()?", __func__); - return 0; - } - } - if ((lines & ValidNRFD) && (lines & ValidNDAC)) { if ((lines & BusNRFD) == 0 && (lines & BusNDAC) == 0) return -ENOTCONN; @@ -112,10 +105,8 @@ int ibcmd(gpib_board_t *board, uint8_t *buf, size_t length, size_t *bytes_writte status = ibstatus(board); - if ((status & CIC) == 0) { - pr_err("gpib: cannot send command when not controller-in-charge\n"); - return -EIO; - } + if ((status & CIC) == 0) + return -EINVAL; os_start_timer(board, board->usec_timeout); @@ -145,14 +136,10 @@ int ibgts(gpib_board_t *board) int status = ibstatus(board); int retval; - if ((status & CIC) == 0) { - pr_err("gpib: not CIC during %s()\n", __func__); - return -1; - } + if ((status & CIC) == 0) + return -EINVAL; retval = board->interface->go_to_standby(board); /* go to standby */ - if (retval < 0) - pr_err("gpib: error while going to standby\n"); board->interface->update_status(board, 0); @@ -200,16 +187,15 @@ static int autospoll_thread(void *board_void) retval = autopoll_all_devices(board); module_put(board->provider_module); } else { - pr_err("gpib%i: %s: try_module_get() failed!\n", board->minor, __func__); + dev_err(board->gpib_dev, "try_module_get() failed!\n"); } if (retval <= 0) { - pr_err("gpib%i: %s: stuck SRQ\n", board->minor, __func__); + dev_err(board->gpib_dev, "stuck SRQ\n"); atomic_set(&board->stuck_srq, 1); // XXX could be better set_bit(SRQI_NUM, &board->status); } } - pr_info("gpib%i: exiting autospoll thread\n", board->minor); return retval; } @@ -230,7 +216,6 @@ int ibonline(gpib_board_t *board) retval = board->interface->attach(board, &board->config); if (retval < 0) { board->interface->detach(board); - pr_err("gpib: interface attach failed\n"); return retval; } /* nios2nommu on 2.6.11 uclinux kernel has weird problems @@ -241,13 +226,13 @@ int ibonline(gpib_board_t *board) "gpib%d_autospoll_kthread", board->minor); retval = IS_ERR(board->autospoll_task); if (retval) { - pr_err("gpib: failed to create autospoll thread\n"); + dev_err(board->gpib_dev, "failed to create autospoll thread\n"); board->interface->detach(board); return retval; } #endif board->online = 1; - dev_dbg(board->gpib_dev, "gpib: board online\n"); + dev_dbg(board->gpib_dev, "board online\n"); return 0; } @@ -265,14 +250,14 @@ int iboffline(gpib_board_t *board) if (board->autospoll_task && !IS_ERR(board->autospoll_task)) { retval = kthread_stop(board->autospoll_task); if (retval) - pr_err("gpib: kthread_stop returned %i\n", retval); + dev_err(board->gpib_dev, "kthread_stop returned %i\n", retval); board->autospoll_task = NULL; } board->interface->detach(board); gpib_deallocate_board(board); board->online = 0; - dev_dbg(board->gpib_dev, "gpib: board offline\n"); + dev_dbg(board->gpib_dev, "board offline\n"); return 0; } @@ -320,10 +305,8 @@ int ibrd(gpib_board_t *board, uint8_t *buf, size_t length, int *end_flag, size_t *nbytes = 0; *end_flag = 0; - if (length == 0) { - pr_warn("gpib: %s() called with zero length?\n", __func__); + if (length == 0) return 0; - } if (board->master) { retval = ibgts(board); @@ -338,10 +321,9 @@ int ibrd(gpib_board_t *board, uint8_t *buf, size_t length, int *end_flag, size_t do { ret = board->interface->read(board, buf, length - *nbytes, end_flag, &bytes_read); - if (ret < 0) { - pr_err("gpib read error\n"); + if (ret < 0) goto ibrd_out; - } + buf += bytes_read; *nbytes += bytes_read; if (need_resched()) @@ -370,10 +352,8 @@ int ibrpp(gpib_board_t *board, uint8_t *result) if (retval) return -1; - if (board->interface->parallel_poll(board, result)) { - pr_err("gpib: parallel poll failed\n"); - retval = -1; - } + retval = board->interface->parallel_poll(board, result); + os_remove_timer(board); return retval; } @@ -392,10 +372,8 @@ int ibrsv2(gpib_board_t *board, uint8_t status_byte, int new_reason_for_service) int board_status = ibstatus(board); const unsigned int MSS = status_byte & request_service_bit; - if ((board_status & CIC)) { - pr_err("gpib: interface requested service while CIC\n"); + if ((board_status & CIC)) return -EINVAL; - } if (MSS == 0 && new_reason_for_service) return -EINVAL; @@ -424,19 +402,15 @@ int ibrsv2(gpib_board_t *board, uint8_t status_byte, int new_reason_for_service) */ int ibsic(gpib_board_t *board, unsigned int usec_duration) { - if (board->master == 0) { - pr_err("gpib: tried to assert IFC when not system controller\n"); - return -1; - } + if (board->master == 0) + return -EINVAL; if (usec_duration < 100) usec_duration = 100; - if (usec_duration > 1000) { + if (usec_duration > 1000) usec_duration = 1000; - pr_warn("gpib: warning, shortening long udelay\n"); - } - dev_dbg(board->gpib_dev, "sending interface clear\n"); + dev_dbg(board->gpib_dev, "sending interface clear, delay = %ius\n", usec_duration); board->interface->interface_clear(board, 1); udelay(usec_duration); board->interface->interface_clear(board, 0); @@ -444,14 +418,12 @@ int ibsic(gpib_board_t *board, unsigned int usec_duration) return 0; } + /* FIXME make int */ void ibrsc(gpib_board_t *board, int request_control) { board->master = request_control != 0; - if (!board->interface->request_system_control) { - pr_err("gpib: bug! driver does not implement request_system_control()\n"); - return; - } - board->interface->request_system_control(board, request_control); + if (board->interface->request_system_control) + board->interface->request_system_control(board, request_control); } /* @@ -460,10 +432,8 @@ void ibrsc(gpib_board_t *board, int request_control) */ int ibsre(gpib_board_t *board, int enable) { - if (board->master == 0) { - pr_err("gpib: tried to set REN when not system controller\n"); - return -1; - } + if (board->master == 0) + return -EINVAL; board->interface->remote_enable(board, enable); /* set or clear REN */ if (!enable) @@ -479,10 +449,9 @@ int ibsre(gpib_board_t *board, int enable) */ int ibpad(gpib_board_t *board, unsigned int addr) { - if (addr > MAX_GPIB_PRIMARY_ADDRESS) { - pr_err("gpib: invalid primary address %u\n", addr); - return -1; - } + if (addr > MAX_GPIB_PRIMARY_ADDRESS) + return -EINVAL; + board->pad = addr; if (board->online) board->interface->primary_address(board, board->pad); @@ -498,10 +467,8 @@ int ibpad(gpib_board_t *board, unsigned int addr) */ int ibsad(gpib_board_t *board, int addr) { - if (addr > MAX_GPIB_SECONDARY_ADDRESS) { - pr_err("gpib: invalid secondary address %i\n", addr); - return -1; - } + if (addr > MAX_GPIB_SECONDARY_ADDRESS) + return -EINVAL; board->sad = addr; if (board->online) { if (board->sad >= 0) @@ -523,10 +490,8 @@ int ibeos(gpib_board_t *board, int eos, int eosflags) { int retval; - if (eosflags & ~EOS_MASK) { - pr_err("bad EOS modes\n"); + if (eosflags & ~EOS_MASK) return -EINVAL; - } if (eosflags & REOS) { retval = board->interface->enable_eos(board, eos, eosflags & BIN); } else { @@ -717,10 +682,8 @@ int ibwrt(gpib_board_t *board, uint8_t *buf, size_t cnt, int send_eoi, size_t *b int ret = 0; int retval; - if (cnt == 0) { - pr_warn("gpib: %s() called with zero length?\n", __func__); + if (cnt == 0) return 0; - } if (board->master) { retval = ibgts(board); From f2bda0b660bd3759cdf54ad8823ddf0d4fc80c82 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:57 +0100 Subject: [PATCH 0298/2157] staging: gpib: fluke console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "fluke_gpib" in platform_driver struct. Remove commented printk's. Change pr_err to dev_err wherever possible Remove "fluke_gpib:" prefix in messages since this is printed with the module name. Correct dev_err message erroneously containing cb7210 identifier. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-7-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/eastwood/fluke_gpib.c | 90 +++++++--------------- 1 file changed, 27 insertions(+), 63 deletions(-) diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index d5b1a03abf11..012ce9cb8999 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -7,6 +7,10 @@ * copyright: (C) 2006, 2010, 2015 Fluke Corporation ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "fluke_gpib.h" #include "gpibP.h" @@ -263,9 +267,9 @@ static int wait_for_read(gpib_board_t *board) if (wait_event_interruptible(board->wait, lacs_or_read_ready(board) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) @@ -316,20 +320,17 @@ static int wait_for_data_out_ready(gpib_board_t *board) struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; int retval = 0; -// printk("%s: enter\n", __FUNCTION__); if (wait_event_interruptible(board->wait, (test_bit(TACS_NUM, &board->status) && source_handshake_is_sgns(e_priv)) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) retval = -EINTR; -// printk("%s: exit, retval=%i\n", __FUNCTION__, retval); return retval; } @@ -338,7 +339,6 @@ static int wait_for_sids_or_sgns(gpib_board_t *board) struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; int retval = 0; -// printk("%s: enter\n", __FUNCTION__); if (wait_event_interruptible(board->wait, source_handshake_is_sids_or_sgns(e_priv) || @@ -350,7 +350,6 @@ static int wait_for_sids_or_sgns(gpib_board_t *board) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) retval = -EINTR; -// printk("%s: exit, retval=%i\n", __FUNCTION__, retval); return retval; } @@ -362,7 +361,6 @@ static void fluke_dma_callback(void *arg) unsigned long flags; spin_lock_irqsave(&board->spinlock, flags); -// printk("%s: enter\n", __FUNCTION__); nec7210_set_reg_bits(nec_priv, IMR1, HR_DOIE | HR_DIIE, HR_DOIE | HR_DIIE); wake_up_interruptible(&board->wait); @@ -370,7 +368,7 @@ static void fluke_dma_callback(void *arg) fluke_gpib_internal_interrupt(board); clear_bit(DMA_WRITE_IN_PROGRESS_BN, &nec_priv->state); clear_bit(DMA_READ_IN_PROGRESS_BN, &nec_priv->state); -// printk("%s: exit\n", __FUNCTION__); + spin_unlock_irqrestore(&board->spinlock, flags); } @@ -385,7 +383,7 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, struct dma_async_tx_descriptor *tx_desc; *bytes_written = 0; -// printk("%s: enter\n", __FUNCTION__); + if (WARN_ON_ONCE(length > e_priv->dma_buffer_size)) return -EFAULT; dmaengine_terminate_all(e_priv->dma_channel); @@ -403,7 +401,7 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, tx_desc = dmaengine_prep_slave_single(e_priv->dma_channel, address, length, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!tx_desc) { - pr_err("fluke_gpib: failed to allocate dma transmit descriptor\n"); + dev_err(board->gpib_dev, "failed to allocate dma transmit descriptor\n"); retval = -ENOMEM; goto cleanup; } @@ -419,10 +417,8 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, clear_bit(WRITE_READY_BN, &nec_priv->state); set_bit(DMA_WRITE_IN_PROGRESS_BN, &nec_priv->state); - // printk("%s: in spin lock\n", __FUNCTION__); spin_unlock_irqrestore(&board->spinlock, flags); -// printk("%s: waiting for write.\n", __FUNCTION__); // suspend until message is sent if (wait_event_interruptible(board->wait, ((readl(e_priv->write_transfer_counter) & @@ -430,7 +426,6 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted!\n"); retval = -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) @@ -459,7 +454,6 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, cleanup: dma_unmap_single(board->dev, address, length, DMA_TO_DEVICE); -// printk("%s: exit, retval=%d\n", __FUNCTION__, retval); return retval; } @@ -474,7 +468,7 @@ static int fluke_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length size_t dma_remainder = remainder; if (!e_priv->dma_channel) { - pr_err("fluke_gpib: No dma channel available, cannot do accel write."); + dev_err(board->gpib_dev, "No dma channel available, cannot do accel write."); return -ENXIO; } @@ -486,7 +480,6 @@ static int fluke_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length if (send_eoi) --dma_remainder; -// printk("%s: entering while loop\n", __FUNCTION__); while (dma_remainder > 0) { size_t num_bytes; @@ -512,7 +505,7 @@ static int fluke_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length //handle sending of last byte with eoi if (send_eoi) { size_t num_bytes; - // printk("%s: handling last byte\n", __FUNCTION__); + if (WARN_ON_ONCE(remainder != 1)) return -EFAULT; @@ -533,7 +526,6 @@ static int fluke_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length return retval; remainder -= num_bytes; } -// printk("%s: bytes send=%i\n", __FUNCTION__, (int)(length - remainder)); return 0; } @@ -544,7 +536,7 @@ static int fluke_get_dma_residue(struct dma_chan *chan, dma_cookie_t cookie) result = dmaengine_pause(chan); if (result < 0) { - pr_err("fluke_gpib: dma pause failed?\n"); + pr_err("dma pause failed?\n"); return result; } dmaengine_tx_status(chan, cookie, &state); @@ -567,10 +559,6 @@ static int fluke_dma_read(gpib_board_t *board, uint8_t *buffer, int i; static const int timeout = 10; - // printk("%s: enter, bus_address=0x%x, length=%i\n", __FUNCTION__, - // (unsigned)bus_address, - // (int)length); - *bytes_read = 0; *end = 0; if (length == 0) @@ -589,7 +577,7 @@ static int fluke_dma_read(gpib_board_t *board, uint8_t *buffer, bus_address, length, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!tx_desc) { - pr_err("fluke_gpib: failed to allocate dma transmit descriptor\n"); + dev_err(board->gpib_dev, "failed to allocate dma transmit descriptor\n"); dma_unmap_single(NULL, bus_address, length, DMA_FROM_DEVICE); return -EIO; } @@ -608,14 +596,12 @@ static int fluke_dma_read(gpib_board_t *board, uint8_t *buffer, clear_bit(READ_READY_BN, &nec_priv->state); spin_unlock_irqrestore(&board->spinlock, flags); -// printk("waiting for data transfer.\n"); // wait for data to transfer if (wait_event_interruptible(board->wait, test_bit(DMA_READ_IN_PROGRESS_BN, &nec_priv->state) == 0 || test_bit(RECEIVED_END_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_warn("fluke: dma read wait interrupted\n"); retval = -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) @@ -682,10 +668,6 @@ static int fluke_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int retval = 0; size_t dma_nbytes; -/* printk("%s: enter, buffer=0x%p, length=%i\n", __FUNCTION__, - * buffer, (int)length); - * printk("\t dma_buffer=0x%p\n", e_priv->dma_buffer); - */ *end = 0; *bytes_read = 0; @@ -699,7 +681,6 @@ static int fluke_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, nec7210_release_rfd_holdoff(board, nec_priv); -// printk("%s: entering while loop\n", __FUNCTION__); while (remain > 0) { transfer_size = (e_priv->dma_buffer_size < remain) ? e_priv->dma_buffer_size : remain; @@ -709,14 +690,12 @@ static int fluke_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, *bytes_read += dma_nbytes; if (*end) break; - if (retval < 0) { -// printk("%s: early exit, retval=%i\n", __FUNCTION__, (int)retval); + if (retval < 0) return retval; - } if (need_resched()) schedule(); } -// printk("%s: exit, retval=%i\n", __FUNCTION__, (int)retval); + return retval; } @@ -830,13 +809,6 @@ irqreturn_t fluke_gpib_internal_interrupt(gpib_board_t *board) if (nec7210_interrupt_have_status(board, nec_priv, status1, status2) == IRQ_HANDLED) retval = IRQ_HANDLED; -/* - * if((status1 & nec_priv->reg_bits[IMR1]) || - * (status2 & (nec_priv->reg_bits[IMR2] & IMR2_ENABLE_INTR_MASK))) - * { - * printk("fluke: status1 0x%x, status2 0x%x\n", status1, status2); - * } - */ if (read_byte(nec_priv, ADR0) & DATA_IN_STATUS) { if (test_bit(RFD_HOLDOFF_BN, &nec_priv->state)) @@ -954,7 +926,7 @@ static int fluke_init(struct fluke_priv *e_priv, gpib_board_t *board, int handsh /* poll so we can detect ATN changes */ if (gpib_request_pseudo_irq(board, fluke_gpib_interrupt)) { - pr_err("fluke_gpib: failed to allocate pseudo_irq\n"); + dev_err(board->gpib_dev, "failed to allocate pseudo_irq\n"); return -EINVAL; } @@ -984,7 +956,7 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con dma_cap_mask_t dma_cap; if (!fluke_gpib_pdev) { - pr_err("No gpib platform device was found, attach failed.\n"); + dev_err(board->gpib_dev, "No fluke device was found, attach failed.\n"); return -ENODEV; } @@ -999,7 +971,7 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con res = platform_get_resource(fluke_gpib_pdev, IORESOURCE_MEM, 0); if (!res) { - dev_err(&fluke_gpib_pdev->dev, "Unable to locate mmio resource for cb7210 gpib\n"); + dev_err(&fluke_gpib_pdev->dev, "Unable to locate mmio resource\n"); return -ENODEV; } @@ -1012,10 +984,7 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con e_priv->gpib_iomem_res = res; nec_priv->mmiobase = ioremap(e_priv->gpib_iomem_res->start, - resource_size(e_priv->gpib_iomem_res)); - pr_info("gpib: mmiobase %llx remapped to %p, length=%d\n", - (u64)e_priv->gpib_iomem_res->start, - nec_priv->mmiobase, (int)resource_size(e_priv->gpib_iomem_res)); + resource_size(e_priv->gpib_iomem_res)); if (!nec_priv->mmiobase) { dev_err(&fluke_gpib_pdev->dev, "Could not map I/O memory\n"); return -ENOMEM; @@ -1050,19 +1019,14 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con e_priv->write_transfer_counter = ioremap(e_priv->write_transfer_counter_res->start, resource_size(e_priv->write_transfer_counter_res)); - pr_info("gpib: write transfer counter %lx remapped to %p, length=%d\n", - (unsigned long)e_priv->write_transfer_counter_res->start, - e_priv->write_transfer_counter, - (int)resource_size(e_priv->write_transfer_counter_res)); if (!e_priv->write_transfer_counter) { dev_err(&fluke_gpib_pdev->dev, "Could not map I/O memory\n"); return -ENOMEM; } irq = platform_get_irq(fluke_gpib_pdev, 0); - pr_info("gpib: irq %d\n", irq); if (irq < 0) { - dev_err(&fluke_gpib_pdev->dev, "fluke_gpib: request for IRQ failed\n"); + dev_err(&fluke_gpib_pdev->dev, "failed to obtain IRQ\n"); return -EBUSY; } retval = request_irq(irq, fluke_gpib_interrupt, isr_flags, fluke_gpib_pdev->name, board); @@ -1078,7 +1042,7 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con dma_cap_set(DMA_SLAVE, dma_cap); e_priv->dma_channel = dma_request_channel(dma_cap, gpib_dma_channel_filter, NULL); if (!e_priv->dma_channel) { - pr_err("fluke_gpib: failed to allocate a dma channel.\n"); + dev_err(board->gpib_dev, "failed to allocate a dma channel.\n"); // we don't error out here because unaccel interface will still // work without dma } @@ -1142,7 +1106,7 @@ MODULE_DEVICE_TABLE(of, fluke_gpib_of_match); static struct platform_driver fluke_gpib_platform_driver = { .driver = { - .name = "fluke_gpib", + .name = DRV_NAME, .of_match_table = fluke_gpib_of_match, }, .probe = &fluke_gpib_probe @@ -1154,25 +1118,25 @@ static int __init fluke_init_module(void) result = platform_driver_register(&fluke_gpib_platform_driver); if (result) { - pr_err("fluke_gpib: platform_driver_register failed: error = %d\n", result); + pr_err("platform_driver_register failed: error = %d\n", result); return result; } result = gpib_register_driver(&fluke_unaccel_interface, THIS_MODULE); if (result) { - pr_err("fluke_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_unaccel; } result = gpib_register_driver(&fluke_hybrid_interface, THIS_MODULE); if (result) { - pr_err("fluke_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_hybrid; } result = gpib_register_driver(&fluke_interface, THIS_MODULE); if (result) { - pr_err("fluke_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_interface; } From acdf4581545b83e9eeb5663fd004e745106f8818 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:58 +0100 Subject: [PATCH 0299/2157] staging: gpib: fmh console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "fmh_gpib" in platform_driver and pci_driver structs. Remove commented printk's. Change pr_err to dev_err wherever possible Remove "fmh_gpib_gpib:" prefix in messages since this is printed with the module name. Remove write interrupted dev_dbg messages. Remove read wait interrupted pr_warn. Change dev_notice attach to dev_dbg Change dev_info to dev_dbg. Correct dev_err message erroneously containing cb7210 identifier. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-8-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 140 +++++++++-------------- 1 file changed, 53 insertions(+), 87 deletions(-) diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index f950e7cdd8f8..d62c83368518 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -12,6 +12,10 @@ * (C) 2017 Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "fmh_gpib.h" #include "gpibP.h" @@ -334,7 +338,6 @@ static int wait_for_data_out_ready(gpib_board_t *board) struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; int retval = 0; -// printk("%s: enter\n", __FUNCTION__); if (wait_event_interruptible(board->wait, (test_bit(TACS_NUM, &board->status) && @@ -348,7 +351,7 @@ static int wait_for_data_out_ready(gpib_board_t *board) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) retval = -EINTR; -// printk("%s: exit, retval=%i\n", __FUNCTION__, retval); + return retval; } @@ -360,7 +363,6 @@ static void fmh_gpib_dma_callback(void *arg) unsigned long flags; spin_lock_irqsave(&board->spinlock, flags); -// printk("%s: enter\n", __FUNCTION__); nec7210_set_reg_bits(nec_priv, IMR1, HR_DOIE | HR_DIIE, HR_DOIE | HR_DIIE); wake_up_interruptible(&board->wait); @@ -370,7 +372,6 @@ static void fmh_gpib_dma_callback(void *arg) clear_bit(DMA_WRITE_IN_PROGRESS_BN, &nec_priv->state); clear_bit(DMA_READ_IN_PROGRESS_BN, &nec_priv->state); - // printk("%s: exit\n", __FUNCTION__); spin_unlock_irqrestore(&board->spinlock, flags); } @@ -399,14 +400,13 @@ static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t lengt struct dma_async_tx_descriptor *tx_desc; *bytes_written = 0; -// printk("%s: enter\n", __FUNCTION__); if (WARN_ON_ONCE(length > e_priv->dma_buffer_size)) return -EFAULT; dmaengine_terminate_all(e_priv->dma_channel); memcpy(e_priv->dma_buffer, buffer, length); address = dma_map_single(board->dev, e_priv->dma_buffer, length, DMA_TO_DEVICE); if (dma_mapping_error(board->dev, address)) - pr_err("dma mapping error in dma write!\n"); + dev_err(board->gpib_dev, "dma mapping error in dma write!\n"); /* program dma controller */ retval = fmh_gpib_config_dma(board, 1); if (retval) @@ -415,7 +415,7 @@ static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t lengt tx_desc = dmaengine_prep_slave_single(e_priv->dma_channel, address, length, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!tx_desc) { - pr_err("fmh_gpib_gpib: failed to allocate dma transmit descriptor\n"); + dev_err(board->gpib_dev, "failed to allocate dma transmit descriptor\n"); retval = -ENOMEM; goto cleanup; } @@ -432,19 +432,17 @@ static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t lengt dma_async_issue_pending(e_priv->dma_channel); clear_bit(WRITE_READY_BN, &nec_priv->state); set_bit(DMA_WRITE_IN_PROGRESS_BN, &nec_priv->state); -// printk("%s: in spin lock\n", __FUNCTION__); + spin_unlock_irqrestore(&board->spinlock, flags); -// printk("%s: waiting for write.\n", __FUNCTION__); // suspend until message is sent if (wait_event_interruptible(board->wait, fmh_gpib_all_bytes_are_sent(e_priv) || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted!\n"); + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) @@ -464,12 +462,8 @@ static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t lengt fifo_xfer_counter_mask); if (WARN_ON_ONCE(*bytes_written > length)) return -EFAULT; - /* printk("length=%i, *bytes_written=%i, residue=%i, retval=%i\n", - * length, *bytes_written, get_dma_residue(e_priv->dma_channel), retval); - */ cleanup: dma_unmap_single(board->dev, address, length, DMA_TO_DEVICE); -// printk("%s: exit, retval=%d\n", __FUNCTION__, retval); return retval; } @@ -484,7 +478,7 @@ static int fmh_gpib_accel_write(gpib_board_t *board, uint8_t *buffer, size_t dma_remainder = remainder; if (!e_priv->dma_channel) { - pr_err("fmh_gpib_gpib: No dma channel available, cannot do accel write."); + dev_err(board->gpib_dev, "No dma channel available, cannot do accel write."); return -ENXIO; } @@ -498,7 +492,6 @@ static int fmh_gpib_accel_write(gpib_board_t *board, uint8_t *buffer, if (send_eoi) --dma_remainder; -// printk("%s: entering while loop\n", __FUNCTION__); while (dma_remainder > 0) { size_t num_bytes; @@ -524,7 +517,7 @@ static int fmh_gpib_accel_write(gpib_board_t *board, uint8_t *buffer, //handle sending of last byte with eoi if (send_eoi) { size_t num_bytes; - // printk("%s: handling last byte\n", __FUNCTION__); + if (WARN_ON_ONCE(remainder != 1)) return -EFAULT; @@ -545,7 +538,6 @@ static int fmh_gpib_accel_write(gpib_board_t *board, uint8_t *buffer, return retval; remainder -= num_bytes; } -// printk("%s: bytes send=%i\n", __FUNCTION__, (int)(length - remainder)); return 0; } @@ -556,7 +548,7 @@ static int fmh_gpib_get_dma_residue(struct dma_chan *chan, dma_cookie_t cookie) result = dmaengine_pause(chan); if (result < 0) { - pr_err("fmh_gpib_gpib: dma pause failed?\n"); + pr_err("dma pause failed?\n"); return result; } dmaengine_tx_status(chan, cookie, &state); @@ -570,7 +562,6 @@ static int wait_for_tx_fifo_half_empty(gpib_board_t *board) struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; int retval = 0; -// printk("%s: enter\n", __FUNCTION__); if (wait_event_interruptible(board->wait, (test_bit(TACS_NUM, &board->status) && @@ -584,7 +575,7 @@ static int wait_for_tx_fifo_half_empty(gpib_board_t *board) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) retval = -EINTR; -// printk("%s: exit, retval=%i\n", __FUNCTION__, retval); + return retval; } @@ -600,7 +591,6 @@ static int fmh_gpib_fifo_write_countable(gpib_board_t *board, uint8_t *buffer, unsigned int remainder; *bytes_written = 0; -// printk("%s: enter\n", __FUNCTION__); if (WARN_ON_ONCE(length > fifo_xfer_counter_mask)) return -EFAULT; @@ -635,10 +625,9 @@ static int fmh_gpib_fifo_write_countable(gpib_board_t *board, uint8_t *buffer, fmh_gpib_all_bytes_are_sent(e_priv) || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted!\n"); + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &nec_priv->state)) @@ -655,11 +644,7 @@ static int fmh_gpib_fifo_write_countable(gpib_board_t *board, uint8_t *buffer, fifo_xfer_counter_mask); if (WARN_ON_ONCE(*bytes_written > length)) return -EFAULT; - /* printk("length=%i, *bytes_written=%i, residue=%i, retval=%i\n", - * length, *bytes_written, get_dma_residue(e_priv->dma_channel), retval); - */ -// printk("%s: exit, retval=%d\n", __FUNCTION__, retval); return retval; } @@ -678,8 +663,6 @@ static int fmh_gpib_fifo_write(gpib_board_t *board, uint8_t *buffer, size_t leng clear_bit(DEV_CLEAR_BN, &nec_priv->state); // XXX FIXME -// printk("%s: entering while loop\n", __FUNCTION__); - while (remainder > 0) { size_t num_bytes; int last_pass; @@ -708,7 +691,7 @@ static int fmh_gpib_fifo_write(gpib_board_t *board, uint8_t *buffer, size_t leng if (need_resched()) schedule(); } -// printk("%s: bytes send=%i\n", __FUNCTION__, (int)(length - remainder)); + return retval; } @@ -725,10 +708,6 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, struct dma_async_tx_descriptor *tx_desc; dma_cookie_t dma_cookie; - // printk("%s: enter, bus_address=0x%x, length=%i\n", __FUNCTION__, - //(unsigned)bus_address, -// (int)length); - *bytes_read = 0; *end = 0; if (length == 0) @@ -737,7 +716,7 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, bus_address = dma_map_single(board->dev, e_priv->dma_buffer, length, DMA_FROM_DEVICE); if (dma_mapping_error(board->dev, bus_address)) - pr_err("dma mapping error in dma read!"); + dev_err(board->gpib_dev, "dma mapping error in dma read!"); /* program dma controller */ retval = fmh_gpib_config_dma(board, 0); @@ -749,7 +728,7 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, length, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!tx_desc) { - pr_err("fmh_gpib_gpib: failed to allocate dma transmit descriptor\n"); + dev_err(board->gpib_dev, "failed to allocate dma transmit descriptor\n"); dma_unmap_single(board->dev, bus_address, length, DMA_FROM_DEVICE); return -EIO; } @@ -769,7 +748,7 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, set_bit(DMA_READ_IN_PROGRESS_BN, &nec_priv->state); spin_unlock_irqrestore(&board->spinlock, flags); -// printk("waiting for data transfer.\n"); + // wait for data to transfer wait_retval = wait_event_interruptible(board->wait, test_bit(DMA_READ_IN_PROGRESS_BN, &nec_priv->state) @@ -777,10 +756,9 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, test_bit(RECEIVED_END_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status)); - if (wait_retval) { - pr_warn("fmh_gpib: dma read wait interrupted\n"); + if (wait_retval) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) @@ -825,8 +803,6 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, *end = 1; } spin_unlock_irqrestore(&board->spinlock, flags); -// printk("\tbytes_read=%i, residue=%i, end=%i, retval=%i, wait_retval=%i\n", -// *bytes_read, residue, *end, retval, wait_retval); return retval; } @@ -925,10 +901,6 @@ static int fmh_gpib_fifo_read_countable(gpib_board_t *board, uint8_t *buffer, struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; int retval = 0; - // printk("%s: enter, bus_address=0x%x, length=%i\n", __FUNCTION__, - // (unsigned)bus_address, -// (int)length); - *bytes_read = 0; *end = 0; if (length == 0) @@ -977,9 +949,6 @@ static int fmh_gpib_fifo_read_countable(gpib_board_t *board, uint8_t *buffer, *end = 1; } -// printk("\tbytes_read=%i, end=%i, retval=%i, wait_retval=%i\n", -// *bytes_read, *end, retval, wait_retval); - return retval; } @@ -1376,7 +1345,7 @@ static int fmh_gpib_device_match(struct device *dev, const void *data) if (config->serial_number) return 0; - dev_notice(dev, "matched: %s\n", of_node_full_name(dev_of_node((dev)))); + dev_dbg(dev, "matched: %s\n", of_node_full_name(dev_of_node((dev)))); return 1; } @@ -1393,7 +1362,7 @@ static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t * board->dev = driver_find_device(&fmh_gpib_platform_driver.driver, NULL, (const void *)config, &fmh_gpib_device_match); if (!board->dev) { - pr_err("No matching fmh_gpib_core device was found, attach failed."); + dev_err(board->gpib_dev, "No matching fmh_gpib_core device was found, attach failed."); return -ENODEV; } // currently only used to mark the device as already attached @@ -1409,7 +1378,7 @@ static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gpib_control_status"); if (!res) { - dev_err(board->dev, "Unable to locate mmio resource for cb7210 gpib\n"); + dev_err(board->dev, "Unable to locate mmio resource\n"); return -ENODEV; } @@ -1422,13 +1391,13 @@ static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t * e_priv->gpib_iomem_res = res; nec_priv->mmiobase = ioremap(e_priv->gpib_iomem_res->start, - resource_size(e_priv->gpib_iomem_res)); + resource_size(e_priv->gpib_iomem_res)); if (!nec_priv->mmiobase) { - dev_err(board->dev, "Could not map I/O memory for gpib\n"); + dev_err(board->dev, "Could not map I/O memory\n"); return -ENOMEM; } - dev_info(board->dev, "iobase %pr remapped to %p\n", - e_priv->gpib_iomem_res, nec_priv->mmiobase); + dev_dbg(board->dev, "iobase %pr remapped to %p\n", + e_priv->gpib_iomem_res, nec_priv->mmiobase); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dma_fifos"); if (!res) { @@ -1448,14 +1417,13 @@ static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t * dev_err(board->dev, "Could not map I/O memory for fifos\n"); return -ENOMEM; } - dev_info(board->dev, "dma fifos 0x%lx remapped to %p, length=%ld\n", - (unsigned long)e_priv->dma_port_res->start, e_priv->fifo_base, - (unsigned long)resource_size(e_priv->dma_port_res)); + dev_dbg(board->dev, "dma fifos 0x%lx remapped to %p, length=%ld\n", + (unsigned long)e_priv->dma_port_res->start, e_priv->fifo_base, + (unsigned long)resource_size(e_priv->dma_port_res)); irq = platform_get_irq(pdev, 0); - pr_info("gpib: irq %d\n", irq); if (irq < 0) { - dev_err(board->dev, "fmh_gpib_gpib: request for IRQ failed\n"); + dev_err(board->dev, "request for IRQ failed\n"); return -EBUSY; } retval = request_irq(irq, fmh_gpib_interrupt, IRQF_SHARED, pdev->name, board); @@ -1546,7 +1514,7 @@ static int fmh_gpib_pci_attach_impl(gpib_board_t *board, const gpib_board_config pci_device = gpib_pci_get_device(config, BOGUS_PCI_VENDOR_ID_FLUKE, BOGUS_PCI_DEVICE_ID_FLUKE_BLADERUNNER, NULL); if (!pci_device) { - pr_err("No matching fmh_gpib_core pci device was found, attach failed."); + dev_err(board->gpib_dev, "No matching fmh_gpib_core pci device was found, attach failed."); return -ENODEV; } board->dev = &pci_device->dev; @@ -1563,34 +1531,32 @@ static int fmh_gpib_pci_attach_impl(gpib_board_t *board, const gpib_board_config return -EIO; } e_priv->gpib_iomem_res = &pci_device->resource[gpib_control_status_pci_resource_index]; - e_priv->dma_port_res = &pci_device->resource[gpib_fifo_pci_resource_index]; + e_priv->dma_port_res = &pci_device->resource[gpib_fifo_pci_resource_index]; nec_priv->mmiobase = ioremap(pci_resource_start(pci_device, - gpib_control_status_pci_resource_index), - pci_resource_len(pci_device, - gpib_control_status_pci_resource_index)); - dev_info(board->dev, "base address for gpib control/status registers remapped to 0x%p\n", - nec_priv->mmiobase); + gpib_control_status_pci_resource_index), + pci_resource_len(pci_device, + gpib_control_status_pci_resource_index)); + dev_dbg(board->dev, "base address for gpib control/status registers remapped to 0x%p\n", + nec_priv->mmiobase); if (e_priv->dma_port_res->flags & IORESOURCE_MEM) { e_priv->fifo_base = ioremap(pci_resource_start(pci_device, gpib_fifo_pci_resource_index), pci_resource_len(pci_device, gpib_fifo_pci_resource_index)); - dev_info(board->dev, "base address for gpib fifo registers remapped to 0x%p\n", - e_priv->fifo_base); + dev_dbg(board->dev, "base address for gpib fifo registers remapped to 0x%p\n", + e_priv->fifo_base); } else { e_priv->fifo_base = NULL; - dev_info(board->dev, "hardware has no gpib fifo registers.\n"); + dev_dbg(board->dev, "hardware has no gpib fifo registers.\n"); } if (pci_device->irq) { retval = request_irq(pci_device->irq, fmh_gpib_interrupt, IRQF_SHARED, KBUILD_MODNAME, board); if (retval) { - dev_err(board->dev, - "cannot register interrupt handler err=%d\n", - retval); + dev_err(board->dev, "cannot register interrupt handler err=%d\n", retval); return retval; } } @@ -1615,7 +1581,7 @@ int fmh_gpib_pci_attach_holdoff_end(gpib_board_t *board, const gpib_board_config retval = fmh_gpib_pci_attach_impl(board, config, HR_HLDE); e_priv = board->private_data; if (retval == 0 && e_priv && e_priv->supports_fifo_interrupts == 0) { - pr_err("fmh_gpib: your fmh_gpib_core does not appear to support fifo interrupts. Try the fmh_gpib_pci_unaccel board type instead."); + dev_err(board->gpib_dev, "your fmh_gpib_core does not appear to support fifo interrupts. Try the fmh_gpib_pci_unaccel board type instead."); return -EIO; } return retval; @@ -1662,7 +1628,7 @@ MODULE_DEVICE_TABLE(of, fmh_gpib_of_match); static struct platform_driver fmh_gpib_platform_driver = { .driver = { - .name = "fmh_gpib", + .name = DRV_NAME, .owner = THIS_MODULE, .of_match_table = fmh_gpib_of_match, }, @@ -1681,7 +1647,7 @@ static const struct pci_device_id fmh_gpib_pci_match[] = { MODULE_DEVICE_TABLE(pci, fmh_gpib_pci_match); static struct pci_driver fmh_gpib_pci_driver = { - .name = "fmh_gpib", + .name = DRV_NAME, .id_table = fmh_gpib_pci_match, .probe = &fmh_gpib_pci_probe }; @@ -1692,37 +1658,37 @@ static int __init fmh_gpib_init_module(void) result = platform_driver_register(&fmh_gpib_platform_driver); if (result) { - pr_err("fmh_gpib: platform_driver_register failed: error = %d\n", result); + pr_err("platform_driver_register failed: error = %d\n", result); return result; } result = pci_register_driver(&fmh_gpib_pci_driver); if (result) { - pr_err("fmh_gpib: pci_register_driver failed: error = %d\n", result); + pr_err("pci_register_driver failed: error = %d\n", result); goto err_pci_driver; } result = gpib_register_driver(&fmh_gpib_unaccel_interface, THIS_MODULE); if (result) { - pr_err("fmh_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_unaccel; } result = gpib_register_driver(&fmh_gpib_interface, THIS_MODULE); if (result) { - pr_err("fmh_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_interface; } result = gpib_register_driver(&fmh_gpib_pci_unaccel_interface, THIS_MODULE); if (result) { - pr_err("fmh_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pci_unaccel; } result = gpib_register_driver(&fmh_gpib_pci_interface, THIS_MODULE); if (result) { - pr_err("fmh_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pci; } From df2e3152f1cb798ed8ffa7e488c50261e6dc50e3 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:46:59 +0100 Subject: [PATCH 0300/2157] staging: gpib: gpio bitbang console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Change pr_info in the dbg_printk macro to dev_dbg. In order for dbg_printk macro to have the board variable defined the signatures and calls to bb_buffer_print and set_atn were changed to include board as a parameter. Remove the #ifdef CONFIG_GPIB_DEBUG code. Remove commented dbk_printk's. Change dbg_printk(0, to dev_err where an error message is needed. Remove dbg_printk for "not implemented" functions. Remove "gpib_bitbang:" prefix in pr_err as it will be printed with the module name. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-9-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/gpio/gpib_bitbang.c | 56 ++++++++++-------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/staging/gpib/gpio/gpib_bitbang.c b/drivers/staging/gpib/gpio/gpib_bitbang.c index 828c99ea613f..2012db188f58 100644 --- a/drivers/staging/gpib/gpio/gpib_bitbang.c +++ b/drivers/staging/gpib/gpio/gpib_bitbang.c @@ -25,6 +25,8 @@ * device support (non master operation) */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt #define NAME KBUILD_MODNAME #define ENABLE_IRQ(IRQ, TYPE) irq_set_irq_type(IRQ, TYPE) @@ -41,7 +43,7 @@ */ #define dbg_printk(level, frm, ...) \ do { if (debug >= (level)) \ - pr_info("%s:%s - " frm, NAME, __func__, ## __VA_ARGS__); } \ + dev_dbg(board->gpib_dev, frm, ## __VA_ARGS__); } \ while (0) #define LINVAL gpiod_get_value(DAV), \ @@ -316,13 +318,14 @@ struct bb_priv { }; static inline long usec_diff(struct timespec64 *a, struct timespec64 *b); -static void bb_buffer_print(unsigned char *buffer, size_t length, int cmd, int eoi); +static void bb_buffer_print(gpib_board_t *board, unsigned char *buffer, size_t length, + int cmd, int eoi); static void set_data_lines(u8 byte); static u8 get_data_lines(void); static void set_data_lines_input(void); static void set_data_lines_output(void); static inline int check_for_eos(struct bb_priv *priv, uint8_t byte); -static void set_atn(struct bb_priv *priv, int atn_asserted); +static void set_atn(gpib_board_t *board, int atn_asserted); static inline void SET_DIR_WRITE(struct bb_priv *priv); static inline void SET_DIR_READ(struct bb_priv *priv); @@ -334,11 +337,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB helper functions for bitbanging I/O"); /**** global variables ****/ -#ifdef CONFIG_GPIB_DEBUG -static int debug = 1; -#else static int debug; -#endif module_param(debug, int, 0644); static char printable(char x) @@ -508,7 +507,7 @@ static int bb_write(gpib_board_t *board, uint8_t *buffer, size_t length, board, mutex_is_locked(&board->user_mutex), length); if (debug > 1) - bb_buffer_print(buffer, length, priv->cmd, send_eoi); + bb_buffer_print(board, buffer, length, priv->cmd, send_eoi); priv->count = 0; priv->phase = 300; @@ -550,7 +549,6 @@ static int bb_write(gpib_board_t *board, uint8_t *buffer, size_t length, dbg_printk(1, "timeout after %zu/%zu at %d " LINFMT " eoi: %d\n", priv->w_cnt, length, priv->phase, LINVAL, send_eoi); } else { - // dbg_printk(1,"written %zu\n", priv->w_cnt); retval = priv->w_cnt; } } else { @@ -811,7 +809,8 @@ static char *cmd_string[32] = { "CFE" // 0x1f }; -static void bb_buffer_print(unsigned char *buffer, size_t length, int cmd, int eoi) +static void bb_buffer_print(gpib_board_t *board, unsigned char *buffer, size_t length, + int cmd, int eoi) { int i; @@ -843,11 +842,13 @@ static void bb_buffer_print(unsigned char *buffer, size_t length, int cmd, int e * STATUS Management * * * ***************************************************************************/ -static void set_atn(struct bb_priv *priv, int atn_asserted) +static void set_atn(gpib_board_t *board, int atn_asserted) { + struct bb_priv *priv = board->private_data; + if (priv->listener_state != listener_idle && priv->talker_state != talker_idle) { - dbg_printk(0, "listener/talker state machine conflict\n"); + dev_err(board->gpib_dev, "listener/talker state machine conflict\n"); } if (atn_asserted) { if (priv->listener_state == listener_active) @@ -869,7 +870,7 @@ static void set_atn(struct bb_priv *priv, int atn_asserted) static int bb_take_control(gpib_board_t *board, int synchronous) { dbg_printk(2, "%d\n", synchronous); - set_atn(board->private_data, 1); + set_atn(board, 1); set_bit(CIC_NUM, &board->status); return 0; } @@ -877,7 +878,7 @@ static int bb_take_control(gpib_board_t *board, int synchronous) static int bb_go_to_standby(gpib_board_t *board) { dbg_printk(2, "\n"); - set_atn(board->private_data, 0); + set_atn(board, 0); return 0; } @@ -988,13 +989,11 @@ static int bb_secondary_address(gpib_board_t *board, unsigned int address, int e static int bb_parallel_poll(gpib_board_t *board, uint8_t *result) { - dbg_printk(1, "%s\n", "not implemented"); - return -EPERM; + return -ENOENT; } static void bb_parallel_poll_configure(gpib_board_t *board, uint8_t config) { - dbg_printk(1, "%s\n", "not implemented"); } static void bb_parallel_poll_response(gpib_board_t *board, int ist) @@ -1003,13 +1002,11 @@ static void bb_parallel_poll_response(gpib_board_t *board, int ist) static void bb_serial_poll_response(gpib_board_t *board, uint8_t status) { - dbg_printk(1, "%s\n", "not implemented"); } static uint8_t bb_serial_poll_status(gpib_board_t *board) { - dbg_printk(1, "%s\n", "not implemented"); - return 0; // -ENOSYS; + return 0; // -ENOENT; } static unsigned int bb_t1_delay(gpib_board_t *board, unsigned int nano_sec) @@ -1030,15 +1027,12 @@ static unsigned int bb_t1_delay(gpib_board_t *board, unsigned int nano_sec) static void bb_return_to_local(gpib_board_t *board) { - dbg_printk(1, "%s\n", "not implemented"); } static int bb_line_status(const gpib_board_t *board) { int line_status = ValidALL; -// dbg_printk(1,"\n"); - if (gpiod_get_value(REN) == 0) line_status |= BusREN; if (gpiod_get_value(IFC) == 0) @@ -1091,11 +1085,11 @@ static int bb_get_irq(gpib_board_t *board, char *name, *irq = gpiod_to_irq(gpio); dbg_printk(2, "IRQ %s: %d\n", name, *irq); if (*irq < 0) { - dbg_printk(0, "gpib: can't get IRQ for %s\n", name); + dev_err(board->gpib_dev, "can't get IRQ for %s\n", name); return -1; } if (request_threaded_irq(*irq, handler, thread_fn, flags, name, board)) { - dbg_printk(0, "gpib: can't request IRQ for %s %d\n", name, *irq); + dev_err(board->gpib_dev, "can't request IRQ for %s %d\n", name, *irq); *irq = 0; return -1; } @@ -1163,8 +1157,8 @@ static int allocate_gpios(gpib_board_t *board) gpiod_add_lookup_table(lookup_table); goto try_again; } - dbg_printk(0, "Unable to obtain gpio descriptor for pin %d error %ld\n", - gpios_vector[j], PTR_ERR(desc)); + dev_err(board->gpib_dev, "Unable to obtain gpio descriptor for pin %d error %ld\n", + gpios_vector[j], PTR_ERR(desc)); error = true; break; } @@ -1253,7 +1247,7 @@ static int bb_attach(gpib_board_t *board, const gpib_board_config_t *config) gpios_vector[&(DC) - &all_descriptors[0]] = -1; gpios_vector[&(ACT_LED) - &all_descriptors[0]] = -1; } else { - dbg_printk(0, "Unrecognized pin mapping.\n"); + dev_err(board->gpib_dev, "Unrecognized pin map %s\n", pin_map); goto bb_attach_fail; } dbg_printk(0, "Using pin map \"%s\" %s\n", pin_map, (sn7516x) ? @@ -1344,19 +1338,15 @@ static int __init bb_init_module(void) int result = gpib_register_driver(&bb_interface, THIS_MODULE); if (result) { - pr_err("gpib_bitbang: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); return result; } - dbg_printk(0, "module loaded with pin map \"%s\"%s\n", - pin_map, (sn7516x_used) ? " and SN7516x driver support" : ""); return 0; } static void __exit bb_exit_module(void) { - dbg_printk(0, "module unloaded!"); - gpib_unregister_driver(&bb_interface); } From b51f7fc2e55a9775a66b94f3f9e87b4b2cb42a69 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:00 +0100 Subject: [PATCH 0301/2157] staging: gpib: hp82335 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "hp82335" in request_irq Change pr_err to dev_err wherever possible Remove pr_info's Remove "hp83335:" prefix in messages since this is printed with the module name. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-10-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82335/hp82335.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/staging/gpib/hp_82335/hp82335.c b/drivers/staging/gpib/hp_82335/hp82335.c index 451d5dc6d340..982544d1b382 100644 --- a/drivers/staging/gpib/hp_82335/hp82335.c +++ b/drivers/staging/gpib/hp_82335/hp82335.c @@ -8,6 +8,10 @@ * implement recovery from bus errors (if necessary) */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "hp82335.h" #include #include @@ -274,26 +278,23 @@ static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config case 0xfc000: break; default: - pr_err("hp82335: invalid base io address 0x%u\n", config->ibbase); + dev_err(board->gpib_dev, "invalid base io address 0x%x\n", config->ibbase); return -EINVAL; } if (!request_mem_region(upper_iomem_base, hp82335_upper_iomem_size, "hp82335")) { - pr_err("hp82335: failed to allocate io memory region 0x%lx-0x%lx\n", - upper_iomem_base, upper_iomem_base + hp82335_upper_iomem_size - 1); + dev_err(board->gpib_dev, "failed to allocate io memory region 0x%lx-0x%lx\n", + upper_iomem_base, upper_iomem_base + hp82335_upper_iomem_size - 1); return -EBUSY; } hp_priv->raw_iobase = upper_iomem_base; tms_priv->mmiobase = ioremap(upper_iomem_base, hp82335_upper_iomem_size); - pr_info("hp82335: upper half of 82335 iomem region 0x%lx remapped to 0x%p\n", - hp_priv->raw_iobase, tms_priv->mmiobase); - retval = request_irq(config->ibirq, hp82335_interrupt, 0, "hp82335", board); + retval = request_irq(config->ibirq, hp82335_interrupt, 0, DRV_NAME, board); if (retval) { - pr_err("hp82335: can't request IRQ %d\n", config->ibirq); + dev_err(board->gpib_dev, "can't request IRQ %d\n", config->ibirq); return retval; } hp_priv->irq = config->ibirq; - pr_info("hp82335: IRQ %d\n", config->ibirq); tms9914_board_reset(tms_priv); @@ -331,7 +332,7 @@ static int __init hp82335_init_module(void) int result = gpib_register_driver(&hp82335_interface, THIS_MODULE); if (result) { - pr_err("hp82335: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); return result; } From 22611a85d8504cbe4b47daf7bd170a639a4638db Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:01 +0100 Subject: [PATCH 0302/2157] staging: gpib: hp82341 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "hp82341" in request_region and request_irq Change pr_err to dev_err wherever possible Remove pr_warn and pr_debug for timed out or interrupted reads and writes. Remove "hp_82341:" prefix in messages since this is printed with the module name. Remove __func__ parameter in pr_err. Remove pr_info's. Remove cpmmented printk's. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-11-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82341/hp_82341.c | 70 ++++++++++-------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 800f99c05566..3ac87d1a1fab 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -6,6 +6,10 @@ * copyright : (C) 2002, 2005 by Frank Mori Hess * ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "hp_82341.h" #include #include @@ -57,7 +61,7 @@ static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t leng retval = tms9914_read(board, tms_priv, buffer, 1, end, &num_bytes); *bytes_read += num_bytes; if (retval < 0) - pr_err("tms9914_read failed retval=%i\n", retval); + dev_err(board->gpib_dev, "tms9914_read failed retval=%i\n", retval); if (retval < 0 || *end) return retval; ++buffer; @@ -93,7 +97,6 @@ static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t leng test_bit(DEV_CLEAR_BN, &tms_priv->state) || test_bit(TIMO_NUM, &board->status)); if (retval) { - pr_warn("%s: read wait interrupted\n", __func__); retval = -ERESTARTSYS; break; } @@ -118,12 +121,10 @@ static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t leng tms_priv->holdoff_active = 1; } if (test_bit(TIMO_NUM, &board->status)) { - pr_debug("%s: minor %i: read timed out\n", __FILE__, board->minor); retval = -ETIMEDOUT; break; } if (test_bit(DEV_CLEAR_BN, &tms_priv->state)) { - pr_warn("%s: device clear interrupted read\n", __FILE__); retval = -EINTR; break; } @@ -211,7 +212,7 @@ static int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t len outb(ENABLE_TI_BUFFER_BIT, hp_priv->iobase[3] + BUFFER_CONTROL_REG); retval = restart_write_fifo(board, hp_priv); if (retval < 0) { - pr_err("hp82341: failed to restart write stream\n"); + dev_err(board->gpib_dev, "failed to restart write stream\n"); break; } retval = wait_event_interruptible(board->wait, @@ -223,17 +224,14 @@ static int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t len outb(0, hp_priv->iobase[3] + BUFFER_CONTROL_REG); *bytes_written += block_size - read_transfer_counter(hp_priv); if (retval) { - pr_warn("%s: write wait interrupted\n", __FILE__); retval = -ERESTARTSYS; break; } if (test_bit(TIMO_NUM, &board->status)) { - pr_debug("%s: minor %i: write timed out\n", __FILE__, board->minor); retval = -ETIMEDOUT; break; } if (test_bit(DEV_CLEAR_BN, &tms_priv->state)) { - pr_warn("%s: device clear interrupted write\n", __FILE__); retval = -EINTR; break; } @@ -495,21 +493,21 @@ static int hp_82341_find_isapnp_board(struct pnp_dev **dev) *dev = pnp_find_dev(NULL, ISAPNP_VENDOR('H', 'W', 'P'), ISAPNP_FUNCTION(0x1411), NULL); if (!*dev || !(*dev)->card) { - pr_err("hp_82341: failed to find isapnp board\n"); + pr_err("failed to find isapnp board\n"); return -ENODEV; } if (pnp_device_attach(*dev) < 0) { - pr_err("hp_82341: board already active, skipping\n"); + pr_err("board already active, skipping\n"); return -EBUSY; } if (pnp_activate_dev(*dev) < 0) { pnp_device_detach(*dev); - pr_err("hp_82341: failed to activate() atgpib/tnt, aborting\n"); + pr_err("failed to activate(), aborting\n"); return -EAGAIN; } if (!pnp_port_valid(*dev, 0) || !pnp_irq_valid(*dev, 0)) { pnp_device_detach(*dev); - pr_err("hp_82341: invalid port or irq for atgpib/tnt, aborting\n"); + pr_err("invalid port or irq, aborting\n"); return -ENOMEM; } return 0; @@ -530,7 +528,7 @@ static int xilinx_ready(struct hp_82341_priv *hp_priv) else return 0; default: - pr_err("hp_82341: %s: bug! unknown hw_version\n", __func__); + pr_err("bug! unknown hw_version\n"); break; } return 0; @@ -550,7 +548,7 @@ static int xilinx_done(struct hp_82341_priv *hp_priv) else return 0; default: - pr_err("hp_82341: %s: bug! unknown hw_version\n", __func__); + pr_err("bug! unknown hw_version\n"); break; } return 0; @@ -571,7 +569,7 @@ static int irq_valid(struct hp_82341_priv *hp_priv, int irq) case 15: return 1; default: - pr_err("hp_82341: invalid irq=%i for 82341C, irq must be 3, 5, 7, 9, 10, 11, 12, or 15.\n", + pr_err("invalid irq=%i for 82341C, irq must be 3, 5, 7, 9, 10, 11, 12, or 15.\n", irq); return 0; } @@ -579,7 +577,7 @@ static int irq_valid(struct hp_82341_priv *hp_priv, int irq) case HW_VERSION_82341D: return 1; default: - pr_err("hp_82341: %s: bug! unknown hw_version\n", __func__); + pr_err("bug! unknown hw_version\n"); break; } return 0; @@ -601,7 +599,7 @@ static int hp_82341_load_firmware_array(struct hp_82341_priv *hp_priv, usleep_range(10, 15); } if (j == timeout) { - pr_err("hp_82341: timed out waiting for Xilinx ready.\n"); + pr_err("timed out waiting for Xilinx ready.\n"); return -ETIMEDOUT; } outb(firmware_data[i], hp_priv->iobase[0] + XILINX_DATA_REG); @@ -614,7 +612,7 @@ static int hp_82341_load_firmware_array(struct hp_82341_priv *hp_priv, usleep_range(10, 15); } if (j == timeout) { - pr_err("hp_82341: timed out waiting for Xilinx done.\n"); + pr_err("timed out waiting for Xilinx done.\n"); return -ETIMEDOUT; } return 0; @@ -625,27 +623,27 @@ static int hp_82341_load_firmware(struct hp_82341_priv *hp_priv, const gpib_boar if (config->init_data_length == 0) { if (xilinx_done(hp_priv)) return 0; - pr_err("hp_82341: board needs be initialized with firmware upload.\n" + pr_err("board needs be initialized with firmware upload.\n" "\tUse the --init-data option of gpib_config.\n"); return -EINVAL; } switch (hp_priv->hw_version) { case HW_VERSION_82341C: if (config->init_data_length != hp_82341c_firmware_length) { - pr_err("hp_82341: bad firmware length=%i for 82341c (expected %i).\n", + pr_err("bad firmware length=%i for 82341c (expected %i).\n", config->init_data_length, hp_82341c_firmware_length); return -EINVAL; } break; case HW_VERSION_82341D: if (config->init_data_length != hp_82341d_firmware_length) { - pr_err("hp_82341: bad firmware length=%i for 82341d (expected %i).\n", + pr_err("bad firmware length=%i for 82341d (expected %i).\n", config->init_data_length, hp_82341d_firmware_length); return -EINVAL; } break; default: - pr_err("hp_82341: %s: bug! unknown hw_version\n", __func__); + pr_err("bug! unknown hw_version\n"); break; } return hp_82341_load_firmware_array(hp_priv, config->init_data, config->init_data_length); @@ -723,13 +721,12 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi hp_priv->hw_version = HW_VERSION_82341C; hp_priv->io_region_offset = 0x400; } - pr_info("hp_82341: base io 0x%u\n", iobase); for (i = 0; i < hp_82341_num_io_regions; ++i) { start_addr = iobase + i * hp_priv->io_region_offset; - if (!request_region(start_addr, hp_82341_region_iosize, "hp_82341")) { - pr_err("hp_82341: failed to allocate io ports 0x%x-0x%x\n", - start_addr, - start_addr + hp_82341_region_iosize - 1); + if (!request_region(start_addr, hp_82341_region_iosize, DRV_NAME)) { + dev_err(board->gpib_dev, "failed to allocate io ports 0x%x-0x%x\n", + start_addr, + start_addr + hp_82341_region_iosize - 1); return -EIO; } hp_priv->iobase[i] = start_addr; @@ -739,7 +736,7 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi retval = isapnp_cfg_begin(hp_priv->pnp_dev->card->number, hp_priv->pnp_dev->number); if (retval < 0) { - pr_err("hp_82341: isapnp_cfg_begin returned error\n"); + dev_err(board->gpib_dev, "isapnp_cfg_begin returned error\n"); return retval; } isapnp_write_byte(PIO_DIRECTION_REG, HP_82341D_XILINX_READY_BIT | @@ -755,12 +752,11 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi return retval; if (irq_valid(hp_priv, irq) == 0) return -EINVAL; - if (request_irq(irq, hp_82341_interrupt, 0, "hp_82341", board)) { - pr_err("hp_82341: failed to allocate IRQ %d\n", irq); + if (request_irq(irq, hp_82341_interrupt, 0, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to allocate IRQ %d\n", irq); return -EIO; } hp_priv->irq = irq; - pr_info("hp_82341: IRQ %d\n", irq); hp_priv->config_control_bits &= ~IRQ_SELECT_MASK; hp_priv->config_control_bits |= IRQ_SELECT_BITS(irq); outb(hp_priv->config_control_bits, hp_priv->iobase[0] + CONFIG_CONTROL_STATUS_REG); @@ -777,9 +773,7 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi hp_priv->iobase[0] + EVENT_STATUS_REG); tms9914_online(board, tms_priv); - pr_info("hp_82341: board id %x %x %x %x\n", inb(hp_priv->iobase[1] + ID0_REG), - inb(hp_priv->iobase[1] + ID1_REG), inb(hp_priv->iobase[2] + ID2_REG), - inb(hp_priv->iobase[2] + ID3_REG)); + return 0; } @@ -820,13 +814,13 @@ static int __init hp_82341_init_module(void) ret = gpib_register_driver(&hp_82341_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("hp_82341: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&hp_82341_interface, THIS_MODULE); if (ret) { - pr_err("hp_82341: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); gpib_unregister_driver(&hp_82341_unaccel_interface); return ret; } @@ -871,7 +865,6 @@ static irqreturn_t hp_82341_interrupt(int irq, void *arg) spin_lock_irqsave(&board->spinlock, flags); event_status = inb(hp_priv->iobase[0] + EVENT_STATUS_REG); -// printk("hp_82341: interrupt event_status=0x%x\n", event_status); if (event_status & INTERRUPT_PENDING_EVENT_BIT) retval = IRQ_HANDLED; //write-clear status bits @@ -886,9 +879,6 @@ static irqreturn_t hp_82341_interrupt(int irq, void *arg) status1 = read_byte(tms_priv, ISR0); status2 = read_byte(tms_priv, ISR1); tms9914_interrupt_have_status(board, tms_priv, status1, status2); -/* printk("hp_82341: interrupt status1=0x%x status2=0x%x\n", - * status1, status2); - */ } spin_unlock_irqrestore(&board->spinlock, flags); return retval; From 060fb82d690ecb3583c70754561547d16b55f937 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:03 +0100 Subject: [PATCH 0303/2157] staging: gpib: lpvo console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Replace #define NAME "lpvo_usb_gpib" with KBUILD_MODNAME Update comments about Diagnostics and Debug Use dev_dbg instead of pr_alert in the DIA_LOG macro definition Remove TTY_LOG macro and its uses. There are non-console applications where this no longer makes sense. Remove commented printk's Remove now useless function printable(). Remove board parameter from SHOW_STATUS as it shadows the variable in the function. Remove the printing of the board pointer in SHOW_STATUS. Change select DIA_LOG(0,... to dev_err where we need an error message. Remove loops for printing message buffer contents. Remove dev_alerts for read errors. Change dev_alert to dev_err. Change some TTY_LOG to DIA_LOG(0,.. e.g. for attach and detach messages. Remove NAME parameter in dev_dbg as this is printed with the module name. Remove __func__ parameter in dev_dbg as this can be enabled by dynamic debug. Remove NOP message for unsuppoted functionality. Remove "lpvo_usb_gpib:" prefix in register driver pr_err as it is printed with the module name. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-13-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 255 ++++++------------ 1 file changed, 76 insertions(+), 179 deletions(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 85322af62c23..50faa0c17617 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -8,6 +8,10 @@ * copyright : (C) 2011 Marcello Carla' * ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define NAME KBUILD_MODNAME + /* base module includes */ #include @@ -31,8 +35,6 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for LPVO usb devices"); -#define NAME "lpvo_usb_gpib" - /* * Table of devices that work with this driver. * @@ -55,10 +57,11 @@ MODULE_DEVICE_TABLE(usb, skel_table); /* * *** Diagnostics and Debug *** - * + * To enable the diagnostic and debug messages either compile with DEBUG set + * or control via the dynamic debug mechanisms. * The module parameter "debug" controls the sending of debug messages to - * syslog. By default it is set to 0 or 1 according to GPIB_CONFIG_KERNEL_DEBUG. - * debug = 0: only register/deregister messages are generated + * syslog. By default it is set to 0 + * debug = 0: only attach/detach messages are sent * 1: every action is logged * 2: extended logging; each single exchanged byte is documented * (about twice the log volume of [1]) @@ -70,9 +73,9 @@ MODULE_DEVICE_TABLE(usb, skel_table); static int debug; module_param(debug, int, 0644); -#define DIA_LOG(level, format, ...) \ +#define DIA_LOG(level, format, ...) \ do { if (debug >= (level)) \ - pr_alert("%s:%s - " format, NAME, __func__, ## __VA_ARGS__); } \ + dev_dbg(board->gpib_dev, format, ## __VA_ARGS__); } \ while (0) /* standard and extended command sets of the usb-gpib adapter */ @@ -135,7 +138,7 @@ struct char_buf { /* used by one_char() routine */ }; struct usb_gpib_priv { /* private data to the device */ - u8 eos; /* eos character */ + u8 eos; /* eos character */ short eos_flags; /* eos mode */ int timeout; /* current value for timeout */ void *dev; /* the usb device private data structure */ @@ -143,42 +146,23 @@ struct usb_gpib_priv { /* private data to the device */ #define GPIB_DEV (((struct usb_gpib_priv *)board->private_data)->dev) -#define SHOW_STATUS(board) { \ - DIA_LOG(2, "# - board %p\n", board); \ - DIA_LOG(2, "# - buffer_length %d\n", board->buffer_length); \ - DIA_LOG(2, "# - status %lx\n", board->status); \ - DIA_LOG(2, "# - use_count %d\n", board->use_count); \ - DIA_LOG(2, "# - pad %x\n", board->pad); \ - DIA_LOG(2, "# - sad %x\n", board->sad); \ - DIA_LOG(2, "# - timeout %d\n", board->usec_timeout); \ - DIA_LOG(2, "# - ppc %d\n", board->parallel_poll_configuration); \ - DIA_LOG(2, "# - t1delay %d\n", board->t1_nano_sec); \ - DIA_LOG(2, "# - online %d\n", board->online); \ - DIA_LOG(2, "# - autopoll %d\n", board->autospollers); \ - DIA_LOG(2, "# - autopoll task %p\n", board->autospoll_task); \ - DIA_LOG(2, "# - minor %d\n", board->minor); \ - DIA_LOG(2, "# - master %d\n", board->master); \ - DIA_LOG(2, "# - list %d\n", board->ist); \ - } -/* - * n = 0; - * list_for_each (l, &board->device_list) n++; - * TTY_LOG ("%s:%s - devices in list %d\n", a, b, n); - */ - -/* - * TTY_LOG - write a message to the current work terminal (if any) - */ - -#define TTY_LOG(format, ...) { \ - char buf[128]; \ - struct tty_struct *tty = get_current_tty(); \ - if (tty) { \ - snprintf(buf, 128, format, __VA_ARGS__); \ - tty->driver->ops->write(tty, buf, strlen(buf)); \ - tty->driver->ops->write(tty, "\r", 1); \ - } \ - } +static void show_status(gpib_board_t *board) +{ + DIA_LOG(2, "# - buffer_length %d\n", board->buffer_length); + DIA_LOG(2, "# - status %lx\n", board->status); + DIA_LOG(2, "# - use_count %d\n", board->use_count); + DIA_LOG(2, "# - pad %x\n", board->pad); + DIA_LOG(2, "# - sad %x\n", board->sad); + DIA_LOG(2, "# - timeout %d\n", board->usec_timeout); + DIA_LOG(2, "# - ppc %d\n", board->parallel_poll_configuration); + DIA_LOG(2, "# - t1delay %d\n", board->t1_nano_sec); + DIA_LOG(2, "# - online %d\n", board->online); + DIA_LOG(2, "# - autopoll %d\n", board->autospollers); + DIA_LOG(2, "# - autopoll task %p\n", board->autospoll_task); + DIA_LOG(2, "# - minor %d\n", board->minor); + DIA_LOG(2, "# - master %d\n", board->master); + DIA_LOG(2, "# - list %d\n", board->ist); +} /* * GLOBAL VARIABLES: required for @@ -236,8 +220,6 @@ static int write_loop(void *dev, char *msg, int leng) return skel_do_write(dev, msg, leng); // if (val < 1) { -// printk (KERN_ALERT "%s:%s - write error: %d %d/%d\n", -// NAME, __func__, val, nchar, leng); // return -EIO; // } // nchar +=val; @@ -245,13 +227,6 @@ static int write_loop(void *dev, char *msg, int leng) // return leng; } -static char printable(char x) -{ - if (x < 32 || x > 126) - return ' '; - return x; -} - /** * send_command() - Send a byte sequence and return a single byte reply. * @@ -265,7 +240,7 @@ static char printable(char x) static int send_command(gpib_board_t *board, char *msg, int leng) { char buffer[64]; - int nchar, j; + int nchar; int retval; struct timespec64 before, after; @@ -280,17 +255,10 @@ static int send_command(gpib_board_t *board, char *msg, int leng) nchar = skel_do_read(GPIB_DEV, buffer, 64); if (nchar < 0) { - DIA_LOG(0, " return from read: %d\n", nchar); + dev_err(board->gpib_dev, " return from read: %d\n", nchar); return nchar; } else if (nchar != 1) { - for (j = 0 ; j < leng ; j++) { - DIA_LOG(0, " Irregular reply to command: %d %x %c\n", - j, msg[j], printable(msg[j])); - } - for (j = 0 ; j < nchar ; j++) { - DIA_LOG(0, " Irregular command reply: %d %x %c\n", - j, buffer[j] & 0xff, printable(buffer[j])); - } + dev_err(board->gpib_dev, " Irregular reply to command: %s\n", msg); return -EIO; } ktime_get_real_ts64 (&after); @@ -337,8 +305,8 @@ static int set_control_line(gpib_board_t *board, int line, int value) /* * one_char() - read one single byte from input buffer * - * @board: the gpib_board_struct data area for this gpib interface - * @char_buf: the routine private data structure + * @board: the gpib_board_struct data area for this gpib interface + * @char_buf: the routine private data structure */ static int one_char(gpib_board_t *board, struct char_buf *b) @@ -360,13 +328,7 @@ static int one_char(gpib_board_t *board, struct char_buf *b) if (b->nchar > 0) { DIA_LOG(2, "--> %x\n", b->inbuf[b->last - b->nchar]); return b->inbuf[b->last - b->nchar--]; - } else if (b->nchar == 0) { - dev_alert(board->gpib_dev, "%s:%s - read returned EOF\n", NAME, __func__); - return -EIO; } - dev_alert(board->gpib_dev, "%s:%s - read error %d\n", NAME, __func__, b->nchar); - TTY_LOG("\n *** %s *** Read Error - %s\n", NAME, - "Reset the adapter with 'gpib_config'\n"); return -EIO; } @@ -406,12 +368,10 @@ static void set_timeout(gpib_board_t *board) val = send_command(board, command, 0); } - if (val != ACK) { - dev_alert(board->gpib_dev, "%s:%s - error in timeout set: <%s>\n", - NAME, __func__, command); - } else { + if (val != ACK) + dev_err(board->gpib_dev, "error in timeout set: <%s>\n", command); + else data->timeout = board->usec_timeout; - } } /* @@ -451,8 +411,6 @@ static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *confi if (config->device_path) { /* if config->device_path given, try that first */ - dev_alert(board->gpib_dev, "%s:%s - Looking for device_path: %s\n", - NAME, __func__, config->device_path); for (j = 0 ; j < MAX_DEV ; j++) { if ((assigned_usb_minors & 1 << j) == 0) continue; @@ -487,8 +445,7 @@ static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *confi mutex_unlock(&minors_lock); if (j == MAX_DEV) { - dev_alert(board->gpib_dev, "%s:%s - Requested device is not registered.\n", - NAME, __func__); + dev_err(board->gpib_dev, "Requested device is not registered.\n"); return -EIO; } @@ -501,13 +458,13 @@ static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *confi DIA_LOG(1, "Skel open: %d\n", retval); if (retval) { - TTY_LOG("%s:%s - skel open failed.\n", NAME, __func__); + dev_err(board->gpib_dev, "skel open failed.\n"); kfree(board->private_data); board->private_data = NULL; return -ENODEV; } - SHOW_STATUS(board); + show_status(board); retval = send_command(board, USB_GPIB_ON, 0); DIA_LOG(1, "USB_GPIB_ON returns %x\n", retval); @@ -541,8 +498,8 @@ static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *confi if (retval != ACK) return -EIO; - SHOW_STATUS(board); - TTY_LOG("Module '%s' has been sucesfully configured\n", NAME); + show_status(board); + DIA_LOG(0, "attached\n"); return 0; } @@ -557,9 +514,9 @@ static void usb_gpib_detach(gpib_board_t *board) { int retval; - SHOW_STATUS(board); + show_status(board); - DIA_LOG(0, "detaching %p\n", board); + DIA_LOG(0, "detaching\n"); if (board->private_data) { if (GPIB_DEV) { @@ -573,8 +530,7 @@ static void usb_gpib_detach(gpib_board_t *board) board->private_data = NULL; } - DIA_LOG(0, "done %p\n", board); - TTY_LOG("Module '%s' has been detached\n", NAME); + DIA_LOG(0, "detached\n"); } /* @@ -733,8 +689,7 @@ static int usb_gpib_line_status(const gpib_board_t *board) buffer = send_command((gpib_board_t *)board, USB_GPIB_STATUS, 0); if (buffer < 0) { - dev_alert(board->gpib_dev, "%s:%s - line status read failed with %d\n", - NAME, __func__, buffer); + dev_err(board->gpib_dev, "line status read failed with %d\n", buffer); return -1; } @@ -773,20 +728,16 @@ static int usb_gpib_parallel_poll(gpib_board_t *board, uint8_t *result) DIA_LOG(1, "enter %p\n", board); retval = set_control_line(board, IB_BUS_EOI, 1); - if (retval != ACK) { - dev_alert(board->gpib_dev, "%s:%s - assert EOI failed\n", NAME, __func__); + if (retval != ACK) return -EIO; - } *result = send_command(board, USB_GPIB_READ_DATA, 0); DIA_LOG(1, "done with %x\n", *result); retval = set_control_line(board, IB_BUS_EOI, 0); - if (retval != 0x06) { - dev_alert(board->gpib_dev, "%s:%s - unassert EOI failed\n", NAME, __func__); + if (retval != 0x06) return -EIO; - } return 0; } @@ -866,8 +817,7 @@ static int usb_gpib_read(gpib_board_t *board, goto read_return; if (one_char(board, &b) != DLE || one_char(board, &b) != STX) { - dev_alert(board->gpib_dev, "%s:%s - wrong sequence\n", - NAME, __func__); + dev_err(board->gpib_dev, "wrong sequence\n"); retval = -EIO; goto read_return; } @@ -907,15 +857,12 @@ static int usb_gpib_read(gpib_board_t *board, retval = 0; goto read_return; } else { - dev_alert(board->gpib_dev, "%s:%s - %s %x\n", - NAME, __func__, - "Wrong end of message", c); + dev_err(board->gpib_dev, "wrong end of message %x", c); retval = -ETIME; goto read_return; } } else { - dev_alert(board->gpib_dev, "%s:%s - %s\n", NAME, __func__, - "lone in stream"); + dev_err(board->gpib_dev, "lone in stream"); retval = -EIO; goto read_return; } @@ -934,8 +881,7 @@ static int usb_gpib_read(gpib_board_t *board, c = one_char(board, &b); if (c == ACK) { if (MAX_READ_EXCESS - read_count > 1) - dev_alert(board->gpib_dev, "%s:%s - %s\n", NAME, __func__, - "small buffer - maybe some data lost"); + dev_dbg(board->gpib_dev, "small buffer - maybe some data lost"); retval = 0; goto read_return; } @@ -943,15 +889,13 @@ static int usb_gpib_read(gpib_board_t *board, } } - dev_alert(board->gpib_dev, "%s:%s - no input end - GPIB board in odd state\n", - NAME, __func__); + dev_err(board->gpib_dev, "no input end - board in odd state\n"); retval = -EIO; read_return: kfree(b.inbuf); - DIA_LOG(1, "done with byte/status: %d %x %d\n", - (int)*bytes_read, retval, *end); + DIA_LOG(1, "done with byte/status: %d %x %d\n", (int)*bytes_read, retval, *end); if (retval == 0 || retval == -ETIME) { if (send_command(board, USB_GPIB_UNTALK, sizeof(USB_GPIB_UNTALK)) == 0x06) @@ -970,8 +914,7 @@ static void usb_gpib_remote_enable(gpib_board_t *board, int enable) retval = set_control_line(board, IB_BUS_REN, enable ? 1 : 0); if (retval != ACK) - dev_alert(board->gpib_dev, "%s:%s - could not set REN line: %x\n", - NAME, __func__, retval); + dev_err(board->gpib_dev, "could not set REN line: %x\n", retval); DIA_LOG(1, "done with %x\n", retval); } @@ -1053,9 +996,8 @@ static int usb_gpib_write(gpib_board_t *board, *bytes_written = length; - if (send_command(board, USB_GPIB_UNLISTEN, sizeof(USB_GPIB_UNLISTEN)) - != 0x06) - return -EPIPE; + if (send_command(board, USB_GPIB_UNLISTEN, sizeof(USB_GPIB_UNLISTEN)) != 0x06) + return -EPIPE; return length; } @@ -1069,21 +1011,18 @@ static int usb_gpib_write(gpib_board_t *board, static void usb_gpib_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); } /* parallel_poll_response */ static void usb_gpib_parallel_poll_response(gpib_board_t *board, int ist) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); } /* primary_address */ static int usb_gpib_primary_address(gpib_board_t *board, unsigned int address) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); return 0; } @@ -1091,7 +1030,6 @@ static int usb_gpib_primary_address(gpib_board_t *board, unsigned int address) static void usb_gpib_return_to_local(gpib_board_t *board) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); } /* secondary_address */ @@ -1100,7 +1038,6 @@ static int usb_gpib_secondary_address(gpib_board_t *board, unsigned int address, int enable) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); return 0; } @@ -1108,14 +1045,12 @@ static int usb_gpib_secondary_address(gpib_board_t *board, static void usb_gpib_serial_poll_response(gpib_board_t *board, uint8_t status) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); } /* serial_poll_status */ static uint8_t usb_gpib_serial_poll_status(gpib_board_t *board) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); return 0; } @@ -1123,7 +1058,6 @@ static uint8_t usb_gpib_serial_poll_status(gpib_board_t *board) static unsigned int usb_gpib_t1_delay(gpib_board_t *board, unsigned int nano_sec) { - dev_alert(board->gpib_dev, "%s:%s - currently a NOP\n", NAME, __func__); return 0; } @@ -1181,7 +1115,7 @@ static int usb_gpib_init_module(struct usb_interface *interface) if (!assigned_usb_minors) { rv = gpib_register_driver(&usb_gpib_interface, THIS_MODULE); if (rv) { - pr_err("lpvo_usb_gpib: gpib_register_driver failed: error = %d\n", rv); + pr_err("gpib_register_driver failed: error = %d\n", rv); goto exit; } } else { @@ -1191,8 +1125,8 @@ static int usb_gpib_init_module(struct usb_interface *interface) for (j = 0 ; j < MAX_DEV ; j++) { if (usb_minors[j] == interface->minor && assigned_usb_minors & 1 << j) { - pr_alert("%s:%s - CODE BUG: USB minor %d registered at %d.\n", - NAME, __func__, interface->minor, j); + pr_err("CODE BUG: USB minor %d registered at %d.\n", + interface->minor, j); rv = -1; goto exit; } @@ -1207,13 +1141,11 @@ static int usb_gpib_init_module(struct usb_interface *interface) usb_minors[j] = interface->minor; lpvo_usb_interfaces[j] = interface; assigned_usb_minors |= mask; - DIA_LOG(0, "usb minor %d registered at %d\n", interface->minor, j); rv = 0; goto exit; } } - pr_alert("%s:%s - No slot available for interface %p minor %d\n", - NAME, __func__, interface, interface->minor); + pr_err("No slot available for interface %p minor %d\n", interface, interface->minor); rv = -1; exit: @@ -1235,7 +1167,7 @@ static void usb_gpib_exit_module(int minor) goto exit; } } - pr_alert("%s:%s - CODE BUG: USB minor %d not found.\n", NAME, __func__, minor); + pr_err("CODE BUG: USB minor %d not found.\n", minor); exit: mutex_unlock(&minors_lock); @@ -1267,7 +1199,7 @@ static int write_latency_timer(struct usb_device *udev) LATENCY_TIMER, LATENCY_CHANNEL, NULL, 0, WDR_TIMEOUT); if (rv < 0) - pr_alert("Unable to write latency timer: %i\n", rv); + dev_err(&udev->dev, "Unable to write latency timer: %i\n", rv); return rv; } @@ -1369,12 +1301,9 @@ static int skel_do_open(gpib_board_t *board, int subminor) struct usb_interface *interface; int retval = 0; - DIA_LOG(0, "Required minor: %d\n", subminor); - interface = usb_find_interface(&skel_driver, subminor); if (!interface) { - pr_err("%s - error, can't find device for minor %d\n", - __func__, subminor); + dev_err(board->gpib_dev, "can't find device for minor %d\n", subminor); retval = -ENODEV; goto exit; } @@ -1439,9 +1368,8 @@ static void skel_read_bulk_callback(struct urb *urb) if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) - dev_err(&dev->interface->dev, - "%s - nonzero read bulk status received: %d\n", - __func__, urb->status); + dev_err(&dev->interface->dev, "nonzero read bulk status received: %d\n", + urb->status); dev->errors = urb->status; } else { @@ -1478,9 +1406,7 @@ static int skel_do_read_io(struct usb_skel *dev, size_t count) /* do it */ rv = usb_submit_urb(dev->bulk_in_urb, GFP_KERNEL); if (rv < 0) { - dev_err(&dev->interface->dev, - "%s - failed submitting read urb, error %d\n", - __func__, rv); + dev_err(&dev->interface->dev, "failed submitting read urb, error %d\n", rv); rv = (rv == -ENOMEM) ? rv : -EIO; spin_lock_irq(&dev->err_lock); dev->ongoing_read = 0; @@ -1504,14 +1430,10 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) if (!dev->bulk_in_urb || !count) return 0; - DIA_LOG(1, "enter for %zu.\n", count); - restart: /* added to comply with ftdi timeout technique */ /* no concurrent readers */ - DIA_LOG(2, "restart with %zd %zd.\n", dev->bulk_in_filled, dev->bulk_in_copied); - rv = mutex_lock_interruptible(&dev->io_mutex); if (rv < 0) return rv; @@ -1527,8 +1449,6 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) ongoing_io = dev->ongoing_read; spin_unlock_irq(&dev->err_lock); - DIA_LOG(2, "retry with %d.\n", ongoing_io); - if (ongoing_io) { // /* nonblocking IO shall not wait */ // /* no file, no O_NONBLOCK; maybe provide when from user space */ @@ -1569,8 +1489,6 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) // size_t chunk = min(available, count); /* compute chunk later */ size_t chunk; - DIA_LOG(2, "we have data: %zu %zu.\n", dev->bulk_in_filled, dev->bulk_in_copied); - if (!available) { /* * all data has been used @@ -1596,12 +1514,6 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) */ if (dev->bulk_in_copied) { - int j; - - for (j = 0 ; j < dev->bulk_in_filled ; j++) { - pr_alert("copy -> %x %zu %x\n", - j, dev->bulk_in_copied, dev->bulk_in_buffer[j]); - } chunk = min(available, count); memcpy(buffer, dev->bulk_in_buffer + dev->bulk_in_copied, chunk); rv = chunk; @@ -1613,7 +1525,7 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) /* account for two bytes to be discarded */ chunk = min(available, count + 2); if (chunk < 2) { - pr_alert("BAD READ - chunk: %zu\n", chunk); + dev_err(&dev->udev->dev, "BAD READ - chunk: %zu\n", chunk); rv = -EIO; goto exit; } @@ -1633,8 +1545,6 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) // if (available < count) // skel_do_read_io(dev, dev->bulk_in_size); } else { - DIA_LOG(1, "no data - start read - copied: %zd.\n", dev->bulk_in_copied); - /* no data in the buffer */ rv = skel_do_read_io(dev, dev->bulk_in_size); if (rv < 0) @@ -1645,10 +1555,10 @@ static ssize_t skel_do_read(struct usb_skel *dev, char *buffer, size_t count) exit: mutex_unlock(&dev->io_mutex); if (rv == 2) - goto restart; /* ftdi chip returns two status bytes after a latency anyhow */ - DIA_LOG(1, "exit with %d.\n", rv); + goto restart; /* ftdi chip returns two status bytes after a latency anyhow */ + if (rv > 0) - return rv - 2; /* account for 2 discarded bytes in a valid buffer */ + return rv - 2; /* account for 2 discarded bytes in a valid buffer */ return rv; } @@ -1669,8 +1579,7 @@ static void skel_write_bulk_callback(struct urb *urb) urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) dev_err(&dev->interface->dev, - "%s - nonzero write bulk status received: %d\n", - __func__, urb->status); + "nonzero write bulk status received: %d\n", urb->status); spin_lock_irqsave(&dev->err_lock, flags); dev->errors = urb->status; @@ -1763,9 +1672,7 @@ static ssize_t skel_do_write(struct usb_skel *dev, const char *buffer, size_t co retval = usb_submit_urb(urb, GFP_KERNEL); mutex_unlock(&dev->io_mutex); if (retval) { - dev_err(&dev->interface->dev, - "%s - failed submitting write urb, error %d\n", - __func__, retval); + dev_err(&dev->interface->dev, "failed submitting write urb, error %d\n", retval); goto error_unanchor; } @@ -1831,8 +1738,7 @@ static int skel_open(struct inode *inode, struct file *file) interface = usb_find_interface(&skel_driver, subminor); if (!interface) { - pr_err("%s - error, can't find device for minor %d\n", - __func__, subminor); + pr_err("can't find device for minor %d\n", subminor); retval = -ENODEV; goto exit; } @@ -1895,8 +1801,6 @@ static ssize_t skel_read(struct file *file, char __user *buffer, size_t count, rv = skel_do_read(dev, buf, count); - pr_alert("%s - return with %zu\n", __func__, rv); - if (rv > 0) { if (copy_to_user(buffer, buf, rv)) { kfree(buf); @@ -2015,8 +1919,8 @@ static int skel_probe(struct usb_interface *interface, /* let the world know */ device_path = kobject_get_path(&dev->udev->dev.kobj, GFP_KERNEL); - pr_alert("%s:%s - New lpvo_usb_device -> bus: %d dev: %d path: %s\n", NAME, __func__, - dev->udev->bus->busnum, dev->udev->devnum, device_path); + dev_dbg(&interface->dev, "New lpvo_usb_device -> bus: %d dev: %d path: %s\n", + dev->udev->bus->busnum, dev->udev->devnum, device_path); kfree(device_path); #if USER_DEVICE @@ -2029,14 +1933,9 @@ static int skel_probe(struct usb_interface *interface, usb_set_intfdata(interface, NULL); goto error; } - - /* let the user know what node this device is now attached to */ - dev_info(&interface->dev, - "lpvo_usb_gpib device now attached to lpvo_raw%d", - interface->minor); #endif - write_latency_timer(dev->udev); /* adjust the latency timer */ + write_latency_timer(dev->udev); /* adjust the latency timer */ usb_gpib_init_module(interface); /* last, init the lpvo for this minor */ @@ -2073,8 +1972,6 @@ static void skel_disconnect(struct usb_interface *interface) /* decrement our usage count */ kref_put(&dev->kref, skel_delete); - - dev_info(&interface->dev, "USB lpvo_raw #%d now disconnected", minor); } static void skel_draw_down(struct usb_skel *dev) From 23561c4d33fecdc970126ab011b939cefb1fdd19 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:04 +0100 Subject: [PATCH 0304/2157] staging: gpib: nec7210 console messaging cleanup Enable module name to be printed by default in dev_dbg Remove commented dev_dbg Remove pr_err on timeout and gpib bus error. Remove dev_dbg on wait interrupted, command timeout also read / write timeout, gpib bus error and interrupt. Remove commented printk and command variable initialization. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-14-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/nec7210/nec7210.c | 69 +++++++++----------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/drivers/staging/gpib/nec7210/nec7210.c b/drivers/staging/gpib/nec7210/nec7210.c index c9a837fad96e..85f1e79d658a 100644 --- a/drivers/staging/gpib/nec7210/nec7210.c +++ b/drivers/staging/gpib/nec7210/nec7210.c @@ -4,6 +4,8 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define dev_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "board.h" #include #include @@ -198,7 +200,6 @@ unsigned int nec7210_update_status_nolock(gpib_board_t *board, struct nec7210_pr priv->srq_pending = 0; set_bit(SPOLL_NUM, &board->status); } -// dev_dbg(board->gpib_dev, "status 0x%x, state 0x%x\n", board->status, priv->state); /* we rely on the interrupt handler to set the * rest of the status bits @@ -319,10 +320,8 @@ int nec7210_go_to_standby(gpib_board_t *board, struct nec7210_priv *priv) if (adsr_bits & HR_NATN) break; } - if (i == HZ) { - pr_err("nec7210: error waiting for NATN\n"); + if (i == HZ) return -ETIMEDOUT; - } } clear_bit(COMMAND_READY_BN, &priv->state); @@ -430,17 +429,14 @@ int nec7210_command(gpib_board_t *board, struct nec7210_priv *priv, uint8_t test_bit(COMMAND_READY_BN, &priv->state) || test_bit(BUS_ERROR_BN, &priv->state) || test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib command wait interrupted\n"); + dev_dbg(board->gpib_dev, "command wait interrupted\n"); retval = -ERESTARTSYS; break; } if (test_bit(TIMO_NUM, &board->status)) break; - if (test_and_clear_bit(BUS_ERROR_BN, &priv->state)) { - pr_err("nec7210: bus error on command byte\n"); + if (test_and_clear_bit(BUS_ERROR_BN, &priv->state)) break; - } - spin_lock_irqsave(&board->spinlock, flags); clear_bit(COMMAND_READY_BN, &priv->state); write_byte(priv, buffer[*bytes_written], CDOR); @@ -454,18 +450,14 @@ int nec7210_command(gpib_board_t *board, struct nec7210_priv *priv, uint8_t // wait for last byte to get sent if (wait_event_interruptible(board->wait, test_bit(COMMAND_READY_BN, &priv->state) || test_bit(BUS_ERROR_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib command wait interrupted\n"); + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } - if (test_bit(TIMO_NUM, &board->status)) { - dev_dbg(board->gpib_dev, "gpib command timed out\n"); + + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; - } - if (test_and_clear_bit(BUS_ERROR_BN, &priv->state)) { - pr_err("nec7210: bus error on command byte\n"); + + if (test_and_clear_bit(BUS_ERROR_BN, &priv->state)) retval = -EIO; - } return retval; } @@ -484,7 +476,6 @@ static int pio_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buf test_bit(READ_READY_BN, &priv->state) || test_bit(DEV_CLEAR_BN, &priv->state) || test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "nec7210: pio read wait interrupted\n"); retval = -ERESTARTSYS; break; } @@ -503,12 +494,10 @@ static int pio_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buf break; } if (test_bit(TIMO_NUM, &board->status)) { - dev_dbg(board->gpib_dev, "interrupted by timeout\n"); retval = -ETIMEDOUT; break; } if (test_bit(DEV_CLEAR_BN, &priv->state)) { - dev_dbg(board->gpib_dev, "interrupted by device clear\n"); retval = -EINTR; break; } @@ -557,10 +546,9 @@ static ssize_t __dma_read(gpib_board_t *board, struct nec7210_priv *priv, size_t if (wait_event_interruptible(board->wait, test_bit(DMA_READ_IN_PROGRESS_BN, &priv->state) == 0 || test_bit(DEV_CLEAR_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "nec7210: dma read wait interrupted\n"); + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_bit(DEV_CLEAR_BN, &priv->state)) @@ -638,22 +626,18 @@ static int pio_write_wait(gpib_board_t *board, struct nec7210_priv *priv, (wake_on_bus_error && test_bit(BUS_ERROR_BN, &priv->state)) || (wake_on_lacs && test_bit(LACS_NUM, &board->status)) || (wake_on_atn && test_bit(ATN_NUM, &board->status)) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } - if (test_bit(TIMO_NUM, &board->status)) { - dev_dbg(board->gpib_dev, "nec7210: write timed out\n"); + + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; - } - if (test_bit(DEV_CLEAR_BN, &priv->state)) { - dev_dbg(board->gpib_dev, "nec7210: write interrupted by clear\n"); + + if (test_bit(DEV_CLEAR_BN, &priv->state)) return -EINTR; - } - if (wake_on_bus_error && test_and_clear_bit(BUS_ERROR_BN, &priv->state)) { - dev_dbg(board->gpib_dev, "nec7210: bus error on write\n"); + + if (wake_on_bus_error && test_and_clear_bit(BUS_ERROR_BN, &priv->state)) return -EIO; - } + return 0; } @@ -677,7 +661,6 @@ static int pio_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *bu if (retval == -EIO) { /* resend last byte on bus error */ *bytes_written = last_count; - dev_dbg(board->gpib_dev, "resending %c\n", buffer[*bytes_written]); /* we can get unrecoverable bus errors, * so give up after a while */ @@ -733,10 +716,9 @@ static ssize_t __dma_write(gpib_board_t *board, struct nec7210_priv *priv, dma_a test_bit(DMA_WRITE_IN_PROGRESS_BN, &priv->state) == 0 || test_bit(BUS_ERROR_BN, &priv->state) || test_bit(DEV_CLEAR_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted!\n"); + test_bit(TIMO_NUM, &board->status))) retval = -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) retval = -ETIMEDOUT; if (test_and_clear_bit(DEV_CLEAR_BN, &priv->state)) @@ -937,13 +919,8 @@ irqreturn_t nec7210_interrupt_have_status(gpib_board_t *board, set_bit(COMMAND_READY_BN, &priv->state); // command pass through received - if (status1 & HR_CPT) { - unsigned int command; - - command = read_byte(priv, CPTR) & gpib_command_mask; + if (status1 & HR_CPT) write_byte(priv, AUX_NVAL, AUXMR); -// printk("gpib: command pass through 0x%x\n", command); - } if (status1 & HR_ERR) set_bit(BUS_ERROR_BN, &priv->state); From 18ce5b5d9167bffce02550a85c7c3316a05ea598 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:05 +0100 Subject: [PATCH 0305/2157] staging: gpib: ni_usb console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "ni_usb_gpib" in usb_driver struct. Remove __func__ parameter from pr_err and dev_err. Remove __func__ parameter from dev_dbg as this can be enabled by dynamic debug. Remove commented printk's and dev_err's. Remove kmalloc failed messages. Remove buffer over run bug dev_err message as this just checks for a bug in the driver which does not exist. Remove read/write length too long messages and return -EINVAL Change dev_info to dev_dbg where possible. Move attach message to the end of attach function. Remove buffer overrun message. Use actual array indeces instead of i and i++ to make code clear and check redundnant. Remove module init and exit pr_info's. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-15-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 425 ++++++++++------------ 1 file changed, 189 insertions(+), 236 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index d0656dc520f5..52c7530f07bb 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -5,6 +5,10 @@ * copyright : (C) 2004 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include #include #include @@ -75,7 +79,7 @@ static unsigned short ni_usb_timeout_code(unsigned int usec) */ else if (usec <= 1000000000) return 0x02; - pr_err("%s: bug? usec is greater than 1e9\n", __func__); + pr_err("bug? usec is greater than 1e9\n"); return 0xf0; } @@ -83,8 +87,6 @@ static void ni_usb_bulk_complete(struct urb *urb) { struct ni_usb_urb_ctx *context = urb->context; -// printk("debug: %s: status=0x%x, error_count=%i, actual_length=%i\n", __func__, -// urb->status, urb->error_count, urb->actual_length); complete(&context->complete); } @@ -137,8 +139,8 @@ static int ni_usb_nonblocking_send_bulk_msg(struct ni_usb_priv *ni_priv, void *d del_timer_sync(&ni_priv->bulk_timer); usb_free_urb(ni_priv->bulk_urb); ni_priv->bulk_urb = NULL; - dev_err(&usb_dev->dev, "%s: failed to submit bulk out urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit bulk out urb, retval=%i\n", + retval); mutex_unlock(&ni_priv->bulk_transfer_lock); return retval; } @@ -146,7 +148,7 @@ static int ni_usb_nonblocking_send_bulk_msg(struct ni_usb_priv *ni_priv, void *d wait_for_completion(&context->complete); // wait for ni_usb_bulk_complete if (context->timed_out) { usb_kill_urb(ni_priv->bulk_urb); - dev_err(&usb_dev->dev, "%s: killed urb due to timeout\n", __func__); + dev_err(&usb_dev->dev, "killed urb due to timeout\n"); retval = -ETIMEDOUT; } else { retval = ni_priv->bulk_urb->status; @@ -218,14 +220,12 @@ static int ni_usb_nonblocking_receive_bulk_msg(struct ni_usb_priv *ni_priv, if (timeout_msecs) mod_timer(&ni_priv->bulk_timer, jiffies + msecs_to_jiffies(timeout_msecs)); - //printk("%s: submitting urb\n", __func__); retval = usb_submit_urb(ni_priv->bulk_urb, GFP_KERNEL); if (retval) { del_timer_sync(&ni_priv->bulk_timer); usb_free_urb(ni_priv->bulk_urb); ni_priv->bulk_urb = NULL; - dev_err(&usb_dev->dev, "%s: failed to submit bulk out urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit bulk in urb, retval=%i\n", retval); mutex_unlock(&ni_priv->bulk_transfer_lock); return retval; } @@ -250,7 +250,7 @@ static int ni_usb_nonblocking_receive_bulk_msg(struct ni_usb_priv *ni_priv, } if (context->timed_out) { usb_kill_urb(ni_priv->bulk_urb); - dev_err(&usb_dev->dev, "%s: killed urb due to timeout\n", __func__); + dev_err(&usb_dev->dev, "killed urb due to timeout\n"); retval = -ETIMEDOUT; } else { if (ni_priv->bulk_urb->status) @@ -330,14 +330,14 @@ static void ni_usb_soft_update_status(gpib_board_t *board, unsigned int ni_usb_i ni_priv->monitored_ibsta_bits &= ~ni_usb_ibsta; need_monitoring_bits &= ~ni_priv->monitored_ibsta_bits; /* mm - monitored set */ spin_unlock_irqrestore(&board->spinlock, flags); - dev_dbg(&usb_dev->dev, "%s: need_monitoring_bits=0x%x\n", __func__, need_monitoring_bits); + dev_dbg(&usb_dev->dev, "need_monitoring_bits=0x%x\n", need_monitoring_bits); if (need_monitoring_bits & ~ni_usb_ibsta) ni_usb_set_interrupt_monitor(board, ni_usb_ibsta_monitor_mask); else if (need_monitoring_bits & ni_usb_ibsta) wake_up_interruptible(&board->wait); - dev_dbg(&usb_dev->dev, "%s: ni_usb_ibsta=0x%x\n", __func__, ni_usb_ibsta); + dev_dbg(&usb_dev->dev, "ibsta=0x%x\n", ni_usb_ibsta); } static int ni_usb_parse_status_block(const u8 *buffer, struct ni_usb_status_block *status) @@ -371,7 +371,7 @@ static int ni_usb_parse_register_read_block(const u8 *raw_data, unsigned int *re int k; if (raw_data[i++] != NIUSB_REGISTER_READ_DATA_START_ID) { - pr_err("%s: parse error: wrong start id\n", __func__); + pr_err("parse error: wrong start id\n"); unexpected = 1; } for (k = 0; k < results_per_chunk && j < num_results; ++k) @@ -380,18 +380,18 @@ static int ni_usb_parse_register_read_block(const u8 *raw_data, unsigned int *re while (i % 4) i++; if (raw_data[i++] != NIUSB_REGISTER_READ_DATA_END_ID) { - pr_err("%s: parse error: wrong end id\n", __func__); + pr_err("parse error: wrong end id\n"); unexpected = 1; } if (raw_data[i++] % results_per_chunk != num_results % results_per_chunk) { - pr_err("%s: parse error: wrong count=%i for NIUSB_REGISTER_READ_DATA_END\n", - __func__, (int)raw_data[i - 1]); + pr_err("parse error: wrong count=%i for NIUSB_REGISTER_READ_DATA_END\n", + (int)raw_data[i - 1]); unexpected = 1; } while (i % 4) { if (raw_data[i++] != 0) { - pr_err("%s: unexpected data: raw_data[%i]=0x%x, expected 0\n", - __func__, i - 1, (int)raw_data[i - 1]); + pr_err("unexpected data: raw_data[%i]=0x%x, expected 0\n", + i - 1, (int)raw_data[i - 1]); unexpected = 1; } } @@ -408,9 +408,8 @@ static int ni_usb_parse_termination_block(const u8 *buffer) buffer[i++] != 0x0 || buffer[i++] != 0x0 || buffer[i++] != 0x0) { - pr_err("%s: received unexpected termination block\n", __func__); - pr_err(" expected: 0x%x 0x%x 0x%x 0x%x\n", - NIUSB_TERM_ID, 0x0, 0x0, 0x0); + pr_err("received unexpected termination block\n"); + pr_err(" expected: 0x%x 0x%x 0x%x 0x%x\n", NIUSB_TERM_ID, 0x0, 0x0, 0x0); pr_err(" received: 0x%x 0x%x 0x%x 0x%x\n", buffer[i - 4], buffer[i - 3], buffer[i - 2], buffer[i - 1]); } @@ -438,12 +437,12 @@ static int parse_board_ibrd_readback(const u8 *raw_data, struct ni_usb_status_bl } else if (raw_data[i] == NIUSB_IBRD_EXTENDED_DATA_ID) { data_block_length = ibrd_extended_data_block_length; if (raw_data[++i] != 0) { - pr_err("%s: unexpected data: raw_data[%i]=0x%x, expected 0\n", - __func__, i, (int)raw_data[i]); + pr_err("unexpected data: raw_data[%i]=0x%x, expected 0\n", + i, (int)raw_data[i]); unexpected = 1; } } else { - pr_err("%s: logic bug!\n", __func__); + pr_err("Unexpected NIUSB_IBRD ID\n"); return -EINVAL; } ++i; @@ -457,7 +456,7 @@ static int parse_board_ibrd_readback(const u8 *raw_data, struct ni_usb_status_bl } i += ni_usb_parse_status_block(&raw_data[i], status); if (status->id != NIUSB_IBRD_STATUS_ID) { - pr_err("%s: bug: status->id=%i, != ibrd_status_id\n", __func__, status->id); + pr_err("bug: status->id=%i, != ibrd_status_id\n", status->id); return -EIO; } adr1_bits = raw_data[i++]; @@ -468,29 +467,28 @@ static int parse_board_ibrd_readback(const u8 *raw_data, struct ni_usb_status_bl *actual_bytes_read = 0; } if (*actual_bytes_read > j) - pr_err("%s: bug: discarded data. actual_bytes_read=%i, j=%i\n", - __func__, *actual_bytes_read, j); + pr_err("bug: discarded data. actual_bytes_read=%i, j=%i\n", *actual_bytes_read, j); for (k = 0; k < 2; k++) if (raw_data[i++] != 0) { - pr_err("%s: unexpected data: raw_data[%i]=0x%x, expected 0\n", - __func__, i - 1, (int)raw_data[i - 1]); + pr_err("unexpected data: raw_data[%i]=0x%x, expected 0\n", + i - 1, (int)raw_data[i - 1]); unexpected = 1; } i += ni_usb_parse_status_block(&raw_data[i], ®ister_write_status); if (register_write_status.id != NIUSB_REG_WRITE_ID) { - pr_err("%s: unexpected data: register write status id=0x%x, expected 0x%x\n", - __func__, register_write_status.id, NIUSB_REG_WRITE_ID); + pr_err("unexpected data: register write status id=0x%x, expected 0x%x\n", + register_write_status.id, NIUSB_REG_WRITE_ID); unexpected = 1; } if (raw_data[i++] != 2) { - pr_err("%s: unexpected data: register write count=%i, expected 2\n", - __func__, (int)raw_data[i - 1]); + pr_err("unexpected data: register write count=%i, expected 2\n", + (int)raw_data[i - 1]); unexpected = 1; } for (k = 0; k < 3; k++) if (raw_data[i++] != 0) { - pr_err("%s: unexpected data: raw_data[%i]=0x%x, expected 0\n", - __func__, i - 1, (int)raw_data[i - 1]); + pr_err("unexpected data: raw_data[%i]=0x%x, expected 0\n", + i - 1, (int)raw_data[i - 1]); unexpected = 1; } i += ni_usb_parse_termination_block(&raw_data[i]); @@ -530,18 +528,14 @@ static int ni_usb_write_registers(struct ni_usb_priv *ni_priv, out_data_length = num_writes * bytes_per_write + 0x10; out_data = kmalloc(out_data_length, GFP_KERNEL); - if (!out_data) { - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); + if (!out_data) return -ENOMEM; - } i += ni_usb_bulk_register_write_header(&out_data[i], num_writes); for (j = 0; j < num_writes; j++) i += ni_usb_bulk_register_write(&out_data[i], writes[j]); while (i % 4) out_data[i++] = 0x00; i += ni_usb_bulk_termination(&out_data[i]); - if (i > out_data_length) - dev_err(&usb_dev->dev, "%s: bug! buffer overrun\n", __func__); mutex_lock(&ni_priv->addressed_transfer_lock); @@ -549,22 +543,21 @@ static int ni_usb_write_registers(struct ni_usb_priv *ni_priv, kfree(out_data); if (retval) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } in_data = kmalloc(in_data_length, GFP_KERNEL); if (!in_data) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); return -ENOMEM; } retval = ni_usb_receive_bulk_msg(ni_priv, in_data, in_data_length, &bytes_read, 1000, 0); if (retval || bytes_read != 16) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); ni_usb_dump_raw_block(in_data, bytes_read); kfree(in_data); return retval; @@ -576,18 +569,16 @@ static int ni_usb_write_registers(struct ni_usb_priv *ni_priv, //FIXME parse extra 09 status bits and termination kfree(in_data); if (status.id != NIUSB_REG_WRITE_ID) { - dev_err(&usb_dev->dev, "%s: parse error, id=0x%x != NIUSB_REG_WRITE_ID\n", - __func__, status.id); + dev_err(&usb_dev->dev, "parse error, id=0x%x != NIUSB_REG_WRITE_ID\n", status.id); return -EIO; } if (status.error_code) { - dev_err(&usb_dev->dev, "%s: nonzero error code 0x%x\n", - __func__, status.error_code); + dev_err(&usb_dev->dev, "nonzero error code 0x%x\n", status.error_code); return -EIO; } if (reg_writes_completed != num_writes) { - dev_err(&usb_dev->dev, "%s: reg_writes_completed=%i, num_writes=%i\n", - __func__, reg_writes_completed, num_writes); + dev_err(&usb_dev->dev, "reg_writes_completed=%i, num_writes=%i\n", + reg_writes_completed, num_writes); return -EIO; } if (ibsta) @@ -614,10 +605,8 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, struct ni_usb_register reg; *bytes_read = 0; - if (length > max_read_length) { - length = max_read_length; - dev_err(&usb_dev->dev, "%s: read length too long\n", __func__); - } + if (length > max_read_length) + return -EINVAL; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -649,8 +638,8 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, if (retval || usb_bytes_written != i) { if (retval == 0) retval = -EIO; - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, usb_bytes_written=%i, i=%i\n", - __func__, retval, usb_bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, usb_bytes_written=%i, i=%i\n", + retval, usb_bytes_written, i); mutex_unlock(&ni_priv->addressed_transfer_lock); return retval; } @@ -668,8 +657,8 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, if (retval == -ERESTARTSYS) { } else if (retval) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, usb_bytes_read=%i\n", - __func__, retval, usb_bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, usb_bytes_read=%i\n", + retval, usb_bytes_read); kfree(in_data); return retval; } @@ -677,14 +666,14 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, if (parse_retval != usb_bytes_read) { if (parse_retval >= 0) parse_retval = -EIO; - dev_err(&usb_dev->dev, "%s: retval=%i usb_bytes_read=%i\n", - __func__, parse_retval, usb_bytes_read); + dev_err(&usb_dev->dev, "retval=%i usb_bytes_read=%i\n", + parse_retval, usb_bytes_read); kfree(in_data); return parse_retval; } if (actual_length != length - status.count) { - dev_err(&usb_dev->dev, "%s: actual_length=%i expected=%li\n", - __func__, actual_length, (long)(length - status.count)); + dev_err(&usb_dev->dev, "actual_length=%i expected=%li\n", + actual_length, (long)(length - status.count)); ni_usb_dump_raw_block(in_data, usb_bytes_read); } kfree(in_data); @@ -699,7 +688,7 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, break; case NIUSB_ATN_STATE_ERROR: retval = -EIO; - dev_err(&usb_dev->dev, "%s: read when ATN set\n", __func__); + dev_err(&usb_dev->dev, "read when ATN set\n"); break; case NIUSB_ADDRESSING_ERROR: retval = -EIO; @@ -708,12 +697,11 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, retval = -ETIMEDOUT; break; case NIUSB_EOSMODE_ERROR: - dev_err(&usb_dev->dev, "%s: driver bug, we should have been able to avoid NIUSB_EOSMODE_ERROR.\n", - __func__); + dev_err(&usb_dev->dev, "driver bug, we should have been able to avoid NIUSB_EOSMODE_ERROR.\n"); retval = -EINVAL; break; default: - dev_err(&usb_dev->dev, "%s: unknown error code=%i\n", __func__, status.error_code); + dev_err(&usb_dev->dev, "unknown error code=%i\n", status.error_code); retval = -EIO; break; } @@ -742,11 +730,8 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, static const int max_write_length = 0xffff; *bytes_written = 0; - if (length > max_write_length) { - length = max_write_length; - send_eoi = 0; - dev_err(&usb_dev->dev, "%s: write length too long\n", __func__); - } + if (length > max_write_length) + return -EINVAL; out_data_length = length + 0x10; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -777,8 +762,8 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, kfree(out_data); if (retval || usb_bytes_written != i) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, usb_bytes_written=%i, i=%i\n", - __func__, retval, usb_bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, usb_bytes_written=%i, i=%i\n", + retval, usb_bytes_written, i); return retval; } @@ -793,8 +778,8 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, mutex_unlock(&ni_priv->addressed_transfer_lock); if ((retval && retval != -ERESTARTSYS) || usb_bytes_read != 12) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, usb_bytes_read=%i\n", - __func__, retval, usb_bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, usb_bytes_read=%i\n", + retval, usb_bytes_read); kfree(in_data); return retval; } @@ -810,8 +795,8 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, */ break; case NIUSB_ADDRESSING_ERROR: - dev_err(&usb_dev->dev, "%s: Addressing error retval %d error code=%i\n", - __func__, retval, status.error_code); + dev_err(&usb_dev->dev, "Addressing error retval %d error code=%i\n", + retval, status.error_code); retval = -ENXIO; break; case NIUSB_NO_LISTENER_ERROR: @@ -821,8 +806,7 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, retval = -ETIMEDOUT; break; default: - dev_err(&usb_dev->dev, "%s: unknown error code=%i\n", - __func__, status.error_code); + dev_err(&usb_dev->dev, "unknown error code=%i\n", status.error_code); retval = -EPIPE; break; } @@ -873,8 +857,8 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len kfree(out_data); if (retval || bytes_written != i) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } @@ -890,8 +874,8 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len mutex_unlock(&ni_priv->addressed_transfer_lock); if ((retval && retval != -ERESTARTSYS) || bytes_read != 12) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return retval; } @@ -909,12 +893,12 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len case NIUSB_NO_BUS_ERROR: return -ENOTCONN; case NIUSB_EOSMODE_ERROR: - dev_err(&usb_dev->dev, "%s: got eosmode error. Driver bug?\n", __func__); + dev_err(&usb_dev->dev, "got eosmode error. Driver bug?\n"); return -EIO; case NIUSB_TIMEOUT_ERROR: return -ETIMEDOUT; default: - dev_err(&usb_dev->dev, "%s: unknown error code=%i\n", __func__, status.error_code); + dev_err(&usb_dev->dev, "unknown error code=%i\n", status.error_code); return -EIO; } ni_usb_soft_update_status(board, status.ibsta, 0); @@ -968,15 +952,14 @@ static int ni_usb_take_control(gpib_board_t *board, int synchronous) kfree(out_data); if (retval || bytes_written != i) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } in_data = kmalloc(in_data_length, GFP_KERNEL); if (!in_data) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); return -ENOMEM; } retval = ni_usb_receive_bulk_msg(ni_priv, in_data, in_data_length, &bytes_read, 1000, 1); @@ -986,8 +969,8 @@ static int ni_usb_take_control(gpib_board_t *board, int synchronous) if ((retval && retval != -ERESTARTSYS) || bytes_read != 12) { if (retval == 0) retval = -EIO; - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return retval; } @@ -1025,15 +1008,14 @@ static int ni_usb_go_to_standby(gpib_board_t *board) kfree(out_data); if (retval || bytes_written != i) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } in_data = kmalloc(in_data_length, GFP_KERNEL); if (!in_data) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); return -ENOMEM; } retval = ni_usb_receive_bulk_msg(ni_priv, in_data, in_data_length, &bytes_read, 1000, 0); @@ -1041,16 +1023,15 @@ static int ni_usb_go_to_standby(gpib_board_t *board) mutex_unlock(&ni_priv->addressed_transfer_lock); if (retval || bytes_read != 12) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return retval; } ni_usb_parse_status_block(in_data, &status); kfree(in_data); if (status.id != NIUSB_IBGTS_ID) - dev_err(&usb_dev->dev, "%s: bug: status.id 0x%x != INUSB_IBGTS_ID\n", - __func__, status.id); + dev_err(&usb_dev->dev, "bug: status.id 0x%x != INUSB_IBGTS_ID\n", status.id); ni_usb_soft_update_status(board, status.ibsta, 0); return 0; } @@ -1093,7 +1074,7 @@ static void ni_usb_request_system_control(gpib_board_t *board, int request_contr } retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return; // retval; } if (!request_control) @@ -1119,10 +1100,8 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) if (assert == 0) return; out_data = kmalloc(out_data_length, GFP_KERNEL); - if (!out_data) { - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); + if (!out_data) return; - } out_data[i++] = NIUSB_IBSIC_ID; out_data[i++] = 0x0; out_data[i++] = 0x0; @@ -1131,8 +1110,8 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) retval = ni_usb_send_bulk_msg(ni_priv, out_data, i, &bytes_written, 1000); kfree(out_data); if (retval || bytes_written != i) { - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return; } in_data = kmalloc(in_data_length, GFP_KERNEL); @@ -1141,8 +1120,8 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) retval = ni_usb_receive_bulk_msg(ni_priv, in_data, in_data_length, &bytes_read, 1000, 0); if (retval || bytes_read != 12) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return; } @@ -1167,7 +1146,7 @@ static void ni_usb_remote_enable(gpib_board_t *board, int enable) reg.value = AUX_CREN; retval = ni_usb_write_registers(ni_priv, ®, 1, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return; //retval; } ni_priv->ren_state = enable; @@ -1207,7 +1186,6 @@ static unsigned int ni_usb_update_status(gpib_board_t *board, unsigned int clear u8 *buffer; struct ni_usb_status_block status; - //printk("%s: receive control pipe is %i\n", __func__, pipe); buffer = kmalloc(buffer_length, GFP_KERNEL); if (!buffer) return board->status; @@ -1216,7 +1194,7 @@ static unsigned int ni_usb_update_status(gpib_board_t *board, unsigned int clear USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x200, 0x0, buffer, buffer_length, 1000); if (retval != buffer_length) { - dev_err(&usb_dev->dev, "%s: usb_control_msg returned %i\n", __func__, retval); + dev_err(&usb_dev->dev, "usb_control_msg returned %i\n", retval); kfree(buffer); return board->status; } @@ -1235,7 +1213,6 @@ static void ni_usb_stop(struct ni_usb_priv *ni_priv) u8 *buffer; struct ni_usb_status_block status; - //printk("%s: receive control pipe is %i\n", __func__, pipe); buffer = kmalloc(buffer_length, GFP_KERNEL); if (!buffer) return; @@ -1244,7 +1221,7 @@ static void ni_usb_stop(struct ni_usb_priv *ni_priv) USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0, 0x0, buffer, buffer_length, 1000); if (retval != buffer_length) { - dev_err(&usb_dev->dev, "%s: usb_control_msg returned %i\n", __func__, retval); + dev_err(&usb_dev->dev, "usb_control_msg returned %i\n", retval); kfree(buffer); return; } @@ -1271,7 +1248,7 @@ static int ni_usb_primary_address(gpib_board_t *board, unsigned int address) i++; retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1319,7 +1296,7 @@ static int ni_usb_secondary_address(gpib_board_t *board, unsigned int address, i i += ni_usb_write_sad(writes, address, enable); retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1353,8 +1330,8 @@ static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) kfree(out_data); if (retval || bytes_written != i) { - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } in_data = kmalloc(in_data_length, GFP_KERNEL); @@ -1366,8 +1343,8 @@ static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) &bytes_read, 1000, 1); if (retval && retval != -ERESTARTSYS) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return retval; } @@ -1393,7 +1370,7 @@ static void ni_usb_parallel_poll_configure(gpib_board_t *board, uint8_t config) i++; retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return;// retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1418,7 +1395,7 @@ static void ni_usb_parallel_poll_response(gpib_board_t *board, int ist) i++; retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return;// retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1440,7 +1417,7 @@ static void ni_usb_serial_poll_response(gpib_board_t *board, u8 status) i++; retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return;// retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1467,7 +1444,7 @@ static void ni_usb_return_to_local(gpib_board_t *board) i++; retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return;// retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1509,15 +1486,14 @@ static int ni_usb_line_status(const gpib_board_t *board) if (retval || bytes_written != i) { mutex_unlock(&ni_priv->addressed_transfer_lock); if (retval != -EAGAIN) - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%i\n", - __func__, retval, bytes_written, i); + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%i\n", + retval, bytes_written, i); return retval; } in_data = kmalloc(in_data_length, GFP_KERNEL); if (!in_data) { mutex_unlock(&ni_priv->addressed_transfer_lock); - dev_err(&usb_dev->dev, "%s: kmalloc failed\n", __func__); return -ENOMEM; } retval = ni_usb_nonblocking_receive_bulk_msg(ni_priv, in_data, in_data_length, @@ -1527,8 +1503,8 @@ static int ni_usb_line_status(const gpib_board_t *board) if (retval) { if (retval != -EAGAIN) - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); kfree(in_data); return retval; } @@ -1604,7 +1580,7 @@ static unsigned int ni_usb_t1_delay(gpib_board_t *board, unsigned int nano_sec) i = ni_usb_setup_t1_delay(writes, nano_sec, &actual_ns); retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return -1; //FIXME should change return type to int for error reporting } board->t1_nano_sec = actual_ns; @@ -1736,7 +1712,7 @@ static int ni_usb_setup_init(gpib_board_t *board, struct ni_usb_register *writes writes[i].value = AUX_CPPF; i++; if (i > NUM_INIT_WRITES) { - dev_err(&usb_dev->dev, "%s: bug!, buffer overrun, i=%i\n", __func__, i); + dev_err(&usb_dev->dev, "bug!, buffer overrun, i=%i\n", i); return 0; } return i; @@ -1762,7 +1738,7 @@ static int ni_usb_init(gpib_board_t *board) return -EFAULT; kfree(writes); if (retval) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return retval; } ni_usb_soft_update_status(board, ibsta, 0); @@ -1778,9 +1754,6 @@ static void ni_usb_interrupt_complete(struct urb *urb) struct ni_usb_status_block status; unsigned long flags; -// printk("debug: %s: status=0x%x, error_count=%i, actual_length=%i\n", __func__, -// urb->status, urb->error_count, urb->actual_length); - switch (urb->status) { /* success */ case 0: @@ -1793,23 +1766,21 @@ static void ni_usb_interrupt_complete(struct urb *urb) default: /* other error, resubmit */ retval = usb_submit_urb(ni_priv->interrupt_urb, GFP_ATOMIC); if (retval) - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb\n", __func__); + dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); return; } ni_usb_parse_status_block(urb->transfer_buffer, &status); -// printk("debug: ibsta=0x%x\n", status.ibsta); spin_lock_irqsave(&board->spinlock, flags); ni_priv->monitored_ibsta_bits &= ~status.ibsta; -// printk("debug: monitored_ibsta_bits=0x%x\n", ni_priv->monitored_ibsta_bits); spin_unlock_irqrestore(&board->spinlock, flags); wake_up_interruptible(&board->wait); retval = usb_submit_urb(ni_priv->interrupt_urb, GFP_ATOMIC); if (retval) - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb\n", __func__); + dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); } static int ni_usb_set_interrupt_monitor(gpib_board_t *board, unsigned int monitored_bits) @@ -1821,22 +1792,20 @@ static int ni_usb_set_interrupt_monitor(gpib_board_t *board, unsigned int monito u8 *buffer; struct ni_usb_status_block status; unsigned long flags; - //printk("%s: receive control pipe is %i\n", __func__, pipe); + buffer = kmalloc(buffer_length, GFP_KERNEL); if (!buffer) return -ENOMEM; spin_lock_irqsave(&board->spinlock, flags); ni_priv->monitored_ibsta_bits = ni_usb_ibsta_monitor_mask & monitored_bits; -// dev_err(&usb_dev->dev, "debug: %s: monitored_ibsta_bits=0x%x\n", -// __func__, ni_priv->monitored_ibsta_bits); spin_unlock_irqrestore(&board->spinlock, flags); retval = ni_usb_receive_control_msg(ni_priv, NI_USB_WAIT_REQUEST, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x300, ni_usb_ibsta_monitor_mask & monitored_bits, buffer, buffer_length, 1000); if (retval != buffer_length) { - dev_err(&usb_dev->dev, "%s: usb_control_msg returned %i\n", __func__, retval); + dev_err(&usb_dev->dev, "usb_control_msg returned %i\n", retval); kfree(buffer); return -1; } @@ -1872,8 +1841,7 @@ static int ni_usb_setup_urbs(gpib_board_t *board) retval = usb_submit_urb(ni_priv->interrupt_urb, GFP_KERNEL); mutex_unlock(&ni_priv->interrupt_transfer_lock); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to submit first interrupt urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "failed to submit first interrupt urb, retval=%i\n", retval); return retval; } return 0; @@ -1904,7 +1872,6 @@ static int ni_usb_b_read_serial_number(struct ni_usb_priv *ni_priv) int j; unsigned int serial_number; -// printk("%s: %s\n", __func__); in_data = kmalloc(in_data_length, GFP_KERNEL); if (!in_data) return -ENOMEM; @@ -1924,20 +1891,19 @@ static int ni_usb_b_read_serial_number(struct ni_usb_priv *ni_priv) i += ni_usb_bulk_termination(&out_data[i]); retval = ni_usb_send_bulk_msg(ni_priv, out_data, out_data_length, &bytes_written, 1000); if (retval) { - dev_err(&usb_dev->dev, "%s: ni_usb_send_bulk_msg returned %i, bytes_written=%i, i=%li\n", - __func__, + dev_err(&usb_dev->dev, "send_bulk_msg returned %i, bytes_written=%i, i=%li\n", retval, bytes_written, (long)out_data_length); goto serial_out; } retval = ni_usb_receive_bulk_msg(ni_priv, in_data, in_data_length, &bytes_read, 1000, 0); if (retval) { - dev_err(&usb_dev->dev, "%s: ni_usb_receive_bulk_msg returned %i, bytes_read=%i\n", - __func__, retval, bytes_read); + dev_err(&usb_dev->dev, "receive_bulk_msg returned %i, bytes_read=%i\n", + retval, bytes_read); ni_usb_dump_raw_block(in_data, bytes_read); goto serial_out; } if (ARRAY_SIZE(results) < num_reads) { - dev_err(&usb_dev->dev, "Setup bug\n"); + dev_err(&usb_dev->dev, "serial number eetup bug\n"); retval = -EINVAL; goto serial_out; } @@ -1945,7 +1911,7 @@ static int ni_usb_b_read_serial_number(struct ni_usb_priv *ni_priv) serial_number = 0; for (j = 0; j < num_reads; ++j) serial_number |= (results[j] & 0xff) << (8 * j); - dev_info(&usb_dev->dev, "%s: board serial number is 0x%x\n", __func__, serial_number); + dev_dbg(&usb_dev->dev, "board serial number is 0x%x\n", serial_number); retval = 0; serial_out: kfree(in_data); @@ -1973,22 +1939,22 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0, 0x0, buffer, buffer_size, 1000); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: usb_control_msg request 0x%x returned %i\n", - __func__, NI_USB_SERIAL_NUMBER_REQUEST, retval); + dev_err(&usb_dev->dev, "usb_control_msg request 0x%x returned %i\n", + NI_USB_SERIAL_NUMBER_REQUEST, retval); goto ready_out; } j = 0; if (buffer[j] != NI_USB_SERIAL_NUMBER_REQUEST) { - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x%x\n", - __func__, j, (int)buffer[j], NI_USB_SERIAL_NUMBER_REQUEST); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x%x\n", + j, (int)buffer[j], NI_USB_SERIAL_NUMBER_REQUEST); unexpected = 1; } if (unexpected) ni_usb_dump_raw_block(buffer, retval); // NI-USB-HS+ pads the serial with 0x0 to make 16 bytes if (retval != 5 && retval != 16) { - dev_err(&usb_dev->dev, "%s: received unexpected number of bytes = %i, expected 5 or 16\n", - __func__, retval); + dev_err(&usb_dev->dev, "received unexpected number of bytes = %i, expected 5 or 16\n", + retval); ni_usb_dump_raw_block(buffer, retval); } serial_number = 0; @@ -1996,7 +1962,7 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) serial_number |= (buffer[++j] << 8); serial_number |= (buffer[++j] << 16); serial_number |= (buffer[++j] << 24); - dev_info(&usb_dev->dev, "%s: board serial number is 0x%x\n", __func__, serial_number); + dev_dbg(&usb_dev->dev, "board serial number is 0x%x\n", serial_number); for (i = 0; i < timeout; ++i) { int ready = 0; @@ -2004,26 +1970,26 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0, 0x0, buffer, buffer_size, 100); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: usb_control_msg request 0x%x returned %i\n", - __func__, NI_USB_POLL_READY_REQUEST, retval); + dev_err(&usb_dev->dev, "usb_control_msg request 0x%x returned %i\n", + NI_USB_POLL_READY_REQUEST, retval); goto ready_out; } j = 0; unexpected = 0; if (buffer[j] != NI_USB_POLL_READY_REQUEST) { // [0] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x%x\n", - __func__, j, (int)buffer[j], NI_USB_POLL_READY_REQUEST); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x%x\n", + j, (int)buffer[j], NI_USB_POLL_READY_REQUEST); unexpected = 1; } ++j; if (buffer[j] != 0x1 && buffer[j] != 0x0) { // [1] HS+ sends 0x0 - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x1 or 0x0\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x1 or 0x0\n", + j, (int)buffer[j]); unexpected = 1; } if (buffer[++j] != 0x0) { // [2] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x%x\n", - __func__, j, (int)buffer[j], 0x0); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x%x\n", + j, (int)buffer[j], 0x0); unexpected = 1; } ++j; @@ -2031,22 +1997,22 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) // NI-USB-HS+ sends 0x0 if (buffer[j] != 0x1 && buffer[j] != 0x8 && buffer[j] != 0x7 && buffer[j] != 0x0) { // [3] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x0, 0x1, 0x7 or 0x8\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x0, 0x1, 0x7 or 0x8\n", + j, (int)buffer[j]); unexpected = 1; } ++j; // NI-USB-HS+ sends 0 here if (buffer[j] != 0x30 && buffer[j] != 0x0) { // [4] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x0 or 0x30\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x0 or 0x30\n", + j, (int)buffer[j]); unexpected = 1; } ++j; // MC usb-488 (and sometimes NI-USB-HS?) and NI-USB-HS+ sends 0x0 here if (buffer[j] != 0x1 && buffer[j] != 0x0) { // [5] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x1 or 0x0\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x1 or 0x0\n", + j, (int)buffer[j]); unexpected = 1; } if (buffer[++j] != 0x0) { // [6] @@ -2054,8 +2020,8 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) // NI-USB-HS+ sends 0xf here if (buffer[j] != 0x2 && buffer[j] != 0xe && buffer[j] != 0xf && buffer[j] != 0x16) { - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x2, 0xe, 0xf or 0x16\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x2, 0xe, 0xf or 0x16\n", + j, (int)buffer[j]); unexpected = 1; } } @@ -2064,30 +2030,30 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) // MC usb-488 sends 0x5 here; MC usb-488A sends 0x6 here if (buffer[j] != 0x3 && buffer[j] != 0x5 && buffer[j] != 0x6 && buffer[j] != 0x8) { - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x3 or 0x5, 0x6 or 0x08\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x3 or 0x5, 0x6 or 0x08\n", + j, (int)buffer[j]); unexpected = 1; } } ++j; if (buffer[j] != 0x0 && buffer[j] != 0x2) { // [8] MC usb-488 sends 0x2 here - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x0 or 0x2\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, " unexpected data: buffer[%i]=0x%x, expected 0x0 or 0x2\n", + j, (int)buffer[j]); unexpected = 1; } ++j; // MC usb-488A and NI-USB-HS sends 0x3 here; NI-USB-HS+ sends 0x30 here if (buffer[j] != 0x0 && buffer[j] != 0x3 && buffer[j] != 0x30) { // [9] - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x0, 0x3 or 0x30\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x0, 0x3 or 0x30\n", + j, (int)buffer[j]); unexpected = 1; } if (buffer[++j] != 0x0) { ready = 1; if (buffer[j] != 0x96 && buffer[j] != 0x7 && buffer[j] != 0x6e) { // [10] MC usb-488 sends 0x7 here - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[%i]=0x%x, expected 0x96, 0x07 or 0x6e\n", - __func__, j, (int)buffer[j]); + dev_err(&usb_dev->dev, "unexpected data: buffer[%i]=0x%x, expected 0x96, 0x07 or 0x6e\n", + j, (int)buffer[j]); unexpected = 1; } } @@ -2097,7 +2063,6 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) break; retval = msleep_interruptible(msec_sleep_duration); if (retval) { - dev_err(&usb_dev->dev, "ni_usb_gpib: msleep interrupted\n"); retval = -ERESTARTSYS; goto ready_out; } @@ -2106,7 +2071,7 @@ static int ni_usb_hs_wait_for_ready(struct ni_usb_priv *ni_priv) ready_out: kfree(buffer); - dev_dbg(&usb_dev->dev, "%s: exit retval=%d\n", __func__, retval); + dev_dbg(&usb_dev->dev, "exit retval=%d\n", retval); return retval; } @@ -2134,14 +2099,14 @@ static int ni_usb_hs_plus_extra_init(struct ni_usb_priv *ni_priv) USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0, 0x0, buffer, transfer_size, 1000); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: usb_control_msg request 0x%x returned %i\n", - __func__, NI_USB_HS_PLUS_0x48_REQUEST, retval); + dev_err(&usb_dev->dev, "usb_control_msg request 0x%x returned %i\n", + NI_USB_HS_PLUS_0x48_REQUEST, retval); break; } // expected response data: 48 f3 30 00 00 00 00 00 00 00 00 00 00 00 00 00 if (buffer[0] != NI_USB_HS_PLUS_0x48_REQUEST) - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[0]=0x%x, expected 0x%x\n", - __func__, (int)buffer[0], NI_USB_HS_PLUS_0x48_REQUEST); + dev_err(&usb_dev->dev, "unexpected data: buffer[0]=0x%x, expected 0x%x\n", + (int)buffer[0], NI_USB_HS_PLUS_0x48_REQUEST); transfer_size = 2; @@ -2149,14 +2114,14 @@ static int ni_usb_hs_plus_extra_init(struct ni_usb_priv *ni_priv) USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x1, 0x0, buffer, transfer_size, 1000); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: usb_control_msg request 0x%x returned %i\n", - __func__, NI_USB_HS_PLUS_LED_REQUEST, retval); + dev_err(&usb_dev->dev, "usb_control_msg request 0x%x returned %i\n", + NI_USB_HS_PLUS_LED_REQUEST, retval); break; } // expected response data: 4b 00 if (buffer[0] != NI_USB_HS_PLUS_LED_REQUEST) - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[0]=0x%x, expected 0x%x\n", - __func__, (int)buffer[0], NI_USB_HS_PLUS_LED_REQUEST); + dev_err(&usb_dev->dev, "unexpected data: buffer[0]=0x%x, expected 0x%x\n", + (int)buffer[0], NI_USB_HS_PLUS_LED_REQUEST); transfer_size = 9; @@ -2165,15 +2130,14 @@ static int ni_usb_hs_plus_extra_init(struct ni_usb_priv *ni_priv) USB_RECIP_INTERFACE, 0x0, 0x1, buffer, transfer_size, 1000); if (retval < 0) { - dev_err(&usb_dev->dev, "%s: usb_control_msg request 0x%x returned %i\n", - __func__, NI_USB_HS_PLUS_0xf8_REQUEST, retval); + dev_err(&usb_dev->dev, "usb_control_msg request 0x%x returned %i\n", + NI_USB_HS_PLUS_0xf8_REQUEST, retval); break; } // expected response data: f8 01 00 00 00 01 00 00 00 if (buffer[0] != NI_USB_HS_PLUS_0xf8_REQUEST) - dev_err(&usb_dev->dev, "%s: unexpected data: buffer[0]=0x%x, expected 0x%x\n", - __func__, (int)buffer[0], NI_USB_HS_PLUS_0xf8_REQUEST); - + dev_err(&usb_dev->dev, "unexpected data: buffer[0]=0x%x, expected 0x%x\n", + (int)buffer[0], NI_USB_HS_PLUS_0xf8_REQUEST); } while (0); // cleanup @@ -2192,7 +2156,7 @@ static inline int ni_usb_device_match(struct usb_interface *interface, static int ni_usb_attach(gpib_board_t *board, const gpib_board_config_t *config) { int retval; - int i; + int i, index; struct ni_usb_priv *ni_priv; int product_id; struct usb_device *usb_dev; @@ -2211,19 +2175,17 @@ static int ni_usb_attach(gpib_board_t *board, const gpib_board_config_t *config) ni_priv->bus_interface = ni_usb_driver_interfaces[i]; usb_set_intfdata(ni_usb_driver_interfaces[i], board); usb_dev = interface_to_usbdev(ni_priv->bus_interface); - dev_info(&usb_dev->dev, - "bus %d dev num %d attached to gpib minor %d, NI usb interface %i\n", - usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); + index = i; break; } } if (i == MAX_NUM_NI_USB_INTERFACES) { mutex_unlock(&ni_usb_hotplug_lock); - pr_err("No supported NI usb gpib adapters found, have you loaded its firmware?\n"); + dev_err(board->gpib_dev, "No supported adapters found, have you loaded its firmware?\n"); return -ENODEV; } if (usb_reset_configuration(interface_to_usbdev(ni_priv->bus_interface))) - dev_err(&usb_dev->dev, "ni_usb_gpib: usb_reset_configuration() failed.\n"); + dev_err(&usb_dev->dev, "usb_reset_configuration() failed.\n"); product_id = le16_to_cpu(usb_dev->descriptor.idProduct); ni_priv->product_id = product_id; @@ -2296,7 +2258,9 @@ static int ni_usb_attach(gpib_board_t *board, const gpib_board_config_t *config) } mutex_unlock(&ni_usb_hotplug_lock); - dev_info(&usb_dev->dev, "%s: attached\n", __func__); + dev_info(&usb_dev->dev, + "bus %d dev num %d attached to gpib%d, intf %i\n", + usb_dev->bus->busnum, usb_dev->devnum, board->minor, index); return retval; } @@ -2304,27 +2268,19 @@ static int ni_usb_shutdown_hardware(struct ni_usb_priv *ni_priv) { struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); int retval; - int i = 0; struct ni_usb_register writes[2]; static const int writes_length = ARRAY_SIZE(writes); unsigned int ibsta; -// printk("%s: %s\n", __func__); - writes[i].device = NIUSB_SUBDEV_TNT4882; - writes[i].address = nec7210_to_tnt4882_offset(AUXMR); - writes[i].value = AUX_CR; - i++; - writes[i].device = NIUSB_SUBDEV_UNKNOWN3; - writes[i].address = 0x10; - writes[i].value = 0x0; - i++; - if (i > writes_length) { - dev_err(&usb_dev->dev, "%s: bug!, buffer overrun, i=%i\n", __func__, i); - return -EINVAL; - } - retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); + writes[0].device = NIUSB_SUBDEV_TNT4882; + writes[0].address = nec7210_to_tnt4882_offset(AUXMR); + writes[0].value = AUX_CR; + writes[1].device = NIUSB_SUBDEV_UNKNOWN3; + writes[1].address = 0x10; + writes[1].value = 0x0; + retval = ni_usb_write_registers(ni_priv, writes, writes_length, &ibsta); if (retval) { - dev_err(&usb_dev->dev, "%s: register write failed, retval=%i\n", __func__, retval); + dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); return retval; } return 0; @@ -2413,7 +2369,7 @@ static int ni_usb_driver_probe(struct usb_interface *interface, const struct usb if (i == MAX_NUM_NI_USB_INTERFACES) { usb_put_dev(usb_dev); mutex_unlock(&ni_usb_hotplug_lock); - dev_err(&usb_dev->dev, "%s: ni_usb_driver_interfaces[] full\n", __func__); + dev_err(&usb_dev->dev, "ni_usb_driver_interfaces[] full\n"); return -1; } path = kmalloc(path_length, GFP_KERNEL); @@ -2423,7 +2379,7 @@ static int ni_usb_driver_probe(struct usb_interface *interface, const struct usb return -ENOMEM; } usb_make_path(usb_dev, path, path_length); - dev_info(&usb_dev->dev, "ni_usb_gpib: probe succeeded for path: %s\n", path); + dev_info(&usb_dev->dev, "probe succeeded for path: %s\n", path); kfree(path); mutex_unlock(&ni_usb_hotplug_lock); return 0; @@ -2458,8 +2414,7 @@ static void ni_usb_driver_disconnect(struct usb_interface *interface) } } if (i == MAX_NUM_NI_USB_INTERFACES) - dev_err(&usb_dev->dev, "%s: unable to find interface in ni_usb_driver_interfaces[]? bug?\n", - __func__); + dev_err(&usb_dev->dev, "unable to find interface bug?\n"); usb_put_dev(usb_dev); mutex_unlock(&ni_usb_hotplug_lock); } @@ -2498,9 +2453,9 @@ static int ni_usb_driver_suspend(struct usb_interface *interface, pm_message_t m ni_usb_cleanup_urbs(ni_priv); mutex_unlock(&ni_priv->interrupt_transfer_lock); } - dev_info(&usb_dev->dev, - "bus %d dev num %d gpib minor %d, ni usb interface %i suspended\n", - usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); + dev_dbg(&usb_dev->dev, + "bus %d dev num %d gpib%d, interface %i suspended\n", + usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); } mutex_unlock(&ni_usb_hotplug_lock); @@ -2535,15 +2490,15 @@ static int ni_usb_driver_resume(struct usb_interface *interface) mutex_lock(&ni_priv->interrupt_transfer_lock); retval = usb_submit_urb(ni_priv->interrupt_urb, GFP_KERNEL); if (retval) { - dev_err(&usb_dev->dev, "%s: failed to resubmit interrupt urb, retval=%i\n", - __func__, retval); + dev_err(&usb_dev->dev, "resume failed to resubmit interrupt urb, retval=%i\n", + retval); mutex_unlock(&ni_priv->interrupt_transfer_lock); mutex_unlock(&ni_usb_hotplug_lock); return retval; } mutex_unlock(&ni_priv->interrupt_transfer_lock); } else { - dev_err(&usb_dev->dev, "%s: bug! int urb not set up\n", __func__); + dev_err(&usb_dev->dev, "bug! resume int urb not set up\n"); mutex_unlock(&ni_usb_hotplug_lock); return -EINVAL; } @@ -2600,9 +2555,9 @@ static int ni_usb_driver_resume(struct usb_interface *interface) if (ni_priv->ren_state) ni_usb_remote_enable(board, 1); - dev_info(&usb_dev->dev, - "bus %d dev num %d gpib minor %d, ni usb interface %i resumed\n", - usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); + dev_dbg(&usb_dev->dev, + "bus %d dev num %d gpib%d, interface %i resumed\n", + usb_dev->bus->busnum, usb_dev->devnum, board->minor, i); } mutex_unlock(&ni_usb_hotplug_lock); @@ -2610,7 +2565,7 @@ static int ni_usb_driver_resume(struct usb_interface *interface) } static struct usb_driver ni_usb_bus_driver = { - .name = "ni_usb_gpib", + .name = DRV_NAME, .probe = ni_usb_driver_probe, .disconnect = ni_usb_driver_disconnect, .suspend = ni_usb_driver_suspend, @@ -2623,19 +2578,18 @@ static int __init ni_usb_init_module(void) int i; int ret; - pr_info("ni_usb_gpib driver loading\n"); for (i = 0; i < MAX_NUM_NI_USB_INTERFACES; i++) ni_usb_driver_interfaces[i] = NULL; ret = usb_register(&ni_usb_bus_driver); if (ret) { - pr_err("ni_usb_gpib: usb_register failed: error = %d\n", ret); + pr_err("usb_register failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&ni_usb_gpib_interface, THIS_MODULE); if (ret) { - pr_err("ni_usb_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); return ret; } @@ -2644,7 +2598,6 @@ static int __init ni_usb_init_module(void) static void __exit ni_usb_exit_module(void) { - pr_info("ni_usb_gpib driver unloading\n"); gpib_unregister_driver(&ni_usb_gpib_interface); usb_deregister(&ni_usb_bus_driver); } From 4d5092b188b363aa6b15cedb79c2ffd758aa5af7 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:06 +0100 Subject: [PATCH 0306/2157] staging: gpib: pc2 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Turn long pr_err into comment, short message in dev_err. Change pr_err to dev_err where possible. Use error return codes consistent with messages. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-16-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/pc2/pc2_gpib.c | 51 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/staging/gpib/pc2/pc2_gpib.c b/drivers/staging/gpib/pc2/pc2_gpib.c index 3eccd4c54afa..6711851301ec 100644 --- a/drivers/staging/gpib/pc2/pc2_gpib.c +++ b/drivers/staging/gpib/pc2/pc2_gpib.c @@ -4,6 +4,9 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt + #include #include #include @@ -268,7 +271,8 @@ static int pc2_generic_attach(gpib_board_t *board, const gpib_board_config_t *co * is adapted to use isa_register_driver. */ if (config->ibdma) - pr_err("DMA disabled for pc2 gpib, driver needs to be adapted to use isa_register_driver to get a struct device*"); + // driver needs to be adapted to use isa_register_driver to get a struct device* + dev_err(board->gpib_dev, "DMA disabled for pc2 gpib"); #else if (config->ibdma) { nec_priv->dma_buffer_length = 0x1000; @@ -280,7 +284,7 @@ static int pc2_generic_attach(gpib_board_t *board, const gpib_board_config_t *co // request isa dma channel if (request_dma(config->ibdma, "pc2")) { - pr_err("gpib: can't request DMA %d\n", config->ibdma); + dev_err(board->gpib_dev, "can't request DMA %d\n", config->ibdma); return -1; } nec_priv->dma_channel = config->ibdma; @@ -306,8 +310,8 @@ static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) nec_priv->offset = pc2_reg_offset; if (!request_region(config->ibbase, pc2_iosize, "pc2")) { - pr_err("gpib: ioports are already in use\n"); - return -1; + dev_err(board->gpib_dev, "ioports are already in use\n"); + return -EBUSY; } nec_priv->iobase = config->ibbase; @@ -316,14 +320,14 @@ static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) // install interrupt handler if (config->ibirq) { if (request_irq(config->ibirq, pc2_interrupt, isr_flags, "pc2", board)) { - pr_err("gpib: can't request IRQ %d\n", config->ibirq); - return -1; + dev_err(board->gpib_dev, "can't request IRQ %d\n", config->ibirq); + return -EBUSY; } } pc2_priv->irq = config->ibirq; /* poll so we can detect assertion of ATN */ if (gpib_request_pseudo_irq(board, pc2_interrupt)) { - pr_err("pc2_gpib: failed to allocate pseudo_irq\n"); + dev_err(board->gpib_dev, "failed to allocate pseudo_irq\n"); return -1; } /* set internal counter register for 8 MHz input clock */ @@ -384,18 +388,19 @@ static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *co case 0x62e1: break; default: - pr_err("PCIIa base range invalid, must be one of 0x[0246]2e1, but is 0x%x\n", - config->ibbase); + dev_err(board->gpib_dev, "PCIIa base range invalid, must be one of 0x[0246]2e1, but is 0x%x\n", + config->ibbase); return -1; } if (config->ibirq) { if (config->ibirq < 2 || config->ibirq > 7) { - pr_err("pc2_gpib: illegal interrupt level %i\n", config->ibirq); + dev_err(board->gpib_dev, "illegal interrupt level %i\n", + config->ibirq); return -1; } } else { - pr_err("pc2_gpib: interrupt disabled, using polling mode (slow)\n"); + dev_err(board->gpib_dev, "interrupt disabled, using polling mode (slow)\n"); } #ifdef CHECK_IOPORTS unsigned int err = 0; @@ -407,36 +412,36 @@ static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *co if (config->ibirq && check_region(pc2a_clear_intr_iobase + config->ibirq, 1)) err++; if (err) { - pr_err("gpib: ioports are already in use"); - return -1; + dev_err(board->gpib_dev, "ioports are already in use"); + return -EBUSY; } #endif for (i = 0; i < num_registers; i++) { if (!request_region(config->ibbase + i * pc2a_reg_offset, 1, "pc2a")) { - pr_err("gpib: ioports are already in use"); + dev_err(board->gpib_dev, "ioports are already in use"); for (j = 0; j < i; j++) release_region(config->ibbase + j * pc2a_reg_offset, 1); - return -1; + return -EBUSY; } } nec_priv->iobase = config->ibbase; if (config->ibirq) { if (!request_region(pc2a_clear_intr_iobase + config->ibirq, 1, "pc2a")) { - pr_err("gpib: ioports are already in use"); + dev_err(board->gpib_dev, "ioports are already in use"); return -1; } pc2_priv->clear_intr_addr = pc2a_clear_intr_iobase + config->ibirq; if (request_irq(config->ibirq, pc2a_interrupt, 0, "pc2a", board)) { - pr_err("gpib: can't request IRQ %d\n", config->ibirq); - return -1; + dev_err(board->gpib_dev, "can't request IRQ %d\n", config->ibirq); + return -EBUSY; } } pc2_priv->irq = config->ibirq; /* poll so we can detect assertion of ATN */ if (gpib_request_pseudo_irq(board, pc2_interrupt)) { - pr_err("pc2_gpib: failed to allocate pseudo_irq\n"); + dev_err(board->gpib_dev, "failed to allocate pseudo_irq\n"); return -1; } @@ -630,25 +635,25 @@ static int __init pc2_init_module(void) ret = gpib_register_driver(&pc2_interface, THIS_MODULE); if (ret) { - pr_err("pc2_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&pc2a_interface, THIS_MODULE); if (ret) { - pr_err("pc2_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pc2a; } ret = gpib_register_driver(&pc2a_cb7210_interface, THIS_MODULE); if (ret) { - pr_err("pc2_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_cb7210; } ret = gpib_register_driver(&pc2_2a_interface, THIS_MODULE); if (ret) { - pr_err("pc2_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pc2_2a; } From 18ea3495c54d291a566add350f1de8bfa3166517 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 14 Feb 2025 12:47:07 +0100 Subject: [PATCH 0307/2157] staging: gpib: tms9914 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Remove pr_err on go_to_standby timeout. Remove write wait and command wait interrupted messages. Remove __func__ parameter on error messages Change pr_err to dev_err where possible. Remove commented printk Uncomment dev_dbg in interrupt_have status Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250214114708.28947-17-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/tms9914/tms9914.c | 33 +++++++++++--------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/staging/gpib/tms9914/tms9914.c b/drivers/staging/gpib/tms9914/tms9914.c index ec8e1d4d762f..1f2bb163cfb5 100644 --- a/drivers/staging/gpib/tms9914/tms9914.c +++ b/drivers/staging/gpib/tms9914/tms9914.c @@ -4,6 +4,9 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt + #include #include #include @@ -83,10 +86,8 @@ int tms9914_go_to_standby(gpib_board_t *board, struct tms9914_priv *priv) break; udelay(1); } - if (i == timeout) { - pr_err("error waiting for NATN\n"); + if (i == timeout) return -ETIMEDOUT; - } clear_bit(COMMAND_READY_BN, &priv->state); @@ -175,7 +176,7 @@ void tms9914_set_holdoff_mode(struct tms9914_priv *priv, enum tms9914_holdoff_mo write_byte(priv, AUX_HLDA | AUX_CS, AUXCR); break; default: - pr_err("%s: bug! bad holdoff mode %i\n", __func__, mode); + pr_err("bug! bad holdoff mode %i\n", mode); break; } priv->holdoff_mode = mode; @@ -437,10 +438,9 @@ static int wait_for_read_byte(gpib_board_t *board, struct tms9914_priv *priv) if (wait_event_interruptible(board->wait, test_bit(READ_READY_BN, &priv->state) || test_bit(DEV_CLEAR_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - pr_debug("gpib: pio read wait interrupted\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; @@ -472,7 +472,7 @@ static inline uint8_t tms9914_read_data_in(gpib_board_t *board, struct tms9914_p case TMS9914_HOLDOFF_NONE: break; default: - pr_err("%s: bug! bad holdoff mode %i\n", __func__, priv->holdoff_mode); + dev_err(board->gpib_dev, "bug! bad holdoff mode %i\n", priv->holdoff_mode); break; } spin_unlock_irqrestore(&board->spinlock, flags); @@ -548,10 +548,9 @@ static int pio_write_wait(gpib_board_t *board, struct tms9914_priv *priv) test_bit(WRITE_READY_BN, &priv->state) || test_bit(BUS_ERROR_BN, &priv->state) || test_bit(DEV_CLEAR_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted!\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; if (test_bit(BUS_ERROR_BN, &priv->state)) @@ -667,10 +666,8 @@ int tms9914_command(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *bu if (wait_event_interruptible(board->wait, test_bit(COMMAND_READY_BN, &priv->state) || - test_bit(TIMO_NUM, &board->status))) { - pr_debug("gpib command wait interrupted\n"); + test_bit(TIMO_NUM, &board->status))) break; - } if (test_bit(TIMO_NUM, &board->status)) break; @@ -761,8 +758,6 @@ irqreturn_t tms9914_interrupt_have_status(gpib_board_t *board, struct tms9914_pr write_byte(priv, AUX_INVAL, AUXCR); } } else { - // printk("tms9914: unrecognized gpib command pass thru 0x%x\n", - // command_byte); // clear dac holdoff write_byte(priv, AUX_INVAL, AUXCR); } @@ -799,7 +794,7 @@ irqreturn_t tms9914_interrupt_have_status(gpib_board_t *board, struct tms9914_pr // check for being addressed with secondary addressing if (status1 & HR_APT) { if (board->sad < 0) - pr_err("tms9914: bug, APT interrupt without secondary addressing?\n"); + dev_err(board->gpib_dev, "bug, APT interrupt without secondary addressing?\n"); if ((read_byte(priv, CPTR) & gpib_command_mask) == MSA(board->sad)) write_byte(priv, AUX_VAL, AUXCR); else @@ -807,8 +802,8 @@ irqreturn_t tms9914_interrupt_have_status(gpib_board_t *board, struct tms9914_pr } if ((status0 & priv->imr0_bits) || (status1 & priv->imr1_bits)) { -// dev_dbg(board->gpib_dev, "isr0 0x%x, imr0 0x%x, isr1 0x%x, imr1 0x%x\n", -// status0, priv->imr0_bits, status1, priv->imr1_bits); + dev_dbg(board->gpib_dev, "isr0 0x%x, imr0 0x%x, isr1 0x%x, imr1 0x%x\n", + status0, priv->imr0_bits, status1, priv->imr1_bits); update_status_nolock(board, priv); wake_up_interruptible(&board->wait); } From b6fe18d08d18943bdb139c4cf9a88e31a7f6959a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Feb 2025 14:13:36 +0100 Subject: [PATCH 0308/2157] staging: gpib: comment out pnp_device_id tables This variable is not referenced in either of these two drivers driver, causing a warning when they are built-in and W=1 warnings are enabled with gcc: drivers/staging/gpib/tnt4882/tnt4882_gpib.c:1507:35: error: 'tnt4882_pnp_table' defined but not used [-Werror=unused-const-variable=] 1507 | static const struct pnp_device_id tnt4882_pnp_table[] = { | ^~~~~~~~~~~~~~~~~ drivers/staging/gpib/hp_82341/hp_82341.c:811:35: error: 'hp_82341_pnp_table' defined but not used [-Werror=unused-const-variable=] 811 | static const struct pnp_device_id hp_82341_pnp_table[] = { The MODULE_DEVICE_TABLE() entry does have the effect of loading the module when the PNP device is detected, so it is still needed for the modular case. Ideally the drivers should be converted to pnp_register_driver(), which would lead to the ID table actually being used. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250217131356.3759347-2-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82341/hp_82341.c | 3 +++ drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 3ac87d1a1fab..b93a830be8a9 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -802,11 +802,14 @@ static void hp_82341_detach(gpib_board_t *board) hp_82341_free_private(board); } +#if 0 +/* unused, will be needed when the driver is turned into a pnp_driver */ static const struct pnp_device_id hp_82341_pnp_table[] = { {.id = "HWP1411"}, {.id = ""} }; MODULE_DEVICE_TABLE(pnp, hp_82341_pnp_table); +#endif static int __init hp_82341_init_module(void) { diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index 2e1c3cbebaca..df92e9959fe1 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -1390,11 +1390,14 @@ static struct pci_driver tnt4882_pci_driver = { .probe = &tnt4882_pci_probe }; +#if 0 +/* unused, will be needed when the driver is turned into a pnp_driver */ static const struct pnp_device_id tnt4882_pnp_table[] = { {.id = "NICC601"}, {.id = ""} }; MODULE_DEVICE_TABLE(pnp, tnt4882_pnp_table); +#endif #ifdef GPIB_PCMCIA static gpib_interface_t ni_pcmcia_interface; From 0865b297b3a89fe371ded617ac87493b422a6d5d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Feb 2025 14:13:35 +0100 Subject: [PATCH 0309/2157] static: gpib: hp82341: add MODULE_DESCRIPTION One more description turned out to be missing WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/staging/gpib/hp_82341/hp_82341.o Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250217131356.3759347-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82341/hp_82341.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index b93a830be8a9..91fbaf953bcd 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -20,6 +20,7 @@ #include MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GPIB driver for hp 82341a/b/c/d boards"); static unsigned short read_and_clear_event_status(gpib_board_t *board); static void set_transfer_counter(struct hp_82341_priv *hp_priv, int count); From 020b814c2f864d1e1a19b3013266b73057f9f99b Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Mon, 10 Feb 2025 16:10:21 +0100 Subject: [PATCH 0310/2157] staging;gpib: Use Kconfig PCMCIA compilation symbol The drivers supporting the PCMCIA bus were using an intermediate symbol GPIB_PCMCIA for compiling optional PCMCIA support. This symbol is no longer needed for the in-tree drivers as the Kconfig symbol is available. Use the Kconfig symbol CONFIG_GPIB_PCMCIA directly for conditional compilation of PCMCIA support. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250210151022.4358-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/cb7210.c | 10 +++++----- drivers/staging/gpib/ines/ines_gpib.c | 8 ++++---- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 0d601b6a00fb..cb0d8a721687 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -1111,7 +1111,7 @@ static struct pci_driver cb7210_pci_driver = { * pcmcia skeleton example (presumably David Hinds) ***************************************************************************/ -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA #include #include @@ -1501,7 +1501,7 @@ static void cb_pcmcia_detach(gpib_board_t *board) cb7210_generic_detach(board); } -#endif /* GPIB_PCMCIA */ +#endif /* CONFIG_GPIB_PCMCIA */ static int __init cb7210_init_module(void) { @@ -1549,7 +1549,7 @@ static int __init cb7210_init_module(void) goto err_isa_unaccel; } -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA ret = gpib_register_driver(&cb_pcmcia_interface, THIS_MODULE); if (ret) { pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); @@ -1577,7 +1577,7 @@ static int __init cb7210_init_module(void) return 0; -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA err_pcmcia_driver: gpib_unregister_driver(&cb_pcmcia_unaccel_interface); err_pcmcia_unaccel: @@ -1611,7 +1611,7 @@ static void __exit cb7210_exit_module(void) gpib_unregister_driver(&cb_pci_unaccel_interface); gpib_unregister_driver(&cb_isa_accel_interface); gpib_unregister_driver(&cb_isa_unaccel_interface); -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA gpib_unregister_driver(&cb_pcmcia_interface); gpib_unregister_driver(&cb_pcmcia_accel_interface); gpib_unregister_driver(&cb_pcmcia_unaccel_interface); diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index 22a05a287bce..73dafbe68acb 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -977,7 +977,7 @@ static struct pci_driver ines_pci_driver = { .probe = &ines_pci_probe }; -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA #include #include @@ -1410,7 +1410,7 @@ void ines_pcmcia_detach(gpib_board_t *board) ines_free_private(board); } -#endif /* GPIB_PCMCIA */ +#endif /* CONFIG_GPIB_PCMCIA */ static int __init ines_init_module(void) { @@ -1446,7 +1446,7 @@ static int __init ines_init_module(void) goto err_isa; } -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA ret = gpib_register_driver(&ines_pcmcia_interface, THIS_MODULE); if (ret) { pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); @@ -1474,7 +1474,7 @@ static int __init ines_init_module(void) return 0; -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA err_pcmcia_driver: gpib_unregister_driver(&ines_pcmcia_accel_interface); err_pcmcia_accel: diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index df92e9959fe1..04e46554185c 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -1399,7 +1399,7 @@ static const struct pnp_device_id tnt4882_pnp_table[] = { MODULE_DEVICE_TABLE(pnp, tnt4882_pnp_table); #endif -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA static gpib_interface_t ni_pcmcia_interface; static gpib_interface_t ni_pcmcia_accel_interface; static int __init init_ni_gpib_cs(void); @@ -1464,7 +1464,7 @@ static int __init tnt4882_init_module(void) goto err_pci_accel; } -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA result = gpib_register_driver(&ni_pcmcia_interface, THIS_MODULE); if (result) { pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); @@ -1489,7 +1489,7 @@ static int __init tnt4882_init_module(void) return 0; -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA err_pcmcia_driver: gpib_unregister_driver(&ni_pcmcia_accel_interface); err_pcmcia_accel: @@ -1527,7 +1527,7 @@ static void __exit tnt4882_exit_module(void) gpib_unregister_driver(&ni_nec_isa_accel_interface); gpib_unregister_driver(&ni_pci_interface); gpib_unregister_driver(&ni_pci_accel_interface); -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA gpib_unregister_driver(&ni_pcmcia_interface); gpib_unregister_driver(&ni_pcmcia_accel_interface); exit_ni_gpib_cs(); @@ -1538,7 +1538,7 @@ static void __exit tnt4882_exit_module(void) pci_unregister_driver(&tnt4882_pci_driver); } -#ifdef GPIB_PCMCIA +#ifdef CONFIG_GPIB_PCMCIA #include #include @@ -1892,7 +1892,7 @@ static gpib_interface_t ni_pcmcia_accel_interface = { .return_to_local = tnt4882_return_to_local, }; -#endif // GPIB_PCMCIA +#endif // CONFIG_GPIB_PCMCIA module_init(tnt4882_init_module); module_exit(tnt4882_exit_module); From 3c9a0cf6a1ed45d454a1d3fa1033adc2dcd8df26 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Mon, 10 Feb 2025 16:10:22 +0100 Subject: [PATCH 0311/2157] staging:gpib: Remove GPIB_PCMCIA in Makefiles This symbol is no longer needed since it is being replaced directly by its Kconfig equivalent CONFIG_GPIB_PCMCIA in the drivers providing optional support for the PCMCIA bus. Remove the definition of GPIB_PCMCIA from the Makefiles. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250210151022.4358-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/Makefile | 1 - drivers/staging/gpib/ines/Makefile | 1 - drivers/staging/gpib/tnt4882/Makefile | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/staging/gpib/cb7210/Makefile b/drivers/staging/gpib/cb7210/Makefile index cda0725d6487..d239ae80b415 100644 --- a/drivers/staging/gpib/cb7210/Makefile +++ b/drivers/staging/gpib/cb7210/Makefile @@ -1,4 +1,3 @@ -ccflags-$(CONFIG_GPIB_PCMCIA) := -DGPIB_PCMCIA obj-$(CONFIG_GPIB_CB7210) += cb7210.o diff --git a/drivers/staging/gpib/ines/Makefile b/drivers/staging/gpib/ines/Makefile index 6b6e480fd811..88241f15ecea 100644 --- a/drivers/staging/gpib/ines/Makefile +++ b/drivers/staging/gpib/ines/Makefile @@ -1,4 +1,3 @@ -ccflags-$(CONFIG_GPIB_PCMCIA) := -DGPIB_PCMCIA obj-$(CONFIG_GPIB_INES) += ines_gpib.o diff --git a/drivers/staging/gpib/tnt4882/Makefile b/drivers/staging/gpib/tnt4882/Makefile index a3c3fb96d5ed..fa1687ad0d1b 100644 --- a/drivers/staging/gpib/tnt4882/Makefile +++ b/drivers/staging/gpib/tnt4882/Makefile @@ -1,4 +1,3 @@ -ccflags-$(CONFIG_GPIB_PCMCIA) := -DGPIB_PCMCIA obj-$(CONFIG_GPIB_NI_PCI_ISA) += tnt4882.o tnt4882-objs := tnt4882_gpib.o mite.o From 99ed5f695fac3d9a4991a8a8b939db58540b406e Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 14 Feb 2025 16:54:40 -0300 Subject: [PATCH 0312/2157] staging: gpib: cb7210: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variable 'nec_priv' in function 'cb7210_line_status'. This removes the following warning found compiling with W=1: warning: variable ‘nec_priv’ set but not used [-Wunused-but-set-variable] Signed-off-by: Gaston Gonzalez Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250214195456.104075-3-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/cb7210.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index cb0d8a721687..eff1872aa9ed 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -383,10 +383,8 @@ static int cb7210_line_status(const gpib_board_t *board) int status = ValidALL; int bsr_bits; struct cb7210_priv *cb_priv; - struct nec7210_priv *nec_priv; cb_priv = board->private_data; - nec_priv = &cb_priv->nec7210_priv; bsr_bits = cb7210_paged_read_byte(cb_priv, BUS_STATUS, BUS_STATUS_PAGE); From c725363401e228986848c67579153dc259eccb4b Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 14 Feb 2025 16:54:42 -0300 Subject: [PATCH 0313/2157] staging: gpib: eastwood: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variable 'nec_priv' in function 'fluke_line_status'. This removes the following warning found compiling with W=1: warning: variable ‘nec_priv’ set but not used [-Wunused-but-set-variable] Signed-off-by: Gaston Gonzalez Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250214195456.104075-5-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/eastwood/fluke_gpib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index 012ce9cb8999..731732bd8301 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -198,10 +198,8 @@ static int fluke_line_status(const gpib_board_t *board) int status = ValidALL; int bsr_bits; struct fluke_priv *e_priv; - struct nec7210_priv *nec_priv; e_priv = board->private_data; - nec_priv = &e_priv->nec7210_priv; bsr_bits = fluke_paged_read_byte(e_priv, BUS_STATUS, BUS_STATUS_PAGE); From a990ae96e6ef3e431a99b686fc174cd429d27288 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 14 Feb 2025 16:54:44 -0300 Subject: [PATCH 0314/2157] staging: gpib: ni_usb: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variable 'adr1_bits' in function 'parse_board_ibrd_readback' Since the value of the variable 'i' is increased when 'adr1_bits' is set, the 'i++' post-increment was added in order to keep the value of 'i' right. This change removes the following warning: warning: variable ‘adr1_bits’ set but not used [-Wunused-but-set-variable] Signed-off-by: Gaston Gonzalez Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250214195456.104075-7-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 52c7530f07bb..61b15b19e134 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -426,7 +426,6 @@ static int parse_board_ibrd_readback(const u8 *raw_data, struct ni_usb_status_bl int i = 0; int j = 0; int k; - unsigned int adr1_bits; int num_data_blocks = 0; struct ni_usb_status_block register_write_status; int unexpected = 0; @@ -459,7 +458,7 @@ static int parse_board_ibrd_readback(const u8 *raw_data, struct ni_usb_status_bl pr_err("bug: status->id=%i, != ibrd_status_id\n", status->id); return -EIO; } - adr1_bits = raw_data[i++]; + i++; if (num_data_blocks) { *actual_bytes_read = (num_data_blocks - 1) * data_block_length + raw_data[i++]; } else { From 1b268f7a47a46b46d09639902bd63c21d2a1fbb2 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 14 Feb 2025 16:54:46 -0300 Subject: [PATCH 0315/2157] staging: gpib: tnt4882: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variable 'retval' in function 'tnt4882_update_status'. A call to nec7210_update_status_nolock() is added as the status bits are written to board->status in that function. This change removes the following warning: warning: variable ‘retval’ set but not used [-Wunused-but-set-variable] Signed-off-by: Gaston Gonzalez Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250214195456.104075-9-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index 04e46554185c..e1840f16a326 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -704,12 +704,11 @@ static unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clea { unsigned long flags; u8 line_status; - unsigned int retval; struct tnt4882_priv *priv = board->private_data; spin_lock_irqsave(&board->spinlock, flags); board->status &= ~clear_mask; - retval = nec7210_update_status_nolock(board, &priv->nec7210_priv); + nec7210_update_status_nolock(board, &priv->nec7210_priv); /* set / clear SRQ state since it is not cleared by interrupt */ line_status = tnt_readb(priv, BSR); if (line_status & BCSR_SRQ_BIT) From bedc7002f7978bb516aa36da41a22ab92166917f Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 14 Feb 2025 16:54:48 -0300 Subject: [PATCH 0316/2157] staging: gpib: ines: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variable 'nec_priv' in function 'ines_line_status'. This change removes the following warning: warning: variable ‘nec_priv’ set but not used [-Wunused-but-set-variable] Signed-off-by: Gaston Gonzalez Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250214195456.104075-11-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ines/ines_gpib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index 73dafbe68acb..b9abb2dfa4f3 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -26,10 +26,8 @@ int ines_line_status(const gpib_board_t *board) int status = ValidALL; int bcm_bits; struct ines_priv *ines_priv; - struct nec7210_priv *nec_priv; ines_priv = board->private_data; - nec_priv = &ines_priv->nec7210_priv; bcm_bits = ines_inb(ines_priv, BUS_CONTROL_MONITOR); From 1cddb72bf892812407cc30f0d8eb47dac73d6de4 Mon Sep 17 00:00:00 2001 From: Michael Anckaert Date: Wed, 12 Feb 2025 10:12:40 +0000 Subject: [PATCH 0317/2157] staging: sm750fb: fix checkpatch warning architecture specific defines should be avoided Replace architecture-specific defines with CONFIG_X86 checks to improve portability and adhere to kernel coding standards. Fixes checkpatch warning: - CHECK: architecture specific defines should be avoided. Changes made: - Using CONFIG_X86 instead of i386 and x86. Reviewed-by: Thomas Zimmermann Signed-off-by: Michael Anckaert Link: https://lore.kernel.org/r/Z6x0GEM5sxcruYlS@michael-devbox Signed-off-by: Greg Kroah-Hartman --- drivers/staging/sm750fb/ddk750_chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/sm750fb/ddk750_chip.c b/drivers/staging/sm750fb/ddk750_chip.c index 02860d3ec365..025dae3756aa 100644 --- a/drivers/staging/sm750fb/ddk750_chip.c +++ b/drivers/staging/sm750fb/ddk750_chip.c @@ -228,8 +228,8 @@ int ddk750_init_hw(struct initchip_param *p_init_param) reg = peek32(VGA_CONFIGURATION); reg |= (VGA_CONFIGURATION_PLL | VGA_CONFIGURATION_MODE); poke32(VGA_CONFIGURATION, reg); +#ifdef CONFIG_X86 } else { -#if defined(__i386__) || defined(__x86_64__) /* set graphic mode via IO method */ outb_p(0x88, 0x3d4); outb_p(0x06, 0x3d5); From 6ef5b6fae304091593956be59065c0c8633ad9e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Feb 2025 17:39:38 +0100 Subject: [PATCH 0318/2157] kernfs: Drop kernfs_rwsem while invoking lookup_positive_unlocked(). syzbot reported two warnings: - kernfs_node::name was accessed outside of a RCU section so it created warning. The kernfs_rwsem was held so it was okay but it wasn't seen. - While kernfs_rwsem was held invoked lookup_positive_unlocked()-> kernfs_dop_revalidate() which acquired kernfs_rwsem. kernfs_rwsem was both acquired as a read lock so it can be acquired twice. However if a writer acquires the lock after the first reader then neither the writer nor the second reader can obtain the lock so it deadlocks. The reason for the lock is to ensure that kernfs_node::name remain stable during lookup_positive_unlocked()'s invocation. The function can not be invoked within a RCU section because it may sleep. Make a temporary copy of the kernfs_node::name under the lock so GFP_KERNEL can be used and use this instead. Reported-by: syzbot+ecccecbc636b455f9084@syzkaller.appspotmail.com Fixes: 5b2fabf7fe8f ("kernfs: Acquire kernfs_rwsem in kernfs_node_dentry().") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250218163938.xmvjlJ0K@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d1f512b7bf86..f1cea282aae3 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -220,12 +220,19 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, return dentry; root = kernfs_root(kn); - guard(rwsem_read)(&root->kernfs_rwsem); - - knparent = find_next_ancestor(kn, NULL); - if (WARN_ON(!knparent)) { - dput(dentry); + /* + * As long as kn is valid, its parent can not vanish. This is cgroup's + * kn so it not have its parent replaced. Therefore it is safe to use + * the ancestor node outside of the RCU or locked section. + */ + if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT))) return ERR_PTR(-EINVAL); + scoped_guard(rcu) { + knparent = find_next_ancestor(kn, NULL); + if (WARN_ON(!knparent)) { + dput(dentry); + return ERR_PTR(-EINVAL); + } } do { @@ -235,14 +242,22 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, if (kn == knparent) return dentry; - kntmp = find_next_ancestor(kn, knparent); - if (WARN_ON(!kntmp)) { - dput(dentry); - return ERR_PTR(-EINVAL); + + scoped_guard(rwsem_read, &root->kernfs_rwsem) { + kntmp = find_next_ancestor(kn, knparent); + if (WARN_ON(!kntmp)) { + dput(dentry); + return ERR_PTR(-EINVAL); + } + name = kstrdup(kernfs_rcu_name(kntmp), GFP_KERNEL); + } + if (!name) { + dput(dentry); + return ERR_PTR(-ENOMEM); } - name = rcu_dereference(kntmp->name); dtmp = lookup_positive_unlocked(name, dentry, strlen(name)); dput(dentry); + kfree(name); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; From ed872cea144c805e5ab43207830243db7703c0ac Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Wed, 19 Feb 2025 09:24:38 -0800 Subject: [PATCH 0319/2157] dt-bindings: input: touchscreen: Add Z2 controller Add bindings for touchscreen controllers attached using the Z2 protocol. Those are present in most Apple devices. Reviewed-by: Krzysztof Kozlowski Reviewed-by: Neal Gompa Signed-off-by: Sasha Finkelstein Link: https://lore.kernel.org/r/20250217-z2-v6-1-c2115d6e5a8f@gmail.com Signed-off-by: Dmitry Torokhov --- .../touchscreen/apple,z2-multitouch.yaml | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml diff --git a/Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml b/Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml new file mode 100644 index 000000000000..402ca6bffd34 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/apple,z2-multitouch.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Apple touchscreens attached using the Z2 protocol + +maintainers: + - Sasha Finkelstein + +description: A series of touschscreen controllers used in Apple products + +allOf: + - $ref: touchscreen.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +properties: + compatible: + enum: + - apple,j293-touchbar + - apple,j493-touchbar + + interrupts: + maxItems: 1 + + reset-gpios: + maxItems: 1 + + firmware-name: + maxItems: 1 + + apple,z2-cal-blob: + $ref: /schemas/types.yaml#/definitions/uint8-array + maxItems: 4096 + description: + Calibration blob supplied by the bootloader + +required: + - compatible + - interrupts + - reset-gpios + - firmware-name + - touchscreen-size-x + - touchscreen-size-y + +unevaluatedProperties: false + +examples: + - | + #include + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@0 { + compatible = "apple,j293-touchbar"; + reg = <0>; + spi-max-frequency = <11500000>; + reset-gpios = <&pinctrl_ap 139 GPIO_ACTIVE_LOW>; + interrupts-extended = <&pinctrl_ap 194 IRQ_TYPE_EDGE_FALLING>; + firmware-name = "apple/dfrmtfw-j293.bin"; + touchscreen-size-x = <23045>; + touchscreen-size-y = <640>; + }; + }; + +... From 471a92f8a21a0e0219e254df7b07133b4f121f33 Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Wed, 19 Feb 2025 09:24:56 -0800 Subject: [PATCH 0320/2157] Input: apple_z2 - add a driver for Apple Z2 touchscreens Adds a driver for Apple touchscreens using the Z2 protocol. Signed-off-by: Janne Grunau Reviewed-by: Neal Gompa Signed-off-by: Sasha Finkelstein Link: https://lore.kernel.org/r/20250217-z2-v6-2-c2115d6e5a8f@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 13 + drivers/input/touchscreen/Makefile | 1 + drivers/input/touchscreen/apple_z2.c | 477 +++++++++++++++++++++++++++ 3 files changed, 491 insertions(+) create mode 100644 drivers/input/touchscreen/apple_z2.c diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 1a03de7fcfa6..6c885cc58f32 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -103,6 +103,19 @@ config TOUCHSCREEN_ADC To compile this driver as a module, choose M here: the module will be called resistive-adc-touch.ko. +config TOUCHSCREEN_APPLE_Z2 + tristate "Apple Z2 touchscreens" + default ARCH_APPLE + depends on SPI + help + Say Y here if you have an Apple device with + a touchscreen or a touchbar. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called apple_z2. + config TOUCHSCREEN_AR1021_I2C tristate "Microchip AR1020/1021 i2c touchscreen" depends on I2C && OF diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 82bc837ca01e..97a025c6a377 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_TOUCHSCREEN_AD7879_I2C) += ad7879-i2c.o obj-$(CONFIG_TOUCHSCREEN_AD7879_SPI) += ad7879-spi.o obj-$(CONFIG_TOUCHSCREEN_ADC) += resistive-adc-touch.o obj-$(CONFIG_TOUCHSCREEN_ADS7846) += ads7846.o +obj-$(CONFIG_TOUCHSCREEN_APPLE_Z2) += apple_z2.o obj-$(CONFIG_TOUCHSCREEN_AR1021_I2C) += ar1021_i2c.o obj-$(CONFIG_TOUCHSCREEN_ATMEL_MXT) += atmel_mxt_ts.o obj-$(CONFIG_TOUCHSCREEN_AUO_PIXCIR) += auo-pixcir-ts.o diff --git a/drivers/input/touchscreen/apple_z2.c b/drivers/input/touchscreen/apple_z2.c new file mode 100644 index 000000000000..0de161eae59a --- /dev/null +++ b/drivers/input/touchscreen/apple_z2.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple Z2 touchscreen driver + * + * Copyright (C) The Asahi Linux Contributors + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APPLE_Z2_NUM_FINGERS_OFFSET 16 +#define APPLE_Z2_FINGERS_OFFSET 24 +#define APPLE_Z2_TOUCH_STARTED 3 +#define APPLE_Z2_TOUCH_MOVED 4 +#define APPLE_Z2_CMD_READ_INTERRUPT_DATA 0xEB +#define APPLE_Z2_HBPP_CMD_BLOB 0x3001 +#define APPLE_Z2_FW_MAGIC 0x5746325A +#define LOAD_COMMAND_INIT_PAYLOAD 0 +#define LOAD_COMMAND_SEND_BLOB 1 +#define LOAD_COMMAND_SEND_CALIBRATION 2 +#define CAL_PROP_NAME "apple,z2-cal-blob" + +struct apple_z2 { + struct spi_device *spidev; + struct gpio_desc *reset_gpio; + struct input_dev *input_dev; + struct completion boot_irq; + bool booted; + int index_parity; + struct touchscreen_properties props; + const char *fw_name; + u8 *tx_buf; + u8 *rx_buf; +}; + +struct apple_z2_finger { + u8 finger; + u8 state; + __le16 unknown2; + __le16 abs_x; + __le16 abs_y; + __le16 rel_x; + __le16 rel_y; + __le16 tool_major; + __le16 tool_minor; + __le16 orientation; + __le16 touch_major; + __le16 touch_minor; + __le16 unused[2]; + __le16 pressure; + __le16 multi; +} __packed; + +struct apple_z2_hbpp_blob_hdr { + __le16 cmd; + __le16 len; + __le32 addr; + __le16 checksum; +}; + +struct apple_z2_fw_hdr { + __le32 magic; + __le32 version; +}; + +struct apple_z2_read_interrupt_cmd { + u8 cmd; + u8 counter; + u8 unused[12]; + __le16 checksum; +}; + +static void apple_z2_parse_touches(struct apple_z2 *z2, + const u8 *msg, size_t msg_len) +{ + int i; + int nfingers; + int slot; + int slot_valid; + struct apple_z2_finger *fingers; + + if (msg_len <= APPLE_Z2_NUM_FINGERS_OFFSET) + return; + nfingers = msg[APPLE_Z2_NUM_FINGERS_OFFSET]; + fingers = (struct apple_z2_finger *)(msg + APPLE_Z2_FINGERS_OFFSET); + for (i = 0; i < nfingers; i++) { + slot = input_mt_get_slot_by_key(z2->input_dev, fingers[i].finger); + if (slot < 0) { + dev_warn(&z2->spidev->dev, "unable to get slot for finger\n"); + continue; + } + slot_valid = fingers[i].state == APPLE_Z2_TOUCH_STARTED || + fingers[i].state == APPLE_Z2_TOUCH_MOVED; + input_mt_slot(z2->input_dev, slot); + if (!input_mt_report_slot_state(z2->input_dev, MT_TOOL_FINGER, slot_valid)) + continue; + touchscreen_report_pos(z2->input_dev, &z2->props, + le16_to_cpu(fingers[i].abs_x), + le16_to_cpu(fingers[i].abs_y), + true); + input_report_abs(z2->input_dev, ABS_MT_WIDTH_MAJOR, + le16_to_cpu(fingers[i].tool_major)); + input_report_abs(z2->input_dev, ABS_MT_WIDTH_MINOR, + le16_to_cpu(fingers[i].tool_minor)); + input_report_abs(z2->input_dev, ABS_MT_ORIENTATION, + le16_to_cpu(fingers[i].orientation)); + input_report_abs(z2->input_dev, ABS_MT_TOUCH_MAJOR, + le16_to_cpu(fingers[i].touch_major)); + input_report_abs(z2->input_dev, ABS_MT_TOUCH_MINOR, + le16_to_cpu(fingers[i].touch_minor)); + } + input_mt_sync_frame(z2->input_dev); + input_sync(z2->input_dev); +} + +static int apple_z2_read_packet(struct apple_z2 *z2) +{ + struct apple_z2_read_interrupt_cmd *len_cmd = (void *)z2->tx_buf; + struct spi_transfer xfer; + int error; + size_t pkt_len; + + memset(&xfer, 0, sizeof(xfer)); + len_cmd->cmd = APPLE_Z2_CMD_READ_INTERRUPT_DATA; + len_cmd->counter = z2->index_parity + 1; + len_cmd->checksum = + cpu_to_le16(APPLE_Z2_CMD_READ_INTERRUPT_DATA + len_cmd->counter); + z2->index_parity = !z2->index_parity; + xfer.tx_buf = z2->tx_buf; + xfer.rx_buf = z2->rx_buf; + xfer.len = sizeof(*len_cmd); + + error = spi_sync_transfer(z2->spidev, &xfer, 1); + if (error) + return error; + + pkt_len = (get_unaligned_le16(z2->rx_buf + 1) + 8) & 0xfffffffc; + + error = spi_read(z2->spidev, z2->rx_buf, pkt_len); + if (error) + return error; + + apple_z2_parse_touches(z2, z2->rx_buf + 5, pkt_len - 5); + + return 0; +} + +static irqreturn_t apple_z2_irq(int irq, void *data) +{ + struct apple_z2 *z2 = data; + + if (unlikely(!z2->booted)) + complete(&z2->boot_irq); + else + apple_z2_read_packet(z2); + + return IRQ_HANDLED; +} + +/* Build calibration blob, caller is responsible for freeing the blob data. */ +static const u8 *apple_z2_build_cal_blob(struct apple_z2 *z2, + u32 address, size_t *size) +{ + u8 *cal_data; + int cal_size; + size_t blob_size; + u32 checksum; + u16 checksum_hdr; + int i; + struct apple_z2_hbpp_blob_hdr *hdr; + int error; + + if (!device_property_present(&z2->spidev->dev, CAL_PROP_NAME)) + return NULL; + + cal_size = device_property_count_u8(&z2->spidev->dev, CAL_PROP_NAME); + if (cal_size < 0) + return ERR_PTR(cal_size); + + blob_size = sizeof(struct apple_z2_hbpp_blob_hdr) + cal_size + sizeof(__le32); + u8 *blob_data __free(kfree) = kzalloc(blob_size, GFP_KERNEL); + if (!blob_data) + return ERR_PTR(-ENOMEM); + + hdr = (struct apple_z2_hbpp_blob_hdr *)blob_data; + hdr->cmd = cpu_to_le16(APPLE_Z2_HBPP_CMD_BLOB); + hdr->len = cpu_to_le16(round_up(cal_size, 4) / 4); + hdr->addr = cpu_to_le32(address); + + checksum_hdr = 0; + for (i = 2; i < 8; i++) + checksum_hdr += blob_data[i]; + hdr->checksum = cpu_to_le16(checksum_hdr); + + cal_data = blob_data + sizeof(struct apple_z2_hbpp_blob_hdr); + error = device_property_read_u8_array(&z2->spidev->dev, CAL_PROP_NAME, + cal_data, cal_size); + if (error) + return ERR_PTR(error); + + checksum = 0; + for (i = 0; i < cal_size; i++) + checksum += cal_data[i]; + put_unaligned_le32(checksum, cal_data + cal_size); + + *size = blob_size; + return no_free_ptr(blob_data); +} + +static int apple_z2_send_firmware_blob(struct apple_z2 *z2, const u8 *data, + u32 size, bool init) +{ + struct spi_message msg; + struct spi_transfer blob_xfer, ack_xfer; + int error; + + z2->tx_buf[0] = 0x1a; + z2->tx_buf[1] = 0xa1; + + spi_message_init(&msg); + memset(&blob_xfer, 0, sizeof(blob_xfer)); + memset(&ack_xfer, 0, sizeof(ack_xfer)); + + blob_xfer.tx_buf = data; + blob_xfer.len = size; + blob_xfer.bits_per_word = init ? 8 : 16; + spi_message_add_tail(&blob_xfer, &msg); + + ack_xfer.tx_buf = z2->tx_buf; + ack_xfer.len = 2; + spi_message_add_tail(&ack_xfer, &msg); + + reinit_completion(&z2->boot_irq); + error = spi_sync(z2->spidev, &msg); + if (error) + return error; + + /* Irq only happens sometimes, but the thing boots reliably nonetheless */ + wait_for_completion_timeout(&z2->boot_irq, msecs_to_jiffies(20)); + + return 0; +} + +static int apple_z2_upload_firmware(struct apple_z2 *z2) +{ + const struct apple_z2_fw_hdr *fw_hdr; + size_t fw_idx = sizeof(struct apple_z2_fw_hdr); + int error; + u32 load_cmd; + u32 address; + bool init; + size_t size; + + const struct firmware *fw __free(firmware) = NULL; + error = request_firmware(&fw, z2->fw_name, &z2->spidev->dev); + if (error) { + dev_err(&z2->spidev->dev, "unable to load firmware\n"); + return error; + } + + fw_hdr = (const struct apple_z2_fw_hdr *)fw->data; + if (le32_to_cpu(fw_hdr->magic) != APPLE_Z2_FW_MAGIC || le32_to_cpu(fw_hdr->version) != 1) { + dev_err(&z2->spidev->dev, "invalid firmware header\n"); + return -EINVAL; + } + + /* + * This will interrupt the upload half-way if the file is malformed + * As the device has no non-volatile storage to corrupt, and gets reset + * on boot anyway, this is fine. + */ + while (fw_idx < fw->size) { + if (fw->size - fw_idx < 8) { + dev_err(&z2->spidev->dev, "firmware malformed\n"); + return -EINVAL; + } + + load_cmd = le32_to_cpup((__force __le32 *)(fw->data + fw_idx)); + fw_idx += sizeof(u32); + if (load_cmd == LOAD_COMMAND_INIT_PAYLOAD || load_cmd == LOAD_COMMAND_SEND_BLOB) { + size = le32_to_cpup((__force __le32 *)(fw->data + fw_idx)); + fw_idx += sizeof(u32); + if (fw->size - fw_idx < size) { + dev_err(&z2->spidev->dev, "firmware malformed\n"); + return -EINVAL; + } + init = load_cmd == LOAD_COMMAND_INIT_PAYLOAD; + error = apple_z2_send_firmware_blob(z2, fw->data + fw_idx, + size, init); + if (error) + return error; + fw_idx += size; + } else if (load_cmd == LOAD_COMMAND_SEND_CALIBRATION) { + address = le32_to_cpup((__force __le32 *)(fw->data + fw_idx)); + fw_idx += sizeof(u32); + + const u8 *data __free(kfree) = + apple_z2_build_cal_blob(z2, address, &size); + if (IS_ERR(data)) + return PTR_ERR(data); + + if (data) { + error = apple_z2_send_firmware_blob(z2, data, size, false); + if (error) + return error; + } + } else { + dev_err(&z2->spidev->dev, "firmware malformed\n"); + return -EINVAL; + } + fw_idx = round_up(fw_idx, 4); + } + + + z2->booted = true; + apple_z2_read_packet(z2); + return 0; +} + +static int apple_z2_boot(struct apple_z2 *z2) +{ + int error; + + reinit_completion(&z2->boot_irq); + enable_irq(z2->spidev->irq); + gpiod_set_value(z2->reset_gpio, 0); + if (!wait_for_completion_timeout(&z2->boot_irq, msecs_to_jiffies(20))) + return -ETIMEDOUT; + + error = apple_z2_upload_firmware(z2); + if (error) { + gpiod_set_value(z2->reset_gpio, 1); + disable_irq(z2->spidev->irq); + return error; + } + + return 0; +} + +static int apple_z2_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct apple_z2 *z2; + int error; + + z2 = devm_kzalloc(dev, sizeof(*z2), GFP_KERNEL); + if (!z2) + return -ENOMEM; + + z2->tx_buf = devm_kzalloc(dev, sizeof(struct apple_z2_read_interrupt_cmd), GFP_KERNEL); + if (!z2->tx_buf) + return -ENOMEM; + /* 4096 will end up being rounded up to 8192 due to devres header */ + z2->rx_buf = devm_kzalloc(dev, 4000, GFP_KERNEL); + if (!z2->rx_buf) + return -ENOMEM; + + z2->spidev = spi; + init_completion(&z2->boot_irq); + spi_set_drvdata(spi, z2); + + /* Reset the device on boot */ + z2->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(z2->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(z2->reset_gpio), "unable to get reset\n"); + + error = devm_request_threaded_irq(dev, z2->spidev->irq, NULL, apple_z2_irq, + IRQF_ONESHOT | IRQF_NO_AUTOEN, + "apple-z2-irq", z2); + if (error) + return dev_err_probe(dev, error, "unable to request irq\n"); + + error = device_property_read_string(dev, "firmware-name", &z2->fw_name); + if (error) + return dev_err_probe(dev, error, "unable to get firmware name\n"); + + z2->input_dev = devm_input_allocate_device(dev); + if (!z2->input_dev) + return -ENOMEM; + + z2->input_dev->name = (char *)spi_get_device_id(spi)->driver_data; + z2->input_dev->phys = "apple_z2"; + z2->input_dev->id.bustype = BUS_SPI; + + /* Allocate the axes before setting from DT */ + input_set_abs_params(z2->input_dev, ABS_MT_POSITION_X, 0, 0, 0, 0); + input_set_abs_params(z2->input_dev, ABS_MT_POSITION_Y, 0, 0, 0, 0); + touchscreen_parse_properties(z2->input_dev, true, &z2->props); + input_abs_set_res(z2->input_dev, ABS_MT_POSITION_X, 100); + input_abs_set_res(z2->input_dev, ABS_MT_POSITION_Y, 100); + input_set_abs_params(z2->input_dev, ABS_MT_WIDTH_MAJOR, 0, 65535, 0, 0); + input_set_abs_params(z2->input_dev, ABS_MT_WIDTH_MINOR, 0, 65535, 0, 0); + input_set_abs_params(z2->input_dev, ABS_MT_TOUCH_MAJOR, 0, 65535, 0, 0); + input_set_abs_params(z2->input_dev, ABS_MT_TOUCH_MINOR, 0, 65535, 0, 0); + input_set_abs_params(z2->input_dev, ABS_MT_ORIENTATION, -32768, 32767, 0, 0); + + error = input_mt_init_slots(z2->input_dev, 256, INPUT_MT_DIRECT); + if (error) + return dev_err_probe(dev, error, "unable to initialize multitouch slots\n"); + + error = input_register_device(z2->input_dev); + if (error) + return dev_err_probe(dev, error, "unable to register input device\n"); + + /* Wait for device reset to finish */ + usleep_range(5000, 10000); + error = apple_z2_boot(z2); + if (error) + return error; + + return 0; +} + +static void apple_z2_shutdown(struct spi_device *spi) +{ + struct apple_z2 *z2 = spi_get_drvdata(spi); + + disable_irq(z2->spidev->irq); + gpiod_direction_output(z2->reset_gpio, 1); + z2->booted = false; +} + +static int apple_z2_suspend(struct device *dev) +{ + apple_z2_shutdown(to_spi_device(dev)); + + return 0; +} + +static int apple_z2_resume(struct device *dev) +{ + struct apple_z2 *z2 = spi_get_drvdata(to_spi_device(dev)); + + return apple_z2_boot(z2); +} + +static DEFINE_SIMPLE_DEV_PM_OPS(apple_z2_pm, apple_z2_suspend, apple_z2_resume); + +static const struct of_device_id apple_z2_of_match[] = { + { .compatible = "apple,j293-touchbar" }, + { .compatible = "apple,j493-touchbar" }, + {} +}; +MODULE_DEVICE_TABLE(of, apple_z2_of_match); + +static struct spi_device_id apple_z2_of_id[] = { + { .name = "j293-touchbar", .driver_data = (kernel_ulong_t)"MacBookPro17,1 Touch Bar" }, + { .name = "j493-touchbar", .driver_data = (kernel_ulong_t)"Mac14,7 Touch Bar" }, + {} +}; +MODULE_DEVICE_TABLE(spi, apple_z2_of_id); + +static struct spi_driver apple_z2_driver = { + .driver = { + .name = "apple-z2", + .pm = pm_sleep_ptr(&apple_z2_pm), + .of_match_table = apple_z2_of_match, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, + .id_table = apple_z2_of_id, + .probe = apple_z2_probe, + .remove = apple_z2_shutdown, +}; + +module_spi_driver(apple_z2_driver); + +MODULE_LICENSE("GPL"); +MODULE_FIRMWARE("apple/dfrmtfw-*.bin"); +MODULE_DESCRIPTION("Apple Z2 touchscreens driver"); From 9995b98a4b2a704fa6744d2bee9c5d50b2c33836 Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Wed, 19 Feb 2025 09:25:32 -0800 Subject: [PATCH 0321/2157] MAINTAINERS: Add entries for Apple Z2 touchscreen driver Add the MAINTAINERS entries for the driver Reviewed-by: Neal Gompa Acked-by: Sven Peter Signed-off-by: Sasha Finkelstein Link: https://lore.kernel.org/r/20250217-z2-v6-4-c2115d6e5a8f@gmail.com Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index baf0eeb9a355..78e6507d0f7a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2193,6 +2193,7 @@ F: Documentation/devicetree/bindings/clock/apple,nco.yaml F: Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml F: Documentation/devicetree/bindings/dma/apple,admac.yaml F: Documentation/devicetree/bindings/i2c/apple,i2c.yaml +F: Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml F: Documentation/devicetree/bindings/interrupt-controller/apple,* F: Documentation/devicetree/bindings/iommu/apple,dart.yaml F: Documentation/devicetree/bindings/iommu/apple,sart.yaml @@ -2213,6 +2214,7 @@ F: drivers/dma/apple-admac.c F: drivers/pmdomain/apple/ F: drivers/i2c/busses/i2c-pasemi-core.c F: drivers/i2c/busses/i2c-pasemi-platform.c +F: drivers/input/touchscreen/apple_z2.c F: drivers/iommu/apple-dart.c F: drivers/iommu/io-pgtable-dart.c F: drivers/irqchip/irq-apple-aic.c From ed625c61b85c2333324dca43146c4ebabf8b236f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 8 Feb 2025 13:53:19 +0200 Subject: [PATCH 0322/2157] tools/power turbostat: Add idle governor statistics reporting The idle governor provides the following per-idle state sysfs files: * above - Indicates overshoots, where a more shallow state should have been requested (if avaliale and enabled). * below - Indicates undershoots, where a deeper state should have been requested (if available and enabled). These files offer valuable insights into how effectively the Linux kernel idle governor selects idle states for a given workload. This commit adds support for these files in turbostat. Expose the contents of these files with the following naming convention: * C1: The number of times the C1 state was requested (existing counter). * C1+: The number of times the idle governor selected C1, but a deeper idle state should have been selected instead. * C1-: The number of times the idle governor selected C1, but a shallower idle state should have been selected instead. Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 +++ tools/power/x86/turbostat/turbostat.c | 50 ++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ed258f248152..52d727e29ea7 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -160,6 +160,10 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. .PP +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +.PP +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +.PP \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. .PP \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d3af2bf307e1..f29e47fe4249 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10265,6 +10265,7 @@ void probe_sysfs(void) char name_buf[16]; FILE *input; int state; + int min_state = 1024, max_state = 0; char *sp; for (state = 10; state >= 0; --state) { @@ -10296,6 +10297,11 @@ void probe_sysfs(void) continue; add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + + if (state > max_state) + max_state = state; + if (state < min_state) + min_state = state; } for (state = 10; state >= 0; --state) { @@ -10306,26 +10312,52 @@ void probe_sysfs(void) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ - sp = strchr(name_buf, '-'); - if (!sp) - sp = strchrnul(name_buf, '\n'); - *sp = '\0'; fclose(input); remove_underbar(name_buf); - sprintf(path, "cpuidle/state%d/usage", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); - } + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + /* + * The 'below' sysfs file always contains 0 for the deepest state (largest index), + * do not add it. + */ + if (state != max_state) { + /* + * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for + * the last state, so do not add it. + */ + + *sp = '+'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/below", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + + *sp = '\0'; + sprintf(path, "cpuidle/state%d/usage", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + + /* + * The 'above' sysfs file always contains 0 for the shallowest state (smallest + * index), do not add it. + */ + if (state != min_state) { + *sp = '-'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/above", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + } } /* From 82e3508046f9bce75e14b6cab5805e52f27f5405 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Thu, 20 Feb 2025 10:09:18 +0100 Subject: [PATCH 0323/2157] staging: gpib: cb7210 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "cb7210" everywhere. Remove "cb7210:" string prefix in messages since module name printing is enabled. Change pr_err to dev_err where possible. Remove interrupt warnings. Return consistent error codes with error messages: -EBUSY when resources are busy -ENODEV when device is not present -EIO for others. Return -ENOMEM for failed kmalloc (no message in driver) Remove PCMCIA debug comments, PCMCIA_DEBUG conditional compilation symbol, the DEBUG macro definition and its uses. Change pr_warn to dev_warn and pr_err to dev_err where possible. Remove commented printk. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250220090920.32497-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/cb7210.c | 133 +++++++++++---------------- 1 file changed, 52 insertions(+), 81 deletions(-) diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index eff1872aa9ed..19dfd8b4a8e7 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -5,6 +5,10 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "cb7210.h" #include #include @@ -83,12 +87,12 @@ static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t * *bytes_read = 0; if (cb_priv->fifo_iobase == 0) { - pr_err("cb7210: fifo iobase is zero!\n"); + dev_err(board->gpib_dev, "fifo iobase is zero!\n"); return -EIO; } *end = 0; if (length <= cb7210_fifo_size) { - pr_err("cb7210: bug! %s with length < fifo size\n", __func__); + dev_err(board->gpib_dev, " bug! fifo read length < fifo size\n"); return -EINVAL; } @@ -103,7 +107,6 @@ static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t * test_bit(RECEIVED_END_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_warn("cb7210: fifo half full wait interrupted\n"); retval = -ERESTARTSYS; nec7210_set_reg_bits(nec_priv, IMR2, HR_DMAI, 0); break; @@ -153,7 +156,6 @@ static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t * test_bit(RECEIVED_END_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_warn("cb7210: fifo half full wait interrupted\n"); retval = -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) @@ -188,7 +190,6 @@ static int cb7210_accel_read(gpib_board_t *board, uint8_t *buffer, test_bit(READ_READY_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_warn("cb7210: read ready wait interrupted\n"); return -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) @@ -274,7 +275,7 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ *bytes_written = 0; if (cb_priv->fifo_iobase == 0) { - pr_err("cb7210: fifo iobase is zero!\n"); + dev_err(board->gpib_dev, "fifo iobase is zero!\n"); return -EINVAL; } if (length == 0) @@ -293,7 +294,6 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_warn("cb7210: fifo wait interrupted\n"); retval = -ERESTARTSYS; break; } @@ -309,7 +309,7 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ if (num_bytes + count > length) num_bytes = length - count; if (num_bytes % cb7210_fifo_width) { - pr_err("cb7210: bug! %s with odd number of bytes\n", __func__); + dev_err(board->gpib_dev, " bug! fifo write with odd number of bytes\n"); retval = -EINVAL; break; } @@ -334,7 +334,6 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_err("cb7210: wait for last byte interrupted\n"); retval = -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) @@ -480,7 +479,7 @@ static irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) status2 = read_byte(nec_priv, ISR2); nec7210_interrupt_have_status(board, nec_priv, status1, status2); - dev_dbg(board->gpib_dev, "cb7210: status 0x%x, mode 0x%x\n", hs_status, priv->hs_mode_bits); + dev_dbg(board->gpib_dev, "status 0x%x, mode 0x%x\n", hs_status, priv->hs_mode_bits); clear_bits = 0; @@ -858,7 +857,7 @@ static int cb7210_allocate_private(gpib_board_t *board) board->private_data = kmalloc(sizeof(struct cb7210_priv), GFP_KERNEL); if (!board->private_data) - return -1; + return -ENOMEM; priv = board->private_data; memset(priv, 0, sizeof(struct cb7210_priv)); init_nec7210_private(&priv->nec7210_priv); @@ -920,7 +919,7 @@ static int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) /* poll so we can detect assertion of ATN */ if (gpib_request_pseudo_irq(board, cb_pci_interrupt)) { - pr_err("pc2_gpib: failed to allocate pseudo_irq\n"); + pr_err("failed to allocate pseudo_irq\n"); return -1; } return 0; @@ -960,17 +959,17 @@ static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) } } if (!cb_priv->pci_device) { - pr_warn("cb7210: no supported boards found.\n"); - return -1; + dev_err(board->gpib_dev, "no supported boards found.\n"); + return -ENODEV; } if (pci_enable_device(cb_priv->pci_device)) { - pr_err("cb7210: error enabling pci device\n"); - return -1; + dev_err(board->gpib_dev, "error enabling pci device\n"); + return -EIO; } - if (pci_request_regions(cb_priv->pci_device, "cb7210")) - return -1; + if (pci_request_regions(cb_priv->pci_device, DRV_NAME)) + return -EBUSY; switch (cb_priv->pci_chip) { case PCI_CHIP_AMCC_S5933: cb_priv->amcc_iobase = pci_resource_start(cb_priv->pci_device, 0); @@ -982,13 +981,14 @@ static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) cb_priv->fifo_iobase = nec_priv->iobase; break; default: - pr_err("cb7210: bug! unhandled pci_chip=%i\n", cb_priv->pci_chip); + dev_err(board->gpib_dev, "bug! unhandled pci_chip=%i\n", cb_priv->pci_chip); return -EIO; } isr_flags |= IRQF_SHARED; - if (request_irq(cb_priv->pci_device->irq, cb_pci_interrupt, isr_flags, "cb7210", board)) { - pr_err("cb7210: can't request IRQ %d\n", cb_priv->pci_device->irq); - return -1; + if (request_irq(cb_priv->pci_device->irq, cb_pci_interrupt, isr_flags, DRV_NAME, board)) { + dev_err(board->gpib_dev, "can't request IRQ %d\n", + cb_priv->pci_device->irq); + return -EBUSY; } cb_priv->irq = cb_priv->pci_device->irq; @@ -1043,20 +1043,22 @@ static int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) return retval; cb_priv = board->private_data; nec_priv = &cb_priv->nec7210_priv; - if (!request_region(config->ibbase, cb7210_iosize, "cb7210")) { - pr_err("gpib: ioports starting at 0x%x are already in use\n", config->ibbase); - return -EIO; + if (!request_region(config->ibbase, cb7210_iosize, DRV_NAME)) { + dev_err(board->gpib_dev, "ioports starting at 0x%x are already in use\n", + config->ibbase); + return -EBUSY; } nec_priv->iobase = config->ibbase; cb_priv->fifo_iobase = nec7210_iobase(cb_priv); bits = irq_bits(config->ibirq); if (bits == 0) - pr_err("board incapable of using irq %i, try 2-5, 7, 10, or 11\n", config->ibirq); + dev_err(board->gpib_dev, "board incapable of using irq %i, try 2-5, 7, 10, or 11\n", + config->ibirq); // install interrupt handler - if (request_irq(config->ibirq, cb7210_interrupt, isr_flags, "cb7210", board)) { - pr_err("gpib: can't request IRQ %d\n", config->ibirq); + if (request_irq(config->ibirq, cb7210_interrupt, isr_flags, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to obtain IRQ %d\n", config->ibirq); return -EBUSY; } cb_priv->irq = config->ibirq; @@ -1096,7 +1098,7 @@ static const struct pci_device_id cb7210_pci_table[] = { MODULE_DEVICE_TABLE(pci, cb7210_pci_table); static struct pci_driver cb7210_pci_driver = { - .name = "cb7210", + .name = DRV_NAME, .id_table = cb7210_pci_table, .probe = &cb7210_pci_probe }; @@ -1119,23 +1121,6 @@ static struct pci_driver cb7210_pci_driver = { #include #include -/* - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - * you do not define PCMCIA_DEBUG at all, all the debug code will be - * left out. If you compile with PCMCIA_DEBUG=0, the debug code will - * be present but disabled -- but it can then be enabled for specific - * modules at load time with a 'pc_debug=#' option to insmod. - */ - -#define PCMCIA_DEBUG 1 - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -#define DEBUG(n, args...) do {if (pc_debug > (n)) pr_debug(args); } while (0) -#else -#define DEBUG(args...) -#endif - /* * The event() function is this driver's Card Services event handler. * It will be called by Card Services when an appropriate card status @@ -1200,8 +1185,6 @@ static int cb_gpib_probe(struct pcmcia_device *link) // int ret, i; - DEBUG(0, "%s(0x%p)\n", __func__, link); - /* Allocate space for private device-specific data */ info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -1241,8 +1224,6 @@ static void cb_gpib_remove(struct pcmcia_device *link) struct local_info *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); - if (info->dev) cb_pcmcia_detach(info->dev); cb_gpib_release(link); @@ -1270,7 +1251,6 @@ static int cb_gpib_config(struct pcmcia_device *link) handle = link; dev = link->priv; - DEBUG(0, "%s(0x%p)\n", __func__, link); retval = pcmcia_loop_config(link, &cb_gpib_config_iteration, NULL); if (retval) { @@ -1279,8 +1259,6 @@ static int cb_gpib_config(struct pcmcia_device *link) return -ENODEV; } - DEBUG(0, "gpib_cs: manufacturer: 0x%x card: 0x%x\n", link->manf_id, link->card_id); - /* * This actually configures the PCMCIA socket -- setting up * the I/O windows and the interrupt mapping. @@ -1292,7 +1270,6 @@ static int cb_gpib_config(struct pcmcia_device *link) return -ENODEV; } - pr_info("gpib device loaded\n"); return 0; } /* gpib_config */ @@ -1304,7 +1281,6 @@ static int cb_gpib_config(struct pcmcia_device *link) static void cb_gpib_release(struct pcmcia_device *link) { - DEBUG(0, "%s(0x%p)\n", __func__, link); pcmcia_disable_device(link); } @@ -1312,10 +1288,9 @@ static int cb_gpib_suspend(struct pcmcia_device *link) { //struct local_info *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); if (link->open) - pr_warn("Device still open ???\n"); + dev_warn(&link->dev, "Device still open\n"); //netif_device_detach(dev); return 0; @@ -1325,11 +1300,9 @@ static int cb_gpib_resume(struct pcmcia_device *link) { //struct local_info *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); /*if (link->open) { * ni_gpib_probe(dev); / really? - * printk("Gpib resumed ???\n"); * //netif_device_attach(dev); * */ @@ -1356,7 +1329,6 @@ static struct pcmcia_driver cb_gpib_cs_driver = { static void cb_pcmcia_cleanup_module(void) { - DEBUG(0, "cb_gpib_cs: unloading\n"); pcmcia_unregister_driver(&cb_gpib_cs_driver); } @@ -1451,8 +1423,8 @@ static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf int retval; if (!curr_dev) { - pr_err("no cb pcmcia cards found\n"); - return -1; + dev_err(board->gpib_dev, "no cb pcmcia cards found\n"); + return -ENODEV; } retval = cb7210_generic_attach(board); @@ -1463,18 +1435,17 @@ static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf nec_priv = &cb_priv->nec7210_priv; if (!request_region(curr_dev->resource[0]->start, resource_size(curr_dev->resource[0]), - "cb7210")) { - pr_err("gpib: ioports starting at 0x%lx are already in use\n", - (unsigned long)curr_dev->resource[0]->start); - return -EIO; + DRV_NAME)) { + dev_err(board->gpib_dev, "ioports starting at 0x%lx are already in use\n", + (unsigned long)curr_dev->resource[0]->start); + return -EBUSY; } nec_priv->iobase = curr_dev->resource[0]->start; cb_priv->fifo_iobase = curr_dev->resource[0]->start; - if (request_irq(curr_dev->irq, cb7210_interrupt, IRQF_SHARED, - "cb7210", board)) { - pr_err("cb7210: failed to request IRQ %d\n", curr_dev->irq); - return -1; + if (request_irq(curr_dev->irq, cb7210_interrupt, IRQF_SHARED, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to request IRQ %d\n", curr_dev->irq); + return -EBUSY; } cb_priv->irq = curr_dev->irq; @@ -1507,68 +1478,68 @@ static int __init cb7210_init_module(void) ret = pci_register_driver(&cb7210_pci_driver); if (ret) { - pr_err("cb7210: pci_register_driver failed: error = %d\n", ret); + pr_err("pci_register_driver failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&cb_pci_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci; } ret = gpib_register_driver(&cb_isa_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_isa; } ret = gpib_register_driver(&cb_pci_accel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci_accel; } ret = gpib_register_driver(&cb_pci_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci_unaccel; } ret = gpib_register_driver(&cb_isa_accel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_isa_accel; } ret = gpib_register_driver(&cb_isa_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_isa_unaccel; } #ifdef CONFIG_GPIB_PCMCIA ret = gpib_register_driver(&cb_pcmcia_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia; } ret = gpib_register_driver(&cb_pcmcia_accel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia_accel; } ret = gpib_register_driver(&cb_pcmcia_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("cb7210: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia_unaccel; } ret = pcmcia_register_driver(&cb_gpib_cs_driver); if (ret) { - pr_err("cb7210: pcmcia_register_driver failed: error = %d\n", ret); + pr_err("pcmcia_register_driver failed: error = %d\n", ret); goto err_pcmcia_driver; } #endif From 0de51244e7b7e3ea97f9da68318fbc9e7e16f6a5 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Thu, 20 Feb 2025 10:09:19 +0100 Subject: [PATCH 0324/2157] staging: gpib: ines console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "ines_gpib" in pci_request_regions, request_irq and request_region. Remove "ines:" and "ines_gpib:" string prefixes in messages since module name printing is enabled. Change pr_err to dev_err where possible. Remove interrupt warnings. Remove PCMCIA debug comments, PCMCIA_DEBUG conditional compilation symbol, the DEBUG macro definition and its uses. Change pr_debug to dev_dbg. Remove pr_info Remove commented printk. Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250220090920.32497-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ines/ines_gpib.c | 101 +++++++++++--------------- 1 file changed, 42 insertions(+), 59 deletions(-) diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index b9abb2dfa4f3..56da6cd91188 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -5,6 +5,10 @@ * (C) 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include "ines.h" #include @@ -54,7 +58,7 @@ int ines_line_status(const gpib_board_t *board) void ines_set_xfer_counter(struct ines_priv *priv, unsigned int count) { if (count > 0xffff) { - pr_err("ines: bug! tried to set xfer counter > 0xffff\n"); + pr_err("bug! tried to set xfer counter > 0xffff\n"); return; } ines_outb(priv, (count >> 8) & 0xff, XFER_COUNT_UPPER); @@ -104,21 +108,18 @@ static ssize_t pio_read(gpib_board_t *board, struct ines_priv *ines_priv, uint8_ num_in_fifo_bytes(ines_priv) || test_bit(RECEIVED_END_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { - pr_warn("gpib: pio read wait interrupted\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) return -EINTR; num_fifo_bytes = num_in_fifo_bytes(ines_priv); - if (num_fifo_bytes + *nbytes > length) { - pr_warn("ines: counter allowed %li extra byte(s)\n", - (long)(num_fifo_bytes - (length - *nbytes))); + if (num_fifo_bytes + *nbytes > length) num_fifo_bytes = length - *nbytes; - } + for (i = 0; i < num_fifo_bytes; i++) buffer[(*nbytes)++] = read_byte(nec_priv, DIR); if (test_bit(RECEIVED_END_BN, &nec_priv->state) && @@ -199,10 +200,9 @@ static int ines_write_wait(gpib_board_t *board, struct ines_priv *ines_priv, num_out_fifo_bytes(ines_priv) < fifo_threshold || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } + if (test_bit(BUS_ERROR_BN, &nec_priv->state)) return -EIO; if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) @@ -299,7 +299,7 @@ irqreturn_t ines_interrupt(gpib_board_t *board) wake++; } if (isr3_bits & FIFO_ERROR_BIT) - pr_err("ines gpib: fifo error\n"); + dev_err(board->gpib_dev, "fifo error\n"); if (isr3_bits & XFER_COUNT_BIT) wake++; @@ -767,16 +767,16 @@ static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t } while (1); } if (!ines_priv->pci_device) { - pr_err("gpib: could not find ines PCI board\n"); + dev_err(board->gpib_dev, "could not find ines PCI board\n"); return -1; } if (pci_enable_device(ines_priv->pci_device)) { - pr_err("error enabling pci device\n"); + dev_err(board->gpib_dev, "error enabling pci device\n"); return -1; } - if (pci_request_regions(ines_priv->pci_device, "ines-gpib")) + if (pci_request_regions(ines_priv->pci_device, DRV_NAME)) return -1; nec_priv->iobase = pci_resource_start(ines_priv->pci_device, found_id.gpib_region); @@ -795,7 +795,7 @@ static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t case PCI_CHIP_QUICKLOGIC5030: break; default: - pr_err("gpib: unspecified chip type? (bug)\n"); + dev_err(board->gpib_dev, "unspecified chip type? (bug)\n"); nec_priv->iobase = 0; pci_release_regions(ines_priv->pci_device); return -1; @@ -811,8 +811,8 @@ static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t #endif isr_flags |= IRQF_SHARED; if (request_irq(ines_priv->pci_device->irq, ines_pci_interrupt, isr_flags, - "pci-gpib", board)) { - pr_err("gpib: can't request IRQ %d\n", ines_priv->pci_device->irq); + DRV_NAME, board)) { + dev_err(board->gpib_dev, "can't request IRQ %d\n", ines_priv->pci_device->irq); return -1; } ines_priv->irq = ines_priv->pci_device->irq; @@ -844,7 +844,7 @@ static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t case PCI_CHIP_QUICKLOGIC5030: break; default: - pr_err("gpib: unspecified chip type? (bug)\n"); + dev_err(board->gpib_dev, "unspecified chip type? (bug)\n"); return -1; } @@ -897,15 +897,16 @@ int ines_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) ines_priv = board->private_data; nec_priv = &ines_priv->nec7210_priv; - if (!request_region(config->ibbase, ines_isa_iosize, "ines_gpib")) { - pr_err("ines_gpib: ioports at 0x%x already in use\n", config->ibbase); - return -1; + if (!request_region(config->ibbase, ines_isa_iosize, DRV_NAME)) { + dev_err(board->gpib_dev, "ioports at 0x%x already in use\n", + config->ibbase); + return -EBUSY; } nec_priv->iobase = config->ibbase; nec_priv->offset = 1; nec7210_board_reset(nec_priv, board); - if (request_irq(config->ibirq, ines_pci_interrupt, isr_flags, "ines_gpib", board)) { - pr_err("ines_gpib: failed to allocate IRQ %d\n", config->ibirq); + if (request_irq(config->ibirq, ines_pci_interrupt, isr_flags, DRV_NAME, board)) { + dev_err(board->gpib_dev, "failed to allocate IRQ %d\n", config->ibirq); return -1; } ines_priv->irq = config->ibirq; @@ -986,13 +987,6 @@ static struct pci_driver ines_pci_driver = { #include #include -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -#define DEBUG(n, args...) do {if (pc_debug > (n)) pr_debug(args)} while (0) -#else -#define DEBUG(args...) -#endif - static const int ines_pcmcia_iosize = 0x20; /* The event() function is this driver's Card Services event handler. @@ -1061,8 +1055,6 @@ static int ines_gpib_probe(struct pcmcia_device *link) // int ret, i; - DEBUG(0, "%s(0x%p)\n", __func__ link); - /* Allocate space for private device-specific data */ info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -1096,8 +1088,6 @@ static void ines_gpib_remove(struct pcmcia_device *link) struct local_info *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); - if (info->dev) ines_pcmcia_detach(info->dev); ines_gpib_release(link); @@ -1123,7 +1113,6 @@ static int ines_gpib_config(struct pcmcia_device *link) void __iomem *virt; dev = link->priv; - DEBUG(0, "%s(0x%p)\n", __func__, link); retval = pcmcia_loop_config(link, &ines_gpib_config_iteration, NULL); if (retval) { @@ -1132,8 +1121,8 @@ static int ines_gpib_config(struct pcmcia_device *link) return -ENODEV; } - pr_debug("ines_cs: manufacturer: 0x%x card: 0x%x\n", - link->manf_id, link->card_id); + dev_dbg(&link->dev, "ines_cs: manufacturer: 0x%x card: 0x%x\n", + link->manf_id, link->card_id); /* for the ines card we have to setup the configuration registers in * attribute memory here @@ -1165,7 +1154,6 @@ static int ines_gpib_config(struct pcmcia_device *link) ines_gpib_release(link); return -ENODEV; } - pr_info("ines gpib device loaded\n"); return 0; } /* gpib_config */ @@ -1177,7 +1165,6 @@ static int ines_gpib_config(struct pcmcia_device *link) static void ines_gpib_release(struct pcmcia_device *link) { - DEBUG(0, "%s(0x%p)\n", __func__, link); pcmcia_disable_device(link); } /* gpib_release */ @@ -1185,10 +1172,9 @@ static int ines_gpib_suspend(struct pcmcia_device *link) { //struct local_info *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); if (link->open) - pr_err("Device still open ???\n"); + dev_err(&link->dev, "Device still open\n"); //netif_device_detach(dev); return 0; @@ -1198,11 +1184,9 @@ static int ines_gpib_resume(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); /*if (link->open) { * ni_gpib_probe(dev); / really? - * printk("Gpib resumed ???\n"); * //netif_device_attach(dev); *} */ @@ -1227,7 +1211,6 @@ static struct pcmcia_driver ines_gpib_cs_driver = { void ines_pcmcia_cleanup_module(void) { - DEBUG(0, "ines_cs: unloading\n"); pcmcia_unregister_driver(&ines_gpib_cs_driver); } @@ -1329,7 +1312,7 @@ int ines_common_pcmcia_attach(gpib_board_t *board) int retval; if (!curr_dev) { - pr_err("no ines pcmcia cards found\n"); + dev_err(board->gpib_dev, "no ines pcmcia cards found\n"); return -1; } @@ -1341,9 +1324,9 @@ int ines_common_pcmcia_attach(gpib_board_t *board) nec_priv = &ines_priv->nec7210_priv; if (!request_region(curr_dev->resource[0]->start, - resource_size(curr_dev->resource[0]), "ines_gpib")) { - pr_err("ines_gpib: ioports at 0x%lx already in use\n", - (unsigned long)(curr_dev->resource[0]->start)); + resource_size(curr_dev->resource[0]), DRV_NAME)) { + dev_err(board->gpib_dev, "ioports at 0x%lx already in use\n", + (unsigned long)(curr_dev->resource[0]->start)); return -1; } @@ -1353,7 +1336,7 @@ int ines_common_pcmcia_attach(gpib_board_t *board) if (request_irq(curr_dev->irq, ines_pcmcia_interrupt, IRQF_SHARED, "pcmcia-gpib", board)) { - pr_err("gpib: can't request IRQ %d\n", curr_dev->irq); + dev_err(board->gpib_dev, "can't request IRQ %d\n", curr_dev->irq); return -1; } ines_priv->irq = curr_dev->irq; @@ -1416,56 +1399,56 @@ static int __init ines_init_module(void) ret = pci_register_driver(&ines_pci_driver); if (ret) { - pr_err("ines_gpib: pci_register_driver failed: error = %d\n", ret); + pr_err("pci_register_driver failed: error = %d\n", ret); return ret; } ret = gpib_register_driver(&ines_pci_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci; } ret = gpib_register_driver(&ines_pci_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci_unaccel; } ret = gpib_register_driver(&ines_pci_accel_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pci_accel; } ret = gpib_register_driver(&ines_isa_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_isa; } #ifdef CONFIG_GPIB_PCMCIA ret = gpib_register_driver(&ines_pcmcia_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia; } ret = gpib_register_driver(&ines_pcmcia_unaccel_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia_unaccel; } ret = gpib_register_driver(&ines_pcmcia_accel_interface, THIS_MODULE); if (ret) { - pr_err("ines_gpib: gpib_register_driver failed: error = %d\n", ret); + pr_err("gpib_register_driver failed: error = %d\n", ret); goto err_pcmcia_accel; } ret = pcmcia_register_driver(&ines_gpib_cs_driver); if (ret) { - pr_err("ines_gpib: pcmcia_register_driver failed: error = %d\n", ret); + pr_err("pcmcia_register_driver failed: error = %d\n", ret); goto err_pcmcia_driver; } #endif From 20a351c36afc7d72956b57bdefbc79858f18923d Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Thu, 20 Feb 2025 10:09:20 +0100 Subject: [PATCH 0325/2157] staging: gpib: tnt4882 console messaging cleanup Enable module name to be printed in pr_xxx and dev_xxx Use DRV_NAME defined as KBUILD_MODNAME instead of hard coded string "tnt4882_gpib" in pci_driver struct, request_region and request_irq. Remove unsupported chipset pr_err's Remove messages on interrupted or timed out reads and writes. Remove board not found messages and return -ENODEV Remove "tnt4882:" prefix in messages as it will be printed by pr_fmt and dev_fmt. Change pr_err to dev_err where possible. Remove irq and chipset pr_info's. Replace error messages with appropriate error returns. Remove call to mite_list_devices() and the function in mite.c Remove PCMCIA debug comments, PCMCIA_DEBUG conditional compilation symbol, the DEBUG macro definition and its uses. Remove pr_info's in mite.c Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250220090920.32497-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/tnt4882/mite.c | 17 -- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 186 +++++++------------- 2 files changed, 60 insertions(+), 143 deletions(-) diff --git a/drivers/staging/gpib/tnt4882/mite.c b/drivers/staging/gpib/tnt4882/mite.c index ea64dde46bcb..847b96f411bd 100644 --- a/drivers/staging/gpib/tnt4882/mite.c +++ b/drivers/staging/gpib/tnt4882/mite.c @@ -88,7 +88,6 @@ int mite_setup(struct mite_struct *mite) pr_err("mite: failed to remap mite io memory address.\n"); return -ENOMEM; } - pr_info("mite: 0x%08lx mapped to %p\n", mite->mite_phys_addr, mite->mite_io_addr); addr = pci_resource_start(mite->pcidev, 1); mite->daq_phys_addr = addr; mite->daq_io_addr = ioremap(mite->daq_phys_addr, pci_resource_len(mite->pcidev, 1)); @@ -96,7 +95,6 @@ int mite_setup(struct mite_struct *mite) pr_err("mite: failed to remap daq io memory address.\n"); return -ENOMEM; } - pr_info("mite: daq: 0x%08lx mapped to %p\n", mite->daq_phys_addr, mite->daq_io_addr); writel(mite->daq_phys_addr | WENAB, mite->mite_io_addr + MITE_IODWBSR); mite->used = 1; return 0; @@ -133,18 +131,3 @@ void mite_unsetup(struct mite_struct *mite) } mite->used = 0; } - -void mite_list_devices(void) -{ - struct mite_struct *mite, *next; - - pr_info("Available NI PCI device IDs:"); - if (mite_devices) - for (mite = mite_devices; mite; mite = next) { - next = mite->next; - pr_info(" 0x%04x", mite_device_id(mite)); - if (mite->used) - pr_info("(used)"); - } - pr_info("\n"); -} diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index e1840f16a326..d32420dee5e5 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -5,6 +5,10 @@ * copyright : (C) 2001, 2002 by Frank Mori Hess ***************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt +#define DRV_NAME KBUILD_MODNAME + #include #include #include @@ -97,7 +101,6 @@ static inline unsigned short tnt_readb(struct tnt4882_priv *priv, unsigned long retval = 0; break; default: - pr_err("tnt4882: bug! unsupported ni_chipset\n"); retval = 0; break; } @@ -132,7 +135,6 @@ static inline void tnt_writeb(struct tnt4882_priv *priv, unsigned short value, u case NEC7210: break; default: - pr_err("tnt4882: bug! unsupported ni_chipset\n"); break; } break; @@ -326,22 +328,18 @@ static int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t lengt test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(ADR_CHANGE_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_err("tnt4882: read interrupted\n"); retval = -ERESTARTSYS; break; } if (test_bit(TIMO_NUM, &board->status)) { - //pr_info("tnt4882: minor %i read timed out\n", board->minor); retval = -ETIMEDOUT; break; } if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) { - pr_err("tnt4882: device clear interrupted read\n"); retval = -EINTR; break; } if (test_bit(ADR_CHANGE_BN, &nec_priv->state)) { - pr_err("tnt4882: address change interrupted read\n"); retval = -EINTR; break; } @@ -368,20 +366,14 @@ static int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t lengt test_bit(DEV_CLEAR_BN, &nec_priv->state) || test_bit(ADR_CHANGE_BN, &nec_priv->state) || test_bit(TIMO_NUM, &board->status))) { - pr_err("tnt4882: read interrupted\n"); retval = -ERESTARTSYS; } if (test_bit(TIMO_NUM, &board->status)) - //pr_info("tnt4882: read timed out\n"); retval = -ETIMEDOUT; - if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) { - pr_err("tnt4882: device clear interrupted read\n"); + if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) retval = -EINTR; - } - if (test_bit(ADR_CHANGE_BN, &nec_priv->state)) { - pr_err("tnt4882: address change interrupted read\n"); + if (test_bit(ADR_CHANGE_BN, &nec_priv->state)) retval = -EINTR; - } count += drain_fifo_words(tnt_priv, &buffer[count], length - count); if (fifo_byte_available(tnt_priv) && count < length) buffer[count++] = tnt_readb(tnt_priv, FIFOB); @@ -444,22 +436,15 @@ static int write_wait(gpib_board_t *board, struct tnt4882_priv *tnt_priv, fifo_xfer_done(tnt_priv) || test_bit(BUS_ERROR_BN, &nec_priv->state) || test_bit(DEV_CLEAR_BN, &nec_priv->state) || - test_bit(TIMO_NUM, &board->status))) { - dev_dbg(board->gpib_dev, "gpib write interrupted\n"); + test_bit(TIMO_NUM, &board->status))) return -ERESTARTSYS; - } - if (test_bit(TIMO_NUM, &board->status)) { - pr_info("tnt4882: write timed out\n"); + + if (test_bit(TIMO_NUM, &board->status)) return -ETIMEDOUT; - } - if (test_and_clear_bit(BUS_ERROR_BN, &nec_priv->state)) { - pr_err("tnt4882: write bus error\n"); + if (test_and_clear_bit(BUS_ERROR_BN, &nec_priv->state)) return (send_commands) ? -ENOTCONN : -ECOMM; - } - if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) { - pr_err("tnt4882: device clear interrupted write\n"); + if (test_bit(DEV_CLEAR_BN, &nec_priv->state)) return -EINTR; - } return 0; } @@ -592,7 +577,7 @@ static irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board) if (isr3_bits & HR_DONE) priv->imr3_bits &= ~HR_DONE; if (isr3_bits & (HR_INTR | HR_TLCI)) { - dev_dbg(board->gpib_dev, "tnt4882: minor %i isr0 0x%x imr0 0x%x isr3 0x%x imr3 0x%x\n", + dev_dbg(board->gpib_dev, "minor %i isr0 0x%x imr0 0x%x isr3 0x%x imr3 0x%x\n", board->minor, isr0_bits, priv->imr0_bits, isr3_bits, imr3_bits); tnt_writeb(priv, priv->imr3_bits, IMR3); wake_up_interruptible(&board->wait); @@ -932,10 +917,8 @@ static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) nec_priv->write_byte = nec7210_locking_iomem_write_byte; nec_priv->offset = atgpib_reg_offset; - if (!mite_devices) { - pr_err("no National Instruments PCI boards found\n"); - return -1; - } + if (!mite_devices) + return -ENODEV; for (mite = mite_devices; mite; mite = mite->next) { short found_board; @@ -966,37 +949,32 @@ static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) if (found_board) break; } - if (!mite) { - pr_err("no NI PCI-GPIB boards found\n"); - return -1; - } + if (!mite) + return -ENODEV; + tnt_priv->mite = mite; retval = mite_setup(tnt_priv->mite); - if (retval < 0) { - pr_err("tnt4882: error setting up mite.\n"); + if (retval < 0) return retval; - } nec_priv->mmiobase = tnt_priv->mite->daq_io_addr; // get irq - if (request_irq(mite_irq(tnt_priv->mite), tnt4882_interrupt, isr_flags, - "ni-pci-gpib", board)) { - pr_err("gpib: can't request IRQ %d\n", mite_irq(tnt_priv->mite)); - return -1; + retval = request_irq(mite_irq(tnt_priv->mite), tnt4882_interrupt, isr_flags, "ni-pci-gpib", + board); + if (retval) { + dev_err(board->gpib_dev, "failed to obtain pci irq %d\n", mite_irq(tnt_priv->mite)); + return retval; } tnt_priv->irq = mite_irq(tnt_priv->mite); - pr_info("tnt4882: irq %i\n", tnt_priv->irq); // TNT5004 detection switch (tnt_readb(tnt_priv, CSR) & 0xf0) { case 0x30: nec_priv->type = TNT4882; - pr_info("tnt4882: TNT4882 chipset detected\n"); break; case 0x40: nec_priv->type = TNT5004; - pr_info("tnt4882: TNT5004 chipset detected\n"); break; } tnt4882_init(tnt_priv, board); @@ -1026,23 +1004,17 @@ static int ni_isapnp_find(struct pnp_dev **dev) { *dev = pnp_find_dev(NULL, ISAPNP_VENDOR_ID_NI, ISAPNP_FUNCTION(ISAPNP_ID_NI_ATGPIB_TNT), NULL); - if (!*dev || !(*dev)->card) { - pr_err("tnt4882: failed to find isapnp board\n"); + if (!*dev || !(*dev)->card) return -ENODEV; - } - if (pnp_device_attach(*dev) < 0) { - pr_err("tnt4882: atgpib/tnt board already active, skipping\n"); + if (pnp_device_attach(*dev) < 0) return -EBUSY; - } if (pnp_activate_dev(*dev) < 0) { pnp_device_detach(*dev); - pr_err("tnt4882: failed to activate() atgpib/tnt, aborting\n"); return -EAGAIN; } if (!pnp_port_valid(*dev, 0) || !pnp_irq_valid(*dev, 0)) { pnp_device_detach(*dev); - pr_err("tnt4882: invalid port or irq for atgpib/tnt, aborting\n"); - return -ENOMEM; + return -EINVAL; } return 0; } @@ -1055,6 +1027,7 @@ static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t * int isr_flags = 0; u32 iobase; int irq; + int retval; board->status = 0; @@ -1070,7 +1043,6 @@ static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t * // look for plug-n-play board if (config->ibbase == 0) { struct pnp_dev *dev; - int retval; retval = ni_isapnp_find(&dev); if (retval < 0) @@ -1083,18 +1055,18 @@ static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t * irq = config->ibirq; } // allocate ioports - if (!request_region(iobase, atgpib_iosize, "atgpib")) { - pr_err("tnt4882: failed to allocate ioports\n"); - return -1; - } + if (!request_region(iobase, atgpib_iosize, "atgpib")) + return -EBUSY; + nec_priv->mmiobase = ioport_map(iobase, atgpib_iosize); if (!nec_priv->mmiobase) - return -1; + return -EBUSY; // get irq - if (request_irq(irq, tnt4882_interrupt, isr_flags, "atgpib", board)) { - pr_err("gpib: can't request IRQ %d\n", irq); - return -1; + retval = request_irq(irq, tnt4882_interrupt, isr_flags, "atgpib", board); + if (retval) { + dev_err(board->gpib_dev, "failed to request ISA irq %d\n", irq); + return retval; } tnt_priv->irq = irq; @@ -1384,7 +1356,7 @@ static const struct pci_device_id tnt4882_pci_table[] = { MODULE_DEVICE_TABLE(pci, tnt4882_pci_table); static struct pci_driver tnt4882_pci_driver = { - .name = "tnt4882", + .name = DRV_NAME, .id_table = tnt4882_pci_table, .probe = &tnt4882_pci_probe }; @@ -1411,80 +1383,79 @@ static int __init tnt4882_init_module(void) result = pci_register_driver(&tnt4882_pci_driver); if (result) { - pr_err("tnt4882_gpib: pci_register_driver failed: error = %d\n", result); + pr_err("pci_register_driver failed: error = %d\n", result); return result; } result = gpib_register_driver(&ni_isa_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_isa; } result = gpib_register_driver(&ni_isa_accel_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_isa_accel; } result = gpib_register_driver(&ni_nat4882_isa_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_nat4882_isa; } result = gpib_register_driver(&ni_nat4882_isa_accel_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_nat4882_isa_accel; } result = gpib_register_driver(&ni_nec_isa_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_nec_isa; } result = gpib_register_driver(&ni_nec_isa_accel_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_nec_isa_accel; } result = gpib_register_driver(&ni_pci_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pci; } result = gpib_register_driver(&ni_pci_accel_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pci_accel; } #ifdef CONFIG_GPIB_PCMCIA result = gpib_register_driver(&ni_pcmcia_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pcmcia; } result = gpib_register_driver(&ni_pcmcia_accel_interface, THIS_MODULE); if (result) { - pr_err("tnt4882_gpib: gpib_register_driver failed: error = %d\n", result); + pr_err("gpib_register_driver failed: error = %d\n", result); goto err_pcmcia_accel; } result = init_ni_gpib_cs(); if (result) { - pr_err("tnt4882_gpib: pcmcia_register_driver failed: error = %d\n", result); + pr_err("pcmcia_register_driver failed: error = %d\n", result); goto err_pcmcia_driver; } #endif mite_init(); - mite_list_devices(); return 0; @@ -1550,25 +1521,6 @@ static void __exit tnt4882_exit_module(void) #include #include -/* - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - * you do not define PCMCIA_DEBUG at all, all the debug code will be - * left out. If you compile with PCMCIA_DEBUG=0, the debug code will - * be present but disabled -- but it can then be enabled for specific - * modules at load time with a 'pc_debug=#' option to insmod. - */ -#define PCMCIA_DEBUG 1 -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) \ - do {if (pc_debug > (n)) \ - pr_debug(args); } \ - while (0) -#else -#define DEBUG(args...) -#endif - static int ni_gpib_config(struct pcmcia_device *link); static void ni_gpib_release(struct pcmcia_device *link); static void ni_pcmcia_detach(gpib_board_t *board); @@ -1606,8 +1558,6 @@ static int ni_gpib_probe(struct pcmcia_device *link) struct local_info_t *info; //struct gpib_board_t *dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); - /* Allocate space for private device-specific data */ info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -1641,8 +1591,6 @@ static void ni_gpib_remove(struct pcmcia_device *link) struct local_info_t *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(%p)\n", __func__, link); - if (info->dev) ni_pcmcia_detach(info->dev); ni_gpib_release(link); @@ -1673,8 +1621,6 @@ static int ni_gpib_config(struct pcmcia_device *link) //gpib_board_t *dev = info->dev; int last_ret; - DEBUG(0, "%s(0x%p)\n", __func__, link); - last_ret = pcmcia_loop_config(link, &ni_gpib_config_iteration, NULL); if (last_ret) { dev_warn(&link->dev, "no configuration found\n"); @@ -1697,7 +1643,6 @@ static int ni_gpib_config(struct pcmcia_device *link) */ static void ni_gpib_release(struct pcmcia_device *link) { - DEBUG(0, "%s(0x%p)\n", __func__, link); pcmcia_disable_device(link); } /* ni_gpib_release */ @@ -1705,10 +1650,9 @@ static int ni_gpib_suspend(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); if (link->open) - pr_err("Device still open ???\n"); + dev_warn(&link->dev, "Device still open\n"); //netif_device_detach(dev); return 0; @@ -1718,11 +1662,9 @@ static int ni_gpib_resume(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; //struct gpib_board_t *dev = info->dev; - DEBUG(0, "%s(0x%p)\n", __func__, link); /*if (link->open) { * ni_gpib_probe(dev); / really? - * printk("Gpib resumed ???\n"); * //netif_device_attach(dev); *} */ @@ -1755,7 +1697,6 @@ static int __init init_ni_gpib_cs(void) static void __exit exit_ni_gpib_cs(void) { - DEBUG(0, "ni_gpib_cs: unloading\n"); pcmcia_unregister_driver(&ni_gpib_cs_driver); } @@ -1767,13 +1708,10 @@ static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf struct tnt4882_priv *tnt_priv; struct nec7210_priv *nec_priv; int isr_flags = IRQF_SHARED; + int retval; - DEBUG(0, "%s(0x%p)\n", __func__, board); - - if (!curr_dev) { - pr_err("gpib: no NI PCMCIA board found\n"); - return -1; - } + if (!curr_dev) + return -ENODEV; info = curr_dev->priv; info->dev = board; @@ -1782,6 +1720,7 @@ static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf if (tnt4882_allocate_private(board)) return -ENOMEM; + tnt_priv = board->private_data; nec_priv = &tnt_priv->nec7210_priv; nec_priv->type = TNT4882; @@ -1789,23 +1728,20 @@ static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf nec_priv->write_byte = nec7210_locking_ioport_write_byte; nec_priv->offset = atgpib_reg_offset; - DEBUG(0, "ioport1 window attributes: 0x%lx\n", curr_dev->resource[0]->flags); if (!request_region(curr_dev->resource[0]->start, resource_size(curr_dev->resource[0]), - "tnt4882")) { - pr_err("gpib: ioports starting at 0x%lx are already in use\n", - (unsigned long)curr_dev->resource[0]->start); - return -EIO; - } + DRV_NAME)) + return -ENOMEM; nec_priv->mmiobase = ioport_map(curr_dev->resource[0]->start, resource_size(curr_dev->resource[0])); if (!nec_priv->mmiobase) - return -1; + return -ENOMEM; // get irq - if (request_irq(curr_dev->irq, tnt4882_interrupt, isr_flags, "tnt4882", board)) { - pr_err("gpib: can't request IRQ %d\n", curr_dev->irq); - return -1; + retval = request_irq(curr_dev->irq, tnt4882_interrupt, isr_flags, DRV_NAME, board); + if (retval) { + dev_err(board->gpib_dev, "failed to obtain PCMCIA irq %d\n", curr_dev->irq); + return retval; } tnt_priv->irq = curr_dev->irq; @@ -1819,8 +1755,6 @@ static void ni_pcmcia_detach(gpib_board_t *board) struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv; - DEBUG(0, "%s(0x%p)\n", __func__, board); - if (tnt_priv) { nec_priv = &tnt_priv->nec7210_priv; if (tnt_priv->irq) From 8fd74a31eaf3def0d57264ada57fc981902aeadf Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 22:15:26 +0800 Subject: [PATCH 0326/2157] driver core: class: Remove needless return in void API class_remove_file() Remove return since both class_remove_file() and class_remove_file_ns() are void functions. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-cls_rmv_return-v1-1-091b37945aac@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 45ee3a634999..65880e60c720 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -193,7 +193,7 @@ static inline int __must_check class_create_file(const struct class *class, static inline void class_remove_file(const struct class *class, const struct class_attribute *attr) { - return class_remove_file_ns(class, attr, NULL); + class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ From a44073c28bc6d4118891d61e31c9fa9dc4333dc0 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 23:18:39 +0800 Subject: [PATCH 0327/2157] driver core: Remove needless return in void API device_remove_group() Remove return since both device_remove_group() and device_remove_groups() are void functions. Fixes: e323b2dddc1c ("driver core: add device_{add|remove}_group() helpers") Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-fix_device_remove_group-v1-1-8a5b0ac0ce5c@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 80a5b3268986..605b60254f6d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1268,7 +1268,7 @@ static inline void device_remove_group(struct device *dev, { const struct attribute_group *groups[] = { grp, NULL }; - return device_remove_groups(dev, groups); + device_remove_groups(dev, groups); } int __must_check devm_device_add_group(struct device *dev, From be382372d55d65b5c7e5a523793ca5e403f8c595 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Mon, 20 Jan 2025 22:05:47 +0800 Subject: [PATCH 0328/2157] greybus: gb-beagleplay: Add error handling for gb_greybus_init Add error handling for the gb_greybus_init(bg) function call during the firmware reflash process to maintain consistency in error handling throughout the codebase. If initialization fails, log an error and return FW_UPLOAD_ERR_RW_ERROR. Fixes: 0cf7befa3ea2 ("greybus: gb-beagleplay: Add firmware upload API") Signed-off-by: Wentao Liang Reviewed-by: Ayush Singh Link: https://lore.kernel.org/r/20250120140547.1460-1-vulab@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/gb-beagleplay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index 473ac3f2d382..da31f1131afc 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -912,7 +912,9 @@ static enum fw_upload_err cc1352_prepare(struct fw_upload *fw_upload, cc1352_bootloader_reset(bg); WRITE_ONCE(bg->flashing_mode, false); msleep(200); - gb_greybus_init(bg); + if (gb_greybus_init(bg) < 0) + return dev_err_probe(&bg->sd->dev, FW_UPLOAD_ERR_RW_ERROR, + "Failed to initialize greybus"); gb_beagleplay_start_svc(bg); return FW_UPLOAD_ERR_FW_INVALID; } From 1a09cd9b7bc76e9e1d753c77584079d302241c23 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Fri, 31 Jan 2025 17:54:30 +0200 Subject: [PATCH 0329/2157] scripts/tags.sh: tag SYM_*START*() assembler symbols The `startup_64` symbol and many other assembler symbols are not tagged. Add a generic rule to tag assembler symbols defined with macros like SYM_*START*(symbol). Signed-off-by: Costa Shulyupin Link: https://lore.kernel.org/r/20250131155439.2025038-1-costa.shul@redhat.com Signed-off-by: Greg Kroah-Hartman --- scripts/tags.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/tags.sh b/scripts/tags.sh index 45eaf35f5bff..98680e9cd7be 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -146,6 +146,7 @@ dogtags() # a ^[^#] is prepended by setup_regex unless an anchor is already present regex_asm=( '/^\(ENTRY\|_GLOBAL\)([[:space:]]*\([[:alnum:]_\\]*\)).*/\2/' + '/^SYM_[[:alnum:]_]*START[[:alnum:]_]*([[:space:]]*\([[:alnum:]_\\]*\)).*/\1/' ) regex_c=( '/^SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/sys_\1/' From 10d43ecbb0121fee8ddbc1e9755edc0402e458ca Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 30 Jan 2025 01:26:54 +0000 Subject: [PATCH 0330/2157] mei: Remove unused functions The following functions have been in the mei code for a long time but have never been used. mei_txe_setup_satt2() was added in 2014 by commit 32e2b59fca2c ("mei: txe: add hw-txe.c") mei_me_cl_rm_by_uuid_id() was added in 2015 by commit 79563db9ddd3 ("mei: add reference counting for me clients") mei_cldev_uuid() was added in 2015 by commit baeacd037697 ("mei: bus: export uuid and protocol version to mei_cl bus drivers") mei_cldev_recv_nonblock() was added in 2016 by commit 076802d00615 ("mei: bus: enable non-blocking RX") it is the only user of mei_cldev_recv_nonblock_vtag(). Remove them. Signed-off-by: Dr. David Alan Gilbert Acked-by: Arnd Bergmann Reviewed-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250130012654.255119-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 52 -------------------------------------- drivers/misc/mei/client.c | 22 ---------------- drivers/misc/mei/client.h | 2 -- drivers/misc/mei/hw-txe.c | 45 --------------------------------- drivers/misc/mei/hw-txe.h | 2 -- include/linux/mei_cl_bus.h | 5 ---- 6 files changed, 128 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 718ec5d81d94..67176caf5416 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -323,28 +323,6 @@ ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, } EXPORT_SYMBOL_GPL(mei_cldev_recv_vtag); -/** - * mei_cldev_recv_nonblock_vtag - non block client receive with vtag (read) - * - * @cldev: me client device - * @buf: buffer to receive - * @length: buffer length - * @vtag: virtual tag - * - * Return: - * * read size in bytes - * * -EAGAIN if function will block. - * * < 0 on other error - */ -ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, - size_t length, u8 *vtag) -{ - struct mei_cl *cl = cldev->cl; - - return __mei_cl_recv(cl, buf, length, vtag, MEI_CL_IO_RX_NONBLOCK, 0); -} -EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock_vtag); - /** * mei_cldev_recv_timeout - client receive with timeout (read) * @@ -438,23 +416,6 @@ ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) } EXPORT_SYMBOL_GPL(mei_cldev_recv); -/** - * mei_cldev_recv_nonblock - non block client receive (read) - * - * @cldev: me client device - * @buf: buffer to receive - * @length: buffer length - * - * Return: read size in bytes of < 0 on error - * -EAGAIN if function will block. - */ -ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, - size_t length) -{ - return mei_cldev_recv_nonblock_vtag(cldev, buf, length, NULL); -} -EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock); - /** * mei_cl_bus_rx_work - dispatch rx event for a bus device * @@ -640,19 +601,6 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) } EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); -/** - * mei_cldev_uuid - return uuid of the underlying me client - * - * @cldev: mei client device - * - * Return: me client uuid - */ -const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) -{ - return mei_me_cl_uuid(cldev->me_cl); -} -EXPORT_SYMBOL_GPL(mei_cldev_uuid); - /** * mei_cldev_ver - return protocol version of the underlying me client * diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index be011cef12e5..3db07d2a881f 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -271,28 +271,6 @@ void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid) up_write(&dev->me_clients_rwsem); } -/** - * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id - * - * @dev: the device structure - * @uuid: me client uuid - * @id: me client id - * - * Locking: called under "dev->device_lock" lock - */ -void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id) -{ - struct mei_me_client *me_cl; - - dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id); - - down_write(&dev->me_clients_rwsem); - me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id); - __mei_me_cl_del(dev, me_cl); - mei_me_cl_put(me_cl); - up_write(&dev->me_clients_rwsem); -} - /** * mei_me_cl_rm_all - remove all me clients * diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 9052860bcfe0..01ed26a148c4 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -29,8 +29,6 @@ struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id); struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 client_id); void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid); -void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, - const uuid_le *uuid, u8 id); void mei_me_cl_rm_all(struct mei_device *dev); /** diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 5d0f68b95c29..e9476f9ae25d 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -1209,48 +1209,3 @@ struct mei_device *mei_txe_dev_init(struct pci_dev *pdev) return dev; } - -/** - * mei_txe_setup_satt2 - SATT2 configuration for DMA support. - * - * @dev: the device structure - * @addr: physical address start of the range - * @range: physical range size - * - * Return: 0 on success an error code otherwise - */ -int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range) -{ - struct mei_txe_hw *hw = to_txe_hw(dev); - - u32 lo32 = lower_32_bits(addr); - u32 hi32 = upper_32_bits(addr); - u32 ctrl; - - /* SATT is limited to 36 Bits */ - if (hi32 & ~0xF) - return -EINVAL; - - /* SATT has to be 16Byte aligned */ - if (lo32 & 0xF) - return -EINVAL; - - /* SATT range has to be 4Bytes aligned */ - if (range & 0x4) - return -EINVAL; - - /* SATT is limited to 32 MB range*/ - if (range > SATT_RANGE_MAX) - return -EINVAL; - - ctrl = SATT2_CTRL_VALID_MSK; - ctrl |= hi32 << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT; - - mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range); - mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32); - mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl); - dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n", - range, lo32, ctrl); - - return 0; -} diff --git a/drivers/misc/mei/hw-txe.h b/drivers/misc/mei/hw-txe.h index 96511b04bf88..6790e646895d 100644 --- a/drivers/misc/mei/hw-txe.h +++ b/drivers/misc/mei/hw-txe.h @@ -59,7 +59,5 @@ irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id); int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req); -int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range); - #endif /* _MEI_HW_TXE_H_ */ diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index b38a56a13f39..725fd7727422 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -97,8 +97,6 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, - size_t length); ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, @@ -107,8 +105,6 @@ ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); -ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, - size_t length, u8 *vtag); ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag, unsigned long timeout); @@ -116,7 +112,6 @@ int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, mei_cldev_cb_t notif_cb); -const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); From 790b2f242a1e2b7ec8309fd354a0579e2279661c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 12 Feb 2025 12:48:41 +0100 Subject: [PATCH 0331/2157] virtio: console: Use str_yes_no() helper in port_debugfs_show() Remove hard-coded strings by using the str_yes_no() helper function. Signed-off-by: Thorsten Blum Reviewed-by: Amit Shah Link: https://lore.kernel.org/r/20250212114841.74650-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 24442485e73e..35af0cc11d93 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "../tty/hvc/hvc_console.h" #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) @@ -1269,8 +1270,7 @@ static int port_debugfs_show(struct seq_file *s, void *data) seq_printf(s, "bytes_sent: %lu\n", port->stats.bytes_sent); seq_printf(s, "bytes_received: %lu\n", port->stats.bytes_received); seq_printf(s, "bytes_discarded: %lu\n", port->stats.bytes_discarded); - seq_printf(s, "is_console: %s\n", - is_console_port(port) ? "yes" : "no"); + seq_printf(s, "is_console: %s\n", str_yes_no(is_console_port(port))); seq_printf(s, "console_vtermno: %u\n", port->cons.vtermno); return 0; From 17f18e04a125a75fd8d5fe1f7d88db9ccbf552cc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Feb 2025 11:59:45 +0200 Subject: [PATCH 0332/2157] virtio_console: Get rid of unneeded temporary variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling a kernel with GCC using `make W=1` with CONFIG_WERROR=y (which is default nowadays), the build fails: drivers/char/virtio_console.c:1427:9: note: ‘snprintf’ output between 9 and 27 bytes into a destination of size 16 Indeed, GCC can't see the limits of the variables that are in use. Fix this by using dev_name() of the newly created device that is luckily the same as the string used for the DebugFS node name. Signed-off-by: Andy Shevchenko Reviewed-by: Amit Shah Link: https://lore.kernel.org/r/20250210095946.4122771-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 35af0cc11d93..9e6cee6082b5 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1321,7 +1321,6 @@ static void send_sigio_to_port(struct port *port) static int add_port(struct ports_device *portdev, u32 id) { - char debugfs_name[16]; struct port *port; dev_t devt; int err; @@ -1424,9 +1423,7 @@ static int add_port(struct ports_device *portdev, u32 id) * Finally, create the debugfs file that we can use to * inspect a port's state at any time */ - snprintf(debugfs_name, sizeof(debugfs_name), "vport%up%u", - port->portdev->vdev->index, id); - port->debugfs_file = debugfs_create_file(debugfs_name, 0444, + port->debugfs_file = debugfs_create_file(dev_name(port->dev), 0444, pdrvdata.debugfs_dir, port, &port_debugfs_fops); return 0; From 74826b3fd7d2f131ee9baebe66189b0ff3c50a96 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 10 Feb 2025 13:31:03 +0100 Subject: [PATCH 0333/2157] sonypi: Use str_on_off() helper in sonypi_display_info() Remove hard-coded strings by using the str_on_off() helper function. Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250210123103.112938-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/char/sonypi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index f887569fd3d0..677bb5ac950a 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1268,12 +1269,12 @@ static void sonypi_display_info(void) "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n", sonypi_device.model, verbose, - fnkeyinit ? "on" : "off", - camera ? "on" : "off", - compat ? "on" : "off", + str_on_off(fnkeyinit), + str_on_off(camera), + str_on_off(compat), mask, - useinput ? "on" : "off", - SONYPI_ACPI_ACTIVE ? "on" : "off"); + str_on_off(useinput), + str_on_off(SONYPI_ACPI_ACTIVE)); printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n", sonypi_device.irq, sonypi_device.ioport1, sonypi_device.ioport2); From 78c0a5056c5856f24bf862ccd641394fd3dfd1dd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Feb 2025 20:27:51 +0900 Subject: [PATCH 0334/2157] binder: remove unneeded inclusion from binder_internal.h binder_internal.h is included only in the following two C files: $ git grep binder_internal.h drivers/android/binder.c:#include "binder_internal.h" drivers/android/binderfs.c:#include "binder_internal.h" Neither of these files use the EXPORT_SYMBOL macro, so including is unnecessary. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20250217112756.1011333-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index e4eb8357989c..6a66c9769c6c 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -3,7 +3,6 @@ #ifndef _LINUX_BINDER_INTERNAL_H #define _LINUX_BINDER_INTERNAL_H -#include #include #include #include From d9406677428e9234ea62bb2d2f5e996d1b777760 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 18 Feb 2025 16:09:59 -0600 Subject: [PATCH 0335/2157] eeprom: ee1004: Check chip before probing Like other eeprom drivers, check if the device is really there and functional before probing. Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20250218220959.721698-1-eajames@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/ee1004.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index 89224d4af4a2..e13f9fdd9d7b 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -304,6 +304,10 @@ static int ee1004_probe(struct i2c_client *client) I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_READ_BYTE_DATA)) return -EPFNOSUPPORT; + err = i2c_smbus_read_byte(client); + if (err < 0) + return -ENODEV; + mutex_lock(&ee1004_bus_lock); err = ee1004_init_bus_data(client); From 6aa9826330539abcfd3435de93b45cb506e7b86d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 23 Jan 2025 09:32:46 -0300 Subject: [PATCH 0336/2157] char: misc: improve testing Kconfig description Describe that it tests the miscdevice API and include the usual disclaimer about KUnit not being fit for production kernels. While at it, also fix KUnit capitalization. Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20250123123249.4081674-2-cascardo@igalia.com Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06..76343a203d74 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2479,13 +2479,20 @@ config TEST_IDA tristate "Perform selftest on IDA functions" config TEST_MISC_MINOR - tristate "Basic misc minor Kunit test" if !KUNIT_ALL_TESTS + tristate "miscdevice KUnit test" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS help - Kunit test for the misc minor. - It tests misc minor functions for dynamic and misc dynamic minor. - This include misc_xxx functions + Kunit test for miscdevice API, specially its behavior in respect to + static and dynamic minor numbers. + + KUnit tests run during boot and output the results to the debug log + in TAP format (https://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. If unsure, say N. From a2d1afe65a152e8e581b48cebb1517e6bdf09d04 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:13 +0100 Subject: [PATCH 0337/2157] serial: xilinx_uartps: Use helper function hrtimer_update_function() The field 'function' of struct hrtimer should not be changed directly, as the write is lockless and a concurrent timer expiry might end up using the wrong function pointer. Switch to use hrtimer_update_function() which also performs runtime checks that it is safe to modify the callback. Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/af7823518fb060c6c97105a2513cfc61adbdf38f.1738746927.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 92ec51870d1d..afc044461f2b 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -454,7 +454,7 @@ static void cdns_uart_handle_tx(void *dev_id) if (cdns_uart->port->rs485.flags & SER_RS485_ENABLED && (kfifo_is_empty(&tport->xmit_fifo) || uart_tx_stopped(port))) { - cdns_uart->tx_timer.function = &cdns_rs485_rx_callback; + hrtimer_update_function(&cdns_uart->tx_timer, cdns_rs485_rx_callback); hrtimer_start(&cdns_uart->tx_timer, ns_to_ktime(cdns_calc_after_tx_delay(cdns_uart)), HRTIMER_MODE_REL); } @@ -734,7 +734,7 @@ static void cdns_uart_start_tx(struct uart_port *port) if (cdns_uart->port->rs485.flags & SER_RS485_ENABLED) { if (!cdns_uart->rs485_tx_started) { - cdns_uart->tx_timer.function = &cdns_rs485_tx_callback; + hrtimer_update_function(&cdns_uart->tx_timer, cdns_rs485_tx_callback); cdns_rs485_tx_setup(cdns_uart); return hrtimer_start(&cdns_uart->tx_timer, ms_to_ktime(port->rs485.delay_rts_before_send), From d45545c32904d933a423a8f35edd1cb3cd4adf40 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:45:56 +0100 Subject: [PATCH 0338/2157] serial: 8250: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/991926d130cc272df30d226760d5d74187991669.1738746904.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_bcm7271.c | 3 +-- drivers/tty/serial/8250/8250_port.c | 10 ++++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index d0b18358859e..742004d63c6f 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -1056,8 +1056,7 @@ static int brcmuart_probe(struct platform_device *pdev) } /* setup HR timer */ - hrtimer_init(&priv->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - priv->hrt.function = brcmuart_hrtimer_func; + hrtimer_setup(&priv->hrt, brcmuart_hrtimer_func, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); up.port.shutdown = brcmuart_shutdown; up.port.startup = brcmuart_startup; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 886e40f680d4..3f256e96c722 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -566,12 +566,10 @@ static int serial8250_em485_init(struct uart_8250_port *p) if (!p->em485) return -ENOMEM; - hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - p->em485->stop_tx_timer.function = &serial8250_em485_handle_stop_tx; - p->em485->start_tx_timer.function = &serial8250_em485_handle_start_tx; + hrtimer_setup(&p->em485->stop_tx_timer, &serial8250_em485_handle_stop_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + hrtimer_setup(&p->em485->start_tx_timer, &serial8250_em485_handle_start_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); p->em485->port = p; p->em485->active_timer = NULL; p->em485->tx_stopped = true; From 8cb44188b986014c6e532f2b39982fdd06cda07d Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:45:57 +0100 Subject: [PATCH 0339/2157] serial: amba-pl011: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/78e8c0d1b38998eab983fad265751ed13c2b9009.1738746904.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 9a9a1d6306d0..047fb9f51539 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2918,11 +2918,10 @@ static int pl011_probe(struct amba_device *dev, const struct amba_id *id) return -EINVAL; } } - - hrtimer_init(&uap->trigger_start_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_init(&uap->trigger_stop_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - uap->trigger_start_tx.function = pl011_trigger_start_tx; - uap->trigger_stop_tx.function = pl011_trigger_stop_tx; + hrtimer_setup(&uap->trigger_start_tx, pl011_trigger_start_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + hrtimer_setup(&uap->trigger_stop_tx, pl011_trigger_stop_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); ret = pl011_setup_port(&dev->dev, uap, &dev->res, portnr); if (ret) From afa51660033c5542612dd0fccdef300aa522cf95 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:45:58 +0100 Subject: [PATCH 0340/2157] serial: imx: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/ad27070bc67c13f8a9acbd5cbf4cbae72797e3e1.1738746904.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index cfeb3f8cf45e..19c819705bf9 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2582,10 +2582,10 @@ static int imx_uart_probe(struct platform_device *pdev) imx_uart_writel(sport, ucr3, UCR3); } - hrtimer_init(&sport->trigger_start_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_init(&sport->trigger_stop_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - sport->trigger_start_tx.function = imx_trigger_start_tx; - sport->trigger_stop_tx.function = imx_trigger_stop_tx; + hrtimer_setup(&sport->trigger_start_tx, imx_trigger_start_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + hrtimer_setup(&sport->trigger_stop_tx, imx_trigger_stop_tx, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); /* * Allocate the IRQ(s) i.MX1 has three interrupts whereas later From 7ba2facc3f91809a5af083ccfb1f1dc79f18b48b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:45:59 +0100 Subject: [PATCH 0341/2157] serial: sh-sci: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/c21664d013015584aebbb6bb8cedd748182cb551.1738746904.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index e0ead0147bfe..2db2d85003f7 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1714,8 +1714,7 @@ static void sci_request_dma(struct uart_port *port) dma += s->buf_len_rx; } - hrtimer_init(&s->rx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - s->rx_timer.function = sci_dma_rx_timer_fn; + hrtimer_setup(&s->rx_timer, sci_dma_rx_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL); s->chan_rx_saved = s->chan_rx = chan; From d2fa8e52cf9193babd1a9179dc2459868965a40c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:46:00 +0100 Subject: [PATCH 0342/2157] serial: xilinx_uartps: Switch to use hrtimer_setup() hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Patch was created by using Coccinelle. Acked-by: Zack Rusin Signed-off-by: Nam Cao Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/4a028a23126b3350a5e243dcb49e1ef1b2a4b740.1738746904.git.namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index afc044461f2b..fe457bf1e15b 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1626,8 +1626,8 @@ static int cdns_rs485_config(struct uart_port *port, struct ktermios *termios, writel(val, port->membase + CDNS_UART_MODEMCR); /* Timer setup */ - hrtimer_init(&cdns_uart->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cdns_uart->tx_timer.function = &cdns_rs485_tx_callback; + hrtimer_setup(&cdns_uart->tx_timer, &cdns_rs485_tx_callback, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); /* Disable transmitter and make Rx setup*/ cdns_uart_stop_tx(port); From 6f61e5dce268b0fb97a7a6e91f0bd2d5633e5db2 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 4 Feb 2025 11:11:51 -0500 Subject: [PATCH 0343/2157] MAINTAINERS: Remove Conor Culhane from Silvaco I3C Conor Culhane's email address (conor.culhane@silvaco.com) is no longer accessible, and there has been no activity from him on https://lore.kernel.org/linux-i3c/ in the past two years. To avoid email delivery failure messages when sending patches to the I3C mailing list, remove him from the Maintainers section and move him to the credits. Signed-off-by: Frank Li Acked-by: Miquel Raynal Link: https://lore.kernel.org/r/20250204161151.2179596-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 1f9f0f078b4a..f4f688c7a14a 100644 --- a/CREDITS +++ b/CREDITS @@ -855,6 +855,10 @@ N: John Crispin E: john@phrozen.org D: MediaTek MT7623 Gigabit ethernet support +N: Conor Culhane +E: conor.culhane@silvaco.com +D: Silvaco I3C master driver + N: Laurence Culhane E: loz@holmes.demon.co.uk D: Wrote the initial alpha SLIP code diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..0dfec7275948 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21662,7 +21662,6 @@ F: drivers/video/fbdev/sm712* SILVACO I3C DUAL-ROLE MASTER M: Miquel Raynal -M: Conor Culhane L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml From bdffad83d75608eb6289858909fafcc72f046547 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 15 Aug 2024 10:16:09 -0400 Subject: [PATCH 0344/2157] MAINTAINERS: Add Frank Li to Silvaco I3C Add Frank Li as NXP i3c controller driver as maintainer and add mail list imx@lists.linux.dev for it. Add Frank Li as I3C subsystem reviewer. Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240815141609.4089406-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0dfec7275948..3c9410c156ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10998,6 +10998,7 @@ F: drivers/i3c/master/dw* I3C SUBSYSTEM M: Alexandre Belloni +R: Frank Li L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained C: irc://chat.freenode.net/linux-i3c @@ -21662,7 +21663,9 @@ F: drivers/video/fbdev/sm712* SILVACO I3C DUAL-ROLE MASTER M: Miquel Raynal +M: Frank Li L: linux-i3c@lists.infradead.org (moderated for non-subscribers) +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c From 6866c91f8c2327546d850d5a85025fb81e2089bf Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Tue, 4 Feb 2025 17:17:01 +0800 Subject: [PATCH 0345/2157] i3c: Remove the const qualifier from i2c_msg pointer in i2c_xfers API The change is necessary to enable the use of the `i2c_get_dma_safe_msg_buf()` API, which requires a non-const `struct i2c_msg *` to operate. The `i2c_get_dma_safe_msg_buf()` function ensures safe handling of I2C messages when using DMA, making it essential for scenarios where DMA transfers are involved. By removing the `const` qualifier, this patch allows drivers to prepare and manage DMA-safe buffers directly. Signed-off-by: Billy Tsai Reviewed-by: Frank Li Reviewed-by: Wolfram Sang Acked-by: Mukesh Kumar Savaliya Link: https://lore.kernel.org/r/20250204091702.4014466-1-billy_tsai@aspeedtech.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/dw-i3c-master.c | 2 +- drivers/i3c/master/i3c-master-cdns.c | 2 +- drivers/i3c/master/mipi-i3c-hci/core.c | 2 +- drivers/i3c/master/svc-i3c-master.c | 2 +- include/linux/i3c/master.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 2fbf8b2addd0..611c22b72c15 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -1079,7 +1079,7 @@ static void dw_i3c_master_detach_i3c_dev(struct i3c_dev_desc *dev) } static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, - const struct i2c_msg *i2c_xfers, + struct i2c_msg *i2c_xfers, int i2c_nxfers) { struct dw_i3c_i2c_dev_data *data = i2c_dev_get_master_data(dev); diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index fedbe6624a1c..fd3752cea654 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -813,7 +813,7 @@ static int cdns_i3c_master_priv_xfers(struct i3c_dev_desc *dev, } static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, - const struct i2c_msg *xfers, int nxfers) + struct i2c_msg *xfers, int nxfers) { struct i3c_master_controller *m = i2c_dev_get_master(dev); struct cdns_i3c_master *master = to_cdns_i3c_master(m); diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index 648c501407ce..a34eec850bcc 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -367,7 +367,7 @@ static int i3c_hci_priv_xfers(struct i3c_dev_desc *dev, } static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, - const struct i2c_msg *i2c_xfers, int nxfers) + struct i2c_msg *i2c_xfers, int nxfers) { struct i3c_master_controller *m = i2c_dev_get_master(dev); struct i3c_hci *hci = to_i3c_hci(m); diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index d6057d8c7dec..413479b12ba6 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1584,7 +1584,7 @@ static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev, } static int svc_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, - const struct i2c_msg *xfers, + struct i2c_msg *xfers, int nxfers) { struct i3c_master_controller *m = i2c_dev_get_master(dev); diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 12d532b012c5..c67922ece617 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -475,7 +475,7 @@ struct i3c_master_controller_ops { int (*attach_i2c_dev)(struct i2c_dev_desc *dev); void (*detach_i2c_dev)(struct i2c_dev_desc *dev); int (*i2c_xfers)(struct i2c_dev_desc *dev, - const struct i2c_msg *xfers, int nxfers); + struct i2c_msg *xfers, int nxfers); int (*request_ibi)(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_dev_desc *dev); From effed5dac8f8b3db006c4971cdd592504afd1c5d Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Tue, 4 Feb 2025 17:17:02 +0800 Subject: [PATCH 0346/2157] i3c: mipi-i3c-hci: Use I2C DMA-safe api Use the i2c_get/put_dma_safe_msg_buf for I2C transfers instead of using the I3C-specific API. Signed-off-by: Billy Tsai Acked-by: Mukesh Kumar Savaliya Reviewed-by: Jarkko Nikula Link: https://lore.kernel.org/r/20250204091702.4014466-2-billy_tsai@aspeedtech.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/mipi-i3c-hci/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index a34eec850bcc..a71226d7ca59 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -382,14 +382,11 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, return -ENOMEM; for (i = 0; i < nxfers; i++) { - xfer[i].data = i2c_xfers[i].buf; + xfer[i].data = i2c_get_dma_safe_msg_buf(&i2c_xfers[i], 1); xfer[i].data_len = i2c_xfers[i].len; xfer[i].rnw = i2c_xfers[i].flags & I2C_M_RD; hci->cmd->prep_i2c_xfer(hci, dev, &xfer[i]); xfer[i].cmd_desc[0] |= CMD_0_ROC; - ret = i3c_hci_alloc_safe_xfer_buf(hci, &xfer[i]); - if (ret) - goto out; } last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; @@ -412,7 +409,8 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, out: for (i = 0; i < nxfers; i++) - i3c_hci_free_safe_xfer_buf(hci, &xfer[i]); + i2c_put_dma_safe_msg_buf(xfer[i].data, &i2c_xfers[i], + ret ? false : true); hci_free_xfer(xfer, nxfers); return ret; From a892ee4cf22a50e1d6988d0464a9a421f3e5db2f Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 29 Jan 2025 11:22:50 -0500 Subject: [PATCH 0347/2157] i3c: master: svc: Flush FIFO before sending Dynamic Address Assignment(DAA) Ensure the FIFO is empty before issuing the DAA command to prevent incorrect command data from being sent. Align with other data transfers, such as svc_i3c_master_start_xfer_locked(), which flushes the FIFO before sending a command. Signed-off-by: Frank Li Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20250129162250.3629189-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 413479b12ba6..98f800f99969 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -892,6 +892,8 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, u32 reg; int ret, i; + svc_i3c_master_flush_fifo(master); + while (true) { /* clean SVC_I3C_MINT_IBIWON w1c bits */ writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); From b1b620bfa984b8fb91a284b60df702346b4294a4 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 3 Feb 2025 09:50:00 +0700 Subject: [PATCH 0348/2157] kernel: Fix "select" wording on HZ_250 description HZ_250 config description contains alternative choice for NTSC media users (HZ_300), which is written as "selected 300Hz". This is incorrect, as it implies that HZ_300 is automatically selected whereas the user has chosen HZ_250 instead. Fix the wording to "select 300Hz". Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250203025000.17953-1-bagasdotme@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/Kconfig.hz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 38ef6d06888e..ce1435cb08b1 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -30,7 +30,7 @@ choice 250 Hz is a good compromise choice allowing server performance while also showing good interactive responsiveness even on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. + or multimedia, select 300Hz instead. config HZ_300 bool "300 HZ" From 6fb1ee255ed92b903b9b74e8483d05390cf9cfe6 Mon Sep 17 00:00:00 2001 From: Bharadwaj Raju Date: Tue, 4 Feb 2025 03:33:09 +0530 Subject: [PATCH 0349/2157] drivers/base/bus.c: fix spelling of "subsystem" Fix spelling, "subystem" -> "subsystem" Signed-off-by: Bharadwaj Raju Link: https://lore.kernel.org/r/20250203220312.1052986-1-bharadwaj.raju777@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 6b9e65a42cd2..5ea3b03af9ba 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1291,7 +1291,7 @@ EXPORT_SYMBOL_GPL(subsys_system_register); * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/ root device - * with the name of the subystem. The root device can carry subsystem-wide + * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. From 0514059ca09e407614a02fb3687ea78e607e6526 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 22:45:48 +0800 Subject: [PATCH 0350/2157] MAINTAINERS: Add driver core headers to DRIVER CORE maintainers According to get_maintainer.pl output, there are neither maintainer nor supporter for the following driver core headers: include/linux/device.h include/linux/device/ Add them to DRIVER CORE maintainers. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-drv_core_hdr-v1-1-8205b0483e3f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f..23ab17594359 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7098,7 +7098,9 @@ F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ F: fs/sysfs/ +F: include/linux/device/ F: include/linux/debugfs.h +F: include/linux/device.h F: include/linux/fwnode.h F: include/linux/kobj* F: include/linux/property.h From 177cbd5249b1af0b92e47fbd480f277cf3a0598d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 5 Feb 2025 12:58:52 -0800 Subject: [PATCH 0351/2157] drivers: base: component: Allow more space for device name Some drivers use - as the aggregate device name which uses more than 20 chars, causing the status not to be aligned correctly. Example for mei_gsc_proxy on LNL: Before: aggregate_device name status ------------------------------------------------------------- 0000:00:16.0-0f73db04-97ab-4125-b893-e904ad0d5464 bound After: aggregate_device name status ----------------------------------------------------------------------- 0000:00:16.0-0f73db04-97ab-4125-b893-e904ad0d5464 bound Give it 10 more chars for proper alignment. Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250205205851.2355820-2-lucas.demarchi@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/component.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/component.c b/drivers/base/component.c index 741497324d78..747c5542c70f 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -87,17 +87,17 @@ static int component_devices_show(struct seq_file *s, void *data) size_t i; mutex_lock(&component_mutex); - seq_printf(s, "%-40s %20s\n", "aggregate_device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); - seq_printf(s, "%-40s %20s\n\n", + seq_printf(s, "%-50s %20s\n", "aggregate_device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); - seq_printf(s, "%-40s %20s\n", "device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n", "device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; - seq_printf(s, "%-40s %20s\n", + seq_printf(s, "%-50s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? "bound" : "not bound") : "not registered"); } From 1d2d45b62784e81b6e08ec0ab7cca4eaa23ff581 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 11 Feb 2025 14:24:09 +0100 Subject: [PATCH 0352/2157] driver core: location: Use str_yes_no() helper function Remove hard-coded strings by using the str_yes_no() helper function. Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250211132409.700073-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/base/physical_location.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/physical_location.c b/drivers/base/physical_location.c index 5db06e825c94..a5539e294d4d 100644 --- a/drivers/base/physical_location.c +++ b/drivers/base/physical_location.c @@ -7,6 +7,7 @@ #include #include +#include #include "physical_location.h" @@ -116,7 +117,7 @@ static ssize_t dock_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->dock ? "yes" : "no"); + str_yes_no(dev->physical_location->dock)); } static DEVICE_ATTR_RO(dock); @@ -124,7 +125,7 @@ static ssize_t lid_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->lid ? "yes" : "no"); + str_yes_no(dev->physical_location->lid)); } static DEVICE_ATTR_RO(lid); From 7de24e20a7aa83295e567982b0b29f3b53152759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 14 Jan 2025 22:25:14 +0100 Subject: [PATCH 0353/2157] cxl/port: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20250114-sysfs-const-bin_attr-cxl-v1-1-5afa23fe2a52@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/port.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d2bfd1ff5492..a35fc5552845 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -153,7 +153,7 @@ static int cxl_port_probe(struct device *dev) } static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -170,7 +170,7 @@ static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, port->cdat.length); } -static BIN_ATTR_ADMIN_RO(CDAT, 0); +static const BIN_ATTR_ADMIN_RO(CDAT, 0); static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, const struct bin_attribute *attr, int i) @@ -184,13 +184,13 @@ static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, return 0; } -static struct bin_attribute *cxl_cdat_bin_attributes[] = { +static const struct bin_attribute *const cxl_cdat_bin_attributes[] = { &bin_attr_CDAT, NULL, }; -static struct attribute_group cxl_cdat_attribute_group = { - .bin_attrs = cxl_cdat_bin_attributes, +static const struct attribute_group cxl_cdat_attribute_group = { + .bin_attrs_new = cxl_cdat_bin_attributes, .is_bin_visible = cxl_port_bin_attr_is_visible, }; From 14e694dbf285e74af97d50212e3272d0d22ea653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:14 +0100 Subject: [PATCH 0354/2157] firmware: dmi: Mark bin_attributes as __ro_after_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attributes are only modified during the __init phase. Protect them against accidental or intentional modifications afterwards. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-1-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi_scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index fde0656481cc..70d39adf50dc 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -761,8 +761,8 @@ static void __init dmi_scan_machine(void) pr_info("DMI not present or invalid.\n"); } -static BIN_ATTR_SIMPLE_ADMIN_RO(smbios_entry_point); -static BIN_ATTR_SIMPLE_ADMIN_RO(DMI); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(smbios_entry_point); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(DMI); static int __init dmi_init(void) { From 80d3989b9ce3b05f7a12cc5c4c8c565fc4ed88f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:15 +0100 Subject: [PATCH 0355/2157] firmware: dmi: Define bin_attributes through macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro makes the code shorter and simplifies constification of the callback arguments. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-2-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi-sysfs.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 8d91997036e4..6baa921a6664 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -431,9 +431,9 @@ static ssize_t dmi_sel_raw_read_helper(struct dmi_sysfs_entry *entry, } } -static ssize_t dmi_sel_raw_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) +static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj->parent); struct dmi_read_state state = { @@ -445,10 +445,7 @@ static ssize_t dmi_sel_raw_read(struct file *filp, struct kobject *kobj, return find_dmi_entry(entry, dmi_sel_raw_read_helper, &state); } -static struct bin_attribute dmi_sel_raw_attr = { - .attr = {.name = "raw_event_log", .mode = 0400}, - .read = dmi_sel_raw_read, -}; +static BIN_ATTR_ADMIN_RO(raw_event_log, 0); static int dmi_system_event_log(struct dmi_sysfs_entry *entry) { @@ -464,7 +461,7 @@ static int dmi_system_event_log(struct dmi_sysfs_entry *entry) if (ret) goto out_free; - ret = sysfs_create_bin_file(entry->child, &dmi_sel_raw_attr); + ret = sysfs_create_bin_file(entry->child, &bin_attr_raw_event_log); if (ret) goto out_del; @@ -537,10 +534,10 @@ static ssize_t dmi_entry_raw_read_helper(struct dmi_sysfs_entry *entry, &state->pos, dh, entry_length); } -static ssize_t dmi_entry_raw_read(struct file *filp, - struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) +static ssize_t raw_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj); struct dmi_read_state state = { @@ -552,10 +549,7 @@ static ssize_t dmi_entry_raw_read(struct file *filp, return find_dmi_entry(entry, dmi_entry_raw_read_helper, &state); } -static const struct bin_attribute dmi_entry_raw_attr = { - .attr = {.name = "raw", .mode = 0400}, - .read = dmi_entry_raw_read, -}; +static const BIN_ATTR_ADMIN_RO(raw, 0); static void dmi_sysfs_entry_release(struct kobject *kobj) { @@ -630,7 +624,7 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh, goto out_err; /* Create the raw binary file to access the entry */ - *ret = sysfs_create_bin_file(&entry->kobj, &dmi_entry_raw_attr); + *ret = sysfs_create_bin_file(&entry->kobj, &bin_attr_raw); if (*ret) goto out_err; From 1c83b02c91c1cc42c3030788f437fe403c7e3b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:16 +0100 Subject: [PATCH 0356/2157] firmware: dmi: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-3-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 6baa921a6664..9cc963b2edc0 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -432,7 +432,7 @@ static ssize_t dmi_sel_raw_read_helper(struct dmi_sysfs_entry *entry, } static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj->parent); @@ -445,7 +445,7 @@ static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, return find_dmi_entry(entry, dmi_sel_raw_read_helper, &state); } -static BIN_ATTR_ADMIN_RO(raw_event_log, 0); +static const BIN_ATTR_ADMIN_RO(raw_event_log, 0); static int dmi_system_event_log(struct dmi_sysfs_entry *entry) { @@ -536,7 +536,7 @@ static ssize_t dmi_entry_raw_read_helper(struct dmi_sysfs_entry *entry, static ssize_t raw_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj); From 7787bfb3b0ea62432d3ffcd31eb66daec4b462ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:47 +0100 Subject: [PATCH 0357/2157] drm/sysfs: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-1-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index fb3bbb6adcd1..60c1f26edb6f 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -261,7 +261,7 @@ static ssize_t enabled_show(struct device *device, } static ssize_t edid_show(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *connector_dev = kobj_to_dev(kobj); @@ -315,21 +315,21 @@ static struct attribute *connector_dev_attrs[] = { NULL }; -static struct bin_attribute edid_attr = { +static const struct bin_attribute edid_attr = { .attr.name = "edid", .attr.mode = 0444, .size = 0, - .read = edid_show, + .read_new = edid_show, }; -static struct bin_attribute *connector_bin_attrs[] = { +static const struct bin_attribute *const connector_bin_attrs[] = { &edid_attr, NULL }; static const struct attribute_group connector_dev_group = { .attrs = connector_dev_attrs, - .bin_attrs = connector_bin_attrs, + .bin_attrs_new = connector_bin_attrs, }; static const struct attribute_group *connector_dev_groups[] = { From cf3864d84fe98b7f7ff37a90156c3fc8f2c0067e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:48 +0100 Subject: [PATCH 0358/2157] drm/lima: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-2-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/lima/lima_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 2067c5b65c57..11ace5cebf4c 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -310,7 +310,7 @@ static bool lima_read_block(struct lima_block_reader *reader, } static ssize_t lima_error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -336,7 +336,7 @@ static ssize_t lima_error_state_read(struct file *filp, struct kobject *kobj, } static ssize_t lima_error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -362,8 +362,8 @@ static const struct bin_attribute lima_error_state_attr = { .attr.name = "error", .attr.mode = 0600, .size = 0, - .read = lima_error_state_read, - .write = lima_error_state_write, + .read_new = lima_error_state_read, + .write_new = lima_error_state_write, }; static int lima_pdev_probe(struct platform_device *pdev) From e3626a456599304f59f827494746940ec025ad6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:49 +0100 Subject: [PATCH 0359/2157] drm/i915: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Andi Shyti Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-3-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- drivers/gpu/drm/i915/i915_sysfs.c | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 819ab933bb10..a6613eed3398 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2490,7 +2490,7 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) } static ssize_t error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -2526,7 +2526,7 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, } static ssize_t error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -2542,8 +2542,8 @@ static const struct bin_attribute error_state_attr = { .attr.name = "error", .attr.mode = S_IRUSR | S_IWUSR, .size = 0, - .read = error_state_read, - .write = error_state_write, + .read_new = error_state_read, + .write_new = error_state_write, }; void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 8775beab9cb8..f936e8f1f129 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -60,7 +60,7 @@ static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) static ssize_t i915_l3_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -88,7 +88,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj, static ssize_t i915_l3_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -140,8 +140,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute dpf_attrs = { .attr = {.name = "l3_parity", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read = i915_l3_read, - .write = i915_l3_write, + .read_new = i915_l3_read, + .write_new = i915_l3_write, .mmap = NULL, .private = (void *)0 }; @@ -149,8 +149,8 @@ static const struct bin_attribute dpf_attrs = { static const struct bin_attribute dpf_attrs_1 = { .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read = i915_l3_read, - .write = i915_l3_write, + .read_new = i915_l3_read, + .write_new = i915_l3_write, .mmap = NULL, .private = (void *)1 }; From 2d0f5001b61c4831d413d12c10caed0e99d73b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:50 +0100 Subject: [PATCH 0360/2157] drm/amdgpu: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-4-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 ++++++------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d100bb7a137c..e6fa63f97687 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -225,7 +225,7 @@ static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -261,8 +261,8 @@ static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, return bytes_read; } -BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, - AMDGPU_SYS_REG_STATE_END); +static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e5fc80ed06ea..bb02846797eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -4000,7 +4000,7 @@ int is_psp_fw_valid(struct psp_bin_desc bin) } static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4036,7 +4036,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, } static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4088,11 +4088,11 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, * Writing to this file will stage an IFWI for update. Reading from this file * will trigger the update process. */ -static struct bin_attribute psp_vbflash_bin_attr = { +static const struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, - .write = amdgpu_psp_vbflash_write, - .read = amdgpu_psp_vbflash_read, + .write_new = amdgpu_psp_vbflash_write, + .read_new = amdgpu_psp_vbflash_read, }; /** @@ -4119,7 +4119,7 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, } static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); -static struct bin_attribute *bin_flash_attrs[] = { +static const struct bin_attribute *const bin_flash_attrs[] = { &psp_vbflash_bin_attr, NULL }; @@ -4155,7 +4155,7 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, - .bin_attrs = bin_flash_attrs, + .bin_attrs_new = bin_flash_attrs, .is_bin_visible = amdgpu_bin_flash_attr_is_visible, .is_visible = amdgpu_flash_attr_is_visible, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f0924aa3f4e4..83a5f7180cde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1733,7 +1733,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, - struct kobject *kobj, struct bin_attribute *attr, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct amdgpu_ras *con = @@ -2065,8 +2065,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) /* debugfs end */ /* ras fs */ -static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, - amdgpu_ras_sysfs_badpages_read, NULL, 0); +static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, + amdgpu_ras_sysfs_badpages_read, NULL, 0); static DEVICE_ATTR(features, S_IRUGO, amdgpu_ras_sysfs_features_read, NULL); static DEVICE_ATTR(version, 0444, @@ -2088,7 +2088,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) &con->event_state_attr.attr, NULL }; - struct bin_attribute *bin_attrs[] = { + const struct bin_attribute *bin_attrs[] = { NULL, NULL, }; @@ -2114,11 +2114,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ - bin_attr_gpu_vram_bad_pages.private = NULL; con->badpages_attr = bin_attr_gpu_vram_bad_pages; + sysfs_bin_attr_init(&con->badpages_attr); bin_attrs[0] = &con->badpages_attr; - group.bin_attrs = bin_attrs; - sysfs_bin_attr_init(bin_attrs[0]); + group.bin_attrs_new = bin_attrs; } r = sysfs_create_group(&adev->dev->kobj, &group); From 600aa8d31af9bf46c62ca0375cc2abb4f1d20c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:51 +0100 Subject: [PATCH 0361/2157] drm/amd/display: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Harry Wentland Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-5-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541..e27d07739632 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -614,7 +614,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) * incorrect/corrupted and we should correct our SRM by getting it from PSP */ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; @@ -638,7 +638,7 @@ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, } static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; @@ -698,8 +698,8 @@ static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, static const struct bin_attribute data_attr = { .attr = {.name = "hdcp_srm", .mode = 0664}, .size = PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, /* Limit SRM size */ - .write = srm_data_write, - .read = srm_data_read, + .write_new = srm_data_write, + .read_new = srm_data_read, }; struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, From 4ab0279857bb0b1c7a1ed61186527e33db693b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 15:10:53 +0100 Subject: [PATCH 0362/2157] fsi: core: Use const 'struct bin_attribute' callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now provides callback variants that explicitly take a const pointer. Make use of it to match the attribute definition. Signed-off-by: Thomas Weißschuh Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-fsi-v1-1-b717f76a0146@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/fsi-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index e2e1e9df6115..50e8736039fe 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -554,7 +554,7 @@ static unsigned long aligned_access_size(size_t offset, size_t count) } static ssize_t fsi_slave_sysfs_raw_read(struct file *file, - struct kobject *kobj, struct bin_attribute *attr, char *buf, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct fsi_slave *slave = to_fsi_slave(kobj_to_dev(kobj)); @@ -581,7 +581,7 @@ static ssize_t fsi_slave_sysfs_raw_read(struct file *file, } static ssize_t fsi_slave_sysfs_raw_write(struct file *file, - struct kobject *kobj, struct bin_attribute *attr, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct fsi_slave *slave = to_fsi_slave(kobj_to_dev(kobj)); @@ -613,8 +613,8 @@ static const struct bin_attribute fsi_slave_raw_attr = { .mode = 0600, }, .size = 0, - .read = fsi_slave_sysfs_raw_read, - .write = fsi_slave_sysfs_raw_write, + .read_new = fsi_slave_sysfs_raw_read, + .write_new = fsi_slave_sysfs_raw_write, }; static void fsi_slave_release(struct device *dev) From f800cc58598eb4564cabcc4129d5d1a6f7f598b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 13:25:12 +0100 Subject: [PATCH 0363/2157] accel/habanalabs: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-habanalabs-v1-1-b35463197efb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/accel/habanalabs/common/sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index e9f8ccc0bbf9..9d58efa2ff38 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -368,7 +368,7 @@ static ssize_t max_power_store(struct device *dev, } static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t offset, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t max_size) { struct device *dev = kobj_to_dev(kobj); @@ -443,10 +443,10 @@ static DEVICE_ATTR_RO(security_enabled); static DEVICE_ATTR_RO(module_id); static DEVICE_ATTR_RO(parent_device); -static struct bin_attribute bin_attr_eeprom = { +static const struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, .size = PAGE_SIZE, - .read = eeprom_read_handler + .read_new = eeprom_read_handler }; static struct attribute *hl_dev_attrs[] = { @@ -472,14 +472,14 @@ static struct attribute *hl_dev_attrs[] = { NULL, }; -static struct bin_attribute *hl_dev_bin_attrs[] = { +static const struct bin_attribute *const hl_dev_bin_attrs[] = { &bin_attr_eeprom, NULL }; static struct attribute_group hl_dev_attr_group = { .attrs = hl_dev_attrs, - .bin_attrs = hl_dev_bin_attrs, + .bin_attrs_new = hl_dev_bin_attrs, }; static struct attribute_group hl_dev_clks_attr_group; From f9c883f0df2765c0cb1db3c306a36787ccb2055a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 22 Dec 2024 21:00:43 +0100 Subject: [PATCH 0364/2157] Input: goodix-berlin - constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241222-sysfs-const-bin_attr-input-v1-1-1229dbe5ae71@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/goodix_berlin_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 3fc03cf0ca23..9b53d98055e9 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -673,7 +673,7 @@ static void goodix_berlin_power_off_act(void *data) } static ssize_t registers_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -686,7 +686,7 @@ static ssize_t registers_read(struct file *filp, struct kobject *kobj, } static ssize_t registers_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -698,15 +698,15 @@ static ssize_t registers_write(struct file *filp, struct kobject *kobj, return error ? error : count; } -static BIN_ATTR_ADMIN_RW(registers, 0); +static const BIN_ATTR_ADMIN_RW(registers, 0); -static struct bin_attribute *goodix_berlin_bin_attrs[] = { +static const struct bin_attribute *const goodix_berlin_bin_attrs[] = { &bin_attr_registers, NULL, }; static const struct attribute_group goodix_berlin_attr_group = { - .bin_attrs = goodix_berlin_bin_attrs, + .bin_attrs_new = goodix_berlin_bin_attrs, }; const struct attribute_group *goodix_berlin_groups[] = { From ae7a15fb2920844e61cc71199cd1a08795716c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 15:15:48 +0100 Subject: [PATCH 0365/2157] efi/mokvar: Use const 'struct bin_attribute' callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now provides callback variants that explicitly take a const pointer. Use them so the non-const variants can be removed. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-mokvar-v1-1-d5a3d1fff8d1@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/mokvar-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 5ed0602c2f75..59b090849a2a 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -266,7 +266,7 @@ struct efi_mokvar_table_entry *efi_mokvar_entry_find(const char *name) * amount of data in this mokvar config table entry. */ static ssize_t efi_mokvar_sysfs_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct efi_mokvar_table_entry *mokvar_entry = bin_attr->private; @@ -343,7 +343,7 @@ static int __init efi_mokvar_sysfs_init(void) mokvar_sysfs->bin_attr.attr.name = mokvar_entry->name; mokvar_sysfs->bin_attr.attr.mode = 0400; mokvar_sysfs->bin_attr.size = mokvar_entry->data_size; - mokvar_sysfs->bin_attr.read = efi_mokvar_sysfs_read; + mokvar_sysfs->bin_attr.read_new = efi_mokvar_sysfs_read; err = sysfs_create_bin_file(mokvar_kobj, &mokvar_sysfs->bin_attr); From 05a9896fa9e15466456a1b1dc9d2eacdf3551b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 14:18:59 +0100 Subject: [PATCH 0366/2157] pcmcia: cistpl: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-pcmcia-v1-1-ebb82e47d834@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/cistpl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index d018f36f3a89..0c801e4ccc6c 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1540,7 +1540,7 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf, static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { unsigned int size = 0x200; @@ -1571,7 +1571,7 @@ static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj, static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pcmcia_socket *s; @@ -1605,6 +1605,6 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj, const struct bin_attribute pccard_cis_attr = { .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR }, .size = 0x200, - .read = pccard_show_cis, - .write = pccard_store_cis, + .read_new = pccard_show_cis, + .write_new = pccard_store_cis, }; From 10f10210f674a79b1214abae58077475f5de81b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:43 +0100 Subject: [PATCH 0367/2157] powerpc/secvar: Mark __init functions as such MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The setup functions are only called during the init phase of the kernel. They can be discarded and their memory reused after that. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-1-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/secvar-sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index fbeb1cbac01b..b7536fbe8c4f 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -130,7 +130,7 @@ static const struct kobj_type secvar_ktype = { .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { u64 varsize; @@ -145,7 +145,7 @@ static int update_kobj_size(void) return 0; } -static int secvar_sysfs_config(struct kobject *kobj) +static __init int secvar_sysfs_config(struct kobject *kobj) { struct attribute_group config_group = { .name = "config", @@ -158,7 +158,7 @@ static int secvar_sysfs_config(struct kobject *kobj) return 0; } -static int add_var(const char *name) +static __init int add_var(const char *name) { struct kobject *kobj; int rc; @@ -181,7 +181,7 @@ static int add_var(const char *name) return 0; } -static int secvar_sysfs_load(void) +static __init int secvar_sysfs_load(void) { u64 namesize = 0; char *name; @@ -209,7 +209,7 @@ static int secvar_sysfs_load(void) return rc; } -static int secvar_sysfs_load_static(void) +static __init int secvar_sysfs_load_static(void) { const char * const *name_ptr = secvar_ops->var_names; int rc; @@ -224,7 +224,7 @@ static int secvar_sysfs_load_static(void) return 0; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_init(void) { u64 max_size; int rc; From 982d13db108c9a30b98d1eb3445011dbc5616532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:44 +0100 Subject: [PATCH 0368/2157] powerpc/secvar: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-2-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/secvar-sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index b7536fbe8c4f..afb690a172b4 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -52,7 +52,7 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { char *data; @@ -85,7 +85,7 @@ static ssize_t data_read(struct file *filep, struct kobject *kobj, } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -104,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -121,7 +121,7 @@ static struct attribute *secvar_attrs[] = { static const struct attribute_group secvar_attr_group = { .attrs = secvar_attrs, - .bin_attrs = secvar_bin_attrs, + .bin_attrs_new = secvar_bin_attrs, }; __ATTRIBUTE_GROUPS(secvar_attr); From f629576662e024f57cb2c8d4ca6f297db9b904a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:45 +0100 Subject: [PATCH 0369/2157] powerpc/powernv/ultravisor: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-3-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/ultravisor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index 67c8c4b2d8b1..157d9a8134e4 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -32,15 +32,15 @@ int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, static struct memcons *uv_memcons; static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return memcons_copy(uv_memcons, to, pos, count); } -static struct bin_attribute uv_msglog_attr = { +static struct bin_attribute uv_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = uv_msglog_read + .read_new = uv_msglog_read }; static int __init uv_init(void) From f2b62c03a28279cbeeb34d12037740a7e7703a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:46 +0100 Subject: [PATCH 0370/2157] powerpc/powernv/opal: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-4-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-core.c | 10 +++++----- arch/powerpc/platforms/powernv/opal-dump.c | 4 ++-- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++-- arch/powerpc/platforms/powernv/opal-flash.c | 4 ++-- arch/powerpc/platforms/powernv/opal-msglog.c | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index c9a9b759cc92..d95a5f67211b 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -159,7 +159,7 @@ static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, * Returns number of bytes read on success, -errno on failure. */ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { struct opalcore *m; @@ -206,9 +206,9 @@ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, return (tpos - pos); } -static struct bin_attribute opal_core_attr = { +static struct bin_attribute opal_core_attr __ro_after_init = { .attr = {.name = "core", .mode = 0400}, - .read = read_opalcore + .read_new = read_opalcore }; /* @@ -599,7 +599,7 @@ static struct attribute *mpipl_attr[] = { NULL, }; -static struct bin_attribute *mpipl_bin_attr[] = { +static const struct bin_attribute *const mpipl_bin_attr[] = { &opal_core_attr, NULL, @@ -607,7 +607,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, - .bin_attrs = mpipl_bin_attr, + .bin_attrs_new = mpipl_bin_attr, }; static int __init opalcore_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 608e4b68c5ea..27e25693cf39 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -286,7 +286,7 @@ static int64_t dump_read_data(struct dump_obj *dump) } static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { ssize_t rc; @@ -342,7 +342,7 @@ static void create_dump_obj(uint32_t id, size_t size, uint32_t type) dump->dump_attr.attr.name = "dump"; dump->dump_attr.attr.mode = 0400; dump->dump_attr.size = size; - dump->dump_attr.read = dump_attr_read; + dump->dump_attr.read_new = dump_attr_read; dump->id = id; dump->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5db1e733143b..de33f354e9fd 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -156,7 +156,7 @@ static const struct kobj_type elog_ktype = { #define OPAL_MAX_ERRLOG_SIZE 16384 static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int opal_rc; @@ -203,7 +203,7 @@ static void create_elog_obj(uint64_t id, size_t size, uint64_t type) elog->raw_attr.attr.name = "raw"; elog->raw_attr.attr.mode = 0400; elog->raw_attr.size = size; - elog->raw_attr.read = raw_attr_read; + elog->raw_attr.read_new = raw_attr_read; elog->id = id; elog->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index d5ea04e8e4c5..fd8c8621e973 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -432,7 +432,7 @@ static int alloc_image_buf(char *buffer, size_t count) * and pre-allocate required memory. */ static ssize_t image_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int rc; @@ -493,7 +493,7 @@ static ssize_t image_data_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute image_data_attr = { .attr = {.name = "image", .mode = 0200}, .size = MAX_IMAGE_SIZE, /* Limit image size */ - .write = image_data_write, + .write_new = image_data_write, }; static struct kobj_attribute validate_attribute = diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 22d6efe17b0d..f1988d0ab45c 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -94,15 +94,15 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) } static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return opal_msglog_copy(to, pos, count); } -static struct bin_attribute opal_msglog_attr = { +static struct bin_attribute opal_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = opal_msglog_read + .read_new = opal_msglog_read }; struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) From 4aad348d0fa62f816ae3e9dfbfc6663443357c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:47 +0100 Subject: [PATCH 0371/2157] powerpc/perf/hv-24x7: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-5-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/hv-24x7.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index d400fa391c27..b0768f3d2893 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -998,7 +998,7 @@ static int create_events_from_catalog(struct attribute ***events_, } static ssize_t catalog_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { long hret; @@ -1108,14 +1108,14 @@ PAGE_0_ATTR(catalog_version, "%lld\n", (unsigned long long)be64_to_cpu(page_0->version)); PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); -static BIN_ATTR_RO(catalog, 0/* real length varies */); +static const BIN_ATTR_RO(catalog, 0/* real length varies */); static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); static DEVICE_ATTR_RO(cpumask); -static struct bin_attribute *if_bin_attrs[] = { +static const struct bin_attribute *const if_bin_attrs[] = { &bin_attr_catalog, NULL, }; @@ -1141,7 +1141,7 @@ static struct attribute *if_attrs[] = { static const struct attribute_group if_group = { .name = "interface", - .bin_attrs = if_bin_attrs, + .bin_attrs_new = if_bin_attrs, .attrs = if_attrs, }; From 80f756cabfbf81d0d629b45eb8e3f9f0196728d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 14 Jan 2025 22:50:20 +0100 Subject: [PATCH 0372/2157] firmware: qemu_fw_cfg: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Acked-by: Gabriel Somlo Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20250114-sysfs-const-bin_attr-qemu_fw_cfg-v1-1-76f525a3ee72@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qemu_fw_cfg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index d58da3e4500a..2615fb780e3c 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -460,7 +460,7 @@ static const struct kobj_type fw_cfg_sysfs_entry_ktype = { /* raw-read method and attribute */ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct fw_cfg_sysfs_entry *entry = to_entry(kobj); @@ -474,9 +474,9 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj, return fw_cfg_read_blob(entry->select, buf, pos, count); } -static struct bin_attribute fw_cfg_sysfs_attr_raw = { +static const struct bin_attribute fw_cfg_sysfs_attr_raw = { .attr = { .name = "raw", .mode = S_IRUSR }, - .read = fw_cfg_sysfs_read_raw, + .read_new = fw_cfg_sysfs_read_raw, }; /* From 5d0fbf548cbfcd1d7559b2daddedbeaa212d477f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 13:22:54 +0100 Subject: [PATCH 0373/2157] rapidio: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-rapidio-v1-1-0f47f4719683@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/rio-sysfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 90d391210533..6f89b232f1d5 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -114,7 +114,7 @@ static struct attribute *rio_dev_attrs[] = { static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(kobj_to_dev(kobj)); @@ -185,7 +185,7 @@ rio_read_config(struct file *filp, struct kobject *kobj, static ssize_t rio_write_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(kobj_to_dev(kobj)); @@ -241,17 +241,17 @@ rio_write_config(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute rio_config_attr = { +static const struct bin_attribute rio_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, }, .size = RIO_MAINT_SPACE_SZ, - .read = rio_read_config, - .write = rio_write_config, + .read_new = rio_read_config, + .write_new = rio_write_config, }; -static struct bin_attribute *rio_dev_bin_attrs[] = { +static const struct bin_attribute *const rio_dev_bin_attrs[] = { &rio_config_attr, NULL, }; @@ -278,7 +278,7 @@ static umode_t rio_dev_is_attr_visible(struct kobject *kobj, static const struct attribute_group rio_dev_group = { .attrs = rio_dev_attrs, .is_visible = rio_dev_is_attr_visible, - .bin_attrs = rio_dev_bin_attrs, + .bin_attrs_new = rio_dev_bin_attrs, }; const struct attribute_group *rio_dev_groups[] = { From e965efc4aa14d9195d26a956a6bff5041110a155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 22 Nov 2024 11:31:15 +0100 Subject: [PATCH 0374/2157] efi: rci2: mark bin_attribute as __ro_after_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attribute is only modified during __init phase. Protect it against accidental or intentional modifications afterwards. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241122-sysfs-const-bin_attr-rci2-v1-1-3db1ec9aa203@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/rci2-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c index 4fd45d6f69a4..c1bedd244817 100644 --- a/drivers/firmware/efi/rci2-table.c +++ b/drivers/firmware/efi/rci2-table.c @@ -40,7 +40,7 @@ static u8 *rci2_base; static u32 rci2_table_len; unsigned long rci2_table_phys __ro_after_init = EFI_INVALID_TABLE_ADDR; -static BIN_ATTR_SIMPLE_ADMIN_RO(rci2); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(rci2); static u16 checksum(void) { From c749f058b4371430a8338e1ca72b9ae38fef613b Mon Sep 17 00:00:00 2001 From: Kannappan R Date: Thu, 20 Feb 2025 16:13:39 +0200 Subject: [PATCH 0375/2157] USB: core: Add eUSB2 descriptor and parsing in USB core Add support for the 'eUSB2 Isochronous Endpoint Companion Descriptor' introduced in the recent USB 2.0 specification 'USB 2.0 Double Isochronous IN Bandwidth' ECN. It allows embedded USB2 (eUSB2) devices to report and use higher bandwidths for isochronous IN transfers in order to support higher camera resolutions on the lid of laptops and tablets with minimal change to the USB2 protocol. The motivation for expanding USB 2.0 is further clarified in an additional Embedded USB2 version 2.0 (eUSB2v2) supplement to the USB 2.0 specification. It points out this is optimized for performance, power and cost by using the USB 2.0 low-voltage, power efficient PHY and half-duplex link for the asymmetric camera bandwidth needs, avoiding the costly and complex full-duplex USB 3.x symmetric link and gigabit receivers. eUSB2 devices that support the higher isochronous IN bandwidth and the new descriptor can be identified by their device descriptor bcdUSB value of 0x0220 Co-developed-by: Amardeep Rai Signed-off-by: Amardeep Rai Signed-off-by: Kannappan R Co-developed-by: Mathias Nyman Signed-off-by: Mathias Nyman Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250220141339.1939448-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 51 ++++++++++++++++++++++++++++++++---- include/linux/usb.h | 8 +++--- include/uapi/linux/usb/ch9.h | 15 +++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index f7bf8d1de3ad..13bd4ec4ea5f 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -64,6 +64,37 @@ static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev, memcpy(&ep->ssp_isoc_ep_comp, desc, USB_DT_SSP_ISOC_EP_COMP_SIZE); } +static void usb_parse_eusb2_isoc_endpoint_companion(struct device *ddev, + int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, + unsigned char *buffer, int size) +{ + struct usb_eusb2_isoc_ep_comp_descriptor *desc; + struct usb_descriptor_header *h; + + /* + * eUSB2 isochronous endpoint companion descriptor for this endpoint + * shall be declared before the next endpoint or interface descriptor + */ + while (size >= USB_DT_EUSB2_ISOC_EP_COMP_SIZE) { + h = (struct usb_descriptor_header *)buffer; + + if (h->bDescriptorType == USB_DT_EUSB2_ISOC_ENDPOINT_COMP) { + desc = (struct usb_eusb2_isoc_ep_comp_descriptor *)buffer; + ep->eusb2_isoc_ep_comp = *desc; + return; + } + if (h->bDescriptorType == USB_DT_ENDPOINT || + h->bDescriptorType == USB_DT_INTERFACE) + break; + + buffer += h->bLength; + size -= h->bLength; + } + + dev_notice(ddev, "No eUSB2 isoc ep %d companion for config %d interface %d altsetting %d\n", + ep->desc.bEndpointAddress, cfgno, inum, asnum); +} + static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, unsigned char *buffer, int size) @@ -258,8 +289,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int n, i, j, retval; unsigned int maxp; const unsigned short *maxpacket_maxes; + u16 bcdUSB; d = (struct usb_endpoint_descriptor *) buffer; + bcdUSB = le16_to_cpu(udev->descriptor.bcdUSB); buffer += d->bLength; size -= d->bLength; @@ -409,15 +442,17 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, /* * Validate the wMaxPacketSize field. - * Some devices have isochronous endpoints in altsetting 0; - * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 - * (see the end of section 5.6.3), so don't warn about them. + * eUSB2 devices (see USB 2.0 Double Isochronous IN ECN 9.6.6 Endpoint) + * and devices with isochronous endpoints in altsetting 0 (see USB 2.0 + * end of section 5.6.3) have wMaxPacketSize = 0. + * So don't warn about those. */ maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); - if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { + + if (maxp == 0 && bcdUSB != 0x0220 && + !(usb_endpoint_xfer_isoc(d) && asnum == 0)) dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); - } /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ @@ -465,6 +500,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, maxp); } + /* Parse a possible eUSB2 periodic endpoint companion descriptor */ + if (bcdUSB == 0x0220 && d->wMaxPacketSize == 0 && + (usb_endpoint_xfer_isoc(d) || usb_endpoint_xfer_int(d))) + usb_parse_eusb2_isoc_endpoint_companion(ddev, cfgno, inum, asnum, + endpoint, buffer, size); + /* Parse a possible SuperSpeed endpoint companion descriptor */ if (udev->speed >= USB_SPEED_SUPER) usb_parse_ss_endpoint_companion(ddev, cfgno, diff --git a/include/linux/usb.h b/include/linux/usb.h index cfa8005e24f9..b46738701f8d 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -51,6 +51,7 @@ struct ep_device; * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint * @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint + * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb @@ -64,9 +65,10 @@ struct ep_device; * descriptor within an active interface in a given USB configuration. */ struct usb_host_endpoint { - struct usb_endpoint_descriptor desc; - struct usb_ss_ep_comp_descriptor ss_ep_comp; - struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_endpoint_descriptor desc; + struct usb_ss_ep_comp_descriptor ss_ep_comp; + struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_eusb2_isoc_ep_comp_descriptor eusb2_isoc_ep_comp; struct list_head urb_list; void *hcpriv; struct ep_device *ep_dev; /* For sysfs info */ diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 91f0f7e214a5..475af9358173 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -253,6 +253,9 @@ struct usb_ctrlrequest { #define USB_DT_BOS 0x0f #define USB_DT_DEVICE_CAPABILITY 0x10 #define USB_DT_WIRELESS_ENDPOINT_COMP 0x11 +/* From the eUSB2 spec */ +#define USB_DT_EUSB2_ISOC_ENDPOINT_COMP 0x12 +/* From Wireless USB spec */ #define USB_DT_WIRE_ADAPTER 0x21 /* From USB Device Firmware Upgrade Specification, Revision 1.1 */ #define USB_DT_DFU_FUNCTIONAL 0x21 @@ -675,6 +678,18 @@ static inline int usb_endpoint_interrupt_type( /*-------------------------------------------------------------------------*/ +/* USB_DT_EUSB2_ISOC_ENDPOINT_COMP: eUSB2 Isoch Endpoint Companion descriptor */ +struct usb_eusb2_isoc_ep_comp_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __le16 wMaxPacketSize; + __le32 dwBytesPerInterval; +} __attribute__ ((packed)); + +#define USB_DT_EUSB2_ISOC_EP_COMP_SIZE 8 + +/*-------------------------------------------------------------------------*/ + /* USB_DT_SSP_ISOC_ENDPOINT_COMP: SuperSpeedPlus Isochronous Endpoint Companion * descriptor */ From ac9c5170a18162d45c6edd1f0fa2d2b2504bc2cb Mon Sep 17 00:00:00 2001 From: Subramanian Mohan Date: Wed, 19 Feb 2025 09:36:15 +0530 Subject: [PATCH 0376/2157] pps: generators: replace copy of pps-gen info struct with const pointer Some PPS generator drivers may need to retrieve a pointer to their internal data while executing the PPS generator enable() method. During the driver registration the pps_gen_device pointer is returned from the framework, and for that reason, there is difficulty in getting generator driver data back in the enable function. We won't be able to use container_of macro as it results in static assert, and we might end up in using static pointer. To solve the issue and to get back the generator driver data back, we should not copy the struct pps_gen_source_info within the struct pps_gen_device during the registration stage, but simply save the pointer of the driver one. In this manner, driver may get a pointer to their internal data as shown below: struct pps_gen_foo_data_s { ... struct pps_gen_source_info gen_info; struct pps_gen_device *pps_gen; ... }; static int __init pps_gen_foo_init(void) { struct pps_gen_foo_data_s *foo; ... foo->pps_gen = pps_gen_register_source(&foo->gen_info); ... } Then, in the enable() method, we can retrieve the pointer to the main struct by using the code below: static int pps_gen_foo_enable(struct pps_gen_device *pps_gen, bool enable) { struct pps_gen_foo_data_s *foo = container_of(pps_gen->info, struct pps_gen_foo_data_s, gen_info); ... } Signed-off-by: Rodolfo Giometti Tested-by: Subramanian Mohan Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Subramanian Mohan Link: https://lore.kernel.org/r/20250219040618.70962-2-subramanian.mohan@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/pps.rst | 3 +-- drivers/pps/generators/pps_gen-dummy.c | 2 +- drivers/pps/generators/pps_gen.c | 14 +++++++------- drivers/pps/generators/sysfs.c | 6 +++--- include/linux/pps_gen_kernel.h | 4 ++-- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst index 71ad04c82d6c..04f1b88778fc 100644 --- a/Documentation/driver-api/pps.rst +++ b/Documentation/driver-api/pps.rst @@ -206,8 +206,7 @@ To do so the class pps-gen has been added. PPS generators can be registered in the kernel by defining a struct pps_gen_source_info as follows:: - static struct pps_gen_source_info pps_gen_dummy_info = { - .name = "dummy", + static const struct pps_gen_source_info pps_gen_dummy_info = { .use_system_clock = true, .get_time = pps_gen_dummy_get_time, .enable = pps_gen_dummy_enable, diff --git a/drivers/pps/generators/pps_gen-dummy.c b/drivers/pps/generators/pps_gen-dummy.c index b284c200cbe5..55de4aecf35e 100644 --- a/drivers/pps/generators/pps_gen-dummy.c +++ b/drivers/pps/generators/pps_gen-dummy.c @@ -61,7 +61,7 @@ static int pps_gen_dummy_enable(struct pps_gen_device *pps_gen, bool enable) * The PPS info struct */ -static struct pps_gen_source_info pps_gen_dummy_info = { +static const struct pps_gen_source_info pps_gen_dummy_info = { .use_system_clock = true, .get_time = pps_gen_dummy_get_time, .enable = pps_gen_dummy_enable, diff --git a/drivers/pps/generators/pps_gen.c b/drivers/pps/generators/pps_gen.c index ca592f1736f4..5b8bb454913c 100644 --- a/drivers/pps/generators/pps_gen.c +++ b/drivers/pps/generators/pps_gen.c @@ -66,7 +66,7 @@ static long pps_gen_cdev_ioctl(struct file *file, if (ret) return -EFAULT; - ret = pps_gen->info.enable(pps_gen, status); + ret = pps_gen->info->enable(pps_gen, status); if (ret) return ret; pps_gen->enabled = status; @@ -76,7 +76,7 @@ static long pps_gen_cdev_ioctl(struct file *file, case PPS_GEN_USESYSTEMCLOCK: dev_dbg(pps_gen->dev, "PPS_GEN_USESYSTEMCLOCK\n"); - ret = put_user(pps_gen->info.use_system_clock, uiuarg); + ret = put_user(pps_gen->info->use_system_clock, uiuarg); if (ret) return -EFAULT; @@ -175,7 +175,7 @@ static int pps_gen_register_cdev(struct pps_gen_device *pps_gen) devt = MKDEV(MAJOR(pps_gen_devt), pps_gen->id); cdev_init(&pps_gen->cdev, &pps_gen_cdev_fops); - pps_gen->cdev.owner = pps_gen->info.owner; + pps_gen->cdev.owner = pps_gen->info->owner; err = cdev_add(&pps_gen->cdev, devt, 1); if (err) { @@ -183,8 +183,8 @@ static int pps_gen_register_cdev(struct pps_gen_device *pps_gen) MAJOR(pps_gen_devt), pps_gen->id); goto free_ida; } - pps_gen->dev = device_create(pps_gen_class, pps_gen->info.parent, devt, - pps_gen, "pps-gen%d", pps_gen->id); + pps_gen->dev = device_create(pps_gen_class, pps_gen->info->parent, devt, + pps_gen, "pps-gen%d", pps_gen->id); if (IS_ERR(pps_gen->dev)) { err = PTR_ERR(pps_gen->dev); goto del_cdev; @@ -225,7 +225,7 @@ static void pps_gen_unregister_cdev(struct pps_gen_device *pps_gen) * Return: the PPS generator device in case of success, and ERR_PTR(errno) * otherwise. */ -struct pps_gen_device *pps_gen_register_source(struct pps_gen_source_info *info) +struct pps_gen_device *pps_gen_register_source(const struct pps_gen_source_info *info) { struct pps_gen_device *pps_gen; int err; @@ -235,7 +235,7 @@ struct pps_gen_device *pps_gen_register_source(struct pps_gen_source_info *info) err = -ENOMEM; goto pps_gen_register_source_exit; } - pps_gen->info = *info; + pps_gen->info = info; pps_gen->enabled = false; init_waitqueue_head(&pps_gen->queue); diff --git a/drivers/pps/generators/sysfs.c b/drivers/pps/generators/sysfs.c index faf8b1c6d202..6d6bc0006fea 100644 --- a/drivers/pps/generators/sysfs.c +++ b/drivers/pps/generators/sysfs.c @@ -19,7 +19,7 @@ static ssize_t system_show(struct device *dev, struct device_attribute *attr, { struct pps_gen_device *pps_gen = dev_get_drvdata(dev); - return sysfs_emit(buf, "%d\n", pps_gen->info.use_system_clock); + return sysfs_emit(buf, "%d\n", pps_gen->info->use_system_clock); } static DEVICE_ATTR_RO(system); @@ -30,7 +30,7 @@ static ssize_t time_show(struct device *dev, struct device_attribute *attr, struct timespec64 time; int ret; - ret = pps_gen->info.get_time(pps_gen, &time); + ret = pps_gen->info->get_time(pps_gen, &time); if (ret) return ret; @@ -49,7 +49,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - ret = pps_gen->info.enable(pps_gen, status); + ret = pps_gen->info->enable(pps_gen, status); if (ret) return ret; pps_gen->enabled = status; diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h index 022ea0ac4440..6214c8aa2e02 100644 --- a/include/linux/pps_gen_kernel.h +++ b/include/linux/pps_gen_kernel.h @@ -43,7 +43,7 @@ struct pps_gen_source_info { /* The main struct */ struct pps_gen_device { - struct pps_gen_source_info info; /* PSS generator info */ + const struct pps_gen_source_info *info; /* PSS generator info */ bool enabled; /* PSS generator status */ unsigned int event; @@ -70,7 +70,7 @@ extern const struct attribute_group *pps_gen_groups[]; */ extern struct pps_gen_device *pps_gen_register_source( - struct pps_gen_source_info *info); + const struct pps_gen_source_info *info); extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen); extern void pps_gen_event(struct pps_gen_device *pps_gen, unsigned int event, void *data); From c89755d1111fa17ecfb69b50c336778a2d37dae4 Mon Sep 17 00:00:00 2001 From: Subramanian Mohan Date: Wed, 19 Feb 2025 09:36:16 +0530 Subject: [PATCH 0377/2157] pps: generators: Add PPS Generator TIO Driver The Intel Timed IO PPS generator driver outputs a PPS signal using dedicated hardware that is more accurate than software actuated PPS. The Timed IO hardware generates output events using the ART timer. The ART timer period varies based on platform type, but is less than 100 nanoseconds for all current platforms. Timed IO output accuracy is within 1 ART period. PPS output is enabled by writing '1' the 'enable' sysfs attribute. The driver uses hrtimers to schedule a wake-up 10 ms before each event (edge) target time. At wakeup, the driver converts the target time in terms of CLOCK_REALTIME to ART trigger time and writes this to the Timed IO hardware. The Timed IO hardware generates an event precisely at the requested system time without software involvement. Co-developed-by: Christopher Hall Signed-off-by: Christopher Hall Co-developed-by: Pandith N Signed-off-by: Pandith N Co-developed-by: Thejesh Reddy T R Signed-off-by: Thejesh Reddy T R Signed-off-by: Lakshmi Sowjanya D Reviewed-by: Eddie Dong Reviewed-by: Andy Shevchenko Acked-by: Rodolfo Giometti Signed-off-by: Subramanian Mohan Link: https://lore.kernel.org/r/20250219040618.70962-3-subramanian.mohan@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/pps/generators/Kconfig | 16 ++ drivers/pps/generators/Makefile | 1 + drivers/pps/generators/pps_gen_tio.c | 272 +++++++++++++++++++++++++++ 3 files changed, 289 insertions(+) create mode 100644 drivers/pps/generators/pps_gen_tio.c diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig index cd94bf3bfaf2..b3f340ed3163 100644 --- a/drivers/pps/generators/Kconfig +++ b/drivers/pps/generators/Kconfig @@ -31,4 +31,20 @@ config PPS_GENERATOR_PARPORT utilizes STROBE pin of a parallel port to send PPS signals. It uses parport abstraction layer and hrtimers to precisely control the signal. +config PPS_GENERATOR_TIO + tristate "TIO PPS signal generator" + depends on X86 && CPU_SUP_INTEL + help + If you say yes here you get support for a PPS TIO signal generator + which generates a pulse at a prescribed time based on the system clock. + It uses time translation and hrtimers to precisely generate a pulse. + This hardware is present on 2019 and newer Intel CPUs. However, this + driver is not useful without adding highly specialized hardware outside + the Linux system to observe these pulses. + + To compile this driver as a module, choose M here: the module + will be called pps_gen_tio. + + If unsure, say N. + endif # PPS_GENERATOR diff --git a/drivers/pps/generators/Makefile b/drivers/pps/generators/Makefile index dc1aa5a4688b..e109920e8a2d 100644 --- a/drivers/pps/generators/Makefile +++ b/drivers/pps/generators/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_PPS_GENERATOR) := pps_gen_core.o obj-$(CONFIG_PPS_GENERATOR_DUMMY) += pps_gen-dummy.o obj-$(CONFIG_PPS_GENERATOR_PARPORT) += pps_gen_parport.o +obj-$(CONFIG_PPS_GENERATOR_TIO) += pps_gen_tio.o ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c new file mode 100644 index 000000000000..6c46b46c66cd --- /dev/null +++ b/drivers/pps/generators/pps_gen_tio.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel PPS signal Generator Driver + * + * Copyright (C) 2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define TIOCTL 0x00 +#define TIOCOMPV 0x10 +#define TIOEC 0x30 + +/* Control Register */ +#define TIOCTL_EN BIT(0) +#define TIOCTL_DIR BIT(1) +#define TIOCTL_EP GENMASK(3, 2) +#define TIOCTL_EP_RISING_EDGE FIELD_PREP(TIOCTL_EP, 0) +#define TIOCTL_EP_FALLING_EDGE FIELD_PREP(TIOCTL_EP, 1) +#define TIOCTL_EP_TOGGLE_EDGE FIELD_PREP(TIOCTL_EP, 2) + +/* Safety time to set hrtimer early */ +#define SAFE_TIME_NS (10 * NSEC_PER_MSEC) + +#define MAGIC_CONST (NSEC_PER_SEC - SAFE_TIME_NS) +#define ART_HW_DELAY_CYCLES 2 + +struct pps_tio { + struct pps_gen_source_info gen_info; + struct pps_gen_device *pps_gen; + struct hrtimer timer; + void __iomem *base; + u32 prev_count; + spinlock_t lock; + struct device *dev; +}; + +static inline u32 pps_tio_read(u32 offset, struct pps_tio *tio) +{ + return readl(tio->base + offset); +} + +static inline void pps_ctl_write(u32 value, struct pps_tio *tio) +{ + writel(value, tio->base + TIOCTL); +} + +/* + * For COMPV register, It's safer to write + * higher 32-bit followed by lower 32-bit + */ +static inline void pps_compv_write(u64 value, struct pps_tio *tio) +{ + hi_lo_writeq(value, tio->base + TIOCOMPV); +} + +static inline ktime_t first_event(struct pps_tio *tio) +{ + return ktime_set(ktime_get_real_seconds() + 1, MAGIC_CONST); +} + +static u32 pps_tio_disable(struct pps_tio *tio) +{ + u32 ctrl; + + ctrl = pps_tio_read(TIOCTL, tio); + pps_compv_write(0, tio); + + ctrl &= ~TIOCTL_EN; + pps_ctl_write(ctrl, tio); + tio->pps_gen->enabled = false; + tio->prev_count = 0; + return ctrl; +} + +static void pps_tio_enable(struct pps_tio *tio) +{ + u32 ctrl; + + ctrl = pps_tio_read(TIOCTL, tio); + ctrl |= TIOCTL_EN; + pps_ctl_write(ctrl, tio); + tio->pps_gen->enabled = true; +} + +static void pps_tio_direction_output(struct pps_tio *tio) +{ + u32 ctrl; + + ctrl = pps_tio_disable(tio); + + /* + * We enable the device, be sure that the + * 'compare' value is invalid + */ + pps_compv_write(0, tio); + + ctrl &= ~(TIOCTL_DIR | TIOCTL_EP); + ctrl |= TIOCTL_EP_TOGGLE_EDGE; + pps_ctl_write(ctrl, tio); + pps_tio_enable(tio); +} + +static bool pps_generate_next_pulse(ktime_t expires, struct pps_tio *tio) +{ + u64 art; + + if (!ktime_real_to_base_clock(expires, CSID_X86_ART, &art)) { + pps_tio_disable(tio); + return false; + } + + pps_compv_write(art - ART_HW_DELAY_CYCLES, tio); + return true; +} + +static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer) +{ + ktime_t expires, now; + u32 event_count; + struct pps_tio *tio = container_of(timer, struct pps_tio, timer); + + guard(spinlock)(&tio->lock); + + /* + * Check if any event is missed. + * If an event is missed, TIO will be disabled. + */ + event_count = pps_tio_read(TIOEC, tio); + if (tio->prev_count && tio->prev_count == event_count) + goto err; + tio->prev_count = event_count; + + expires = hrtimer_get_expires(timer); + + now = ktime_get_real(); + if (now - expires >= SAFE_TIME_NS) + goto err; + + tio->pps_gen->enabled = pps_generate_next_pulse(expires + SAFE_TIME_NS, tio); + if (!tio->pps_gen->enabled) + return HRTIMER_NORESTART; + + hrtimer_forward(timer, now, NSEC_PER_SEC / 2); + return HRTIMER_RESTART; + +err: + dev_err(tio->dev, "Event missed, Disabling Timed I/O"); + pps_tio_disable(tio); + pps_gen_event(tio->pps_gen, PPS_GEN_EVENT_MISSEDPULSE, NULL); + return HRTIMER_NORESTART; +} + +static int pps_tio_gen_enable(struct pps_gen_device *pps_gen, bool enable) +{ + struct pps_tio *tio = container_of(pps_gen->info, struct pps_tio, gen_info); + + if (!timekeeping_clocksource_has_base(CSID_X86_ART)) { + dev_err_once(tio->dev, "PPS cannot be used as clock is not related to ART"); + return -ENODEV; + } + + guard(spinlock_irqsave)(&tio->lock); + if (enable && !pps_gen->enabled) { + pps_tio_direction_output(tio); + hrtimer_start(&tio->timer, first_event(tio), HRTIMER_MODE_ABS); + } else if (!enable && pps_gen->enabled) { + hrtimer_cancel(&tio->timer); + pps_tio_disable(tio); + } + + return 0; +} + +static int pps_tio_get_time(struct pps_gen_device *pps_gen, + struct timespec64 *time) +{ + struct system_time_snapshot snap; + + ktime_get_snapshot(&snap); + *time = ktime_to_timespec64(snap.real); + + return 0; +} + +static int pps_gen_tio_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pps_tio *tio; + + if (!(cpu_feature_enabled(X86_FEATURE_TSC_KNOWN_FREQ) && + cpu_feature_enabled(X86_FEATURE_ART))) { + dev_warn(dev, "TSC/ART is not enabled"); + return -ENODEV; + } + + tio = devm_kzalloc(dev, sizeof(*tio), GFP_KERNEL); + if (!tio) + return -ENOMEM; + + tio->gen_info.use_system_clock = true; + tio->gen_info.enable = pps_tio_gen_enable; + tio->gen_info.get_time = pps_tio_get_time; + tio->gen_info.owner = THIS_MODULE; + + tio->pps_gen = pps_gen_register_source(&tio->gen_info); + if (IS_ERR(tio->pps_gen)) + return PTR_ERR(tio->pps_gen); + + tio->dev = dev; + tio->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(tio->base)) + return PTR_ERR(tio->base); + + pps_tio_disable(tio); + hrtimer_init(&tio->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tio->timer.function = hrtimer_callback; + spin_lock_init(&tio->lock); + platform_set_drvdata(pdev, &tio); + + return 0; +} + +static void pps_gen_tio_remove(struct platform_device *pdev) +{ + struct pps_tio *tio = platform_get_drvdata(pdev); + + hrtimer_cancel(&tio->timer); + pps_tio_disable(tio); + pps_gen_unregister_source(tio->pps_gen); +} + +static const struct acpi_device_id intel_pmc_tio_acpi_match[] = { + { "INTC1021" }, + { "INTC1022" }, + { "INTC1023" }, + { "INTC1024" }, + {} +}; +MODULE_DEVICE_TABLE(acpi, intel_pmc_tio_acpi_match); + +static struct platform_driver pps_gen_tio_driver = { + .probe = pps_gen_tio_probe, + .remove = pps_gen_tio_remove, + .driver = { + .name = "intel-pps-gen-tio", + .acpi_match_table = intel_pmc_tio_acpi_match, + }, +}; +module_platform_driver(pps_gen_tio_driver); + +MODULE_AUTHOR("Christopher Hall "); +MODULE_AUTHOR("Lakshmi Sowjanya D "); +MODULE_AUTHOR("Pandith N "); +MODULE_AUTHOR("Thejesh Reddy T R "); +MODULE_AUTHOR("Subramanian Mohan "); +MODULE_DESCRIPTION("Intel PMC Time-Aware IO Generator Driver"); +MODULE_LICENSE("GPL"); From 6b22c3de1c2d93c89f0af4288be09c26b6101114 Mon Sep 17 00:00:00 2001 From: Subramanian Mohan Date: Wed, 19 Feb 2025 09:36:17 +0530 Subject: [PATCH 0378/2157] Documentation: driver-api: pps: Add Intel Timed I/O PPS generator Add Intel Timed I/O PPS usage instructions. Co-developed-by: Pandith N Signed-off-by: Pandith N Signed-off-by: Lakshmi Sowjanya D Reviewed-by: Andy Shevchenko Acked-by: Rodolfo Giometti Signed-off-by: Subramanian Mohan Link: https://lore.kernel.org/r/20250219040618.70962-4-subramanian.mohan@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/pps.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst index 04f1b88778fc..598729f9cd27 100644 --- a/Documentation/driver-api/pps.rst +++ b/Documentation/driver-api/pps.rst @@ -285,3 +285,27 @@ delay between assert and clear edge as small as possible to reduce system latencies. But if it is too small slave won't be able to capture clear edge transition. The default of 30us should be good enough in most situations. The delay can be selected using 'delay' pps_gen_parport module parameter. + + +Intel Timed I/O PPS signal generator +------------------------------------ + +Intel Timed I/O is a high precision device, present on 2019 and newer Intel +CPUs, that can generate PPS signals. + +Timed I/O and system time are both driven by same hardware clock. The signal +is generated with a precision of ~20 nanoseconds. The generated PPS signal +is used to synchronize an external device with system clock. For example, +it can be used to share your clock with a device that receives PPS signal, +generated by Timed I/O device. There are dedicated Timed I/O pins to deliver +the PPS signal to an external device. + +Usage of Intel Timed I/O as PPS generator: + +Start generating PPS signal:: + + $echo 1 > /sys/class/pps-gen/pps-genx/enable + +Stop generating PPS signal:: + + $echo 0 > /sys/class/pps-gen/pps-genx/enable From 264ff8415aed324584acc85740596f6e1df7b663 Mon Sep 17 00:00:00 2001 From: Subramanian Mohan Date: Wed, 19 Feb 2025 09:36:18 +0530 Subject: [PATCH 0379/2157] ABI: pps: Add ABI documentation for Intel TIO Document sysfs interface for Intel Timed I/O PPS driver. Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Subramanian Mohan Link: https://lore.kernel.org/r/20250219040618.70962-5-subramanian.mohan@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-pps-gen-tio | 6 ++++++ MAINTAINERS | 1 + 2 files changed, 7 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-pps-gen-tio diff --git a/Documentation/ABI/testing/sysfs-pps-gen-tio b/Documentation/ABI/testing/sysfs-pps-gen-tio new file mode 100644 index 000000000000..3c34ff17a335 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-pps-gen-tio @@ -0,0 +1,6 @@ +What: /sys/class/pps-gen/pps-genx/enable +Date: April 2025 +KernelVersion: 6.15 +Contact: Subramanian Mohan +Description: + Enable or disable PPS TIO generator output. diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353d..d4280facbe51 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18916,6 +18916,7 @@ S: Maintained W: http://wiki.enneenne.com/index.php/LinuxPPS_support F: Documentation/ABI/testing/sysfs-pps F: Documentation/ABI/testing/sysfs-pps-gen +F: Documentation/ABI/testing/sysfs-pps-gen-tio F: Documentation/devicetree/bindings/pps/pps-gpio.yaml F: Documentation/driver-api/pps.rst F: drivers/pps/ From c5020c5be9d266f66fa5ba3286f0e8d2d2265970 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 21 Feb 2025 09:42:32 +0100 Subject: [PATCH 0380/2157] kernfs: Move dput() outside of the RCU section. Al Viro pointed out that dput() might sleep and must not be invoked within an RCU section. Keep only find_next_ancestor() winthin the RCU section. Correct the wording in the comment. Fixes: 6ef5b6fae3040 ("kernfs: Drop kernfs_rwsem while invoking lookup_positive_unlocked().") Reported-by: Al Viro Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250221084232.xksA_IQ4@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f1cea282aae3..5124e196c2bf 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -222,17 +222,17 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, root = kernfs_root(kn); /* * As long as kn is valid, its parent can not vanish. This is cgroup's - * kn so it not have its parent replaced. Therefore it is safe to use + * kn so it can't have its parent replaced. Therefore it is safe to use * the ancestor node outside of the RCU or locked section. */ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT))) return ERR_PTR(-EINVAL); scoped_guard(rcu) { knparent = find_next_ancestor(kn, NULL); - if (WARN_ON(!knparent)) { - dput(dentry); - return ERR_PTR(-EINVAL); - } + } + if (WARN_ON(!knparent)) { + dput(dentry); + return ERR_PTR(-EINVAL); } do { From 0e14e062f5ff98aa15264dfa87c5f5e924028561 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 9 Jan 2025 21:53:48 +0000 Subject: [PATCH 0381/2157] coresight: catu: Fix number of pages while using 64k pages Trying to record a trace on kernel with 64k pages resulted in -ENOMEM. This happens due to a bug in calculating the number of table pages, which returns zero. Fix the issue by rounding up. $ perf record --kcore -e cs_etm/@tmc_etr55,cycacc,branch_broadcast/k --per-thread taskset --cpu-list 1 dd if=/dev/zero of=/dev/null failed to mmap with 12 (Cannot allocate memory) Fixes: 8ed536b1e283 ("coresight: catu: Add support for scatter gather tables") Signed-off-by: Ilkka Koskinen Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250109215348.5483-1-ilkka@os.amperecomputing.com --- drivers/hwtracing/coresight/coresight-catu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 275cc0d9f505..3378bb77e6b4 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -269,7 +269,7 @@ catu_init_sg_table(struct device *catu_dev, int node, * Each table can address upto 1MB and we can have * CATU_PAGES_PER_SYSPAGE tables in a system page. */ - nr_tpages = DIV_ROUND_UP(size, SZ_1M) / CATU_PAGES_PER_SYSPAGE; + nr_tpages = DIV_ROUND_UP(size, CATU_PAGES_PER_SYSPAGE * SZ_1M); catu_table = tmc_alloc_sg_table(catu_dev, node, nr_tpages, size >> PAGE_SHIFT, pages); if (IS_ERR(catu_table)) From 87b8166a732886140dfa5c41cf55919aa9f1ce75 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:11 +0530 Subject: [PATCH 0382/2157] dt-bindings: arm: coresight-tmc: Add "memory-region" property memory-region 0: Reserved trace buffer memory TMC ETR: When available, use this reserved memory region for trace data capture. Same region is used for trace data retention after a panic or watchdog reset. TMC ETF: When available, use this reserved memory region for trace data retention synced from internal SRAM after a panic or watchdog reset. memory-region 1: Reserved meta data memory TMC ETR, ETF: When available, use this memory for register snapshot retention synced from hardware registers after a panic or watchdog reset. Reviewed-by: Rob Herring Signed-off-by: Linu Cherian Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-2-lcherian@marvell.com --- .../bindings/arm/arm,coresight-tmc.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml index cb8dceaca70e..4787d7c6bac2 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml @@ -101,6 +101,29 @@ properties: and ETF configurations. $ref: /schemas/graph.yaml#/properties/port + memory-region: + items: + - description: Reserved trace buffer memory for ETR and ETF sinks. + For ETR, this reserved memory region is used for trace data capture. + Same region is used for trace data retention as well after a panic + or watchdog reset. + This reserved memory region is used as trace buffer or used for trace + data retention only if specifically selected by the user in sysfs + interface. + The default memory usage models for ETR in sysfs/perf modes are + otherwise unaltered. + + For ETF, this reserved memory region is used by default for + retention of trace data synced from internal SRAM after a panic + or watchdog reset. + - description: Reserved meta data memory. Used for ETR and ETF sinks + for storing metadata. + + memory-region-names: + items: + - const: tracedata + - const: metadata + required: - compatible - reg @@ -115,6 +138,9 @@ examples: etr@20070000 { compatible = "arm,coresight-tmc", "arm,primecell"; reg = <0x20070000 0x1000>; + memory-region = <&etr_trace_mem_reserved>, + <&etr_mdata_mem_reserved>; + memory-region-names = "tracedata", "metadata"; clocks = <&oscclk6a>; clock-names = "apb_pclk"; From 91a2086aa6d1558da427e2539eb20a9d6b352361 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:12 +0530 Subject: [PATCH 0383/2157] coresight: tmc-etr: Add support to use reserved trace memory Add support to use reserved memory for coresight ETR trace buffer. Introduce a new ETR buffer mode called ETR_MODE_RESRV, which becomes available when ETR device tree node is supplied with a valid reserved memory region. ETR_MODE_RESRV can be selected only by explicit user request. $ echo resrv >/sys/bus/coresight/devices/tmc_etr/buf_mode_preferred Signed-off-by: Anil Kumar Reddy Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-3-lcherian@marvell.com --- .../hwtracing/coresight/coresight-tmc-core.c | 50 ++++++++++++ .../hwtracing/coresight/coresight-tmc-etr.c | 79 +++++++++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 25 ++++++ 3 files changed, 154 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index e9876252a789..1e807b7916c7 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -398,6 +399,53 @@ static inline bool tmc_etr_has_non_secure_access(struct tmc_drvdata *drvdata) static const struct amba_id tmc_ids[]; +static int of_tmc_get_reserved_resource_by_name(struct device *dev, + const char *name, + struct resource *res) +{ + int index, rc = -ENODEV; + struct device_node *node; + + if (!is_of_node(dev->fwnode)) + return -ENODEV; + + index = of_property_match_string(dev->of_node, "memory-region-names", + name); + if (index < 0) + return rc; + + node = of_parse_phandle(dev->of_node, "memory-region", index); + if (!node) + return rc; + + if (!of_address_to_resource(node, 0, res) && + res->start != 0 && resource_size(res) != 0) + rc = 0; + of_node_put(node); + + return rc; +} + +static void tmc_get_reserved_region(struct device *parent) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(parent); + struct resource res; + + if (of_tmc_get_reserved_resource_by_name(parent, "tracedata", &res)) + return; + + drvdata->resrv_buf.vaddr = memremap(res.start, + resource_size(&res), + MEMREMAP_WC); + if (IS_ERR_OR_NULL(drvdata->resrv_buf.vaddr)) { + dev_err(parent, "Reserved trace buffer mapping failed\n"); + return; + } + + drvdata->resrv_buf.paddr = res.start; + drvdata->resrv_buf.size = resource_size(&res); +} + /* Detect and initialise the capabilities of a TMC ETR */ static int tmc_etr_setup_caps(struct device *parent, u32 devid, struct csdev_access *access) @@ -508,6 +556,8 @@ static int __tmc_probe(struct device *dev, struct resource *res) drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4; } + tmc_get_reserved_region(dev); + desc.dev = dev; switch (drvdata->config_type) { diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index a48bb85d0e7f..8bca5b36334a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -30,6 +30,7 @@ struct etr_buf_hw { bool has_iommu; bool has_etr_sg; bool has_catu; + bool has_resrv; }; /* @@ -695,6 +696,75 @@ static const struct etr_buf_operations etr_flat_buf_ops = { .get_data = tmc_etr_get_data_flat_buf, }; +/* + * tmc_etr_alloc_resrv_buf: Allocate a contiguous DMA buffer from reserved region. + */ +static int tmc_etr_alloc_resrv_buf(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + struct etr_flat_buf *resrv_buf; + struct device *real_dev = drvdata->csdev->dev.parent; + + /* We cannot reuse existing pages for resrv buf */ + if (pages) + return -EINVAL; + + resrv_buf = kzalloc(sizeof(*resrv_buf), GFP_KERNEL); + if (!resrv_buf) + return -ENOMEM; + + resrv_buf->daddr = dma_map_resource(real_dev, drvdata->resrv_buf.paddr, + drvdata->resrv_buf.size, + DMA_FROM_DEVICE, 0); + if (dma_mapping_error(real_dev, resrv_buf->daddr)) { + dev_err(real_dev, "failed to map source buffer address\n"); + kfree(resrv_buf); + return -ENOMEM; + } + + resrv_buf->vaddr = drvdata->resrv_buf.vaddr; + resrv_buf->size = etr_buf->size = drvdata->resrv_buf.size; + resrv_buf->dev = &drvdata->csdev->dev; + etr_buf->hwaddr = resrv_buf->daddr; + etr_buf->mode = ETR_MODE_RESRV; + etr_buf->private = resrv_buf; + return 0; +} + +static void tmc_etr_free_resrv_buf(struct etr_buf *etr_buf) +{ + struct etr_flat_buf *resrv_buf = etr_buf->private; + + if (resrv_buf && resrv_buf->daddr) { + struct device *real_dev = resrv_buf->dev->parent; + + dma_unmap_resource(real_dev, resrv_buf->daddr, + resrv_buf->size, DMA_FROM_DEVICE, 0); + } + kfree(resrv_buf); +} + +static void tmc_etr_sync_resrv_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + /* + * Adjust the buffer to point to the beginning of the trace data + * and update the available trace data. + */ + etr_buf->offset = rrp - etr_buf->hwaddr; + if (etr_buf->full) + etr_buf->len = etr_buf->size; + else + etr_buf->len = rwp - rrp; +} + +static const struct etr_buf_operations etr_resrv_buf_ops = { + .alloc = tmc_etr_alloc_resrv_buf, + .free = tmc_etr_free_resrv_buf, + .sync = tmc_etr_sync_resrv_buf, + .get_data = tmc_etr_get_data_flat_buf, +}; + /* * tmc_etr_alloc_sg_buf: Allocate an SG buf @etr_buf. Setup the parameters * appropriately. @@ -801,6 +871,7 @@ static const struct etr_buf_operations *etr_buf_ops[] = { [ETR_MODE_FLAT] = &etr_flat_buf_ops, [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, [ETR_MODE_CATU] = NULL, + [ETR_MODE_RESRV] = &etr_resrv_buf_ops }; void tmc_etr_set_catu_ops(const struct etr_buf_operations *catu) @@ -826,6 +897,7 @@ static inline int tmc_etr_mode_alloc_buf(int mode, case ETR_MODE_FLAT: case ETR_MODE_ETR_SG: case ETR_MODE_CATU: + case ETR_MODE_RESRV: if (etr_buf_ops[mode] && etr_buf_ops[mode]->alloc) rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, node, pages); @@ -844,6 +916,7 @@ static void get_etr_buf_hw(struct device *dev, struct etr_buf_hw *buf_hw) buf_hw->has_iommu = iommu_get_domain_for_dev(dev->parent); buf_hw->has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG); buf_hw->has_catu = !!tmc_etr_get_catu_device(drvdata); + buf_hw->has_resrv = tmc_has_reserved_buffer(drvdata); } static bool etr_can_use_flat_mode(struct etr_buf_hw *buf_hw, ssize_t etr_buf_size) @@ -1831,6 +1904,7 @@ static const char *const buf_modes_str[] = { [ETR_MODE_FLAT] = "flat", [ETR_MODE_ETR_SG] = "tmc-sg", [ETR_MODE_CATU] = "catu", + [ETR_MODE_RESRV] = "resrv", [ETR_MODE_AUTO] = "auto", }; @@ -1849,6 +1923,9 @@ static ssize_t buf_modes_available_show(struct device *dev, if (buf_hw.has_catu) size += sysfs_emit_at(buf, size, "%s ", buf_modes_str[ETR_MODE_CATU]); + if (buf_hw.has_resrv) + size += sysfs_emit_at(buf, size, "%s ", buf_modes_str[ETR_MODE_RESRV]); + size += sysfs_emit_at(buf, size, "\n"); return size; } @@ -1876,6 +1953,8 @@ static ssize_t buf_mode_preferred_store(struct device *dev, drvdata->etr_mode = ETR_MODE_ETR_SG; else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_CATU]) && buf_hw.has_catu) drvdata->etr_mode = ETR_MODE_CATU; + else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_RESRV]) && buf_hw.has_resrv) + drvdata->etr_mode = ETR_MODE_RESRV; else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_AUTO])) drvdata->etr_mode = ETR_MODE_AUTO; else diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 2671926be62a..d2261eddab71 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -135,6 +135,7 @@ enum etr_mode { ETR_MODE_FLAT, /* Uses contiguous flat buffer */ ETR_MODE_ETR_SG, /* Uses in-built TMC ETR SG mechanism */ ETR_MODE_CATU, /* Use SG mechanism in CATU */ + ETR_MODE_RESRV, /* Use reserved region contiguous buffer */ ETR_MODE_AUTO, /* Use the default mechanism */ }; @@ -164,6 +165,17 @@ struct etr_buf { void *private; }; +/** + * @paddr : Start address of reserved memory region. + * @vaddr : Corresponding CPU virtual address. + * @size : Size of reserved memory region. + */ +struct tmc_resrv_buf { + phys_addr_t paddr; + void *vaddr; + size_t size; +}; + /** * struct tmc_drvdata - specifics associated to an TMC component * @pclk: APB clock if present, otherwise NULL @@ -189,6 +201,10 @@ struct etr_buf { * @idr_mutex: Access serialisation for idr. * @sysfs_buf: SYSFS buffer for ETR. * @perf_buf: PERF buffer for ETR. + * @resrv_buf: Used by ETR as hardware trace buffer and for trace data + * retention (after crash) only when ETR_MODE_RESRV buffer + * mode is enabled. Used by ETF for trace data retention + * (after crash) by default. */ struct tmc_drvdata { struct clk *pclk; @@ -214,6 +230,7 @@ struct tmc_drvdata { struct mutex idr_mutex; struct etr_buf *sysfs_buf; struct etr_buf *perf_buf; + struct tmc_resrv_buf resrv_buf; }; struct etr_buf_operations { @@ -331,6 +348,14 @@ tmc_sg_table_buf_size(struct tmc_sg_table *sg_table) return (unsigned long)sg_table->data_pages.nr_pages << PAGE_SHIFT; } +static inline bool tmc_has_reserved_buffer(struct tmc_drvdata *drvdata) +{ + if (drvdata->resrv_buf.vaddr && + drvdata->resrv_buf.size) + return true; + return false; +} + struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata); void tmc_etr_set_catu_ops(const struct etr_buf_operations *catu); From 46006ceb5d029f92df405db15c9a31f0ee41628c Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:13 +0530 Subject: [PATCH 0384/2157] coresight: core: Add provision for panic callbacks Panic callback handlers allows coresight device drivers to sync relevant trace data and trace metadata to reserved memory regions so that they can be retrieved later in the subsequent boot or in the crashdump kernel. Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-4-lcherian@marvell.com --- drivers/hwtracing/coresight/coresight-core.c | 42 ++++++++++++++++++++ include/linux/coresight.h | 12 ++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 0a9380350fb5..ab55e10d4b79 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "coresight-etm-perf.h" #include "coresight-priv.h" @@ -1453,6 +1454,36 @@ const struct bus_type coresight_bustype = { .name = "coresight", }; +static int coresight_panic_sync(struct device *dev, void *data) +{ + int mode; + struct coresight_device *csdev; + + /* Run through panic sync handlers for all enabled devices */ + csdev = container_of(dev, struct coresight_device, dev); + mode = coresight_get_mode(csdev); + + if ((mode == CS_MODE_SYSFS) || (mode == CS_MODE_PERF)) { + if (panic_ops(csdev)) + panic_ops(csdev)->sync(csdev); + } + + return 0; +} + +static int coresight_panic_cb(struct notifier_block *self, + unsigned long v, void *p) +{ + bus_for_each_dev(&coresight_bustype, NULL, NULL, + coresight_panic_sync); + + return 0; +} + +static struct notifier_block coresight_notifier = { + .notifier_call = coresight_panic_cb, +}; + static int __init coresight_init(void) { int ret; @@ -1465,11 +1496,20 @@ static int __init coresight_init(void) if (ret) goto exit_bus_unregister; + /* Register function to be called for panic */ + ret = atomic_notifier_chain_register(&panic_notifier_list, + &coresight_notifier); + if (ret) + goto exit_perf; + /* initialise the coresight syscfg API */ ret = cscfg_init(); if (!ret) return 0; + atomic_notifier_chain_unregister(&panic_notifier_list, + &coresight_notifier); +exit_perf: etm_perf_exit(); exit_bus_unregister: bus_unregister(&coresight_bustype); @@ -1479,6 +1519,8 @@ static int __init coresight_init(void) static void __exit coresight_exit(void) { cscfg_exit(); + atomic_notifier_chain_unregister(&panic_notifier_list, + &coresight_notifier); etm_perf_exit(); bus_unregister(&coresight_bustype); } diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 17276965ff1d..3eef4e91df3f 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -340,6 +340,7 @@ enum cs_mode { #define link_ops(csdev) csdev->ops->link_ops #define helper_ops(csdev) csdev->ops->helper_ops #define ect_ops(csdev) csdev->ops->ect_ops +#define panic_ops(csdev) csdev->ops->panic_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -409,11 +410,22 @@ struct coresight_ops_helper { int (*disable)(struct coresight_device *csdev, void *data); }; + +/** + * struct coresight_ops_panic - Generic device ops for panic handing + * + * @sync : Sync the device register state/trace data + */ +struct coresight_ops_panic { + int (*sync)(struct coresight_device *csdev); +}; + struct coresight_ops { const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; const struct coresight_ops_helper *helper_ops; + const struct coresight_ops_panic *panic_ops; }; static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, From 6dbcbcfc4496598c08c87de37456ba618abe59ce Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:14 +0530 Subject: [PATCH 0385/2157] coresight: tmc: Enable panic sync handling - Get reserved region from device tree node for metadata - Define metadata format for TMC - Add TMC ETR panic sync handler that syncs register snapshot to metadata region - Add TMC ETF panic sync handler that syncs register snapshot to metadata region and internal SRAM to reserved trace buffer region. Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-5-lcherian@marvell.com --- .../hwtracing/coresight/coresight-tmc-core.c | 14 ++++ .../hwtracing/coresight/coresight-tmc-etf.c | 80 +++++++++++++++++++ .../hwtracing/coresight/coresight-tmc-etr.c | 73 +++++++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 66 +++++++++++++++ 4 files changed, 233 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 1e807b7916c7..c6224eed705d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -444,6 +444,20 @@ static void tmc_get_reserved_region(struct device *parent) drvdata->resrv_buf.paddr = res.start; drvdata->resrv_buf.size = resource_size(&res); + + if (of_tmc_get_reserved_resource_by_name(parent, "metadata", &res)) + return; + + drvdata->crash_mdata.vaddr = memremap(res.start, + resource_size(&res), + MEMREMAP_WC); + if (IS_ERR_OR_NULL(drvdata->crash_mdata.vaddr)) { + dev_err(parent, "Metadata memory mapping failed\n"); + return; + } + + drvdata->crash_mdata.paddr = res.start; + drvdata->crash_mdata.size = resource_size(&res); } /* Detect and initialise the capabilities of a TMC ETR */ diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d4f641cd9de6..0f9155a10ac2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -590,6 +590,81 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, return to_read; } +static int tmc_panic_sync_etf(struct coresight_device *csdev) +{ + u32 val; + struct tmc_crash_metadata *mdata; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + mdata = (struct tmc_crash_metadata *)drvdata->crash_mdata.vaddr; + + /* Make sure we have valid reserved memory */ + if (!tmc_has_reserved_buffer(drvdata) || + !tmc_has_crash_mdata_buffer(drvdata)) + return 0; + + tmc_crashdata_set_invalid(drvdata); + + CS_UNLOCK(drvdata->base); + + /* Proceed only if ETF is enabled or configured as sink */ + val = readl(drvdata->base + TMC_CTL); + if (!(val & TMC_CTL_CAPT_EN)) + goto out; + val = readl(drvdata->base + TMC_MODE); + if (val != TMC_MODE_CIRCULAR_BUFFER) + goto out; + + val = readl(drvdata->base + TMC_FFSR); + /* Do manual flush and stop only if its not auto-stopped */ + if (!(val & TMC_FFSR_FT_STOPPED)) { + dev_dbg(&csdev->dev, + "%s: Triggering manual flush\n", __func__); + tmc_flush_and_stop(drvdata); + } else + tmc_wait_for_tmcready(drvdata); + + /* Sync registers from hardware to metadata region */ + mdata->tmc_sts = readl(drvdata->base + TMC_STS); + mdata->tmc_mode = readl(drvdata->base + TMC_MODE); + mdata->tmc_ffcr = readl(drvdata->base + TMC_FFCR); + mdata->tmc_ffsr = readl(drvdata->base + TMC_FFSR); + + /* Sync Internal SRAM to reserved trace buffer region */ + drvdata->buf = drvdata->resrv_buf.vaddr; + tmc_etb_dump_hw(drvdata); + /* Store as per RSZ register convention */ + mdata->tmc_ram_size = drvdata->len >> 2; + + /* Other fields for processing trace buffer reads */ + mdata->tmc_rrp = 0; + mdata->tmc_dba = 0; + mdata->tmc_rwp = drvdata->len; + mdata->trace_paddr = drvdata->resrv_buf.paddr; + + mdata->version = CS_CRASHDATA_VERSION; + + /* + * Make sure all previous writes are ordered, + * before we mark valid + */ + dmb(sy); + mdata->valid = true; + /* + * Below order need to maintained, since crc of metadata + * is dependent on first + */ + mdata->crc32_tdata = find_crash_tracedata_crc(drvdata, mdata); + mdata->crc32_mdata = find_crash_metadata_crc(mdata); + + tmc_disable_hw(drvdata); + + dev_dbg(&csdev->dev, "%s: success\n", __func__); +out: + CS_UNLOCK(drvdata->base); + return 0; +} + static const struct coresight_ops_sink tmc_etf_sink_ops = { .enable = tmc_enable_etf_sink, .disable = tmc_disable_etf_sink, @@ -603,6 +678,10 @@ static const struct coresight_ops_link tmc_etf_link_ops = { .disable = tmc_disable_etf_link, }; +static const struct coresight_ops_panic tmc_etf_sync_ops = { + .sync = tmc_panic_sync_etf, +}; + const struct coresight_ops tmc_etb_cs_ops = { .sink_ops = &tmc_etf_sink_ops, }; @@ -610,6 +689,7 @@ const struct coresight_ops tmc_etb_cs_ops = { const struct coresight_ops tmc_etf_cs_ops = { .sink_ops = &tmc_etf_sink_ops, .link_ops = &tmc_etf_link_ops, + .panic_ops = &tmc_etf_sync_ops, }; int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 8bca5b36334a..fb944a68a11c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1814,6 +1814,74 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) return 0; } +static int tmc_panic_sync_etr(struct coresight_device *csdev) +{ + u32 val; + struct tmc_crash_metadata *mdata; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + mdata = (struct tmc_crash_metadata *)drvdata->crash_mdata.vaddr; + + if (!drvdata->etr_buf) + return 0; + + /* Being in RESRV mode implies valid reserved memory as well */ + if (drvdata->etr_buf->mode != ETR_MODE_RESRV) + return 0; + + if (!tmc_has_crash_mdata_buffer(drvdata)) + return 0; + + CS_UNLOCK(drvdata->base); + + /* Proceed only if ETR is enabled */ + val = readl(drvdata->base + TMC_CTL); + if (!(val & TMC_CTL_CAPT_EN)) + goto out; + + val = readl(drvdata->base + TMC_FFSR); + /* Do manual flush and stop only if its not auto-stopped */ + if (!(val & TMC_FFSR_FT_STOPPED)) { + dev_dbg(&csdev->dev, + "%s: Triggering manual flush\n", __func__); + tmc_flush_and_stop(drvdata); + } else + tmc_wait_for_tmcready(drvdata); + + /* Sync registers from hardware to metadata region */ + mdata->tmc_ram_size = readl(drvdata->base + TMC_RSZ); + mdata->tmc_sts = readl(drvdata->base + TMC_STS); + mdata->tmc_mode = readl(drvdata->base + TMC_MODE); + mdata->tmc_ffcr = readl(drvdata->base + TMC_FFCR); + mdata->tmc_ffsr = readl(drvdata->base + TMC_FFSR); + mdata->tmc_rrp = tmc_read_rrp(drvdata); + mdata->tmc_rwp = tmc_read_rwp(drvdata); + mdata->tmc_dba = tmc_read_dba(drvdata); + mdata->trace_paddr = drvdata->resrv_buf.paddr; + mdata->version = CS_CRASHDATA_VERSION; + + /* + * Make sure all previous writes are ordered, + * before we mark valid + */ + dmb(sy); + mdata->valid = true; + /* + * Below order need to maintained, since crc of metadata + * is dependent on first + */ + mdata->crc32_tdata = find_crash_tracedata_crc(drvdata, mdata); + mdata->crc32_mdata = find_crash_metadata_crc(mdata); + + tmc_disable_hw(drvdata); + + dev_dbg(&csdev->dev, "%s: success\n", __func__); +out: + CS_UNLOCK(drvdata->base); + + return 0; +} + static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, @@ -1822,8 +1890,13 @@ static const struct coresight_ops_sink tmc_etr_sink_ops = { .free_buffer = tmc_free_etr_buffer, }; +static const struct coresight_ops_panic tmc_etr_sync_ops = { + .sync = tmc_panic_sync_etr, +}; + const struct coresight_ops tmc_etr_cs_ops = { .sink_ops = &tmc_etr_sink_ops, + .panic_ops = &tmc_etr_sync_ops, }; int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index d2261eddab71..d76e83fc840b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -12,6 +12,7 @@ #include #include #include +#include #define TMC_RSZ 0x004 #define TMC_STS 0x00c @@ -76,6 +77,9 @@ #define TMC_AXICTL_AXCACHE_OS (0xf << 2) #define TMC_AXICTL_ARCACHE_OS (0xf << 16) +/* TMC_FFSR - 0x300 */ +#define TMC_FFSR_FT_STOPPED BIT(1) + /* TMC_FFCR - 0x304 */ #define TMC_FFCR_FLUSHMAN_BIT 6 #define TMC_FFCR_EN_FMT BIT(0) @@ -94,6 +98,9 @@ #define TMC_AUTH_NSID_MASK GENMASK(1, 0) +/* Major version 1 Minor version 0 */ +#define CS_CRASHDATA_VERSION (1 << 16) + enum tmc_config_type { TMC_CONFIG_TYPE_ETB, TMC_CONFIG_TYPE_ETR, @@ -131,6 +138,25 @@ enum tmc_mem_intf_width { #define CORESIGHT_SOC_600_ETR_CAPS \ (TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE) +/* TMC metadata region for ETR and ETF configurations */ +struct tmc_crash_metadata { + uint32_t crc32_mdata; /* crc of metadata */ + uint32_t crc32_tdata; /* crc of tracedata */ + uint32_t version; /* 31:16 Major version, 15:0 Minor version */ + uint32_t valid; /* Indicate if this ETF/ETR was enabled */ + uint32_t tmc_ram_size; /* Ram Size register */ + uint32_t tmc_sts; /* Status register */ + uint32_t tmc_mode; /* Mode register */ + uint32_t tmc_ffcr; /* Formatter and flush control register */ + uint32_t tmc_ffsr; /* Formatter and flush status register */ + uint32_t reserved32; + uint64_t tmc_rrp; /* Ram Read pointer register */ + uint64_t tmc_rwp; /* Ram Write pointer register */ + uint64_t tmc_dba; /* Data buffer address register */ + uint64_t trace_paddr; /* Phys address of trace buffer */ + uint64_t reserved64[3]; +}; + enum etr_mode { ETR_MODE_FLAT, /* Uses contiguous flat buffer */ ETR_MODE_ETR_SG, /* Uses in-built TMC ETR SG mechanism */ @@ -205,6 +231,8 @@ struct tmc_resrv_buf { * retention (after crash) only when ETR_MODE_RESRV buffer * mode is enabled. Used by ETF for trace data retention * (after crash) by default. + * @crash_mdata: Reserved memory for storing tmc crash metadata. + * Used by ETR/ETF. */ struct tmc_drvdata { struct clk *pclk; @@ -231,6 +259,7 @@ struct tmc_drvdata { struct etr_buf *sysfs_buf; struct etr_buf *perf_buf; struct tmc_resrv_buf resrv_buf; + struct tmc_resrv_buf crash_mdata; }; struct etr_buf_operations { @@ -356,6 +385,43 @@ static inline bool tmc_has_reserved_buffer(struct tmc_drvdata *drvdata) return false; } +static inline bool tmc_has_crash_mdata_buffer(struct tmc_drvdata *drvdata) +{ + if (drvdata->crash_mdata.vaddr && + drvdata->crash_mdata.size) + return true; + return false; +} + +static inline void tmc_crashdata_set_invalid(struct tmc_drvdata *drvdata) +{ + struct tmc_crash_metadata *mdata; + + mdata = (struct tmc_crash_metadata *)drvdata->crash_mdata.vaddr; + + if (tmc_has_crash_mdata_buffer(drvdata)) + mdata->valid = false; +} + +static inline uint32_t find_crash_metadata_crc(struct tmc_crash_metadata *md) +{ + unsigned long crc_size; + + crc_size = sizeof(struct tmc_crash_metadata) - + offsetof(struct tmc_crash_metadata, crc32_tdata); + return crc32_le(0, (void *)&md->crc32_tdata, crc_size); +} + +static inline uint32_t find_crash_tracedata_crc(struct tmc_drvdata *drvdata, + struct tmc_crash_metadata *md) +{ + unsigned long crc_size; + + /* Take CRC of configured buffer size to keep it simple */ + crc_size = md->tmc_ram_size << 2; + return crc32_le(0, (void *)drvdata->resrv_buf.vaddr, crc_size); +} + struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata); void tmc_etr_set_catu_ops(const struct etr_buf_operations *catu); From d58a70bdab575264fe75e4826464aaef0dd096b4 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:15 +0530 Subject: [PATCH 0386/2157] coresight: tmc: Add support for reading crash data * Add support for reading crashdata using special device files. The special device files /dev/crash_tmc_xxx would be available for read file operation only when the crash data is valid. * User can read the crash data as below For example, for reading crash data from tmc_etf sink #dd if=/dev/crash_tmc_etfXX of=~/cstrace.bin Signed-off-by: Anil Kumar Reddy Signed-off-by: Tanmay Jagdale Signed-off-by: Linu Cherian Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-6-lcherian@marvell.com --- .../hwtracing/coresight/coresight-tmc-core.c | 226 +++++++++++++++++- .../hwtracing/coresight/coresight-tmc-etr.c | 22 +- drivers/hwtracing/coresight/coresight-tmc.h | 13 +- 3 files changed, 258 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index c6224eed705d..045d37606750 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -105,6 +105,128 @@ u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata) return mask; } +static bool is_tmc_crashdata_valid(struct tmc_drvdata *drvdata) +{ + struct tmc_crash_metadata *mdata; + + if (!tmc_has_reserved_buffer(drvdata) || + !tmc_has_crash_mdata_buffer(drvdata)) + return false; + + mdata = drvdata->crash_mdata.vaddr; + + /* Check version match */ + if (mdata->version != CS_CRASHDATA_VERSION) + return false; + + /* Check for valid metadata */ + if (!mdata->valid) { + dev_dbg(&drvdata->csdev->dev, + "Data invalid in tmc crash metadata\n"); + return false; + } + + /* + * Buffer address given by metadata for retrieval of trace data + * from previous boot is expected to be same as the reserved + * trace buffer memory region provided through DTS + */ + if (drvdata->resrv_buf.paddr != mdata->trace_paddr) { + dev_dbg(&drvdata->csdev->dev, + "Trace buffer address of previous boot invalid\n"); + return false; + } + + /* Check data integrity of metadata */ + if (mdata->crc32_mdata != find_crash_metadata_crc(mdata)) { + dev_err(&drvdata->csdev->dev, + "CRC mismatch in tmc crash metadata\n"); + return false; + } + /* Check data integrity of tracedata */ + if (mdata->crc32_tdata != find_crash_tracedata_crc(drvdata, mdata)) { + dev_err(&drvdata->csdev->dev, + "CRC mismatch in tmc crash tracedata\n"); + return false; + } + + return true; +} + +static inline ssize_t tmc_get_resvbuf_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + s64 offset; + ssize_t actual = len; + struct tmc_resrv_buf *rbuf = &drvdata->resrv_buf; + + if (pos + actual > rbuf->len) + actual = rbuf->len - pos; + if (actual <= 0) + return 0; + + /* Compute the offset from which we read the data */ + offset = rbuf->offset + pos; + if (offset >= rbuf->size) + offset -= rbuf->size; + + /* Adjust the length to limit this transaction to end of buffer */ + actual = (actual < (rbuf->size - offset)) ? + actual : rbuf->size - offset; + + *bufpp = (char *)rbuf->vaddr + offset; + + return actual; +} + +static int tmc_prepare_crashdata(struct tmc_drvdata *drvdata) +{ + char *bufp; + ssize_t len; + u32 status, size; + u64 rrp, rwp, dba; + struct tmc_resrv_buf *rbuf; + struct tmc_crash_metadata *mdata; + + mdata = drvdata->crash_mdata.vaddr; + rbuf = &drvdata->resrv_buf; + + rrp = mdata->tmc_rrp; + rwp = mdata->tmc_rwp; + dba = mdata->tmc_dba; + status = mdata->tmc_sts; + size = mdata->tmc_ram_size << 2; + + /* Sync the buffer pointers */ + rbuf->offset = rrp - dba; + if (status & TMC_STS_FULL) + rbuf->len = size; + else + rbuf->len = rwp - rrp; + + /* Additional sanity checks for validating metadata */ + if ((rbuf->offset > size) || + (rbuf->len > size)) { + dev_dbg(&drvdata->csdev->dev, + "Offset and length invalid in tmc crash metadata\n"); + return -EINVAL; + } + + if (status & TMC_STS_FULL) { + len = tmc_get_resvbuf_trace(drvdata, 0x0, + CORESIGHT_BARRIER_PKT_SIZE, &bufp); + if (len >= CORESIGHT_BARRIER_PKT_SIZE) { + coresight_insert_barrier_packet(bufp); + /* Recalculate crc */ + mdata->crc32_tdata = find_crash_tracedata_crc(drvdata, + mdata); + mdata->crc32_mdata = find_crash_metadata_crc(mdata); + } + } + + return 0; +} + static int tmc_read_prepare(struct tmc_drvdata *drvdata) { int ret = 0; @@ -223,6 +345,84 @@ static const struct file_operations tmc_fops = { .release = tmc_release, }; +static int tmc_crashdata_open(struct inode *inode, struct file *file) +{ + int err = 0; + unsigned long flags; + struct tmc_resrv_buf *rbuf; + struct tmc_crash_metadata *mdata; + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, + crashdev); + + mdata = drvdata->crash_mdata.vaddr; + rbuf = &drvdata->resrv_buf; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (mdata->valid) + rbuf->reading = true; + else + err = -ENOENT; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + if (err) + goto exit; + + nonseekable_open(inode, file); + dev_dbg(&drvdata->csdev->dev, "%s: successfully opened\n", __func__); +exit: + return err; +} + +static ssize_t tmc_crashdata_read(struct file *file, char __user *data, + size_t len, loff_t *ppos) +{ + char *bufp; + ssize_t actual; + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, + crashdev); + + actual = tmc_get_resvbuf_trace(drvdata, *ppos, len, &bufp); + if (actual <= 0) + return 0; + + if (copy_to_user(data, bufp, actual)) { + dev_dbg(&drvdata->csdev->dev, + "%s: copy_to_user failed\n", __func__); + return -EFAULT; + } + + *ppos += actual; + dev_dbg(&drvdata->csdev->dev, "%zu bytes copied\n", actual); + + return actual; +} + +static int tmc_crashdata_release(struct inode *inode, struct file *file) +{ + int ret = 0; + unsigned long flags; + struct tmc_resrv_buf *rbuf; + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, + crashdev); + + rbuf = &drvdata->resrv_buf; + spin_lock_irqsave(&drvdata->spinlock, flags); + rbuf->reading = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__); + return ret; +} + +static const struct file_operations tmc_crashdata_fops = { + .owner = THIS_MODULE, + .open = tmc_crashdata_open, + .read = tmc_crashdata_read, + .release = tmc_crashdata_release, +}; + static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid) { enum tmc_mem_intf_width memwidth; @@ -532,6 +732,22 @@ static u32 tmc_etr_get_max_burst_size(struct device *dev) return burst_size; } +static void register_crash_dev_interface(struct tmc_drvdata *drvdata, + const char *name) +{ + drvdata->crashdev.name = + devm_kasprintf(&drvdata->csdev->dev, GFP_KERNEL, "%s_%s", "crash", name); + drvdata->crashdev.minor = MISC_DYNAMIC_MINOR; + drvdata->crashdev.fops = &tmc_crashdata_fops; + if (misc_register(&drvdata->crashdev)) { + dev_dbg(&drvdata->csdev->dev, + "Failed to setup user interface for crashdata\n"); + drvdata->crashdev.fops = NULL; + } else + dev_info(&drvdata->csdev->dev, + "Valid crash tracedata found\n"); +} + static int __tmc_probe(struct device *dev, struct resource *res) { int ret = 0; @@ -632,9 +848,15 @@ static int __tmc_probe(struct device *dev, struct resource *res) drvdata->miscdev.minor = MISC_DYNAMIC_MINOR; drvdata->miscdev.fops = &tmc_fops; ret = misc_register(&drvdata->miscdev); - if (ret) + if (ret) { coresight_unregister(drvdata->csdev); + goto out; + } + out: + if (is_tmc_crashdata_valid(drvdata) && + !tmc_prepare_crashdata(drvdata)) + register_crash_dev_interface(drvdata, desc.name); return ret; } @@ -687,6 +909,8 @@ static void __tmc_remove(struct device *dev) * handler to this device is closed. */ misc_deregister(&drvdata->miscdev); + if (drvdata->crashdev.fops) + misc_deregister(&drvdata->crashdev); coresight_unregister(drvdata->csdev); } diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index fb944a68a11c..c92556e1b4db 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -2012,6 +2012,26 @@ static ssize_t buf_mode_preferred_show(struct device *dev, return sysfs_emit(buf, "%s\n", buf_modes_str[drvdata->etr_mode]); } +static int buf_mode_set_resrv(struct tmc_drvdata *drvdata) +{ + int err = -EBUSY; + unsigned long flags; + struct tmc_resrv_buf *rbuf; + + rbuf = &drvdata->resrv_buf; + + /* Ensure there are no active crashdata read sessions */ + spin_lock_irqsave(&drvdata->spinlock, flags); + if (!rbuf->reading) { + tmc_crashdata_set_invalid(drvdata); + rbuf->len = 0; + drvdata->etr_mode = ETR_MODE_RESRV; + err = 0; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return err; +} + static ssize_t buf_mode_preferred_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) @@ -2027,7 +2047,7 @@ static ssize_t buf_mode_preferred_store(struct device *dev, else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_CATU]) && buf_hw.has_catu) drvdata->etr_mode = ETR_MODE_CATU; else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_RESRV]) && buf_hw.has_resrv) - drvdata->etr_mode = ETR_MODE_RESRV; + return buf_mode_set_resrv(drvdata) ? : size; else if (sysfs_streq(buf, buf_modes_str[ETR_MODE_AUTO])) drvdata->etr_mode = ETR_MODE_AUTO; else diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index d76e83fc840b..3e2744af7e15 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -195,11 +195,17 @@ struct etr_buf { * @paddr : Start address of reserved memory region. * @vaddr : Corresponding CPU virtual address. * @size : Size of reserved memory region. + * @offset : Offset of the trace data in the buffer for consumption. + * @reading : Flag to indicate if reading is active + * @len : Available trace data @buf (may round up to the beginning). */ struct tmc_resrv_buf { phys_addr_t paddr; void *vaddr; size_t size; + unsigned long offset; + bool reading; + s64 len; }; /** @@ -208,6 +214,8 @@ struct tmc_resrv_buf { * @base: memory mapped base address for this component. * @csdev: component vitals needed by the framework. * @miscdev: specifics to handle "/dev/xyz.tmc" entry. + * @crashdev: specifics to handle "/dev/crash_tmc_xyz" entry for reading + * crash tracedata. * @spinlock: only one at a time pls. * @pid: Process ID of the process that owns the session that is using * this component. For example this would be the pid of the Perf @@ -227,7 +235,7 @@ struct tmc_resrv_buf { * @idr_mutex: Access serialisation for idr. * @sysfs_buf: SYSFS buffer for ETR. * @perf_buf: PERF buffer for ETR. - * @resrv_buf: Used by ETR as hardware trace buffer and for trace data + * @resrv_buf: Used by ETR as hardware trace buffer and for trace data * retention (after crash) only when ETR_MODE_RESRV buffer * mode is enabled. Used by ETF for trace data retention * (after crash) by default. @@ -239,6 +247,7 @@ struct tmc_drvdata { void __iomem *base; struct coresight_device *csdev; struct miscdevice miscdev; + struct miscdevice crashdev; spinlock_t spinlock; pid_t pid; bool reading; @@ -309,6 +318,7 @@ void tmc_flush_and_stop(struct tmc_drvdata *drvdata); void tmc_enable_hw(struct tmc_drvdata *drvdata); void tmc_disable_hw(struct tmc_drvdata *drvdata); u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata); +int tmc_read_prepare_crashdata(struct tmc_drvdata *drvdata); /* ETB/ETF functions */ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata); @@ -371,6 +381,7 @@ void tmc_sg_table_sync_data_range(struct tmc_sg_table *table, u64 offset, u64 size); ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, u64 offset, size_t len, char **bufpp); + static inline unsigned long tmc_sg_table_buf_size(struct tmc_sg_table *sg_table) { From 942bbeeaf84491645b843680555b50ff9e336711 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:16 +0530 Subject: [PATCH 0387/2157] coresight: tmc: Stop trace capture on FlIn Configure TMC ETR and ETF to flush and stop trace capture on FlIn event based on sysfs attribute, /sys/bus/coresight/devices/tmc_etXn/stop_on_flush. Signed-off-by: Linu Cherian Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-7-lcherian@marvell.com --- .../hwtracing/coresight/coresight-tmc-core.c | 31 +++++++++++++++++++ .../hwtracing/coresight/coresight-tmc-etf.c | 12 ++++--- .../hwtracing/coresight/coresight-tmc-etr.c | 12 ++++--- drivers/hwtracing/coresight/coresight-tmc.h | 2 ++ 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 045d37606750..d5122e12daa7 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -532,9 +532,40 @@ static ssize_t buffer_size_store(struct device *dev, static DEVICE_ATTR_RW(buffer_size); +static ssize_t stop_on_flush_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + return sprintf(buf, "%#x\n", drvdata->stop_on_flush); +} + +static ssize_t stop_on_flush_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + u8 val; + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtou8(buf, 0, &val); + if (ret) + return ret; + if (val) + drvdata->stop_on_flush = true; + else + drvdata->stop_on_flush = false; + + return size; +} + +static DEVICE_ATTR_RW(stop_on_flush); + + static struct attribute *coresight_tmc_attrs[] = { &dev_attr_trigger_cntr.attr, &dev_attr_buffer_size.attr, + &dev_attr_stop_on_flush.attr, NULL, }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 0f9155a10ac2..bdc3a7e9ba06 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -19,6 +19,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, static int __tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { int rc = 0; + u32 ffcr; CS_UNLOCK(drvdata->base); @@ -32,10 +33,12 @@ static int __tmc_etb_enable_hw(struct tmc_drvdata *drvdata) } writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); - writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | - TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | - TMC_FFCR_TRIGON_TRIGIN, - drvdata->base + TMC_FFCR); + + ffcr = TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | TMC_FFCR_FON_FLIN | + TMC_FFCR_FON_TRIG_EVT | TMC_FFCR_TRIGON_TRIGIN; + if (drvdata->stop_on_flush) + ffcr |= TMC_FFCR_STOP_ON_FLUSH; + writel_relaxed(ffcr, drvdata->base + TMC_FFCR); writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); tmc_enable_hw(drvdata); @@ -225,7 +228,6 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) used = true; drvdata->buf = buf; } - ret = tmc_etb_enable_hw(drvdata); if (!ret) { coresight_set_mode(csdev, CS_MODE_SYSFS); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index c92556e1b4db..eda7cdad0e2b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1060,7 +1060,7 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) static int __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { - u32 axictl, sts; + u32 axictl, sts, ffcr; struct etr_buf *etr_buf = drvdata->etr_buf; int rc = 0; @@ -1106,10 +1106,12 @@ static int __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) writel_relaxed(sts, drvdata->base + TMC_STS); } - writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | - TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | - TMC_FFCR_TRIGON_TRIGIN, - drvdata->base + TMC_FFCR); + ffcr = TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | TMC_FFCR_FON_FLIN | + TMC_FFCR_FON_TRIG_EVT | TMC_FFCR_TRIGON_TRIGIN; + if (drvdata->stop_on_flush) + ffcr |= TMC_FFCR_STOP_ON_FLUSH; + writel_relaxed(ffcr, drvdata->base + TMC_FFCR); + writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); tmc_enable_hw(drvdata); diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 3e2744af7e15..b48bc9a01cc0 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -220,6 +220,7 @@ struct tmc_resrv_buf { * @pid: Process ID of the process that owns the session that is using * this component. For example this would be the pid of the Perf * process. + * @stop_on_flush: Stop on flush trigger user configuration. * @buf: Snapshot of the trace data for ETF/ETB. * @etr_buf: details of buffer used in TMC-ETR * @len: size of the available trace for ETF/ETB. @@ -251,6 +252,7 @@ struct tmc_drvdata { spinlock_t spinlock; pid_t pid; bool reading; + bool stop_on_flush; union { char *buf; /* TMC ETB */ struct etr_buf *etr_buf; /* TMC ETR */ From 4b7e62627a38521e5b83dcc3a9d4ad3c18d7fd3f Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:17 +0530 Subject: [PATCH 0388/2157] coresight: config: Add preloaded configuration Add a preloaded configuration for generating external trigger on address match. This can be used by CTI and ETR blocks to stop trace capture on kernel panic. Kernel address for "panic" function is used as the default trigger address. This new configuration is available as, /sys/kernel/config/cs-syscfg/configurations/panicstop Signed-off-by: Linu Cherian Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-8-lcherian@marvell.com --- drivers/hwtracing/coresight/Makefile | 2 +- .../coresight/coresight-cfg-preload.c | 2 + .../coresight/coresight-cfg-preload.h | 2 + .../hwtracing/coresight/coresight-cfg-pstop.c | 83 +++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 drivers/hwtracing/coresight/coresight-cfg-pstop.c diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index 4ba478211b31..46ce7f39d05f 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -25,7 +25,7 @@ subdir-ccflags-y += $(condflags) obj-$(CONFIG_CORESIGHT) += coresight.o coresight-y := coresight-core.o coresight-etm-perf.o coresight-platform.o \ coresight-sysfs.o coresight-syscfg.o coresight-config.o \ - coresight-cfg-preload.o coresight-cfg-afdo.o \ + coresight-cfg-preload.o coresight-cfg-afdo.o coresight-cfg-pstop.o \ coresight-syscfg-configfs.o coresight-trace-id.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \ diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.c b/drivers/hwtracing/coresight/coresight-cfg-preload.c index e237a4edfa09..4980e68483c5 100644 --- a/drivers/hwtracing/coresight/coresight-cfg-preload.c +++ b/drivers/hwtracing/coresight/coresight-cfg-preload.c @@ -13,6 +13,7 @@ static struct cscfg_feature_desc *preload_feats[] = { #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) &strobe_etm4x, + &gen_etrig_etm4x, #endif NULL }; @@ -20,6 +21,7 @@ static struct cscfg_feature_desc *preload_feats[] = { static struct cscfg_config_desc *preload_cfgs[] = { #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) &afdo_etm4x, + &pstop_etm4x, #endif NULL }; diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.h b/drivers/hwtracing/coresight/coresight-cfg-preload.h index 21299e175477..291ba530a6a5 100644 --- a/drivers/hwtracing/coresight/coresight-cfg-preload.h +++ b/drivers/hwtracing/coresight/coresight-cfg-preload.h @@ -10,4 +10,6 @@ #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) extern struct cscfg_feature_desc strobe_etm4x; extern struct cscfg_config_desc afdo_etm4x; +extern struct cscfg_feature_desc gen_etrig_etm4x; +extern struct cscfg_config_desc pstop_etm4x; #endif diff --git a/drivers/hwtracing/coresight/coresight-cfg-pstop.c b/drivers/hwtracing/coresight/coresight-cfg-pstop.c new file mode 100644 index 000000000000..c2bfbd07bfaf --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-cfg-pstop.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(C) 2023 Marvell. + * Based on coresight-cfg-afdo.c + */ + +#include "coresight-config.h" + +/* ETMv4 includes and features */ +#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) +#include "coresight-etm4x-cfg.h" + +/* preload configurations and features */ + +/* preload in features for ETMv4 */ + +/* panic_stop feature */ +static struct cscfg_parameter_desc gen_etrig_params[] = { + { + .name = "address", + .value = (u64)panic, + }, +}; + +static struct cscfg_regval_desc gen_etrig_regs[] = { + /* resource selector */ + { + .type = CS_CFG_REG_TYPE_RESOURCE, + .offset = TRCRSCTLRn(2), + .hw_info = ETM4_CFG_RES_SEL, + .val32 = 0x40001, + }, + /* single address comparator */ + { + .type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_64BIT | + CS_CFG_REG_TYPE_VAL_PARAM, + .offset = TRCACVRn(0), + .val32 = 0x0, + }, + { + .type = CS_CFG_REG_TYPE_RESOURCE, + .offset = TRCACATRn(0), + .val64 = 0xf00, + }, + /* Driver external output[0] with comparator out */ + { + .type = CS_CFG_REG_TYPE_RESOURCE, + .offset = TRCEVENTCTL0R, + .val32 = 0x2, + }, + /* end of regs */ +}; + +struct cscfg_feature_desc gen_etrig_etm4x = { + .name = "gen_etrig", + .description = "Generate external trigger on address match\n" + "parameter \'address\': address of kernel address\n", + .match_flags = CS_CFG_MATCH_CLASS_SRC_ETM4, + .nr_params = ARRAY_SIZE(gen_etrig_params), + .params_desc = gen_etrig_params, + .nr_regs = ARRAY_SIZE(gen_etrig_regs), + .regs_desc = gen_etrig_regs, +}; + +/* create a panic stop configuration */ + +/* the total number of parameters in used features */ +#define PSTOP_NR_PARAMS ARRAY_SIZE(gen_etrig_params) + +static const char *pstop_ref_names[] = { + "gen_etrig", +}; + +struct cscfg_config_desc pstop_etm4x = { + .name = "panicstop", + .description = "Stop ETM on kernel panic\n", + .nr_feat_refs = ARRAY_SIZE(pstop_ref_names), + .feat_ref_names = pstop_ref_names, + .nr_total_params = PSTOP_NR_PARAMS, +}; + +/* end of ETM4x configurations */ +#endif /* IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) */ From b47d1fcd0d4b65c055d1e0b60fdb33c9b93b7bc5 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 12 Feb 2025 17:19:18 +0530 Subject: [PATCH 0389/2157] Documentation: coresight: Panic support Add documentation on using coresight during panic and watchdog. Signed-off-by: Linu Cherian Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250212114918.548431-9-lcherian@marvell.com --- Documentation/trace/coresight/panic.rst | 362 ++++++++++++++++++++++++ 1 file changed, 362 insertions(+) create mode 100644 Documentation/trace/coresight/panic.rst diff --git a/Documentation/trace/coresight/panic.rst b/Documentation/trace/coresight/panic.rst new file mode 100644 index 000000000000..a58aa914c241 --- /dev/null +++ b/Documentation/trace/coresight/panic.rst @@ -0,0 +1,362 @@ +=================================================== +Using Coresight for Kernel panic and Watchdog reset +=================================================== + +Introduction +------------ +This documentation is about using Linux coresight trace support to +debug kernel panic and watchdog reset scenarios. + +Coresight trace during Kernel panic +----------------------------------- +From the coresight driver point of view, addressing the kernel panic +situation has four main requirements. + +a. Support for allocation of trace buffer pages from reserved memory area. + Platform can advertise this using a new device tree property added to + relevant coresight nodes. + +b. Support for stopping coresight blocks at the time of panic + +c. Saving required metadata in the specified format + +d. Support for reading trace data captured at the time of panic + +Allocation of trace buffer pages from reserved RAM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A new optional device tree property "memory-region" is added to the +Coresight TMC device nodes, that would give the base address and size of trace +buffer. + +Static allocation of trace buffers would ensure that both IOMMU enabled +and disabled cases are handled. Also, platforms that support persistent +RAM will allow users to read trace data in the subsequent boot without +booting the crashdump kernel. + +Note: +For ETR sink devices, this reserved region will be used for both trace +capture and trace data retrieval. +For ETF sink devices, internal SRAM would be used for trace capture, +and they would be synced to reserved region for retrieval. + + +Disabling coresight blocks at the time of panic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In order to avoid the situation of losing relevant trace data after a +kernel panic, it would be desirable to stop the coresight blocks at the +time of panic. + +This can be achieved by configuring the comparator, CTI and sink +devices as below:: + + Trigger on panic + Comparator --->External out --->CTI -->External In---->ETR/ETF stop + +Saving metadata at the time of kernel panic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Coresight metadata involves all additional data that are required for a +successful trace decode in addition to the trace data. This involves +ETR/ETF/ETB register snapshot etc. + +A new optional device property "memory-region" is added to +the ETR/ETF/ETB device nodes for this. + +Reading trace data captured at the time of panic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Trace data captured at the time of panic, can be read from rebooted kernel +or from crashdump kernel using a special device file /dev/crash_tmc_xxx. +This device file is created only when there is a valid crashdata available. + +General flow of trace capture and decode incase of kernel panic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +1. Enable source and sink on all the cores using the sysfs interface. + ETR sinks should have trace buffers allocated from reserved memory, + by selecting "resrv" buffer mode from sysfs. + +2. Run relevant tests. + +3. On a kernel panic, all coresight blocks are disabled, necessary + metadata is synced by kernel panic handler. + + System would eventually reboot or boot a crashdump kernel. + +4. For platforms that supports crashdump kernel, raw trace data can be + dumped using the coresight sysfs interface from the crashdump kernel + itself. Persistent RAM is not a requirement in this case. + +5. For platforms that supports persistent RAM, trace data can be dumped + using the coresight sysfs interface in the subsequent Linux boot. + Crashdump kernel is not a requirement in this case. Persistent RAM + ensures that trace data is intact across reboot. + +Coresight trace during Watchdog reset +------------------------------------- +The main difference between addressing the watchdog reset and kernel panic +case are below, + +a. Saving coresight metadata need to be taken care by the + SCP(system control processor) firmware in the specified format, + instead of kernel. + +b. Reserved memory region given by firmware for trace buffer and metadata + has to be in persistent RAM. + Note: This is a requirement for watchdog reset case but optional + in kernel panic case. + +Watchdog reset can be supported only on platforms that meet the above +two requirements. + +Sample commands for testing a Kernel panic case with ETR sink +------------------------------------------------------------- + +1. Boot Linux kernel with "crash_kexec_post_notifiers" added to the kernel + bootargs. This is mandatory if the user would like to read the tracedata + from the crashdump kernel. + +2. Enable the preloaded ETM configuration:: + + #echo 1 > /sys/kernel/config/cs-syscfg/configurations/panicstop/enable + +3. Configure CTI using sysfs interface:: + + #./cti_setup.sh + + #cat cti_setup.sh + + + cd /sys/bus/coresight/devices/ + + ap_cti_config () { + #ETM trig out[0] trigger to Channel 0 + echo 0 4 > channels/trigin_attach + } + + etf_cti_config () { + #ETF Flush in trigger from Channel 0 + echo 0 1 > channels/trigout_attach + echo 1 > channels/trig_filter_enable + } + + etr_cti_config () { + #ETR Flush in from Channel 0 + echo 0 1 > channels/trigout_attach + echo 1 > channels/trig_filter_enable + } + + ctidevs=`find . -name "cti*"` + + for i in $ctidevs + do + cd $i + + connection=`find . -name "ete*"` + if [ ! -z "$connection" ] + then + echo "AP CTI config for $i" + ap_cti_config + fi + + connection=`find . -name "tmc_etf*"` + if [ ! -z "$connection" ] + then + echo "ETF CTI config for $i" + etf_cti_config + fi + + connection=`find . -name "tmc_etr*"` + if [ ! -z "$connection" ] + then + echo "ETR CTI config for $i" + etr_cti_config + fi + + cd .. + done + +Note: CTI connections are SOC specific and hence the above script is +added just for reference. + +4. Choose reserved buffer mode for ETR buffer:: + + #echo "resrv" > /sys/bus/coresight/devices/tmc_etr0/buf_mode_preferred + +5. Enable stop on flush trigger configuration:: + + #echo 1 > /sys/bus/coresight/devices/tmc_etr0/stop_on_flush + +6. Start Coresight tracing on cores 1 and 2 using sysfs interface + +7. Run some application on core 1:: + + #taskset -c 1 dd if=/dev/urandom of=/dev/null & + +8. Invoke kernel panic on core 2:: + + #echo 1 > /proc/sys/kernel/panic + #taskset -c 2 echo c > /proc/sysrq-trigger + +9. From rebooted kernel or crashdump kernel, read crashdata:: + + #dd if=/dev/crash_tmc_etr0 of=/trace/cstrace.bin + +10. Run opencsd decoder tools/scripts to generate the instruction trace. + +Sample instruction trace dump +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Core1 dump:: + + A etm4_enable_hw: ffff800008ae1dd4 + CONTEXT EL2 etm4_enable_hw: ffff800008ae1dd4 + I etm4_enable_hw: ffff800008ae1dd4: + d503201f nop + I etm4_enable_hw: ffff800008ae1dd8: + d503201f nop + I etm4_enable_hw: ffff800008ae1ddc: + d503201f nop + I etm4_enable_hw: ffff800008ae1de0: + d503201f nop + I etm4_enable_hw: ffff800008ae1de4: + d503201f nop + I etm4_enable_hw: ffff800008ae1de8: + d503233f paciasp + I etm4_enable_hw: ffff800008ae1dec: + a9be7bfd stp x29, x30, [sp, #-32]! + I etm4_enable_hw: ffff800008ae1df0: + 910003fd mov x29, sp + I etm4_enable_hw: ffff800008ae1df4: + a90153f3 stp x19, x20, [sp, #16] + I etm4_enable_hw: ffff800008ae1df8: + 2a0003f4 mov w20, w0 + I etm4_enable_hw: ffff800008ae1dfc: + 900085b3 adrp x19, ffff800009b95000 + I etm4_enable_hw: ffff800008ae1e00: + 910f4273 add x19, x19, #0x3d0 + I etm4_enable_hw: ffff800008ae1e04: + f8747a60 ldr x0, [x19, x20, lsl #3] + E etm4_enable_hw: ffff800008ae1e08: + b4000140 cbz x0, ffff800008ae1e30 + I 149.039572921 etm4_enable_hw: ffff800008ae1e30: + a94153f3 ldp x19, x20, [sp, #16] + I 149.039572921 etm4_enable_hw: ffff800008ae1e34: + 52800000 mov w0, #0x0 // #0 + I 149.039572921 etm4_enable_hw: ffff800008ae1e38: + a8c27bfd ldp x29, x30, [sp], #32 + + ..snip + + 149.052324811 chacha_block_generic: ffff800008642d80: + 9100a3e0 add x0, + I 149.052324811 chacha_block_generic: ffff800008642d84: + b86178a2 ldr w2, [x5, x1, lsl #2] + I 149.052324811 chacha_block_generic: ffff800008642d88: + 8b010803 add x3, x0, x1, lsl #2 + I 149.052324811 chacha_block_generic: ffff800008642d8c: + b85fc063 ldur w3, [x3, #-4] + I 149.052324811 chacha_block_generic: ffff800008642d90: + 0b030042 add w2, w2, w3 + I 149.052324811 chacha_block_generic: ffff800008642d94: + b8217882 str w2, [x4, x1, lsl #2] + I 149.052324811 chacha_block_generic: ffff800008642d98: + 91000421 add x1, x1, #0x1 + I 149.052324811 chacha_block_generic: ffff800008642d9c: + f100443f cmp x1, #0x11 + + +Core 2 dump:: + + A etm4_enable_hw: ffff800008ae1dd4 + CONTEXT EL2 etm4_enable_hw: ffff800008ae1dd4 + I etm4_enable_hw: ffff800008ae1dd4: + d503201f nop + I etm4_enable_hw: ffff800008ae1dd8: + d503201f nop + I etm4_enable_hw: ffff800008ae1ddc: + d503201f nop + I etm4_enable_hw: ffff800008ae1de0: + d503201f nop + I etm4_enable_hw: ffff800008ae1de4: + d503201f nop + I etm4_enable_hw: ffff800008ae1de8: + d503233f paciasp + I etm4_enable_hw: ffff800008ae1dec: + a9be7bfd stp x29, x30, [sp, #-32]! + I etm4_enable_hw: ffff800008ae1df0: + 910003fd mov x29, sp + I etm4_enable_hw: ffff800008ae1df4: + a90153f3 stp x19, x20, [sp, #16] + I etm4_enable_hw: ffff800008ae1df8: + 2a0003f4 mov w20, w0 + I etm4_enable_hw: ffff800008ae1dfc: + 900085b3 adrp x19, ffff800009b95000 + I etm4_enable_hw: ffff800008ae1e00: + 910f4273 add x19, x19, #0x3d0 + I etm4_enable_hw: ffff800008ae1e04: + f8747a60 ldr x0, [x19, x20, lsl #3] + E etm4_enable_hw: ffff800008ae1e08: + b4000140 cbz x0, ffff800008ae1e30 + I 149.046243445 etm4_enable_hw: ffff800008ae1e30: + a94153f3 ldp x19, x20, [sp, #16] + I 149.046243445 etm4_enable_hw: ffff800008ae1e34: + 52800000 mov w0, #0x0 // #0 + I 149.046243445 etm4_enable_hw: ffff800008ae1e38: + a8c27bfd ldp x29, x30, [sp], #32 + I 149.046243445 etm4_enable_hw: ffff800008ae1e3c: + d50323bf autiasp + E 149.046243445 etm4_enable_hw: ffff800008ae1e40: + d65f03c0 ret + A ete_sysreg_write: ffff800008adfa18 + + ..snip + + I 149.05422547 panic: ffff800008096300: + a90363f7 stp x23, x24, [sp, #48] + I 149.05422547 panic: ffff800008096304: + 6b00003f cmp w1, w0 + I 149.05422547 panic: ffff800008096308: + 3a411804 ccmn w0, #0x1, #0x4, ne // ne = any + N 149.05422547 panic: ffff80000809630c: + 540001e0 b.eq ffff800008096348 // b.none + I 149.05422547 panic: ffff800008096310: + f90023f9 str x25, [sp, #64] + E 149.05422547 panic: ffff800008096314: + 97fe44ef bl ffff8000080276d0 + A panic: ffff80000809634c + I 149.05422547 panic: ffff80000809634c: + 910102d5 add x21, x22, #0x40 + I 149.05422547 panic: ffff800008096350: + 52800020 mov w0, #0x1 // #1 + E 149.05422547 panic: ffff800008096354: + 94166b8b bl ffff800008631180 + N 149.054225518 bust_spinlocks: ffff800008631180: + 340000c0 cbz w0, ffff800008631198 + I 149.054225518 bust_spinlocks: ffff800008631184: + f000a321 adrp x1, ffff800009a98000 + I 149.054225518 bust_spinlocks: ffff800008631188: + b9405c20 ldr w0, [x1, #92] + I 149.054225518 bust_spinlocks: ffff80000863118c: + 11000400 add w0, w0, #0x1 + I 149.054225518 bust_spinlocks: ffff800008631190: + b9005c20 str w0, [x1, #92] + E 149.054225518 bust_spinlocks: ffff800008631194: + d65f03c0 ret + A panic: ffff800008096358 + +Perf based testing +------------------ + +Starting perf session +~~~~~~~~~~~~~~~~~~~~~ +ETF:: + + perf record -e cs_etm/panicstop,@tmc_etf1/ -C 1 + perf record -e cs_etm/panicstop,@tmc_etf2/ -C 2 + +ETR:: + + perf record -e cs_etm/panicstop,@tmc_etr0/ -C 1,2 + +Reading trace data after panic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Same sysfs based method explained above can be used to retrieve and +decode the trace data after the reboot on kernel panic. From 66e80e2f21762bdaa56a4d63c79e5aca5f6bd93c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 10 Feb 2025 16:33:36 -0600 Subject: [PATCH 0390/2157] iio: resolver: ad2s1210: use bitmap_write Replace bitmap array access with bitmap_write. Accessing the bitmap array directly is not recommended and now there is a helper function that can be used. Reviewed-by: Linus Walleij Signed-off-by: David Lechner Link: https://patch.msgid.link/20250210-gpio-set-array-helper-v3-10-d6a673674da8@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/resolver/ad2s1210.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iio/resolver/ad2s1210.c b/drivers/iio/resolver/ad2s1210.c index 7f18df790157..ab860cedecd1 100644 --- a/drivers/iio/resolver/ad2s1210.c +++ b/drivers/iio/resolver/ad2s1210.c @@ -46,6 +46,7 @@ */ #include +#include #include #include #include @@ -175,12 +176,12 @@ struct ad2s1210_state { static int ad2s1210_set_mode(struct ad2s1210_state *st, enum ad2s1210_mode mode) { struct gpio_descs *gpios = st->mode_gpios; - DECLARE_BITMAP(bitmap, 2); + DECLARE_BITMAP(bitmap, 2) = { }; if (!gpios) return mode == st->fixed_mode ? 0 : -EOPNOTSUPP; - bitmap[0] = mode; + bitmap_write(bitmap, mode, 0, 2); return gpiod_multi_set_value_cansleep(gpios, bitmap); } @@ -1426,7 +1427,7 @@ static int ad2s1210_setup_gpios(struct ad2s1210_state *st) struct device *dev = &st->sdev->dev; struct gpio_descs *resolution_gpios; struct gpio_desc *reset_gpio; - DECLARE_BITMAP(bitmap, 2); + DECLARE_BITMAP(bitmap, 2) = { }; int ret; /* should not be sampling on startup */ @@ -1470,7 +1471,7 @@ static int ad2s1210_setup_gpios(struct ad2s1210_state *st) return dev_err_probe(dev, -EINVAL, "requires exactly 2 resolution-gpios\n"); - bitmap[0] = st->resolution; + bitmap_write(bitmap, st->resolution, 0, 2); ret = gpiod_multi_set_value_cansleep(resolution_gpios, bitmap); if (ret < 0) From 3b29bcee8f6f703a5952b85fc2ffcbcfb0862db4 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:45 +0200 Subject: [PATCH 0391/2157] iio: imu: adis: Add custom ops struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a custom ops struct letting users define custom read and write functions. Some adis devices might define a completely different spi protocol from the one used in the default implementation. Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Nuno Sá Signed-off-by: Nuno Sá Signed-off-by: Robert Budai Link: https://patch.msgid.link/20250217105753.605465-2-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 16 +++++++++++++--- include/linux/iio/imu/adis.h | 28 +++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 494171844812..54915c7a3e76 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -223,13 +223,13 @@ int __adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask, int ret; u32 __val; - ret = __adis_read_reg(adis, reg, &__val, size); + ret = adis->ops->read(adis, reg, &__val, size); if (ret) return ret; __val = (__val & ~mask) | (val & mask); - return __adis_write_reg(adis, reg, __val, size); + return adis->ops->write(adis, reg, __val, size); } EXPORT_SYMBOL_NS_GPL(__adis_update_bits_base, "IIO_ADISLIB"); @@ -468,7 +468,7 @@ int adis_single_conversion(struct iio_dev *indio_dev, guard(mutex)(&adis->state_lock); - ret = __adis_read_reg(adis, chan->address, &uval, + ret = adis->ops->read(adis, chan->address, &uval, chan->scan_type.storagebits / 8); if (ret) return ret; @@ -488,6 +488,11 @@ int adis_single_conversion(struct iio_dev *indio_dev, } EXPORT_SYMBOL_NS_GPL(adis_single_conversion, "IIO_ADISLIB"); +static const struct adis_ops adis_default_ops = { + .read = __adis_read_reg, + .write = __adis_write_reg, +}; + /** * adis_init() - Initialize adis device structure * @adis: The adis device @@ -517,6 +522,11 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->spi = spi; adis->data = data; + if (!adis->ops->write && !adis->ops->read) + adis->ops = &adis_default_ops; + else if (!adis->ops->write || !adis->ops->read) + return -EINVAL; + iio_device_set_drvdata(indio_dev, adis); if (data->has_paging) { diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 4bb98d9731de..89cfa75ae9ea 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -94,6 +94,18 @@ struct adis_data { unsigned int burst_max_speed_hz; }; +/** + * struct adis_ops: Custom ops for adis devices. + * @write: Custom spi write implementation. + * @read: Custom spi read implementation. + */ +struct adis_ops { + int (*write)(struct adis *adis, unsigned int reg, unsigned int value, + unsigned int size); + int (*read)(struct adis *adis, unsigned int reg, unsigned int *value, + unsigned int size); +}; + /** * struct adis - ADIS device instance data * @spi: Reference to SPI device which owns this ADIS IIO device @@ -101,6 +113,7 @@ struct adis_data { * @data: ADIS chip variant specific data * @burst_extra_len: Burst extra length. Should only be used by devices that can * dynamically change their burst mode length. + * @ops: ops struct for custom read and write functions * @state_lock: Lock used by the device to protect state * @msg: SPI message object * @xfer: SPI transfer objects to be used for a @msg @@ -116,6 +129,7 @@ struct adis { const struct adis_data *data; unsigned int burst_extra_len; + const struct adis_ops *ops; /** * The state_lock is meant to be used during operations that require * a sequence of SPI R/W in order to protect the SPI transfer @@ -168,7 +182,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, u8 val) { - return __adis_write_reg(adis, reg, val, 1); + return adis->ops->write(adis, reg, val, 1); } /** @@ -180,7 +194,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, u16 val) { - return __adis_write_reg(adis, reg, val, 2); + return adis->ops->write(adis, reg, val, 2); } /** @@ -192,7 +206,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, u32 val) { - return __adis_write_reg(adis, reg, val, 4); + return adis->ops->write(adis, reg, val, 4); } /** @@ -207,7 +221,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 2); + ret = adis->ops->read(adis, reg, &tmp, 2); if (ret == 0) *val = tmp; @@ -226,7 +240,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 4); + ret = adis->ops->read(adis, reg, &tmp, 4); if (ret == 0) *val = tmp; @@ -244,7 +258,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, unsigned int val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_write_reg(adis, reg, val, size); + return adis->ops->write(adis, reg, val, size); } /** @@ -258,7 +272,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_read_reg(adis, reg, val, size); + return adis->ops->read(adis, reg, val, size); } /** From 7f15d7a7d12dbcb4a846ca19d9565e0c11ea6694 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:46 +0200 Subject: [PATCH 0392/2157] iio: imu: adis: Add reset to custom ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch allows the custom definition of reset functionality for adis object. It is useful in cases where the driver does not need to sleep after the reset since it is handled by the library. Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Nuno Sá Signed-off-by: Nuno Sá Signed-off-by: Robert Budai Link: https://patch.msgid.link/20250217105753.605465-3-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 5 +++-- include/linux/iio/imu/adis.h | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 54915c7a3e76..84344f052fb7 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -491,6 +491,7 @@ EXPORT_SYMBOL_NS_GPL(adis_single_conversion, "IIO_ADISLIB"); static const struct adis_ops adis_default_ops = { .read = __adis_read_reg, .write = __adis_write_reg, + .reset = __adis_reset, }; /** @@ -522,9 +523,9 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->spi = spi; adis->data = data; - if (!adis->ops->write && !adis->ops->read) + if (!adis->ops->write && !adis->ops->read && !adis->ops->reset) adis->ops = &adis_default_ops; - else if (!adis->ops->write || !adis->ops->read) + else if (!adis->ops->write || !adis->ops->read || !adis->ops->reset) return -EINVAL; iio_device_set_drvdata(indio_dev, adis); diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 89cfa75ae9ea..52652f51db2e 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -98,12 +98,15 @@ struct adis_data { * struct adis_ops: Custom ops for adis devices. * @write: Custom spi write implementation. * @read: Custom spi read implementation. + * @reset: Custom sw reset implementation. The custom implementation does not + * need to sleep after the reset. It's done by the library already. */ struct adis_ops { int (*write)(struct adis *adis, unsigned int reg, unsigned int value, unsigned int size); int (*read)(struct adis *adis, unsigned int reg, unsigned int *value, unsigned int size); + int (*reset)(struct adis *adis); }; /** From 9fa98d94131806cae0441d05213d36da480d7389 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:47 +0200 Subject: [PATCH 0393/2157] iio: imu: adis: Add DIAG_STAT register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices may have more than 16 bits of status. This patch allows the user to specify the size of the DIAG_STAT register. It defaults to 2 if not specified. This is mainly for backward compatibility. Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Nuno Sá Signed-off-by: Nuno Sá Signed-off-by: Robert Budai Link: https://patch.msgid.link/20250217105753.605465-4-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 18 +++++++++++++++--- include/linux/iio/imu/adis.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 84344f052fb7..1c646c36aeb1 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -304,11 +304,20 @@ EXPORT_SYMBOL_NS(__adis_enable_irq, "IIO_ADISLIB"); */ int __adis_check_status(struct adis *adis) { - u16 status; + unsigned int status; + int diag_stat_bits; + u16 status_16; int ret; int i; - ret = __adis_read_reg_16(adis, adis->data->diag_stat_reg, &status); + if (adis->data->diag_stat_size) { + ret = adis->ops->read(adis, adis->data->diag_stat_reg, &status, + adis->data->diag_stat_size); + } else { + ret = __adis_read_reg_16(adis, adis->data->diag_stat_reg, + &status_16); + status = status_16; + } if (ret) return ret; @@ -317,7 +326,10 @@ int __adis_check_status(struct adis *adis) if (status == 0) return 0; - for (i = 0; i < 16; ++i) { + diag_stat_bits = BITS_PER_BYTE * (adis->data->diag_stat_size ? + adis->data->diag_stat_size : 2); + + for (i = 0; i < diag_stat_bits; ++i) { if (status & BIT(i)) { dev_err(&adis->spi->dev, "%s.\n", adis->data->status_error_msgs[i]); diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 52652f51db2e..aa160511e265 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -44,6 +44,8 @@ struct adis_timeout { * @glob_cmd_reg: Register address of the GLOB_CMD register * @msc_ctrl_reg: Register address of the MSC_CTRL register * @diag_stat_reg: Register address of the DIAG_STAT register + * @diag_stat_size: Length (in bytes) of the DIAG_STAT register. If 0 the + * default length is 2 bytes long. * @prod_id_reg: Register address of the PROD_ID register * @prod_id: Product ID code that should be expected when reading @prod_id_reg * @self_test_mask: Bitmask of supported self-test operations @@ -70,6 +72,7 @@ struct adis_data { unsigned int glob_cmd_reg; unsigned int msc_ctrl_reg; unsigned int diag_stat_reg; + unsigned int diag_stat_size; unsigned int prod_id_reg; unsigned int prod_id; From 6e507f996c47dc6999ae8d58205efa78231f18c9 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:48 +0200 Subject: [PATCH 0394/2157] dt-bindings: iio: Add adis16550 bindings Document the ADIS16550 device devicetree bindings. Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Signed-off-by: Robert Budai Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250217105753.605465-5-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/imu/adi,adis16550.yaml | 74 +++++++++++++++++++ MAINTAINERS | 10 +++ 2 files changed, 84 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml diff --git a/Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml b/Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml new file mode 100644 index 000000000000..a4c273c7a67f --- /dev/null +++ b/Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/imu/adi,adis16550.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices ADIS16550 and similar IMUs + +maintainers: + - Nuno Sa + - Ramona Gradinariu + - Antoniu Miclaus + - Robert Budai + +properties: + compatible: + enum: + - adi,adis16550 + + reg: + maxItems: 1 + + spi-cpha: true + + spi-cpol: true + + spi-max-frequency: + maximum: 15000000 + + vdd-supply: true + + interrupts: + maxItems: 1 + + reset-gpios: + description: + Active low RESET pin. + maxItems: 1 + + clocks: + description: If not provided, then the internal clock is used. + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - spi-cpha + - spi-cpol + - spi-max-frequency + - vdd-supply + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +additionalProperties: false + +examples: + - | + #include + spi { + #address-cells = <1>; + #size-cells = <0>; + imu@0 { + compatible = "adi,adis16550"; + reg = <0>; + spi-max-frequency = <15000000>; + spi-cpol; + spi-cpha; + vdd-supply = <&vdd>; + interrupts = <4 IRQ_TYPE_EDGE_FALLING>; + interrupt-parent = <&gpio>; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index bd04375ab4a2..d8ca113b83ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1497,6 +1497,16 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml F: drivers/iio/imu/adis16475.c +ANALOG DEVICES INC ADIS16550 DRIVER +M: Nuno Sa +M: Ramona Gradinariu +M: Antoniu Miclaus +M: Robert Budai +L: linux-iio@vger.kernel.org +S: Supported +W: https://ez.analog.com/linux-software-drivers +F: Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml + ANALOG DEVICES INC ADM1177 DRIVER M: Michael Hennerich L: linux-hwmon@vger.kernel.org From 354fd6e86fac60b7c1ce2e6c83d4e6bf8af95f59 Mon Sep 17 00:00:00 2001 From: Fiona Behrens Date: Mon, 17 Feb 2025 21:58:14 +0100 Subject: [PATCH 0395/2157] rust: io: rename `io::Io` accessors Rename the I/O accessors provided by `Io` to encode the type as number instead of letter. This is in preparation for Port I/O support to use a trait for generic accessors. Add a `c_fn` argument to the accessor generation macro to translate between rust and C names. Suggested-by: Danilo Krummrich Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/PIO.20support/near/499460541 Signed-off-by: Fiona Behrens Acked-by: Danilo Krummrich Acked-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250217-io-generic-rename-v1-1-06d97a9e3179@kloenk.dev Signed-off-by: Greg Kroah-Hartman --- rust/kernel/io.rs | 66 ++++++++++++++++----------------- samples/rust/rust_driver_pci.rs | 12 +++--- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index d4a73e52e3ee..72d80a6f131e 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -98,9 +98,9 @@ pub fn maxsize(&self) -> usize { ///# fn no_run() -> Result<(), Error> { /// // SAFETY: Invalid usage for example purposes. /// let iomem = unsafe { IoMem::<{ core::mem::size_of::() }>::new(0xBAAAAAAD)? }; -/// iomem.writel(0x42, 0x0); -/// assert!(iomem.try_writel(0x42, 0x0).is_ok()); -/// assert!(iomem.try_writel(0x42, 0x4).is_err()); +/// iomem.write32(0x42, 0x0); +/// assert!(iomem.try_write32(0x42, 0x0).is_ok()); +/// assert!(iomem.try_write32(0x42, 0x4).is_err()); /// # Ok(()) /// # } /// ``` @@ -108,7 +108,7 @@ pub fn maxsize(&self) -> usize { pub struct Io(IoRaw); macro_rules! define_read { - ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $c_fn:ident -> $type_name:ty) => { /// Read IO data from a given offset known at compile time. /// /// Bound checks are performed on compile time, hence if the offset is not known at compile @@ -119,7 +119,7 @@ pub fn $name(&self, offset: usize) -> $type_name { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$name(addr as _) } + unsafe { bindings::$c_fn(addr as _) } } /// Read IO data from a given offset. @@ -131,13 +131,13 @@ pub fn $try_name(&self, offset: usize) -> Result<$type_name> { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$name(addr as _) }) + Ok(unsafe { bindings::$c_fn(addr as _) }) } }; } macro_rules! define_write { - ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $c_fn:ident <- $type_name:ty) => { /// Write IO data from a given offset known at compile time. /// /// Bound checks are performed on compile time, hence if the offset is not known at compile @@ -148,7 +148,7 @@ pub fn $name(&self, value: $type_name, offset: usize) { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$name(value, addr as _, ) } + unsafe { bindings::$c_fn(value, addr as _, ) } } /// Write IO data from a given offset. @@ -160,7 +160,7 @@ pub fn $try_name(&self, value: $type_name, offset: usize) -> Result { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$name(value, addr as _) } + unsafe { bindings::$c_fn(value, addr as _) } Ok(()) } }; @@ -218,43 +218,43 @@ fn io_addr_assert(&self, offset: usize) -> usize { self.addr() + offset } - define_read!(readb, try_readb, u8); - define_read!(readw, try_readw, u16); - define_read!(readl, try_readl, u32); + define_read!(read8, try_read8, readb -> u8); + define_read!(read16, try_read16, readw -> u16); + define_read!(read32, try_read32, readl -> u32); define_read!( #[cfg(CONFIG_64BIT)] - readq, - try_readq, - u64 + read64, + try_read64, + readq -> u64 ); - define_read!(readb_relaxed, try_readb_relaxed, u8); - define_read!(readw_relaxed, try_readw_relaxed, u16); - define_read!(readl_relaxed, try_readl_relaxed, u32); + define_read!(read8_relaxed, try_read8_relaxed, readb_relaxed -> u8); + define_read!(read16_relaxed, try_read16_relaxed, readw_relaxed -> u16); + define_read!(read32_relaxed, try_read32_relaxed, readl_relaxed -> u32); define_read!( #[cfg(CONFIG_64BIT)] - readq_relaxed, - try_readq_relaxed, - u64 + read64_relaxed, + try_read64_relaxed, + readq_relaxed -> u64 ); - define_write!(writeb, try_writeb, u8); - define_write!(writew, try_writew, u16); - define_write!(writel, try_writel, u32); + define_write!(write8, try_write8, writeb <- u8); + define_write!(write16, try_write16, writew <- u16); + define_write!(write32, try_write32, writel <- u32); define_write!( #[cfg(CONFIG_64BIT)] - writeq, - try_writeq, - u64 + write64, + try_write64, + writeq <- u64 ); - define_write!(writeb_relaxed, try_writeb_relaxed, u8); - define_write!(writew_relaxed, try_writew_relaxed, u16); - define_write!(writel_relaxed, try_writel_relaxed, u32); + define_write!(write8_relaxed, try_write8_relaxed, writeb_relaxed <- u8); + define_write!(write16_relaxed, try_write16_relaxed, writew_relaxed <- u16); + define_write!(write32_relaxed, try_write32_relaxed, writel_relaxed <- u32); define_write!( #[cfg(CONFIG_64BIT)] - writeq_relaxed, - try_writeq_relaxed, - u64 + write64_relaxed, + try_write64_relaxed, + writeq_relaxed <- u64 ); } diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index 1fb6e44f3395..ddc52db71a82 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -43,17 +43,17 @@ struct SampleDriver { impl SampleDriver { fn testdev(index: &TestIndex, bar: &Bar0) -> Result { // Select the test. - bar.writeb(index.0, Regs::TEST); + bar.write8(index.0, Regs::TEST); - let offset = u32::from_le(bar.readl(Regs::OFFSET)) as usize; - let data = bar.readb(Regs::DATA); + let offset = u32::from_le(bar.read32(Regs::OFFSET)) as usize; + let data = bar.read8(Regs::DATA); // Write `data` to `offset` to increase `count` by one. // - // Note that we need `try_writeb`, since `offset` can't be checked at compile-time. - bar.try_writeb(data, offset)?; + // Note that we need `try_write8`, since `offset` can't be checked at compile-time. + bar.try_write8(data, offset)?; - Ok(bar.readl(Regs::COUNT)) + Ok(bar.read32(Regs::COUNT)) } } From cba6bdfd79291b82422b6e6b4036ebc076d5625f Mon Sep 17 00:00:00 2001 From: Mrinmay Sarkar Date: Thu, 5 Dec 2024 12:24:19 +0530 Subject: [PATCH 0396/2157] bus: mhi: host: pci_generic: Add support for SA8775P endpoint Add MHI controller config for SA8775P endpoint. SA8775P supports IP_SW usecase only. Hence use separate configuration to enable IP_SW channels. Signed-off-by: Mrinmay Sarkar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241205065422.2515086-2-quic_msarkar@quicinc.com [mani: updated commit message and removed 'modem' reference in config] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index c41119b9079f..efe43592e170 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -297,6 +297,19 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = { .sideband_wake = false, }; +static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = { + MHI_CHANNEL_CONFIG_UL(46, "IP_SW0", 2048, 1), + MHI_CHANNEL_CONFIG_DL(47, "IP_SW0", 2048, 2), +}; + +static struct mhi_event_config mhi_qcom_sa8775p_events[] = { + /* first ring is control+data ring */ + MHI_EVENT_CONFIG_CTRL(0, 64), + /* Software channels dedicated event ring */ + MHI_EVENT_CONFIG_SW_DATA(1, 64), + MHI_EVENT_CONFIG_SW_DATA(2, 64), +}; + static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = { MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1), MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1), @@ -327,6 +340,15 @@ static struct mhi_event_config modem_qcom_v1_mhi_events[] = { MHI_EVENT_CONFIG_HW_DATA(5, 2048, 101) }; +static const struct mhi_controller_config mhi_qcom_sa8775p_config = { + .max_channels = 128, + .timeout_ms = 8000, + .num_channels = ARRAY_SIZE(mhi_qcom_sa8775p_channels), + .ch_cfg = mhi_qcom_sa8775p_channels, + .num_events = ARRAY_SIZE(mhi_qcom_sa8775p_events), + .event_cfg = mhi_qcom_sa8775p_events, +}; + static const struct mhi_controller_config modem_qcom_v2_mhiv_config = { .max_channels = 128, .timeout_ms = 8000, @@ -346,6 +368,16 @@ static const struct mhi_controller_config modem_qcom_v1_mhiv_config = { .event_cfg = modem_qcom_v1_mhi_events, }; +static const struct mhi_pci_dev_info mhi_qcom_sa8775p_info = { + .name = "qcom-sa8775p", + .edl_trigger = false, + .config = &mhi_qcom_sa8775p_config, + .bar_num = MHI_PCI_DEFAULT_BAR_NUM, + .dma_data_width = 32, + .mru_default = 32768, + .sideband_wake = false, +}; + static const struct mhi_pci_dev_info mhi_qcom_sdx75_info = { .name = "qcom-sdx75m", .fw = "qcom/sdx75m/xbl.elf", @@ -772,6 +804,8 @@ static const struct mhi_pci_dev_info mhi_netprisma_fcun69_info = { /* Keep the list sorted based on the PID. New VID should be added as the last entry */ static const struct pci_device_id mhi_pci_id_table[] = { + {PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0116), + .driver_data = (kernel_ulong_t) &mhi_qcom_sa8775p_info }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, PCI_VENDOR_ID_QCOM, 0x010c), From 72cef52b353cc693d71ad37d80237d975f9951d9 Mon Sep 17 00:00:00 2001 From: Alan Borzeszkowski Date: Tue, 21 Jan 2025 19:04:01 +0100 Subject: [PATCH 0397/2157] thunderbolt: Make tb_tunnel_alloc_usb3() error paths consistent with the rest Make the tb_tunnel_alloc_usb3() error codepaths consistent with the DisplayPort and PCIe counterparts. No functional changes. Signed-off-by: Alan Borzeszkowski Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 8229a6fbda5a..072f7e80263e 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -2224,19 +2224,15 @@ struct tb_tunnel *tb_tunnel_alloc_usb3(struct tb *tb, struct tb_port *up, path = tb_path_alloc(tb, down, TB_USB3_HOPID, up, TB_USB3_HOPID, 0, "USB3 Down"); - if (!path) { - tb_tunnel_put(tunnel); - return NULL; - } + if (!path) + goto err_free; tb_usb3_init_path(path); tunnel->paths[TB_USB3_PATH_DOWN] = path; path = tb_path_alloc(tb, up, TB_USB3_HOPID, down, TB_USB3_HOPID, 0, "USB3 Up"); - if (!path) { - tb_tunnel_put(tunnel); - return NULL; - } + if (!path) + goto err_free; tb_usb3_init_path(path); tunnel->paths[TB_USB3_PATH_UP] = path; @@ -2253,6 +2249,10 @@ struct tb_tunnel *tb_tunnel_alloc_usb3(struct tb *tb, struct tb_port *up, } return tunnel; + +err_free: + tb_tunnel_put(tunnel); + return NULL; } /** From 2ecdbebefea7ecb007c89c5f998125e9a86afce3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Feb 2025 12:38:57 +0100 Subject: [PATCH 0398/2157] coresight: catu: Constify amba_id table 'struct amba_id' table is not modified so can be changed to const for more safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250222-coresight-const-arm-id-v1-1-69a377cd098b@linaro.org --- drivers/hwtracing/coresight/coresight-catu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 3378bb77e6b4..fa170c966bc3 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -594,7 +594,7 @@ static void catu_remove(struct amba_device *adev) __catu_remove(&adev->dev); } -static struct amba_id catu_ids[] = { +static const struct amba_id catu_ids[] = { CS_AMBA_ID(0x000bb9ee), {}, }; From 7b6d52e83705197372e850674d7b1eb4d0bb445c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Feb 2025 12:38:58 +0100 Subject: [PATCH 0399/2157] coresight: tpda: Constify amba_id table 'struct amba_id' table is not modified so can be changed to const for more safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250222-coresight-const-arm-id-v1-2-69a377cd098b@linaro.org --- drivers/hwtracing/coresight/coresight-tpda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c index 189a4abc2561..573da8427428 100644 --- a/drivers/hwtracing/coresight/coresight-tpda.c +++ b/drivers/hwtracing/coresight/coresight-tpda.c @@ -331,7 +331,7 @@ static void tpda_remove(struct amba_device *adev) * Different TPDA has different periph id. * The difference is 0-7 bits' value. So ignore 0-7 bits. */ -static struct amba_id tpda_ids[] = { +static const struct amba_id tpda_ids[] = { { .id = 0x000f0f00, .mask = 0x000fff00, From 2197cbc22c1be0cbbd361956fdb14335d5b018fd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Feb 2025 12:38:59 +0100 Subject: [PATCH 0400/2157] coresight: tpdm: Constify amba_id table 'struct amba_id' table is not modified so can be changed to const for more safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250222-coresight-const-arm-id-v1-3-69a377cd098b@linaro.org --- drivers/hwtracing/coresight/coresight-tpdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c index c38f9701665e..afc4c18dd35d 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.c +++ b/drivers/hwtracing/coresight/coresight-tpdm.c @@ -1305,7 +1305,7 @@ static void tpdm_remove(struct amba_device *adev) * Different TPDM has different periph id. * The difference is 0-7 bits' value. So ignore 0-7 bits. */ -static struct amba_id tpdm_ids[] = { +static const struct amba_id tpdm_ids[] = { { .id = 0x001f0e00, .mask = 0x00ffff00, From 00204ae3d6712ee053353920e3ce2b00c35ef75b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 10 Feb 2025 16:14:22 +0100 Subject: [PATCH 0401/2157] dm-integrity: set ti->error on memory allocation failure The dm-integrity target didn't set the error string when memory allocation failed. This patch fixes it. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-integrity.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ee9f7cecd78e..f41e64f1dab2 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5081,16 +5081,19 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); if (!ic->recalc_bitmap) { + ti->error = "Could not allocate memory for bitmap"; r = -ENOMEM; goto bad; } ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); if (!ic->may_write_bitmap) { + ti->error = "Could not allocate memory for bitmap"; r = -ENOMEM; goto bad; } ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL); if (!ic->bbs) { + ti->error = "Could not allocate memory for bitmap"; r = -ENOMEM; goto bad; } From 3be1f253935b1f1f3a445451ae8358781e1f62f6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 10 Feb 2025 16:15:22 +0100 Subject: [PATCH 0402/2157] dm-bufio: remove unused return value The return value of the function "forget_buffer" is not tested, so we can remove it. Signed-off-by: Mikulas Patocka --- drivers/md/dm-bufio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index aab8240429b0..9c8ed65cd87e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -2234,7 +2234,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c } EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); -static bool forget_buffer(struct dm_bufio_client *c, sector_t block) +static void forget_buffer(struct dm_bufio_client *c, sector_t block) { struct dm_buffer *b; @@ -2249,8 +2249,6 @@ static bool forget_buffer(struct dm_bufio_client *c, sector_t block) cache_put_and_wake(c, b); } } - - return b ? true : false; } /* From 51ba14fb368e5fadadcca803ec64a51f3edbbdc1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 17 Feb 2025 22:36:02 +0100 Subject: [PATCH 0403/2157] dm-verity: do forward error correction on metadata I/O errors Do forward error correction if metadata I/O fails. Signed-off-by: Mikulas Patocka Reviewed-by: Sami Tolvanen --- drivers/md/dm-verity-target.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e86c1431b108..fc5ae123670b 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -311,7 +311,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { data = dm_bufio_get(v->bufio, hash_block, &buf); - if (data == NULL) { + if (IS_ERR_OR_NULL(data)) { /* * In tasklet and the hash was not in the bufio cache. * Return early and resume execution from a work-queue @@ -324,8 +324,24 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, &buf, bio->bi_ioprio); } - if (IS_ERR(data)) - return PTR_ERR(data); + if (IS_ERR(data)) { + if (skip_unverified) + return 1; + r = PTR_ERR(data); + data = dm_bufio_new(v->bufio, hash_block, &buf); + if (IS_ERR(data)) + return r; + if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA, + hash_block, data) == 0) { + aux = dm_bufio_get_aux_data(buf); + aux->hash_verified = 1; + goto release_ok; + } else { + dm_bufio_release(buf); + dm_bufio_forget(v->bufio, hash_block); + return r; + } + } aux = dm_bufio_get_aux_data(buf); @@ -366,6 +382,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, } } +release_ok: data += offset; memcpy(want_digest, data, v->digest_size); r = 0; @@ -1761,7 +1778,7 @@ static struct target_type verity_target = { .name = "verity", /* Note: the LSMs depend on the singleton and immutable features */ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, From ff3f7115f4ff365ce759cd4e67e6653e9acfca86 Mon Sep 17 00:00:00 2001 From: Sweet Tea Dorminy Date: Wed, 19 Feb 2025 17:55:39 -0500 Subject: [PATCH 0404/2157] dm vdo: remove remaining ring references Lists are the new rings, so update all remaining references to rings to talk about lists. Signed-off-by: Sweet Tea Dorminy Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/block-map.c | 2 +- drivers/md/dm-vdo/dedupe.c | 20 ++++++++++---------- drivers/md/dm-vdo/packer.h | 2 +- drivers/md/dm-vdo/priority-table.c | 2 +- drivers/md/dm-vdo/recovery-journal.h | 6 +++--- drivers/md/dm-vdo/slab-depot.c | 10 +++++----- drivers/md/dm-vdo/wait-queue.c | 2 +- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c index 1f7cdd837ff9..baf683cabb1b 100644 --- a/drivers/md/dm-vdo/block-map.c +++ b/drivers/md/dm-vdo/block-map.c @@ -451,7 +451,7 @@ static struct page_info * __must_check find_page(struct vdo_page_cache *cache, * select_lru_page() - Determine which page is least recently used. * * Picks the least recently used from among the non-busy entries at the front of each of the lru - * ring. Since whenever we mark a page busy we also put it to the end of the ring it is unlikely + * list. Since whenever we mark a page busy we also put it to the end of the list it is unlikely * that the entries at the front are busy unless the queue is very short, but not impossible. * * Return: A pointer to the info structure for a relevant page, or NULL if no such page can be diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index b6f8e2dc7729..00fff21b8058 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -226,7 +226,7 @@ struct hash_lock { * A list containing the data VIOs sharing this lock, all having the same record name and * data block contents, linked by their hash_lock_node fields. */ - struct list_head duplicate_ring; + struct list_head duplicate_vios; /* The number of data_vios sharing this lock instance */ data_vio_count_t reference_count; @@ -343,7 +343,7 @@ static void return_hash_lock_to_pool(struct hash_zone *zone, struct hash_lock *l { memset(lock, 0, sizeof(*lock)); INIT_LIST_HEAD(&lock->pool_node); - INIT_LIST_HEAD(&lock->duplicate_ring); + INIT_LIST_HEAD(&lock->duplicate_vios); vdo_waitq_init(&lock->waiters); list_add_tail(&lock->pool_node, &zone->lock_pool); } @@ -441,7 +441,7 @@ static void set_hash_lock(struct data_vio *data_vio, struct hash_lock *new_lock) VDO_ASSERT_LOG_ONLY(data_vio->hash_zone != NULL, "must have a hash zone when holding a hash lock"); VDO_ASSERT_LOG_ONLY(!list_empty(&data_vio->hash_lock_entry), - "must be on a hash lock ring when holding a hash lock"); + "must be on a hash lock list when holding a hash lock"); VDO_ASSERT_LOG_ONLY(old_lock->reference_count > 0, "hash lock reference must be counted"); @@ -464,10 +464,10 @@ static void set_hash_lock(struct data_vio *data_vio, struct hash_lock *new_lock) if (new_lock != NULL) { /* - * Keep all data_vios sharing the lock on a ring since they can complete in any + * Keep all data_vios sharing the lock on a list since they can complete in any * order and we'll always need a pointer to one to compare data. */ - list_move_tail(&data_vio->hash_lock_entry, &new_lock->duplicate_ring); + list_move_tail(&data_vio->hash_lock_entry, &new_lock->duplicate_vios); new_lock->reference_count += 1; if (new_lock->max_references < new_lock->reference_count) new_lock->max_references = new_lock->reference_count; @@ -1789,10 +1789,10 @@ static bool is_hash_collision(struct hash_lock *lock, struct data_vio *candidate struct hash_zone *zone; bool collides; - if (list_empty(&lock->duplicate_ring)) + if (list_empty(&lock->duplicate_vios)) return false; - lock_holder = list_first_entry(&lock->duplicate_ring, struct data_vio, + lock_holder = list_first_entry(&lock->duplicate_vios, struct data_vio, hash_lock_entry); zone = candidate->hash_zone; collides = !blocks_equal(lock_holder->vio.data, candidate->vio.data); @@ -1815,7 +1815,7 @@ static inline int assert_hash_lock_preconditions(const struct data_vio *data_vio return result; result = VDO_ASSERT(list_empty(&data_vio->hash_lock_entry), - "must not already be a member of a hash lock ring"); + "must not already be a member of a hash lock list"); if (result != VDO_SUCCESS) return result; @@ -1942,8 +1942,8 @@ void vdo_release_hash_lock(struct data_vio *data_vio) "returned hash lock must not be in use with state %s", get_hash_lock_state_name(lock->state)); VDO_ASSERT_LOG_ONLY(list_empty(&lock->pool_node), - "hash lock returned to zone must not be in a pool ring"); - VDO_ASSERT_LOG_ONLY(list_empty(&lock->duplicate_ring), + "hash lock returned to zone must not be in a pool list"); + VDO_ASSERT_LOG_ONLY(list_empty(&lock->duplicate_vios), "hash lock returned to zone must not reference DataVIOs"); return_hash_lock_to_pool(zone, lock); diff --git a/drivers/md/dm-vdo/packer.h b/drivers/md/dm-vdo/packer.h index 0f3be44710b5..8c8d6892582d 100644 --- a/drivers/md/dm-vdo/packer.h +++ b/drivers/md/dm-vdo/packer.h @@ -46,7 +46,7 @@ struct compressed_block { /* * Each packer_bin holds an incomplete batch of data_vios that only partially fill a compressed - * block. The bins are kept in a ring sorted by the amount of unused space so the first bin with + * block. The bins are kept in a list sorted by the amount of unused space so the first bin with * enough space to hold a newly-compressed data_vio can easily be found. When the bin fills up or * is flushed, the first uncanceled data_vio in the bin is selected to be the agent for that bin. * Upon entering the packer, each data_vio already has its compressed data in the first slot of the diff --git a/drivers/md/dm-vdo/priority-table.c b/drivers/md/dm-vdo/priority-table.c index 42d3d8d0e4b5..9bae8256ba4e 100644 --- a/drivers/md/dm-vdo/priority-table.c +++ b/drivers/md/dm-vdo/priority-table.c @@ -199,7 +199,7 @@ void vdo_priority_table_remove(struct priority_table *table, struct list_head *e /* * Remove the entry from the bucket list, remembering a pointer to another entry in the - * ring. + * list. */ next_entry = entry->next; list_del_init(entry); diff --git a/drivers/md/dm-vdo/recovery-journal.h b/drivers/md/dm-vdo/recovery-journal.h index 899071173015..25e7ec6d19f6 100644 --- a/drivers/md/dm-vdo/recovery-journal.h +++ b/drivers/md/dm-vdo/recovery-journal.h @@ -43,9 +43,9 @@ * has a vio which is used to commit that block to disk. The vio's data is the on-disk * representation of the journal block. In addition each in-memory block has a buffer which is used * to accumulate entries while a partial commit of the block is in progress. In-memory blocks are - * kept on two rings. Free blocks live on the 'free_tail_blocks' ring. When a block becomes active - * (see below) it is moved to the 'active_tail_blocks' ring. When a block is fully committed, it is - * moved back to the 'free_tail_blocks' ring. + * kept on two lists. Free blocks live on the 'free_tail_blocks' list. When a block becomes active + * (see below) it is moved to the 'active_tail_blocks' list. When a block is fully committed, it is + * moved back to the 'free_tail_blocks' list. * * When entries are added to the journal, they are added to the active in-memory block, as * indicated by the 'active_block' field. If the caller wishes to wait for the entry to be diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index c4d3bbdf5b69..1ab01b6f263a 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -139,7 +139,7 @@ static bool is_slab_journal_blank(const struct vdo_slab *slab) } /** - * mark_slab_journal_dirty() - Put a slab journal on the dirty ring of its allocator in the correct + * mark_slab_journal_dirty() - Put a slab journal on the dirty list of its allocator in the correct * order. * @journal: The journal to be marked dirty. * @lock: The recovery journal lock held by the slab journal. @@ -821,7 +821,7 @@ static void commit_tail(struct slab_journal *journal) /* * Since we are about to commit the tail block, this journal no longer needs to be on the - * ring of journals which the recovery journal might ask to commit. + * list of journals which the recovery journal might ask to commit. */ mark_slab_journal_clean(journal); @@ -1371,7 +1371,7 @@ static unsigned int calculate_slab_priority(struct vdo_slab *slab) static void prioritize_slab(struct vdo_slab *slab) { VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry), - "a slab must not already be on a ring when prioritizing"); + "a slab must not already be on a list when prioritizing"); slab->priority = calculate_slab_priority(slab); vdo_priority_table_enqueue(slab->allocator->prioritized_slabs, slab->priority, &slab->allocq_entry); @@ -2562,7 +2562,7 @@ static void queue_slab(struct vdo_slab *slab) int result; VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry), - "a requeued slab must not already be on a ring"); + "a requeued slab must not already be on a list"); if (vdo_is_read_only(allocator->depot->vdo)) return; @@ -3297,7 +3297,7 @@ int vdo_release_block_reference(struct block_allocator *allocator, * This is a min_heap callback function orders slab_status structures using the 'is_clean' field as * the primary key and the 'emptiness' field as the secondary key. * - * Slabs need to be pushed onto the rings in the same order they are to be popped off. Popping + * Slabs need to be pushed onto the lists in the same order they are to be popped off. Popping * should always get the most empty first, so pushing should be from most empty to least empty. * Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements * before larger ones. diff --git a/drivers/md/dm-vdo/wait-queue.c b/drivers/md/dm-vdo/wait-queue.c index 6e1e739277ef..f81ed0cee2bf 100644 --- a/drivers/md/dm-vdo/wait-queue.c +++ b/drivers/md/dm-vdo/wait-queue.c @@ -34,7 +34,7 @@ void vdo_waitq_enqueue_waiter(struct vdo_wait_queue *waitq, struct vdo_waiter *w waitq->last_waiter->next_waiter = waiter; } - /* In both cases, the waiter we added to the ring becomes the last waiter. */ + /* In both cases, the waiter we added to the list becomes the last waiter. */ waitq->last_waiter = waiter; waitq->length += 1; } From dc8f646cd870671ccebf896b35433362252b850d Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Wed, 19 Feb 2025 18:00:19 -0500 Subject: [PATCH 0405/2157] dm vdo: rework processing of loaded refcount byte arrays Clear provisional refcount values and count free/allocated blocks in one integrated loop. Process 8 aligned bytes at a time instead of every byte individually. On an Intel i7-11850H this reduces the CPU time needed to process a loaded refcount block by a factor of about 5-6. On a large system the refcount loading may be the largest factor in device startup time. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/slab-depot.c | 109 +++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index 1ab01b6f263a..f3d80ff7bef5 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -2164,23 +2164,6 @@ static void dirty_all_reference_blocks(struct vdo_slab *slab) dirty_block(&slab->reference_blocks[i]); } -/** - * clear_provisional_references() - Clear the provisional reference counts from a reference block. - * @block: The block to clear. - */ -static void clear_provisional_references(struct reference_block *block) -{ - vdo_refcount_t *counters = get_reference_counters_for_block(block); - block_count_t j; - - for (j = 0; j < COUNTS_PER_BLOCK; j++) { - if (counters[j] == PROVISIONAL_REFERENCE_COUNT) { - counters[j] = EMPTY_REFERENCE_COUNT; - block->allocated_count--; - } - } -} - static inline bool journal_points_equal(struct journal_point first, struct journal_point second) { @@ -2188,6 +2171,90 @@ static inline bool journal_points_equal(struct journal_point first, (first.entry_count == second.entry_count)); } +/** + * match_bytes() - Check an 8-byte word for bytes matching the value specified + * @input: A word to examine the bytes of + * @match: The byte value sought + * + * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise + */ +static inline u64 match_bytes(u64 input, u8 match) +{ + u64 temp = input ^ (match * 0x0101010101010101ULL); + /* top bit of each byte is set iff top bit of temp byte is clear; rest are 0 */ + u64 test_top_bits = ~temp & 0x8080808080808080ULL; + /* top bit of each byte is set iff low 7 bits of temp byte are clear; rest are useless */ + u64 test_low_bits = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL); + /* return 1 when both tests indicate temp byte is 0 */ + return (test_top_bits & test_low_bits) >> 7; +} + +/** + * count_valid_references() - Process a newly loaded refcount array + * @counters: the array of counters from a metadata block + * + * Scan a 8-byte-aligned array of counters, fixing up any "provisional" values that weren't + * cleaned up at shutdown, changing them internally to "empty". + * + * Return: the number of blocks that are referenced (counters not "empty") + */ +static unsigned int count_valid_references(vdo_refcount_t *counters) +{ + u64 *words = (u64 *)counters; + /* It's easier to count occurrences of a specific byte than its absences. */ + unsigned int empty_count = 0; + /* For speed, we process 8 bytes at once. */ + unsigned int words_left = COUNTS_PER_BLOCK / sizeof(u64); + + /* + * Sanity check assumptions used for optimizing this code: Counters are bytes. The counter + * array is a multiple of the word size. + */ + BUILD_BUG_ON(sizeof(vdo_refcount_t) != 1); + BUILD_BUG_ON((COUNTS_PER_BLOCK % sizeof(u64)) != 0); + + while (words_left > 0) { + /* + * This is used effectively as 8 byte-size counters. Byte 0 counts how many words + * had the target value found in byte 0, etc. We just have to avoid overflow. + */ + u64 split_count = 0; + /* + * The counter "% 255" trick used below to fold split_count into empty_count + * imposes a limit of 254 bytes examined each iteration of the outer loop. We + * process a word at a time, so that limit gets rounded down to 31 u64 words. + */ + const unsigned int max_words_per_iteration = 254 / sizeof(u64); + unsigned int iter_words_left = min_t(unsigned int, words_left, + max_words_per_iteration); + + words_left -= iter_words_left; + + while (iter_words_left--) { + u64 word = *words; + u64 temp; + + /* First, if we have any provisional refcount values, clear them. */ + temp = match_bytes(word, PROVISIONAL_REFERENCE_COUNT); + if (temp) { + /* + * 'temp' has 0x01 bytes where 'word' has PROVISIONAL; this xor + * will alter just those bytes, changing PROVISIONAL to EMPTY. + */ + word ^= temp * (PROVISIONAL_REFERENCE_COUNT ^ EMPTY_REFERENCE_COUNT); + *words = word; + } + + /* Now count the EMPTY_REFERENCE_COUNT bytes, updating the 8 counters. */ + split_count += match_bytes(word, EMPTY_REFERENCE_COUNT); + words++; + } + empty_count += split_count % 255; + } + + return COUNTS_PER_BLOCK - empty_count; +} + /** * unpack_reference_block() - Unpack reference counts blocks into the internal memory structure. * @packed: The written reference block to be unpacked. @@ -2196,7 +2263,6 @@ static inline bool journal_points_equal(struct journal_point first, static void unpack_reference_block(struct packed_reference_block *packed, struct reference_block *block) { - block_count_t index; sector_count_t i; struct vdo_slab *slab = block->slab; vdo_refcount_t *counters = get_reference_counters_for_block(block); @@ -2222,11 +2288,7 @@ static void unpack_reference_block(struct packed_reference_block *packed, } } - block->allocated_count = 0; - for (index = 0; index < COUNTS_PER_BLOCK; index++) { - if (counters[index] != EMPTY_REFERENCE_COUNT) - block->allocated_count++; - } + block->allocated_count = count_valid_references(counters); } /** @@ -2247,7 +2309,6 @@ static void finish_reference_block_load(struct vdo_completion *completion) struct packed_reference_block *packed = (struct packed_reference_block *) data; unpack_reference_block(packed, block); - clear_provisional_references(block); slab->free_blocks -= block->allocated_count; } return_vio_to_pool(pooled); From 88f7f56d16f568f19e1a695af34a7f4a6ce537a6 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Thu, 20 Feb 2025 19:20:14 +0800 Subject: [PATCH 0406/2157] dm: fix unconditional IO throttle caused by REQ_PREFLUSH When a bio with REQ_PREFLUSH is submitted to dm, __send_empty_flush() generates a flush_bio with REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, which causes the flush_bio to be throttled by wbt_wait(). An example from v5.4, similar problem also exists in upstream: crash> bt 2091206 PID: 2091206 TASK: ffff2050df92a300 CPU: 109 COMMAND: "kworker/u260:0" #0 [ffff800084a2f7f0] __switch_to at ffff80004008aeb8 #1 [ffff800084a2f820] __schedule at ffff800040bfa0c4 #2 [ffff800084a2f880] schedule at ffff800040bfa4b4 #3 [ffff800084a2f8a0] io_schedule at ffff800040bfa9c4 #4 [ffff800084a2f8c0] rq_qos_wait at ffff8000405925bc #5 [ffff800084a2f940] wbt_wait at ffff8000405bb3a0 #6 [ffff800084a2f9a0] __rq_qos_throttle at ffff800040592254 #7 [ffff800084a2f9c0] blk_mq_make_request at ffff80004057cf38 #8 [ffff800084a2fa60] generic_make_request at ffff800040570138 #9 [ffff800084a2fae0] submit_bio at ffff8000405703b4 #10 [ffff800084a2fb50] xlog_write_iclog at ffff800001280834 [xfs] #11 [ffff800084a2fbb0] xlog_sync at ffff800001280c3c [xfs] #12 [ffff800084a2fbf0] xlog_state_release_iclog at ffff800001280df4 [xfs] #13 [ffff800084a2fc10] xlog_write at ffff80000128203c [xfs] #14 [ffff800084a2fcd0] xlog_cil_push at ffff8000012846dc [xfs] #15 [ffff800084a2fda0] xlog_cil_push_work at ffff800001284a2c [xfs] #16 [ffff800084a2fdb0] process_one_work at ffff800040111d08 #17 [ffff800084a2fe00] worker_thread at ffff8000401121cc #18 [ffff800084a2fe70] kthread at ffff800040118de4 After commit 2def2845cc33 ("xfs: don't allow log IO to be throttled"), the metadata submitted by xlog_write_iclog() should not be throttled. But due to the existence of the dm layer, throttling flush_bio indirectly causes the metadata bio to be throttled. Fix this by conditionally adding REQ_IDLE to flush_bio.bi_opf, which makes wbt_should_throttle() return false to avoid wbt_wait(). Signed-off-by: Jinliang Zheng Reviewed-by: Tianxiang Peng Reviewed-by: Hao Peng Signed-off-by: Mikulas Patocka --- drivers/md/dm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4d1e42891d24..5ab7574c0c76 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1540,14 +1540,18 @@ static void __send_empty_flush(struct clone_info *ci) { struct dm_table *t = ci->map; struct bio flush_bio; + blk_opf_t opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + + if ((ci->io->orig_bio->bi_opf & (REQ_IDLE | REQ_SYNC)) == + (REQ_IDLE | REQ_SYNC)) + opf |= REQ_IDLE; /* * Use an on-stack bio for this, it's safe since we don't * need to reference it after submit. It's just used as * the basis for the clone(s). */ - bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0, - REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC); + bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0, opf); ci->bio = &flush_bio; ci->sector_count = 0; From 8bd2fa086a04886798b505f28db4002525895203 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 8 Aug 2024 10:51:41 +0300 Subject: [PATCH 0407/2157] virtio: break and reset virtio devices on device_shutdown() Hongyu reported a hang on kexec in a VM. QEMU reported invalid memory accesses during the hang. Invalid read at addr 0x102877002, size 2, region '(null)', reason: rejected Invalid write at addr 0x102877A44, size 2, region '(null)', reason: rejected ... It was traced down to virtio-console. Kexec works fine if virtio-console is not in use. The issue is that virtio-console continues to write to the MMIO even after underlying virtio-pci device is reset. Additionally, Eric noticed that IOMMUs are reset before devices, if devices are not reset on shutdown they continue to poke at guest memory and get errors from the IOMMU. Some devices get wedged then. The problem can be solved by breaking all virtio devices on virtio bus shutdown, then resetting them. Reported-by: Eric Auger Reported-by: Hongyu Ning Message-ID: Tested-by: Eric Auger Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index ba37665188b5..150753c3b578 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -395,6 +395,34 @@ static const struct cpumask *virtio_irq_get_affinity(struct device *_d, return dev->config->get_vq_affinity(dev, irq_vec); } +static void virtio_dev_shutdown(struct device *_d) +{ + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + + /* + * Stop accesses to or from the device. + * We only need to do it if there's a driver - no accesses otherwise. + */ + if (!drv) + return; + + /* + * Some devices get wedged if you kick them after they are + * reset. Mark all vqs as broken to make sure we don't. + */ + virtio_break_device(dev); + /* + * Guarantee that any callback will see vq->broken as true. + */ + virtio_synchronize_cbs(dev); + /* + * As IOMMUs are reset on shutdown, this will block device access to memory. + * Some devices get wedged if this happens, so reset to make sure it does not. + */ + dev->config->reset(dev); +} + static const struct bus_type virtio_bus = { .name = "virtio", .match = virtio_dev_match, @@ -403,6 +431,7 @@ static const struct bus_type virtio_bus = { .probe = virtio_dev_probe, .remove = virtio_dev_remove, .irq_get_affinity = virtio_irq_get_affinity, + .shutdown = virtio_dev_shutdown, }; int __register_virtio_driver(struct virtio_driver *driver, struct module *owner) From 040b17ae0e15bd7100432e0a20c6557d463a8c9f Mon Sep 17 00:00:00 2001 From: Fiona Behrens Date: Mon, 24 Feb 2025 19:36:43 +0100 Subject: [PATCH 0408/2157] rust: io: fix devres test with new io accessor functions Fix doctest of `Devres` which still used `writeb` instead of `write8`. Fixes: 354fd6e86fac ("rust: io: rename `io::Io` accessors") Signed-off-by: Fiona Behrens Link: https://lore.kernel.org/r/20250224-rust-iowrite-read8-fix-v1-1-c6abee346897@kloenk.dev Signed-off-by: Greg Kroah-Hartman --- rust/kernel/devres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 942376f6f3af..ddb1ce4a78d9 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -92,7 +92,7 @@ struct DevresInner { /// let devres = Devres::new(&dev, iomem, GFP_KERNEL)?; /// /// let res = devres.try_access().ok_or(ENXIO)?; -/// res.writel(0x42, 0x0); +/// res.write8(0x42, 0x0); /// # Ok(()) /// # } /// ``` From ae376910f52b815003d433243bee93ca000537c0 Mon Sep 17 00:00:00 2001 From: Yufeng Wang Date: Mon, 13 Jan 2025 18:03:00 +0800 Subject: [PATCH 0409/2157] tools/virtio: Add DMA_MAPPING_ERROR and sg_dma_len api define for virtio test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when we build tools/virtio, meet below error information. cc -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register -pthread -c -o virtio_ring.o ../../drivers/virtio/virtio_ring.c ../../drivers/virtio/virtio_ring.c: in function 'vring_need_unmap_buffer': ../../drivers/virtio/virtio_ring.c:294:54: error:'DMA_MAPPING_ERROR'Undeclared (first use within this function) 294 | return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR); | ^~~~~~~~~~~~~~~~~ ../../drivers/virtio/virtio_ring.c:294:54: Note: Each undeclared identifier is only reported once within the function it appears in ../../drivers/virtio/virtio_ring.c: in function 'vring_map_one_sg': ../../drivers/virtio/virtio_ring.c:369:24: error:Implicit declaration function'sg_dma_len' [-Wimplicit-function-declaration] 369 | *len = sg_dma_len(sg); | ^~~~~~~~~~ ../../drivers/virtio/virtio_ring.c: in function'virtqueue_add_desc_split': ../../drivers/virtio/virtio_ring.c:518:37: error:'DMA_MAPPING_ERROR'Undeclared (first use within this function) 518 | extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; | ^~~~~~~~~~~~~~~~~ ../../drivers/virtio/virtio_ring.c: in function'virtqueue_add_indirect_packed': ../../drivers/virtio/virtio_ring.c:1370:61: error: 'DMA_MAPPING_ERROR'Undeclared (first use within this function) 1370 | extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; | ^~~~~~~~~~~~~~~~~ ../../drivers/virtio/virtio_ring.c: in function'virtqueue_add_packed': ../../drivers/virtio/virtio_ring.c:1535:41: error:'DMA_MAPPING_ERROR'Undeclared (first use within this function) 1535 | DMA_MAPPING_ERROR : addr; | ^~~~~~~~~~~~~~~~~ to fix, add DMA_MAPPING_ERROR define for virtio test. Fixes: c7e1b422afac ("virtio_ring: perform premapped operations based on per-buffer") Signed-off-by: Yufeng Wang Message-Id: <20250113100300.174382-1-wangyufeng@kylinos.cn> Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 822ecaa8e4df..095958461788 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -31,6 +31,7 @@ enum dma_data_direction { #define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) #define sg_dma_address(sg) (0) +#define sg_dma_len(sg) (0) #define dma_need_sync(v, a) (0) #define dma_unmap_single_attrs(d, a, s, r, t) do { \ (void)(d); (void)(a); (void)(s); (void)(r); (void)(t); \ @@ -43,4 +44,16 @@ enum dma_data_direction { } while (0) #define dma_max_mapping_size(...) SIZE_MAX +/* + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. + */ +#define DMA_MAPPING_ERROR (~(dma_addr_t)0) + #endif From 83dc0370f915b9ad996387c141399615627e32b6 Mon Sep 17 00:00:00 2001 From: Yufeng Wang Date: Tue, 14 Jan 2025 11:36:35 +0800 Subject: [PATCH 0410/2157] tools: virtio/linux/compiler.h: Add data_race() define. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port over the definition of data_race() so we can build tools/virtio. cc -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register -pthread -c -o virtio_ring.o ../../drivers/virtio/virtio_ring.c ../../drivers/virtio/virtio_ring.c: in function'vring_interrupt': ../../drivers/virtio/virtio_ring.c:2711:17: error:Implicit declaration function'data_race' [-Wimplicit-function-declaration] 2711 | data_race(vq->event_triggered = true); | ^~~~~~~~~ Signed-off-by: Yufeng Wang Message-Id: <20250114033635.20623-1-wangyufeng@kylinos.cn> Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/compiler.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h index 1f3a15b954b9..204ef0e9f542 100644 --- a/tools/virtio/linux/compiler.h +++ b/tools/virtio/linux/compiler.h @@ -10,4 +10,29 @@ #define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) #define __aligned(x) __attribute((__aligned__(x))) + +/** + * data_race - mark an expression as containing intentional data races + * + * This data_race() macro is useful for situations in which data races + * should be forgiven. One example is diagnostic code that accesses + * shared variables but is not a part of the core synchronization design. + * For example, if accesses to a given variable are protected by a lock, + * except for diagnostic code, then the accesses under the lock should + * be plain C-language accesses and those in the diagnostic code should + * use data_race(). This way, KCSAN will complain if buggy lockless + * accesses to that variable are introduced, even if the buggy accesses + * are protected by READ_ONCE() or WRITE_ONCE(). + * + * This macro *does not* affect normal code generation, but is a hint + * to tooling that data races here are to be ignored. If the access must + * be atomic *and* KCSAN should ignore the access, use both data_race() + * and READ_ONCE(), for example, data_race(READ_ONCE(x)). + */ +#define data_race(expr) \ +({ \ + __auto_type __v = (expr); \ + __v; \ +}) + #endif From ec05544c858fef45bc00738c2ced196ed91be611 Mon Sep 17 00:00:00 2001 From: Yufeng Wang Date: Tue, 14 Jan 2025 14:47:26 +0800 Subject: [PATCH 0411/2157] tools: virtio/linux/module.h add MODULE_DESCRIPTION() define. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when we build tools/virtio, meet below error information. cc -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register -pthread -c -o virtio_ring.o ../../drivers/virtio/virtio_ring.c ../../drivers/virtio/virtio_ring.c:3276:20: error:expected declaration specifiers or ‘...’ before string constant 3276 | MODULE_DESCRIPTION("Virtio ring implementation"); | to fix, add MODULE_DESCRIPTION() define for virtio test. Fixes: ab0727f3ddb8 ("virtio: add missing MODULE_DESCRIPTION() macros") Signed-off-by: Yufeng Wang Message-Id: <20250114064726.33079-1-wangyufeng@kylinos.cn> Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/module.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h index 9dfa96fea2b2..b91681fc1571 100644 --- a/tools/virtio/linux/module.h +++ b/tools/virtio/linux/module.h @@ -5,3 +5,10 @@ static __attribute__((unused)) const char *__MODULE_LICENSE_name = \ __MODULE_LICENSE_value +#ifndef MODULE_AUTHOR +#define MODULE_AUTHOR(x) +#endif + +#ifndef MODULE_DESCRIPTION +#define MODULE_DESCRIPTION(x) +#endif From 5dd639a1646ef5fe8f4bf270fad47c5c3755b9b6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 29 Jan 2025 15:09:22 -0600 Subject: [PATCH 0412/2157] vhost-scsi: Fix handling of multiple calls to vhost_scsi_set_endpoint If vhost_scsi_set_endpoint is called multiple times without a vhost_scsi_clear_endpoint between them, we can hit multiple bugs found by Haoran Zhang: 1. Use-after-free when no tpgs are found: This fixes a use after free that occurs when vhost_scsi_set_endpoint is called more than once and calls after the first call do not find any tpgs to add to the vs_tpg. When vhost_scsi_set_endpoint first finds tpgs to add to the vs_tpg array match=true, so we will do: vhost_vq_set_backend(vq, vs_tpg); ... kfree(vs->vs_tpg); vs->vs_tpg = vs_tpg; If vhost_scsi_set_endpoint is called again and no tpgs are found match=false so we skip the vhost_vq_set_backend call leaving the pointer to the vs_tpg we then free via: kfree(vs->vs_tpg); vs->vs_tpg = vs_tpg; If a scsi request is then sent we do: vhost_scsi_handle_vq -> vhost_scsi_get_req -> vhost_vq_get_backend which sees the vs_tpg we just did a kfree on. 2. Tpg dir removal hang: This patch fixes an issue where we cannot remove a LIO/target layer tpg (and structs above it like the target) dir due to the refcount dropping to -1. The problem is that if vhost_scsi_set_endpoint detects a tpg is already in the vs->vs_tpg array or if the tpg has been removed so target_depend_item fails, the undepend goto handler will do target_undepend_item on all tpgs in the vs_tpg array dropping their refcount to 0. At this time vs_tpg contains both the tpgs we have added in the current vhost_scsi_set_endpoint call as well as tpgs we added in previous calls which are also in vs->vs_tpg. Later, when vhost_scsi_clear_endpoint runs it will do target_undepend_item on all the tpgs in the vs->vs_tpg which will drop their refcount to -1. Userspace will then not be able to remove the tpg and will hang when it tries to do rmdir on the tpg dir. 3. Tpg leak: This fixes a bug where we can leak tpgs and cause them to be un-removable because the target name is overwritten when vhost_scsi_set_endpoint is called multiple times but with different target names. The bug occurs if a user has called VHOST_SCSI_SET_ENDPOINT and setup a vhost-scsi device to target/tpg mapping, then calls VHOST_SCSI_SET_ENDPOINT again with a new target name that has tpgs we haven't seen before (target1 has tpg1 but target2 has tpg2). When this happens we don't teardown the old target tpg mapping and just overwrite the target name and the vs->vs_tpg array. Later when we do vhost_scsi_clear_endpoint, we are passed in either target1 or target2's name and we will only match that target's tpgs when we loop over the vs->vs_tpg. We will then return from the function without doing target_undepend_item on the tpgs. Because of all these bugs, it looks like being able to call vhost_scsi_set_endpoint multiple times was never supported. The major user, QEMU, already has checks to prevent this use case. So to fix the issues, this patch prevents vhost_scsi_set_endpoint from being called if it's already successfully added tpgs. To add, remove or change the tpg config or target name, you must do a vhost_scsi_clear_endpoint first. Fixes: 25b98b64e284 ("vhost scsi: alloc cmds per vq instead of session") Fixes: 4f7f46d32c98 ("tcm_vhost: Use vq->private_data to indicate if the endpoint is setup") Reported-by: Haoran Zhang Closes: https://lore.kernel.org/virtualization/e418a5ee-45ca-4d18-9b5d-6f8b6b1add8e@oracle.com/T/#me6c0041ce376677419b9b2563494172a01487ecb Signed-off-by: Mike Christie Reviewed-by: Stefan Hajnoczi Message-Id: <20250129210922.121533-1-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefano Garzarella --- drivers/vhost/scsi.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 718fa4e0b31e..7aeff435c1d8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1699,14 +1699,19 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, } } + if (vs->vs_tpg) { + pr_err("vhost-scsi endpoint already set for %s.\n", + vs->vs_vhost_wwpn); + ret = -EEXIST; + goto out; + } + len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET; vs_tpg = kzalloc(len, GFP_KERNEL); if (!vs_tpg) { ret = -ENOMEM; goto out; } - if (vs->vs_tpg) - memcpy(vs_tpg, vs->vs_tpg, len); mutex_lock(&vhost_scsi_mutex); list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) { @@ -1722,12 +1727,6 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, tv_tport = tpg->tport; if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { - if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { - mutex_unlock(&tpg->tv_tpg_mutex); - mutex_unlock(&vhost_scsi_mutex); - ret = -EEXIST; - goto undepend; - } /* * In order to ensure individual vhost-scsi configfs * groups cannot be removed while in use by vhost ioctl, @@ -1774,15 +1773,15 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, } ret = 0; } else { - ret = -EEXIST; + ret = -ENODEV; + goto free_tpg; } /* - * Act as synchronize_rcu to make sure access to - * old vs->vs_tpg is finished. + * Act as synchronize_rcu to make sure requests after this point + * see a fully setup device. */ vhost_scsi_flush(vs); - kfree(vs->vs_tpg); vs->vs_tpg = vs_tpg; goto out; @@ -1802,6 +1801,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, target_undepend_item(&tpg->se_tpg.tpg_group.cg_item); } } +free_tpg: kfree(vs_tpg); out: mutex_unlock(&vs->dev.mutex); @@ -1904,6 +1904,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vhost_scsi_flush(vs); kfree(vs->vs_tpg); vs->vs_tpg = NULL; + memset(vs->vs_vhost_wwpn, 0, sizeof(vs->vs_vhost_wwpn)); WARN_ON(vs->vs_events_nr); mutex_unlock(&vs->dev.mutex); return 0; From 439252e167ac45a5d46f573aac1da7d8f3e051ad Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Tue, 4 Feb 2025 11:31:27 -0600 Subject: [PATCH 0413/2157] vdpa/mlx5: Fix mlx5_vdpa_get_config() endianness on big-endian machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mlx5_vdpa_dev_add() doesn’t initialize mvdev->actual_features. It’s initialized later by mlx5_vdpa_set_driver_features(). However, mlx5_vdpa_get_config() depends on the VIRTIO_F_VERSION_1 flag in actual_features, to return data with correct endianness. When it’s called before mlx5_vdpa_set_driver_features(), the data are incorrectly returned as big-endian on big-endian machines, while QEMU then interprets them as little-endian. The fix is to initialize this VIRTIO_F_VERSION_1 as early as possible, especially considering that mlx5_vdpa_dev_add() insists on this flag to always be set anyway. Signed-off-by: Konstantin Shkolnyy Message-Id: <20250204173127.166673-1-kshk@linux.ibm.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Dragos Tatulea Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 36099047560d..cccc49a08a1a 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3884,6 +3884,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->mvdev.max_vqs = max_vqs; mvdev = &ndev->mvdev; mvdev->mdev = mdev; + /* cpu_to_mlx5vdpa16() below depends on this flag */ + mvdev->actual_features = + (device_features & BIT_ULL(VIRTIO_F_VERSION_1)); ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); From a6097e0a54a5c24f8d577ffecbc35289ae281c2e Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 20 Feb 2025 21:37:33 +0200 Subject: [PATCH 0414/2157] vdpa/mlx5: Fix oversized null mkey longer than 32bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit create_user_mr() has correct code to count the number of null keys used to fill in a hole for the memory map. However, fill_indir() does not follow the same to cap the range up to the 1GB limit correspondingly. Fill in more null keys for the gaps in between, so that null keys are correctly populated. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Cc: stable@vger.kernel.org Reported-by: Cong Meng Signed-off-by: Si-Wei Liu Signed-off-by: Dragos Tatulea Acked-by: Eugenio Pérez Message-Id: <20250220193732.521462-2-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 8455f08f5d40..61424342c096 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -190,9 +190,12 @@ static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, v klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); preve = dmr->end; } else { + u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE); + klm->key = cpu_to_be32(mvdev->res.null_mkey); - klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); - preve = dmr->start; + klm->bcount = cpu_to_be32(klm_bcount(bcount)); + preve += bcount; + goto again; } } From 3c7df2e27346eb40a0e86230db1ccab195c97cfe Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 16 Jan 2025 11:40:59 -0800 Subject: [PATCH 0415/2157] sound/virtio: Fix cancel_sync warnings on uninitialized work_structs Betty reported hitting the following warning: [ 8.709131][ T221] WARNING: CPU: 2 PID: 221 at kernel/workqueue.c:4182 ... [ 8.713282][ T221] Call trace: [ 8.713365][ T221] __flush_work+0x8d0/0x914 [ 8.713468][ T221] __cancel_work_sync+0xac/0xfc [ 8.713570][ T221] cancel_work_sync+0x24/0x34 [ 8.713667][ T221] virtsnd_remove+0xa8/0xf8 [virtio_snd ab15f34d0dd772f6d11327e08a81d46dc9c36276] [ 8.713868][ T221] virtsnd_probe+0x48c/0x664 [virtio_snd ab15f34d0dd772f6d11327e08a81d46dc9c36276] [ 8.714035][ T221] virtio_dev_probe+0x28c/0x390 [ 8.714139][ T221] really_probe+0x1bc/0x4c8 ... It seems we're hitting the error path in virtsnd_probe(), which triggers a virtsnd_remove() which iterates over the substreams calling cancel_work_sync() on the elapsed_period work_struct. Looking at the code, from earlier in: virtsnd_probe()->virtsnd_build_devs()->virtsnd_pcm_parse_cfg() We set snd->nsubstreams, allocate the snd->substreams, and if we then hit an error on the info allocation or something in virtsnd_ctl_query_info() fails, we will exit without having initialized the elapsed_period work_struct. When that error path unwinds we then call virtsnd_remove() which as long as the substreams array is allocated, will iterate through calling cancel_work_sync() on the uninitialized work struct hitting this warning. Takashi Iwai suggested this fix, which initializes the substreams structure right after allocation, so that if we hit the error paths we avoid trying to cleanup uninitialized data. Note: I have not yet managed to reproduce the issue myself, so this patch has had limited testing. Feedback or thoughts would be appreciated! Cc: Anton Yakovlev Cc: "Michael S. Tsirkin" Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: virtualization@lists.linux.dev Cc: linux-sound@vger.kernel.org Cc: kernel-team@android.com Reported-by: Betty Zhou Suggested-by: Takashi Iwai Signed-off-by: John Stultz Message-Id: <20250116194114.3375616-1-jstultz@google.com> Signed-off-by: Michael S. Tsirkin --- sound/virtio/virtio_pcm.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/virtio/virtio_pcm.c b/sound/virtio/virtio_pcm.c index 967e4c45be9b..2f7c5e709f07 100644 --- a/sound/virtio/virtio_pcm.c +++ b/sound/virtio/virtio_pcm.c @@ -339,6 +339,21 @@ int virtsnd_pcm_parse_cfg(struct virtio_snd *snd) if (!snd->substreams) return -ENOMEM; + /* + * Initialize critical substream fields early in case we hit an + * error path and end up trying to clean up uninitialized structures + * elsewhere. + */ + for (i = 0; i < snd->nsubstreams; ++i) { + struct virtio_pcm_substream *vss = &snd->substreams[i]; + + vss->snd = snd; + vss->sid = i; + INIT_WORK(&vss->elapsed_period, virtsnd_pcm_period_elapsed); + init_waitqueue_head(&vss->msg_empty); + spin_lock_init(&vss->lock); + } + info = kcalloc(snd->nsubstreams, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; @@ -352,12 +367,6 @@ int virtsnd_pcm_parse_cfg(struct virtio_snd *snd) struct virtio_pcm_substream *vss = &snd->substreams[i]; struct virtio_pcm *vpcm; - vss->snd = snd; - vss->sid = i; - INIT_WORK(&vss->elapsed_period, virtsnd_pcm_period_elapsed); - init_waitqueue_head(&vss->msg_empty); - spin_lock_init(&vss->lock); - rc = virtsnd_pcm_build_hw(vss, &info[i]); if (rc) goto on_exit; From fc80842a2799f83fd0fe73bafdaa8eaae5336ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 21 Jan 2025 11:33:46 +0100 Subject: [PATCH 0416/2157] vduse: add virtio_fs to allowed dev id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A VDUSE device that implements virtiofs device works fine just by adding the device id to the whitelist. Signed-off-by: Eugenio Pérez Message-Id: <20250121103346.1030165-1-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/vdpa/vdpa_user/vduse_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 7ae99691efdf..6a9a37351310 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -144,6 +144,7 @@ static struct workqueue_struct *vduse_irq_bound_wq; static u32 allowed_device_id[] = { VIRTIO_ID_BLOCK, VIRTIO_ID_NET, + VIRTIO_ID_FS, }; static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) From 4c1f3a7d74277903b290dd989cbd2ea8627fc925 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:08 -0600 Subject: [PATCH 0417/2157] vhost-scsi: Reduce mem use by moving upages to per queue Each worker thread can process 1 command at a time so there's no need to allocate a upages array per cmd. This patch moves it to per queue. Even a small device with 128 cmds and 1 queue this brings mem use for the array from 2 MB = 8 bytes per page pointer * 2048 pointers * 128 cmds to 16K = 8 bytes per pointer * 2048 * 1 queue Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 7aeff435c1d8..c266171045c5 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -83,7 +83,6 @@ struct vhost_scsi_cmd { /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; struct scatterlist *tvc_prot_sgl; - struct page **tvc_upages; /* Pointer to response header iovec */ struct iovec *tvc_resp_iov; /* Pointer to vhost_scsi for our device */ @@ -187,6 +186,7 @@ struct vhost_scsi_virtqueue { struct vhost_scsi_cmd *scsi_cmds; struct sbitmap scsi_tags; int max_cmds; + struct page **upages; struct vhost_work completion_work; struct llist_head completion_list; @@ -619,7 +619,6 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, struct vhost_scsi_nexus *tv_nexus; struct scatterlist *sg, *prot_sg; struct iovec *tvc_resp_iov; - struct page **pages; int tag; tv_nexus = tpg->tpg_nexus; @@ -637,12 +636,10 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, cmd = &svq->scsi_cmds[tag]; sg = cmd->tvc_sgl; prot_sg = cmd->tvc_prot_sgl; - pages = cmd->tvc_upages; tvc_resp_iov = cmd->tvc_resp_iov; memset(cmd, 0, sizeof(*cmd)); cmd->tvc_sgl = sg; cmd->tvc_prot_sgl = prot_sg; - cmd->tvc_upages = pages; cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = scsi_tag; cmd->tvc_lun = lun; @@ -669,7 +666,9 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, struct scatterlist *sgl, bool is_prot) { - struct page **pages = cmd->tvc_upages; + struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq, + struct vhost_scsi_virtqueue, vq); + struct page **pages = svq->upages; struct scatterlist *sg = sgl; ssize_t bytes, mapped_bytes; size_t offset, mapped_offset; @@ -1598,11 +1597,11 @@ static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) kfree(tv_cmd->tvc_sgl); kfree(tv_cmd->tvc_prot_sgl); - kfree(tv_cmd->tvc_upages); kfree(tv_cmd->tvc_resp_iov); } sbitmap_free(&svq->scsi_tags); + kfree(svq->upages); kfree(svq->scsi_cmds); svq->scsi_cmds = NULL; } @@ -1628,6 +1627,11 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) return -ENOMEM; } + svq->upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES, sizeof(struct page *), + GFP_KERNEL); + if (!svq->upages) + goto out; + for (i = 0; i < max_cmds; i++) { tv_cmd = &svq->scsi_cmds[i]; @@ -1639,14 +1643,6 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) goto out; } - tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES, - sizeof(struct page *), - GFP_KERNEL); - if (!tv_cmd->tvc_upages) { - pr_err("Unable to allocate tv_cmd->tvc_upages\n"); - goto out; - } - tv_cmd->tvc_resp_iov = kcalloc(UIO_MAXIOV, sizeof(struct iovec), GFP_KERNEL); From bf2d650391be508dd8b5e188b65ed32300cf3489 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:09 -0600 Subject: [PATCH 0418/2157] vhost-scsi: Allocate T10 PI structs only when enabled T10 PI is not a widely used feature. This has us only allocate the structs for it if the feature has been enabled. For a common small setup where you have 1 virtqueue and 128 commands per queue, this saves: 8MB = 32 bytes per sg * 2048 entries * 128 commands per vhost-scsi device. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c266171045c5..e361c43d0730 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1651,12 +1651,14 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) goto out; } - tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!tv_cmd->tvc_prot_sgl) { - pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); - goto out; + if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) { + tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!tv_cmd->tvc_prot_sgl) { + pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); + goto out; + } } } return 0; From 3ca51662f8186b569b8fb282242c20ccbb3993c2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:10 -0600 Subject: [PATCH 0419/2157] vhost-scsi: Add better resource allocation failure handling If we can't allocate mem to map in data for a request or can't find a tag for a command, we currently drop the command. This leads to the error handler running to clean it up. Instead of dropping the command this has us return an error telling the initiator that it queued more commands than we can handle. The initiator will then reduce how many commands it will send us and retry later. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-4-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e361c43d0730..ed4fd45da287 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -629,7 +629,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, tag = sbitmap_get(&svq->scsi_tags); if (tag < 0) { - pr_err("Unable to obtain tag for vhost_scsi_cmd\n"); + pr_warn_once("Guest sent too many cmds. Returning TASK_SET_FULL.\n"); return ERR_PTR(-ENOMEM); } @@ -928,6 +928,24 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) target_submit(se_cmd); } +static void +vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + int head, unsigned int out, u8 status) +{ + struct virtio_scsi_cmd_resp __user *resp; + struct virtio_scsi_cmd_resp rsp; + int ret; + + memset(&rsp, 0, sizeof(rsp)); + rsp.status = status; + resp = vq->iov[out].iov_base; + ret = __copy_to_user(resp, &rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(&vs->dev, vq, head, 0); + else + pr_err("Faulted on virtio_scsi_cmd_resp\n"); +} + static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, struct vhost_virtqueue *vq, @@ -1215,8 +1233,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) exp_data_len + prot_bytes, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_get_cmd failed %ld\n", - PTR_ERR(cmd)); + ret = PTR_ERR(cmd); + vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret); goto err; } cmd->tvc_vhost = vs; @@ -1253,11 +1271,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) * EINVAL: Invalid response buffer, drop the request * EIO: Respond with bad target * EAGAIN: Pending request + * ENOMEM: Could not allocate resources for request */ if (ret == -ENXIO) break; else if (ret == -EIO) vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); + else if (ret == -ENOMEM) + vhost_scsi_send_status(vs, vq, vc.head, vc.out, + SAM_STAT_TASK_SET_FULL); } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); From 891b99eab0f89dbe08d216f4ab71acbeaf7a3102 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:11 -0600 Subject: [PATCH 0420/2157] vhost-scsi: Return queue full for page alloc failures during copy This has us return queue full if we can't allocate a page during the copy operation so the initiator can retry. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-5-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ed4fd45da287..c087fbc6f8d3 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -756,7 +756,7 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, size_t len = iov_iter_count(iter); unsigned int nbytes = 0; struct page *page; - int i; + int i, ret; if (cmd->tvc_data_direction == DMA_FROM_DEVICE) { cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, @@ -769,6 +769,7 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, page = alloc_page(GFP_KERNEL); if (!page) { i--; + ret = -ENOMEM; goto err; } @@ -776,8 +777,10 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, sg_set_page(&sg[i], page, nbytes, 0); if (cmd->tvc_data_direction == DMA_TO_DEVICE && - copy_page_from_iter(page, 0, nbytes, iter) != nbytes) + copy_page_from_iter(page, 0, nbytes, iter) != nbytes) { + ret = -EFAULT; goto err; + } len -= nbytes; } @@ -792,7 +795,7 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, for (; i >= 0; i--) __free_page(sg_page(&sg[i])); kfree(cmd->saved_iter_addr); - return -ENOMEM; + return ret; } static int @@ -1249,9 +1252,9 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) " %d\n", cmd, exp_data_len, prot_bytes, data_direction); if (data_direction != DMA_NONE) { - if (unlikely(vhost_scsi_mapal(cmd, prot_bytes, - &prot_iter, exp_data_len, - &data_iter))) { + ret = vhost_scsi_mapal(cmd, prot_bytes, &prot_iter, + exp_data_len, &data_iter); + if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd); goto err; From bca939d5bcd00d6faea99c47eafd60bed573ef03 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:12 -0600 Subject: [PATCH 0421/2157] vhost-scsi: Dynamically allocate scatterlists We currently preallocate scatterlists which have 2048 entries for each command. For a small device with just 1 queue this results in: 8 MB = 32 bytes per sg * 2048 entries * 128 cmd When mq is turned on and we increase the virtqueue_size so we can handle commands from multiple queues in parallel, then this can sky rocket. This patch allows us to dynamically allocate the scatterlist like is done with drivers like NVMe and SCSI. For small IO (4-16K) IOPs testing, we didn't see any regressions, but for throughput testing we sometimes saw a 2-5% regression when the backend device was very fast (8 NVMe drives in a MD RAID0 config or a memory backed device). As a result this patch makes the dynamic allocation feature a modparam so userspace can decide how it wants to balance mem use and perf. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-6-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/Kconfig | 1 + drivers/vhost/scsi.c | 260 ++++++++++++++++++++++++++++-------------- 2 files changed, 173 insertions(+), 88 deletions(-) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index b455d9ab6f3d..020d4fbb947c 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -47,6 +47,7 @@ config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD select VHOST + select SG_POOL default n help Say M here to enable the vhost_scsi TCM fabric module diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c087fbc6f8d3..c7bb8485e447 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -46,6 +46,50 @@ #define VHOST_SCSI_PREALLOC_UPAGES 2048 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 +static unsigned int vhost_scsi_inline_sg_cnt = VHOST_SCSI_PREALLOC_SGLS; + +#ifdef CONFIG_ARCH_NO_SG_CHAIN +static int vhost_scsi_set_inline_sg_cnt(const char *buf, + const struct kernel_param *kp) +{ + pr_err("Setting inline_sg_cnt is not supported.\n"); + return -EOPNOTSUPP; +} +#else +static int vhost_scsi_set_inline_sg_cnt(const char *buf, + const struct kernel_param *kp) +{ + unsigned int cnt; + int ret; + + ret = kstrtouint(buf, 10, &cnt); + if (ret) + return ret; + + if (ret > VHOST_SCSI_PREALLOC_SGLS) { + pr_err("Max inline_sg_cnt is %u\n", VHOST_SCSI_PREALLOC_SGLS); + return -EINVAL; + } + + vhost_scsi_inline_sg_cnt = cnt; + return 0; +} +#endif + +static int vhost_scsi_get_inline_sg_cnt(char *buf, + const struct kernel_param *kp) +{ + return sprintf(buf, "%u\n", vhost_scsi_inline_sg_cnt); +} + +static const struct kernel_param_ops vhost_scsi_inline_sg_cnt_op = { + .get = vhost_scsi_get_inline_sg_cnt, + .set = vhost_scsi_set_inline_sg_cnt, +}; + +module_param_cb(inline_sg_cnt, &vhost_scsi_inline_sg_cnt_op, NULL, 0644); +MODULE_PARM_DESC(inline_sg_cnt, "Set the number of scatterlist entries to pre-allocate. The default is 2048."); + /* Max number of requests before requeueing the job. * Using this limit prevents one virtqueue from starving others with * request. @@ -80,9 +124,10 @@ struct vhost_scsi_cmd { u32 copied_iov:1; const void *saved_iter_addr; struct iov_iter saved_iter; - /* Pointer to the SGL formatted memory from virtio-scsi */ - struct scatterlist *tvc_sgl; - struct scatterlist *tvc_prot_sgl; + struct scatterlist *sgl; + struct sg_table table; + struct scatterlist *prot_sgl; + struct sg_table prot_table; /* Pointer to response header iovec */ struct iovec *tvc_resp_iov; /* Pointer to vhost_scsi for our device */ @@ -206,6 +251,8 @@ struct vhost_scsi { bool vs_events_missed; /* any missed events, protected by vq->mutex */ int vs_events_nr; /* num of pending events, protected by vq->mutex */ + + unsigned int inline_sg_cnt; }; struct vhost_scsi_tmf { @@ -328,23 +375,35 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) { struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); + struct vhost_scsi *vs = tv_cmd->tvc_vhost; struct vhost_scsi_virtqueue *svq = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); struct vhost_scsi_inflight *inflight = tv_cmd->inflight; + struct scatterlist *sg; + struct page *page; int i; if (tv_cmd->tvc_sgl_count) { - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) { + for_each_sgtable_sg(&tv_cmd->table, sg, i) { + page = sg_page(sg); + if (!page) + continue; + if (tv_cmd->copied_iov) - __free_page(sg_page(&tv_cmd->tvc_sgl[i])); + __free_page(page); else - put_page(sg_page(&tv_cmd->tvc_sgl[i])); + put_page(page); } kfree(tv_cmd->saved_iter_addr); + sg_free_table_chained(&tv_cmd->table, vs->inline_sg_cnt); } if (tv_cmd->tvc_prot_sgl_count) { - for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++) - put_page(sg_page(&tv_cmd->tvc_prot_sgl[i])); + for_each_sgtable_sg(&tv_cmd->prot_table, sg, i) { + page = sg_page(sg); + if (page) + put_page(page); + } + sg_free_table_chained(&tv_cmd->prot_table, vs->inline_sg_cnt); } sbitmap_clear_bit(&svq->scsi_tags, se_cmd->map_tag); @@ -534,14 +593,17 @@ static void vhost_scsi_evt_work(struct vhost_work *work) static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) { struct iov_iter *iter = &cmd->saved_iter; - struct scatterlist *sg = cmd->tvc_sgl; + struct scatterlist *sg; struct page *page; size_t len; int i; - for (i = 0; i < cmd->tvc_sgl_count; i++) { - page = sg_page(&sg[i]); - len = sg[i].length; + for_each_sgtable_sg(&cmd->table, sg, i) { + page = sg_page(sg); + if (!page) + continue; + + len = sg->length; if (copy_page_to_iter(page, 0, len, iter) != len) { pr_err("Could not copy data while handling misaligned cmd. Error %zu\n", @@ -617,7 +679,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, struct vhost_scsi_virtqueue, vq); struct vhost_scsi_cmd *cmd; struct vhost_scsi_nexus *tv_nexus; - struct scatterlist *sg, *prot_sg; + struct scatterlist *sgl, *prot_sgl; struct iovec *tvc_resp_iov; int tag; @@ -634,12 +696,12 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, } cmd = &svq->scsi_cmds[tag]; - sg = cmd->tvc_sgl; - prot_sg = cmd->tvc_prot_sgl; + sgl = cmd->sgl; + prot_sgl = cmd->prot_sgl; tvc_resp_iov = cmd->tvc_resp_iov; memset(cmd, 0, sizeof(*cmd)); - cmd->tvc_sgl = sg; - cmd->tvc_prot_sgl = prot_sg; + cmd->sgl = sgl; + cmd->prot_sgl = prot_sgl; cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = scsi_tag; cmd->tvc_lun = lun; @@ -655,6 +717,27 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, return cmd; } +static void vhost_scsi_revert_map_iov_to_sgl(struct iov_iter *iter, + struct scatterlist *curr, + struct scatterlist *end) +{ + size_t revert_bytes = 0; + struct page *page; + + while (curr != end) { + page = sg_page(curr); + + if (page) { + put_page(page); + revert_bytes += curr->length; + } + /* Clear so we can re-use it for the copy path */ + sg_set_page(curr, NULL, 0, 0); + curr = sg_next(curr); + } + iov_iter_revert(iter, revert_bytes); +} + /* * Map a user memory range into a scatterlist * @@ -663,16 +746,17 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, static int vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct scatterlist *sgl, + struct sg_table *sg_table, + struct scatterlist **sgl, bool is_prot) { struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); struct page **pages = svq->upages; - struct scatterlist *sg = sgl; - ssize_t bytes, mapped_bytes; - size_t offset, mapped_offset; - unsigned int npages = 0; + struct scatterlist *sg = *sgl; + ssize_t bytes; + size_t offset; + unsigned int n, npages = 0; bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, VHOST_SCSI_PREALLOC_UPAGES, &offset); @@ -680,11 +764,8 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, if (bytes <= 0) return bytes < 0 ? bytes : -EFAULT; - mapped_bytes = bytes; - mapped_offset = offset; - while (bytes) { - unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); + n = min_t(unsigned int, PAGE_SIZE - offset, bytes); /* * The block layer requires bios/requests to be a multiple of * 512 bytes, but Windows can send us vecs that are misaligned. @@ -705,25 +786,24 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, goto revert_iter_get_pages; } - sg_set_page(sg++, pages[npages++], n, offset); + sg_set_page(sg, pages[npages++], n, offset); + sg = sg_next(sg); bytes -= n; offset = 0; } + *sgl = sg; return npages; revert_iter_get_pages: - iov_iter_revert(iter, mapped_bytes); + vhost_scsi_revert_map_iov_to_sgl(iter, *sgl, sg); - npages = 0; - while (mapped_bytes) { - unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset, - mapped_bytes); + iov_iter_revert(iter, bytes); + while (bytes) { + n = min_t(unsigned int, PAGE_SIZE, bytes); put_page(pages[npages++]); - - mapped_bytes -= n; - mapped_offset = 0; + bytes -= n; } return -EINVAL; @@ -751,10 +831,11 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls) static int vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct scatterlist *sg, int sg_count) + struct sg_table *sg_table, int sg_count) { size_t len = iov_iter_count(iter); unsigned int nbytes = 0; + struct scatterlist *sg; struct page *page; int i, ret; @@ -765,16 +846,15 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, return -ENOMEM; } - for (i = 0; i < sg_count; i++) { + for_each_sgtable_sg(sg_table, sg, i) { page = alloc_page(GFP_KERNEL); if (!page) { - i--; ret = -ENOMEM; goto err; } nbytes = min_t(unsigned int, PAGE_SIZE, len); - sg_set_page(&sg[i], page, nbytes, 0); + sg_set_page(sg, page, nbytes, 0); if (cmd->tvc_data_direction == DMA_TO_DEVICE && copy_page_from_iter(page, 0, nbytes, iter) != nbytes) { @@ -792,39 +872,29 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, pr_err("Could not read %u bytes while handling misaligned cmd\n", nbytes); - for (; i >= 0; i--) - __free_page(sg_page(&sg[i])); + for_each_sgtable_sg(sg_table, sg, i) { + page = sg_page(sg); + if (page) + __free_page(page); + } kfree(cmd->saved_iter_addr); return ret; } static int vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct scatterlist *sg, int sg_count, bool is_prot) + struct sg_table *sg_table, int sg_count, bool is_prot) { - struct scatterlist *p = sg; - size_t revert_bytes; + struct scatterlist *sg = sg_table->sgl; int ret; while (iov_iter_count(iter)) { - ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot); + ret = vhost_scsi_map_to_sgl(cmd, iter, sg_table, &sg, is_prot); if (ret < 0) { - revert_bytes = 0; - - while (p < sg) { - struct page *page = sg_page(p); - - if (page) { - put_page(page); - revert_bytes += p->length; - } - p++; - } - - iov_iter_revert(iter, revert_bytes); + vhost_scsi_revert_map_iov_to_sgl(iter, sg_table->sgl, + sg); return ret; } - sg += ret; } return 0; @@ -835,23 +905,29 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, size_t prot_bytes, struct iov_iter *prot_iter, size_t data_bytes, struct iov_iter *data_iter) { + struct vhost_scsi *vs = cmd->tvc_vhost; int sgl_count, ret; if (prot_bytes) { sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes, VHOST_SCSI_PREALLOC_PROT_SGLS); - if (sgl_count < 0) - return sgl_count; + cmd->prot_table.sgl = cmd->prot_sgl; + ret = sg_alloc_table_chained(&cmd->prot_table, sgl_count, + cmd->prot_table.sgl, + vs->inline_sg_cnt); + if (ret) + return ret; - sg_init_table(cmd->tvc_prot_sgl, sgl_count); cmd->tvc_prot_sgl_count = sgl_count; pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, - cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); + cmd->prot_table.sgl, cmd->tvc_prot_sgl_count); ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter, - cmd->tvc_prot_sgl, + &cmd->prot_table, cmd->tvc_prot_sgl_count, true); if (ret < 0) { + sg_free_table_chained(&cmd->prot_table, + vs->inline_sg_cnt); cmd->tvc_prot_sgl_count = 0; return ret; } @@ -861,20 +937,23 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, if (sgl_count < 0) return sgl_count; - sg_init_table(cmd->tvc_sgl, sgl_count); + cmd->table.sgl = cmd->sgl; + ret = sg_alloc_table_chained(&cmd->table, sgl_count, cmd->table.sgl, + vs->inline_sg_cnt); + if (ret) + return ret; + cmd->tvc_sgl_count = sgl_count; pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, - cmd->tvc_sgl, cmd->tvc_sgl_count); + cmd->table.sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, &cmd->table, cmd->tvc_sgl_count, false); - if (ret == -EINVAL) { - sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + if (ret == -EINVAL) + ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, &cmd->table, cmd->tvc_sgl_count); - } - if (ret < 0) { + sg_free_table_chained(&cmd->table, vs->inline_sg_cnt); cmd->tvc_sgl_count = 0; return ret; } @@ -906,10 +985,10 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) /* FIXME: BIDI operation */ if (cmd->tvc_sgl_count) { - sg_ptr = cmd->tvc_sgl; + sg_ptr = cmd->table.sgl; if (cmd->tvc_prot_sgl_count) - sg_prot_ptr = cmd->tvc_prot_sgl; + sg_prot_ptr = cmd->prot_table.sgl; else se_cmd->prot_pto = true; } else { @@ -1620,8 +1699,8 @@ static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) for (i = 0; i < svq->max_cmds; i++) { tv_cmd = &svq->scsi_cmds[i]; - kfree(tv_cmd->tvc_sgl); - kfree(tv_cmd->tvc_prot_sgl); + kfree(tv_cmd->sgl); + kfree(tv_cmd->prot_sgl); kfree(tv_cmd->tvc_resp_iov); } @@ -1635,6 +1714,7 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) { struct vhost_scsi_virtqueue *svq = container_of(vq, struct vhost_scsi_virtqueue, vq); + struct vhost_scsi *vs = svq->vs; struct vhost_scsi_cmd *tv_cmd; unsigned int i; @@ -1660,12 +1740,14 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) for (i = 0; i < max_cmds; i++) { tv_cmd = &svq->scsi_cmds[i]; - tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!tv_cmd->tvc_sgl) { - pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); - goto out; + if (vs->inline_sg_cnt) { + tv_cmd->sgl = kcalloc(vs->inline_sg_cnt, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!tv_cmd->sgl) { + pr_err("Unable to allocate tv_cmd->sgl\n"); + goto out; + } } tv_cmd->tvc_resp_iov = kcalloc(UIO_MAXIOV, @@ -1676,12 +1758,13 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) goto out; } - if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) { - tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!tv_cmd->tvc_prot_sgl) { - pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); + if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI) && + vs->inline_sg_cnt) { + tv_cmd->prot_sgl = kcalloc(vs->inline_sg_cnt, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!tv_cmd->prot_sgl) { + pr_err("Unable to allocate tv_cmd->prot_sgl\n"); goto out; } } @@ -1972,6 +2055,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs = kvzalloc(sizeof(*vs), GFP_KERNEL); if (!vs) goto err_vs; + vs->inline_sg_cnt = vhost_scsi_inline_sg_cnt; if (nvqs > VHOST_SCSI_MAX_IO_VQ) { pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs, From ddc5b5f68ec553a037a822f81150acbbdefa3ad1 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:13 -0600 Subject: [PATCH 0422/2157] vhost-scsi: Stop duplicating se_cmd fields When setting up the command we will initially set values like lun and data direction on the vhost scsi command. We then pass them to LIO which stores them again on the LIO se_cmd. The se_cmd is actually stored in the vhost scsi command so we are storing these values twice on the same struct. So this patch has stop duplicating the storing of SCSI values like lun, data dir, data len, cdb, etc on the vhost scsi command and just pass them to LIO which will store them on the se_cmd. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-7-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 95 +++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 59 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c7bb8485e447..fbcdc9866e80 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -106,21 +106,11 @@ struct vhost_scsi_inflight { struct vhost_scsi_cmd { /* Descriptor from vhost_get_vq_desc() for virt_queue segment */ int tvc_vq_desc; - /* virtio-scsi initiator task attribute */ - int tvc_task_attr; /* virtio-scsi response incoming iovecs */ int tvc_in_iovs; - /* virtio-scsi initiator data direction */ - enum dma_data_direction tvc_data_direction; - /* Expected data transfer length from virtio-scsi header */ - u32 tvc_exp_data_len; - /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */ - u64 tvc_tag; /* The number of scatterlists associated with this cmd */ u32 tvc_sgl_count; u32 tvc_prot_sgl_count; - /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */ - u32 tvc_lun; u32 copied_iov:1; const void *saved_iter_addr; struct iov_iter saved_iter; @@ -130,16 +120,10 @@ struct vhost_scsi_cmd { struct sg_table prot_table; /* Pointer to response header iovec */ struct iovec *tvc_resp_iov; - /* Pointer to vhost_scsi for our device */ - struct vhost_scsi *tvc_vhost; /* Pointer to vhost_virtqueue for the cmd */ struct vhost_virtqueue *tvc_vq; - /* Pointer to vhost nexus memory */ - struct vhost_scsi_nexus *tvc_nexus; /* The TCM I/O descriptor that is accessed via container_of() */ struct se_cmd tvc_se_cmd; - /* Copy of the incoming SCSI command descriptor block (CDB) */ - unsigned char tvc_cdb[VHOST_SCSI_MAX_CDB_SIZE]; /* Sense buffer that will be mapped into outgoing status */ unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER]; /* Completed commands list, serviced from vhost worker thread */ @@ -375,9 +359,9 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) { struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); - struct vhost_scsi *vs = tv_cmd->tvc_vhost; struct vhost_scsi_virtqueue *svq = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); + struct vhost_scsi *vs = svq->vs; struct vhost_scsi_inflight *inflight = tv_cmd->inflight; struct scatterlist *sg; struct page *page; @@ -671,24 +655,15 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct vhost_scsi_cmd * -vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, - unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr, - u32 exp_data_len, int data_direction) +vhost_scsi_get_cmd(struct vhost_virtqueue *vq, u64 scsi_tag) { struct vhost_scsi_virtqueue *svq = container_of(vq, struct vhost_scsi_virtqueue, vq); struct vhost_scsi_cmd *cmd; - struct vhost_scsi_nexus *tv_nexus; struct scatterlist *sgl, *prot_sgl; struct iovec *tvc_resp_iov; int tag; - tv_nexus = tpg->tpg_nexus; - if (!tv_nexus) { - pr_err("Unable to locate active struct vhost_scsi_nexus\n"); - return ERR_PTR(-EIO); - } - tag = sbitmap_get(&svq->scsi_tags); if (tag < 0) { pr_warn_once("Guest sent too many cmds. Returning TASK_SET_FULL.\n"); @@ -703,17 +678,9 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, cmd->sgl = sgl; cmd->prot_sgl = prot_sgl; cmd->tvc_se_cmd.map_tag = tag; - cmd->tvc_tag = scsi_tag; - cmd->tvc_lun = lun; - cmd->tvc_task_attr = task_attr; - cmd->tvc_exp_data_len = exp_data_len; - cmd->tvc_data_direction = data_direction; - cmd->tvc_nexus = tv_nexus; cmd->inflight = vhost_scsi_get_inflight(vq); cmd->tvc_resp_iov = tvc_resp_iov; - memcpy(cmd->tvc_cdb, cdb, VHOST_SCSI_MAX_CDB_SIZE); - return cmd; } @@ -831,7 +798,8 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls) static int vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct sg_table *sg_table, int sg_count) + struct sg_table *sg_table, int sg_count, + int data_dir) { size_t len = iov_iter_count(iter); unsigned int nbytes = 0; @@ -839,7 +807,7 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, struct page *page; int i, ret; - if (cmd->tvc_data_direction == DMA_FROM_DEVICE) { + if (data_dir == DMA_FROM_DEVICE) { cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, GFP_KERNEL); if (!cmd->saved_iter_addr) @@ -856,7 +824,7 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, nbytes = min_t(unsigned int, PAGE_SIZE, len); sg_set_page(sg, page, nbytes, 0); - if (cmd->tvc_data_direction == DMA_TO_DEVICE && + if (data_dir == DMA_TO_DEVICE && copy_page_from_iter(page, 0, nbytes, iter) != nbytes) { ret = -EFAULT; goto err; @@ -901,11 +869,10 @@ vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, } static int -vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, +vhost_scsi_mapal(struct vhost_scsi *vs, struct vhost_scsi_cmd *cmd, size_t prot_bytes, struct iov_iter *prot_iter, - size_t data_bytes, struct iov_iter *data_iter) + size_t data_bytes, struct iov_iter *data_iter, int data_dir) { - struct vhost_scsi *vs = cmd->tvc_vhost; int sgl_count, ret; if (prot_bytes) { @@ -951,7 +918,7 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, cmd->tvc_sgl_count, false); if (ret == -EINVAL) ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, &cmd->table, - cmd->tvc_sgl_count); + cmd->tvc_sgl_count, data_dir); if (ret < 0) { sg_free_table_chained(&cmd->table, vs->inline_sg_cnt); cmd->tvc_sgl_count = 0; @@ -977,10 +944,13 @@ static int vhost_scsi_to_tcm_attr(int attr) return TCM_SIMPLE_TAG; } -static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) +static void vhost_scsi_target_queue_cmd(struct vhost_scsi_nexus *nexus, + struct vhost_scsi_cmd *cmd, + unsigned char *cdb, u16 lun, + int task_attr, int data_dir, + u32 exp_data_len) { struct se_cmd *se_cmd = &cmd->tvc_se_cmd; - struct vhost_scsi_nexus *tv_nexus; struct scatterlist *sg_ptr, *sg_prot_ptr = NULL; /* FIXME: BIDI operation */ @@ -994,15 +964,13 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) } else { sg_ptr = NULL; } - tv_nexus = cmd->tvc_nexus; se_cmd->tag = 0; - target_init_cmd(se_cmd, tv_nexus->tvn_se_sess, &cmd->tvc_sense_buf[0], - cmd->tvc_lun, cmd->tvc_exp_data_len, - vhost_scsi_to_tcm_attr(cmd->tvc_task_attr), - cmd->tvc_data_direction, TARGET_SCF_ACK_KREF); + target_init_cmd(se_cmd, nexus->tvn_se_sess, &cmd->tvc_sense_buf[0], + lun, exp_data_len, vhost_scsi_to_tcm_attr(task_attr), + data_dir, TARGET_SCF_ACK_KREF); - if (target_submit_prep(se_cmd, cmd->tvc_cdb, sg_ptr, + if (target_submit_prep(se_cmd, cdb, sg_ptr, cmd->tvc_sgl_count, NULL, 0, sg_prot_ptr, cmd->tvc_prot_sgl_count, GFP_KERNEL)) return; @@ -1159,6 +1127,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) struct vhost_scsi_tpg **vs_tpg, *tpg; struct virtio_scsi_cmd_req v_req; struct virtio_scsi_cmd_req_pi v_req_pi; + struct vhost_scsi_nexus *nexus; struct vhost_scsi_ctx vc; struct vhost_scsi_cmd *cmd; struct iov_iter in_iter, prot_iter, data_iter; @@ -1168,7 +1137,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) u16 lun; u8 task_attr; bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); - void *cdb; + u8 *cdb; mutex_lock(&vq->mutex); /* @@ -1311,28 +1280,34 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE); goto err; } - cmd = vhost_scsi_get_cmd(vq, tpg, cdb, tag, lun, task_attr, - exp_data_len + prot_bytes, - data_direction); + + nexus = tpg->tpg_nexus; + if (!nexus) { + vq_err(vq, "Unable to locate active struct vhost_scsi_nexus\n"); + ret = -EIO; + goto err; + } + + cmd = vhost_scsi_get_cmd(vq, tag); if (IS_ERR(cmd)) { ret = PTR_ERR(cmd); vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret); goto err; } - cmd->tvc_vhost = vs; cmd->tvc_vq = vq; for (i = 0; i < vc.in ; i++) cmd->tvc_resp_iov[i] = vq->iov[vc.out + i]; cmd->tvc_in_iovs = vc.in; pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", - cmd->tvc_cdb[0], cmd->tvc_lun); + cdb[0], lun); pr_debug("cmd: %p exp_data_len: %d, prot_bytes: %d data_direction:" " %d\n", cmd, exp_data_len, prot_bytes, data_direction); if (data_direction != DMA_NONE) { - ret = vhost_scsi_mapal(cmd, prot_bytes, &prot_iter, - exp_data_len, &data_iter); + ret = vhost_scsi_mapal(vs, cmd, prot_bytes, &prot_iter, + exp_data_len, &data_iter, + data_direction); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd); @@ -1345,7 +1320,9 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) * vhost_scsi_queue_data_in() and vhost_scsi_queue_status() */ cmd->tvc_vq_desc = vc.head; - vhost_scsi_target_queue_cmd(cmd); + vhost_scsi_target_queue_cmd(nexus, cmd, cdb, lun, task_attr, + data_direction, + exp_data_len + prot_bytes); ret = 0; err: /* From fd47976581333765964f4ce0ea01176fa1e646d5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:14 -0600 Subject: [PATCH 0423/2157] vhost-scsi: Allocate iov_iter used for unaligned copies when needed It's extremely rare that we get unaligned requests that need to drop down to the data copy code path. However, the iov_iter is almost 5% of the mem used for the vhost_scsi_cmd. This patch has us allocate the iov_iter only when needed since it's not a perf path that uses the struct. This along with the patches that removed the duplicated fields on the vhost_scsd_cmd allow us to reduce mem use by 1 MB in mid size setups where we have 16 virtqueues and are doing 1024 cmds per queue. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index fbcdc9866e80..82389e2ac0c9 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -112,8 +112,8 @@ struct vhost_scsi_cmd { u32 tvc_sgl_count; u32 tvc_prot_sgl_count; u32 copied_iov:1; - const void *saved_iter_addr; - struct iov_iter saved_iter; + const void *read_iov; + struct iov_iter *read_iter; struct scatterlist *sgl; struct sg_table table; struct scatterlist *prot_sgl; @@ -378,7 +378,8 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) else put_page(page); } - kfree(tv_cmd->saved_iter_addr); + kfree(tv_cmd->read_iter); + kfree(tv_cmd->read_iov); sg_free_table_chained(&tv_cmd->table, vs->inline_sg_cnt); } if (tv_cmd->tvc_prot_sgl_count) { @@ -576,7 +577,7 @@ static void vhost_scsi_evt_work(struct vhost_work *work) static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) { - struct iov_iter *iter = &cmd->saved_iter; + struct iov_iter *iter = cmd->read_iter; struct scatterlist *sg; struct page *page; size_t len; @@ -624,7 +625,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) cmd, se_cmd->residual_count, se_cmd->scsi_status); memset(&v_rsp, 0, sizeof(v_rsp)); - if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) { + if (cmd->read_iter && vhost_scsi_copy_sgl_to_iov(cmd)) { v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET; } else { v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, @@ -808,10 +809,15 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, int i, ret; if (data_dir == DMA_FROM_DEVICE) { - cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, - GFP_KERNEL); - if (!cmd->saved_iter_addr) + cmd->read_iter = kzalloc(sizeof(*cmd->read_iter), GFP_KERNEL); + if (!cmd->read_iter) return -ENOMEM; + + cmd->read_iov = dup_iter(cmd->read_iter, iter, GFP_KERNEL); + if (!cmd->read_iov) { + ret = -ENOMEM; + goto free_iter; + } } for_each_sgtable_sg(sg_table, sg, i) { @@ -845,7 +851,9 @@ vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, if (page) __free_page(page); } - kfree(cmd->saved_iter_addr); + kfree(cmd->read_iov); +free_iter: + kfree(cmd->read_iter); return ret; } From 9d8960672d63db4b3b04542f5622748b345c637a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 3 Dec 2024 13:15:15 -0600 Subject: [PATCH 0424/2157] vhost-scsi: Reduce response iov mem use We have to save N iov entries to copy the virtio_scsi_cmd_resp struct back to the guest's buffer. The difficulty is that we can't assume the virtio_scsi_cmd_resp will be in 1 iov because older virtio specs allowed you to break it up. The worst case is that the guest was doing something like breaking up the virtio_scsi_cmd_resp struct into 108 (the struct is 108 bytes) byte sized vecs like: iov[0].iov_base = ((unsigned char *)virtio_scsi_cmd_resp)[0] iov[0].iov_len = 1 iov[1].iov_base = ((unsigned char *)virtio_scsi_cmd_resp)[1] iov[1].iov_len = 1 .... iov[107].iov_base = ((unsigned char *)virtio_scsi_cmd_resp)[107] iov[1].iov_len = 1 Right now we allocate UIO_MAXIOV vecs which is 1024 and so for a small device with just 1 queue and 128 commands per queue, we are wasting 1.8 MB = (1024 current entries - 108) * 16 bytes per entry * 128 cmds The most common case is going to be where the initiator puts the entire virtio_scsi_cmd_resp in the first iov and does not split it. We've always done it this way for Linux and the windows driver looks like it's always done the same. It's highly unlikely anyone has ever split the response and if they did it might just be where they have the sense in a second iov but that doesn't seem likely as well. So to optimize for the common implementation, this has us only pre-allocate the single iovec. If we do hit the split up response case this has us allocate the needed iovec when needed. Signed-off-by: Mike Christie Message-Id: <20241203191705.19431-9-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 82 ++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 22 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 82389e2ac0c9..f6f5a7ac7894 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -45,6 +45,11 @@ #define VHOST_SCSI_PREALLOC_SGLS 2048 #define VHOST_SCSI_PREALLOC_UPAGES 2048 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 +/* + * For the legacy descriptor case we allocate an iov per byte in the + * virtio_scsi_cmd_resp struct. + */ +#define VHOST_SCSI_MAX_RESP_IOVS sizeof(struct virtio_scsi_cmd_resp) static unsigned int vhost_scsi_inline_sg_cnt = VHOST_SCSI_PREALLOC_SGLS; @@ -106,8 +111,6 @@ struct vhost_scsi_inflight { struct vhost_scsi_cmd { /* Descriptor from vhost_get_vq_desc() for virt_queue segment */ int tvc_vq_desc; - /* virtio-scsi response incoming iovecs */ - int tvc_in_iovs; /* The number of scatterlists associated with this cmd */ u32 tvc_sgl_count; u32 tvc_prot_sgl_count; @@ -118,8 +121,12 @@ struct vhost_scsi_cmd { struct sg_table table; struct scatterlist *prot_sgl; struct sg_table prot_table; - /* Pointer to response header iovec */ - struct iovec *tvc_resp_iov; + /* Fast path response header iovec used when only one vec is needed */ + struct iovec tvc_resp_iov; + /* Number of iovs for response */ + unsigned int tvc_resp_iovs_cnt; + /* Pointer to response header iovecs if more than one is needed */ + struct iovec *tvc_resp_iovs; /* Pointer to vhost_virtqueue for the cmd */ struct vhost_virtqueue *tvc_vq; /* The TCM I/O descriptor that is accessed via container_of() */ @@ -391,6 +398,8 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) sg_free_table_chained(&tv_cmd->prot_table, vs->inline_sg_cnt); } + if (tv_cmd->tvc_resp_iovs != &tv_cmd->tvc_resp_iov) + kfree(tv_cmd->tvc_resp_iovs); sbitmap_clear_bit(&svq->scsi_tags, se_cmd->map_tag); vhost_scsi_put_inflight(inflight); } @@ -638,8 +647,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) se_cmd->scsi_sense_length); } - iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov, - cmd->tvc_in_iovs, sizeof(v_rsp)); + iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iovs, + cmd->tvc_resp_iovs_cnt, sizeof(v_rsp)); ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter); if (likely(ret == sizeof(v_rsp))) { signal = true; @@ -662,7 +671,6 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, u64 scsi_tag) struct vhost_scsi_virtqueue, vq); struct vhost_scsi_cmd *cmd; struct scatterlist *sgl, *prot_sgl; - struct iovec *tvc_resp_iov; int tag; tag = sbitmap_get(&svq->scsi_tags); @@ -674,13 +682,11 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, u64 scsi_tag) cmd = &svq->scsi_cmds[tag]; sgl = cmd->sgl; prot_sgl = cmd->prot_sgl; - tvc_resp_iov = cmd->tvc_resp_iov; memset(cmd, 0, sizeof(*cmd)); cmd->sgl = sgl; cmd->prot_sgl = prot_sgl; cmd->tvc_se_cmd.map_tag = tag; cmd->inflight = vhost_scsi_get_inflight(vq); - cmd->tvc_resp_iov = tvc_resp_iov; return cmd; } @@ -1124,6 +1130,43 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, return ret; } +static int +vhost_scsi_setup_resp_iovs(struct vhost_scsi_cmd *cmd, struct iovec *in_iovs, + unsigned int in_iovs_cnt) +{ + int i, cnt; + + if (!in_iovs_cnt) + return 0; + /* + * Initiator's normally just put the virtio_scsi_cmd_resp in the first + * iov, but just in case they wedged in some data with it we check for + * greater than or equal to the response struct. + */ + if (in_iovs[0].iov_len >= sizeof(struct virtio_scsi_cmd_resp)) { + cmd->tvc_resp_iovs = &cmd->tvc_resp_iov; + cmd->tvc_resp_iovs_cnt = 1; + } else { + /* + * Legacy descriptor layouts didn't specify that we must put + * the entire response in one iov. Worst case we have a + * iov per byte. + */ + cnt = min(VHOST_SCSI_MAX_RESP_IOVS, in_iovs_cnt); + cmd->tvc_resp_iovs = kcalloc(cnt, sizeof(struct iovec), + GFP_KERNEL); + if (!cmd->tvc_resp_iovs) + return -ENOMEM; + + cmd->tvc_resp_iovs_cnt = cnt; + } + + for (i = 0; i < cmd->tvc_resp_iovs_cnt; i++) + cmd->tvc_resp_iovs[i] = in_iovs[i]; + + return 0; +} + static u16 vhost_buf_to_lun(u8 *lun_buf) { return ((lun_buf[2] << 8) | lun_buf[3]) & 0x3FFF; @@ -1141,7 +1184,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) struct iov_iter in_iter, prot_iter, data_iter; u64 tag; u32 exp_data_len, data_direction; - int ret, prot_bytes, i, c = 0; + int ret, prot_bytes, c = 0; u16 lun; u8 task_attr; bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); @@ -1303,9 +1346,13 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) goto err; } cmd->tvc_vq = vq; - for (i = 0; i < vc.in ; i++) - cmd->tvc_resp_iov[i] = vq->iov[vc.out + i]; - cmd->tvc_in_iovs = vc.in; + + ret = vhost_scsi_setup_resp_iovs(cmd, &vq->iov[vc.out], vc.in); + if (ret) { + vq_err(vq, "Failed to alloc recv iovs\n"); + vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd); + goto err; + } pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", cdb[0], lun); @@ -1686,7 +1733,6 @@ static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) kfree(tv_cmd->sgl); kfree(tv_cmd->prot_sgl); - kfree(tv_cmd->tvc_resp_iov); } sbitmap_free(&svq->scsi_tags); @@ -1735,14 +1781,6 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) } } - tv_cmd->tvc_resp_iov = kcalloc(UIO_MAXIOV, - sizeof(struct iovec), - GFP_KERNEL); - if (!tv_cmd->tvc_resp_iov) { - pr_err("Unable to allocate tv_cmd->tvc_resp_iov\n"); - goto out; - } - if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI) && vs->inline_sg_cnt) { tv_cmd->prot_sgl = kcalloc(vs->inline_sg_cnt, From e678900df2640b30c341c1c4121e859e7d3f267b Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Mon, 24 Feb 2025 16:36:03 -0500 Subject: [PATCH 0425/2157] dm vdo indexer: reorder uds_request to reduce padding Reorder fields and make uds_request_type and uds_zone_message packed, to squeeze out some space. Use struct_group so the request reset code no longer needs to care about field order. On x86_64 this reduces the struct size from 144 to 120, which saves 48 kB (about 12%) per VDO hash zone. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/indexer/index-session.c | 6 +-- drivers/md/dm-vdo/indexer/indexer.h | 53 +++++++++++------------ 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c index aee0914d604a..aa575a24e0b2 100644 --- a/drivers/md/dm-vdo/indexer/index-session.c +++ b/drivers/md/dm-vdo/indexer/index-session.c @@ -100,7 +100,6 @@ static int get_index_session(struct uds_index_session *index_session) int uds_launch_request(struct uds_request *request) { - size_t internal_size; int result; if (request->callback == NULL) { @@ -121,10 +120,7 @@ int uds_launch_request(struct uds_request *request) } /* Reset all internal fields before processing. */ - internal_size = - sizeof(struct uds_request) - offsetof(struct uds_request, zone_number); - // FIXME should be using struct_group for this instead - memset((char *) request + sizeof(*request) - internal_size, 0, internal_size); + memset(&request->internal, 0, sizeof(request->internal)); result = get_index_session(request->session); if (result != UDS_SUCCESS) diff --git a/drivers/md/dm-vdo/indexer/indexer.h b/drivers/md/dm-vdo/indexer/indexer.h index 183a94eb7e92..7c1fc4577f5b 100644 --- a/drivers/md/dm-vdo/indexer/indexer.h +++ b/drivers/md/dm-vdo/indexer/indexer.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -73,7 +74,7 @@ enum uds_request_type { /* Remove any mapping for a name. */ UDS_DELETE, -}; +} __packed; enum uds_open_index_type { /* Create a new index. */ @@ -226,7 +227,7 @@ struct uds_zone_message { enum uds_zone_message_type type; /* The virtual chapter number to which the message applies */ u64 virtual_chapter; -}; +} __packed; struct uds_index_session; struct uds_index; @@ -253,34 +254,32 @@ struct uds_request { /* The existing data associated with the request name, if any */ struct uds_record_data old_metadata; - /* Either UDS_SUCCESS or an error code for the request */ - int status; /* True if the record name had an existing entry in the index */ bool found; + /* Either UDS_SUCCESS or an error code for the request */ + int status; - /* - * The remaining fields are used internally and should not be altered by clients. The index - * relies on zone_number being the first field in this section. - */ - - /* The number of the zone which will process this request*/ - unsigned int zone_number; - /* A link for adding a request to a lock-free queue */ - struct funnel_queue_entry queue_link; - /* A link for adding a request to a standard linked list */ - struct uds_request *next_request; - /* A pointer to the index processing this request */ - struct uds_index *index; - /* Control message for coordinating between zones */ - struct uds_zone_message zone_message; - /* If true, process request immediately by waking the worker thread */ - bool unbatched; - /* If true, continue this request before processing newer requests */ - bool requeued; - /* The virtual chapter containing the record name, if known */ - u64 virtual_chapter; - /* The region of the index containing the record name */ - enum uds_index_region location; + /* The remaining fields are used internally and should not be altered by clients. */ + struct_group(internal, + /* The virtual chapter containing the record name, if known */ + u64 virtual_chapter; + /* The region of the index containing the record name */ + enum uds_index_region location; + /* If true, process request immediately by waking the worker thread */ + bool unbatched; + /* If true, continue this request before processing newer requests */ + bool requeued; + /* Control message for coordinating between zones */ + struct uds_zone_message zone_message; + /* The number of the zone which will process this request*/ + unsigned int zone_number; + /* A link for adding a request to a lock-free queue */ + struct funnel_queue_entry queue_link; + /* A link for adding a request to a standard linked list */ + struct uds_request *next_request; + /* A pointer to the index processing this request */ + struct uds_index *index; + ); }; /* A session is required for most index operations. */ From 2034fe663a1a5b4dc4db659d28ff32b989d3985f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Jan 2025 13:58:43 +0100 Subject: [PATCH 0426/2157] dt-bindings: input: Correct indentation and style in DTS example DTS example in the bindings should be indented with 2- or 4-spaces and aligned with opening '- |', so correct any differences like 3-spaces or mixtures 2- and 4-spaces in one binding. No functional changes here, but saves some comments during reviews of new patches built on existing code. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250107125844.226466-1-krzysztof.kozlowski@linaro.org Signed-off-by: Dmitry Torokhov --- .../bindings/input/qcom,pm8921-keypad.yaml | 46 +++++++++---------- .../bindings/input/qcom,pm8921-pwrkey.yaml | 36 +++++++-------- .../input/touchscreen/ti,ads7843.yaml | 30 ++++++------ 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml b/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml index 88764adcd696..e03611eef93d 100644 --- a/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml +++ b/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml @@ -62,28 +62,28 @@ unevaluatedProperties: false examples: - | - #include - #include - pmic { - #address-cells = <1>; - #size-cells = <0>; + #include + #include + pmic { + #address-cells = <1>; + #size-cells = <0>; - keypad@148 { - compatible = "qcom,pm8921-keypad"; - reg = <0x148>; - interrupt-parent = <&pmicintc>; - interrupts = <74 IRQ_TYPE_EDGE_RISING>, <75 IRQ_TYPE_EDGE_RISING>; - linux,keymap = < - MATRIX_KEY(0, 0, KEY_VOLUMEUP) - MATRIX_KEY(0, 1, KEY_VOLUMEDOWN) - MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS) - MATRIX_KEY(0, 3, KEY_CAMERA) - >; - keypad,num-rows = <1>; - keypad,num-columns = <5>; - debounce = <15>; - scan-delay = <32>; - row-hold = <91500>; - }; - }; + keypad@148 { + compatible = "qcom,pm8921-keypad"; + reg = <0x148>; + interrupt-parent = <&pmicintc>; + interrupts = <74 IRQ_TYPE_EDGE_RISING>, <75 IRQ_TYPE_EDGE_RISING>; + linux,keymap = < + MATRIX_KEY(0, 0, KEY_VOLUMEUP) + MATRIX_KEY(0, 1, KEY_VOLUMEDOWN) + MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS) + MATRIX_KEY(0, 3, KEY_CAMERA) + >; + keypad,num-rows = <1>; + keypad,num-columns = <5>; + debounce = <15>; + scan-delay = <32>; + row-hold = <91500>; + }; + }; ... diff --git a/Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml b/Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml index 12c74c083258..64590894857a 100644 --- a/Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml +++ b/Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml @@ -52,24 +52,24 @@ unevaluatedProperties: false examples: - | - #include - ssbi { - #address-cells = <1>; - #size-cells = <0>; + #include + ssbi { + #address-cells = <1>; + #size-cells = <0>; - pmic@0 { - reg = <0x0>; - #address-cells = <1>; - #size-cells = <0>; + pmic@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; - pwrkey@1c { - compatible = "qcom,pm8921-pwrkey"; - reg = <0x1c>; - interrupt-parent = <&pmicint>; - interrupts = <50 IRQ_TYPE_EDGE_RISING>, <51 IRQ_TYPE_EDGE_RISING>; - debounce = <15625>; - pull-up; - }; - }; - }; + pwrkey@1c { + compatible = "qcom,pm8921-pwrkey"; + reg = <0x1c>; + interrupt-parent = <&pmicint>; + interrupts = <50 IRQ_TYPE_EDGE_RISING>, <51 IRQ_TYPE_EDGE_RISING>; + debounce = <15625>; + pull-up; + }; + }; + }; ... diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml index 604921733d2c..8f6335d7da1c 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml @@ -164,20 +164,20 @@ examples: #size-cells = <0>; touchscreen@0 { - compatible = "ti,tsc2046"; - reg = <0>; /* CS0 */ - interrupt-parent = <&gpio1>; - interrupts = <8 0>; /* BOOT6 / GPIO 8 */ - pendown-gpio = <&gpio1 8 0>; - spi-max-frequency = <1000000>; - vcc-supply = <®_vcc3>; - wakeup-source; + compatible = "ti,tsc2046"; + reg = <0>; /* CS0 */ + interrupt-parent = <&gpio1>; + interrupts = <8 0>; /* BOOT6 / GPIO 8 */ + pendown-gpio = <&gpio1 8 0>; + spi-max-frequency = <1000000>; + vcc-supply = <®_vcc3>; + wakeup-source; - ti,pressure-max = /bits/ 16 <255>; - ti,x-max = /bits/ 16 <8000>; - ti,x-min = /bits/ 16 <0>; - ti,x-plate-ohms = /bits/ 16 <40>; - ti,y-max = /bits/ 16 <4800>; - ti,y-min = /bits/ 16 <0>; - }; + ti,pressure-max = /bits/ 16 <255>; + ti,x-max = /bits/ 16 <8000>; + ti,x-min = /bits/ 16 <0>; + ti,x-plate-ohms = /bits/ 16 <40>; + ti,y-max = /bits/ 16 <4800>; + ti,y-min = /bits/ 16 <0>; + }; }; From e51cb50f0bc0724634369e42a826dd302aae787c Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Fri, 10 Jan 2025 06:49:02 +0100 Subject: [PATCH 0427/2157] dt-bindings: input: matrix_keypad: convert to YAML Convert the gpio-matrix-keypad bindings from text to DT schema. Signed-off-by: Markus Burri Reviewed-by: Rob Herring (Arm) Reviewed-by: Manuel Traut Link: https://lore.kernel.org/r/20250110054906.354296-4-markus.burri@mt.com Signed-off-by: Dmitry Torokhov --- .../bindings/input/gpio-matrix-keypad.txt | 49 ----------- .../bindings/input/gpio-matrix-keypad.yaml | 88 +++++++++++++++++++ .../bindings/power/wakeup-source.txt | 2 +- 3 files changed, 89 insertions(+), 50 deletions(-) delete mode 100644 Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt create mode 100644 Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt deleted file mode 100644 index 570dc10f0cd7..000000000000 --- a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt +++ /dev/null @@ -1,49 +0,0 @@ -* GPIO driven matrix keypad device tree bindings - -GPIO driven matrix keypad is used to interface a SoC with a matrix keypad. -The matrix keypad supports multiple row and column lines, a key can be -placed at each intersection of a unique row and a unique column. The matrix -keypad can sense a key-press and key-release by means of GPIO lines and -report the event using GPIO interrupts to the cpu. - -Required Properties: -- compatible: Should be "gpio-matrix-keypad" -- row-gpios: List of gpios used as row lines. The gpio specifier - for this property depends on the gpio controller to - which these row lines are connected. -- col-gpios: List of gpios used as column lines. The gpio specifier - for this property depends on the gpio controller to - which these column lines are connected. -- linux,keymap: The definition can be found at - bindings/input/matrix-keymap.txt - -Optional Properties: -- linux,no-autorepeat: do no enable autorepeat feature. -- wakeup-source: use any event on keypad as wakeup event. - (Legacy property supported: "linux,wakeup") -- debounce-delay-ms: debounce interval in milliseconds -- col-scan-delay-us: delay, measured in microseconds, that is needed - before we can scan keypad after activating column gpio -- drive-inactive-cols: drive inactive columns during scan, - default is to turn inactive columns into inputs. - -Example: - matrix-keypad { - compatible = "gpio-matrix-keypad"; - debounce-delay-ms = <5>; - col-scan-delay-us = <2>; - - row-gpios = <&gpio2 25 0 - &gpio2 26 0 - &gpio2 27 0>; - - col-gpios = <&gpio2 21 0 - &gpio2 22 0>; - - linux,keymap = <0x0000008B - 0x0100009E - 0x02000069 - 0x0001006A - 0x0101001C - 0x0201006C>; - }; diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml new file mode 100644 index 000000000000..0f348b9b7bad --- /dev/null +++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/input/gpio-matrix-keypad.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPIO matrix keypad + +maintainers: + - Marek Vasut + +description: + GPIO driven matrix keypad is used to interface a SoC with a matrix keypad. + The matrix keypad supports multiple row and column lines, a key can be + placed at each intersection of a unique row and a unique column. The matrix + keypad can sense a key-press and key-release by means of GPIO lines and + report the event using GPIO interrupts to the cpu. + +allOf: + - $ref: /schemas/input/matrix-keymap.yaml# + +properties: + compatible: + const: gpio-matrix-keypad + + row-gpios: + description: + List of GPIOs used as row lines. The gpio specifier for this property + depends on the gpio controller to which these row lines are connected. + + col-gpios: + description: + List of GPIOs used as column lines. The gpio specifier for this property + depends on the gpio controller to which these column lines are connected. + + linux,keymap: true + + linux,no-autorepeat: + type: boolean + description: Do not enable autorepeat feature. + + + debounce-delay-ms: + description: Debounce interval in milliseconds. + default: 0 + + col-scan-delay-us: + description: + Delay, measured in microseconds, that is needed + before we can scan keypad after activating column gpio. + default: 0 + + drive-inactive-cols: + type: boolean + description: + Drive inactive columns during scan, + default is to turn inactive columns into inputs. + +required: + - compatible + - row-gpios + - col-gpios + - linux,keymap + +additionalProperties: false + +examples: + - | + matrix-keypad { + compatible = "gpio-matrix-keypad"; + debounce-delay-ms = <5>; + col-scan-delay-us = <2>; + + row-gpios = <&gpio2 25 0 + &gpio2 26 0 + &gpio2 27 0>; + + col-gpios = <&gpio2 21 0 + &gpio2 22 0>; + + linux,keymap = <0x0000008B + 0x0100009E + 0x02000069 + 0x0001006A + 0x0101001C + 0x0201006C>; + }; diff --git a/Documentation/devicetree/bindings/power/wakeup-source.txt b/Documentation/devicetree/bindings/power/wakeup-source.txt index 27f1797be963..66bb016305f9 100644 --- a/Documentation/devicetree/bindings/power/wakeup-source.txt +++ b/Documentation/devicetree/bindings/power/wakeup-source.txt @@ -23,7 +23,7 @@ List of legacy properties and respective binding document 1. "gpio-key,wakeup" Documentation/devicetree/bindings/input/gpio-keys{,-polled}.txt 2. "has-tpo" Documentation/devicetree/bindings/rtc/rtc-opal.txt -3. "linux,wakeup" Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt +3. "linux,wakeup" Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml Documentation/devicetree/bindings/mfd/tc3589x.txt Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml 4. "linux,keypad-wakeup" Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml From 2eeac6d4bb5ea1063dd634c490c89bb04cb79265 Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Tue, 25 Feb 2025 09:55:34 -0800 Subject: [PATCH 0428/2157] dt-bindings: input: matrix_keypad: add settle time after enabling all columns Matrix keypads with high capacity need a longer settle time after enabling all columns. Add an optional property to specify the settle time. Signed-off-by: Markus Burri Reviewed-by: Rob Herring (Arm) Reviewed-by: Manuel Traut Link: https://lore.kernel.org/r/20250110054906.354296-6-markus.burri@mt.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/gpio-matrix-keypad.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml index 0f348b9b7bad..4a5893edf323 100644 --- a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml +++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml @@ -51,6 +51,12 @@ properties: before we can scan keypad after activating column gpio. default: 0 + all-cols-on-delay-us: + description: + Delay, measured in microseconds, that is needed + after activating all column gpios. + default: 0 + drive-inactive-cols: type: boolean description: From 90a0a63451e42e3638039dde2e511e95a8486880 Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Tue, 25 Feb 2025 09:59:07 -0800 Subject: [PATCH 0429/2157] Input: matrix_keypad - add settle time after enabling all columns Matrix keypads with high capacity need a longer settle time after enabling all columns before re-enabling interrupts. The delay gives the system time to settle and avoids spurious interrupts. Add a new optional device property to configure the delay after enabling all columns. The default is no delay. Signed-off-by: Markus Burri Reviewed-by: Manuel Traut Link: https://lore.kernel.org/r/20250110054906.354296-7-markus.burri@mt.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 2a3b3bfc2878..889505e59f85 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -26,6 +26,7 @@ struct matrix_keypad { unsigned int row_shift; unsigned int col_scan_delay_us; + unsigned int all_cols_on_delay_us; /* key debounce interval in milli-second */ unsigned int debounce_ms; bool drive_inactive_cols; @@ -77,6 +78,9 @@ static void activate_all_cols(struct matrix_keypad *keypad, bool on) for (col = 0; col < keypad->num_col_gpios; col++) __activate_col(keypad, col, on); + + if (on && keypad->all_cols_on_delay_us) + fsleep(keypad->all_cols_on_delay_us); } static bool row_asserted(struct matrix_keypad *keypad, int row) @@ -392,6 +396,8 @@ static int matrix_keypad_probe(struct platform_device *pdev) &keypad->debounce_ms); device_property_read_u32(&pdev->dev, "col-scan-delay-us", &keypad->col_scan_delay_us); + device_property_read_u32(&pdev->dev, "all-cols-on-delay-us", + &keypad->all_cols_on_delay_us); err = matrix_keypad_init_gpio(pdev, keypad); if (err) From e71087ebcd0eb02ca7b4b99c468e1f20435f4bcb Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Tue, 25 Feb 2025 10:59:00 -0800 Subject: [PATCH 0430/2157] Input: matrix_keypad - use fsleep for delays after activating columns The delay is specified in a device property, so the duration can be arbitrarily large. fsleep() determines the best way of delaying (sleep vs spin) based on duration. see Documentation/timers/delay_sleep_functions.rst Signed-off-by: Markus Burri Reviewed-by: Manuel Traut Link: https://lore.kernel.org/r/20250110054906.354296-2-markus.burri@mt.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 889505e59f85..e46473cb817c 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -69,7 +69,7 @@ static void activate_col(struct matrix_keypad *keypad, int col, bool on) __activate_col(keypad, col, on); if (on && keypad->col_scan_delay_us) - udelay(keypad->col_scan_delay_us); + fsleep(keypad->col_scan_delay_us); } static void activate_all_cols(struct matrix_keypad *keypad, bool on) From 564dcfc124c3ee862b50b2ee3ba438f09e2259de Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Tue, 25 Feb 2025 08:48:24 +0100 Subject: [PATCH 0431/2157] Input: apple_z2 - fix potential confusion in Kconfig Add a dependency on ARCH_APPLE and clarify the description to make it more obvious that this is for ARM machines, not x86 ones. Signed-off-by: Sasha Finkelstein Link: https://lore.kernel.org/r/20250225-z2-kconfig-v1-1-a67d9b778a6c@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 6c885cc58f32..91a2b584dab1 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -106,9 +106,9 @@ config TOUCHSCREEN_ADC config TOUCHSCREEN_APPLE_Z2 tristate "Apple Z2 touchscreens" default ARCH_APPLE - depends on SPI + depends on SPI && (ARCH_APPLE || COMPILE_TEST) help - Say Y here if you have an Apple device with + Say Y here if you have an ARM Apple device with a touchscreen or a touchbar. If unsure, say N. From 496b7d2e5b936e3e8eae651b51db8a2f9cf0f8b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 12:25:35 -0800 Subject: [PATCH 0432/2157] Input: synaptics - hide unused smbus_pnp_ids[] array When SMBUS is disabled, this is never referenced, causing a W=1 warning: drivers/input/mouse/synaptics.c:164:27: error: 'smbus_pnp_ids' defined but not used [-Werror=unused-const-variable=] Hide the array behind the same #ifdef as the code referencing it. Fixes: e839ffab0289 ("Input: synaptics - add support for Intertouch devices") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250225145451.1141995-1-arnd@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index aba57abe6978..309c360aab55 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -161,6 +161,7 @@ static const char * const topbuttonpad_pnp_ids[] = { NULL }; +#ifdef CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ @@ -196,6 +197,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3257", /* HP Envy 13-ad105ng */ NULL }; +#endif static const char * const forcepad_pnp_ids[] = { "SYN300D", From 7f7573bd4f37d4edc168c5b5def0bc2a1951c657 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Sun, 16 Feb 2025 20:03:36 +0300 Subject: [PATCH 0433/2157] Input: pm8941-pwrkey - fix dev_dbg() output in pm8941_pwrkey_irq() Since 'sw_debounce_end_time' of 'struct pm8941_pwrkey' is of type 'ktime_t', use 'ktime_to_us()' to print the value in microseconds as it is announced in a call to 'dev_dbg()'. Compile tested only. Fixes: 0b65118e6ba3 ("Input: pm8941-pwrkey - add software key press debouncing support") Signed-off-by: Dmitry Antipov Reviewed-by: David Collins Link: https://lore.kernel.org/r/20250216170336.861025-1-dmantipov@yandex.ru Signed-off-by: Dmitry Torokhov --- drivers/input/misc/pm8941-pwrkey.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c index d0c46665e527..d952c16f2458 100644 --- a/drivers/input/misc/pm8941-pwrkey.c +++ b/drivers/input/misc/pm8941-pwrkey.c @@ -154,8 +154,8 @@ static irqreturn_t pm8941_pwrkey_irq(int irq, void *_data) if (pwrkey->sw_debounce_time_us) { if (ktime_before(ktime_get(), pwrkey->sw_debounce_end_time)) { dev_dbg(pwrkey->dev, - "ignoring key event received before debounce end %llu us\n", - pwrkey->sw_debounce_end_time); + "ignoring key event received before debounce end %lld us\n", + ktime_to_us(pwrkey->sw_debounce_end_time)); return IRQ_HANDLED; } } From cade8a89b101dbcd0a169fd464a8dff079d11a2b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 10 Feb 2025 12:36:36 +0100 Subject: [PATCH 0434/2157] coresight: etm4x: don't include '' directly The header clearly states that it does not want to be included directly, only via ''. Which is already present, so delete the superfluous include. Signed-off-by: Wolfram Sang Acked-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250210113635.51935-2-wsa+renesas@sang-engineering.com --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 2c1a60577728..bdb9262798cc 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include From 4ff6039ffb79a4a8a44b63810a8a2f2b43264856 Mon Sep 17 00:00:00 2001 From: Yuanfang Zhang Date: Thu, 16 Jan 2025 17:04:20 +0800 Subject: [PATCH 0435/2157] coresight-etm4x: add isb() before reading the TRCSTATR As recommended by section 4.3.7 ("Synchronization when using system instructions to progrom the trace unit") of ARM IHI 0064H.b, the self-hosted trace analyzer must perform a Context synchronization event between writing to the TRCPRGCTLR and reading the TRCSTATR. Additionally, add an ISB between the each read of TRCSTATR on coresight_timeout() when using system instructions to program the trace unit. Fixes: 1ab3bb9df5e3 ("coresight: etm4x: Add necessary synchronization for sysreg access") Signed-off-by: Yuanfang Zhang Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250116-etm_sync-v4-1-39f2b05e9514@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 20 ++++++-- .../coresight/coresight-etm4x-core.c | 48 +++++++++++++++++-- include/linux/coresight.h | 4 ++ 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index ab55e10d4b79..48bfb8036924 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1093,18 +1093,20 @@ static void coresight_remove_conns(struct coresight_device *csdev) } /** - * coresight_timeout - loop until a bit has changed to a specific register - * state. + * coresight_timeout_action - loop until a bit has changed to a specific register + * state, with a callback after every trial. * @csa: coresight device access for the device * @offset: Offset of the register from the base of the device. * @position: the position of the bit of interest. * @value: the value the bit should have. + * @cb: Call back after each trial. * * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if * TIMEOUT_US has elapsed, which ever happens first. */ -int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value) +int coresight_timeout_action(struct csdev_access *csa, u32 offset, + int position, int value, + coresight_timeout_cb_t cb) { int i; u32 val; @@ -1120,7 +1122,8 @@ int coresight_timeout(struct csdev_access *csa, u32 offset, if (!(val & BIT(position))) return 0; } - + if (cb) + cb(csa, offset, position, value); /* * Delay is arbitrary - the specification doesn't say how long * we are expected to wait. Extra check required to make sure @@ -1132,6 +1135,13 @@ int coresight_timeout(struct csdev_access *csa, u32 offset, return -EAGAIN; } +EXPORT_SYMBOL_GPL(coresight_timeout_action); + +int coresight_timeout(struct csdev_access *csa, u32 offset, + int position, int value) +{ + return coresight_timeout_action(csa, offset, position, value, NULL); +} EXPORT_SYMBOL_GPL(coresight_timeout); u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index bdb9262798cc..335fa157f30f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -427,6 +427,29 @@ static void etm4_check_arch_features(struct etmv4_drvdata *drvdata, } #endif /* CONFIG_ETM4X_IMPDEF_FEATURE */ +static void etm4x_sys_ins_barrier(struct csdev_access *csa, u32 offset, int pos, int val) +{ + if (!csa->io_mem) + isb(); +} + +/* + * etm4x_wait_status: Poll for TRCSTATR. == . While using system + * instruction to access the trace unit, each access must be separated by a + * synchronization barrier. See ARM IHI0064H.b section "4.3.7 Synchronization of + * register updates", for system instructions section, in "Notes": + * + * "In particular, whenever disabling or enabling the trace unit, a poll of + * TRCSTATR needs explicit synchronization between each read of TRCSTATR" + */ +static int etm4x_wait_status(struct csdev_access *csa, int pos, int val) +{ + if (!csa->io_mem) + return coresight_timeout_action(csa, TRCSTATR, pos, val, + etm4x_sys_ins_barrier); + return coresight_timeout(csa, TRCSTATR, pos, val); +} + static int etm4_enable_hw(struct etmv4_drvdata *drvdata) { int i, rc; @@ -458,7 +481,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) isb(); /* wait for TRCSTATR.IDLE to go up */ - if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) + if (etm4x_wait_status(csa, TRCSTATR_IDLE_BIT, 1)) dev_err(etm_dev, "timeout while waiting for Idle Trace Status\n"); if (drvdata->nr_pe) @@ -551,7 +574,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) isb(); /* wait for TRCSTATR.IDLE to go back down to '0' */ - if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 0)) + if (etm4x_wait_status(csa, TRCSTATR_IDLE_BIT, 0)) dev_err(etm_dev, "timeout while waiting for Idle Trace Status\n"); @@ -940,10 +963,25 @@ static void etm4_disable_hw(void *info) tsb_csync(); etm4x_relaxed_write32(csa, control, TRCPRGCTLR); + /* + * As recommended by section 4.3.7 ("Synchronization when using system + * instructions to progrom the trace unit") of ARM IHI 0064H.b, the + * self-hosted trace analyzer must perform a Context synchronization + * event between writing to the TRCPRGCTLR and reading the TRCSTATR. + */ + if (!csa->io_mem) + isb(); + /* wait for TRCSTATR.PMSTABLE to go to '1' */ - if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) + if (etm4x_wait_status(csa, TRCSTATR_PMSTABLE_BIT, 1)) dev_err(etm_dev, "timeout while waiting for PM stable Trace Status\n"); + /* + * As recommended by section 4.3.7 (Synchronization of register updates) + * of ARM IHI 0064H.b. + */ + isb(); + /* read the status of the single shot comparators */ for (i = 0; i < drvdata->nr_ss_cmp; i++) { config->ss_status[i] = @@ -1745,7 +1783,7 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) etm4_os_lock(drvdata); /* wait for TRCSTATR.PMSTABLE to go up */ - if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) { + if (etm4x_wait_status(csa, TRCSTATR_PMSTABLE_BIT, 1)) { dev_err(etm_dev, "timeout while waiting for PM Stable Status\n"); etm4_os_unlock(drvdata); @@ -1836,7 +1874,7 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcpdcr = etm4x_read32(csa, TRCPDCR); /* wait for TRCSTATR.IDLE to go up */ - if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) { + if (etm4x_wait_status(csa, TRCSTATR_PMSTABLE_BIT, 1)) { dev_err(etm_dev, "timeout while waiting for Idle Trace Status\n"); etm4_os_unlock(drvdata); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 3eef4e91df3f..c7614ccb3232 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -661,6 +661,10 @@ extern int coresight_enable_sysfs(struct coresight_device *csdev); extern void coresight_disable_sysfs(struct coresight_device *csdev); extern int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); +typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int); +extern int coresight_timeout_action(struct csdev_access *csa, u32 offset, + int position, int value, + coresight_timeout_cb_t cb); extern int coresight_claim_device(struct coresight_device *csdev); extern int coresight_claim_device_unlocked(struct coresight_device *csdev); From ee39dbe9395bd91435aed0194abc3c7c83dba146 Mon Sep 17 00:00:00 2001 From: Mao Jinlong Date: Tue, 25 Feb 2025 22:40:06 -0800 Subject: [PATCH 0436/2157] coresight-tpdm: Add MCMB dataset support MCMB (Multi-lane CMB) is a special form of CMB dataset type. MCMB subunit TPDM has the same number and usage of registers as CMB subunit TPDM. MCMB subunit can be enabled for data collection by writing 1 to the first bit of CMB_CR register. The difference is that MCMB subunit TPDM needs to select the lane and enable it in using it. Signed-off-by: Tao Zhang Signed-off-by: Mao Jinlong Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250226064008.2531037-2-quic_jinlmao@quicinc.com --- drivers/hwtracing/coresight/coresight-tpda.c | 7 ++-- drivers/hwtracing/coresight/coresight-tpdm.c | 40 +++++++++++++++++--- drivers/hwtracing/coresight/coresight-tpdm.h | 27 +++++++------ 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c index 573da8427428..c88c548e6d49 100644 --- a/drivers/hwtracing/coresight/coresight-tpda.c +++ b/drivers/hwtracing/coresight/coresight-tpda.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -68,11 +68,12 @@ static int tpdm_read_element_size(struct tpda_drvdata *drvdata, int rc = -EINVAL; struct tpdm_drvdata *tpdm_data = dev_get_drvdata(csdev->dev.parent); - if (tpdm_has_dsb_dataset(tpdm_data)) { + if (tpdm_data->dsb) { rc = fwnode_property_read_u32(dev_fwnode(csdev->dev.parent), "qcom,dsb-element-bits", &drvdata->dsb_esize); } - if (tpdm_has_cmb_dataset(tpdm_data)) { + + if (tpdm_data->cmb) { rc = fwnode_property_read_u32(dev_fwnode(csdev->dev.parent), "qcom,cmb-element-bits", &drvdata->cmb_esize); } diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c index afc4c18dd35d..311727f15c32 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.c +++ b/drivers/hwtracing/coresight/coresight-tpdm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -21,6 +21,21 @@ DEFINE_CORESIGHT_DEVLIST(tpdm_devs, "tpdm"); +static bool tpdm_has_dsb_dataset(struct tpdm_drvdata *drvdata) +{ + return (drvdata->datasets & TPDM_PIDR0_DS_DSB); +} + +static bool tpdm_has_cmb_dataset(struct tpdm_drvdata *drvdata) +{ + return (drvdata->datasets & TPDM_PIDR0_DS_CMB); +} + +static bool tpdm_has_mcmb_dataset(struct tpdm_drvdata *drvdata) +{ + return (drvdata->datasets & TPDM_PIDR0_DS_MCMB); +} + /* Read dataset array member with the index number */ static ssize_t tpdm_simple_dataset_show(struct device *dev, struct device_attribute *attr, @@ -198,7 +213,7 @@ static umode_t tpdm_cmb_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); - if (drvdata && tpdm_has_cmb_dataset(drvdata)) + if (drvdata && drvdata->cmb) return attr->mode; return 0; @@ -388,7 +403,7 @@ static void tpdm_enable_cmb(struct tpdm_drvdata *drvdata) { u32 val, i; - if (!tpdm_has_cmb_dataset(drvdata)) + if (!drvdata->cmb) return; /* Configure pattern registers */ @@ -415,6 +430,19 @@ static void tpdm_enable_cmb(struct tpdm_drvdata *drvdata) val |= TPDM_CMB_CR_MODE; else val &= ~TPDM_CMB_CR_MODE; + + if (tpdm_has_mcmb_dataset(drvdata)) { + val &= ~TPDM_CMB_CR_XTRIG_LNSEL; + /* Set the lane participates in the output pattern */ + val |= FIELD_PREP(TPDM_CMB_CR_XTRIG_LNSEL, + drvdata->cmb->mcmb.trig_lane); + + /* Set the enablement of the lane */ + val &= ~TPDM_CMB_CR_E_LN; + val |= FIELD_PREP(TPDM_CMB_CR_E_LN, + drvdata->cmb->mcmb.lane_select); + } + /* Set the enable bit of CMB control register to 1 */ val |= TPDM_CMB_CR_ENA; writel_relaxed(val, drvdata->base + TPDM_CMB_CR); @@ -480,7 +508,7 @@ static void tpdm_disable_cmb(struct tpdm_drvdata *drvdata) { u32 val; - if (!tpdm_has_cmb_dataset(drvdata)) + if (!drvdata->cmb) return; val = readl_relaxed(drvdata->base + TPDM_CMB_CR); @@ -542,12 +570,14 @@ static int tpdm_datasets_setup(struct tpdm_drvdata *drvdata) if (!drvdata->dsb) return -ENOMEM; } - if (tpdm_has_cmb_dataset(drvdata) && (!drvdata->cmb)) { + if ((tpdm_has_cmb_dataset(drvdata) || tpdm_has_mcmb_dataset(drvdata)) + && (!drvdata->cmb)) { drvdata->cmb = devm_kzalloc(drvdata->dev, sizeof(*drvdata->cmb), GFP_KERNEL); if (!drvdata->cmb) return -ENOMEM; } + tpdm_reset_datasets(drvdata); return 0; diff --git a/drivers/hwtracing/coresight/coresight-tpdm.h b/drivers/hwtracing/coresight/coresight-tpdm.h index e08d212642e3..62a3fd5ddec7 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.h +++ b/drivers/hwtracing/coresight/coresight-tpdm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _CORESIGHT_CORESIGHT_TPDM_H @@ -9,7 +9,7 @@ /* The max number of the datasets that TPDM supports */ #define TPDM_DATASETS 7 -/* CMB Subunit Registers */ +/* CMB/MCMB Subunit Registers */ #define TPDM_CMB_CR (0xA00) /* CMB subunit timestamp insertion enable register */ #define TPDM_CMB_TIER (0xA04) @@ -28,6 +28,10 @@ #define TPDM_CMB_CR_ENA BIT(0) /* Trace collection mode for CMB subunit */ #define TPDM_CMB_CR_MODE BIT(1) +/* MCMB trigger lane select */ +#define TPDM_CMB_CR_XTRIG_LNSEL GENMASK(20, 18) +/* MCMB lane enablement */ +#define TPDM_CMB_CR_E_LN GENMASK(17, 10) /* Timestamp control for pattern match */ #define TPDM_CMB_TIER_PATT_TSENAB BIT(0) /* CMB CTI timestamp request */ @@ -112,11 +116,13 @@ * PERIPHIDR0[0] : Fix to 1 if ImplDef subunit present, else 0 * PERIPHIDR0[1] : Fix to 1 if DSB subunit present, else 0 * PERIPHIDR0[2] : Fix to 1 if CMB subunit present, else 0 + * PERIPHIDR0[6] : Fix to 1 if MCMB subunit present, else 0 */ #define TPDM_PIDR0_DS_IMPDEF BIT(0) #define TPDM_PIDR0_DS_DSB BIT(1) #define TPDM_PIDR0_DS_CMB BIT(2) +#define TPDM_PIDR0_DS_MCMB BIT(6) #define TPDM_DSB_MAX_LINES 256 /* MAX number of EDCR registers */ @@ -256,6 +262,9 @@ struct dsb_dataset { * @patt_ts: Indicates if pattern match for timestamp is enabled. * @trig_ts: Indicates if CTI trigger for timestamp is enabled. * @ts_all: Indicates if timestamp is enabled for all packets. + * struct mcmb_dataset + * @mcmb_trig_lane: Save data for trigger lane + * @mcmb_lane_select: Save data for lane enablement */ struct cmb_dataset { u32 trace_mode; @@ -267,6 +276,10 @@ struct cmb_dataset { bool patt_ts; bool trig_ts; bool ts_all; + struct { + u8 trig_lane; + u8 lane_select; + } mcmb; }; /** @@ -324,14 +337,4 @@ struct tpdm_dataset_attribute { enum dataset_mem mem; u32 idx; }; - -static bool tpdm_has_dsb_dataset(struct tpdm_drvdata *drvdata) -{ - return (drvdata->datasets & TPDM_PIDR0_DS_DSB); -} - -static bool tpdm_has_cmb_dataset(struct tpdm_drvdata *drvdata) -{ - return (drvdata->datasets & TPDM_PIDR0_DS_CMB); -} #endif /* _CORESIGHT_CORESIGHT_TPDM_H */ From 07f7c21745db0afa71a813e594f0983b8bd0f031 Mon Sep 17 00:00:00 2001 From: Tao Zhang Date: Tue, 25 Feb 2025 22:40:07 -0800 Subject: [PATCH 0437/2157] coresight-tpdm: Add support to select lane TPDM MCMB subunits supports up to 8 lanes CMB. For MCMB configurations, the field "XTRIG_LNSEL" in CMB_CR register selects which lane participates in the output pattern mach cross trigger mechanism governed by the M_CMB_DXPR and M_CMB_XPMR regisers. Signed-off-by: Tao Zhang Signed-off-by: Mao Jinlong Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250226064008.2531037-3-quic_jinlmao@quicinc.com --- .../testing/sysfs-bus-coresight-devices-tpdm | 8 +++ drivers/hwtracing/coresight/coresight-tpdm.c | 51 +++++++++++++++++++ drivers/hwtracing/coresight/coresight-tpdm.h | 3 ++ 3 files changed, 62 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm index bf710ea6e0ef..547540e330c6 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm @@ -257,3 +257,11 @@ Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) /mcmb_trig_lane +Date: Feb 2025 +KernelVersion 6.15 +Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) +Description: + (RW) Set/Get which lane participates in the output pattern + match cross trigger mechanism for the MCMB subunit TPDM. diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c index 311727f15c32..dc7d6e35fc51 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.c +++ b/drivers/hwtracing/coresight/coresight-tpdm.c @@ -252,6 +252,18 @@ static umode_t tpdm_cmb_msr_is_visible(struct kobject *kobj, return 0; } +static umode_t tpdm_mcmb_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (drvdata && tpdm_has_mcmb_dataset(drvdata)) + return attr->mode; + + return 0; +} + static void tpdm_reset_datasets(struct tpdm_drvdata *drvdata) { if (tpdm_has_dsb_dataset(drvdata)) { @@ -1020,6 +1032,34 @@ static ssize_t cmb_trig_ts_store(struct device *dev, } static DEVICE_ATTR_RW(cmb_trig_ts); +static ssize_t mcmb_trig_lane_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + return sysfs_emit(buf, "%u\n", + (unsigned int)drvdata->cmb->mcmb.trig_lane); +} + +static ssize_t mcmb_trig_lane_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t size) +{ + struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned long val; + + if ((kstrtoul(buf, 0, &val)) || (val >= TPDM_MCMB_MAX_LANES)) + return -EINVAL; + + guard(spinlock)(&drvdata->spinlock); + drvdata->cmb->mcmb.trig_lane = val; + + return size; +} +static DEVICE_ATTR_RW(mcmb_trig_lane); + static struct attribute *tpdm_dsb_edge_attrs[] = { &dev_attr_ctrl_idx.attr, &dev_attr_ctrl_val.attr, @@ -1182,6 +1222,11 @@ static struct attribute *tpdm_cmb_msr_attrs[] = { NULL, }; +static struct attribute *tpdm_mcmb_attrs[] = { + &dev_attr_mcmb_trig_lane.attr, + NULL, +}; + static struct attribute *tpdm_dsb_attrs[] = { &dev_attr_dsb_mode.attr, &dev_attr_dsb_trig_ts.attr, @@ -1248,6 +1293,11 @@ static struct attribute_group tpdm_cmb_msr_grp = { .name = "cmb_msr", }; +static struct attribute_group tpdm_mcmb_attr_grp = { + .attrs = tpdm_mcmb_attrs, + .is_visible = tpdm_mcmb_is_visible, +}; + static const struct attribute_group *tpdm_attr_grps[] = { &tpdm_attr_grp, &tpdm_dsb_attr_grp, @@ -1259,6 +1309,7 @@ static const struct attribute_group *tpdm_attr_grps[] = { &tpdm_cmb_trig_patt_grp, &tpdm_cmb_patt_grp, &tpdm_cmb_msr_grp, + &tpdm_mcmb_attr_grp, NULL, }; diff --git a/drivers/hwtracing/coresight/coresight-tpdm.h b/drivers/hwtracing/coresight/coresight-tpdm.h index 62a3fd5ddec7..932c5dfc89b1 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.h +++ b/drivers/hwtracing/coresight/coresight-tpdm.h @@ -45,6 +45,9 @@ /* MAX number of DSB MSR */ #define TPDM_CMB_MAX_MSR 32 +/* MAX lanes in the output pattern for MCMB configurations*/ +#define TPDM_MCMB_MAX_LANES 8 + /* DSB Subunit Registers */ #define TPDM_DSB_CR (0x780) #define TPDM_DSB_TIER (0x784) From 0c0b6c05e208abc60ae0f1aab0660ab93b25cfc7 Mon Sep 17 00:00:00 2001 From: Tao Zhang Date: Tue, 25 Feb 2025 22:40:08 -0800 Subject: [PATCH 0438/2157] coresight-tpdm: Add support to enable the lane for MCMB TPDM Add the sysfs file to set/get the enablement of the lane. For MCMB configurations, the field "E_LN" in CMB_CR register is the individual lane enables. MCMB lane N is enabled for trace generation when M_CMB_CR.E=1 and M_CMB_CR.E_LN[N]=1. For lanes that are not implemented on a given MCMB configuration, the corresponding bits of this field read as 0 and ignore writes. Signed-off-by: Tao Zhang Signed-off-by: Mao Jinlong Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250226064008.2531037-4-quic_jinlmao@quicinc.com --- .../testing/sysfs-bus-coresight-devices-tpdm | 7 +++++ drivers/hwtracing/coresight/coresight-tpdm.c | 29 +++++++++++++++++++ drivers/hwtracing/coresight/coresight-tpdm.h | 3 ++ 3 files changed, 39 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm index 547540e330c6..a47ea46c6f9b 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm @@ -265,3 +265,10 @@ Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) /mcmb_lanes_select +Date: Feb 2025 +KernelVersion 6.15 +Contact: Jinlong Mao (QUIC) , Tao Zhang (QUIC) +Description: + (RW) Set/Get the enablement of the individual lane. diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c index dc7d6e35fc51..af806d2275d2 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.c +++ b/drivers/hwtracing/coresight/coresight-tpdm.c @@ -1060,6 +1060,34 @@ static ssize_t mcmb_trig_lane_store(struct device *dev, } static DEVICE_ATTR_RW(mcmb_trig_lane); +static ssize_t mcmb_lanes_select_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + return sysfs_emit(buf, "%u\n", + (unsigned int)drvdata->cmb->mcmb.lane_select); +} + +static ssize_t mcmb_lanes_select_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t size) +{ + struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned long val; + + if (kstrtoul(buf, 0, &val) || (val & ~TPDM_MCMB_E_LN_MASK)) + return -EINVAL; + + guard(spinlock)(&drvdata->spinlock); + drvdata->cmb->mcmb.lane_select = val & TPDM_MCMB_E_LN_MASK; + + return size; +} +static DEVICE_ATTR_RW(mcmb_lanes_select); + static struct attribute *tpdm_dsb_edge_attrs[] = { &dev_attr_ctrl_idx.attr, &dev_attr_ctrl_val.attr, @@ -1224,6 +1252,7 @@ static struct attribute *tpdm_cmb_msr_attrs[] = { static struct attribute *tpdm_mcmb_attrs[] = { &dev_attr_mcmb_trig_lane.attr, + &dev_attr_mcmb_lanes_select.attr, NULL, }; diff --git a/drivers/hwtracing/coresight/coresight-tpdm.h b/drivers/hwtracing/coresight/coresight-tpdm.h index 932c5dfc89b1..b11754389734 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.h +++ b/drivers/hwtracing/coresight/coresight-tpdm.h @@ -48,6 +48,9 @@ /* MAX lanes in the output pattern for MCMB configurations*/ #define TPDM_MCMB_MAX_LANES 8 +/* Filter bit 0~7 from the value for CR_E_LN */ +#define TPDM_MCMB_E_LN_MASK GENMASK(7, 0) + /* DSB Subunit Registers */ #define TPDM_DSB_CR (0x780) #define TPDM_DSB_TIER (0x784) From d72deaf05ac18e421d7e52a6be8966fd6ee185f4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 Feb 2025 12:29:13 +0100 Subject: [PATCH 0439/2157] dt-bindings: coresight: qcom,coresight-tpda: Fix too many 'reg' Binding listed variable number of IO addresses without defining them, however example DTS code, all in-tree DTS and Linux kernel driver mention only one address space, so drop the second to make binding precise and correctly describe the hardware. Fixes: a8fbe1442c2b ("dt-bindings: arm: Adds CoreSight TPDA hardware definitions") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250226112914.94361-1-krzysztof.kozlowski@linaro.org --- Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml index 76163abed655..5ed40f21b8eb 100644 --- a/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml +++ b/Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml @@ -55,8 +55,7 @@ properties: - const: arm,primecell reg: - minItems: 1 - maxItems: 2 + maxItems: 1 clocks: maxItems: 1 From 1e4e454223f770748775f211455513c79cb3121e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 Feb 2025 12:29:14 +0100 Subject: [PATCH 0440/2157] dt-bindings: coresight: qcom,coresight-tpdm: Fix too many 'reg' Binding listed variable number of IO addresses without defining them, however example DTS code, all in-tree DTS and Linux kernel driver mention only one address space, so drop the second to make binding precise and correctly describe the hardware. Fixes: 6c781a35133d ("dt-bindings: arm: Add CoreSight TPDM hardware") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250226112914.94361-2-krzysztof.kozlowski@linaro.org --- Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml index 8eec07d9d454..07d21a3617f5 100644 --- a/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml +++ b/Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml @@ -41,8 +41,7 @@ properties: - const: arm,primecell reg: - minItems: 1 - maxItems: 2 + maxItems: 1 qcom,dsb-element-bits: description: From 84b25926fa7abb5b634d4af9544f47ab0aabc399 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:18 -0700 Subject: [PATCH 0441/2157] acpi: numa: Add support to enumerate and store extended linear address mode Store the address mode as part of the cache attriutes. Export the mode attribute to sysfs as all other cache attributes. Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/ Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- Documentation/ABI/stable/sysfs-devices-node | 6 ++++++ drivers/acpi/numa/hmat.c | 5 +++++ drivers/base/node.c | 2 ++ include/linux/node.h | 7 +++++++ 4 files changed, 20 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index 402af4b2b905..a02707cb7cbc 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -177,6 +177,12 @@ Description: The cache write policy: 0 for write-back, 1 for write-through, other or unknown. +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode +Date: March 2025 +Contact: Dave Jiang +Description: + The address mode: 0 for reserved, 1 for extended-linear. + What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes Date: November 2021 Contact: Jarkko Sakkinen diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index bfbb08b1e6af..2630511937f5 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -506,6 +506,11 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header, switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) { case ACPI_HMAT_CA_DIRECT_MAPPED: tcache->cache_attrs.indexing = NODE_CACHE_DIRECT_MAP; + /* Extended Linear mode is only valid if cache is direct mapped */ + if (cache->address_mode == ACPI_HMAT_CACHE_MODE_EXTENDED_LINEAR) { + tcache->cache_attrs.address_mode = + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR; + } break; case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING: tcache->cache_attrs.indexing = NODE_CACHE_INDEXED; diff --git a/drivers/base/node.c b/drivers/base/node.c index 0ea653fa3433..cd13ef287011 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -244,12 +244,14 @@ CACHE_ATTR(size, "%llu") CACHE_ATTR(line_size, "%u") CACHE_ATTR(indexing, "%u") CACHE_ATTR(write_policy, "%u") +CACHE_ATTR(address_mode, "%#x") static struct attribute *cache_attrs[] = { &dev_attr_indexing.attr, &dev_attr_size.attr, &dev_attr_line_size.attr, &dev_attr_write_policy.attr, + &dev_attr_address_mode.attr, NULL, }; ATTRIBUTE_GROUPS(cache); diff --git a/include/linux/node.h b/include/linux/node.h index 9a881c2208b3..2b7517892230 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -57,6 +57,11 @@ enum cache_write_policy { NODE_CACHE_WRITE_OTHER, }; +enum cache_mode { + NODE_CACHE_ADDR_MODE_RESERVED, + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR, +}; + /** * struct node_cache_attrs - system memory caching attributes * @@ -65,6 +70,7 @@ enum cache_write_policy { * @size: Total size of cache in bytes * @line_size: Number of bytes fetched on a cache miss * @level: The cache hierarchy level + * @address_mode: The address mode */ struct node_cache_attrs { enum cache_indexing indexing; @@ -72,6 +78,7 @@ struct node_cache_attrs { u64 size; u16 line_size; u8 level; + u16 address_mode; }; #ifdef CONFIG_HMEM_REPORTING From 0ec9849b63338da7883440ed3f52757cd8c847b1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:19 -0700 Subject: [PATCH 0442/2157] acpi/hmat / cxl: Add extended linear cache support for CXL The current cxl region size only indicates the size of the CXL memory region without accounting for the extended linear cache size. Retrieve the cache size from HMAT and append that to the cxl region size for the cxl region range that matches the SRAT range that has extended linear cache enabled. The SRAT defines the whole memory range that includes the extended linear cache and the CXL memory region. The new HMAT ECN/ECR to the Memory Side Cache Information Structure defines the size of the extended linear cache size and matches to the SRAT Memory Affinity Structure by the memory proxmity domain. Add a helper to match the cxl range to the SRAT memory range in order to retrieve the cache size. There are several places that checks the cxl region range against the decoder range. Use new helper to check between the two ranges and address the new cache size. Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/acpi/numa/hmat.c | 39 ++++++++++++++++++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/acpi.c | 11 +++++ drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/region.c | 86 +++++++++++++++++++++++++++++++++++---- drivers/cxl/cxl.h | 2 + include/linux/acpi.h | 11 +++++ tools/testing/cxl/Kbuild | 1 + 8 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 drivers/cxl/core/acpi.c diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 2630511937f5..9d9052258e92 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -108,6 +108,45 @@ static struct memory_target *find_mem_target(unsigned int mem_pxm) return NULL; } +/** + * hmat_get_extended_linear_cache_size - Retrieve the extended linear cache size + * @backing_res: resource from the backing media + * @nid: node id for the memory region + * @cache_size: (Output) size of extended linear cache. + * + * Return: 0 on success. Errno on failure. + * + */ +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, + resource_size_t *cache_size) +{ + unsigned int pxm = node_to_pxm(nid); + struct memory_target *target; + struct target_cache *tcache; + struct resource *res; + + target = find_mem_target(pxm); + if (!target) + return -ENOENT; + + list_for_each_entry(tcache, &target->caches, node) { + if (tcache->cache_attrs.address_mode != + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR) + continue; + + res = &target->memregions; + if (!resource_contains(res, backing_res)) + continue; + + *cache_size = tcache->cache_attrs.size; + return 0; + } + + *cache_size = 0; + return 0; +} +EXPORT_SYMBOL_NS_GPL(hmat_get_extended_linear_cache_size, "CXL"); + static struct memory_target *acpi_find_genport_target(u32 uid) { struct memory_target *target; diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9259bcc6773c..1a0c9c6ca818 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,5 +14,6 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o +cxl_core-y += acpi.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c new file mode 100644 index 000000000000..f13b4dae6ac5 --- /dev/null +++ b/drivers/cxl/core/acpi.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#include +#include "cxl.h" +#include "core.h" + +int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return hmat_get_extended_linear_cache_size(backing_res, nid, size); +} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..0fb779b612d1 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -115,4 +115,7 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e8d11a988fd9..69af651a8f46 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -824,6 +824,21 @@ static int match_free_decoder(struct device *dev, const void *data) return 1; } +static bool region_res_match_cxl_range(const struct cxl_region_params *p, + struct range *range) +{ + if (!p->res) + return false; + + /* + * If an extended linear cache region then the CXL range is assumed + * to be fronted by the DRAM range in current known implementation. + * This assumption will be made until a variant implementation exists. + */ + return p->res->start + p->cache_size == range->start && + p->res->end == range->end; +} + static int match_auto_decoder(struct device *dev, const void *data) { const struct cxl_region_params *p = data; @@ -836,7 +851,7 @@ static int match_auto_decoder(struct device *dev, const void *data) cxld = to_cxl_decoder(dev); r = &cxld->hpa_range; - if (p->res && p->res->start == r->start && p->res->end == r->end) + if (region_res_match_cxl_range(p, r)) return 1; return 0; @@ -1424,8 +1439,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || cxld->interleave_granularity != ig || - cxld->hpa_range.start != p->res->start || - cxld->hpa_range.end != p->res->end || + !region_res_match_cxl_range(p, &cxld->hpa_range) || ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { dev_err(&cxlr->dev, "%s:%s %s expected iw: %d ig: %d %pr\n", @@ -1951,13 +1965,13 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } - if (resource_size(cxled->dpa_res) * p->interleave_ways != + if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size != resource_size(p->res)) { dev_dbg(&cxlr->dev, - "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n", + "%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), (u64)resource_size(cxled->dpa_res), p->interleave_ways, - (u64)resource_size(p->res)); + (u64)p->cache_size, (u64)resource_size(p->res)); return -EINVAL; } @@ -2921,7 +2935,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); /* Apply the hpa_offset to the region base address */ - hpa = hpa_offset + p->res->start; + hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ if (cxlrd->hpa_to_spa) @@ -3224,6 +3238,52 @@ static int match_region_by_range(struct device *dev, const void *data) return rc; } +static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, + struct resource *res) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_region_params *p = &cxlr->params; + int nid = phys_to_target_node(res->start); + resource_size_t size, cache_size, start; + int rc; + + size = resource_size(res); + if (!size) + return -EINVAL; + + rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); + if (rc) + return rc; + + if (!cache_size) + return 0; + + if (size != cache_size) { + dev_warn(&cxlr->dev, + "Extended Linear Cache size %#lld != CXL size %#lld. No Support!", + cache_size, size); + return -EOPNOTSUPP; + } + + /* + * Move the start of the range to where the cache range starts. The + * implementation assumes that the cache range is in front of the + * CXL range. This is not dictated by the HMAT spec but is how the + * current known implementation is configured. + * + * The cache range is expected to be within the CFMWS. The adjusted + * res->start should not be less than cxlrd->res->start. + */ + start = res->start - cache_size; + if (start < cxlrd->res->start) + return -ENXIO; + + res->start = start; + p->cache_size = cache_size; + + return 0; +} + /* Establish an empty region covering the given HPA range */ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, struct cxl_endpoint_decoder *cxled) @@ -3270,6 +3330,18 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), dev_name(&cxlr->dev)); + + rc = cxl_extended_linear_cache_resize(cxlr, res); + if (rc) { + /* + * Failing to support extended linear cache region resize does not + * prevent the region from functioning. Only causes cxl list showing + * incorrect region size. + */ + dev_warn(cxlmd->dev.parent, + "Extended linear cache calculation failed.\n"); + } + rc = insert_resource(cxlrd->res, res); if (rc) { /* diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index bbbaa0d0a670..7ee96867ac73 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -493,6 +493,7 @@ enum cxl_config_state { * @res: allocated iomem capacity for this region * @targets: active ordered targets in current decoder configuration * @nr_targets: number of targets + * @cache_size: extended linear cache size if exists, otherwise zero. * * State transitions are protected by the cxl_region_rwsem */ @@ -504,6 +505,7 @@ struct cxl_region_params { struct resource *res; struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE]; int nr_targets; + resource_size_t cache_size; }; /* diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4e495b29c640..cbd933504dbf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1095,6 +1095,17 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) #endif /* !CONFIG_ACPI */ +#ifdef CONFIG_ACPI_HMAT +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, + resource_size_t *size); +#else +static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return -EOPNOTSUPP; +} +#endif + extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index b1256fee3567..1ae13987a8a2 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -61,6 +61,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o +cxl_core-y += $(CXL_CORE_SRC)/acpi.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o From 8c520c5f1e767ed6b47feefca1ed32a097e9b707 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:20 -0700 Subject: [PATCH 0443/2157] cxl: Add extended linear cache address alias emission for cxl events Add the aliased address of extended linear cache when emitting event trace for poison, DRAM and general media of CXL events. Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 14 ++++++++++---- drivers/cxl/core/region.c | 2 +- drivers/cxl/core/trace.h | 34 ++++++++++++++++++++++++---------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..f26b96dd7410 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -871,7 +871,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, } if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) { - u64 dpa, hpa = ULLONG_MAX; + u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX; struct cxl_region *cxlr; /* @@ -884,14 +884,20 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); - if (cxlr) + if (cxlr) { + u64 cache_size = cxlr->params.cache_size; + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); + if (cache_size) + hpa_alias = hpa - cache_size; + } if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, cxlr, hpa, - &evt->gen_media); + hpa_alias, &evt->gen_media); else if (event_type == CXL_CPER_EVENT_DRAM) - trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram); + trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias, + &evt->dram); } } EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 69af651a8f46..a20ef3f10fef 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3260,7 +3260,7 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, if (size != cache_size) { dev_warn(&cxlr->dev, - "Extended Linear Cache size %#lld != CXL size %#lld. No Support!", + "Extended Linear Cache size %lld != CXL size %lld. No Support!", cache_size, size); return -EOPNOTSUPP; } diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index cea706b683b5..e3f842dcdf1d 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -392,9 +392,10 @@ TRACE_EVENT(cxl_generic_event, TRACE_EVENT(cxl_general_media, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec), + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, + struct cxl_event_gen_media *rec), - TP_ARGS(cxlmd, log, cxlr, hpa, rec), + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -408,6 +409,7 @@ TRACE_EVENT(cxl_general_media, __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) /* Following are out of order to pack trace record */ __field(u64, hpa) + __field(u64, hpa_alias0) __field_struct(uuid_t, region_uuid) __field(u16, validity_flags) __field(u8, rank) @@ -438,6 +440,7 @@ TRACE_EVENT(cxl_general_media, CXL_EVENT_GEN_MED_COMP_ID_SIZE); __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); __entry->hpa = hpa; + __entry->hpa_alias0 = hpa_alias0; if (cxlr) { __assign_str(region_name); uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); @@ -455,7 +458,7 @@ TRACE_EVENT(cxl_general_media, "device=%x validity_flags='%s' " \ "comp_id=%s comp_id_pldm_valid_flags='%s' " \ "pldm_entity_id=%s pldm_resource_id=%s " \ - "hpa=%llx region=%s region_uuid=%pUb " \ + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ "cme_threshold_ev_flags='%s' cme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), @@ -470,7 +473,7 @@ TRACE_EVENT(cxl_general_media, CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), show_pldm_resource_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), - __entry->hpa, __get_str(region_name), &__entry->region_uuid, + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cme_count ) ); @@ -529,9 +532,10 @@ TRACE_EVENT(cxl_general_media, TRACE_EVENT(cxl_dram, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec), + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, + struct cxl_event_dram *rec), - TP_ARGS(cxlmd, log, cxlr, hpa, rec), + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -547,6 +551,7 @@ TRACE_EVENT(cxl_dram, __field(u32, row) __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE) __field(u64, hpa) + __field(u64, hpa_alias0) __field_struct(uuid_t, region_uuid) __field(u8, rank) /* Out of order to pack trace record */ __field(u8, bank_group) /* Out of order to pack trace record */ @@ -584,6 +589,7 @@ TRACE_EVENT(cxl_dram, memcpy(__entry->cor_mask, &rec->correction_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE); __entry->hpa = hpa; + __entry->hpa_alias0 = hpa_alias0; if (cxlr) { __assign_str(region_name); uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); @@ -604,7 +610,7 @@ TRACE_EVENT(cxl_dram, "validity_flags='%s' " \ "comp_id=%s comp_id_pldm_valid_flags='%s' " \ "pldm_entity_id=%s pldm_resource_id=%s " \ - "hpa=%llx region=%s region_uuid=%pUb " \ + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ "sub_channel=%u cme_threshold_ev_flags='%s' cvme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), @@ -622,7 +628,7 @@ TRACE_EVENT(cxl_dram, CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), show_pldm_resource_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), - __entry->hpa, __get_str(region_name), &__entry->region_uuid, + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, __entry->sub_channel, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cvme_count ) @@ -870,6 +876,7 @@ TRACE_EVENT(cxl_poison, __string(region, cxlr ? dev_name(&cxlr->dev) : "") __field(u64, overflow_ts) __field(u64, hpa) + __field(u64, hpa_alias0) __field(u64, dpa) __field(u32, dpa_length) __array(char, uuid, 16) @@ -892,16 +899,22 @@ TRACE_EVENT(cxl_poison, memcpy(__entry->uuid, &cxlr->params.uuid, 16); __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); + if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) + __entry->hpa_alias0 = __entry->hpa + + cxlr->params.cache_size; + else + __entry->hpa_alias0 = ULLONG_MAX; } else { __assign_str(region); memset(__entry->uuid, 0, 16); __entry->hpa = ULLONG_MAX; + __entry->hpa_alias0 = ULLONG_MAX; } ), TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \ - "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \ - "source=%s flags=%s overflow_time=%llu", + "region_uuid=%pU hpa=0x%llx hpa_alias0=0x%llx dpa=0x%llx " \ + "dpa_length=0x%x source=%s flags=%s overflow_time=%llu", __get_str(memdev), __get_str(host), __entry->serial, @@ -909,6 +922,7 @@ TRACE_EVENT(cxl_poison, __get_str(region), __entry->uuid, __entry->hpa, + __entry->hpa_alias0, __entry->dpa, __entry->dpa_length, show_poison_source(__entry->source), From 516e5bd0b6bf4ae1ad072df637b428a737c3c870 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 26 Feb 2025 09:21:21 -0700 Subject: [PATCH 0444/2157] cxl: Add mce notifier to emit aliased address for extended linear cache Below is a setup with extended linear cache configuration with an example layout of memory region shown below presented as a single memory region consists of 256G memory where there's 128G of DRAM and 128G of CXL memory. The kernel sees a region of total 256G of system memory. 128G DRAM 128G CXL memory |-----------------------------------|-------------------------------------| Data resides in either DRAM or far memory (FM) with no replication. Hot data is swapped into DRAM by the hardware behind the scenes. When error is detected in one location, it is possible that error also resides in the aliased location. Therefore when a memory location that is flagged by MCE is part of the special region, the aliased memory location needs to be offlined as well. Add an mce notify callback to identify if the MCE address location is part of an extended linear cache region and handle accordingly. Added symbol export to set_mce_nospec() in x86 code in order to call set_mce_nospec() from the CXL MCE notify callback. Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/ Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250226162224.3633792-5-dave.jiang@intel.com Signed-off-by: Dave Jiang --- arch/x86/mm/pat/set_memory.c | 1 + drivers/cxl/Kconfig | 4 +++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/mbox.c | 6 ++++ drivers/cxl/core/mce.c | 65 ++++++++++++++++++++++++++++++++++++ drivers/cxl/core/mce.h | 20 +++++++++++ drivers/cxl/core/region.c | 28 ++++++++++++++++ drivers/cxl/cxl.h | 6 ++++ drivers/cxl/cxlmem.h | 2 ++ tools/testing/cxl/Kbuild | 1 + 10 files changed, 134 insertions(+) create mode 100644 drivers/cxl/core/mce.c create mode 100644 drivers/cxl/core/mce.h diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index ef4514d64c05..255a3d176956 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2081,6 +2081,7 @@ int set_mce_nospec(unsigned long pfn) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; } +EXPORT_SYMBOL_GPL(set_mce_nospec); /* Restore full speculative operation to the pfn. */ int clear_mce_nospec(unsigned long pfn) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 876469e23f7a..d1c91dacae56 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -146,4 +146,8 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. +config CXL_MCE + def_bool y + depends on X86_MCE && MEMORY_FAILURE + endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 1a0c9c6ca818..61c9332b3582 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -17,3 +17,4 @@ cxl_core-y += cdat.o cxl_core-y += acpi.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o +cxl_core-$(CONFIG_CXL_MCE) += mce.o diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index f26b96dd7410..c06f19a729e8 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -11,6 +11,7 @@ #include "core.h" #include "trace.h" +#include "mce.h" static bool cxl_raw_allow_all; @@ -1444,6 +1445,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) { struct cxl_memdev_state *mds; + int rc; mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); if (!mds) { @@ -1459,6 +1461,10 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; + rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); + if (rc) + return ERR_PTR(rc); + return mds; } EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL"); diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c new file mode 100644 index 000000000000..ff8d078c6ca1 --- /dev/null +++ b/drivers/cxl/core/mce.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include "mce.h" + +static int cxl_handle_mce(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state, + mce_notifier); + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; + struct cxl_port *endpoint = cxlmd->endpoint; + struct mce *mce = data; + u64 spa, spa_alias; + unsigned long pfn; + + if (!mce || !mce_usable_address(mce)) + return NOTIFY_DONE; + + if (!endpoint) + return NOTIFY_DONE; + + spa = mce->addr & MCI_ADDR_PHYSADDR; + + pfn = spa >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return NOTIFY_DONE; + + spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa); + if (spa_alias == ~0ULL) + return NOTIFY_DONE; + + pfn = spa_alias >> PAGE_SHIFT; + + /* + * Take down the aliased memory page. The original memory page flagged + * by the MCE will be taken cared of by the standard MCE handler. + */ + dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n", + spa_alias); + if (!memory_failure(pfn, 0)) + set_mce_nospec(pfn); + + return NOTIFY_OK; +} + +static void cxl_unregister_mce_notifier(void *mce_notifier) +{ + mce_unregister_decode_chain(mce_notifier); +} + +int devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifier) +{ + mce_notifier->notifier_call = cxl_handle_mce; + mce_notifier->priority = MCE_PRIO_UC; + mce_register_decode_chain(mce_notifier); + + return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier, + mce_notifier); +} diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h new file mode 100644 index 000000000000..ace73424eeb6 --- /dev/null +++ b/drivers/cxl/core/mce.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#ifndef _CXL_CORE_MCE_H_ +#define _CXL_CORE_MCE_H_ + +#include + +#ifdef CONFIG_CXL_MCE +int devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifer); +#else +static inline int +devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifier) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index a20ef3f10fef..c2b4162aee42 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3447,6 +3447,34 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) } EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL"); +u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) +{ + struct cxl_region_ref *iter; + unsigned long index; + + if (!endpoint) + return ~0ULL; + + guard(rwsem_write)(&cxl_region_rwsem); + + xa_for_each(&endpoint->regions, index, iter) { + struct cxl_region_params *p = &iter->region->params; + + if (p->res->start <= spa && spa <= p->res->end) { + if (!p->cache_size) + return ~0ULL; + + if (spa > p->res->start + p->cache_size) + return spa - p->cache_size; + + return spa + p->cache_size; + } + } + + return ~0ULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL"); + static int is_system_ram(struct resource *res, void *arg) { struct cxl_region *cxlr = arg; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 7ee96867ac73..4785cff5209f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -877,6 +877,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); +u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -895,6 +896,11 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) { return NULL; } +static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, + u64 spa) +{ + return 0; +} #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..55752cbf408c 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -477,6 +477,7 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @poison: poison driver state info * @security: security driver state info * @fw: firmware upload / activation state + * @mce_notifier: MCE notifier * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -503,6 +504,7 @@ struct cxl_memdev_state { struct cxl_poison_state poison; struct cxl_security_state security; struct cxl_fw_state fw; + struct notifier_block mce_notifier; }; static inline struct cxl_memdev_state * diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1ae13987a8a2..f625eb2d2dc5 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -64,6 +64,7 @@ cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-y += $(CXL_CORE_SRC)/acpi.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o +cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From be2f35e15939836d0e0115e99d8cd7a49b89cc8f Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 18 Feb 2025 12:29:24 +0530 Subject: [PATCH 0445/2157] soundwire: amd: change the log level for command response log Change log level for command response log to dev_dbg_ratelimited when command is ignored. Signed-off-by: Vijendar Mukunda Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250218065924.917915-1-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index dcf85f94950a..a12c68b93b1c 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -346,7 +346,7 @@ static enum sdw_command_response amd_sdw_fill_msg_resp(struct amd_sdw_manager *a msg->dev_num); return SDW_CMD_FAIL; } - dev_err_ratelimited(amd_manager->dev, "command is ignored for Slave %d\n", + dev_dbg_ratelimited(amd_manager->dev, "command is ignored for Slave %d\n", msg->dev_num); return SDW_CMD_IGNORED; } From 186fdd3d87e7b77f7537ff65f686d029cb6560d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 17:33:12 +0100 Subject: [PATCH 0446/2157] dmaengine: img-mdc: remove incorrect of_match_ptr annotation Building with W=1 shows a warning about of_ftpm_tee_ids being unused when CONFIG_OF is disabled: drivers/dma/img-mdc-dma.c:863:34: error: unused variable 'mdc_dma_of_match' [-Werror,-Wunused-const-variable] Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250225163315.4168033-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/img-mdc-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c index 4127c1bdcca7..fd55bcd060ab 100644 --- a/drivers/dma/img-mdc-dma.c +++ b/drivers/dma/img-mdc-dma.c @@ -1073,7 +1073,7 @@ static struct platform_driver mdc_dma_driver = { .driver = { .name = "img-mdc-dma", .pm = &img_mdc_pm_ops, - .of_match_table = of_match_ptr(mdc_dma_of_match), + .of_match_table = mdc_dma_of_match, }, .probe = mdc_dma_probe, .remove = mdc_dma_remove, From e19ba02eeb8e98086708ee11ca1b123d17ec1977 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Fri, 14 Feb 2025 11:08:16 -0700 Subject: [PATCH 0447/2157] dt-bindings: dma: atmel: add microchip,sama7d65-dma Add microchip,sama7d65-dma compatible string to DT bindings documentation. Signed-off-by: Ryan Wanner Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/8b69f0c6d8955790edcdbe5d1e205b43dedb99ff.1739555984.git.Ryan.Wanner@microchip.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml b/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml index 9ca1c5d1f00f..73fc13b902b3 100644 --- a/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml +++ b/Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml @@ -32,6 +32,9 @@ properties: - microchip,sam9x60-dma - microchip,sam9x7-dma - const: atmel,sama5d4-dma + - items: + - const: microchip,sama7d65-dma + - const: microchip,sama7g5-dma "#dma-cells": description: | From 34436106af3d00b9cf94dbf7200b8162937f9943 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 21 Feb 2025 17:21:53 -0500 Subject: [PATCH 0448/2157] dt-bindings: dma: fsl,edma: Add i.MX94 support Add support for the i.MX94 DMA controllers. The SoC includes two DMA controllers: one compatible with i.MX93 eDMA3 and another compatible with i.MX95 eDMA5. Add compatible string "fsl,imx94-edma3" with fallback to "fsl,imx93-edma3". Add compatible string "fsl,imx94-edma5" with fallback to "fsl,imx95-edma5". Signed-off-by: Frank Li Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250221222153.405285-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/fsl,edma.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml index 4f925469533e..950e8fa4f4ab 100644 --- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml @@ -27,6 +27,14 @@ properties: - fsl,imx93-edma4 - fsl,imx95-edma5 - nxp,s32g2-edma + - items: + - enum: + - fsl,imx94-edma3 + - const: fsl,imx93-edma3 + - items: + - enum: + - fsl,imx94-edma5 + - const: fsl,imx95-edma5 - items: - const: fsl,ls1028a-edma - const: fsl,vf610-edma From 95032938c7c9b2e5ebb69f0ee10ebe340fa3af53 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 22 Feb 2025 10:50:28 +0100 Subject: [PATCH 0449/2157] dmaengine: bcm2835-dma: fix warning when CONFIG_PM=n The old SET_LATE_SYSTEM_SLEEP_PM_OPS macro cause a build warning when CONFIG_PM is disabled: warning: 'bcm2835_dma_suspend_late' defined but not used [-Wunused-function] Change this to the modern replacement. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501071533.yrFb156H-lkp@intel.com/ Signed-off-by: Stefan Wahren Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20250222095028.48818-1-wahrenst@gmx.net Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 20b10c15c696..0117bb2e8591 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -893,7 +893,7 @@ static int bcm2835_dma_suspend_late(struct device *dev) } static const struct dev_pm_ops bcm2835_dma_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(bcm2835_dma_suspend_late, NULL) + LATE_SYSTEM_SLEEP_PM_OPS(bcm2835_dma_suspend_late, NULL) }; static int bcm2835_dma_probe(struct platform_device *pdev) From 0da30874729baeb01889b0eca16cfda122687503 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 24 Feb 2025 16:04:17 +0200 Subject: [PATCH 0450/2157] dmaengine: ti: k3-udma-glue: Drop skip_fdq argument from k3_udma_glue_reset_rx_chn The user of k3_udma_glue_reset_rx_chn() e.g. ti_am65_cpsw_nuss can run on multiple platforms having different DMA architectures. On some platforms there can be one FDQ for all flows in the RX channel while for others there is a separate FDQ for each flow in the RX channel. So far we have been relying on the skip_fdq argument of k3_udma_glue_reset_rx_chn(). Instead of relying on the user to provide this information, infer it based on DMA architecture during k3_udma_glue_request_rx_chn() and save it in an internal flag 'single_fdq'. Use that flag at k3_udma_glue_reset_rx_chn() to deicide if the FDQ needs to be cleared for every flow or just for flow 0. Fixes the below issue on ti_am65_cpsw_nuss driver on AM62-SK. > ip link set eth1 down > ip link set eth0 down > ethtool -L eth0 rx 8 > ip link set eth0 up > modprobe -r ti_am65_cpsw_nuss [ 103.045726] ------------[ cut here ]------------ [ 103.050505] k3_knav_desc_pool size 512000 != avail 64000 [ 103.050703] WARNING: CPU: 1 PID: 450 at drivers/net/ethernet/ti/k3-cppi-desc-pool.c:33 k3_cppi_desc_pool_destroy+0xa0/0xa8 [k3_cppi_desc_pool] [ 103.068810] Modules linked in: ti_am65_cpsw_nuss(-) k3_cppi_desc_pool snd_soc_hdmi_codec crct10dif_ce snd_soc_simple_card snd_soc_simple_card_utils display_connector rtc_ti_k3 k3_j72xx_bandgap tidss drm_client_lib snd_soc_davinci_mcas p drm_dma_helper tps6598x phylink snd_soc_ti_udma rti_wdt drm_display_helper snd_soc_tlv320aic3x_i2c typec at24 phy_gmii_sel snd_soc_ti_edma snd_soc_tlv320aic3x sii902x snd_soc_ti_sdma sa2ul omap_mailbox drm_kms_helper authenc cfg80211 r fkill fuse drm drm_panel_orientation_quirks backlight ip_tables x_tables ipv6 [last unloaded: k3_cppi_desc_pool] [ 103.119950] CPU: 1 UID: 0 PID: 450 Comm: modprobe Not tainted 6.13.0-rc7-00001-g9c5e3435fa66 #1011 [ 103.119968] Hardware name: Texas Instruments AM625 SK (DT) [ 103.119974] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 103.119983] pc : k3_cppi_desc_pool_destroy+0xa0/0xa8 [k3_cppi_desc_pool] [ 103.148007] lr : k3_cppi_desc_pool_destroy+0xa0/0xa8 [k3_cppi_desc_pool] [ 103.154709] sp : ffff8000826ebbc0 [ 103.158015] x29: ffff8000826ebbc0 x28: ffff0000090b6300 x27: 0000000000000000 [ 103.165145] x26: 0000000000000000 x25: 0000000000000000 x24: ffff0000019df6b0 [ 103.172271] x23: ffff0000019df6b8 x22: ffff0000019df410 x21: ffff8000826ebc88 [ 103.179397] x20: 000000000007d000 x19: ffff00000a3b3000 x18: 0000000000000000 [ 103.186522] x17: 0000000000000000 x16: 0000000000000000 x15: 000001e8c35e1cde [ 103.193647] x14: 0000000000000396 x13: 000000000000035c x12: 0000000000000000 [ 103.200772] x11: 000000000000003a x10: 00000000000009c0 x9 : ffff8000826eba20 [ 103.207897] x8 : ffff0000090b6d20 x7 : ffff00007728c180 x6 : ffff00007728c100 [ 103.215022] x5 : 0000000000000001 x4 : ffff000000508a50 x3 : ffff7ffff6146000 [ 103.222147] x2 : 0000000000000000 x1 : e300b4173ee6b200 x0 : 0000000000000000 [ 103.229274] Call trace: [ 103.231714] k3_cppi_desc_pool_destroy+0xa0/0xa8 [k3_cppi_desc_pool] (P) [ 103.238408] am65_cpsw_nuss_free_rx_chns+0x28/0x4c [ti_am65_cpsw_nuss] [ 103.244942] devm_action_release+0x14/0x20 [ 103.249040] release_nodes+0x3c/0x68 [ 103.252610] devres_release_all+0x8c/0xdc [ 103.256614] device_unbind_cleanup+0x18/0x60 [ 103.260876] device_release_driver_internal+0xf8/0x178 [ 103.266004] driver_detach+0x50/0x9c [ 103.269571] bus_remove_driver+0x6c/0xbc [ 103.273485] driver_unregister+0x30/0x60 [ 103.277401] platform_driver_unregister+0x14/0x20 [ 103.282096] am65_cpsw_nuss_driver_exit+0x18/0xff4 [ti_am65_cpsw_nuss] [ 103.288620] __arm64_sys_delete_module+0x17c/0x25c [ 103.293404] invoke_syscall+0x44/0x100 [ 103.297149] el0_svc_common.constprop.0+0xc0/0xe0 [ 103.301845] do_el0_svc+0x1c/0x28 [ 103.305155] el0_svc+0x28/0x98 [ 103.308207] el0t_64_sync_handler+0xc8/0xcc [ 103.312384] el0t_64_sync+0x198/0x19c [ 103.316040] ---[ end trace 0000000000000000 ]--- Signed-off-by: Roger Quadros Acked-by: Jakub Kicinski Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20250224-k3-udma-glue-single-fdq-v2-1-cbe7621f2507@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma-glue.c | 15 +++++++++++---- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 ++-- drivers/net/ethernet/ti/icssg/icssg_common.c | 2 +- include/linux/dma/k3-udma-glue.h | 3 +-- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index 7c224c3ab7a0..f87d244cc2d6 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -84,6 +84,7 @@ struct k3_udma_glue_rx_channel { struct k3_udma_glue_rx_flow *flows; u32 flow_num; u32 flows_ready; + bool single_fdq; /* one FDQ for all flows */ }; static void k3_udma_chan_dev_release(struct device *dev) @@ -970,10 +971,13 @@ k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name, ep_cfg = rx_chn->common.ep_config; - if (xudma_is_pktdma(rx_chn->common.udmax)) + if (xudma_is_pktdma(rx_chn->common.udmax)) { rx_chn->udma_rchan_id = ep_cfg->mapped_channel_id; - else + rx_chn->single_fdq = false; + } else { rx_chn->udma_rchan_id = -1; + rx_chn->single_fdq = true; + } /* request and cfg UDMAP RX channel */ rx_chn->udma_rchanx = xudma_rchan_get(rx_chn->common.udmax, @@ -1103,6 +1107,9 @@ k3_udma_glue_request_remote_rx_chn_common(struct k3_udma_glue_rx_channel *rx_chn rx_chn->common.chan_dev.dma_coherent = true; dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev, DMA_BIT_MASK(48)); + rx_chn->single_fdq = false; + } else { + rx_chn->single_fdq = true; } ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg); @@ -1453,7 +1460,7 @@ EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_rx_chn); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, - void (*cleanup)(void *data, dma_addr_t desc_dma), bool skip_fdq) + void (*cleanup)(void *data, dma_addr_t desc_dma)) { struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num]; struct device *dev = rx_chn->common.dev; @@ -1465,7 +1472,7 @@ void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, dev_dbg(dev, "RX reset flow %u occ_rx %u\n", flow_num, occ_rx); /* Skip RX FDQ in case one FDQ is used for the set of flows */ - if (skip_fdq) + if (rx_chn->single_fdq && flow_num) goto do_reset; /* diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index b663271e79f7..a1a482ca955f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -515,7 +515,7 @@ static void am65_cpsw_destroy_rxq(struct am65_cpsw_common *common, int id) napi_disable(&flow->napi_rx); hrtimer_cancel(&flow->rx_hrtimer); k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, id, rx_chn, - am65_cpsw_nuss_rx_cleanup, !!id); + am65_cpsw_nuss_rx_cleanup); for (port = 0; port < common->port_num; port++) { if (!common->ports[port].ndev) @@ -3406,7 +3406,7 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) for (i = 0; i < common->rx_ch_num_flows; i++) k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, - am65_cpsw_nuss_rx_cleanup, !!i); + am65_cpsw_nuss_rx_cleanup); k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 74f0f200a89d..62065416e886 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -955,7 +955,7 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, for (i = 0; i < num_flows; i++) k3_udma_glue_reset_rx_chn(chn->rx_chn, i, chn, - prueth_rx_cleanup, !!i); + prueth_rx_cleanup); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); } diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 2dea217629d0..5d43881e6fb7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -138,8 +138,7 @@ int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, - void (*cleanup)(void *data, dma_addr_t desc_dma), - bool skip_fdq); + void (*cleanup)(void *data, dma_addr_t desc_dma)); int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, From 6f9669f3634b4e85374676dfdff9181bee14567d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 19 Feb 2025 11:54:17 +0100 Subject: [PATCH 0451/2157] dmaengine: Fix typo in comment s/consumer/consume/ Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250219105419.2025-2-thorsten.blum@linux.dev Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index fdd41e1c2263..6b4fce453c85 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -725,7 +725,7 @@ static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( slave_addr = ops->slave_addr(schan); /* - * Allocate the sg list dynamically as it would consumer too much stack + * Allocate the sg list dynamically as it would consume too much stack * space. */ sgl = kmalloc_array(sg_len, sizeof(*sgl), GFP_KERNEL); From 49166afbf4ce3b5049295534150886a99b9867ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Feb 2025 08:05:18 +0100 Subject: [PATCH 0452/2157] phy: exynos5-usbdrd: Do not depend on Type-C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older Exynos designs, like Exynos5422, do not have USB Type-C and the USB DRD PHY does not really need CONFIG_TYPEC for these devices at all. Incorrectly added optional dependency on CONFIG_TYPEC caused this driver to be missing for exynos_defconfig and as result Exynos5422-based boards like Hardkernel Odroid HC1 failed to probe USB. Reported-by: Krzysztof Kozlowski Closes: https://krzk.eu/#/builders/21/builds/6139 Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/3c0b77e6-357d-453e-8b63-4757c3231bde@samsung.com/ Fixes: 09dc674295a3 ("phy: exynos5-usbdrd: subscribe to orientation notifier if required") Reviewed-by: André Draszik Tested-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250227070518.5468-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/samsung/Kconfig b/drivers/phy/samsung/Kconfig index 7fba571c0e2b..6566100441d6 100644 --- a/drivers/phy/samsung/Kconfig +++ b/drivers/phy/samsung/Kconfig @@ -81,7 +81,7 @@ config PHY_EXYNOS5_USBDRD tristate "Exynos5 SoC series USB DRD PHY driver" depends on (ARCH_EXYNOS && OF) || COMPILE_TEST depends on HAS_IOMEM - depends on TYPEC || (TYPEC=n && COMPILE_TEST) + depends on TYPEC || !TYPEC depends on USB_DWC3_EXYNOS select GENERIC_PHY select MFD_SYSCON From 41112160ca87d6b5280813ef61f1c35bb9ee2f82 Mon Sep 17 00:00:00 2001 From: Tomita Moeko Date: Fri, 24 Jan 2025 00:34:15 +0800 Subject: [PATCH 0453/2157] vfio/pci: match IGD devices in display controller class IGD device can either expose as a VGA controller or display controller depending on whether it is configured as the primary display device in BIOS. In both cases, the OpRegion may be present. A new helper function vfio_pci_is_intel_display() is introduced to check if the device might be an IGD device. Signed-off-by: Tomita Moeko Link: https://lore.kernel.org/r/20250123163416.7653-1-tomitamoeko@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 +--- drivers/vfio/pci/vfio_pci_igd.c | 6 ++++++ drivers/vfio/pci/vfio_pci_priv.h | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e727941f589d..5f169496376a 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -111,9 +111,7 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev) if (ret) return ret; - if (vfio_pci_is_vga(pdev) && - pdev->vendor == PCI_VENDOR_ID_INTEL && - IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { + if (vfio_pci_is_intel_display(pdev)) { ret = vfio_pci_igd_init(vdev); if (ret && ret != -ENODEV) { pci_warn(pdev, "Failed to setup Intel IGD regions\n"); diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index dd70e2431bd7..ef490a4545f4 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -435,6 +435,12 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev) return 0; } +bool vfio_pci_is_intel_display(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_INTEL) && + ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY); +} + int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) { int ret; diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index 5e4fa69aee16..a9972eacb293 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -67,8 +67,14 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd); #ifdef CONFIG_VFIO_PCI_IGD +bool vfio_pci_is_intel_display(struct pci_dev *pdev); int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); #else +static inline bool vfio_pci_is_intel_display(struct pci_dev *pdev) +{ + return false; +} + static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) { return -ENODEV; From afe84f3b7a26037b258be0f0a1e1754fc1db37e8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:01 -0700 Subject: [PATCH 0454/2157] vfio/type1: Catch zero from pin_user_pages_remote() pin_user_pages_remote() can currently return zero for invalid args or zero nr_pages, neither of which should ever happen. However vaddr_get_pfns() indicates it should only ever return a positive value or -errno and there's a theoretical case where this can slip through and be unhandled by callers. Therefore convert zero to -EFAULT. Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250218222209.1382449-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 50ebc9593c9d..119cf886d8c0 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -564,6 +564,8 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, if (ret > 0) { *pfn = page_to_pfn(pages[0]); goto done; + } else if (!ret) { + ret = -EFAULT; } vaddr = untagged_addr_remote(mm, vaddr); From 7a701e90fc8e3d7a7d07246b14c59a2da258539a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:02 -0700 Subject: [PATCH 0455/2157] vfio/type1: Convert all vaddr_get_pfns() callers to use vfio_batch This is a step towards passing the structure to vaddr_get_pfns() directly in order to provide greater distinction between page backed pfns and pfnmaps. Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Link: https://lore.kernel.org/r/20250218222209.1382449-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 119cf886d8c0..2e95f5f4d881 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot) #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *)) -static void vfio_batch_init(struct vfio_batch *batch) +static void __vfio_batch_init(struct vfio_batch *batch, bool single) { batch->size = 0; batch->offset = 0; - if (unlikely(disable_hugepages)) + if (single || unlikely(disable_hugepages)) goto fallback; batch->pages = (struct page **) __get_free_page(GFP_KERNEL); @@ -491,6 +491,16 @@ static void vfio_batch_init(struct vfio_batch *batch) batch->capacity = 1; } +static void vfio_batch_init(struct vfio_batch *batch) +{ + __vfio_batch_init(batch, false); +} + +static void vfio_batch_init_single(struct vfio_batch *batch) +{ + __vfio_batch_init(batch, true); +} + static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma) { while (batch->size) { @@ -730,7 +740,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { - struct page *pages[1]; + struct vfio_batch batch; struct mm_struct *mm; int ret; @@ -738,7 +748,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, if (!mmget_not_zero(mm)) return -ENODEV; - ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); + vfio_batch_init_single(&batch); + + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, batch.pages); if (ret != 1) goto out; @@ -757,6 +769,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, } out: + vfio_batch_fini(&batch); mmput(mm); return ret; } From eb996eec783c1e7e1e9c62e0336f8b86a08cf541 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:03 -0700 Subject: [PATCH 0456/2157] vfio/type1: Use vfio_batch for vaddr_get_pfns() Passing the vfio_batch to vaddr_get_pfns() allows for greater distinction between page backed pfns and pfnmaps. In the case of page backed pfns, vfio_batch.size is set to a positive value matching the number of pages filled in vfio_batch.pages. For a pfnmap, vfio_batch.size remains zero as vfio_batch.pages are not used. In both cases the return value continues to indicate the number of pfns and the provided pfn arg is set to the initial pfn value. This allows us to shortcut the pfnmap case, which is detected by the zero vfio_batch.size. pfnmaps do not contribute to locked memory accounting, therefore we can update counters and continue directly, which also enables a future where vaddr_get_pfns() can return a value greater than one for consecutive pfnmaps. NB. Now that we're not guessing whether the initial pfn is page backed or pfnmap, we no longer need to special case the put_pfn() and batch size reset. It's safe for vfio_batch_unpin() to handle this case. Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Link: https://lore.kernel.org/r/20250218222209.1382449-4-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 63 ++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 2e95f5f4d881..fafd8af125c7 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -555,12 +555,16 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, /* * Returns the positive number of pfns successfully obtained or a negative - * error code. + * error code. The initial pfn is stored in the pfn arg. For page-backed + * pfns, the provided batch is also updated to indicate the filled pages and + * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and + * returned initial pfn are provided; subsequent pfns are contiguous. */ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, long npages, int prot, unsigned long *pfn, - struct page **pages) + struct vfio_batch *batch) { + long pin_pages = min_t(long, npages, batch->capacity); struct vm_area_struct *vma; unsigned int flags = 0; int ret; @@ -569,10 +573,12 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM, - pages, NULL); + ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM, + batch->pages, NULL); if (ret > 0) { - *pfn = page_to_pfn(pages[0]); + *pfn = page_to_pfn(batch->pages[0]); + batch->size = ret; + batch->offset = 0; goto done; } else if (!ret) { ret = -EFAULT; @@ -628,32 +634,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, *pfn_base = 0; } + if (unlikely(disable_hugepages)) + npage = 1; + while (npage) { if (!batch->size) { /* Empty batch, so refill it. */ - long req_pages = min_t(long, npage, batch->capacity); - - ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot, - &pfn, batch->pages); + ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, + &pfn, batch); if (ret < 0) goto unpin_out; - batch->size = ret; - batch->offset = 0; - if (!*pfn_base) { *pfn_base = pfn; rsvd = is_invalid_reserved_pfn(*pfn_base); } + + /* Handle pfnmap */ + if (!batch->size) { + if (pfn != *pfn_base + pinned || !rsvd) + goto out; + + pinned += ret; + npage -= ret; + vaddr += (PAGE_SIZE * ret); + iova += (PAGE_SIZE * ret); + continue; + } } /* - * pfn is preset for the first iteration of this inner loop and - * updated at the end to handle a VM_PFNMAP pfn. In that case, - * batch->pages isn't valid (there's no struct page), so allow - * batch->pages to be touched only when there's more than one - * pfn to check, which guarantees the pfns are from a - * !VM_PFNMAP vma. + * pfn is preset for the first iteration of this inner loop + * due to the fact that vaddr_get_pfns() needs to provide the + * initial pfn for pfnmaps. Therefore to reduce redundancy, + * the next pfn is fetched at the end of the loop. + * A PageReserved() page could still qualify as page backed + * and rsvd here, and therefore continues to use the batch. */ while (true) { if (pfn != *pfn_base + pinned || @@ -688,21 +704,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, pfn = page_to_pfn(batch->pages[batch->offset]); } - - if (unlikely(disable_hugepages)) - break; } out: ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: - if (batch->size == 1 && !batch->offset) { - /* May be a VM_PFNMAP pfn, which the batch can't remember. */ - put_pfn(pfn, dma->prot); - batch->size = 0; - } - if (ret < 0) { if (pinned && !rsvd) { for (pfn = *pfn_base ; pinned ; pfn++, pinned--) @@ -750,7 +757,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, vfio_batch_init_single(&batch); - ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, batch.pages); + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch); if (ret != 1) goto out; From 0635559233434a337aa1c20d53abae18b3663796 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:04 -0700 Subject: [PATCH 0457/2157] vfio/type1: Use consistent types for page counts Page count should more consistently be an unsigned long when passed as an argument while functions returning a number of pages should use a signed long to allow for -errno. vaddr_get_pfns() can therefore be upgraded to return long, though in practice it's currently limited by the batch capacity. In fact, the batch indexes are noted to never hold negative values, so while it doesn't make sense to bloat the structure with unsigned longs in this case, it does make sense to specify these as unsigned. No change in behavior expected. Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250218222209.1382449-5-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index fafd8af125c7..ce661f03f139 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -103,9 +103,9 @@ struct vfio_dma { struct vfio_batch { struct page **pages; /* for pin_user_pages_remote */ struct page *fallback_page; /* if pages alloc fails */ - int capacity; /* length of pages array */ - int size; /* of batch currently */ - int offset; /* of next entry in pages */ + unsigned int capacity; /* length of pages array */ + unsigned int size; /* of batch currently */ + unsigned int offset; /* of next entry in pages */ }; struct vfio_iommu_group { @@ -560,14 +560,14 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and * returned initial pfn are provided; subsequent pfns are contiguous. */ -static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, - long npages, int prot, unsigned long *pfn, - struct vfio_batch *batch) +static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, + unsigned long npages, int prot, unsigned long *pfn, + struct vfio_batch *batch) { - long pin_pages = min_t(long, npages, batch->capacity); + unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity); struct vm_area_struct *vma; unsigned int flags = 0; - int ret; + long ret; if (prot & IOMMU_WRITE) flags |= FOLL_WRITE; @@ -612,7 +612,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, * first page and all consecutive pages with the same locking. */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, - long npage, unsigned long *pfn_base, + unsigned long npage, unsigned long *pfn_base, unsigned long limit, struct vfio_batch *batch) { unsigned long pfn; @@ -724,7 +724,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, - unsigned long pfn, long npage, + unsigned long pfn, unsigned long npage, bool do_accounting) { long unlocked = 0, locked = 0; From 62fb8adc43afad5fa1c9cadc6f3a8e9fb72af194 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:05 -0700 Subject: [PATCH 0458/2157] mm: Provide address mask in struct follow_pfnmap_args follow_pfnmap_start() walks the page table for a given address and fills out the struct follow_pfnmap_args in pfnmap_args_setup(). The address mask of the page table level is already provided to this latter function for calculating the pfn. This address mask can also be useful for the caller to determine the extent of the contiguous mapping. For example, vfio-pci now supports huge_fault for pfnmaps and is able to insert pud and pmd mappings. When we DMA map these pfnmaps, ex. PCI MMIO BARs, we iterate follow_pfnmap_start() to get each pfn to test for a contiguous pfn range. Providing the mapping address mask allows us to skip the extent of the mapping level. Assuming a 1GB pud level and 4KB page size, iterations are reduced by a factor of 256K. In wall clock time, mapping a 32GB PCI BAR is reduced from ~1s to <1ms. Cc: Andrew Morton Cc: David Hildenbrand Cc: linux-mm@kvack.org Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Link: https://lore.kernel.org/r/20250218222209.1382449-6-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- include/linux/mm.h | 2 ++ mm/memory.c | 1 + 2 files changed, 3 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b1068ddcbb7..92b30dba7e38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2417,11 +2417,13 @@ struct follow_pfnmap_args { * Outputs: * * @pfn: the PFN of the address + * @addr_mask: address mask covering pfn * @pgprot: the pgprot_t of the mapping * @writable: whether the mapping is writable * @special: whether the mapping is a special mapping (real PFN maps) */ unsigned long pfn; + unsigned long addr_mask; pgprot_t pgprot; bool writable; bool special; diff --git a/mm/memory.c b/mm/memory.c index b4d3d4893267..68aa0f11633e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6488,6 +6488,7 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, args->lock = lock; args->ptep = ptep; args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); + args->addr_mask = addr_mask; args->pgprot = pgprot; args->writable = writable; args->special = special; From 0fd06844de5d063cb384384e06a11ec7141a35d5 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 18 Feb 2025 15:22:06 -0700 Subject: [PATCH 0459/2157] vfio/type1: Use mapping page mask for pfnmaps vfio-pci supports huge_fault for PCI MMIO BARs and will insert pud and pmd mappings for well aligned mappings. follow_pfnmap_start() walks the page table and therefore knows the page mask of the level where the address is found and returns this through follow_pfnmap_args.addr_mask. Subsequent pfns from this address until the end of the mapping page are necessarily consecutive. Use this information to retrieve a range of pfnmap pfns in a single pass. With optimal mappings and alignment on systems with 1GB pud and 4KB page size, this reduces iterations for DMA mapping PCI BARs by a factor of 256K. In real world testing, the overhead of iterating pfns for a VM DMA mapping a 32GB PCI BAR is reduced from ~1s to sub-millisecond overhead. Reviewed-by: Peter Xu Reviewed-by: Mitchell Augustin Tested-by: Mitchell Augustin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250218222209.1382449-7-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index ce661f03f139..0ac56072af9f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -520,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch) static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, - bool write_fault) + unsigned long *addr_mask, bool write_fault) { struct follow_pfnmap_args args = { .vma = vma, .address = vaddr }; int ret; @@ -544,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, return ret; } - if (write_fault && !args.writable) + if (write_fault && !args.writable) { ret = -EFAULT; - else + } else { *pfn = args.pfn; + *addr_mask = args.addr_mask; + } follow_pfnmap_end(&args); return ret; @@ -590,15 +592,22 @@ static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, vma = vma_lookup(mm, vaddr); if (vma && vma->vm_flags & VM_PFNMAP) { - ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE); + unsigned long addr_mask; + + ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask, + prot & IOMMU_WRITE); if (ret == -EAGAIN) goto retry; if (!ret) { - if (is_invalid_reserved_pfn(*pfn)) - ret = 1; - else + if (is_invalid_reserved_pfn(*pfn)) { + unsigned long epfn; + + epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1; + ret = min_t(long, npages, epfn - *pfn); + } else { ret = -EFAULT; + } } } done: From aed2626f465ee6ed6c2d4cdcef97456432a8e779 Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Thu, 27 Feb 2025 16:32:54 -0800 Subject: [PATCH 0460/2157] dt-bindings: input: matrix_keypad - add missing property The property is implemented in the driver but not described in dt-bindings. Add missing property 'gpio-activelow' to DT schema. Signed-off-by: Markus Burri Link: https://lore.kernel.org/r/20250226152843.43932-3-markus.burri@mt.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/gpio-matrix-keypad.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml index 4a5893edf323..73bb153ed241 100644 --- a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml +++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml @@ -40,6 +40,11 @@ properties: type: boolean description: Do not enable autorepeat feature. + gpio-activelow: + type: boolean + description: + Force GPIO polarity to active low. + In the absence of this property GPIOs are treated as active high. debounce-delay-ms: description: Debounce interval in milliseconds. From 6853d9d13dbe596a73ae968e6fb27ba9680b2441 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 25 Feb 2025 16:29:00 -0500 Subject: [PATCH 0461/2157] rust/faux: Drop #[repr(transparent)] from faux::Registration I think this change got missed during review, we don't need #[repr(transparent)] since Registration just holds a single NonNull. This attribute had originally been added by me when I was still figuring out how the bindings should look like but got committed by mistake. So, just drop it. Signed-off-by: Lyude Paul Cc: Greg Kroah-Hartman Acked-by: Danilo Krummrich Reviewed-by: Fiona Behrens Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250225213112.872264-2-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 5acc0c02d451..41751403cd86 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -19,7 +19,6 @@ /// `self.0` always holds a valid pointer to an initialized and registered [`struct faux_device`]. /// /// [`struct faux_device`]: srctree/include/linux/device/faux.h -#[repr(transparent)] pub struct Registration(NonNull); impl Registration { From 95cb0cb546c2892b7a31ff2fce6573f201a214b8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 27 Feb 2025 14:35:06 -0500 Subject: [PATCH 0462/2157] rust/faux: Add missing parent argument to Registration::new() A little late in the review of the faux device interface, we added the ability to specify a parent device when creating new faux devices - but this never got ported over to the rust bindings. So, let's add the missing argument now so we don't have to convert other users later down the line. Signed-off-by: Lyude Paul Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250227193522.198344-1-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 13 +++++++++++-- samples/rust/rust_driver_faux.rs | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 41751403cd86..3277f35c3f79 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -23,11 +23,20 @@ impl Registration { /// Create and register a new faux device with the given name. - pub fn new(name: &CStr) -> Result { + pub fn new(name: &CStr, parent: Option<&device::Device>) -> Result { // SAFETY: // - `name` is copied by this function into its own storage // - `faux_ops` is safe to leave NULL according to the C API - let dev = unsafe { bindings::faux_device_create(name.as_char_ptr(), null_mut(), null()) }; + // - `parent` can be either NULL or a pointer to a `struct device`, and `faux_device_create` + // will take a reference to `parent` using `device_add` - ensuring that it remains valid + // for the lifetime of the faux device. + let dev = unsafe { + bindings::faux_device_create( + name.as_char_ptr(), + parent.map_or(null_mut(), |p| p.as_raw()), + null(), + ) + }; // The above function will return either a valid device, or NULL on failure // INVARIANT: The device will remain registered until faux_device_destroy() is called, which diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs index 048c6cb98b29..58a3a94121bf 100644 --- a/samples/rust/rust_driver_faux.rs +++ b/samples/rust/rust_driver_faux.rs @@ -20,7 +20,7 @@ impl Module for SampleModule { fn init(_module: &'static ThisModule) -> Result { pr_info!("Initialising Rust Faux Device Sample\n"); - let reg = faux::Registration::new(c_str!("rust-faux-sample-device"))?; + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"), None)?; dev_info!(reg.as_ref(), "Hello from faux device!\n"); From 21b0dc55bed6d9b5dd5d1ad22b75d9d1c7426bbc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Feb 2025 07:35:46 +0100 Subject: [PATCH 0463/2157] driver core: faux: only create the device if probe() succeeds It's really hard to know if a faux device properly passes the callback to probe() without having to poke around in the faux_device structure and then clean up. Instead of having to have every user of the api do this logic, just do it in the faux device core itself. This makes the use of a custom probe() callback for a faux device much simpler overall. Suggested-by: Kurt Borja Cc: Rafael J. Wysocki Reviewed-by: Kurt Borja Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/2025022545-unroasted-common-fa0e@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/faux.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 531e9d789ee0..407c1d1aad50 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -102,7 +102,9 @@ static void faux_device_release(struct device *dev) * * Note, when this function is called, the functions specified in struct * faux_ops can be called before the function returns, so be prepared for - * everything to be properly initialized before that point in time. + * everything to be properly initialized before that point in time. If the + * probe callback (if one is present) does NOT succeed, the creation of the + * device will fail and NULL will be returned. * * Return: * * NULL if an error happened with creating the device @@ -147,6 +149,17 @@ struct faux_device *faux_device_create_with_groups(const char *name, return NULL; } + /* + * Verify that we did bind the driver to the device (i.e. probe worked), + * if not, let's fail the creation as trying to guess if probe was + * successful is almost impossible to determine by the caller. + */ + if (!dev->driver) { + dev_err(dev, "probe did not succeed, tearing down the device\n"); + faux_device_destroy(faux_dev); + faux_dev = NULL; + } + return faux_dev; } EXPORT_SYMBOL_GPL(faux_device_create_with_groups); From f36d6362c62c0acc2f45698495691b5cb349a375 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Feb 2025 12:41:45 +0100 Subject: [PATCH 0464/2157] rtc: fsl-ftm-alarm: Mark acpi_id table as maybe unused For !ACPI builds, the acpi_device_id table will not be referenced because of ACPI_PTR: rtc-fsl-ftm-alarm.c:312:36: error: unused variable 'ftm_imx_acpi_ids' [-Werror,-Wunused-const-variable] Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250222114146.162835-1-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-fsl-ftm-alarm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index a72c4ad0cec6..c8015f04c71f 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -309,7 +309,7 @@ static const struct of_device_id ftm_rtc_match[] = { }; MODULE_DEVICE_TABLE(of, ftm_rtc_match); -static const struct acpi_device_id ftm_imx_acpi_ids[] = { +static const struct acpi_device_id ftm_imx_acpi_ids[] __maybe_unused = { {"NXP0014",}, { } }; From 72ce39bc61fab14ef8eda1614763d6422ce38736 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Feb 2025 12:41:46 +0100 Subject: [PATCH 0465/2157] rtc: pl030: Constify amba_id table 'struct amba_id' table is not modified so can be changed to const for more safety. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250222114146.162835-2-krzysztof.kozlowski@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pl030.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index 39038c0754ee..1326f310bf93 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -148,7 +148,7 @@ static void pl030_remove(struct amba_device *dev) amba_release_regions(dev); } -static struct amba_id pl030_ids[] = { +static const struct amba_id pl030_ids[] = { { .id = 0x00041030, .mask = 0x000fffff, From 0f05886a40fdc55016ba4d9ae0a9c41f8312f15b Mon Sep 17 00:00:00 2001 From: Kuhanh Murugasen Krishnan Date: Thu, 13 Feb 2025 06:12:49 +0800 Subject: [PATCH 0466/2157] fpga: altera-cvp: Increase credit timeout Increase the timeout for SDM (Secure device manager) data credits from 20ms to 40ms. Internal stress tests running at 500 loops failed with the current timeout of 20ms. At the start of a FPGA configuration, the CVP host driver reads the transmit credits from SDM. It then sends bitstream FPGA data to SDM based on the total credits. Each credit allows the CVP host driver to send 4kBytes of data. There are situations whereby, the SDM did not respond in time during testing. Signed-off-by: Ang Tien Sung Signed-off-by: Kuhanh Murugasen Krishnan Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20250212221249.2715929-1-tien.sung.ang@intel.com Signed-off-by: Xu Yilun --- drivers/fpga/altera-cvp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index 6b0914432445..5af0bd33890c 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -52,7 +52,7 @@ /* V2 Defines */ #define VSE_CVP_TX_CREDITS 0x49 /* 8bit */ -#define V2_CREDIT_TIMEOUT_US 20000 +#define V2_CREDIT_TIMEOUT_US 40000 #define V2_CHECK_CREDIT_US 10 #define V2_POLL_TIMEOUT_US 1000000 #define V2_USER_TIMEOUT_US 500000 From e19890a0088b5884cb9e6a6f5717dc56cc45fc31 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 17:35:07 +0100 Subject: [PATCH 0467/2157] fpga: versal: remove incorrect of_match_ptr annotation Building with W=1 shows a warning about versal_fpga_of_match being unused when CONFIG_OF is disabled: drivers/fpga/versal-fpga.c:62:34: error: unused variable 'versal_fpga_of_match' [-Werror,-Wunused-const-variable] Acked-by: Xu Yilun Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250225163510.4168911-1-arnd@kernel.org Signed-off-by: Xu Yilun --- drivers/fpga/versal-fpga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fpga/versal-fpga.c b/drivers/fpga/versal-fpga.c index 3710e8f01be2..e6189106c468 100644 --- a/drivers/fpga/versal-fpga.c +++ b/drivers/fpga/versal-fpga.c @@ -69,7 +69,7 @@ static struct platform_driver versal_fpga_driver = { .probe = versal_fpga_probe, .driver = { .name = "versal_fpga_manager", - .of_match_table = of_match_ptr(versal_fpga_of_match), + .of_match_table = versal_fpga_of_match, }, }; module_platform_driver(versal_fpga_driver); From e4b5f415391a56ec6c5f2158bf25d1d3e7936ccc Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 19 Feb 2025 09:36:44 +0100 Subject: [PATCH 0468/2157] usb: core: replace usb_sndaddr0pipe macro with usb_sndctrlpipe The usb_sndaddr0pipe macro is only used in the hub_set_address function. Replace it with usb_sndctrlpipe which provides the same functionality but would also consider the endpoint device number. If the device has not been initialised, it is safe to use usb_sndctrlpipe in this context because udev->devnum is set to 0. Therefore, this change does not affect behaviour, but reduces code complexity by reusing the existing macro. Signed-off-by: Stefan Eichenberger Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250219083745.10406-1-eichest@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a76bb50b6202..f34c5acb48fc 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4708,8 +4708,6 @@ void usb_ep0_reinit(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_ep0_reinit); -#define usb_sndaddr0pipe() (PIPE_CONTROL << 30) - static int hub_set_address(struct usb_device *udev, int devnum) { int retval; @@ -4733,7 +4731,7 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (hcd->driver->address_device) retval = hcd->driver->address_device(hcd, udev, timeout_ms); else - retval = usb_control_msg(udev, usb_sndaddr0pipe(), + retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_ADDRESS, 0, devnum, 0, NULL, 0, timeout_ms); if (retval == 0) { From 99111a7b1538b0bfd2f19238b3c2050419edf3b5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 19 Feb 2025 15:01:43 +0100 Subject: [PATCH 0469/2157] dt-bindings: usb: richtek,rt1711h: Add missing vbus power supply The RichTek RT1711H and RT1719 chips do have a vbus pin that is used for attach/detach detection (respectively, pin A1 and 11): allow a vbus-supply phandle for that. Signed-off-by: AngeloGioacchino Del Regno Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250219140143.104037-1-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml b/Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml index 8da4d2ad1a91..ae611f7e57ca 100644 --- a/Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml +++ b/Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml @@ -30,6 +30,9 @@ properties: interrupts: maxItems: 1 + vbus-supply: + description: VBUS power supply + wakeup-source: type: boolean From 9fab91e0eae3a9ec1861119877a61d571f21536a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 23 Feb 2025 16:06:02 +0000 Subject: [PATCH 0470/2157] usb: ulpi: Remove unused otg_ulpi_create otg_ulpi_create() has been unused since 2022's commit 8ca79aaad8be ("ARM: pxa: remove unused pxa3xx-ulpi") Remove it. The devm_ variant is still used. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250223160602.91916-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-ulpi.c | 23 ----------------------- include/linux/usb/ulpi.h | 9 --------- 2 files changed, 32 deletions(-) diff --git a/drivers/usb/phy/phy-ulpi.c b/drivers/usb/phy/phy-ulpi.c index e683a37e3a7a..4df63e67bb37 100644 --- a/drivers/usb/phy/phy-ulpi.c +++ b/drivers/usb/phy/phy-ulpi.c @@ -255,29 +255,6 @@ static void otg_ulpi_init(struct usb_phy *phy, struct usb_otg *otg, otg->set_vbus = ulpi_set_vbus; } -struct usb_phy * -otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags) -{ - struct usb_phy *phy; - struct usb_otg *otg; - - phy = kzalloc(sizeof(*phy), GFP_KERNEL); - if (!phy) - return NULL; - - otg = kzalloc(sizeof(*otg), GFP_KERNEL); - if (!otg) { - kfree(phy); - return NULL; - } - - otg_ulpi_init(phy, otg, ops, flags); - - return phy; -} -EXPORT_SYMBOL_GPL(otg_ulpi_create); - struct usb_phy * devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5050f502c1ed..4b651065738a 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -49,19 +49,10 @@ /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_ULPI) -struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags); - struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags); #else -static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags) -{ - return NULL; -} - static inline struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags) From 93868d5f39530a0521e7ee63305b7b856cac2182 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Feb 2025 15:42:31 -0600 Subject: [PATCH 0471/2157] dt-bindings: usb: generic-xhci: Allow dma-coherent Some platforms like Marvell are cache coherent, so allow the "dma-coherent" property. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250226214231.3745400-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/generic-xhci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/generic-xhci.yaml b/Documentation/devicetree/bindings/usb/generic-xhci.yaml index 6ceafa4af292..a2b94a138999 100644 --- a/Documentation/devicetree/bindings/usb/generic-xhci.yaml +++ b/Documentation/devicetree/bindings/usb/generic-xhci.yaml @@ -51,6 +51,8 @@ properties: - const: core - const: reg + dma-coherent: true + power-domains: maxItems: 1 From c8c35b6cdb781398015340aa08edf26d2d381e56 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Tue, 25 Feb 2025 14:28:43 +0800 Subject: [PATCH 0472/2157] usb: chipidea: imx: fix some typo bootlader -> bootloader set_wakeup failed -> hsic_set_clk failed Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250225062843.3930041-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/usbmisc_imx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 1394881fde5f..6243d8005f5d 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -440,7 +440,7 @@ static int usbmisc_imx6q_init(struct imx_usbmisc_data *data) else if (data->oc_pol_configured) reg &= ~MX6_BM_OVER_CUR_POLARITY; } - /* If the polarity is not set keep it as setup by the bootlader */ + /* If the polarity is not set keep it as setup by the bootloader */ if (data->pwr_pol == 1) reg |= MX6_BM_PWR_POLARITY; writel(reg, usbmisc->base + data->index * 4); @@ -645,7 +645,7 @@ static int usbmisc_imx7d_init(struct imx_usbmisc_data *data) else if (data->oc_pol_configured) reg &= ~MX6_BM_OVER_CUR_POLARITY; } - /* If the polarity is not set keep it as setup by the bootlader */ + /* If the polarity is not set keep it as setup by the bootloader */ if (data->pwr_pol == 1) reg |= MX6_BM_PWR_POLARITY; writel(reg, usbmisc->base); @@ -939,7 +939,7 @@ static int usbmisc_imx7ulp_init(struct imx_usbmisc_data *data) else if (data->oc_pol_configured) reg &= ~MX6_BM_OVER_CUR_POLARITY; } - /* If the polarity is not set keep it as setup by the bootlader */ + /* If the polarity is not set keep it as setup by the bootloader */ if (data->pwr_pol == 1) reg |= MX6_BM_PWR_POLARITY; @@ -1185,7 +1185,7 @@ int imx_usbmisc_suspend(struct imx_usbmisc_data *data, bool wakeup) if (usbmisc->ops->hsic_set_clk && data->hsic) ret = usbmisc->ops->hsic_set_clk(data, false); if (ret) { - dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + dev_err(data->dev, "hsic_set_clk failed, ret=%d\n", ret); return ret; } @@ -1224,7 +1224,7 @@ int imx_usbmisc_resume(struct imx_usbmisc_data *data, bool wakeup) if (usbmisc->ops->hsic_set_clk && data->hsic) ret = usbmisc->ops->hsic_set_clk(data, true); if (ret) { - dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret); + dev_err(data->dev, "hsic_set_clk failed, ret=%d\n", ret); goto hsic_set_clk_fail; } From c16006852732dc4fe37c14b81f9b4458df05b832 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 20 Feb 2025 23:40:03 -0600 Subject: [PATCH 0473/2157] ucsi_ccg: Don't show failed to get FW build information error The error `failed to get FW build information` is added for what looks to be for misdetection of the device property firmware-name. If the property is missing (such as on non-nvidia HW) this error shows up. Move the error into the scope of the property parser for "firmware-name" to avoid showing errors on systems without the firmware-name property. Fixes: 5c9ae5a87573d ("usb: typec: ucsi: ccg: add firmware flashing support") Signed-off-by: Mario Limonciello Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250221054137.1631765-2-superm1@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 47498ee6cca8..3c4dcc3fa7cf 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1429,11 +1429,10 @@ static int ucsi_ccg_probe(struct i2c_client *client) uc->fw_build = CCG_FW_BUILD_NVIDIA_TEGRA; else if (!strcmp(fw_name, "nvidia,gpu")) uc->fw_build = CCG_FW_BUILD_NVIDIA; + if (!uc->fw_build) + dev_err(uc->dev, "failed to get FW build information\n"); } - if (!uc->fw_build) - dev_err(uc->dev, "failed to get FW build information\n"); - /* reset ccg device and initialize ucsi */ status = ucsi_ccg_init(uc); if (status < 0) { From 403849c8edc1551d1a489b1b942f68d75758e98a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 20 Feb 2025 23:40:04 -0600 Subject: [PATCH 0474/2157] ucsi_ccg: Don't show non-functional attributes If no fw_build is recognized for the controller there is no point to exposing the `do_flash` attribute. Add an is_visible callback to the attribute group and check for that fw_build member to hide when not applicable. Signed-off-by: Mario Limonciello Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250221054137.1631765-3-superm1@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 3c4dcc3fa7cf..802b1d73ca12 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1387,22 +1387,35 @@ static ssize_t do_flash_store(struct device *dev, if (!flash) return n; - if (uc->fw_build == 0x0) { - dev_err(dev, "fail to flash FW due to missing FW build info\n"); - return -EINVAL; - } - schedule_work(&uc->work); return n; } +static umode_t ucsi_ccg_attrs_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct ucsi_ccg *uc = i2c_get_clientdata(to_i2c_client(dev)); + + if (!uc->fw_build) + return 0; + + return attr->mode; +} + static DEVICE_ATTR_WO(do_flash); static struct attribute *ucsi_ccg_attrs[] = { &dev_attr_do_flash.attr, NULL, }; -ATTRIBUTE_GROUPS(ucsi_ccg); +static struct attribute_group ucsi_ccg_attr_group = { + .attrs = ucsi_ccg_attrs, + .is_visible = ucsi_ccg_attrs_is_visible, +}; +static const struct attribute_group *ucsi_ccg_groups[] = { + &ucsi_ccg_attr_group, + NULL, +}; static int ucsi_ccg_probe(struct i2c_client *client) { From 7a4dc56023651f00a8d5e30ec84ec2a12294b8c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:16 +0100 Subject: [PATCH 0475/2157] usb: storage: jumpshot: Use const for constant arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These arrays are not modified. Make them const. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-1-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/jumpshot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/storage/jumpshot.c b/drivers/usb/storage/jumpshot.c index 39ca84d68591..089c6f8ac85f 100644 --- a/drivers/usb/storage/jumpshot.c +++ b/drivers/usb/storage/jumpshot.c @@ -367,16 +367,16 @@ static int jumpshot_handle_mode_sense(struct us_data *us, struct scsi_cmnd * srb, int sense_6) { - static unsigned char rw_err_page[12] = { + static const unsigned char rw_err_page[12] = { 0x1, 0xA, 0x21, 1, 0, 0, 0, 0, 1, 0, 0, 0 }; - static unsigned char cache_page[12] = { + static const unsigned char cache_page[12] = { 0x8, 0xA, 0x1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - static unsigned char rbac_page[12] = { + static const unsigned char rbac_page[12] = { 0x1B, 0xA, 0, 0x81, 0, 0, 0, 0, 0, 0, 0, 0 }; - static unsigned char timer_page[8] = { + static const unsigned char timer_page[8] = { 0x1C, 0x6, 0, 0, 0, 0 }; unsigned char pc, page_code; @@ -477,7 +477,7 @@ static int jumpshot_transport(struct scsi_cmnd *srb, struct us_data *us) int rc; unsigned long block, blocks; unsigned char *ptr = us->iobuf; - static unsigned char inquiry_response[8] = { + static const unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; From 13155c6b2e31dc0e8deb9d45a0df00f7ca40c366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:17 +0100 Subject: [PATCH 0476/2157] usb: storage: transport: Use const for constant array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This array is only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-2-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index e6bc8ecaecbb..1aa1bd26c81f 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -528,7 +528,7 @@ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb) u32 sector; /* To Report "Medium Error: Record Not Found */ - static unsigned char record_not_found[18] = { + static const unsigned char record_not_found[18] = { [0] = 0x70, /* current error */ [2] = MEDIUM_ERROR, /* = 0x03 */ [7] = 0x0a, /* additional length */ From 734b0a7a5d857b5c0b98ccbb857406d4d531d574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:18 +0100 Subject: [PATCH 0477/2157] usb: storage: alauda: Use const for card ID array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The alauda_card_ids array is only read, and not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-3-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/alauda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c index 6263c4e61678..e01f3a42bde4 100644 --- a/drivers/usb/storage/alauda.c +++ b/drivers/usb/storage/alauda.c @@ -174,7 +174,7 @@ struct alauda_card_info { unsigned char zoneshift; /* 1<iobuf; int ready = 0; - struct alauda_card_info *media_info; + const struct alauda_card_info *media_info; unsigned int num_zones; while (ready == 0) { @@ -1132,7 +1132,7 @@ static int alauda_transport(struct scsi_cmnd *srb, struct us_data *us) int rc; struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char *ptr = us->iobuf; - static unsigned char inquiry_response[36] = { + static const unsigned char inquiry_response[36] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; From 6623c40bed7aac98e783e0bb2d7ad1afb8c63e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:19 +0100 Subject: [PATCH 0478/2157] usb: storage: datafab: Use const for constant arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These arrays are only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-4-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/datafab.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/storage/datafab.c b/drivers/usb/storage/datafab.c index bbfa2398b170..9ba369483c9b 100644 --- a/drivers/usb/storage/datafab.c +++ b/drivers/usb/storage/datafab.c @@ -319,7 +319,7 @@ static int datafab_determine_lun(struct us_data *us, // // There might be a better way of doing this? - static unsigned char scommand[8] = { 0, 1, 0, 0, 0, 0xa0, 0xec, 1 }; + static const unsigned char scommand[8] = { 0, 1, 0, 0, 0, 0xa0, 0xec, 1 }; unsigned char *command = us->iobuf; unsigned char *buf; int count = 0, rc; @@ -384,7 +384,7 @@ static int datafab_id_device(struct us_data *us, // to the ATA spec, 'Sector Count' isn't used but the Windows driver // sets this bit so we do too... // - static unsigned char scommand[8] = { 0, 1, 0, 0, 0, 0xa0, 0xec, 1 }; + static const unsigned char scommand[8] = { 0, 1, 0, 0, 0, 0xa0, 0xec, 1 }; unsigned char *command = us->iobuf; unsigned char *reply; int rc; @@ -437,16 +437,16 @@ static int datafab_handle_mode_sense(struct us_data *us, struct scsi_cmnd * srb, int sense_6) { - static unsigned char rw_err_page[12] = { + static const unsigned char rw_err_page[12] = { 0x1, 0xA, 0x21, 1, 0, 0, 0, 0, 1, 0, 0, 0 }; - static unsigned char cache_page[12] = { + static const unsigned char cache_page[12] = { 0x8, 0xA, 0x1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - static unsigned char rbac_page[12] = { + static const unsigned char rbac_page[12] = { 0x1B, 0xA, 0, 0x81, 0, 0, 0, 0, 0, 0, 0, 0 }; - static unsigned char timer_page[8] = { + static const unsigned char timer_page[8] = { 0x1C, 0x6, 0, 0, 0, 0 }; unsigned char pc, page_code; @@ -550,7 +550,7 @@ static int datafab_transport(struct scsi_cmnd *srb, struct us_data *us) int rc; unsigned long block, blocks; unsigned char *ptr = us->iobuf; - static unsigned char inquiry_reply[8] = { + static const unsigned char inquiry_reply[8] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; From b1a23e3bbf5f72785164bb1c6491c4cea7369557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:20 +0100 Subject: [PATCH 0479/2157] usb: storage: initializers: Use const for constant array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit init_string is only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-5-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/initializers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/initializers.c b/drivers/usb/storage/initializers.c index f8f9ce8dc710..b243bd5521a6 100644 --- a/drivers/usb/storage/initializers.c +++ b/drivers/usb/storage/initializers.c @@ -54,7 +54,7 @@ int usb_stor_ucr61s2b_init(struct us_data *us) struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap*) us->iobuf; int res; unsigned int partial; - static char init_string[] = "\xec\x0a\x06\x00$PCCHIPS"; + static const char init_string[] = "\xec\x0a\x06\x00$PCCHIPS"; usb_stor_dbg(us, "Sending UCR-61S2B initialization packet...\n"); From 024ad5424c42a49c5d1e4dd0b094db9efad354c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:21 +0100 Subject: [PATCH 0480/2157] usb: storage: realtek_cr: Use const for constant arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These arrays are only read, never modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-6-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/realtek_cr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 2a82ed7b68ea..4e516b445136 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -191,7 +191,7 @@ MODULE_DEVICE_TABLE(usb, realtek_cr_ids); .initFunction = init_function, \ } -static struct us_unusual_dev realtek_cr_unusual_dev_list[] = { +static const struct us_unusual_dev realtek_cr_unusual_dev_list[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; @@ -797,10 +797,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); static int card_first_show = 1; - static u8 media_not_present[] = { 0x70, 0, 0x02, 0, 0, 0, 0, + static const u8 media_not_present[] = { 0x70, 0, 0x02, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x3A, 0, 0, 0, 0, 0 }; - static u8 invalid_cmd_field[] = { 0x70, 0, 0x05, 0, 0, 0, 0, + static const u8 invalid_cmd_field[] = { 0x70, 0, 0x05, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x24, 0, 0, 0, 0, 0 }; int ret; From 5dc02c96e4f268f88962b37ad62f469938fb22ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:22 +0100 Subject: [PATCH 0481/2157] usb: storage: sddr09: Use const for constant arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nand_flash_ids, inquiry_response, and mode_page_01 arrays are only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-7-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/sddr09.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/storage/sddr09.c b/drivers/usb/storage/sddr09.c index d21ce3466e25..e66b920e99e2 100644 --- a/drivers/usb/storage/sddr09.c +++ b/drivers/usb/storage/sddr09.c @@ -144,7 +144,7 @@ static inline char *nand_flash_manufacturer(int manuf_id) { * 256 MB NAND flash has a 5-byte ID with 2nd byte 0xaa, 0xba, 0xca or 0xda. */ -static struct nand_flash_dev nand_flash_ids[] = { +static const struct nand_flash_dev nand_flash_ids[] = { /* NAND flash */ { 0x6e, 20, 8, 4, 8, 2}, /* 1 MB */ { 0xe8, 20, 8, 4, 8, 2}, /* 1 MB */ @@ -169,7 +169,7 @@ static struct nand_flash_dev nand_flash_ids[] = { { 0,} }; -static struct nand_flash_dev * +static const struct nand_flash_dev * nand_find_id(unsigned char id) { int i; @@ -1133,9 +1133,9 @@ sddr09_reset(struct us_data *us) { } #endif -static struct nand_flash_dev * +static const struct nand_flash_dev * sddr09_get_cardinfo(struct us_data *us, unsigned char flags) { - struct nand_flash_dev *cardinfo; + const struct nand_flash_dev *cardinfo; unsigned char deviceID[4]; char blurbtxt[256]; int result; @@ -1545,12 +1545,12 @@ static int sddr09_transport(struct scsi_cmnd *srb, struct us_data *us) struct sddr09_card_info *info; - static unsigned char inquiry_response[8] = { + static const unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x02, 0x1F, 0x00, 0x00, 0x00 }; /* note: no block descriptor support */ - static unsigned char mode_page_01[19] = { + static const unsigned char mode_page_01[19] = { 0x00, 0x0F, 0x00, 0x0, 0x0, 0x0, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 @@ -1584,7 +1584,7 @@ static int sddr09_transport(struct scsi_cmnd *srb, struct us_data *us) } if (srb->cmnd[0] == READ_CAPACITY) { - struct nand_flash_dev *cardinfo; + const struct nand_flash_dev *cardinfo; sddr09_get_wp(us, info); /* read WP bit */ From 9dd3aa746d12d401c9b71cea934f49e6117ee2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:23 +0100 Subject: [PATCH 0482/2157] usb: storage: sddr55: Use const for constant arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These arrays are only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-8-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/sddr55.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/sddr55.c b/drivers/usb/storage/sddr55.c index d5cdff30f6f3..b323f0a36260 100644 --- a/drivers/usb/storage/sddr55.c +++ b/drivers/usb/storage/sddr55.c @@ -775,11 +775,11 @@ static void sddr55_card_info_destructor(void *extra) { static int sddr55_transport(struct scsi_cmnd *srb, struct us_data *us) { int result; - static unsigned char inquiry_response[8] = { + static const unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x02, 0x1F, 0x00, 0x00, 0x00 }; // write-protected for now, no block descriptor support - static unsigned char mode_page_01[20] = { + static const unsigned char mode_page_01[20] = { 0x0, 0x12, 0x00, 0x80, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 From 1534692e953c767ae6a3b6b504596de1c2c30082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2025 16:11:24 +0100 Subject: [PATCH 0483/2157] usb: storage: shuttle_usbat: Use const for constant array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This array is only read, not modified. Declaring data as const makes it easier to see what's going on, and can prevent unintended writes through placement in a read-only section. Signed-off-by: Jonathan Neuschäfer Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250228-misc-const-v3-9-09b417ded9c4@posteo.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/shuttle_usbat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/shuttle_usbat.c b/drivers/usb/storage/shuttle_usbat.c index c33cbf177e6f..27faa0ead11d 100644 --- a/drivers/usb/storage/shuttle_usbat.c +++ b/drivers/usb/storage/shuttle_usbat.c @@ -1683,7 +1683,7 @@ static int usbat_flash_transport(struct scsi_cmnd * srb, struct us_data *us) struct usbat_info *info = (struct usbat_info *) (us->extra); unsigned long block, blocks; unsigned char *ptr = us->iobuf; - static unsigned char inquiry_response[36] = { + static const unsigned char inquiry_response[36] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; From a6e804c44db4f9d26eadf80cdf4365332d0899a7 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 27 Feb 2025 19:19:07 +0800 Subject: [PATCH 0484/2157] dt-bindings: usb: dwc3: Add support for rk3562 The USB dwc3 core on Rockchip's RK3562 is the same as the one already included in generic snps,dwc3. Extend the binding accordingly to allow compatible = "rockchip,rk3562-dwc3", "snps,dwc3"; There are 4 clocks with different name sequency, add schema for it. Signed-off-by: Kever Yang Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250227111913.2344207-10-kever.yang@rock-chips.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/rockchip,dwc3.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml index a21cc098542d..fba2cb05ecba 100644 --- a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml @@ -26,6 +26,7 @@ select: contains: enum: - rockchip,rk3328-dwc3 + - rockchip,rk3562-dwc3 - rockchip,rk3568-dwc3 - rockchip,rk3576-dwc3 - rockchip,rk3588-dwc3 @@ -37,6 +38,7 @@ properties: items: - enum: - rockchip,rk3328-dwc3 + - rockchip,rk3562-dwc3 - rockchip,rk3568-dwc3 - rockchip,rk3576-dwc3 - rockchip,rk3588-dwc3 @@ -72,6 +74,7 @@ properties: - enum: - grf_clk - utmi + - pipe - const: pipe power-domains: @@ -111,6 +114,22 @@ allOf: - const: suspend_clk - const: bus_clk - const: grf_clk + - if: + properties: + compatible: + contains: + const: rockchip,rk3562-dwc3 + then: + properties: + clocks: + minItems: 4 + maxItems: 4 + clock-names: + items: + - const: ref_clk + - const: suspend_clk + - const: bus_clk + - const: pipe - if: properties: compatible: From 43092fcd79a44c3d6cdc10178c29da17a64b0131 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sat, 1 Mar 2025 01:10:33 +0530 Subject: [PATCH 0485/2157] dt-bindings: usb: samsung,exynos-dwc3: add exynos7870 support Document compatible string for Exynos7870 DWC3-compatible USB 2.0 driver. The devicetree node requires three clocks, named "bus_early", "ref", and "ctrl". Unlike other variants, Exynos7870's USB controller requires a single 3.0V regulator. Assert that the other 1.0V regulator requirement is enforced on variants individually other than Exynos7870's. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250301-exynos7870-usb-v3-1-f01697165d19@disroot.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/usb/samsung,exynos-dwc3.yaml | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml index f11e767a8abe..256bee2a03ca 100644 --- a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml @@ -17,6 +17,7 @@ properties: - samsung,exynos5250-dwusb3 - samsung,exynos5433-dwusb3 - samsung,exynos7-dwusb3 + - samsung,exynos7870-dwusb3 - samsung,exynos850-dwusb3 - items: - const: samsung,exynos990-dwusb3 @@ -56,7 +57,6 @@ required: - clock-names - ranges - '#size-cells' - - vdd10-supply - vdd33-supply allOf: @@ -76,6 +76,8 @@ allOf: - const: susp_clk - const: link_aclk - const: link_pclk + required: + - vdd10-supply - if: properties: @@ -90,6 +92,8 @@ allOf: clock-names: items: - const: usbdrd30 + required: + - vdd10-supply - if: properties: @@ -107,6 +111,8 @@ allOf: - const: susp_clk - const: phyclk - const: pipe_pclk + required: + - vdd10-supply - if: properties: @@ -123,6 +129,24 @@ allOf: - const: usbdrd30 - const: usbdrd30_susp_clk - const: usbdrd30_axius_clk + required: + - vdd10-supply + + - if: + properties: + compatible: + contains: + const: samsung,exynos7870-dwusb3 + then: + properties: + clocks: + minItems: 3 + maxItems: 3 + clock-names: + items: + - const: bus_early + - const: ref + - const: ctrl - if: properties: @@ -138,6 +162,8 @@ allOf: items: - const: bus_early - const: ref + required: + - vdd10-supply additionalProperties: false From 811d22141369d572319ee3350ff8b40fa05850f8 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sat, 1 Mar 2025 01:10:34 +0530 Subject: [PATCH 0486/2157] usb: dwc3: exynos: add support for exynos7870 Exynos7870 devices have a DWC3 compatible USB 2.0 controller. Add support in the driver by: - Adding its own compatible string, "samsung,exynos7870-dwusb3". - Adding three USBDRD clocks named "bus_early", "ref", and "ctrl", to be controlled by the driver. Signed-off-by: Kaustabh Chakraborty Acked-by: Thinh Nguyen Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250301-exynos7870-usb-v3-2-f01697165d19@disroot.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-exynos.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index f5d963fae9e0..de686b9e6404 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -163,6 +163,12 @@ static const struct dwc3_exynos_driverdata exynos7_drvdata = { .suspend_clk_idx = 1, }; +static const struct dwc3_exynos_driverdata exynos7870_drvdata = { + .clk_names = { "bus_early", "ref", "ctrl" }, + .num_clks = 3, + .suspend_clk_idx = -1, +}; + static const struct dwc3_exynos_driverdata exynos850_drvdata = { .clk_names = { "bus_early", "ref" }, .num_clks = 2, @@ -185,6 +191,9 @@ static const struct of_device_id exynos_dwc3_match[] = { }, { .compatible = "samsung,exynos7-dwusb3", .data = &exynos7_drvdata, + }, { + .compatible = "samsung,exynos7870-dwusb3", + .data = &exynos7870_drvdata, }, { .compatible = "samsung,exynos850-dwusb3", .data = &exynos850_drvdata, From f25f405d250fdc7e2e67be8d1732cbbc7cd782d0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 25 Feb 2025 12:08:38 +0200 Subject: [PATCH 0487/2157] eeprom: at24: Drop of_match_ptr() and ACPI_PTR() protections These result in a very small reduction in driver size, but at the cost of more complex build and slightly harder to read code. In the case of of_match_ptr() it also prevents use of PRP0001 ACPI based identification. In this particular case we have a valid ACPI/PNP ID that should be used in preference to PRP0001 but doesn't mean we should prevent that route. With this done, drop unneeded of*.h inclusions and __maybe_unused markers. Signed-off-by: Andy Shevchenko Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250225100838.362125-1-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 0a7c7f29406c..f721825199ce 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include #include @@ -252,7 +250,7 @@ static const struct i2c_device_id at24_ids[] = { }; MODULE_DEVICE_TABLE(i2c, at24_ids); -static const struct of_device_id __maybe_unused at24_of_match[] = { +static const struct of_device_id at24_of_match[] = { { .compatible = "atmel,24c00", .data = &at24_data_24c00 }, { .compatible = "atmel,24c01", .data = &at24_data_24c01 }, { .compatible = "atmel,24cs01", .data = &at24_data_24cs01 }, @@ -286,7 +284,7 @@ static const struct of_device_id __maybe_unused at24_of_match[] = { }; MODULE_DEVICE_TABLE(of, at24_of_match); -static const struct acpi_device_id __maybe_unused at24_acpi_ids[] = { +static const struct acpi_device_id at24_acpi_ids[] = { { "INT3499", (kernel_ulong_t)&at24_data_INT3499 }, { "TPF0001", (kernel_ulong_t)&at24_data_24c1024 }, { /* END OF LIST */ } @@ -848,8 +846,8 @@ static struct i2c_driver at24_driver = { .driver = { .name = "at24", .pm = &at24_pm_ops, - .of_match_table = of_match_ptr(at24_of_match), - .acpi_match_table = ACPI_PTR(at24_acpi_ids), + .of_match_table = at24_of_match, + .acpi_match_table = at24_acpi_ids, }, .probe = at24_probe, .remove = at24_remove, From 2ff0e408db36c21ed3fa5e3c1e0e687c82cf132f Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Thu, 23 Jan 2025 19:39:45 +0300 Subject: [PATCH 0488/2157] acpi: nfit: fix narrowing conversion in acpi_nfit_ctl Syzkaller has reported a warning in to_nfit_bus_uuid(): "only secondary bus families can be translated". This warning is emited if the argument is equal to NVDIMM_BUS_FAMILY_NFIT == 0. Function acpi_nfit_ctl() first verifies that a user-provided value call_pkg->nd_family of type u64 is not equal to 0. Then the value is converted to int, and only after that is compared to NVDIMM_BUS_FAMILY_MAX. This can lead to passing an invalid argument to acpi_nfit_ctl(), if call_pkg->nd_family is non-zero, while the lower 32 bits are zero. Furthermore, it is best to return EINVAL immediately upon seeing the invalid user input. The WARNING is insufficient to prevent further undefined behavior based on other invalid user input. All checks of the input value should be applied to the original variable call_pkg->nd_family. [iweiny: update commit message] Fixes: 6450ddbd5d8e ("ACPI: NFIT: Define runtime firmware activation commands") Cc: stable@vger.kernel.org Reported-by: syzbot+c80d8dc0d9fa81a3cd8c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c80d8dc0d9fa81a3cd8c Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250123163945.251-1-m.masimov@mt-integration.ru Signed-off-by: Ira Weiny --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index a5d47819b3a4..ae035b93da08 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -485,7 +485,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nd_desc->cmd_mask; if (cmd == ND_CMD_CALL && call_pkg->nd_family) { family = call_pkg->nd_family; - if (family > NVDIMM_BUS_FAMILY_MAX || + if (call_pkg->nd_family > NVDIMM_BUS_FAMILY_MAX || !test_bit(family, &nd_desc->bus_family_mask)) return -EINVAL; family = array_index_nospec(family, From 878a8e1ecb4abd587ccdf0af8cd3d3b3fda0e07c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 20 Feb 2025 00:45:37 +0000 Subject: [PATCH 0489/2157] libnvdimm: Remove unused nd_region_conflict nd_region_conflict() has been unused since 2019's commit a3619190d62e ("libnvdimm/pfn: stop padding pmem namespaces to section alignment") Remove it, and the region_confict() helper it uses, and the associated struct conflict_context. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250220004538.84585-2-linux@treblig.org Signed-off-by: Ira Weiny --- drivers/nvdimm/nd-core.h | 2 -- drivers/nvdimm/region_devs.c | 41 ------------------------------------ 2 files changed, 43 deletions(-) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 86976a9e8a15..2b37b7fc4fef 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -127,8 +127,6 @@ resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region); resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping); resource_size_t nd_region_available_dpa(struct nd_region *nd_region); -int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, - resource_size_t size); resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id); int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 37417ce5ec7b..de1ee5ebc851 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1229,45 +1229,4 @@ bool is_nvdimm_sync(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(is_nvdimm_sync); -struct conflict_context { - struct nd_region *nd_region; - resource_size_t start, size; -}; - -static int region_conflict(struct device *dev, void *data) -{ - struct nd_region *nd_region; - struct conflict_context *ctx = data; - resource_size_t res_end, region_end, region_start; - - if (!is_memory(dev)) - return 0; - - nd_region = to_nd_region(dev); - if (nd_region == ctx->nd_region) - return 0; - - res_end = ctx->start + ctx->size; - region_start = nd_region->ndr_start; - region_end = region_start + nd_region->ndr_size; - if (ctx->start >= region_start && ctx->start < region_end) - return -EBUSY; - if (res_end > region_start && res_end <= region_end) - return -EBUSY; - return 0; -} - -int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, - resource_size_t size) -{ - struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); - struct conflict_context ctx = { - .nd_region = nd_region, - .start = start, - .size = size, - }; - - return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); -} - MODULE_IMPORT_NS("DEVMEM"); From 2318fa87f808362994dee2773a7d3307c1cebabd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 20 Feb 2025 00:45:38 +0000 Subject: [PATCH 0490/2157] libnvdimm: Remove unused nd_attach_ndns nd_attach_ndns() hasn't been used since 2017's commit 452bae0aede7 ("libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering") Remove it. Note the __ version is still used and has been left. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250220004538.84585-3-linux@treblig.org Signed-off-by: Ira Weiny --- drivers/nvdimm/claim.c | 11 ----------- drivers/nvdimm/nd-core.h | 2 -- 2 files changed, 13 deletions(-) diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 9e84ab411564..51614651d2e7 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -56,17 +56,6 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, return true; } -bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, - struct nd_namespace_common **_ndns) -{ - bool claimed; - - nvdimm_bus_lock(&attach->dev); - claimed = __nd_attach_ndns(dev, attach, _ndns); - nvdimm_bus_unlock(&attach->dev); - return claimed; -} - static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) { struct nd_region *nd_region = to_nd_region(dev->parent); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 2b37b7fc4fef..bfc6bfeb6e24 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -134,8 +134,6 @@ void get_ndd(struct nvdimm_drvdata *ndd); resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); -bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, - struct nd_namespace_common **_ndns); bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, struct nd_namespace_common **_ndns); ssize_t nd_namespace_store(struct device *dev, From 04572d18921d47939465d2cba32c19d05f27cda9 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:19 +0800 Subject: [PATCH 0491/2157] rtc: stm32: Use resource managed API to simplify code Use devm_pm_set_wake_irq and devm_device_init_wakeup to cleanup the error handling code and 'driver.remove()' hook. Reviewed-by: Antonio Borneo Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-1-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-stm32.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c index a0564d443569..1b715db47160 100644 --- a/drivers/rtc/rtc-stm32.c +++ b/drivers/rtc/rtc-stm32.c @@ -1143,11 +1143,11 @@ static int stm32_rtc_probe(struct platform_device *pdev) goto err; } - ret = device_init_wakeup(&pdev->dev, true); + ret = devm_device_init_wakeup(&pdev->dev); if (ret) goto err; - ret = dev_pm_set_wake_irq(&pdev->dev, rtc->irq_alarm); + ret = devm_pm_set_wake_irq(&pdev->dev, rtc->irq_alarm); if (ret) goto err; @@ -1208,9 +1208,6 @@ static int stm32_rtc_probe(struct platform_device *pdev) if (rtc->data->need_dbp) regmap_update_bits(rtc->dbp, rtc->dbp_reg, rtc->dbp_mask, 0); - dev_pm_clear_wake_irq(&pdev->dev); - device_init_wakeup(&pdev->dev, false); - return ret; } @@ -1237,9 +1234,6 @@ static void stm32_rtc_remove(struct platform_device *pdev) /* Enable backup domain write protection if needed */ if (rtc->data->need_dbp) regmap_update_bits(rtc->dbp, rtc->dbp_reg, rtc->dbp_mask, 0); - - dev_pm_clear_wake_irq(&pdev->dev); - device_init_wakeup(&pdev->dev, false); } static int stm32_rtc_suspend(struct device *dev) From 6b296dee3eb703cd09f7944e35554713887cce7e Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:20 +0800 Subject: [PATCH 0492/2157] rtc: nxp-bbnsm: Use resource managed API to simplify code Use devm_pm_set_wake_irq and devm_device_init_wakeup to cleanup the error handling code and 'driver.remove()' hook. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-2-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nxp-bbnsm.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/rtc/rtc-nxp-bbnsm.c b/drivers/rtc/rtc-nxp-bbnsm.c index fa3b0328c7a2..d4fc9dc583d3 100644 --- a/drivers/rtc/rtc-nxp-bbnsm.c +++ b/drivers/rtc/rtc-nxp-bbnsm.c @@ -189,36 +189,26 @@ static int bbnsm_rtc_probe(struct platform_device *pdev) /* clear all the pending events */ regmap_write(bbnsm->regmap, BBNSM_EVENTS, 0x7A); - device_init_wakeup(&pdev->dev, true); - dev_pm_set_wake_irq(&pdev->dev, bbnsm->irq); + ret = devm_device_init_wakeup(&pdev->dev); + if (ret) + dev_err(&pdev->dev, "failed to init wakeup, %d\n", ret); + + ret = devm_pm_set_wake_irq(&pdev->dev, bbnsm->irq); + if (ret) + dev_err(&pdev->dev, "failed to set wake irq, %d\n", ret); ret = devm_request_irq(&pdev->dev, bbnsm->irq, bbnsm_rtc_irq_handler, IRQF_SHARED, "rtc alarm", &pdev->dev); if (ret) { dev_err(&pdev->dev, "failed to request irq %d: %d\n", bbnsm->irq, ret); - goto err; + return ret; } bbnsm->rtc->ops = &bbnsm_rtc_ops; bbnsm->rtc->range_max = U32_MAX; - ret = devm_rtc_register_device(bbnsm->rtc); - if (ret) - goto err; - - return 0; - -err: - dev_pm_clear_wake_irq(&pdev->dev); - device_init_wakeup(&pdev->dev, false); - return ret; -} - -static void bbnsm_rtc_remove(struct platform_device *pdev) -{ - dev_pm_clear_wake_irq(&pdev->dev); - device_init_wakeup(&pdev->dev, false); + return devm_rtc_register_device(bbnsm->rtc); } static const struct of_device_id bbnsm_dt_ids[] = { @@ -233,7 +223,6 @@ static struct platform_driver bbnsm_rtc_driver = { .of_match_table = bbnsm_dt_ids, }, .probe = bbnsm_rtc_probe, - .remove = bbnsm_rtc_remove, }; module_platform_driver(bbnsm_rtc_driver); From 5ad218f101e4f6c2698977d267e137ddb6655051 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:21 +0800 Subject: [PATCH 0493/2157] rtc: ds1343: Use devm_pm_set_wake_irq Use devm_pm_set_wake_irq, then the 'driver.remove()' could be cleaned up. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-3-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1343.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index ed5a6ba89a3e..aa9500791b7e 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -427,18 +427,13 @@ static int ds1343_probe(struct spi_device *spi) "unable to request irq for rtc ds1343\n"); } else { device_init_wakeup(&spi->dev, true); - dev_pm_set_wake_irq(&spi->dev, spi->irq); + devm_pm_set_wake_irq(&spi->dev, spi->irq); } } return 0; } -static void ds1343_remove(struct spi_device *spi) -{ - dev_pm_clear_wake_irq(&spi->dev); -} - #ifdef CONFIG_PM_SLEEP static int ds1343_suspend(struct device *dev) @@ -471,7 +466,6 @@ static struct spi_driver ds1343_driver = { .pm = &ds1343_pm, }, .probe = ds1343_probe, - .remove = ds1343_remove, .id_table = ds1343_id, }; From e8a0b6e62429f5675d37a02be5169de8453a08be Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:22 +0800 Subject: [PATCH 0494/2157] rtc: pm8xxx: Use devm_pm_set_wake_irq Use devm_pm_set_wake_irq, then the 'driver.remove()' could be cleaned up. Reviewed-by: Linus Walleij Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-4-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index b2518aea4218..852d80188bd0 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -523,21 +523,11 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (rc) return rc; - rc = dev_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); - if (rc) - return rc; - - return 0; -} - -static void pm8xxx_remove(struct platform_device *pdev) -{ - dev_pm_clear_wake_irq(&pdev->dev); + return devm_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); } static struct platform_driver pm8xxx_rtc_driver = { .probe = pm8xxx_rtc_probe, - .remove = pm8xxx_remove, .driver = { .name = "rtc-pm8xxx", .of_match_table = pm8xxx_id_table, From ca36c93011173b9e241a2e31bc449fdd801b3ede Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:23 +0800 Subject: [PATCH 0495/2157] rtc: ab8500: Use resource managed API to simplify code Use devm_pm_set_wake_irq and devm_device_init_wakeup to cleanup the error handling code and 'driver.remove()' hook. Reviewed-by: Linus Walleij Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-5-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ab8500.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 2dcda96f4a8e..ed2b6b8bb3bf 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -361,7 +361,7 @@ static int ab8500_rtc_probe(struct platform_device *pdev) return -ENODEV; } - device_init_wakeup(&pdev->dev, true); + devm_device_init_wakeup(&pdev->dev); rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc)) @@ -375,7 +375,7 @@ static int ab8500_rtc_probe(struct platform_device *pdev) if (err < 0) return err; - dev_pm_set_wake_irq(&pdev->dev, irq); + devm_pm_set_wake_irq(&pdev->dev, irq); platform_set_drvdata(pdev, rtc); set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features); @@ -392,18 +392,11 @@ static int ab8500_rtc_probe(struct platform_device *pdev) return devm_rtc_register_device(rtc); } -static void ab8500_rtc_remove(struct platform_device *pdev) -{ - dev_pm_clear_wake_irq(&pdev->dev); - device_init_wakeup(&pdev->dev, false); -} - static struct platform_driver ab8500_rtc_driver = { .driver = { .name = "ab8500-rtc", }, .probe = ab8500_rtc_probe, - .remove = ab8500_rtc_remove, .id_table = ab85xx_rtc_ids, }; From d8690ce183bb351881a16291971dcc7f40fb8846 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:24 +0800 Subject: [PATCH 0496/2157] rtc: mpfs: Use devm_pm_set_wake_irq Use devm_pm_set_wake_irq, then the 'driver.remove()' could be cleaned up. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-6-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-mpfs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-mpfs.c b/drivers/rtc/rtc-mpfs.c index 3892b0f9917f..5a38649cbd43 100644 --- a/drivers/rtc/rtc-mpfs.c +++ b/drivers/rtc/rtc-mpfs.c @@ -267,18 +267,13 @@ static int mpfs_rtc_probe(struct platform_device *pdev) dev_info(&pdev->dev, "prescaler set to: %lu\n", prescaler); device_init_wakeup(&pdev->dev, true); - ret = dev_pm_set_wake_irq(&pdev->dev, wakeup_irq); + ret = devm_pm_set_wake_irq(&pdev->dev, wakeup_irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); return devm_rtc_register_device(rtcdev->rtc); } -static void mpfs_rtc_remove(struct platform_device *pdev) -{ - dev_pm_clear_wake_irq(&pdev->dev); -} - static const struct of_device_id mpfs_rtc_of_match[] = { { .compatible = "microchip,mpfs-rtc" }, { } @@ -288,7 +283,6 @@ MODULE_DEVICE_TABLE(of, mpfs_rtc_of_match); static struct platform_driver mpfs_rtc_driver = { .probe = mpfs_rtc_probe, - .remove = mpfs_rtc_remove, .driver = { .name = "mpfs_rtc", .of_match_table = mpfs_rtc_of_match, From 90e0bcc9392d27396e0b004116aafbd585bb082a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 5 Feb 2025 08:58:25 +0800 Subject: [PATCH 0497/2157] rtc: pl031: Use resource managed API to simplify code Use devm_pm_set_wake_irq and devm_device_init_wakeup to cleanup the error handling code and 'driver.remove()' hook. Reviewed-by: Linus Walleij Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250205-rtc-cleanup-v1-7-66165678e089@nxp.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pl031.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index bad6a5d9c683..47bfc5395e59 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -284,8 +284,6 @@ static void pl031_remove(struct amba_device *adev) { struct pl031_local *ldata = dev_get_drvdata(&adev->dev); - dev_pm_clear_wake_irq(&adev->dev); - device_init_wakeup(&adev->dev, false); if (adev->irq[0]) free_irq(adev->irq[0], ldata); amba_release_regions(adev); @@ -350,7 +348,7 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) } } - device_init_wakeup(&adev->dev, true); + devm_device_init_wakeup(&adev->dev); ldata->rtc = devm_rtc_allocate_device(&adev->dev); if (IS_ERR(ldata->rtc)) { ret = PTR_ERR(ldata->rtc); @@ -373,7 +371,7 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) vendor->irqflags, "rtc-pl031", ldata); if (ret) goto out; - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); + devm_pm_set_wake_irq(&adev->dev, adev->irq[0]); } return 0; From f432c5d502b41718b7b25fb07c6a2d1affaea18c Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Mon, 10 Feb 2025 13:45:45 +0800 Subject: [PATCH 0498/2157] rtc: ab-eoz9: Use HWMON_CHANNEL_INFO macro to simplify code Use HWMON_CHANNEL_INFO macro to simplify code. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250210054546.10785-2-lihuisong@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ab-eoz9.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c index d2b60487d462..de002f7a39bf 100644 --- a/drivers/rtc/rtc-ab-eoz9.c +++ b/drivers/rtc/rtc-ab-eoz9.c @@ -426,29 +426,9 @@ static umode_t abeoz9_is_visible(const void *data, } } -static const u32 abeoz9_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info abeoz9_chip = { - .type = hwmon_chip, - .config = abeoz9_chip_config, -}; - -static const u32 abeoz9_temp_config[] = { - HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN, - 0 -}; - -static const struct hwmon_channel_info abeoz9_temp = { - .type = hwmon_temp, - .config = abeoz9_temp_config, -}; - static const struct hwmon_channel_info * const abeoz9_info[] = { - &abeoz9_chip, - &abeoz9_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN), NULL }; From d659dfec7d351e3a9138009bdaa82a533d7bc462 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Mon, 10 Feb 2025 13:45:46 +0800 Subject: [PATCH 0499/2157] rtc: ds3232: Use HWMON_CHANNEL_INFO macro to simplify code Use HWMON_CHANNEL_INFO macro to simplify code. Signed-off-by: Huisong Li Link: https://lore.kernel.org/r/20250210054546.10785-3-lihuisong@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds3232.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 19c09c418746..18f35823b4b5 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -339,29 +339,9 @@ static int ds3232_hwmon_read(struct device *dev, return err; } -static u32 ds3232_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info ds3232_hwmon_chip = { - .type = hwmon_chip, - .config = ds3232_hwmon_chip_config, -}; - -static u32 ds3232_hwmon_temp_config[] = { - HWMON_T_INPUT, - 0 -}; - -static const struct hwmon_channel_info ds3232_hwmon_temp = { - .type = hwmon_temp, - .config = ds3232_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const ds3232_hwmon_info[] = { - &ds3232_hwmon_chip, - &ds3232_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), NULL }; From 8744dcd4fc7800de2eb9369410470bb2930d4c14 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 24 Feb 2025 18:06:57 +0100 Subject: [PATCH 0500/2157] counter: stm32-lptimer-cnt: fix error handling when enabling In case the stm32_lptim_set_enable_state() fails to update CMP and ARR, a timeout error is raised, by regmap_read_poll_timeout. It may happen, when the lptimer runs on a slow clock, and the clock is gated only few times during the polling. Badly, when this happen, STM32_LPTIM_ENABLE in CR register has been set. So the 'enable' state in sysfs wrongly lies on the counter being correctly enabled, due to CR is read as one in stm32_lptim_is_enabled(). To fix both issues: - enable the clock before writing CMP, ARR and polling ISR bits. It will avoid the possible timeout error. - clear the ENABLE bit in CR and disable the clock in the error path. Fixes: d8958824cf07 ("iio: counter: Add support for STM32 LPTimer") Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20250224170657.3368236-1-fabrice.gasnier@foss.st.com Signed-off-by: William Breathitt Gray --- drivers/counter/stm32-lptimer-cnt.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c index cf73f65baf60..b249c8647639 100644 --- a/drivers/counter/stm32-lptimer-cnt.c +++ b/drivers/counter/stm32-lptimer-cnt.c @@ -58,37 +58,43 @@ static int stm32_lptim_set_enable_state(struct stm32_lptim_cnt *priv, return 0; } + ret = clk_enable(priv->clk); + if (ret) + goto disable_cnt; + /* LP timer must be enabled before writing CMP & ARR */ ret = regmap_write(priv->regmap, STM32_LPTIM_ARR, priv->ceiling); if (ret) - return ret; + goto disable_clk; ret = regmap_write(priv->regmap, STM32_LPTIM_CMP, 0); if (ret) - return ret; + goto disable_clk; /* ensure CMP & ARR registers are properly written */ ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val, (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK, 100, 1000); if (ret) - return ret; + goto disable_clk; ret = regmap_write(priv->regmap, STM32_LPTIM_ICR, STM32_LPTIM_CMPOKCF_ARROKCF); if (ret) - return ret; + goto disable_clk; - ret = clk_enable(priv->clk); - if (ret) { - regmap_write(priv->regmap, STM32_LPTIM_CR, 0); - return ret; - } priv->enabled = true; /* Start LP timer in continuous mode */ return regmap_update_bits(priv->regmap, STM32_LPTIM_CR, STM32_LPTIM_CNTSTRT, STM32_LPTIM_CNTSTRT); + +disable_clk: + clk_disable(priv->clk); +disable_cnt: + regmap_write(priv->regmap, STM32_LPTIM_CR, 0); + + return ret; } static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable) From bac4368fab62c7243bdc87856b9d6f7b6f6eb36d Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:49 +0200 Subject: [PATCH 0501/2157] iio: imu: adis16550: add adis16550 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADIS16550 is a complete inertial system that includes a triaxis gyroscope and a triaxis accelerometer. Each inertial sensor in the ADIS16550 combines industry leading MEMS only technology with signal conditioning that optimizes dynamic performance. The factory calibration characterizes each sensor for sensitivity, bias, and alignment. As a result, each sensor has its own dynamic compensation formulas that provide accurate sensor measurements. Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Nuno Sá Signed-off-by: Nuno Sá Signed-off-by: Robert Budai Link: https://patch.msgid.link/20250217105753.605465-6-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/Kconfig | 13 + drivers/iio/imu/Makefile | 1 + drivers/iio/imu/adis16550.c | 1147 +++++++++++++++++++++++++++++++++++ 3 files changed, 1161 insertions(+) create mode 100644 drivers/iio/imu/adis16550.c diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig index ca0efecb5b5c..15612f0f189b 100644 --- a/drivers/iio/imu/Kconfig +++ b/drivers/iio/imu/Kconfig @@ -52,6 +52,19 @@ config ADIS16480 Say yes here to build support for Analog Devices ADIS16375, ADIS16480, ADIS16485, ADIS16488 inertial sensors. +config ADIS16550 + tristate "Analog Devices ADIS16550 and similar IMU driver" + depends on SPI + select IIO_ADIS_LIB + select IIO_ADIS_LIB_BUFFER if IIO_BUFFER + select CRC32 + help + Say yes here to build support for Analog Devices ADIS16550 inertial + sensor containing triaxis gyroscope and triaxis accelerometer. + + To compile this driver as a module, choose M here: the module will be + called adis16550. + source "drivers/iio/imu/bmi160/Kconfig" source "drivers/iio/imu/bmi270/Kconfig" source "drivers/iio/imu/bmi323/Kconfig" diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile index 04c77c2c4df8..e901aea498d3 100644 --- a/drivers/iio/imu/Makefile +++ b/drivers/iio/imu/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_ADIS16400) += adis16400.o obj-$(CONFIG_ADIS16460) += adis16460.o obj-$(CONFIG_ADIS16475) += adis16475.o obj-$(CONFIG_ADIS16480) += adis16480.o +obj-$(CONFIG_ADIS16550) += adis16550.o adis_lib-y += adis.o adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_trigger.o diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c new file mode 100644 index 000000000000..b14ea8937c7f --- /dev/null +++ b/drivers/iio/imu/adis16550.c @@ -0,0 +1,1147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ADIS16550 IMU driver + * + * Copyright 2024 Analog Devices Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ADIS16550_REG_BURST_GYRO_ACCEL 0x0a +#define ADIS16550_REG_BURST_DELTA_ANG_VEL 0x0b +#define ADIS16550_BURST_DATA_GYRO_ACCEL_MASK GENMASK(6, 1) +#define ADIS16550_BURST_DATA_DELTA_ANG_VEL_MASK GENMASK(12, 7) + +#define ADIS16550_REG_STATUS 0x0e +#define ADIS16550_REG_TEMP 0x10 +#define ADIS16550_REG_X_GYRO 0x12 +#define ADIS16550_REG_Y_GYRO 0x14 +#define ADIS16550_REG_Z_GYRO 0x16 +#define ADIS16550_REG_X_ACCEL 0x18 +#define ADIS16550_REG_Y_ACCEL 0x1a +#define ADIS16550_REG_Z_ACCEL 0x1c +#define ADIS16550_REG_X_DELTANG_L 0x1E +#define ADIS16550_REG_Y_DELTANG_L 0x20 +#define ADIS16550_REG_Z_DELTANG_L 0x22 +#define ADIS16550_REG_X_DELTVEL_L 0x24 +#define ADIS16550_REG_Y_DELTVEL_L 0x26 +#define ADIS16550_REG_Z_DELTVEL_L 0x28 +#define ADIS16550_REG_X_GYRO_SCALE 0x30 +#define ADIS16550_REG_Y_GYRO_SCALE 0x32 +#define ADIS16550_REG_Z_GYRO_SCALE 0x34 +#define ADIS16550_REG_X_ACCEL_SCALE 0x36 +#define ADIS16550_REG_Y_ACCEL_SCALE 0x38 +#define ADIS16550_REG_Z_ACCEL_SCALE 0x3a +#define ADIS16550_REG_X_GYRO_BIAS 0x40 +#define ADIS16550_REG_Y_GYRO_BIAS 0x42 +#define ADIS16550_REG_Z_GYRO_BIAS 0x44 +#define ADIS16550_REG_X_ACCEL_BIAS 0x46 +#define ADIS16550_REG_Y_ACCEL_BIAS 0x48 +#define ADIS16550_REG_Z_ACCEL_BIAS 0x4a +#define ADIS16550_REG_COMMAND 0x50 +#define ADIS16550_REG_CONFIG 0x52 +#define ADIS16550_GYRO_FIR_EN_MASK BIT(3) +#define ADIS16550_ACCL_FIR_EN_MASK BIT(2) +#define ADIS16550_SYNC_MASK \ + (ADIS16550_SYNC_EN_MASK | ADIS16550_SYNC_MODE_MASK) +#define ADIS16550_SYNC_MODE_MASK BIT(1) +#define ADIS16550_SYNC_EN_MASK BIT(0) +/* max of 4000 SPS in scale sync */ +#define ADIS16550_SYNC_SCALE_MAX_RATE (4000 * 1000) +#define ADIS16550_REG_DEC_RATE 0x54 +#define ADIS16550_REG_SYNC_SCALE 0x56 +#define ADIS16550_REG_SERIAL_NUM 0x76 +#define ADIS16550_REG_FW_REV 0x7A +#define ADIS16550_REG_FW_DATE 0x7C +#define ADIS16550_REG_PROD_ID 0x7E +#define ADIS16550_REG_FLASH_CNT 0x72 +/* SPI protocol*/ +#define ADIS16550_SPI_DATA_MASK GENMASK(31, 16) +#define ADIS16550_SPI_REG_MASK GENMASK(14, 8) +#define ADIS16550_SPI_R_W_MASK BIT(7) +#define ADIS16550_SPI_CRC_MASK GENMASK(3, 0) +#define ADIS16550_SPI_SV_MASK GENMASK(7, 6) +/* burst read */ +#define ADIS16550_BURST_N_ELEM 12 +#define ADIS16550_BURST_DATA_LEN (ADIS16550_BURST_N_ELEM * 4) +#define ADIS16550_MAX_SCAN_DATA 12 + +struct adis16550_sync { + u16 sync_mode; + u16 min_rate; + u16 max_rate; +}; + +struct adis16550_chip_info { + const struct iio_chan_spec *channels; + const struct adis16550_sync *sync_mode; + char *name; + u32 num_channels; + u32 gyro_max_val; + u32 gyro_max_scale; + u32 accel_max_val; + u32 accel_max_scale; + u32 temp_scale; + u32 deltang_max_val; + u32 deltvel_max_val; + u32 int_clk; + u16 max_dec; + u16 num_sync; +}; + +struct adis16550 { + const struct adis16550_chip_info *info; + struct adis adis; + unsigned long clk_freq_hz; + u32 sync_mode; + struct spi_transfer xfer[2]; + u8 buffer[ADIS16550_BURST_DATA_LEN + sizeof(u32)] __aligned(IIO_DMA_MINALIGN); + __be32 din[2]; + __be32 dout[2]; +}; + +enum { + ADIS16550_SV_INIT, + ADIS16550_SV_OK, + ADIS16550_SV_NOK, + ADIS16550_SV_SPI_ERROR, +}; + +/* + * This is a simplified implementation of lib/crc4.c. It could not be used + * directly since the polynomial used is different from the one used by the + * 16550 which is 0b10001 + */ +static u8 spi_crc4(const u32 val) +{ + int i; + const int bits = 28; + u8 crc = 0xa; + /* ignore 4lsb */ + const u32 __val = val >> 4; + + /* Calculate crc4 over four-bit nibbles, starting at the MSbit */ + for (i = bits - 4; i >= 0; i -= 4) + crc = crc ^ ((__val >> i) & 0xf); + + return crc; +} + +static int adis16550_spi_validate(const struct adis *adis, __be32 dout, + u16 *data) +{ + u32 __dout; + u8 crc, crc_rcv, sv; + + __dout = be32_to_cpu(dout); + + /* validate received message */ + crc_rcv = FIELD_GET(ADIS16550_SPI_CRC_MASK, __dout); + crc = spi_crc4(__dout); + if (crc_rcv != crc) { + dev_err(&adis->spi->dev, + "Invalid crc, rcv: 0x%02x, calc: 0x%02x!\n", + crc_rcv, crc); + return -EIO; + } + sv = FIELD_GET(ADIS16550_SPI_SV_MASK, __dout); + if (sv >= ADIS16550_SV_NOK) { + dev_err(&adis->spi->dev, + "State vector error detected: %02X", sv); + return -EIO; + } + *data = FIELD_GET(ADIS16550_SPI_DATA_MASK, __dout); + + return 0; +} + +static void adis16550_spi_msg_prepare(const u32 reg, const bool write, + const u16 data, __be32 *din) +{ + u8 crc; + u32 __din; + + __din = FIELD_PREP(ADIS16550_SPI_REG_MASK, reg); + + if (write) { + __din |= FIELD_PREP(ADIS16550_SPI_R_W_MASK, 1); + __din |= FIELD_PREP(ADIS16550_SPI_DATA_MASK, data); + } + + crc = spi_crc4(__din); + __din |= FIELD_PREP(ADIS16550_SPI_CRC_MASK, crc); + + *din = cpu_to_be32(__din); +} + +static int adis16550_spi_xfer(const struct adis *adis, u32 reg, u32 len, + u32 *readval, u32 writeval) +{ + int ret; + u16 data = 0; + struct spi_message msg; + bool wr = readval ? false : true; + struct spi_device *spi = adis->spi; + struct adis16550 *st = container_of(adis, struct adis16550, adis); + struct spi_transfer xfers[] = { + { + .tx_buf = &st->din[0], + .len = 4, + .cs_change = 1, + }, { + .tx_buf = &st->din[1], + .len = 4, + .cs_change = 1, + .rx_buf = st->dout, + }, { + .tx_buf = &st->din[1], + .rx_buf = &st->dout[1], + .len = 4, + }, + }; + + spi_message_init(&msg); + + switch (len) { + case 4: + adis16550_spi_msg_prepare(reg + 1, wr, writeval >> 16, + &st->din[0]); + spi_message_add_tail(&xfers[0], &msg); + fallthrough; + case 2: + adis16550_spi_msg_prepare(reg, wr, writeval, &st->din[1]); + spi_message_add_tail(&xfers[1], &msg); + spi_message_add_tail(&xfers[2], &msg); + break; + default: + return -EINVAL; + } + + ret = spi_sync(spi, &msg); + if (ret) { + dev_err(&spi->dev, "Spi failure %d\n", ret); + return ret; + } + /* + * When writing a register, the device will reply with a readback on the + * transfer so that we can validate if our data was actually written.. + */ + switch (len) { + case 4: + ret = adis16550_spi_validate(adis, st->dout[0], &data); + if (ret) + return ret; + + if (readval) { + *readval = data << 16; + } else if ((writeval >> 16) != data && reg != ADIS16550_REG_COMMAND) { + dev_err(&spi->dev, + "Data not written: wr: 0x%04X, rcv: 0x%04X\n", + writeval >> 16, data); + return -EIO; + } + + fallthrough; + case 2: + ret = adis16550_spi_validate(adis, st->dout[1], &data); + if (ret) + return ret; + + if (readval) { + *readval = (*readval & GENMASK(31, 16)) | data; + } else if ((writeval & GENMASK(15, 0)) != data && reg != ADIS16550_REG_COMMAND) { + dev_err(&spi->dev, + "Data not written: wr: 0x%04X, rcv: 0x%04X\n", + (u16)writeval, data); + return -EIO; + } + } + + return 0; +} + +static int adis16550_spi_read(struct adis *adis, const u32 reg, + u32 *value, const u32 len) +{ + return adis16550_spi_xfer(adis, reg, len, value, 0); +} + +static int adis16550_spi_write(struct adis *adis, const u32 reg, + const u32 value, const u32 len) +{ + return adis16550_spi_xfer(adis, reg, len, NULL, value); +} + +static ssize_t adis16550_show_firmware_revision(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct adis16550 *st = file->private_data; + char buf[7]; + size_t len; + u16 rev; + int ret; + + ret = adis_read_reg_16(&st->adis, ADIS16550_REG_FW_REV, &rev); + if (ret) + return ret; + + len = scnprintf(buf, sizeof(buf), "%x.%x\n", rev >> 8, rev & 0xff); + + return simple_read_from_buffer(userbuf, count, ppos, buf, len); +} + +static const struct file_operations adis16550_firmware_revision_fops = { + .open = simple_open, + .read = adis16550_show_firmware_revision, + .llseek = default_llseek, + .owner = THIS_MODULE, +}; + +static ssize_t adis16550_show_firmware_date(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct adis16550 *st = file->private_data; + char buf[12]; + size_t len; + u32 date; + int ret; + + ret = adis_read_reg_32(&st->adis, ADIS16550_REG_FW_DATE, &date); + if (ret) + return ret; + + len = scnprintf(buf, sizeof(buf), "%.2x-%.2x-%.4x\n", date & 0xff, + (date >> 8) & 0xff, date >> 16); + + return simple_read_from_buffer(userbuf, count, ppos, buf, len); +} + +static const struct file_operations adis16550_firmware_date_fops = { + .open = simple_open, + .read = adis16550_show_firmware_date, + .llseek = default_llseek, + .owner = THIS_MODULE, +}; + +static int adis16550_show_serial_number(void *arg, u64 *val) +{ + struct adis16550 *st = arg; + u32 serial; + int ret; + + ret = adis_read_reg_32(&st->adis, ADIS16550_REG_SERIAL_NUM, &serial); + if (ret) + return ret; + + *val = serial; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(adis16550_serial_number_fops, + adis16550_show_serial_number, NULL, "0x%.8llx\n"); + +static int adis16550_show_product_id(void *arg, u64 *val) +{ + struct adis16550 *st = arg; + u16 prod_id; + int ret; + + ret = adis_read_reg_16(&st->adis, ADIS16550_REG_PROD_ID, &prod_id); + if (ret) + return ret; + + *val = prod_id; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(adis16550_product_id_fops, + adis16550_show_product_id, NULL, "%llu\n"); + +static int adis16550_show_flash_count(void *arg, u64 *val) +{ + struct adis16550 *st = arg; + u16 flash_count; + int ret; + + ret = adis_read_reg_16(&st->adis, ADIS16550_REG_FLASH_CNT, &flash_count); + if (ret) + return ret; + + *val = flash_count; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(adis16550_flash_count_fops, + adis16550_show_flash_count, NULL, "%lld\n"); + +static void adis16550_debugfs_init(struct iio_dev *indio_dev) +{ + struct adis16550 *st = iio_priv(indio_dev); + struct dentry *d = iio_get_debugfs_dentry(indio_dev); + + debugfs_create_file_unsafe("serial_number", 0400, d, st, + &adis16550_serial_number_fops); + debugfs_create_file_unsafe("product_id", 0400, d, st, + &adis16550_product_id_fops); + debugfs_create_file("firmware_revision", 0400, d, st, + &adis16550_firmware_revision_fops); + debugfs_create_file("firmware_date", 0400, d, st, + &adis16550_firmware_date_fops); + debugfs_create_file_unsafe("flash_count", 0400, d, st, + &adis16550_flash_count_fops); +} + +enum { + ADIS16550_SYNC_MODE_DIRECT, + ADIS16550_SYNC_MODE_SCALED, +}; + +static int adis16550_get_freq(struct adis16550 *st, u32 *freq) +{ + int ret; + u16 dec = 0; + u32 sample_rate = st->clk_freq_hz; + + adis_dev_auto_lock(&st->adis); + + if (st->sync_mode == ADIS16550_SYNC_MODE_SCALED) { + u16 sync_scale; + + ret = __adis_read_reg_16(&st->adis, ADIS16550_REG_SYNC_SCALE, &sync_scale); + if (ret) + return ret; + + sample_rate = st->clk_freq_hz * sync_scale; + } + + ret = __adis_read_reg_16(&st->adis, ADIS16550_REG_DEC_RATE, &dec); + if (ret) + return -EINVAL; + *freq = DIV_ROUND_CLOSEST(sample_rate, dec + 1); + + return 0; +} + +static int adis16550_set_freq_hz(struct adis16550 *st, u32 freq_hz) +{ + u16 dec; + int ret; + u32 sample_rate = st->clk_freq_hz; + /* + * The optimal sample rate for the supported IMUs is between + * int_clk - 1000 and int_clk + 500. + */ + u32 max_sample_rate = st->info->int_clk * 1000 + 500000; + u32 min_sample_rate = st->info->int_clk * 1000 - 1000000; + + if (!freq_hz) + return -EINVAL; + + adis_dev_auto_lock(&st->adis); + + if (st->sync_mode == ADIS16550_SYNC_MODE_SCALED) { + unsigned long scaled_rate = lcm(st->clk_freq_hz, freq_hz); + int sync_scale; + + if (scaled_rate > max_sample_rate) + scaled_rate = max_sample_rate / st->clk_freq_hz * st->clk_freq_hz; + else + scaled_rate = max_sample_rate / scaled_rate * scaled_rate; + + if (scaled_rate < min_sample_rate) + scaled_rate = roundup(min_sample_rate, st->clk_freq_hz); + + sync_scale = scaled_rate / st->clk_freq_hz; + ret = __adis_write_reg_16(&st->adis, ADIS16550_REG_SYNC_SCALE, + sync_scale); + if (ret) + return ret; + + sample_rate = scaled_rate; + } + + dec = DIV_ROUND_CLOSEST(sample_rate, freq_hz); + + if (dec) + dec--; + + dec = min(dec, st->info->max_dec); + + return __adis_write_reg_16(&st->adis, ADIS16550_REG_DEC_RATE, dec); +} + +static int adis16550_get_accl_filter_freq(struct adis16550 *st, int *freq_hz) +{ + int ret; + u16 config = 0; + + ret = adis_read_reg_16(&st->adis, ADIS16550_REG_CONFIG, &config); + if (ret) + return -EINVAL; + + if (FIELD_GET(ADIS16550_ACCL_FIR_EN_MASK, config)) + *freq_hz = 100; + else + *freq_hz = 0; + + return 0; +} + +static int adis16550_set_accl_filter_freq(struct adis16550 *st, int freq_hz) +{ + u8 en = freq_hz ? 1 : 0; + u16 val = FIELD_PREP(ADIS16550_ACCL_FIR_EN_MASK, en); + + return __adis_update_bits(&st->adis, ADIS16550_REG_CONFIG, + ADIS16550_ACCL_FIR_EN_MASK, val); +} + +static int adis16550_get_gyro_filter_freq(struct adis16550 *st, int *freq_hz) +{ + int ret; + u16 config = 0; + + ret = adis_read_reg_16(&st->adis, ADIS16550_REG_CONFIG, &config); + if (ret) + return -EINVAL; + + if (FIELD_GET(ADIS16550_GYRO_FIR_EN_MASK, config)) + *freq_hz = 100; + else + *freq_hz = 0; + + return 0; +} + +static int adis16550_set_gyro_filter_freq(struct adis16550 *st, int freq_hz) +{ + u8 en = freq_hz ? 1 : 0; + u16 val = FIELD_PREP(ADIS16550_GYRO_FIR_EN_MASK, en); + + return __adis_update_bits(&st->adis, ADIS16550_REG_CONFIG, + ADIS16550_GYRO_FIR_EN_MASK, val); +} + +enum { + ADIS16550_SCAN_TEMP, + ADIS16550_SCAN_GYRO_X, + ADIS16550_SCAN_GYRO_Y, + ADIS16550_SCAN_GYRO_Z, + ADIS16550_SCAN_ACCEL_X, + ADIS16550_SCAN_ACCEL_Y, + ADIS16550_SCAN_ACCEL_Z, + ADIS16550_SCAN_DELTANG_X, + ADIS16550_SCAN_DELTANG_Y, + ADIS16550_SCAN_DELTANG_Z, + ADIS16550_SCAN_DELTVEL_X, + ADIS16550_SCAN_DELTVEL_Y, + ADIS16550_SCAN_DELTVEL_Z, +}; + +static const u32 adis16550_calib_bias[] = { + [ADIS16550_SCAN_GYRO_X] = ADIS16550_REG_X_GYRO_BIAS, + [ADIS16550_SCAN_GYRO_Y] = ADIS16550_REG_Y_GYRO_BIAS, + [ADIS16550_SCAN_GYRO_Z] = ADIS16550_REG_Z_GYRO_BIAS, + [ADIS16550_SCAN_ACCEL_X] = ADIS16550_REG_X_ACCEL_BIAS, + [ADIS16550_SCAN_ACCEL_Y] = ADIS16550_REG_Y_ACCEL_BIAS, + [ADIS16550_SCAN_ACCEL_Z] = ADIS16550_REG_Z_ACCEL_BIAS, + +}; + +static const u32 adis16550_calib_scale[] = { + [ADIS16550_SCAN_GYRO_X] = ADIS16550_REG_X_GYRO_SCALE, + [ADIS16550_SCAN_GYRO_Y] = ADIS16550_REG_Y_GYRO_SCALE, + [ADIS16550_SCAN_GYRO_Z] = ADIS16550_REG_Z_GYRO_SCALE, + [ADIS16550_SCAN_ACCEL_X] = ADIS16550_REG_X_ACCEL_SCALE, + [ADIS16550_SCAN_ACCEL_Y] = ADIS16550_REG_Y_ACCEL_SCALE, + [ADIS16550_SCAN_ACCEL_Z] = ADIS16550_REG_Z_ACCEL_SCALE, +}; + +static int adis16550_read_raw(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + int *val, int *val2, long info) +{ + struct adis16550 *st = iio_priv(indio_dev); + const int idx = chan->scan_index; + u16 scale; + int ret; + u32 tmp; + + switch (info) { + case IIO_CHAN_INFO_RAW: + return adis_single_conversion(indio_dev, chan, 0, val); + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_ANGL_VEL: + *val = st->info->gyro_max_val; + *val2 = st->info->gyro_max_scale; + return IIO_VAL_FRACTIONAL; + case IIO_ACCEL: + *val = st->info->accel_max_val; + *val2 = st->info->accel_max_scale; + return IIO_VAL_FRACTIONAL; + case IIO_TEMP: + *val = st->info->temp_scale; + return IIO_VAL_INT; + case IIO_DELTA_ANGL: + *val = st->info->deltang_max_val; + *val2 = 31; + return IIO_VAL_FRACTIONAL_LOG2; + case IIO_DELTA_VELOCITY: + *val = st->info->deltvel_max_val; + *val2 = 31; + return IIO_VAL_FRACTIONAL_LOG2; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_OFFSET: + /* temperature centered at 25°C */ + *val = DIV_ROUND_CLOSEST(25000, st->info->temp_scale); + return IIO_VAL_INT; + case IIO_CHAN_INFO_CALIBBIAS: + ret = adis_read_reg_32(&st->adis, + adis16550_calib_bias[idx], val); + if (ret) + return ret; + + return IIO_VAL_INT; + case IIO_CHAN_INFO_CALIBSCALE: + ret = adis_read_reg_16(&st->adis, + adis16550_calib_scale[idx], &scale); + if (ret) + return ret; + + *val = scale; + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = adis16550_get_freq(st, &tmp); + if (ret) + return ret; + + *val = tmp / 1000; + *val2 = (tmp % 1000) * 1000; + return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + switch (chan->type) { + case IIO_ANGL_VEL: + ret = adis16550_get_accl_filter_freq(st, val); + if (ret) + return ret; + return IIO_VAL_INT; + case IIO_ACCEL: + ret = adis16550_get_gyro_filter_freq(st, val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static int adis16550_write_raw(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + int val, int val2, long info) +{ + struct adis16550 *st = iio_priv(indio_dev); + const int idx = chan->scan_index; + u32 tmp; + + switch (info) { + case IIO_CHAN_INFO_SAMP_FREQ: + tmp = val * 1000 + val2 / 1000; + return adis16550_set_freq_hz(st, tmp); + case IIO_CHAN_INFO_CALIBBIAS: + return adis_write_reg_32(&st->adis, adis16550_calib_bias[idx], + val); + case IIO_CHAN_INFO_CALIBSCALE: + return adis_write_reg_16(&st->adis, adis16550_calib_scale[idx], + val); + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + switch (chan->type) { + case IIO_ANGL_VEL: + return adis16550_set_accl_filter_freq(st, val); + case IIO_ACCEL: + return adis16550_set_gyro_filter_freq(st, val); + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + +#define ADIS16550_MOD_CHAN(_type, _mod, _address, _si) \ + { \ + .type = (_type), \ + .modified = 1, \ + .channel2 = (_mod), \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_CALIBBIAS) | \ + BIT(IIO_CHAN_INFO_CALIBSCALE), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .address = (_address), \ + .scan_index = (_si), \ + .scan_type = { \ + .sign = 's', \ + .realbits = 32, \ + .storagebits = 32, \ + .endianness = IIO_BE, \ + }, \ + } + +#define ADIS16550_GYRO_CHANNEL(_mod) \ + ADIS16550_MOD_CHAN(IIO_ANGL_VEL, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _GYRO, ADIS16550_SCAN_GYRO_ ## _mod) + +#define ADIS16550_ACCEL_CHANNEL(_mod) \ + ADIS16550_MOD_CHAN(IIO_ACCEL, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _ACCEL, ADIS16550_SCAN_ACCEL_ ## _mod) + +#define ADIS16550_TEMP_CHANNEL() { \ + .type = IIO_TEMP, \ + .indexed = 1, \ + .channel = 0, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_OFFSET), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .address = ADIS16550_REG_TEMP, \ + .scan_index = ADIS16550_SCAN_TEMP, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 32, \ + .endianness = IIO_BE, \ + }, \ + } + +#define ADIS16550_MOD_CHAN_DELTA(_type, _mod, _address, _si) { \ + .type = (_type), \ + .modified = 1, \ + .channel2 = (_mod), \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .address = (_address), \ + .scan_index = _si, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 32, \ + .storagebits = 32, \ + .endianness = IIO_BE, \ + }, \ + } + +#define ADIS16550_DELTANG_CHAN(_mod) \ + ADIS16550_MOD_CHAN_DELTA(IIO_DELTA_ANGL, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _DELTANG_L, ADIS16550_SCAN_DELTANG_ ## _mod) + +#define ADIS16550_DELTVEL_CHAN(_mod) \ + ADIS16550_MOD_CHAN_DELTA(IIO_DELTA_VELOCITY, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _DELTVEL_L, ADIS16550_SCAN_DELTVEL_ ## _mod) + +#define ADIS16550_DELTANG_CHAN_NO_SCAN(_mod) \ + ADIS16550_MOD_CHAN_DELTA(IIO_DELTA_ANGL, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _DELTANG_L, -1) + +#define ADIS16550_DELTVEL_CHAN_NO_SCAN(_mod) \ + ADIS16550_MOD_CHAN_DELTA(IIO_DELTA_VELOCITY, IIO_MOD_ ## _mod, \ + ADIS16550_REG_ ## _mod ## _DELTVEL_L, -1) + +static const struct iio_chan_spec adis16550_channels[] = { + ADIS16550_TEMP_CHANNEL(), + ADIS16550_GYRO_CHANNEL(X), + ADIS16550_GYRO_CHANNEL(Y), + ADIS16550_GYRO_CHANNEL(Z), + ADIS16550_ACCEL_CHANNEL(X), + ADIS16550_ACCEL_CHANNEL(Y), + ADIS16550_ACCEL_CHANNEL(Z), + ADIS16550_DELTANG_CHAN(X), + ADIS16550_DELTANG_CHAN(Y), + ADIS16550_DELTANG_CHAN(Z), + ADIS16550_DELTVEL_CHAN(X), + ADIS16550_DELTVEL_CHAN(Y), + ADIS16550_DELTVEL_CHAN(Z), + IIO_CHAN_SOFT_TIMESTAMP(13), +}; + +static const struct adis16550_sync adis16550_sync_modes[] = { + { ADIS16550_SYNC_MODE_DIRECT, 3000, 4500 }, + { ADIS16550_SYNC_MODE_SCALED, 1, 128 }, +}; + +static const struct adis16550_chip_info adis16550_chip_info = { + .num_channels = ARRAY_SIZE(adis16550_channels), + .channels = adis16550_channels, + .name = "adis16550", + .gyro_max_val = 1, + .gyro_max_scale = IIO_RAD_TO_DEGREE(80 << 16), + .accel_max_val = 1, + .accel_max_scale = IIO_M_S_2_TO_G(102400000), + .temp_scale = 4, + .deltang_max_val = IIO_DEGREE_TO_RAD(720), + .deltvel_max_val = 125, + .int_clk = 4000, + .max_dec = 4095, + .sync_mode = adis16550_sync_modes, + .num_sync = ARRAY_SIZE(adis16550_sync_modes), +}; + +static u32 adis16550_validate_crc(__be32 *buffer, const u8 n_elem) +{ + int i; + u32 crc_calc; + u32 crc_buf[ADIS16550_BURST_N_ELEM - 2]; + u32 crc = be32_to_cpu(buffer[ADIS16550_BURST_N_ELEM - 1]); + /* + * The crc calculation of the data is done in little endian. Hence, we + * always swap the 32bit elements making sure that the data LSB is + * always on address 0... + */ + for (i = 0; i < n_elem; i++) + crc_buf[i] = be32_to_cpu(buffer[i]); + + crc_calc = crc32(~0, crc_buf, n_elem * 4); + crc_calc ^= ~0; + + return (crc_calc == crc); +} + +static irqreturn_t adis16550_trigger_handler(int irq, void *p) +{ + int ret; + u16 dummy; + bool valid; + struct iio_poll_func *pf = p; + __be32 data[ADIS16550_MAX_SCAN_DATA]; + struct iio_dev *indio_dev = pf->indio_dev; + struct adis16550 *st = iio_priv(indio_dev); + struct adis *adis = iio_device_get_drvdata(indio_dev); + __be32 *buffer = (__be32 *)st->buffer; + + ret = spi_sync(adis->spi, &adis->msg); + if (ret) + goto done; + /* + * Validate the header. The header is a normal spi reply with state + * vector and crc4. + */ + ret = adis16550_spi_validate(&st->adis, buffer[0], &dummy); + if (ret) + goto done; + + /* the header is not included in the crc */ + valid = adis16550_validate_crc(buffer, ADIS16550_BURST_N_ELEM - 2); + if (!valid) { + dev_err(&adis->spi->dev, "Burst Invalid crc!\n"); + goto done; + } + + /* copy the temperature together with sensor data */ + memcpy(data, &buffer[3], + (ADIS16550_SCAN_ACCEL_Z - ADIS16550_SCAN_GYRO_X + 2) * + sizeof(__be32)); + iio_push_to_buffers_with_timestamp(indio_dev, data, pf->timestamp); +done: + iio_trigger_notify_done(indio_dev->trig); + return IRQ_HANDLED; +} + +static const unsigned long adis16550_channel_masks[] = { + ADIS16550_BURST_DATA_GYRO_ACCEL_MASK | BIT(ADIS16550_SCAN_TEMP), + ADIS16550_BURST_DATA_DELTA_ANG_VEL_MASK | BIT(ADIS16550_SCAN_TEMP), + 0 +}; + +static int adis16550_update_scan_mode(struct iio_dev *indio_dev, + const unsigned long *scan_mask) +{ + u16 burst_length = ADIS16550_BURST_DATA_LEN; + struct adis16550 *st = iio_priv(indio_dev); + u8 burst_cmd; + u8 *tx; + + memset(st->buffer, 0, burst_length + sizeof(u32)); + + if (*scan_mask & ADIS16550_BURST_DATA_GYRO_ACCEL_MASK) + burst_cmd = ADIS16550_REG_BURST_GYRO_ACCEL; + else + burst_cmd = ADIS16550_REG_BURST_DELTA_ANG_VEL; + + tx = st->buffer + burst_length; + tx[0] = 0x00; + tx[1] = 0x00; + tx[2] = burst_cmd; + /* crc4 is 0 on burst command */ + tx[3] = spi_crc4(get_unaligned_le32(tx)); + + return 0; +} + +static int adis16550_reset(struct adis *adis) +{ + return __adis_write_reg_16(adis, ADIS16550_REG_COMMAND, BIT(15)); +} + +static int adis16550_config_sync(struct adis16550 *st) +{ + struct device *dev = &st->adis.spi->dev; + const struct adis16550_sync *sync_mode_data; + struct clk *clk; + int ret, i; + u16 mode; + + clk = devm_clk_get_optional_enabled(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + if (!clk) { + st->clk_freq_hz = st->info->int_clk * 1000; + return 0; + } + + st->clk_freq_hz = clk_get_rate(clk); + + for (i = 0; i < st->info->num_sync; i++) { + if (st->clk_freq_hz >= st->info->sync_mode[i].min_rate && + st->clk_freq_hz <= st->info->sync_mode[i].max_rate) { + sync_mode_data = &st->info->sync_mode[i]; + break; + } + } + + if (i == st->info->num_sync) + return dev_err_probe(dev, -EINVAL, "Clk rate: %lu not in a valid range", + st->clk_freq_hz); + + if (sync_mode_data->sync_mode == ADIS16550_SYNC_MODE_SCALED) { + u16 sync_scale; + /* + * In sps scaled sync we must scale the input clock to a range + * of [3000 4500]. + */ + + sync_scale = DIV_ROUND_CLOSEST(st->info->int_clk, st->clk_freq_hz); + + if (3000 > sync_scale || 4500 < sync_scale) + return dev_err_probe(dev, -EINVAL, + "Invalid value:%u for sync_scale", + sync_scale); + + ret = adis_write_reg_16(&st->adis, ADIS16550_REG_SYNC_SCALE, + sync_scale); + if (ret) + return ret; + + st->clk_freq_hz = st->info->int_clk; + } + + st->clk_freq_hz *= 1000; + + mode = FIELD_PREP(ADIS16550_SYNC_MODE_MASK, sync_mode_data->sync_mode) | + FIELD_PREP(ADIS16550_SYNC_EN_MASK, true); + + return __adis_update_bits(&st->adis, ADIS16550_REG_CONFIG, + ADIS16550_SYNC_MASK, mode); +} + +static const struct iio_info adis16550_info = { + .read_raw = &adis16550_read_raw, + .write_raw = &adis16550_write_raw, + .update_scan_mode = adis16550_update_scan_mode, + .debugfs_reg_access = adis_debugfs_reg_access, +}; + +enum { + ADIS16550_STATUS_CRC_CODE, + ADIS16550_STATUS_CRC_CONFIG, + ADIS16550_STATUS_FLASH_UPDATE, + ADIS16550_STATUS_INERIAL, + ADIS16550_STATUS_SENSOR, + ADIS16550_STATUS_TEMPERATURE, + ADIS16550_STATUS_SPI, + ADIS16550_STATUS_PROCESSING, + ADIS16550_STATUS_POWER, + ADIS16550_STATUS_BOOT, + ADIS16550_STATUS_WATCHDOG = 15, + ADIS16550_STATUS_REGULATOR = 28, + ADIS16550_STATUS_SENSOR_SUPPLY, + ADIS16550_STATUS_CPU_SUPPLY, + ADIS16550_STATUS_5V_SUPPLY, +}; + +static const char * const adis16550_status_error_msgs[] = { + [ADIS16550_STATUS_CRC_CODE] = "Code CRC Error", + [ADIS16550_STATUS_CRC_CONFIG] = "Configuration/Calibration CRC Error", + [ADIS16550_STATUS_FLASH_UPDATE] = "Flash Update Error", + [ADIS16550_STATUS_INERIAL] = "Overrange for Inertial Signals", + [ADIS16550_STATUS_SENSOR] = "Sensor failure", + [ADIS16550_STATUS_TEMPERATURE] = "Temperature Error", + [ADIS16550_STATUS_SPI] = "SPI Communication Error", + [ADIS16550_STATUS_PROCESSING] = "Processing Overrun Error", + [ADIS16550_STATUS_POWER] = "Power Supply Failure", + [ADIS16550_STATUS_BOOT] = "Boot Memory Failure", + [ADIS16550_STATUS_WATCHDOG] = "Watchdog timer flag", + [ADIS16550_STATUS_REGULATOR] = "Internal Regulator Error", + [ADIS16550_STATUS_SENSOR_SUPPLY] = "Internal Sensor Supply Error.", + [ADIS16550_STATUS_CPU_SUPPLY] = "Internal Processor Supply Error.", + [ADIS16550_STATUS_5V_SUPPLY] = "External 5V Supply Error", +}; + +static const struct adis_timeout adis16550_timeouts = { + .reset_ms = 1000, + .sw_reset_ms = 1000, + .self_test_ms = 1000, +}; + +static const struct adis_data adis16550_data = { + .diag_stat_reg = ADIS16550_REG_STATUS, + .diag_stat_size = 4, + .prod_id_reg = ADIS16550_REG_PROD_ID, + .prod_id = 16550, + .self_test_mask = BIT(1), + .self_test_reg = ADIS16550_REG_COMMAND, + .cs_change_delay = 5, + .unmasked_drdy = true, + .status_error_msgs = adis16550_status_error_msgs, + .status_error_mask = BIT(ADIS16550_STATUS_CRC_CODE) | + BIT(ADIS16550_STATUS_CRC_CONFIG) | + BIT(ADIS16550_STATUS_FLASH_UPDATE) | + BIT(ADIS16550_STATUS_INERIAL) | + BIT(ADIS16550_STATUS_SENSOR) | + BIT(ADIS16550_STATUS_TEMPERATURE) | + BIT(ADIS16550_STATUS_SPI) | + BIT(ADIS16550_STATUS_PROCESSING) | + BIT(ADIS16550_STATUS_POWER) | + BIT(ADIS16550_STATUS_BOOT) | + BIT(ADIS16550_STATUS_WATCHDOG) | + BIT(ADIS16550_STATUS_REGULATOR) | + BIT(ADIS16550_STATUS_SENSOR_SUPPLY) | + BIT(ADIS16550_STATUS_CPU_SUPPLY) | + BIT(ADIS16550_STATUS_5V_SUPPLY), + .timeouts = &adis16550_timeouts, +}; + +static const struct adis_ops adis16550_ops = { + .write = adis16550_spi_write, + .read = adis16550_spi_read, + .reset = adis16550_reset, +}; + +static int adis16550_probe(struct spi_device *spi) +{ + u16 burst_length = ADIS16550_BURST_DATA_LEN; + struct device *dev = &spi->dev; + struct iio_dev *indio_dev; + struct adis16550 *st; + struct adis *adis; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + st->info = spi_get_device_match_data(spi); + if (!st->info) + return -EINVAL; + adis = &st->adis; + indio_dev->name = st->info->name; + indio_dev->channels = st->info->channels; + indio_dev->num_channels = st->info->num_channels; + indio_dev->available_scan_masks = adis16550_channel_masks; + indio_dev->info = &adis16550_info; + indio_dev->modes = INDIO_DIRECT_MODE; + + st->adis.ops = &adis16550_ops; + st->xfer[0].tx_buf = st->buffer + burst_length; + st->xfer[0].len = 4; + st->xfer[0].cs_change = 1; + st->xfer[0].delay.value = 8; + st->xfer[0].delay.unit = SPI_DELAY_UNIT_USECS; + st->xfer[1].rx_buf = st->buffer; + st->xfer[1].len = burst_length; + + spi_message_init_with_transfers(&adis->msg, st->xfer, 2); + + ret = devm_regulator_get_enable(dev, "vdd"); + if (ret) + return dev_err_probe(dev, ret, "Failed to get vdd regulator\n"); + + ret = adis_init(&st->adis, indio_dev, spi, &adis16550_data); + if (ret) + return ret; + + ret = __adis_initial_startup(&st->adis); + if (ret) + return ret; + + ret = adis16550_config_sync(st); + if (ret) + return ret; + + ret = devm_adis_setup_buffer_and_trigger(&st->adis, indio_dev, + adis16550_trigger_handler); + if (ret) + return ret; + + ret = devm_iio_device_register(dev, indio_dev); + if (ret) + return ret; + + adis16550_debugfs_init(indio_dev); + + return 0; +} + +static const struct spi_device_id adis16550_id[] = { + { "adis16550", (kernel_ulong_t)&adis16550_chip_info}, + { } +}; +MODULE_DEVICE_TABLE(spi, adis16550_id); + +static const struct of_device_id adis16550_of_match[] = { + { .compatible = "adi,adis16550", .data = &adis16550_chip_info }, + { } +}; +MODULE_DEVICE_TABLE(of, adis16550_of_match); + +static struct spi_driver adis16550_driver = { + .driver = { + .name = "adis16550", + .of_match_table = adis16550_of_match, + }, + .probe = adis16550_probe, + .id_table = adis16550_id, +}; +module_spi_driver(adis16550_driver); + +MODULE_AUTHOR("Nuno Sa "); +MODULE_AUTHOR("Ramona Gradinariu "); +MODULE_AUTHOR("Antoniu Miclaus "); +MODULE_AUTHOR("Robert Budai "); +MODULE_DESCRIPTION("Analog Devices ADIS16550 IMU driver"); +MODULE_IMPORT_NS("IIO_ADISLIB"); +MODULE_LICENSE("GPL"); From aaa9d61634e0095abe1a1111c8b3e2cd6f2b81d5 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 17 Feb 2025 12:57:50 +0200 Subject: [PATCH 0502/2157] docs: iio: add documentation for adis16550 driver Add documentation for adis16550 driver which describes the driver device files and shows how the user may use the ABI for various scenarios (configuration, measurement, etc.). Co-developed-by: Antoniu Miclaus Signed-off-by: Antoniu Miclaus Co-developed-by: Ramona Gradinariu Signed-off-by: Ramona Gradinariu Signed-off-by: Robert Budai Link: https://patch.msgid.link/20250217105753.605465-7-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- Documentation/iio/adis16550.rst | 376 ++++++++++++++++++++++++++++++++ Documentation/iio/index.rst | 1 + 2 files changed, 377 insertions(+) create mode 100644 Documentation/iio/adis16550.rst diff --git a/Documentation/iio/adis16550.rst b/Documentation/iio/adis16550.rst new file mode 100644 index 000000000000..25db7b8060c4 --- /dev/null +++ b/Documentation/iio/adis16550.rst @@ -0,0 +1,376 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +ADIS16550 driver +================ + +This driver supports Analog Device's IMUs on SPI bus. + +1. Supported devices +==================== + +* `ADIS16550 `_ + +The ADIS16550 is a complete inertial system that includes a triaxis gyroscope +and a triaxis accelerometer. The factory calibration characterizes each sensor for +sensitivity, bias, and alignment. As a result, each sensor has its own dynamic +compensation formulas that provide accurate sensor measurements. + +2. Device attributes +==================== + +Accelerometer, gyroscope measurements are always provided. Furthermore, the +driver offers the capability to retrieve the delta angle and the delta velocity +measurements computed by the device. + +The delta angle measurements represent a calculation of angular displacement +between each sample update, while the delta velocity measurements represent a +calculation of linear velocity change between each sample update. + +Finally, temperature data are provided which show a coarse measurement of +the temperature inside of the IMU device. This data is most useful for +monitoring relative changes in the thermal environment. + +Each IIO device, has a device folder under ``/sys/bus/iio/devices/iio:deviceX``, +where X is the IIO index of the device. Under these folders reside a set of +device files, depending on the characteristics and features of the hardware +device in questions. These files are consistently generalized and documented in +the IIO ABI documentation. + +The following tables show the adis16550 related device files, found in the +specific device folder path ``/sys/bus/iio/devices/iio:deviceX``. + ++-------------------------------------------+----------------------------------------------------------+ +| 3-Axis Accelerometer related device files | Description | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_scale | Scale for the accelerometer channels. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_filter_low_pass_3db_frequency | Bandwidth for the accelerometer channels. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_x_calibbias | Calibration offset for the X-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_x_calibscale | Calibration scale for the X-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_x_raw | Raw X-axis accelerometer channel value. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_y_calibbias | Calibration offset for the Y-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_y_calibscale | Calibration scale for the Y-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_y_raw | Raw Y-axis accelerometer channel value. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_z_calibbias | Calibration offset for the Z-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_z_calibscale | Calibration scale for the Z-axis accelerometer channel. | ++-------------------------------------------+----------------------------------------------------------+ +| in_accel_z_raw | Raw Z-axis accelerometer channel value. | ++-------------------------------------------+----------------------------------------------------------+ +| in_deltavelocity_scale | Scale for delta velocity channels. | ++-------------------------------------------+----------------------------------------------------------+ +| in_deltavelocity_x_raw | Raw X-axis delta velocity channel value. | ++-------------------------------------------+----------------------------------------------------------+ +| in_deltavelocity_y_raw | Raw Y-axis delta velocity channel value. | ++-------------------------------------------+----------------------------------------------------------+ +| in_deltavelocity_z_raw | Raw Z-axis delta velocity channel value. | ++-------------------------------------------+----------------------------------------------------------+ + ++--------------------------------------------+------------------------------------------------------+ +| 3-Axis Gyroscope related device files | Description | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_scale | Scale for the gyroscope channels. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_filter_low_pass_3db_frequency | Scale for the gyroscope channels. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_x_calibbias | Calibration offset for the X-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_x_calibscale | Calibration scale for the X-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_x_raw | Raw X-axis gyroscope channel value. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_y_calibbias | Calibration offset for the Y-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_y_calibscale | Calibration scale for the Y-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_y_raw | Raw Y-axis gyroscope channel value. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_z_calibbias | Calibration offset for the Z-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_z_calibscale | Calibration scale for the Z-axis gyroscope channel. | ++--------------------------------------------+------------------------------------------------------+ +| in_anglvel_z_raw | Raw Z-axis gyroscope channel value. | ++--------------------------------------------+------------------------------------------------------+ +| in_deltaangl_scale | Scale for delta angle channels. | ++--------------------------------------------+------------------------------------------------------+ +| in_deltaangl_x_raw | Raw X-axis delta angle channel value. | ++--------------------------------------------+------------------------------------------------------+ +| in_deltaangl_y_raw | Raw Y-axis delta angle channel value. | ++--------------------------------------------+------------------------------------------------------+ +| in_deltaangl_z_raw | Raw Z-axis delta angle channel value. | ++--------------------------------------------+------------------------------------------------------+ + ++----------------------------------+-------------------------------------------+ +| Temperature sensor related files | Description | ++----------------------------------+-------------------------------------------+ +| in_temp0_raw | Raw temperature channel value. | ++----------------------------------+-------------------------------------------+ +| in_temp0_offset | Offset for the temperature sensor channel.| ++----------------------------------+-------------------------------------------+ +| in_temp0_scale | Scale for the temperature sensor channel. | ++----------------------------------+-------------------------------------------+ + ++----------------------------+--------------------------------------------------------------------------------+ +| Miscellaneous device files | Description | ++----------------------------+--------------------------------------------------------------------------------+ +| name | Name of the IIO device. | ++----------------------------+--------------------------------------------------------------------------------+ +| sampling_frequency | Currently selected sample rate. | ++----------------------------+--------------------------------------------------------------------------------+ + +The following table shows the adis16550 related device debug files, found in the +specific device debug folder path ``/sys/kernel/debug/iio/iio:deviceX``. + ++----------------------+-------------------------------------------------------------------------+ +| Debugfs device files | Description | ++----------------------+-------------------------------------------------------------------------+ +| serial_number | The serial number of the chip in hexadecimal format. | ++----------------------+-------------------------------------------------------------------------+ +| product_id | Chip specific product id (16550). | ++----------------------+-------------------------------------------------------------------------+ +| flash_count | The number of flash writes performed on the device. | ++----------------------+-------------------------------------------------------------------------+ +| firmware_revision | String containing the firmware revision in the following format ##.##. | ++----------------------+-------------------------------------------------------------------------+ +| firmware_date | String containing the firmware date in the following format mm-dd-yyyy. | ++----------------------+-------------------------------------------------------------------------+ + +Channels processed values +------------------------- + +A channel value can be read from its _raw attribute. The value returned is the +raw value as reported by the devices. To get the processed value of the channel, +apply the following formula: + +.. code-block:: bash + + processed value = (_raw + _offset) * _scale + +Where _offset and _scale are device attributes. If no _offset attribute is +present, simply assume its value is 0. + +The adis16550 driver offers data for 5 types of channels, the table below shows +the measurement units for the processed value, which are defined by the IIO +framework: + ++--------------------------------------+---------------------------+ +| Channel type | Measurement unit | ++--------------------------------------+---------------------------+ +| Acceleration on X, Y, and Z axis | Meters per Second squared | ++--------------------------------------+---------------------------+ +| Angular velocity on X, Y and Z axis | Radians per second | ++--------------------------------------+---------------------------+ +| Delta velocity on X. Y, and Z axis | Meters per Second | ++--------------------------------------+---------------------------+ +| Delta angle on X, Y, and Z axis | Radians | ++--------------------------------------+---------------------------+ +| Temperature | Millidegrees Celsius | ++--------------------------------------+---------------------------+ + +Usage examples +-------------- + +Show device name: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat name + adis16550 + +Show accelerometer channels value: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat in_accel_x_raw + 6903851 + root:/sys/bus/iio/devices/iio:device0> cat in_accel_y_raw + 5650550 + root:/sys/bus/iio/devices/iio:device0> cat in_accel_z_raw + 104873530 + root:/sys/bus/iio/devices/iio:device0> cat in_accel_scale + 0.000000095 + +- X-axis acceleration = in_accel_x_raw * in_accel_scale = 0.655865845 m/s^2 +- Y-axis acceleration = in_accel_y_raw * in_accel_scale = 0.53680225 m/s^2 +- Z-axis acceleration = in_accel_z_raw * in_accel_scale = 9.96298535 m/s^2 + +Show gyroscope channels value: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_x_raw + 193309 + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_y_raw + -763676 + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_z_raw + -358108 + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_scale + 0.000000003 + +- X-axis angular velocity = in_anglvel_x_raw * in_anglvel_scale = 0.000579927 rad/s +- Y-axis angular velocity = in_anglvel_y_raw * in_anglvel_scale = −0.002291028 rad/s +- Z-axis angular velocity = in_anglvel_z_raw * in_anglvel_scale = −0.001074324 rad/s + +Set calibration offset for accelerometer channels: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat in_accel_x_calibbias + 0 + + root:/sys/bus/iio/devices/iio:device0> echo 5000 > in_accel_x_calibbias + root:/sys/bus/iio/devices/iio:device0> cat in_accel_x_calibbias + 5000 + +Set calibration offset for gyroscope channels: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_y_calibbias + 0 + + root:/sys/bus/iio/devices/iio:device0> echo -5000 > in_anglvel_y_calibbias + root:/sys/bus/iio/devices/iio:device0> cat in_anglvel_y_calibbias + -5000 + +Set sampling frequency: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat sampling_frequency + 4000.000000 + + root:/sys/bus/iio/devices/iio:device0> echo 1000 > sampling_frequency + 1000.000000 + +Set bandwidth for accelerometer channels: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat in_accel_filter_low_pass_3db_frequency + 0 + + root:/sys/bus/iio/devices/iio:device0> echo 100 > in_accel_filter_low_pass_3db_frequency + root:/sys/bus/iio/devices/iio:device0> cat in_accel_filter_low_pass_3db_frequency + 100 + +Show serial number: + +.. code-block:: bash + + root:/sys/kernel/debug/iio/iio:device0> cat serial_number + 0x000000b6 + +Show product id: + +.. code-block:: bash + + root:/sys/kernel/debug/iio/iio:device0> cat product_id + 16550 + +Show flash count: + +.. code-block:: bash + + root:/sys/kernel/debug/iio/iio:device0> cat flash_count + 13 + +Show firmware revision: + +.. code-block:: bash + + root:/sys/kernel/debug/iio/iio:device0> cat firmware_revision + 1.5 + +Show firmware date: + +.. code-block:: bash + + root:/sys/kernel/debug/iio/iio:device0> cat firmware_date + 28-04-2021 + +3. Device buffers +================= + +This driver supports IIO buffers. + +The device supports retrieving the raw acceleration, gyroscope, delta velocity, +delta angle and temperature measurements using buffers. + +However, when retrieving acceleration or gyroscope data using buffers, delta +readings will not be available and vice versa. This is because the device only +allows to read either acceleration and gyroscope data or delta velocity and +delta angle data at a time and switching between these two burst data selection +modes is time consuming. + +Usage examples +-------------- + +Set device trigger in current_trigger, if not already set: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> cat trigger/current_trigger + + root:/sys/bus/iio/devices/iio:device0> echo adis16550-dev0 > trigger/current_trigger + root:/sys/bus/iio/devices/iio:device0> cat trigger/current_trigger + adis16550-dev0 + +Select channels for buffer read: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> echo 1 > scan_elements/in_deltavelocity_x_en + root:/sys/bus/iio/devices/iio:device0> echo 1 > scan_elements/in_deltavelocity_y_en + root:/sys/bus/iio/devices/iio:device0> echo 1 > scan_elements/in_deltavelocity_z_en + root:/sys/bus/iio/devices/iio:device0> echo 1 > scan_elements/in_temp0_en + +Set the number of samples to be stored in the buffer: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> echo 10 > buffer/length + +Enable buffer readings: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> echo 1 > buffer/enable + +Obtain buffered data: + +.. code-block:: bash + + root:/sys/bus/iio/devices/iio:device0> hexdump -C /dev/iio\:device0 + ... + 0000cdf0 00 00 0d 2f 00 00 08 43 00 00 09 09 00 00 a4 5f |.../...C......._| + 0000ce00 00 00 0d 2f 00 00 07 de 00 00 08 db 00 00 a4 4b |.../...........K| + 0000ce10 00 00 0d 2f 00 00 07 58 00 00 08 a3 00 00 a4 55 |.../...X.......U| + 0000ce20 00 00 0d 2f 00 00 06 d6 00 00 08 5c 00 00 a4 62 |.../.......\...b| + 0000ce30 00 00 0d 2f 00 00 06 45 00 00 08 37 00 00 a4 47 |.../...E...7...G| + 0000ce40 00 00 0d 2f 00 00 05 d4 00 00 08 30 00 00 a3 fa |.../.......0....| + 0000ce50 00 00 0d 2f 00 00 05 d0 00 00 08 12 00 00 a3 d3 |.../............| + 0000ce60 00 00 0d 2f 00 00 05 dd 00 00 08 2e 00 00 a3 e9 |.../............| + 0000ce70 00 00 0d 2f 00 00 05 cc 00 00 08 51 00 00 a3 d5 |.../.......Q....| + 0000ce80 00 00 0d 2f 00 00 05 ba 00 00 08 22 00 00 a3 9a |.../......."....| + 0000ce90 00 00 0d 2f 00 00 05 9c 00 00 07 d9 00 00 a3 40 |.../...........@| + 0000cea0 00 00 0d 2f 00 00 05 68 00 00 07 94 00 00 a2 e4 |.../...h........| + 0000ceb0 00 00 0d 2f 00 00 05 25 00 00 07 8d 00 00 a2 ce |.../...%........| + ... + +See ``Documentation/iio/iio_devbuf.rst`` for more information about how buffered +data is structured. + +4. IIO Interfacing Tools +======================== + +See ``Documentation/iio/iio_tools.rst`` for the description of the available IIO +interfacing tools. diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst index 2d334be2b7f2..4c3072ae71f2 100644 --- a/Documentation/iio/index.rst +++ b/Documentation/iio/index.rst @@ -27,6 +27,7 @@ Industrial I/O Kernel Drivers ad7944 adis16475 adis16480 + adis16550 adxl380 bno055 ep93xx_adc From df330c808182a8beab5d0f84a6cbc9cff76c61fc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:28 +0000 Subject: [PATCH 0503/2157] iio: accel: mma8452: Ensure error return on failure to matching oversampling ratio If a match was not found, then the write_raw() callback would return the odr index, not an error. Return -EINVAL if this occurs. To avoid similar issues in future, introduce j, a new indexing variable rather than using ret for this purpose. Fixes: 79de2ee469aa ("iio: accel: mma8452: claim direct mode during write raw") Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-2-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 962d289065ab..1b2014c4c4b4 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -712,7 +712,7 @@ static int mma8452_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct mma8452_data *data = iio_priv(indio_dev); - int i, ret; + int i, j, ret; ret = iio_device_claim_direct_mode(indio_dev); if (ret) @@ -772,14 +772,18 @@ static int mma8452_write_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - ret = mma8452_get_odr_index(data); + j = mma8452_get_odr_index(data); for (i = 0; i < ARRAY_SIZE(mma8452_os_ratio); i++) { - if (mma8452_os_ratio[i][ret] == val) { + if (mma8452_os_ratio[i][j] == val) { ret = mma8452_set_power_mode(data, i); break; } } + if (i == ARRAY_SIZE(mma8452_os_ratio)) { + ret = -EINVAL; + break; + } break; default: ret = -EINVAL; From cce9172f3d40b681f7d4bc3b867e01b3e8e85efb Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:29 +0000 Subject: [PATCH 0504/2157] iio: accel: mma8452: Factor out guts of write_raw() to simplify locking Factoring out those parts of write_raw() in which direct mode is held allows for direct returns on errors, simplifying the code. Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-3-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 84 ++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 1b2014c4c4b4..8ce4ddadc559 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -707,55 +707,45 @@ static int mma8452_set_hp_filter_frequency(struct mma8452_data *data, return mma8452_change_config(data, MMA8452_HP_FILTER_CUTOFF, reg); } -static int mma8452_write_raw(struct iio_dev *indio_dev, +static int __mma8452_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct mma8452_data *data = iio_priv(indio_dev); int i, j, ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: i = mma8452_get_samp_freq_index(data, val, val2); - if (i < 0) { - ret = i; - break; - } + if (i < 0) + return i; + data->ctrl_reg1 &= ~MMA8452_CTRL_DR_MASK; data->ctrl_reg1 |= i << MMA8452_CTRL_DR_SHIFT; data->sleep_val = mma8452_calculate_sleep(data); - ret = mma8452_change_config(data, MMA8452_CTRL_REG1, - data->ctrl_reg1); - break; + return mma8452_change_config(data, MMA8452_CTRL_REG1, + data->ctrl_reg1); + case IIO_CHAN_INFO_SCALE: i = mma8452_get_scale_index(data, val, val2); - if (i < 0) { - ret = i; - break; - } + if (i < 0) + return i; data->data_cfg &= ~MMA8452_DATA_CFG_FS_MASK; data->data_cfg |= i; - ret = mma8452_change_config(data, MMA8452_DATA_CFG, - data->data_cfg); - break; - case IIO_CHAN_INFO_CALIBBIAS: - if (val < -128 || val > 127) { - ret = -EINVAL; - break; - } + return mma8452_change_config(data, MMA8452_DATA_CFG, + data->data_cfg); - ret = mma8452_change_config(data, - MMA8452_OFF_X + chan->scan_index, - val); - break; + case IIO_CHAN_INFO_CALIBBIAS: + if (val < -128 || val > 127) + return -EINVAL; + + return mma8452_change_config(data, + MMA8452_OFF_X + chan->scan_index, + val); case IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY: if (val == 0 && val2 == 0) { @@ -764,32 +754,38 @@ static int mma8452_write_raw(struct iio_dev *indio_dev, data->data_cfg |= MMA8452_DATA_CFG_HPF_MASK; ret = mma8452_set_hp_filter_frequency(data, val, val2); if (ret < 0) - break; + return ret; } - ret = mma8452_change_config(data, MMA8452_DATA_CFG, + return mma8452_change_config(data, MMA8452_DATA_CFG, data->data_cfg); - break; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: j = mma8452_get_odr_index(data); for (i = 0; i < ARRAY_SIZE(mma8452_os_ratio); i++) { - if (mma8452_os_ratio[i][j] == val) { - ret = mma8452_set_power_mode(data, i); - break; - } + if (mma8452_os_ratio[i][j] == val) + return mma8452_set_power_mode(data, i); } - if (i == ARRAY_SIZE(mma8452_os_ratio)) { - ret = -EINVAL; - break; - } - break; - default: - ret = -EINVAL; - break; - } + return -EINVAL; + + default: + return -EINVAL; + } +} + +static int mma8452_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = __mma8452_write_raw(indio_dev, chan, val, val2, mask); iio_device_release_direct_mode(indio_dev); return ret; } From 9ab72adb90392e23701db04da6103335d4176c9f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:30 +0000 Subject: [PATCH 0505/2157] iio: accel: mma8452: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-4-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 8ce4ddadc559..05f5482f366e 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -497,14 +497,13 @@ static int mma8452_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&data->lock); ret = mma8452_read(data, buffer); mutex_unlock(&data->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; @@ -781,12 +780,11 @@ static int mma8452_write_raw(struct iio_dev *indio_dev, { int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __mma8452_write_raw(indio_dev, chan, val, val2, mask); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From 305f655d059d68d0e2973294eb9139eaba22e83c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:31 +0000 Subject: [PATCH 0506/2157] iio: accel: kx022a: Factor out guts of write_raw() to allow direct returns Create a new utility function for the actions taken when direct mode is held. This allows for direct returns, simplifying the code flow. Cc: Matti Vaittinen Reviewed-by: Matti Vaittinen Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-5-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kionix-kx022a.c | 104 ++++++++++++++++-------------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index 3a56ab00791a..727e007c5fc1 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -510,12 +510,63 @@ static int kx022a_write_raw_get_fmt(struct iio_dev *idev, } } +static int __kx022a_write_raw(struct iio_dev *idev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct kx022a_data *data = iio_priv(idev); + int ret, n; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + n = ARRAY_SIZE(kx022a_accel_samp_freq_table); + + while (n--) + if (val == kx022a_accel_samp_freq_table[n][0] && + val2 == kx022a_accel_samp_freq_table[n][1]) + break; + if (n < 0) + return -EINVAL; + + ret = kx022a_turn_off_lock(data); + if (ret) + return ret; + + ret = regmap_update_bits(data->regmap, + data->chip_info->odcntl, + KX022A_MASK_ODR, n); + data->odr_ns = kx022a_odrs[n]; + kx022a_turn_on_unlock(data); + return ret; + case IIO_CHAN_INFO_SCALE: + n = data->chip_info->scale_table_size / 2; + + while (n-- > 0) + if (val == data->chip_info->scale_table[n][0] && + val2 == data->chip_info->scale_table[n][1]) + break; + if (n < 0) + return -EINVAL; + + ret = kx022a_turn_off_lock(data); + if (ret) + return ret; + + ret = regmap_update_bits(data->regmap, data->chip_info->cntl, + KX022A_MASK_GSEL, + n << KX022A_GSEL_SHIFT); + kx022a_turn_on_unlock(data); + return ret; + default: + return -EINVAL; + } +} + static int kx022a_write_raw(struct iio_dev *idev, struct iio_chan_spec const *chan, int val, int val2, long mask) { - struct kx022a_data *data = iio_priv(idev); - int ret, n; + int ret; /* * We should not allow changing scale or frequency when FIFO is running @@ -530,55 +581,8 @@ static int kx022a_write_raw(struct iio_dev *idev, if (ret) return ret; - switch (mask) { - case IIO_CHAN_INFO_SAMP_FREQ: - n = ARRAY_SIZE(kx022a_accel_samp_freq_table); + ret = __kx022a_write_raw(idev, chan, val, val2, mask); - while (n--) - if (val == kx022a_accel_samp_freq_table[n][0] && - val2 == kx022a_accel_samp_freq_table[n][1]) - break; - if (n < 0) { - ret = -EINVAL; - goto unlock_out; - } - ret = kx022a_turn_off_lock(data); - if (ret) - break; - - ret = regmap_update_bits(data->regmap, - data->chip_info->odcntl, - KX022A_MASK_ODR, n); - data->odr_ns = kx022a_odrs[n]; - kx022a_turn_on_unlock(data); - break; - case IIO_CHAN_INFO_SCALE: - n = data->chip_info->scale_table_size / 2; - - while (n-- > 0) - if (val == data->chip_info->scale_table[n][0] && - val2 == data->chip_info->scale_table[n][1]) - break; - if (n < 0) { - ret = -EINVAL; - goto unlock_out; - } - - ret = kx022a_turn_off_lock(data); - if (ret) - break; - - ret = regmap_update_bits(data->regmap, data->chip_info->cntl, - KX022A_MASK_GSEL, - n << KX022A_GSEL_SHIFT); - kx022a_turn_on_unlock(data); - break; - default: - ret = -EINVAL; - break; - } - -unlock_out: iio_device_release_direct_mode(idev); return ret; From 6c21fc09c3e60c018997c80331cdac5f1603558b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:32 +0000 Subject: [PATCH 0507/2157] iio: accel: kx022a: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Cc: Matti Vaittinen Reviewed-by: Matti Vaittinen Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-6-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kionix-kx022a.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index 727e007c5fc1..07dcf5f0599f 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -577,13 +577,12 @@ static int kx022a_write_raw(struct iio_dev *idev, * issues if users trust the watermark to be reached within known * time-limit). */ - ret = iio_device_claim_direct_mode(idev); - if (ret) - return ret; + if (!iio_device_claim_direct(idev)) + return -EBUSY; ret = __kx022a_write_raw(idev, chan, val, val2, mask); - iio_device_release_direct_mode(idev); + iio_device_release_direct(idev); return ret; } @@ -624,15 +623,14 @@ static int kx022a_read_raw(struct iio_dev *idev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(idev); - if (ret) - return ret; + if (!iio_device_claim_direct(idev)) + return -EBUSY; mutex_lock(&data->mutex); ret = kx022a_get_axis(data, chan, val); mutex_unlock(&data->mutex); - iio_device_release_direct_mode(idev); + iio_device_release_direct(idev); return ret; From 60a0cf2ebab92011055ab7db6553c0fc3c546938 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:33 +0000 Subject: [PATCH 0508/2157] iio: accel: msa311: Fix failure to release runtime pm if direct mode claim fails. Reorder the claiming of direct mode and runtime pm calls to simplify handling a little. For correct error handling, after the reorder iio_device_release_direct_mode() must be claimed in an error occurs in pm_runtime_resume_and_get() Fixes: 1ca2cfbc0c33 ("iio: add MEMSensing MSA311 3-axis accelerometer driver") Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-7-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/msa311.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/iio/accel/msa311.c b/drivers/iio/accel/msa311.c index fe4c32a9558a..7f4ab7cdbc4a 100644 --- a/drivers/iio/accel/msa311.c +++ b/drivers/iio/accel/msa311.c @@ -594,23 +594,25 @@ static int msa311_read_raw_data(struct iio_dev *indio_dev, __le16 axis; int err; - err = pm_runtime_resume_and_get(dev); - if (err) - return err; - err = iio_device_claim_direct_mode(indio_dev); if (err) return err; + err = pm_runtime_resume_and_get(dev); + if (err) { + iio_device_release_direct_mode(indio_dev); + return err; + } + mutex_lock(&msa311->lock); err = msa311_get_axis(msa311, chan, &axis); mutex_unlock(&msa311->lock); - iio_device_release_direct_mode(indio_dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + iio_device_release_direct_mode(indio_dev); + if (err) { dev_err(dev, "can't get axis %s (%pe)\n", chan->datasheet_name, ERR_PTR(err)); @@ -756,10 +758,6 @@ static int msa311_write_samp_freq(struct iio_dev *indio_dev, int val, int val2) unsigned int odr; int err; - err = pm_runtime_resume_and_get(dev); - if (err) - return err; - /* * Sampling frequency changing is prohibited when buffer mode is * enabled, because sometimes MSA311 chip returns outliers during @@ -769,6 +767,12 @@ static int msa311_write_samp_freq(struct iio_dev *indio_dev, int val, int val2) if (err) return err; + err = pm_runtime_resume_and_get(dev); + if (err) { + iio_device_release_direct_mode(indio_dev); + return err; + } + err = -EINVAL; for (odr = 0; odr < ARRAY_SIZE(msa311_odr_table); odr++) if (val == msa311_odr_table[odr].integral && @@ -779,11 +783,11 @@ static int msa311_write_samp_freq(struct iio_dev *indio_dev, int val, int val2) break; } - iio_device_release_direct_mode(indio_dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + iio_device_release_direct_mode(indio_dev); + if (err) dev_err(dev, "can't update frequency (%pe)\n", ERR_PTR(err)); From 18a53d40122df92933d57aba2ce114aab473f73e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:34 +0000 Subject: [PATCH 0509/2157] iio: accel: msa311: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-8-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/msa311.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/iio/accel/msa311.c b/drivers/iio/accel/msa311.c index 7f4ab7cdbc4a..d31c11fbbe68 100644 --- a/drivers/iio/accel/msa311.c +++ b/drivers/iio/accel/msa311.c @@ -594,13 +594,12 @@ static int msa311_read_raw_data(struct iio_dev *indio_dev, __le16 axis; int err; - err = iio_device_claim_direct_mode(indio_dev); - if (err) - return err; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; err = pm_runtime_resume_and_get(dev); if (err) { - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return err; } @@ -611,7 +610,7 @@ static int msa311_read_raw_data(struct iio_dev *indio_dev, pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (err) { dev_err(dev, "can't get axis %s (%pe)\n", @@ -763,13 +762,12 @@ static int msa311_write_samp_freq(struct iio_dev *indio_dev, int val, int val2) * enabled, because sometimes MSA311 chip returns outliers during * frequency values growing up in the read operation moment. */ - err = iio_device_claim_direct_mode(indio_dev); - if (err) - return err; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; err = pm_runtime_resume_and_get(dev); if (err) { - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return err; } @@ -786,7 +784,7 @@ static int msa311_write_samp_freq(struct iio_dev *indio_dev, int val, int val2) pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (err) dev_err(dev, "can't update frequency (%pe)\n", ERR_PTR(err)); From 3cb26cba4a603f6488314d7f9cc8e5d928aed128 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:01:35 +0000 Subject: [PATCH 0510/2157] iio: accel: Switch to sparse friendly iio_device_claim/release_direct() Single patch for the simple cases in accelerometer drivers. These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Cc: Antoniu Miclaus Reviewed-by: David Lechner Link: https://patch.msgid.link/20250217140135.896574-9-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl372.c | 7 +++---- drivers/iio/accel/adxl380.c | 7 +++---- drivers/iio/accel/bma180.c | 7 +++---- drivers/iio/accel/bmi088-accel-core.c | 7 ++++--- drivers/iio/accel/fxls8962af-core.c | 21 +++++++++------------ 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index 8ba5fbe6e1f5..961145b50293 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -763,12 +763,11 @@ static int adxl372_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = adxl372_read_axis(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/accel/adxl380.c b/drivers/iio/accel/adxl380.c index 90340f134722..0cf3c6815829 100644 --- a/drivers/iio/accel/adxl380.c +++ b/drivers/iio/accel/adxl380.c @@ -1175,12 +1175,11 @@ static int adxl380_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = adxl380_read_chn(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 128db14ba726..aa664a923f91 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -540,14 +540,13 @@ static int bma180_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&data->mutex); ret = bma180_get_data_reg(data, chan->scan_index); mutex_unlock(&data->mutex); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; if (chan->scan_type.sign == 's') { diff --git a/drivers/iio/accel/bmi088-accel-core.c b/drivers/iio/accel/bmi088-accel-core.c index 36e5d06ffd33..dea126f993c1 100644 --- a/drivers/iio/accel/bmi088-accel-core.c +++ b/drivers/iio/accel/bmi088-accel-core.c @@ -313,12 +313,13 @@ static int bmi088_accel_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) + if (!iio_device_claim_direct(indio_dev)) { + ret = -EBUSY; goto out_read_raw_pm_put; + } ret = bmi088_accel_get_axis(data, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (!ret) ret = IIO_VAL_INT; diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 987212a7c038..48e4282964a0 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -460,22 +460,20 @@ static int fxls8962af_write_raw(struct iio_dev *indio_dev, if (val != 0) return -EINVAL; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = fxls8962af_set_full_scale(data, val2); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SAMP_FREQ: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = fxls8962af_set_samp_freq(data, val, val2); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: return -EINVAL; @@ -683,14 +681,13 @@ fxls8962af_write_event_config(struct iio_dev *indio_dev, fxls8962af_active(data); ret = fxls8962af_power_on(data); } else { - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; /* Not in buffered mode so disable power */ ret = fxls8962af_power_off(data); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); } return ret; From 5f763d31a53643659499b5eba8e882900a0717b8 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Mon, 17 Feb 2025 16:03:34 +0200 Subject: [PATCH 0511/2157] dt-bindings: iio: light: al3010: add al3000a support AL3000a is an ambient light sensor quite closely related to exising AL3010 and can reuse exising schema for AL3010. Signed-off-by: Svyatoslav Ryhel Reviewed-by: David Heidelberg Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250217140336.107476-2-clamor95@gmail.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/light/dynaimage,al3010.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml b/Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml index a3a979553e32..f1048c30e73e 100644 --- a/Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml +++ b/Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml @@ -4,14 +4,16 @@ $id: http://devicetree.org/schemas/iio/light/dynaimage,al3010.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Dyna-Image AL3010 sensor +title: Dyna-Image AL3000a/AL3010 sensor maintainers: - David Heidelberg properties: compatible: - const: dynaimage,al3010 + enum: + - dynaimage,al3000a + - dynaimage,al3010 reg: maxItems: 1 From d531b9f78949533691bf72d95e3af14b657047d9 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Mon, 17 Feb 2025 16:03:35 +0200 Subject: [PATCH 0512/2157] iio: light: Add support for AL3000a illuminance sensor AL3000a is a simple I2C-based ambient light sensor, which is closely related to AL3010 and AL3320a, but has significantly different way of processing data generated by the sensor. Tested-by: Robert Eckelmann Tested-by: Antoni Aloy Torrens Signed-off-by: Svyatoslav Ryhel Reviewed-by: David Heidelberg Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250217140336.107476-3-clamor95@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/Kconfig | 10 ++ drivers/iio/light/Makefile | 1 + drivers/iio/light/al3000a.c | 209 ++++++++++++++++++++++++++++++++++++ 3 files changed, 220 insertions(+) create mode 100644 drivers/iio/light/al3000a.c diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 89aec08df739..4a7d983c9cd4 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -43,6 +43,16 @@ config ADUX1020 To compile this driver as a module, choose M here: the module will be called adux1020. +config AL3000A + tristate "AL3000a ambient light sensor" + depends on I2C + help + Say Y here if you want to build a driver for the Dyna Image AL3000a + ambient light sensor. + + To compile this driver as a module, choose M here: the + module will be called al3000a. + config AL3010 tristate "AL3010 ambient light sensor" depends on I2C diff --git a/drivers/iio/light/Makefile b/drivers/iio/light/Makefile index 30f5fe47cfb6..8229ebe6edc4 100644 --- a/drivers/iio/light/Makefile +++ b/drivers/iio/light/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_ACPI_ALS) += acpi-als.o obj-$(CONFIG_ADJD_S311) += adjd_s311.o obj-$(CONFIG_ADUX1020) += adux1020.o +obj-$(CONFIG_AL3000A) += al3000a.o obj-$(CONFIG_AL3010) += al3010.o obj-$(CONFIG_AL3320A) += al3320a.o obj-$(CONFIG_APDS9160) += apds9160.o diff --git a/drivers/iio/light/al3000a.c b/drivers/iio/light/al3000a.c new file mode 100644 index 000000000000..e2fbb1270040 --- /dev/null +++ b/drivers/iio/light/al3000a.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define AL3000A_REG_SYSTEM 0x00 +#define AL3000A_REG_DATA 0x05 + +#define AL3000A_CONFIG_ENABLE 0x00 +#define AL3000A_CONFIG_DISABLE 0x0b +#define AL3000A_CONFIG_RESET 0x0f +#define AL3000A_GAIN_MASK GENMASK(5, 0) + +/* + * These are pre-calculated lux values based on possible output of sensor + * (range 0x00 - 0x3F) + */ +static const u32 lux_table[] = { + 1, 1, 1, 2, 2, 2, 3, 4, /* 0 - 7 */ + 4, 5, 6, 7, 9, 11, 13, 16, /* 8 - 15 */ + 19, 22, 27, 32, 39, 46, 56, 67, /* 16 - 23 */ + 80, 96, 116, 139, 167, 200, 240, 289, /* 24 - 31 */ + 347, 416, 499, 600, 720, 864, 1037, 1245, /* 32 - 39 */ + 1495, 1795, 2155, 2587, 3105, 3728, 4475, 5373, /* 40 - 47 */ + 6450, 7743, 9296, 11160, 13397, 16084, 19309, 23180, /* 48 - 55 */ + 27828, 33408, 40107, 48148, 57803, 69393, 83306, 100000 /* 56 - 63 */ +}; + +static const struct regmap_config al3000a_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = AL3000A_REG_DATA, +}; + +struct al3000a_data { + struct regmap *regmap; + struct regulator *vdd_supply; +}; + +static const struct iio_chan_spec al3000a_channels[] = { + { + .type = IIO_LIGHT, + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), + }, +}; + +static int al3000a_set_pwr_on(struct al3000a_data *data) +{ + struct device *dev = regmap_get_device(data->regmap); + int ret; + + ret = regulator_enable(data->vdd_supply); + if (ret) { + dev_err(dev, "failed to enable vdd power supply\n"); + return ret; + } + + return regmap_write(data->regmap, AL3000A_REG_SYSTEM, AL3000A_CONFIG_ENABLE); +} + +static void al3000a_set_pwr_off(void *_data) +{ + struct al3000a_data *data = _data; + struct device *dev = regmap_get_device(data->regmap); + int ret; + + ret = regmap_write(data->regmap, AL3000A_REG_SYSTEM, AL3000A_CONFIG_DISABLE); + if (ret) + dev_err(dev, "failed to write system register\n"); + + ret = regulator_disable(data->vdd_supply); + if (ret) + dev_err(dev, "failed to disable vdd power supply\n"); +} + +static int al3000a_init(struct al3000a_data *data) +{ + int ret; + + ret = al3000a_set_pwr_on(data); + if (ret) + return ret; + + ret = regmap_write(data->regmap, AL3000A_REG_SYSTEM, AL3000A_CONFIG_RESET); + if (ret) + return ret; + + return regmap_write(data->regmap, AL3000A_REG_SYSTEM, AL3000A_CONFIG_ENABLE); +} + +static int al3000a_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + struct al3000a_data *data = iio_priv(indio_dev); + int ret, gain; + + switch (mask) { + case IIO_CHAN_INFO_PROCESSED: + ret = regmap_read(data->regmap, AL3000A_REG_DATA, &gain); + if (ret) + return ret; + + *val = lux_table[gain & AL3000A_GAIN_MASK]; + + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static const struct iio_info al3000a_info = { + .read_raw = al3000a_read_raw, +}; + +static int al3000a_probe(struct i2c_client *client) +{ + struct al3000a_data *data; + struct device *dev = &client->dev; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + data = iio_priv(indio_dev); + i2c_set_clientdata(client, indio_dev); + + data->regmap = devm_regmap_init_i2c(client, &al3000a_regmap_config); + if (IS_ERR(data->regmap)) + return dev_err_probe(dev, PTR_ERR(data->regmap), + "cannot allocate regmap\n"); + + data->vdd_supply = devm_regulator_get(dev, "vdd"); + if (IS_ERR(data->vdd_supply)) + return dev_err_probe(dev, PTR_ERR(data->vdd_supply), + "failed to get vdd regulator\n"); + + indio_dev->info = &al3000a_info; + indio_dev->name = "al3000a"; + indio_dev->channels = al3000a_channels; + indio_dev->num_channels = ARRAY_SIZE(al3000a_channels); + indio_dev->modes = INDIO_DIRECT_MODE; + + ret = al3000a_init(data); + if (ret) + return dev_err_probe(dev, ret, "failed to init ALS\n"); + + ret = devm_add_action_or_reset(dev, al3000a_set_pwr_off, data); + if (ret) + return dev_err_probe(dev, ret, "failed to add action\n"); + + return devm_iio_device_register(dev, indio_dev); +} + +static int al3000a_suspend(struct device *dev) +{ + struct al3000a_data *data = iio_priv(dev_get_drvdata(dev)); + + al3000a_set_pwr_off(data); + return 0; +} + +static int al3000a_resume(struct device *dev) +{ + struct al3000a_data *data = iio_priv(dev_get_drvdata(dev)); + + return al3000a_set_pwr_on(data); +} + +static DEFINE_SIMPLE_DEV_PM_OPS(al3000a_pm_ops, al3000a_suspend, al3000a_resume); + +static const struct i2c_device_id al3000a_id[] = { + { "al3000a" }, + { } +}; +MODULE_DEVICE_TABLE(i2c, al3000a_id); + +static const struct of_device_id al3000a_of_match[] = { + { .compatible = "dynaimage,al3000a" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, al3000a_of_match); + +static struct i2c_driver al3000a_driver = { + .driver = { + .name = "al3000a", + .of_match_table = al3000a_of_match, + .pm = pm_sleep_ptr(&al3000a_pm_ops), + }, + .probe = al3000a_probe, + .id_table = al3000a_id, +}; +module_i2c_driver(al3000a_driver); + +MODULE_AUTHOR("Svyatolsav Ryhel "); +MODULE_DESCRIPTION("al3000a Ambient Light Sensor driver"); +MODULE_LICENSE("GPL"); From 728341b0ca4dd2442e9f653cfb27b484fb665df7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:07 +0000 Subject: [PATCH 0513/2157] iio: adc: stm32-dfsdm: Factor out core of reading INFO_RAW This allows the claim on direct mode to be release in one place rather than a lot of error paths, simplifying code. Reviewed-by: Olivier Moysan Link: https://patch.msgid.link/20250217141630.897334-8-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 59 +++++++++++++++++++------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index fe11b0d8eab3..1cf2594d6872 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1306,6 +1306,38 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int __stm32_dfsdm_read_info_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + int ret = 0; + + if (adc->hwc) + ret = iio_hw_consumer_enable(adc->hwc); + if (adc->backend) + ret = iio_backend_enable(adc->backend[chan->scan_index]); + if (ret < 0) { + dev_err(&indio_dev->dev, + "%s: IIO enable failed (channel %d)\n", + __func__, chan->channel); + return ret; + } + ret = stm32_dfsdm_single_conv(indio_dev, chan, val); + if (adc->hwc) + iio_hw_consumer_disable(adc->hwc); + if (adc->backend) + iio_backend_disable(adc->backend[chan->scan_index]); + if (ret < 0) { + dev_err(&indio_dev->dev, + "%s: Conversion failed (channel %d)\n", + __func__, chan->channel); + return ret; + } + + return 0; +} + static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -1326,30 +1358,11 @@ static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, ret = iio_device_claim_direct_mode(indio_dev); if (ret) return ret; - if (adc->hwc) - ret = iio_hw_consumer_enable(adc->hwc); - if (adc->backend) - ret = iio_backend_enable(adc->backend[idx]); - if (ret < 0) { - dev_err(&indio_dev->dev, - "%s: IIO enable failed (channel %d)\n", - __func__, chan->channel); - iio_device_release_direct_mode(indio_dev); - return ret; - } - ret = stm32_dfsdm_single_conv(indio_dev, chan, val); - if (adc->hwc) - iio_hw_consumer_disable(adc->hwc); - if (adc->backend) - iio_backend_disable(adc->backend[idx]); - if (ret < 0) { - dev_err(&indio_dev->dev, - "%s: Conversion failed (channel %d)\n", - __func__, chan->channel); - iio_device_release_direct_mode(indio_dev); - return ret; - } + + ret = __stm32_dfsdm_read_info_raw(indio_dev, chan, val); iio_device_release_direct_mode(indio_dev); + if (ret) + return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: From e493763302ce70bcb76d3882bfb57c2c299a79d8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:08 +0000 Subject: [PATCH 0514/2157] iio: adc: stm32-dfsdm: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Olivier Moysan Link: https://patch.msgid.link/20250217141630.897334-9-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 1cf2594d6872..726ddafc9f6d 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1275,9 +1275,8 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = stm32_dfsdm_compute_all_osrs(indio_dev, val); if (!ret) { @@ -1287,19 +1286,18 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, adc->oversamp = val; adc->sample_freq = spi_freq / val; } - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SAMP_FREQ: if (!val) return -EINVAL; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = dfsdm_adc_set_samp_freq(indio_dev, val, spi_freq); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } @@ -1355,12 +1353,11 @@ static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __stm32_dfsdm_read_info_raw(indio_dev, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; return IIO_VAL_INT; From 5ff6b02d9bb821e020910029857f150efeaa9d26 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:09 +0000 Subject: [PATCH 0515/2157] iio: adc: ad4030: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-10-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4030.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c index ab5497c8ea1e..209cfc2e1785 100644 --- a/drivers/iio/adc/ad4030.c +++ b/drivers/iio/adc/ad4030.c @@ -784,13 +784,12 @@ static int ad4030_read_raw(struct iio_dev *indio_dev, if (info == IIO_CHAN_INFO_SCALE) return ad4030_get_chan_scale(indio_dev, chan, val, val2); - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad4030_read_raw_dispatch(indio_dev, chan, val, val2, info); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } @@ -822,13 +821,12 @@ static int ad4030_write_raw(struct iio_dev *indio_dev, { int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad4030_write_raw_dispatch(indio_dev, chan, val, val2, info); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } @@ -839,16 +837,15 @@ static int ad4030_reg_access(struct iio_dev *indio_dev, unsigned int reg, const struct ad4030_state *st = iio_priv(indio_dev); int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; if (readval) ret = regmap_read(st->regmap, reg, readval); else ret = regmap_write(st->regmap, reg, writeval); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From 32143dbd9591b404121aaec38a3455a3287f073d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:10 +0000 Subject: [PATCH 0516/2157] iio: adc: ad7192: Factor out core of ad7192_write_raw() to simplify error handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out everything under the lock, use guard() for the mutex to avoid need to manually unlock, and switch sense of matching checks in loops to reduce indent. There are some functional changes in here as well as the code no longer updates the filter_freq_available if no change has been made to the oversampling ratio. Cc: Alisa-Dariana Roman Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-11-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 129 ++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 63 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index e96a5ae92375..785429900da8 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -945,80 +946,82 @@ static int ad7192_read_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int __ad7192_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, + int val2, + long mask) +{ + struct ad7192_state *st = iio_priv(indio_dev); + int i, div; + unsigned int tmp; + + guard(mutex)(&st->lock); + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) { + if (val2 != st->scale_avail[i][1]) + continue; + + tmp = st->conf; + st->conf &= ~AD7192_CONF_GAIN_MASK; + st->conf |= FIELD_PREP(AD7192_CONF_GAIN_MASK, i); + if (tmp == st->conf) + return 0; + ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, st->conf); + ad7192_calibrate_all(st); + return 0; + } + return -EINVAL; + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) + return -EINVAL; + + div = st->fclk / (val * ad7192_get_f_order(st) * 1024); + if (div < 1 || div > 1023) + return -EINVAL; + + st->mode &= ~AD7192_MODE_RATE_MASK; + st->mode |= FIELD_PREP(AD7192_MODE_RATE_MASK, div); + ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); + ad7192_update_filter_freq_avail(st); + return 0; + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + return ad7192_set_3db_filter_freq(st, val, val2 / 1000); + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + for (i = 0; i < ARRAY_SIZE(st->oversampling_ratio_avail); i++) { + if (val != st->oversampling_ratio_avail[i]) + continue; + + tmp = st->mode; + st->mode &= ~AD7192_MODE_AVG_MASK; + st->mode |= FIELD_PREP(AD7192_MODE_AVG_MASK, i); + if (tmp == st->mode) + return 0; + ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); + ad7192_update_filter_freq_avail(st); + return 0; + } + return -EINVAL; + default: + return -EINVAL; + } +} + static int ad7192_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { - struct ad7192_state *st = iio_priv(indio_dev); - int ret, i, div; - unsigned int tmp; + int ret; ret = iio_device_claim_direct_mode(indio_dev); if (ret) return ret; - mutex_lock(&st->lock); - - switch (mask) { - case IIO_CHAN_INFO_SCALE: - ret = -EINVAL; - for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) - if (val2 == st->scale_avail[i][1]) { - ret = 0; - tmp = st->conf; - st->conf &= ~AD7192_CONF_GAIN_MASK; - st->conf |= FIELD_PREP(AD7192_CONF_GAIN_MASK, i); - if (tmp == st->conf) - break; - ad_sd_write_reg(&st->sd, AD7192_REG_CONF, - 3, st->conf); - ad7192_calibrate_all(st); - break; - } - break; - case IIO_CHAN_INFO_SAMP_FREQ: - if (!val) { - ret = -EINVAL; - break; - } - - div = st->fclk / (val * ad7192_get_f_order(st) * 1024); - if (div < 1 || div > 1023) { - ret = -EINVAL; - break; - } - - st->mode &= ~AD7192_MODE_RATE_MASK; - st->mode |= FIELD_PREP(AD7192_MODE_RATE_MASK, div); - ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); - ad7192_update_filter_freq_avail(st); - break; - case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: - ret = ad7192_set_3db_filter_freq(st, val, val2 / 1000); - break; - case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - ret = -EINVAL; - for (i = 0; i < ARRAY_SIZE(st->oversampling_ratio_avail); i++) - if (val == st->oversampling_ratio_avail[i]) { - ret = 0; - tmp = st->mode; - st->mode &= ~AD7192_MODE_AVG_MASK; - st->mode |= FIELD_PREP(AD7192_MODE_AVG_MASK, i); - if (tmp == st->mode) - break; - ad_sd_write_reg(&st->sd, AD7192_REG_MODE, - 3, st->mode); - break; - } - ad7192_update_filter_freq_avail(st); - break; - default: - ret = -EINVAL; - } - - mutex_unlock(&st->lock); + ret = __ad7192_write_raw(indio_dev, chan, val, val2, mask); iio_device_release_direct_mode(indio_dev); From 4758f987d3a1b0e7f7b559fb864db02ae2474231 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:11 +0000 Subject: [PATCH 0517/2157] iio: adc: ad7192: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Cc: Alisa-Dariana Roman Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-12-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 785429900da8..f6150b905286 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -694,9 +694,8 @@ static ssize_t ad7192_set(struct device *dev, if (ret < 0) return ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; switch ((u32)this_attr->address) { case AD7192_REG_GPOCON: @@ -719,7 +718,7 @@ static ssize_t ad7192_set(struct device *dev, ret = -EINVAL; } - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret ? ret : len; } @@ -1017,13 +1016,12 @@ static int ad7192_write_raw(struct iio_dev *indio_dev, { int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __ad7192_write_raw(indio_dev, chan, val, val2, mask); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From 0af1c801a15225304a6328258efbf2bee245c654 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:12 +0000 Subject: [PATCH 0518/2157] iio: adc: ad7768-1: Move setting of val a bit later to avoid unnecessary return value check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data used is all in local variables so there is no advantage in setting *val = ret with the direct mode claim held. Move it later to after error check. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-13-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 113703fb7245..c2ba357b82d8 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -370,12 +370,11 @@ static int ad7768_read_raw(struct iio_dev *indio_dev, return ret; ret = ad7768_scan_direct(indio_dev); - if (ret >= 0) - *val = ret; iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; + *val = ret; return IIO_VAL_INT; From e76750f50aaae66817ffc92417a0ac11c57dede6 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:13 +0000 Subject: [PATCH 0519/2157] iio: adc: ad7768-1: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-14-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index c2ba357b82d8..ea829c51e80b 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -365,13 +365,12 @@ static int ad7768_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7768_scan_direct(indio_dev); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; *val = ret; From 15bed4f0cbf149232b0e17d64a178230b2f21be9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:14 +0000 Subject: [PATCH 0520/2157] iio: adc: ad7606: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. This driver got partly converted during removal of the _scoped form. However some more cases got added in parallel. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-15-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 87908cc51e48..f566248db70a 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -862,11 +862,10 @@ static int ad7606_write_raw(struct iio_dev *indio_dev, val = (val * MICRO) + val2; i = find_closest(val, scale_avail_uv, cs->num_scales); - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = st->write_scale(indio_dev, ch, i + cs->reg_offset); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; cs->range = i; @@ -878,11 +877,10 @@ static int ad7606_write_raw(struct iio_dev *indio_dev, i = find_closest(val, st->oversampling_avail, st->num_os_ratios); - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = st->write_os(indio_dev, i); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; st->oversampling = st->oversampling_avail[i]; From c05cbf02df282c1f6825ef99634fb7dcb2fc3da8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:15 +0000 Subject: [PATCH 0521/2157] iio: adc: ad7791: Factor out core of ad7791_write_raw() to simplify error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out everything under the direct mode claim allowing direct returns in error paths. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-16-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7791.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c index 76118fe22db8..e49d4843f304 100644 --- a/drivers/iio/adc/ad7791.c +++ b/drivers/iio/adc/ad7791.c @@ -310,15 +310,11 @@ static int ad7791_read_raw(struct iio_dev *indio_dev, return -EINVAL; } -static int ad7791_write_raw(struct iio_dev *indio_dev, +static int __ad7791_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct ad7791_state *st = iio_priv(indio_dev); - int ret, i; - - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + int i; switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: @@ -328,20 +324,30 @@ static int ad7791_write_raw(struct iio_dev *indio_dev, break; } - if (i == ARRAY_SIZE(ad7791_sample_freq_avail)) { - ret = -EINVAL; - break; - } + if (i == ARRAY_SIZE(ad7791_sample_freq_avail)) + return -EINVAL; st->filter &= ~AD7791_FILTER_RATE_MASK; st->filter |= i; ad_sd_write_reg(&st->sd, AD7791_REG_FILTER, sizeof(st->filter), st->filter); - break; + return 0; default: - ret = -EINVAL; + return -EINVAL; } +} + +static int ad7791_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, int val2, long mask) +{ + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = __ad7791_write_raw(indio_dev, chan, val, val2, mask); iio_device_release_direct_mode(indio_dev); return ret; From 85e840ced7a89cbd7c401f299eb337db5f41acd0 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:16 +0000 Subject: [PATCH 0522/2157] iio: adc: ad7791: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-17-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7791.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c index e49d4843f304..597c2686ffa4 100644 --- a/drivers/iio/adc/ad7791.c +++ b/drivers/iio/adc/ad7791.c @@ -343,13 +343,12 @@ static int ad7791_write_raw(struct iio_dev *indio_dev, { int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __ad7791_write_raw(indio_dev, chan, val, val2, mask); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From f1a4a2e6b85ef9c76868173b87b19e2fa3107cd1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:17 +0000 Subject: [PATCH 0523/2157] iio: adc: ad7793: Factor out core of ad7793_write_raw() to simplify error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out everything under the direct mode claim allowing direct returns in error paths. Switch sense of matching in the loop and use a continue to reduce code indent and improve readability. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-18-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7793.c | 75 +++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 1b50d9643a63..861a3e44e5ad 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -462,64 +462,69 @@ static int ad7793_read_raw(struct iio_dev *indio_dev, return -EINVAL; } -static int ad7793_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, - int val2, - long mask) +static int __ad7793_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) { struct ad7793_state *st = iio_priv(indio_dev); - int ret, i; + int i; unsigned int tmp; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - switch (mask) { case IIO_CHAN_INFO_SCALE: - ret = -EINVAL; - for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) - if (val2 == st->scale_avail[i][1]) { - ret = 0; - tmp = st->conf; - st->conf &= ~AD7793_CONF_GAIN(-1); - st->conf |= AD7793_CONF_GAIN(i); + for (i = 0; i < ARRAY_SIZE(st->scale_avail); i++) { + if (val2 != st->scale_avail[i][1]) + continue; - if (tmp == st->conf) - break; + tmp = st->conf; + st->conf &= ~AD7793_CONF_GAIN(-1); + st->conf |= AD7793_CONF_GAIN(i); - ad_sd_write_reg(&st->sd, AD7793_REG_CONF, - sizeof(st->conf), st->conf); - ad7793_calibrate_all(st); - break; - } - break; - case IIO_CHAN_INFO_SAMP_FREQ: - if (!val) { - ret = -EINVAL; - break; + if (tmp == st->conf) + return 0; + + ad_sd_write_reg(&st->sd, AD7793_REG_CONF, + sizeof(st->conf), st->conf); + ad7793_calibrate_all(st); + + return 0; } + return -EINVAL; + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) + return -EINVAL; for (i = 0; i < 16; i++) if (val == st->chip_info->sample_freq_avail[i]) break; - if (i == 16) { - ret = -EINVAL; - break; - } + if (i == 16) + return -EINVAL; st->mode &= ~AD7793_MODE_RATE(-1); st->mode |= AD7793_MODE_RATE(i); ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), st->mode); - break; + return 0; default: - ret = -EINVAL; + return -EINVAL; } +} + +static int ad7793_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int ret; + + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = __ad7793_write_raw(indio_dev, chan, val, val2, mask); iio_device_release_direct_mode(indio_dev); + return ret; } From 4d47f64ca82e0bfb9c2fffe37761685e7e09e13b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:18 +0000 Subject: [PATCH 0524/2157] iio: adc: ad7793: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-19-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7793.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 861a3e44e5ad..ccf18ce48e34 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -517,13 +517,12 @@ static int ad7793_write_raw(struct iio_dev *indio_dev, { int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __ad7793_write_raw(indio_dev, chan, val, val2, mask); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From bbd841f9d02c7a32243441e49496f0f915e536df Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:19 +0000 Subject: [PATCH 0525/2157] iio: adc: ad799x: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-20-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad799x.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index aa44b4e2542b..993f4651b73a 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -291,13 +291,12 @@ static int ad799x_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&st->lock); ret = ad799x_scan_direct(st, chan->scan_index); mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; @@ -411,9 +410,8 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, struct ad799x_state *st = iio_priv(indio_dev); int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&st->lock); @@ -429,7 +427,7 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev, ret = ad799x_write_config(st, st->config); mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } From 6eedf172d964be30364c3d8ac30751a6ce4815d8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:20 +0000 Subject: [PATCH 0526/2157] iio: adc: ad_sigma_delta: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. This is a complex function with a lot more than the release of direct mode to unwind on error. As such no attempt made to factor out the inner code. Cc: Uwe Kleine-König Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-21-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 10e635fc4fa4..5907c35b98e5 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -386,9 +386,8 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, unsigned int data_reg; int ret = 0; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ad_sigma_delta_set_channel(sigma_delta, chan->address); @@ -431,7 +430,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, sigma_delta->keep_cs_asserted = false; sigma_delta->bus_locked = false; spi_bus_unlock(sigma_delta->spi->controller); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; From b3179f59c104484896516fddb119116e02fa287d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:21 +0000 Subject: [PATCH 0527/2157] iio: adc: at91-sama5d2: Move claim of direct mode up a level and use guard() Move iio_device_claim_direct_mode() up one layer in the call stack, and use guard() for scope based unlocking to simplify error handling by allowing direct returns. Reviewed-by: Claudiu Beznea Link: https://patch.msgid.link/20250217141630.897334-22-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 8e5aaf15a921..70d3dbb39b25 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -1814,19 +1815,10 @@ static int at91_adc_read_info_locked(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val) { struct at91_adc_state *st = iio_priv(indio_dev); - int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + guard(mutex)(&st->lock); - mutex_lock(&st->lock); - ret = at91_adc_read_info_raw(indio_dev, chan, val); - mutex_unlock(&st->lock); - - iio_device_release_direct_mode(indio_dev); - - return ret; + return at91_adc_read_info_raw(indio_dev, chan, val); } static void at91_adc_temp_sensor_configure(struct at91_adc_state *st, @@ -1871,14 +1863,11 @@ static int at91_adc_read_temp(struct iio_dev *indio_dev, u32 tmp; int ret, vbg, vtemp; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - mutex_lock(&st->lock); + guard(mutex)(&st->lock); ret = pm_runtime_resume_and_get(st->dev); if (ret < 0) - goto unlock; + return ret; at91_adc_temp_sensor_configure(st, true); @@ -1900,9 +1889,6 @@ static int at91_adc_read_temp(struct iio_dev *indio_dev, at91_adc_temp_sensor_configure(st, false); pm_runtime_mark_last_busy(st->dev); pm_runtime_put_autosuspend(st->dev); -unlock: - mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); if (ret < 0) return ret; @@ -1924,10 +1910,17 @@ static int at91_adc_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct at91_adc_state *st = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: - return at91_adc_read_info_locked(indio_dev, chan, val); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = at91_adc_read_info_locked(indio_dev, chan, val); + iio_device_release_direct_mode(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_uv / 1000; @@ -1939,7 +1932,14 @@ static int at91_adc_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_PROCESSED: if (chan->type != IIO_TEMP) return -EINVAL; - return at91_adc_read_temp(indio_dev, chan, val); + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + + ret = at91_adc_read_temp(indio_dev, chan, val); + iio_device_release_direct_mode(indio_dev); + + return ret; case IIO_CHAN_INFO_SAMP_FREQ: *val = at91_adc_get_sample_freq(st); From 173386d036336e8e75356ec89673a3f468f60b16 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:22 +0000 Subject: [PATCH 0528/2157] iio: adc: at91-sama5d2: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Claudiu Beznea Link: https://patch.msgid.link/20250217141630.897334-23-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 70d3dbb39b25..694ff96fc913 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1914,12 +1914,11 @@ static int at91_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = at91_adc_read_info_locked(indio_dev, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SCALE: @@ -1932,12 +1931,11 @@ static int at91_adc_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_PROCESSED: if (chan->type != IIO_TEMP) return -EINVAL; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = at91_adc_read_temp(indio_dev, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; @@ -1967,28 +1965,26 @@ static int at91_adc_write_raw(struct iio_dev *indio_dev, if (val == st->oversampling_ratio) return 0; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&st->lock); /* update ratio */ ret = at91_adc_config_emr(st, val, 0); mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SAMP_FREQ: if (val < st->soc_info.min_sample_rate || val > st->soc_info.max_sample_rate) return -EINVAL; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&st->lock); at91_adc_setup_samp_freq(indio_dev, val, st->soc_info.startup_time, 0); mutex_unlock(&st->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return 0; default: return -EINVAL; From 2ce5314c689cd1541b9e96aceed98708309c49c8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:23 +0000 Subject: [PATCH 0529/2157] iio: adc: max1027: Move claim of direct mode up one level and use guard() Move iio_device_claim_direct_mode() into the read_raw() callback and use guard() to release a mutex. This enables simpler error handling via direct returns. Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20250217141630.897334-24-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1027.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index f5ba4a1b5a7d..3cab89464fb5 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -336,10 +336,6 @@ static int max1027_read_single_value(struct iio_dev *indio_dev, int ret; struct max1027_state *st = iio_priv(indio_dev); - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - /* Configure conversion register with the requested chan */ st->reg = MAX1027_CONV_REG | MAX1027_CHAN(chan->channel) | MAX1027_NOSCAN; @@ -349,7 +345,7 @@ static int max1027_read_single_value(struct iio_dev *indio_dev, if (ret < 0) { dev_err(&indio_dev->dev, "Failed to configure conversion register\n"); - goto release; + return ret; } /* @@ -359,14 +355,10 @@ static int max1027_read_single_value(struct iio_dev *indio_dev, */ ret = max1027_wait_eoc(indio_dev); if (ret) - goto release; + return ret; /* Read result */ ret = spi_read(st->spi, st->buffer, (chan->type == IIO_TEMP) ? 4 : 2); - -release: - iio_device_release_direct_mode(indio_dev); - if (ret < 0) return ret; @@ -382,37 +374,33 @@ static int max1027_read_raw(struct iio_dev *indio_dev, int ret = 0; struct max1027_state *st = iio_priv(indio_dev); - mutex_lock(&st->lock); + guard(mutex)(&st->lock); switch (mask) { case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret) + return ret; + ret = max1027_read_single_value(indio_dev, chan, val); - break; + iio_device_release_direct_mode(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_TEMP: *val = 1; *val2 = 8; - ret = IIO_VAL_FRACTIONAL; - break; + return IIO_VAL_FRACTIONAL; case IIO_VOLTAGE: *val = 2500; *val2 = chan->scan_type.realbits; - ret = IIO_VAL_FRACTIONAL_LOG2; - break; + return IIO_VAL_FRACTIONAL_LOG2; default: - ret = -EINVAL; - break; + return -EINVAL; } - break; default: - ret = -EINVAL; - break; + return -EINVAL; } - - mutex_unlock(&st->lock); - - return ret; } static int max1027_debugfs_reg_access(struct iio_dev *indio_dev, From 71dfa35f7bee255d041d234603fb7afe8c0ddcb4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:24 +0000 Subject: [PATCH 0530/2157] iio: adc: max1027: Switch to sparse friendly iio_device_claim/release_direct() These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Cc: Miquel Raynal Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20250217141630.897334-25-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max1027.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index 3cab89464fb5..7e736e77d8bb 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -378,12 +378,11 @@ static int max1027_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = max1027_read_single_value(indio_dev, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SCALE: switch (chan->type) { From 386c1d6e41cf2d6071b4d10500b34c89a5439380 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:25 +0000 Subject: [PATCH 0531/2157] iio: adc: max11410: Factor out writing of sampling frequency to simplify errro paths. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce __max11410_write_samp_freq() helper and use guard() to avoid need for manual unlock of the mutex. This allows direct returns and simplifies the resulting error handling. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-26-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max11410.c | 51 ++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c index 76abafd47404..6e06c62715a8 100644 --- a/drivers/iio/adc/max11410.c +++ b/drivers/iio/adc/max11410.c @@ -507,12 +507,37 @@ static int max11410_read_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int __max11410_write_samp_freq(struct max11410_state *st, + int val, int val2) +{ + int ret, i, reg_val, filter; + + guard(mutex)(&st->lock); + + ret = regmap_read(st->regmap, MAX11410_REG_FILTER, ®_val); + if (ret) + return ret; + + filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val); + + for (i = 0; i < max11410_sampling_len[filter]; ++i) { + if (val == max11410_sampling_rates[filter][i][0] && + val2 == max11410_sampling_rates[filter][i][1]) + break; + } + if (i == max11410_sampling_len[filter]) + return -EINVAL; + + return regmap_write_bits(st->regmap, MAX11410_REG_FILTER, + MAX11410_FILTER_RATE_MASK, i); +} + static int max11410_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask) { struct max11410_state *st = iio_priv(indio_dev); - int i, ret, reg_val, filter, gain; + int ret, gain; u32 *scale_avail; switch (mask) { @@ -544,29 +569,7 @@ static int max11410_write_raw(struct iio_dev *indio_dev, if (ret) return ret; - mutex_lock(&st->lock); - - ret = regmap_read(st->regmap, MAX11410_REG_FILTER, ®_val); - if (ret) - goto out; - - filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val); - - for (i = 0; i < max11410_sampling_len[filter]; ++i) { - if (val == max11410_sampling_rates[filter][i][0] && - val2 == max11410_sampling_rates[filter][i][1]) - break; - } - if (i == max11410_sampling_len[filter]) { - ret = -EINVAL; - goto out; - } - - ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER, - MAX11410_FILTER_RATE_MASK, i); - -out: - mutex_unlock(&st->lock); + ret = __max11410_write_samp_freq(st, val, val2); iio_device_release_direct_mode(indio_dev); return ret; From 01528347680ffdbbd5aeb42726359320fdc6e464 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:26 +0000 Subject: [PATCH 0532/2157] iio: adc: max11410: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-27-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max11410.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c index 6e06c62715a8..437d9f24b5a1 100644 --- a/drivers/iio/adc/max11410.c +++ b/drivers/iio/adc/max11410.c @@ -471,9 +471,8 @@ static int max11410_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&state->lock); @@ -481,7 +480,7 @@ static int max11410_read_raw(struct iio_dev *indio_dev, mutex_unlock(&state->lock); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; @@ -550,9 +549,8 @@ static int max11410_write_raw(struct iio_dev *indio_dev, if (val != 0 || val2 == 0) return -EINVAL; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; /* Convert from INT_PLUS_MICRO to FRACTIONAL_LOG2 */ val2 = val2 * DIV_ROUND_CLOSEST(BIT(24), 1000000); @@ -561,16 +559,15 @@ static int max11410_write_raw(struct iio_dev *indio_dev, st->channels[chan->address].gain = clamp_val(gain, 0, 7); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return 0; case IIO_CHAN_INFO_SAMP_FREQ: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = __max11410_write_samp_freq(st, val, val2); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: From 1a21a9842e3b2e923c69cdccb3260a6d6b2494da Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 17 Feb 2025 14:16:29 +0000 Subject: [PATCH 0533/2157] iio: adc: Switch to sparse friendly iio_device_claim/release_direct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single patch for all the relatively simple cases. These new functions allow sparse to find failures to release direct mode reducing chances of bugs over the claim_direct_mode() functions that are deprecated. Cc: Olivier Moysan Cc: Phil Reid Reviewed-by: Mike Looijmans Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250217141630.897334-30-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 9 +++--- drivers/iio/adc/ad7266.c | 7 ++--- drivers/iio/adc/ad7298.c | 7 ++--- drivers/iio/adc/ad7380.c | 56 ++++++++++++++------------------- drivers/iio/adc/ad7476.c | 7 ++--- drivers/iio/adc/ad7887.c | 7 ++--- drivers/iio/adc/ad7923.c | 7 ++--- drivers/iio/adc/ad7944.c | 7 ++--- drivers/iio/adc/dln2-adc.c | 7 ++--- drivers/iio/adc/stm32-adc.c | 7 ++--- drivers/iio/adc/ti-adc084s021.c | 9 +++--- drivers/iio/adc/ti-adc108s102.c | 7 ++--- drivers/iio/adc/ti-ads1298.c | 7 ++--- drivers/iio/adc/ti-ads131e08.c | 14 ++++----- drivers/iio/adc/ti-tlc4541.c | 7 ++--- 15 files changed, 71 insertions(+), 94 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 54f9cc5a89f5..ca2b41b16cc9 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -1157,11 +1157,10 @@ static int ad7173_write_raw(struct iio_dev *indio_dev, struct ad7173_state *st = iio_priv(indio_dev); struct ad7173_channel_config *cfg; unsigned int freq, i; - int ret; + int ret = 0; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; switch (info) { /* @@ -1195,7 +1194,7 @@ static int ad7173_write_raw(struct iio_dev *indio_dev, break; } - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 858c8be2ff1a..18559757f908 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -153,11 +153,10 @@ static int ad7266_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7266_read_single(st, val, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7298.c b/drivers/iio/adc/ad7298.c index b35bd4d9ef81..28b88092b4aa 100644 --- a/drivers/iio/adc/ad7298.c +++ b/drivers/iio/adc/ad7298.c @@ -232,16 +232,15 @@ static int ad7298_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; if (chan->address == AD7298_CH_TEMP) ret = ad7298_scan_temp(st, val); else ret = ad7298_scan_direct(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index a18dcd664c1b..f232ad1a4963 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -716,16 +716,15 @@ static int ad7380_debugfs_reg_access(struct iio_dev *indio_dev, u32 reg, struct ad7380_state *st = iio_priv(indio_dev); int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; if (readval) ret = regmap_read(st->regmap, reg, readval); else ret = regmap_write(st->regmap, reg, writeval); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } @@ -1018,14 +1017,13 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7380_read_direct(st, chan->scan_index, scan_type, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SCALE: @@ -1056,13 +1054,12 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7380_get_osr(st, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; @@ -1155,13 +1152,12 @@ static int ad7380_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_OVERSAMPLING_RATIO: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7380_set_oversampling_ratio(st, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: @@ -1186,13 +1182,12 @@ static int ad7380_read_event_config(struct iio_dev *indio_dev, struct ad7380_state *st = iio_priv(indio_dev); int tmp, ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = regmap_read(st->regmap, AD7380_REG_ADDR_CONFIG1, &tmp); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; @@ -1209,16 +1204,15 @@ static int ad7380_write_event_config(struct iio_dev *indio_dev, struct ad7380_state *st = iio_priv(indio_dev); int ret; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = regmap_update_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, AD7380_CONFIG1_ALERTEN, FIELD_PREP(AD7380_CONFIG1_ALERTEN, state)); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } @@ -1265,13 +1259,12 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, switch (info) { case IIO_EV_INFO_VALUE: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7380_get_alert_th(st, dir, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: return -EINVAL; @@ -1332,13 +1325,12 @@ static int ad7380_write_event_value(struct iio_dev *indio_dev, switch (info) { case IIO_EV_INFO_VALUE: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7380_set_alert_th(indio_dev, chan, dir, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: return -EINVAL; diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c index aeb8e383fe71..37b0515cf4fc 100644 --- a/drivers/iio/adc/ad7476.c +++ b/drivers/iio/adc/ad7476.c @@ -138,11 +138,10 @@ static int ad7476_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7476_scan_direct(st); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c index 69add1dc4b53..87ff95643794 100644 --- a/drivers/iio/adc/ad7887.c +++ b/drivers/iio/adc/ad7887.c @@ -152,11 +152,10 @@ static int ad7887_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7887_scan_direct(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c index acc44cb34f82..87945efb940b 100644 --- a/drivers/iio/adc/ad7923.c +++ b/drivers/iio/adc/ad7923.c @@ -260,11 +260,10 @@ static int ad7923_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7923_scan_direct(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7944.c b/drivers/iio/adc/ad7944.c index 9a7825ea5087..2f949fe55873 100644 --- a/drivers/iio/adc/ad7944.c +++ b/drivers/iio/adc/ad7944.c @@ -407,12 +407,11 @@ static int ad7944_read_raw(struct iio_dev *indio_dev, switch (info) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ad7944_single_conversion(adc, chan, val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SCALE: diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 221a5fdc1eaa..a1e48a756a7b 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -314,15 +314,14 @@ static int dln2_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; mutex_lock(&dln2->mutex); ret = dln2_adc_read(dln2, chan->channel); mutex_unlock(&dln2->mutex); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 9d3b23efcc06..5dbf5f136768 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1471,9 +1471,8 @@ static int stm32_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: case IIO_CHAN_INFO_PROCESSED: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; if (chan->type == IIO_VOLTAGE) ret = stm32_adc_single_conv(indio_dev, chan, val); else @@ -1482,7 +1481,7 @@ static int stm32_adc_read_raw(struct iio_dev *indio_dev, if (mask == IIO_CHAN_INFO_PROCESSED) *val = STM32_ADC_VREFINT_VOLTAGE * adc->vrefint.vrefint_cal / *val; - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; case IIO_CHAN_INFO_SCALE: diff --git a/drivers/iio/adc/ti-adc084s021.c b/drivers/iio/adc/ti-adc084s021.c index da16876c32ae..9c845ee01697 100644 --- a/drivers/iio/adc/ti-adc084s021.c +++ b/drivers/iio/adc/ti-adc084s021.c @@ -96,19 +96,18 @@ static int adc084s021_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = regulator_enable(adc->reg); if (ret) { - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; } adc->tx_buf[0] = channel->channel << 3; ret = adc084s021_adc_conversion(adc, &be_val); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); regulator_disable(adc->reg); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ti-adc108s102.c b/drivers/iio/adc/ti-adc108s102.c index 9758ac801310..7d615e2bbf39 100644 --- a/drivers/iio/adc/ti-adc108s102.c +++ b/drivers/iio/adc/ti-adc108s102.c @@ -181,13 +181,12 @@ static int adc108s102_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = adc108s102_scan_direct(st, chan->address); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ti-ads1298.c b/drivers/iio/adc/ti-ads1298.c index 03f762415fa5..ae30b47e4514 100644 --- a/drivers/iio/adc/ti-ads1298.c +++ b/drivers/iio/adc/ti-ads1298.c @@ -319,13 +319,12 @@ static int ads1298_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ads1298_read_one(priv, chan->scan_index); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 91a79ebc4bde..c6096b64664e 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -505,12 +505,11 @@ static int ads131e08_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ads131e08_read_direct(indio_dev, channel, value); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret) return ret; @@ -551,12 +550,11 @@ static int ads131e08_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = ads131e08_set_data_rate(st, value); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); return ret; default: diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c index 08de997584fd..5a138be983ed 100644 --- a/drivers/iio/adc/ti-tlc4541.c +++ b/drivers/iio/adc/ti-tlc4541.c @@ -131,11 +131,10 @@ static int tlc4541_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = spi_sync(st->spi, &st->scan_single_msg); - iio_device_release_direct_mode(indio_dev); + iio_device_release_direct(indio_dev); if (ret < 0) return ret; *val = be16_to_cpu(st->rx_buf[0]); From 035b4989211dc1c8626e186d655ae8ca5141bb73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 18 Feb 2025 10:31:25 +0000 Subject: [PATCH 0534/2157] iio: backend: make sure to NULL terminate stack buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to NULL terminate the buffer in iio_backend_debugfs_write_reg() before passing it to sscanf(). It is a stack variable so we should not assume it will 0 initialized. Fixes: cdf01e0809a4 ("iio: backend: add debugFs interface") Signed-off-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250218-dev-iio-misc-v1-1-bf72b20a1eb8@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-backend.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c index d4ad36f54090..a43c8d1bb3d0 100644 --- a/drivers/iio/industrialio-backend.c +++ b/drivers/iio/industrialio-backend.c @@ -155,10 +155,12 @@ static ssize_t iio_backend_debugfs_write_reg(struct file *file, ssize_t rc; int ret; - rc = simple_write_to_buffer(buf, sizeof(buf), ppos, userbuf, count); + rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, count); if (rc < 0) return rc; + buf[count] = '\0'; + ret = sscanf(buf, "%i %i", &back->cached_reg_addr, &val); switch (ret) { From 6d5dd486c715908b5a6ed02315a15ff044a91025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 18 Feb 2025 10:31:26 +0000 Subject: [PATCH 0535/2157] iio: core: make use of simple_write_to_buffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of open coding (kind of) simple_write_to_buffer(), use it. While at it, use ascii representation to terminate the string as that is the more common way of doing it. Signed-off-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250218-dev-iio-misc-v1-2-bf72b20a1eb8@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index a2117ad1337d..b9f4113ae5fc 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -410,11 +410,12 @@ static ssize_t iio_debugfs_write_reg(struct file *file, char buf[80]; int ret; - count = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, + count); + if (ret < 0) + return ret; - buf[count] = 0; + buf[count] = '\0'; ret = sscanf(buf, "%i %i", ®, &val); From d477cda71a3a502113b81e9d1f07948624a2f8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 18 Feb 2025 10:34:57 +0000 Subject: [PATCH 0536/2157] iio: adc: adi-axi-adc: replace of.h with mod_devicetable.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't use of.h in order to include mod_devicetable.h. Use it directly as there no direct dependency on OF. Signed-off-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250218-dev-axi-adc-fix-headers-v1-1-5ddc79221d8c@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/adi-axi-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 61ab7dce43be..cf942c043457 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include #include #include From cafeb8a99746e218035ad000d28deeca2bad9f9c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 18 Feb 2025 17:17:45 -0600 Subject: [PATCH 0537/2157] iio: adc: ad4695: fix out of bounds array access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix some out of bounds array access of st->channels_cfg in the ad4695 driver. This array only has elements for voltage channels, but it was also being accessed for the temperature channel in a few cases causing reading past the end of the array. In some cases, this was harmless because the value was read but not used. However, the in_temp_sampling_frequency attribute shares code with the in_voltageY_sampling_frequency attributes and was trying to read the oversampling ratio from the st->channels_cfg array. This resulted in a garbage value being used in the calculation and the resulting in_temp_sampling_frequency value was incorrect. To fix, make sure we always check that we are dealing with a voltage channel before accessing the st->channels_cfg array and use an oversampling ratio of 1 for the temperature channel (multiplicative identity value) since that channel doesn't support oversampling. Fixes: 67d63185db79 ("iio: adc: ad4695: add offload-based oversampling support") Signed-off-by: David Lechner Reviewed-by: Trevor Gamblin Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250218-iio-adc-ad4695-fix-out-of-bounds-array-access-v1-1-57fef8c7a3fd@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 9dbf326b6273..d42dea494302 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -1054,12 +1054,14 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, { struct ad4695_state *st = iio_priv(indio_dev); const struct iio_scan_type *scan_type; - struct ad4695_channel_config *cfg = &st->channels_cfg[chan->scan_index]; - unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; + struct ad4695_channel_config *cfg; unsigned int reg_val; int ret, tmp; u8 realbits; + if (chan->type == IIO_VOLTAGE) + cfg = &st->channels_cfg[chan->scan_index]; + scan_type = iio_get_current_scan_type(indio_dev, chan); if (IS_ERR(scan_type)) return PTR_ERR(scan_type); @@ -1140,7 +1142,7 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, tmp = sign_extend32(reg_val, 15); - switch (osr) { + switch (cfg->oversampling_ratio) { case 1: *val = tmp / 4; *val2 = abs(tmp) % 4 * MICRO / 4; @@ -1180,6 +1182,10 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_SAMP_FREQ: { struct pwm_state state; + unsigned int osr = 1; + + if (chan->type == IIO_VOLTAGE) + osr = cfg->oversampling_ratio; ret = pwm_get_state_hw(st->cnv_pwm, &state); if (ret) @@ -1272,7 +1278,10 @@ static int __ad4695_write_raw(struct iio_dev *indio_dev, { struct ad4695_state *st = iio_priv(indio_dev); unsigned int reg_val; - unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; + unsigned int osr = 1; + + if (chan->type == IIO_VOLTAGE) + osr = st->channels_cfg[chan->scan_index].oversampling_ratio; switch (mask) { case IIO_CHAN_INFO_CALIBSCALE: @@ -1383,7 +1392,10 @@ static int ad4695_read_avail(struct iio_dev *indio_dev, }, }; struct ad4695_state *st = iio_priv(indio_dev); - unsigned int osr = st->channels_cfg[chan->scan_index].oversampling_ratio; + unsigned int osr = 1; + + if (chan->type == IIO_VOLTAGE) + osr = st->channels_cfg[chan->scan_index].oversampling_ratio; switch (mask) { case IIO_CHAN_INFO_CALIBSCALE: @@ -1738,7 +1750,7 @@ static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, for (i = 0; i < indio_dev->num_channels; i++) { struct iio_chan_spec *chan = &st->iio_chan[i]; - struct ad4695_channel_config *cfg = &st->channels_cfg[i]; + struct ad4695_channel_config *cfg; /* * NB: When using offload support, all channels need to have the @@ -1759,6 +1771,8 @@ static int ad4695_probe_spi_offload(struct iio_dev *indio_dev, if (chan->type != IIO_VOLTAGE) continue; + cfg = &st->channels_cfg[i]; + chan->info_mask_separate |= BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); chan->info_mask_separate_available |= BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); From 38f898e0b54f5f1e7db92bec755fb55115d43de8 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 18 Feb 2025 17:17:46 -0600 Subject: [PATCH 0538/2157] iio: adc: ad4695: simplify getting oversampling_ratio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a local variable that holds a pointer to st->channels_cfg[chan->scan_index]. Use that to simplify the code. Signed-off-by: David Lechner Reviewed-by: Trevor Gamblin Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250218-iio-adc-ad4695-fix-out-of-bounds-array-access-v1-2-57fef8c7a3fd@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index d42dea494302..8222c8ab2940 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -1175,7 +1175,7 @@ static int ad4695_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OVERSAMPLING_RATIO: switch (chan->type) { case IIO_VOLTAGE: - *val = st->channels_cfg[chan->scan_index].oversampling_ratio; + *val = cfg->oversampling_ratio; return IIO_VAL_INT; default: return -EINVAL; From b543d881e89cadf7f5238fd202d900af3e6482ea Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Fri, 21 Feb 2025 19:46:58 +0000 Subject: [PATCH 0539/2157] docs: iio: fix wrong driver name in documentation The ADXL380/382 documentation uses in one place a wrong driver name. Adds no functional change. Signed-off-by: Lothar Rubusch Link: https://patch.msgid.link/20250221194658.41358-1-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- Documentation/iio/adxl380.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/iio/adxl380.rst b/Documentation/iio/adxl380.rst index 376dee5fe1dd..66c8a4d4f767 100644 --- a/Documentation/iio/adxl380.rst +++ b/Documentation/iio/adxl380.rst @@ -94,7 +94,7 @@ apply the following formula: Where _offset and _scale are device attributes. If no _offset attribute is present, simply assume its value is 0. -The adis16475 driver offers data for 2 types of channels, the table below shows +The ADXL380 driver offers data for 2 types of channels, the table below shows the measurement units for the processed value, which are defined by the IIO framework: From 583350c1d4d0a9bc63f3e9a4fc662d0d22007bf3 Mon Sep 17 00:00:00 2001 From: Jun Yan Date: Fri, 21 Feb 2025 00:50:01 +0800 Subject: [PATCH 0540/2157] iio: gyro: bmg160_i2c: add BMI088 to of_match_table BMI088 is missing from the of_match_table. Let's complete it. Signed-off-by: Jun Yan Link: https://patch.msgid.link/20250220165001.273325-3-jerrysteve1101@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/gyro/bmg160_i2c.c b/drivers/iio/gyro/bmg160_i2c.c index 9c5d7e8ee99c..e6caab49f98a 100644 --- a/drivers/iio/gyro/bmg160_i2c.c +++ b/drivers/iio/gyro/bmg160_i2c.c @@ -58,6 +58,7 @@ MODULE_DEVICE_TABLE(i2c, bmg160_i2c_id); static const struct of_device_id bmg160_of_match[] = { { .compatible = "bosch,bmg160" }, { .compatible = "bosch,bmi055_gyro" }, + { .compatible = "bosch,bmi088_gyro" }, { } }; From 25331775b5b4ed45efb37c7e25adad2e8abef948 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 19 Feb 2025 19:57:43 +0100 Subject: [PATCH 0541/2157] iio: dac: adi-axi-dac: add io_mode check Add safe check to the high bound of the enum values, Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250219-wip-bl-axi-dac-add-enum-check-v1-1-8de9db0b3b1b@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/adi-axi-dac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index 009fb987e1af..892d770aec69 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ b/drivers/iio/dac/adi-axi-dac.c @@ -728,6 +728,9 @@ static int axi_dac_bus_set_io_mode(struct iio_backend *back, struct axi_dac_state *st = iio_backend_get_priv(back); int ival, ret; + if (mode > AD3552R_IO_MODE_QSPI) + return -EINVAL; + guard(mutex)(&st->lock); ret = regmap_update_bits(st->regmap, AXI_DAC_CUSTOM_CTRL_REG, From 4e0bd62e80761a0ead3a0b6ac6520fca1a38d678 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Wed, 19 Feb 2025 20:54:45 -0300 Subject: [PATCH 0542/2157] iio: imu: bmi270: move private struct declaration to source file The device's private data struct is currently declared in the header file, but it does not need to be exposed there. Move it to the driver's core source file to avoid unnecessary #include directives or forward declarations in the header. Signed-off-by: Gustavo Silva Acked-by: Alex Lanzano Link: https://patch.msgid.link/20250219-bmi270-irq-v1-1-145d02bbca3b@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270.h | 17 +---------------- drivers/iio/imu/bmi270/bmi270_core.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270.h b/drivers/iio/imu/bmi270/bmi270.h index fdfad5784cc5..d94525f6aee8 100644 --- a/drivers/iio/imu/bmi270/bmi270.h +++ b/drivers/iio/imu/bmi270/bmi270.h @@ -6,22 +6,6 @@ #include #include -struct device; -struct bmi270_data { - struct device *dev; - struct regmap *regmap; - const struct bmi270_chip_info *chip_info; - - /* - * Where IIO_DMA_MINALIGN may be larger than 8 bytes, align to - * that to ensure a DMA safe buffer. - */ - struct { - __le16 channels[6]; - aligned_s64 timestamp; - } data __aligned(IIO_DMA_MINALIGN); -}; - struct bmi270_chip_info { const char *name; int chip_id; @@ -32,6 +16,7 @@ extern const struct regmap_config bmi270_regmap_config; extern const struct bmi270_chip_info bmi260_chip_info; extern const struct bmi270_chip_info bmi270_chip_info; +struct device; int bmi270_core_probe(struct device *dev, struct regmap *regmap, const struct bmi270_chip_info *chip_info); diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index 464dcdd657c4..9f24d4044ed6 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -78,6 +78,21 @@ #define BMI260_INIT_DATA_FILE "bmi260-init-data.fw" #define BMI270_INIT_DATA_FILE "bmi270-init-data.fw" +struct bmi270_data { + struct device *dev; + struct regmap *regmap; + const struct bmi270_chip_info *chip_info; + + /* + * Where IIO_DMA_MINALIGN may be larger than 8 bytes, align to + * that to ensure a DMA safe buffer. + */ + struct { + __le16 channels[6]; + aligned_s64 timestamp; + } data __aligned(IIO_DMA_MINALIGN); +}; + enum bmi270_scan { BMI270_SCAN_ACCEL_X, BMI270_SCAN_ACCEL_Y, From 8c53df1499ca8d1b44ad9423ebeeecc11014ea95 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 28 Feb 2025 15:14:03 +0200 Subject: [PATCH 0543/2157] iio: adc: Include cleanup.h when using guard() Directly include the cleanup.h for the guard() instead of relying it to be included via other files. Signed-off-by: Matti Vaittinen Link: https://patch.msgid.link/5b352ce7241e5904a317dd26950c7cd4daa59fc0.1740748394.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 1 + drivers/iio/adc/ad7091r-base.c | 1 + drivers/iio/adc/ad7606.c | 1 + drivers/iio/adc/max34408.c | 1 + drivers/iio/adc/pac1921.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 061eeb9b1f8d..f3ae41214326 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 606486c4dfe8..931ff71b2888 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index f566248db70a..631e83717167 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -5,6 +5,7 @@ * Copyright 2011 Analog Devices Inc. */ +#include #include #include #include diff --git a/drivers/iio/adc/max34408.c b/drivers/iio/adc/max34408.c index 971e6e5dee9b..4f45fd22a90c 100644 --- a/drivers/iio/adc/max34408.c +++ b/drivers/iio/adc/max34408.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/iio/adc/pac1921.c b/drivers/iio/adc/pac1921.c index 90f61c47b1c4..1c28df132e9f 100644 --- a/drivers/iio/adc/pac1921.c +++ b/drivers/iio/adc/pac1921.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From 9b4b9791dd51c42cb6a6352ec6e84afed64993d6 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Wed, 19 Feb 2025 18:00:56 -0300 Subject: [PATCH 0544/2157] Documentation: iio: Add ADC documentation ADC inputs can be classified into a few different types according to how they measure the input signal, how restrained the signal is, and number of input pins. Even though datasheets tend to provide many details about their inputs and measurement procedures, it may not always be clear how to model those inputs into IIO channels. For example, some differential ADCs can have their inputs configured into pseudo-differential channels. In that configuration, only one input connects to the signal of interest as opposed to using two inputs of a differential input configuration. Datasheets sometimes also refer to pseudo-differential inputs as single-ended inputs even though they have distinct physical configuration and measurement procedure. Document consolidated ADC input types and how they are usually described and supported in device tree and IIO, respectively. Signed-off-by: Marcelo Schmitt Reviewed-by: David Lechner Link: https://patch.msgid.link/e6ac2a595f06ba2d5ff0eb86e5895479c9dd797f.1739998491.git.marcelo.schmitt@analog.com Signed-off-by: Jonathan Cameron --- Documentation/iio/iio_adc.rst | 305 ++++++++++++++++++++++++++++++++++ Documentation/iio/index.rst | 1 + 2 files changed, 306 insertions(+) create mode 100644 Documentation/iio/iio_adc.rst diff --git a/Documentation/iio/iio_adc.rst b/Documentation/iio/iio_adc.rst new file mode 100644 index 000000000000..f2f19a691907 --- /dev/null +++ b/Documentation/iio/iio_adc.rst @@ -0,0 +1,305 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +========================= +IIO Abstractions for ADCs +========================= + +1. Overview +=========== + +The IIO subsystem supports many Analog to Digital Converters (ADCs). Some ADCs +have features and characteristics that are supported in specific ways by IIO +device drivers. This documentation describes common ADC features and explains +how they are supported by the IIO subsystem. + +1. ADC Channel Types +==================== + +ADCs can have distinct types of inputs, each of them measuring analog voltages +in a slightly different way. An ADC digitizes the analog input voltage over a +span that is often given by the provided voltage reference, the input type, and +the input polarity. The input range allowed to an ADC channel is needed to +determine the scale factor and offset needed to obtain the measured value in +real-world units (millivolts for voltage measurement, milliamps for current +measurement, etc.). + +Elaborate designs may have nonlinear characteristics or integrated components +(such as amplifiers and reference buffers) that might also have to be considered +to derive the allowed input range for an ADC. For clarity, the sections below +assume the input range only depends on the provided voltage references, input +type, and input polarity. + +There are three general types of ADC inputs (single-ended, differential, +pseudo-differential) and two possible polarities (unipolar, bipolar). The input +type (single-ended, differential, pseudo-differential) is one channel +characteristic, and is completely independent of the polarity (unipolar, +bipolar) aspect. A comprehensive article about ADC input types (on which this +doc is heavily based on) can be found at +https://www.analog.com/en/resources/technical-articles/sar-adc-input-types.html. + +1.1 Single-ended channels +------------------------- + +Single-ended channels digitize the analog input voltage relative to ground and +can be either unipolar or bipolar. + +1.1.1 Single-ended Unipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + ---------- VREF ------------- + ´ ` ´ ` _____________ + / \ / \ / | + / \ / \ --- < IN ADC | + \ / \ / \ | + `-´ `-´ \ VREF | + -------- GND (0V) ----------- +-----------+ + ^ + | + External VREF + +The input voltage to a **single-ended unipolar** channel is allowed to swing +from GND to VREF (where VREF is a voltage reference with electrical potential +higher than system ground). The maximum input voltage is also called VFS +(Voltage input Full-Scale), with VFS being determined by VREF. The voltage +reference may be provided from an external supply or derived from the chip power +source. + +A single-ended unipolar channel could be described in device tree like the +following example:: + + adc@0 { + ... + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + }; + }; + +One is always allowed to include ADC channel nodes in the device tree. Though, +if the device has a uniform set of inputs (e.g. all inputs are single-ended), +then declaring the channel nodes is optional. + +One caveat for devices that support mixed single-ended and differential channels +is that single-ended channel nodes also need to provide a ``single-channel`` +property when ``reg`` is an arbitrary number that doesn't match the input pin +number. + +See ``Documentation/devicetree/bindings/iio/adc/adc.yaml`` for the complete +documentation of ADC specific device tree properties. + + +1.1.2 Single-ended Bipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + ---------- +VREF ------------ + ´ ` ´ ` _____________________ + / \ / \ / | + / \ / \ --- < IN ADC | + \ / \ / \ | + `-´ `-´ \ +VREF -VREF | + ---------- -VREF ------------ +-------------------+ + ^ ^ + | | + External +VREF ------+ External -VREF + +For a **single-ended bipolar** channel, the analog voltage input can go from +-VREF to +VREF (where -VREF is the voltage reference that has the lower +electrical potential while +VREF is the reference with the higher one). Some ADC +chips derive the lower reference from +VREF, others get it from a separate +input. Often, +VREF and -VREF are symmetric but they don't need to be so. When +-VREF is lower than system ground, these inputs are also called single-ended +true bipolar. Also, while there is a relevant difference between bipolar and +true bipolar from the electrical perspective, IIO makes no explicit distinction +between them. + +Here's an example device tree description of a single-ended bipolar channel:: + + adc@0 { + ... + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + bipolar; + }; + }; + +1.2 Differential channels +------------------------- + +A differential voltage measurement digitizes the voltage level at the positive +input (IN+) relative to the negative input (IN-) over the -VREF to +VREF span. +In other words, a differential channel measures the potential difference between +IN+ and IN-, which is often denoted by the IN+ - IN- formula. + +1.2.1 Differential Bipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + -------- +VREF ------ +-------------------+ + ´ ` ´ ` / | + / \ / \ / --- < IN+ | + `-´ `-´ | | + -------- -VREF ------ | | + | ADC | + -------- +VREF ------ | | + ´ ` ´ ` | | + \ / \ / \ --- < IN- | + `-´ `-´ \ +VREF -VREF | + -------- -VREF ------ +-------------------+ + ^ ^ + | +---- External -VREF + External +VREF + +The analog signals to **differential bipolar** inputs are also allowed to swing +from -VREF to +VREF. The bipolar part of the name means that the resulting value +of the difference (IN+ - IN-) can be positive or negative. If -VREF is below +system GND, these are also called differential true bipolar inputs. + +Device tree example of a differential bipolar channel:: + + adc@0 { + ... + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + bipolar; + diff-channels = <0 1>; + }; + }; + +In the ADC driver, ``differential = 1`` is set into ``struct iio_chan_spec`` for +the channel. Even though, there are three general input types, ``differential`` +is only used to distinguish between differential and non-differential (either +single-ended or pseudo-differential) input types. See +``include/linux/iio/iio.h`` for more information. + +1.2.2 Differential Unipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For **differential unipolar** channels, the analog voltage at the positive input +must also be higher than the voltage at the negative input. Thus, the actual +input range allowed to a differential unipolar channel is IN- to +VREF. Because +IN+ is allowed to swing with the measured analog signal and the input setup must +guarantee IN+ will not go below IN- (nor IN- will raise above IN+), most +differential unipolar channel setups have IN- fixed to a known voltage that does +not fall within the voltage range expected for the measured signal. That leads +to a setup that is equivalent to a pseudo-differential channel. Thus, +differential unipolar setups can often be supported as pseudo-differential +unipolar channels. + +1.3 Pseudo-differential Channels +-------------------------------- + +There is a third ADC input type which is called pseudo-differential or +single-ended to differential configuration. A pseudo-differential channel is +similar to a differential channel in that it also measures IN+ relative to IN-. +However, unlike bipolar differential channels, the negative input is limited to +a narrow voltage range (taken as a constant voltage) while only IN+ is allowed +to swing. A pseudo-differential channel can be made out from a differential pair +of inputs by restricting the negative input to a known voltage while allowing +only the positive input to swing. Sometimes, the input provided to IN- is called +common-mode voltage. Besides, some parts have a COM pin that allows single-ended +inputs to be referenced to a common-mode voltage, making them +pseudo-differential channels. Often, the common mode input voltage can be +described in the device tree as a voltage regulator (e.g. ``com-supply``) since +it is basically a constant voltage source. + +1.3.1 Pseudo-differential Unipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + -------- +VREF ------ +-------------------+ + ´ ` ´ ` / | + / \ / \ / --- < IN+ | + `-´ `-´ | | + --------- IN- ------- | ADC | + | | + Common-mode voltage --> --- < IN- | + \ +VREF -VREF | + +-------------------+ + ^ ^ + | +---- External -VREF + External +VREF + +A **pseudo-differential unipolar** input has the limitations a differential +unipolar channel would have, meaning the analog voltage to the positive input +IN+ must stay within IN- to +VREF. The fixed voltage to IN- is often called +common-mode voltage and it must be within -VREF to +VREF as would be expected +from the signal to any differential channel negative input. + +The voltage measured from IN+ is relative to IN- but, unlike differential +channels, pseudo-differential setups are intended to gauge single-ended input +signals. To enable applications to calculate IN+ voltage with respect to system +ground, the IIO channel may provide an ``_offset`` sysfs attribute to be added +to ADC output when converting raw data to voltage units. In many setups, the +common-mode voltage input is at GND level and the ``_offset`` attribute is +omitted due to being always zero. + +Device tree example for pseudo-differential unipolar channel:: + + adc@0 { + ... + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + single-channel = <0>; + common-mode-channel = <1>; + }; + }; + +Do not set ``differential`` in the channel ``iio_chan_spec`` struct of +pseudo-differential channels. + +1.3.2 Pseudo-differential Bipolar Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + -------- +VREF ------ +-------------------+ + ´ ` ´ ` / | + / \ / \ / --- < IN+ | + `-´ `-´ | | + -------- -VREF ------ | ADC | + | | + Common-mode voltage --> --- < IN- | + \ +VREF -VREF | + +-------------------+ + ^ ^ + | +---- External -VREF + External +VREF + +A **pseudo-differential bipolar** input is not limited by the level at IN- but +it will be limited to -VREF or to GND on the lower end of the input range +depending on the particular ADC. Similar to their unipolar counter parts, +pseudo-differential bipolar channels ought to declare an ``_offset`` attribute +to enable the conversion of raw ADC data to voltage units. For the setup with +IN- connected to GND, ``_offset`` is often omitted. + +Device tree example for pseudo-differential bipolar channel:: + + adc@0 { + ... + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + bipolar; + single-channel = <0>; + common-mode-channel = <1>; + }; + }; diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst index 4c3072ae71f2..ea35f2a06496 100644 --- a/Documentation/iio/index.rst +++ b/Documentation/iio/index.rst @@ -7,6 +7,7 @@ Industrial I/O .. toctree:: :maxdepth: 1 + iio_adc iio_configfs iio_devbuf iio_dmabuf_api From 16c94de2aa3555a7bd8e66b98504e682530b5cb5 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Fri, 28 Feb 2025 20:03:49 -0300 Subject: [PATCH 0545/2157] iio: imu: bmi270: rename variable bmi270_device to data Rename all instances of 'struct bmi270_data' to 'data', to ensure consistency across the driver. Also rename bmi270_data::data to bmi270_data::buffer to avoid naming conflicts. Acked-by: Alex Lanzano Signed-off-by: Gustavo Silva Link: https://patch.msgid.link/20250228-bmi270-irq-v2-2-3f97a4e8f551@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270_core.c | 85 ++++++++++++++-------------- 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index 9f24d4044ed6..16deda12a5c1 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -90,7 +90,7 @@ struct bmi270_data { struct { __le16 channels[6]; aligned_s64 timestamp; - } data __aligned(IIO_DMA_MINALIGN); + } buffer __aligned(IIO_DMA_MINALIGN); }; enum bmi270_scan { @@ -284,8 +284,8 @@ static int bmi270_set_scale(struct bmi270_data *data, int chan_type, int uscale) return -EINVAL; } -static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, - int *scale, int *uscale) +static int bmi270_get_scale(struct bmi270_data *data, int chan_type, int *scale, + int *uscale) { int ret; unsigned int val; @@ -293,8 +293,7 @@ static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, switch (chan_type) { case IIO_ACCEL: - ret = regmap_read(bmi270_device->regmap, - BMI270_ACC_CONF_RANGE_REG, &val); + ret = regmap_read(data->regmap, BMI270_ACC_CONF_RANGE_REG, &val); if (ret) return ret; @@ -302,8 +301,7 @@ static int bmi270_get_scale(struct bmi270_data *bmi270_device, int chan_type, bmi270_scale_item = bmi270_scale_table[BMI270_ACCEL]; break; case IIO_ANGL_VEL: - ret = regmap_read(bmi270_device->regmap, - BMI270_GYR_CONF_RANGE_REG, &val); + ret = regmap_read(data->regmap, BMI270_GYR_CONF_RANGE_REG, &val); if (ret) return ret; @@ -403,25 +401,25 @@ static irqreturn_t bmi270_trigger_handler(int irq, void *p) { struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; - struct bmi270_data *bmi270_device = iio_priv(indio_dev); + struct bmi270_data *data = iio_priv(indio_dev); int ret; - ret = regmap_bulk_read(bmi270_device->regmap, BMI270_ACCEL_X_REG, - &bmi270_device->data.channels, - sizeof(bmi270_device->data.channels)); + ret = regmap_bulk_read(data->regmap, BMI270_ACCEL_X_REG, + &data->buffer.channels, + sizeof(data->buffer.channels)); if (ret) goto done; - iio_push_to_buffers_with_timestamp(indio_dev, &bmi270_device->data, + iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, pf->timestamp); done: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; } -static int bmi270_get_data(struct bmi270_data *bmi270_device, - int chan_type, int axis, int *val) +static int bmi270_get_data(struct bmi270_data *data, int chan_type, int axis, + int *val) { __le16 sample; int reg; @@ -441,7 +439,7 @@ static int bmi270_get_data(struct bmi270_data *bmi270_device, return -EINVAL; } - ret = regmap_bulk_read(bmi270_device->regmap, reg, &sample, sizeof(sample)); + ret = regmap_bulk_read(data->regmap, reg, &sample, sizeof(sample)); if (ret) return ret; @@ -455,17 +453,17 @@ static int bmi270_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { int ret; - struct bmi270_data *bmi270_device = iio_priv(indio_dev); + struct bmi270_data *data = iio_priv(indio_dev); switch (mask) { case IIO_CHAN_INFO_RAW: - ret = bmi270_get_data(bmi270_device, chan->type, chan->channel2, val); + ret = bmi270_get_data(data, chan->type, chan->channel2, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - ret = bmi270_get_scale(bmi270_device, chan->type, val, val2); + ret = bmi270_get_scale(data, chan->type, val, val2); return ret ? ret : IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_OFFSET: switch (chan->type) { @@ -476,7 +474,7 @@ static int bmi270_read_raw(struct iio_dev *indio_dev, return -EINVAL; } case IIO_CHAN_INFO_SAMP_FREQ: - ret = bmi270_get_odr(bmi270_device, chan->type, val, val2); + ret = bmi270_get_odr(data, chan->type, val, val2); return ret ? ret : IIO_VAL_INT_PLUS_MICRO; default: return -EINVAL; @@ -599,12 +597,12 @@ static const struct iio_chan_spec bmi270_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(BMI270_SCAN_TIMESTAMP), }; -static int bmi270_validate_chip_id(struct bmi270_data *bmi270_device) +static int bmi270_validate_chip_id(struct bmi270_data *data) { int chip_id; int ret; - struct device *dev = bmi270_device->dev; - struct regmap *regmap = bmi270_device->regmap; + struct device *dev = data->dev; + struct regmap *regmap = data->regmap; ret = regmap_read(regmap, BMI270_CHIP_ID_REG, &chip_id); if (ret) @@ -618,24 +616,24 @@ static int bmi270_validate_chip_id(struct bmi270_data *bmi270_device) if (chip_id == BMI160_CHIP_ID_VAL) return -ENODEV; - if (chip_id != bmi270_device->chip_info->chip_id) + if (chip_id != data->chip_info->chip_id) dev_info(dev, "Unexpected chip id 0x%x", chip_id); if (chip_id == bmi260_chip_info.chip_id) - bmi270_device->chip_info = &bmi260_chip_info; + data->chip_info = &bmi260_chip_info; else if (chip_id == bmi270_chip_info.chip_id) - bmi270_device->chip_info = &bmi270_chip_info; + data->chip_info = &bmi270_chip_info; return 0; } -static int bmi270_write_calibration_data(struct bmi270_data *bmi270_device) +static int bmi270_write_calibration_data(struct bmi270_data *data) { int ret; int status = 0; const struct firmware *init_data; - struct device *dev = bmi270_device->dev; - struct regmap *regmap = bmi270_device->regmap; + struct device *dev = data->dev; + struct regmap *regmap = data->regmap; ret = regmap_clear_bits(regmap, BMI270_PWR_CONF_REG, BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); @@ -656,8 +654,7 @@ static int bmi270_write_calibration_data(struct bmi270_data *bmi270_device) return dev_err_probe(dev, ret, "Failed to prepare device to load init data"); - ret = request_firmware(&init_data, - bmi270_device->chip_info->fw_name, dev); + ret = request_firmware(&init_data, data->chip_info->fw_name, dev); if (ret) return dev_err_probe(dev, ret, "Failed to load init data file"); @@ -689,11 +686,11 @@ static int bmi270_write_calibration_data(struct bmi270_data *bmi270_device) return 0; } -static int bmi270_configure_imu(struct bmi270_data *bmi270_device) +static int bmi270_configure_imu(struct bmi270_data *data) { int ret; - struct device *dev = bmi270_device->dev; - struct regmap *regmap = bmi270_device->regmap; + struct device *dev = data->dev; + struct regmap *regmap = data->regmap; ret = regmap_set_bits(regmap, BMI270_PWR_CTRL_REG, BMI270_PWR_CTRL_AUX_EN_MSK | @@ -730,38 +727,38 @@ static int bmi270_configure_imu(struct bmi270_data *bmi270_device) return 0; } -static int bmi270_chip_init(struct bmi270_data *bmi270_device) +static int bmi270_chip_init(struct bmi270_data *data) { int ret; - ret = bmi270_validate_chip_id(bmi270_device); + ret = bmi270_validate_chip_id(data); if (ret) return ret; - ret = bmi270_write_calibration_data(bmi270_device); + ret = bmi270_write_calibration_data(data); if (ret) return ret; - return bmi270_configure_imu(bmi270_device); + return bmi270_configure_imu(data); } int bmi270_core_probe(struct device *dev, struct regmap *regmap, const struct bmi270_chip_info *chip_info) { int ret; - struct bmi270_data *bmi270_device; + struct bmi270_data *data; struct iio_dev *indio_dev; - indio_dev = devm_iio_device_alloc(dev, sizeof(*bmi270_device)); + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; - bmi270_device = iio_priv(indio_dev); - bmi270_device->dev = dev; - bmi270_device->regmap = regmap; - bmi270_device->chip_info = chip_info; + data = iio_priv(indio_dev); + data->dev = dev; + data->regmap = regmap; + data->chip_info = chip_info; - ret = bmi270_chip_init(bmi270_device); + ret = bmi270_chip_init(data); if (ret) return ret; From a1854d55f54b4f8df738b3fbc6595b03e66da5c8 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Fri, 28 Feb 2025 20:03:50 -0300 Subject: [PATCH 0546/2157] iio: imu: bmi270: add support for data ready interrupt trigger The BMI270 sensor provides two interrupt pins that can be used for different interrupt sources, including a data ready signal. Add support for configuring one the pins as a trigger source. The interrupt pin can be configured with various options: active high or low, push-pull or open-drain, and latched or non-latched. Acked-by: Alex Lanzano Signed-off-by: Gustavo Silva Link: https://patch.msgid.link/20250228-bmi270-irq-v2-3-3f97a4e8f551@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270_core.c | 234 ++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 7 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index 16deda12a5c1..a86be5af5ccb 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -4,11 +4,13 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -26,6 +28,9 @@ #define BMI270_ACCEL_X_REG 0x0c #define BMI270_ANG_VEL_X_REG 0x12 +#define BMI270_INT_STATUS_1_REG 0x1d +#define BMI270_INT_STATUS_1_ACC_GYR_DRDY_MSK GENMASK(7, 6) + #define BMI270_INTERNAL_STATUS_REG 0x21 #define BMI270_INTERNAL_STATUS_MSG_MSK GENMASK(3, 0) #define BMI270_INTERNAL_STATUS_MSG_INIT_OK 0x01 @@ -55,6 +60,20 @@ #define BMI270_GYR_CONF_RANGE_REG 0x43 #define BMI270_GYR_CONF_RANGE_MSK GENMASK(2, 0) +#define BMI270_INT1_IO_CTRL_REG 0x53 +#define BMI270_INT2_IO_CTRL_REG 0x54 +#define BMI270_INT_IO_CTRL_LVL_MSK BIT(1) +#define BMI270_INT_IO_CTRL_OD_MSK BIT(2) +#define BMI270_INT_IO_CTRL_OP_MSK BIT(3) +#define BMI270_INT_IO_LVL_OD_OP_MSK GENMASK(3, 1) + +#define BMI270_INT_LATCH_REG 0x55 +#define BMI270_INT_LATCH_REG_MSK BIT(0) + +#define BMI270_INT_MAP_DATA_REG 0x58 +#define BMI270_INT_MAP_DATA_DRDY_INT1_MSK BIT(2) +#define BMI270_INT_MAP_DATA_DRDY_INT2_MSK BIT(6) + #define BMI270_INIT_CTRL_REG 0x59 #define BMI270_INIT_CTRL_LOAD_DONE_MSK BIT(0) @@ -78,10 +97,20 @@ #define BMI260_INIT_DATA_FILE "bmi260-init-data.fw" #define BMI270_INIT_DATA_FILE "bmi270-init-data.fw" +enum bmi270_irq_pin { + BMI270_IRQ_DISABLED, + BMI270_IRQ_INT1, + BMI270_IRQ_INT2, +}; + struct bmi270_data { struct device *dev; struct regmap *regmap; const struct bmi270_chip_info *chip_info; + enum bmi270_irq_pin irq_pin; + struct iio_trigger *trig; + /* Protect device's private data from concurrent access */ + struct mutex mutex; /* * Where IIO_DMA_MINALIGN may be larger than 8 bytes, align to @@ -274,6 +303,8 @@ static int bmi270_set_scale(struct bmi270_data *data, int chan_type, int uscale) return -EINVAL; } + guard(mutex)(&data->mutex); + for (i = 0; i < bmi270_scale_item.num; i++) { if (bmi270_scale_item.tbl[i].uscale != uscale) continue; @@ -291,6 +322,8 @@ static int bmi270_get_scale(struct bmi270_data *data, int chan_type, int *scale, unsigned int val; struct bmi270_scale_item bmi270_scale_item; + guard(mutex)(&data->mutex); + switch (chan_type) { case IIO_ACCEL: ret = regmap_read(data->regmap, BMI270_ACC_CONF_RANGE_REG, &val); @@ -346,6 +379,8 @@ static int bmi270_set_odr(struct bmi270_data *data, int chan_type, int odr, return -EINVAL; } + guard(mutex)(&data->mutex); + for (i = 0; i < bmi270_odr_item.num; i++) { if (bmi270_odr_item.tbl[i].odr != odr || bmi270_odr_item.tbl[i].uodr != uodr) @@ -364,6 +399,8 @@ static int bmi270_get_odr(struct bmi270_data *data, int chan_type, int *odr, int i, val, ret; struct bmi270_odr_item bmi270_odr_item; + guard(mutex)(&data->mutex); + switch (chan_type) { case IIO_ACCEL: ret = regmap_read(data->regmap, BMI270_ACC_CONF_REG, &val); @@ -397,6 +434,60 @@ static int bmi270_get_odr(struct bmi270_data *data, int chan_type, int *odr, return -EINVAL; } +static irqreturn_t bmi270_irq_thread_handler(int irq, void *private) +{ + struct iio_dev *indio_dev = private; + struct bmi270_data *data = iio_priv(indio_dev); + unsigned int status; + int ret; + + scoped_guard(mutex, &data->mutex) { + ret = regmap_read(data->regmap, BMI270_INT_STATUS_1_REG, + &status); + if (ret) + return IRQ_NONE; + } + + if (FIELD_GET(BMI270_INT_STATUS_1_ACC_GYR_DRDY_MSK, status)) + iio_trigger_poll_nested(data->trig); + + return IRQ_HANDLED; +} + +static int bmi270_data_rdy_trigger_set_state(struct iio_trigger *trig, + bool state) +{ + struct bmi270_data *data = iio_trigger_get_drvdata(trig); + unsigned int field_value = 0; + unsigned int mask; + + guard(mutex)(&data->mutex); + + switch (data->irq_pin) { + case BMI270_IRQ_INT1: + mask = BMI270_INT_MAP_DATA_DRDY_INT1_MSK; + set_mask_bits(&field_value, BMI270_INT_MAP_DATA_DRDY_INT1_MSK, + FIELD_PREP(BMI270_INT_MAP_DATA_DRDY_INT1_MSK, + state)); + break; + case BMI270_IRQ_INT2: + mask = BMI270_INT_MAP_DATA_DRDY_INT2_MSK; + set_mask_bits(&field_value, BMI270_INT_MAP_DATA_DRDY_INT2_MSK, + FIELD_PREP(BMI270_INT_MAP_DATA_DRDY_INT2_MSK, + state)); + break; + default: + return -EINVAL; + } + + return regmap_update_bits(data->regmap, BMI270_INT_MAP_DATA_REG, mask, + field_value); +} + +static const struct iio_trigger_ops bmi270_trigger_ops = { + .set_trigger_state = &bmi270_data_rdy_trigger_set_state, +}; + static irqreturn_t bmi270_trigger_handler(int irq, void *p) { struct iio_poll_func *pf = p; @@ -404,6 +495,8 @@ static irqreturn_t bmi270_trigger_handler(int irq, void *p) struct bmi270_data *data = iio_priv(indio_dev); int ret; + guard(mutex)(&data->mutex); + ret = regmap_bulk_read(data->regmap, BMI270_ACCEL_X_REG, &data->buffer.channels, sizeof(data->buffer.channels)); @@ -439,13 +532,15 @@ static int bmi270_get_data(struct bmi270_data *data, int chan_type, int axis, return -EINVAL; } + guard(mutex)(&data->mutex); + ret = regmap_bulk_read(data->regmap, reg, &sample, sizeof(sample)); if (ret) return ret; *val = sign_extend32(le16_to_cpu(sample), 15); - return 0; + return IIO_VAL_INT; } static int bmi270_read_raw(struct iio_dev *indio_dev, @@ -457,11 +552,11 @@ static int bmi270_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; ret = bmi270_get_data(data, chan->type, chan->channel2, val); - if (ret) - return ret; - - return IIO_VAL_INT; + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SCALE: ret = bmi270_get_scale(data, chan->type, val, val2); return ret ? ret : IIO_VAL_INT_PLUS_MICRO; @@ -486,12 +581,21 @@ static int bmi270_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct bmi270_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_SCALE: - return bmi270_set_scale(data, chan->type, val2); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi270_set_scale(data, chan->type, val2); + iio_device_release_direct(indio_dev); + return ret; case IIO_CHAN_INFO_SAMP_FREQ: - return bmi270_set_odr(data, chan->type, val, val2); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + ret = bmi270_set_odr(data, chan->type, val, val2); + iio_device_release_direct(indio_dev); + return ret; default: return -EINVAL; } @@ -597,6 +701,116 @@ static const struct iio_chan_spec bmi270_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(BMI270_SCAN_TIMESTAMP), }; +static int bmi270_int_pin_config(struct bmi270_data *data, + enum bmi270_irq_pin irq_pin, + bool active_high, bool open_drain, bool latch) +{ + unsigned int reg, field_value; + int ret; + + ret = regmap_update_bits(data->regmap, BMI270_INT_LATCH_REG, + BMI270_INT_LATCH_REG_MSK, + FIELD_PREP(BMI270_INT_LATCH_REG_MSK, latch)); + if (ret) + return ret; + + switch (irq_pin) { + case BMI270_IRQ_INT1: + reg = BMI270_INT1_IO_CTRL_REG; + break; + case BMI270_IRQ_INT2: + reg = BMI270_INT2_IO_CTRL_REG; + break; + default: + return -EINVAL; + } + + field_value = FIELD_PREP(BMI270_INT_IO_CTRL_LVL_MSK, active_high) | + FIELD_PREP(BMI270_INT_IO_CTRL_OD_MSK, open_drain) | + FIELD_PREP(BMI270_INT_IO_CTRL_OP_MSK, 1); + return regmap_update_bits(data->regmap, reg, + BMI270_INT_IO_LVL_OD_OP_MSK, field_value); +} + +static int bmi270_trigger_probe(struct bmi270_data *data, + struct iio_dev *indio_dev) +{ + bool open_drain, active_high, latch; + struct fwnode_handle *fwnode; + enum bmi270_irq_pin irq_pin; + int ret, irq, irq_type; + + fwnode = dev_fwnode(data->dev); + if (!fwnode) + return -ENODEV; + + irq = fwnode_irq_get_byname(fwnode, "INT1"); + if (irq > 0) { + irq_pin = BMI270_IRQ_INT1; + } else { + irq = fwnode_irq_get_byname(fwnode, "INT2"); + if (irq < 0) + return 0; + + irq_pin = BMI270_IRQ_INT2; + } + + irq_type = irq_get_trigger_type(irq); + switch (irq_type) { + case IRQF_TRIGGER_RISING: + latch = false; + active_high = true; + break; + case IRQF_TRIGGER_HIGH: + latch = true; + active_high = true; + break; + case IRQF_TRIGGER_FALLING: + latch = false; + active_high = false; + break; + case IRQF_TRIGGER_LOW: + latch = true; + active_high = false; + break; + default: + return dev_err_probe(data->dev, -EINVAL, + "Invalid interrupt type 0x%x specified\n", + irq_type); + } + + open_drain = fwnode_property_read_bool(fwnode, "drive-open-drain"); + + ret = bmi270_int_pin_config(data, irq_pin, active_high, open_drain, + latch); + if (ret) + return dev_err_probe(data->dev, ret, + "Failed to configure irq line\n"); + + data->trig = devm_iio_trigger_alloc(data->dev, "%s-trig-%d", + indio_dev->name, irq_pin); + if (!data->trig) + return -ENOMEM; + + data->trig->ops = &bmi270_trigger_ops; + iio_trigger_set_drvdata(data->trig, data); + + ret = devm_request_threaded_irq(data->dev, irq, NULL, + bmi270_irq_thread_handler, + IRQF_ONESHOT, "bmi270-int", indio_dev); + if (ret) + return dev_err_probe(data->dev, ret, "Failed to request IRQ\n"); + + ret = devm_iio_trigger_register(data->dev, data->trig); + if (ret) + return dev_err_probe(data->dev, ret, + "Trigger registration failed\n"); + + data->irq_pin = irq_pin; + + return 0; +} + static int bmi270_validate_chip_id(struct bmi270_data *data) { int chip_id; @@ -757,6 +971,8 @@ int bmi270_core_probe(struct device *dev, struct regmap *regmap, data->dev = dev; data->regmap = regmap; data->chip_info = chip_info; + data->irq_pin = BMI270_IRQ_DISABLED; + mutex_init(&data->mutex); ret = bmi270_chip_init(data); if (ret) @@ -769,6 +985,10 @@ int bmi270_core_probe(struct device *dev, struct regmap *regmap, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &bmi270_info; + ret = bmi270_trigger_probe(data, indio_dev); + if (ret) + return ret; + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, iio_pollfunc_store_time, bmi270_trigger_handler, NULL); From 60a7903301f8b53a3012f31375841cbb3957fa00 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Thu, 20 Feb 2025 10:42:20 +0000 Subject: [PATCH 0547/2157] iio: accel: adxl345: reorganize measurement enable Move the measurement enable function up in order to have it generically available. This is a preparation for upcomming patches. Particular features need to have measuring off while changing settings, and turned on again afterwards. Signed-off-by: Lothar Rubusch Link: https://patch.msgid.link/20250220104234.40958-2-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl345_core.c | 40 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/iio/accel/adxl345_core.c b/drivers/iio/accel/adxl345_core.c index d1b2d3985a40..e2f65d8278d5 100644 --- a/drivers/iio/accel/adxl345_core.c +++ b/drivers/iio/accel/adxl345_core.c @@ -76,6 +76,26 @@ static const unsigned long adxl345_scan_masks[] = { 0 }; +/** + * adxl345_set_measure_en() - Enable and disable measuring. + * + * @st: The device data. + * @en: Enable measurements, else standby mode. + * + * For lowest power operation, standby mode can be used. In standby mode, + * current consumption is supposed to be reduced to 0.1uA (typical). In this + * mode no measurements are made. Placing the device into standby mode + * preserves the contents of FIFO. + * + * Return: Returns 0 if successful, or a negative error value. + */ +static int adxl345_set_measure_en(struct adxl345_state *st, bool en) +{ + unsigned int val = en ? ADXL345_POWER_CTL_MEASURE : ADXL345_POWER_CTL_STANDBY; + + return regmap_write(st->regmap, ADXL345_REG_POWER_CTL, val); +} + static int adxl345_set_interrupts(struct adxl345_state *st) { int ret; @@ -214,26 +234,6 @@ static int adxl345_write_raw_get_fmt(struct iio_dev *indio_dev, } } -/** - * adxl345_set_measure_en() - Enable and disable measuring. - * - * @st: The device data. - * @en: Enable measurements, else standby mode. - * - * For lowest power operation, standby mode can be used. In standby mode, - * current consumption is supposed to be reduced to 0.1uA (typical). In this - * mode no measurements are made. Placing the device into standby mode - * preserves the contents of FIFO. - * - * Return: Returns 0 if successful, or a negative error value. - */ -static int adxl345_set_measure_en(struct adxl345_state *st, bool en) -{ - unsigned int val = en ? ADXL345_POWER_CTL_MEASURE : ADXL345_POWER_CTL_STANDBY; - - return regmap_write(st->regmap, ADXL345_REG_POWER_CTL, val); -} - static void adxl345_powerdown(void *ptr) { struct adxl345_state *st = ptr; From a69b0bd304382b5e7050418614b1f5ddb36e77fe Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Thu, 20 Feb 2025 10:42:21 +0000 Subject: [PATCH 0548/2157] iio: accel: adxl345: add debug register access Add the possibility to verify the content of the configuration registers of the sensor in preparation for upcomming feature implementations. Signed-off-by: Lothar Rubusch Link: https://patch.msgid.link/20250220104234.40958-3-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl345_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/iio/accel/adxl345_core.c b/drivers/iio/accel/adxl345_core.c index e2f65d8278d5..fedf0d41d36b 100644 --- a/drivers/iio/accel/adxl345_core.c +++ b/drivers/iio/accel/adxl345_core.c @@ -202,6 +202,16 @@ static int adxl345_write_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int adxl345_reg_access(struct iio_dev *indio_dev, unsigned int reg, + unsigned int writeval, unsigned int *readval) +{ + struct adxl345_state *st = iio_priv(indio_dev); + + if (readval) + return regmap_read(st->regmap, reg, readval); + return regmap_write(st->regmap, reg, writeval); +} + static int adxl345_set_watermark(struct iio_dev *indio_dev, unsigned int value) { struct adxl345_state *st = iio_priv(indio_dev); @@ -467,6 +477,7 @@ static const struct iio_info adxl345_info = { .read_raw = adxl345_read_raw, .write_raw = adxl345_write_raw, .write_raw_get_fmt = adxl345_write_raw_get_fmt, + .debugfs_reg_access = &adxl345_reg_access, .hwfifo_set_watermark = adxl345_set_watermark, }; From 62c6b4f1c70e0a3a31df36dd055524cfe6acfa5e Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Thu, 20 Feb 2025 10:42:22 +0000 Subject: [PATCH 0549/2157] iio: accel: adxl345: reorganize irq handler Reorganize the IRQ handler. Move the overrun handling to the bottom. Overrun leads to reset the interrupt register. This also happens at evaluation of a particular interrupt event. First evaluate an event if possible, then fall back to overrun handling. Additionally simplify fetching the interrupt status function. Both is in preparation to build interrupt handling up for the handling of different detected events, implemented in follow up patches. Signed-off-by: Lothar Rubusch Link: https://patch.msgid.link/20250220104234.40958-4-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl345.h | 1 - drivers/iio/accel/adxl345_core.c | 27 +++++++-------------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/iio/accel/adxl345.h b/drivers/iio/accel/adxl345.h index 517e494ba555..bc6d634bd85c 100644 --- a/drivers/iio/accel/adxl345.h +++ b/drivers/iio/accel/adxl345.h @@ -43,7 +43,6 @@ #define ADXL345_REG_INT_ENABLE 0x2E #define ADXL345_REG_INT_MAP 0x2F #define ADXL345_REG_INT_SOURCE 0x30 -#define ADXL345_REG_INT_SOURCE_MSK 0xFF #define ADXL345_REG_DATA_FORMAT 0x31 #define ADXL345_REG_XYZ_BASE 0x32 #define ADXL345_REG_DATA_AXIS(index) \ diff --git a/drivers/iio/accel/adxl345_core.c b/drivers/iio/accel/adxl345_core.c index fedf0d41d36b..375c27d16827 100644 --- a/drivers/iio/accel/adxl345_core.c +++ b/drivers/iio/accel/adxl345_core.c @@ -107,8 +107,7 @@ static int adxl345_set_interrupts(struct adxl345_state *st) * interrupts to the INT1 pin, whereas bits set to 1 send their respective * interrupts to the INT2 pin. The intio shall convert this accordingly. */ - int_map = FIELD_GET(ADXL345_REG_INT_SOURCE_MSK, - st->intio ? st->int_map : ~st->int_map); + int_map = st->intio ? st->int_map : ~st->int_map; ret = regmap_write(st->regmap, ADXL345_REG_INT_MAP, int_map); if (ret) @@ -404,18 +403,6 @@ static const struct iio_buffer_setup_ops adxl345_buffer_ops = { .predisable = adxl345_buffer_predisable, }; -static int adxl345_get_status(struct adxl345_state *st) -{ - int ret; - unsigned int regval; - - ret = regmap_read(st->regmap, ADXL345_REG_INT_SOURCE, ®val); - if (ret < 0) - return ret; - - return FIELD_GET(ADXL345_REG_INT_SOURCE_MSK, regval); -} - static int adxl345_fifo_push(struct iio_dev *indio_dev, int samples) { @@ -449,14 +436,10 @@ static irqreturn_t adxl345_irq_handler(int irq, void *p) int int_stat; int samples; - int_stat = adxl345_get_status(st); - if (int_stat <= 0) + if (regmap_read(st->regmap, ADXL345_REG_INT_SOURCE, &int_stat)) return IRQ_NONE; - if (int_stat & ADXL345_INT_OVERRUN) - goto err; - - if (int_stat & ADXL345_INT_WATERMARK) { + if (FIELD_GET(ADXL345_INT_WATERMARK, int_stat)) { samples = adxl345_get_samples(st); if (samples < 0) goto err; @@ -464,6 +447,10 @@ static irqreturn_t adxl345_irq_handler(int irq, void *p) if (adxl345_fifo_push(indio_dev, samples) < 0) goto err; } + + if (FIELD_GET(ADXL345_INT_OVERRUN, int_stat)) + goto err; + return IRQ_HANDLED; err: From eb2f9ab1f5133e5a16e3f6303464f5c60012aa05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2025 12:36:42 +0300 Subject: [PATCH 0550/2157] iio: adc: ad4851: Fix signedness bug in ad4851_calibrate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "c" variable is used to store error codes from ad4851_find_opt() so it has to be signed for the error handling to work. Change it to type int. Fixes: 6250803fe2ec ("iio: adc: ad4851: add ad485x driver") Signed-off-by: Dan Carpenter Reviewed-by: Nuno Sá Link: https://patch.msgid.link/f5e260e9-d7a8-4dae-b7ea-f1bbb1760e60@stanley.mountain Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4851.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad4851.c b/drivers/iio/adc/ad4851.c index 1ad37084355e..98ebc853db79 100644 --- a/drivers/iio/adc/ad4851.c +++ b/drivers/iio/adc/ad4851.c @@ -492,11 +492,11 @@ static int ad4851_find_opt(const unsigned long *field, unsigned int start, static int ad4851_calibrate(struct iio_dev *indio_dev) { struct ad4851_state *st = iio_priv(indio_dev); - unsigned int opt_delay, num_lanes, delay, i, s, c; + unsigned int opt_delay, num_lanes, delay, i, s; enum iio_backend_interface_type interface_type; DECLARE_BITMAP(pn_status, AD4851_MAX_LANES * AD4851_MAX_IODELAY); bool status; - int ret; + int c, ret; ret = iio_backend_interface_type_get(st->back, &interface_type); if (ret) From 1062d81086156e42878d701b816d2f368b53a77c Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 26 Feb 2025 02:40:12 -0800 Subject: [PATCH 0551/2157] iommufd: Disallow allocating nested parent domain with fault ID Allocating a domain with a fault ID indicates that the domain is faultable. However, there is a gap for the nested parent domain to support PRI. Some hardware lacks the capability to distinguish whether PRI occurs at stage 1 or stage 2. This limitation may require software-based page table walking to resolve. Since no in-tree IOMMU driver currently supports this functionality, it is disallowed. For more details, refer to the related discussion at [1]. [1] https://lore.kernel.org/linux-iommu/bd1655c6-8b2f-4cfa-adb1-badc00d01811@intel.com/ Link: https://patch.msgid.link/r/20250226104012.82079-1-yi.l.liu@intel.com Suggested-by: Lu Baolu Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 3 +++ tools/testing/selftests/iommu/iommufd.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 7de6e914232e..268315b1d8bc 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -126,6 +126,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_FAULT_ID_VALID) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index a1b2b657999d..618c03bb6509 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -439,6 +439,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &test_hwpt_id); test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0, &test_hwpt_id); + test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_FAULT_ID_VALID, + &test_hwpt_id); test_cmd_hwpt_alloc(self->device_id, self->ioas_id, IOMMU_HWPT_ALLOC_NEST_PARENT, From a05df03a88bc1088be8e9d958f208d6484691e43 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 27 Feb 2025 12:07:29 -0800 Subject: [PATCH 0552/2157] iommufd: Fix uninitialized rc in iommufd_access_rw() Reported by smatch: drivers/iommu/iommufd/device.c:1392 iommufd_access_rw() error: uninitialized symbol 'rc'. Fixes: 8d40205f6093 ("iommufd: Add kAPI toward external drivers for kernel access") Link: https://patch.msgid.link/r/20250227200729.85030-1-nicolinc@nvidia.com Cc: stable@vger.kernel.org Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202502271339.a2nWr9UA-lkp@intel.com/ [nicolinc: can't find an original report but only in "old smatch warnings"] Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 4e107f69f951..b2f0cb909e6d 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -1349,7 +1349,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; - int rc; + int rc = -EINVAL; if (!length) return -EINVAL; From 443238858a2dccfefb37ce98b1328bec58f56b1c Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 4 Mar 2025 14:05:18 +0800 Subject: [PATCH 0553/2157] iio: imu: adis: fix uninitialized symbol warning Fix below kernel warning: smatch warnings: drivers/iio/imu/adis.c:319 __adis_check_status() error: uninitialized symbol 'status_16'. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: sunliming Link: https://patch.msgid.link/20250304060518.1834910-1-sunliming@linux.dev Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index 1c646c36aeb1..0ea072a4c966 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -306,7 +306,7 @@ int __adis_check_status(struct adis *adis) { unsigned int status; int diag_stat_bits; - u16 status_16; + u16 status_16 = 0; int ret; int i; From 7867a0d1dd8c621e95772ac3addb7396ab2d5883 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 3 Mar 2025 14:39:57 +0000 Subject: [PATCH 0554/2157] MAINTAINERS: remove adi,ad7606.yaml from SEPS525 Remove Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml from STAGING - SEPS525 LCD CONTROLLER DRIVERS. This was likley a copy/paste mistake. There is no bindings file for SEPS525 since it is only in staging. The removed file matches Documentation/devicetree/bindings/iio/*/adi,* under ANALOG DEVICES INC IIO DRIVERS already so wasn't just misplaced. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250303-maintainers-remove-adi-ad7606-yaml-from-seps525-lcd-controller-v1-1-a4e4f1b824ab@baylibre.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d8ca113b83ad..06f122cb8bbd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22486,7 +22486,6 @@ STAGING - SEPS525 LCD CONTROLLER DRIVERS M: Michael Hennerich L: linux-fbdev@vger.kernel.org S: Supported -F: Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml F: drivers/staging/fbtft/fb_seps525.c STAGING - SILICON MOTION SM750 FRAME BUFFER DRIVER From d15fc646ccff50addb5b56bc71620691da642af6 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Thu, 27 Feb 2025 18:40:50 +0000 Subject: [PATCH 0555/2157] dt-bindings: iio: adc: Add rockchip,rk3528-saradc variant The Successive Approximation ADC (SARADC) in RK3528 uses the v2 controller and support: - 10-bit resolution - Up to 1MS/s sampling rate - 4 single-ended input channels - Current consumption: 0.5mA @ 1MS/s Add a rockchip,rk3562-saradc compatible string for the 4 channels of 10-bit resolution supported by SARADC in RK3528. Signed-off-by: Jonas Karlman Acked-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20250227184058.2964204-2-jonas@kwiboo.se Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml index fd93ed3991e0..86eae9672c04 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml @@ -15,6 +15,7 @@ properties: - const: rockchip,saradc - const: rockchip,rk3066-tsadc - const: rockchip,rk3399-saradc + - const: rockchip,rk3528-saradc - const: rockchip,rk3588-saradc - items: - const: rockchip,rk3576-saradc From 8a9aa0bbd615c9b377bd82e671519f0c4cb272dd Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Thu, 27 Feb 2025 18:40:51 +0000 Subject: [PATCH 0556/2157] iio: adc: rockchip_saradc: Add support for RK3528 The Successive Approximation ADC (SARADC) in RK3528 uses the v2 controller and support: - 10-bit resolution - Up to 1MS/s sampling rate - 4 single-ended input channels - Current consumption: 0.5mA @ 1MS/s Add support for the 4 channels of 10-bit resolution supported by SARADC in RK3528. Signed-off-by: Jonas Karlman Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20250227184058.2964204-3-jonas@kwiboo.se Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rockchip_saradc.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index a29e54754c8f..2619547d5d6f 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -275,6 +275,21 @@ static const struct rockchip_saradc_data rk3399_saradc_data = { .power_down = rockchip_saradc_power_down_v1, }; +static const struct iio_chan_spec rockchip_rk3528_saradc_iio_channels[] = { + SARADC_CHANNEL(0, "adc0", 10), + SARADC_CHANNEL(1, "adc1", 10), + SARADC_CHANNEL(2, "adc2", 10), + SARADC_CHANNEL(3, "adc3", 10), +}; + +static const struct rockchip_saradc_data rk3528_saradc_data = { + .channels = rockchip_rk3528_saradc_iio_channels, + .num_channels = ARRAY_SIZE(rockchip_rk3528_saradc_iio_channels), + .clk_rate = 1000000, + .start = rockchip_saradc_start_v2, + .read = rockchip_saradc_read_v2, +}; + static const struct iio_chan_spec rockchip_rk3568_saradc_iio_channels[] = { SARADC_CHANNEL(0, "adc0", 10), SARADC_CHANNEL(1, "adc1", 10), @@ -324,6 +339,9 @@ static const struct of_device_id rockchip_saradc_match[] = { }, { .compatible = "rockchip,rk3399-saradc", .data = &rk3399_saradc_data, + }, { + .compatible = "rockchip,rk3528-saradc", + .data = &rk3528_saradc_data, }, { .compatible = "rockchip,rk3568-saradc", .data = &rk3568_saradc_data, From dc872c5f527a07124b2ad92d8c70d11829f17ac9 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:22 +0800 Subject: [PATCH 0557/2157] Coresight: Add support for new APB clock name Add support for new APB clock-name. If the function fails to obtain the clock with the name "apb_pclk", it will attempt to acquire the clock with the name "apb". Reviewed-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-2-quic_jiegan@quicinc.com --- include/linux/coresight.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c7614ccb3232..2e493d5288d8 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -471,8 +471,11 @@ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) int ret; pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) - return NULL; + if (IS_ERR(pclk)) { + pclk = clk_get(dev, "apb"); + if (IS_ERR(pclk)) + return NULL; + } ret = clk_prepare_enable(pclk); if (ret) { From c367a89dec267c65b556ace5d6aafd07fb1d66b1 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:23 +0800 Subject: [PATCH 0558/2157] Coresight: Add trace_id function to retrieving the trace ID Add 'trace_id' function pointer in coresight_ops. It's responsible for retrieving the device's trace ID. Co-developed-by: James Clark Signed-off-by: James Clark Reviewed-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-3-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 30 +++++++++++++++++++ drivers/hwtracing/coresight/coresight-dummy.c | 13 +++++++- .../coresight/coresight-etm3x-core.c | 1 + .../coresight/coresight-etm4x-core.c | 1 + drivers/hwtracing/coresight/coresight-stm.c | 11 +++++++ drivers/hwtracing/coresight/coresight-tpda.c | 11 +++++++ include/linux/coresight.h | 5 ++++ 7 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 48bfb8036924..44ad407817c1 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -24,6 +24,7 @@ #include "coresight-etm-perf.h" #include "coresight-priv.h" #include "coresight-syscfg.h" +#include "coresight-trace-id.h" /* * Mutex used to lock all sysfs enable and disable actions and loading and @@ -1567,6 +1568,35 @@ void coresight_remove_driver(struct amba_driver *amba_drv, } EXPORT_SYMBOL_GPL(coresight_remove_driver); +int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink) +{ + int trace_id; + int cpu = source_ops(csdev)->cpu_id(csdev); + + switch (mode) { + case CS_MODE_SYSFS: + trace_id = coresight_trace_id_get_cpu_id(cpu); + break; + case CS_MODE_PERF: + if (WARN_ON(!sink)) + return -EINVAL; + + trace_id = coresight_trace_id_get_cpu_id_map(cpu, &sink->perf_sink_id_map); + break; + default: + trace_id = -EINVAL; + break; + } + + if (!IS_VALID_CS_TRACE_ID(trace_id)) + dev_err(&csdev->dev, + "Failed to allocate trace ID on CPU%d\n", cpu); + + return trace_id; +} +EXPORT_SYMBOL_GPL(coresight_etm_get_trace_id); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Pratik Patel "); MODULE_AUTHOR("Mathieu Poirier "); diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c index 9be53be8964b..b5692ba358c1 100644 --- a/drivers/hwtracing/coresight/coresight-dummy.c +++ b/drivers/hwtracing/coresight/coresight-dummy.c @@ -41,6 +41,16 @@ static void dummy_source_disable(struct coresight_device *csdev, dev_dbg(csdev->dev.parent, "Dummy source disabled\n"); } +static int dummy_source_trace_id(struct coresight_device *csdev, __maybe_unused enum cs_mode mode, + __maybe_unused struct coresight_device *sink) +{ + struct dummy_drvdata *drvdata; + + drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->traceid; +} + static int dummy_sink_enable(struct coresight_device *csdev, enum cs_mode mode, void *data) { @@ -62,7 +72,8 @@ static const struct coresight_ops_source dummy_source_ops = { }; static const struct coresight_ops dummy_source_cs_ops = { - .source_ops = &dummy_source_ops, + .trace_id = dummy_source_trace_id, + .source_ops = &dummy_source_ops, }; static const struct coresight_ops_sink dummy_sink_ops = { diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c index c103f4c70f5d..c1dda4bc4a2f 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c @@ -704,6 +704,7 @@ static const struct coresight_ops_source etm_source_ops = { }; static const struct coresight_ops etm_cs_ops = { + .trace_id = coresight_etm_get_trace_id, .source_ops = &etm_source_ops, }; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 335fa157f30f..07e2dee54688 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1104,6 +1104,7 @@ static const struct coresight_ops_source etm4_source_ops = { }; static const struct coresight_ops etm4_cs_ops = { + .trace_id = coresight_etm_get_trace_id, .source_ops = &etm4_source_ops, }; diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index b581a30a1cd9..aca25b5e3be2 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -281,12 +281,23 @@ static void stm_disable(struct coresight_device *csdev, } } +static int stm_trace_id(struct coresight_device *csdev, __maybe_unused enum cs_mode mode, + __maybe_unused struct coresight_device *sink) +{ + struct stm_drvdata *drvdata; + + drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->traceid; +} + static const struct coresight_ops_source stm_source_ops = { .enable = stm_enable, .disable = stm_disable, }; static const struct coresight_ops stm_cs_ops = { + .trace_id = stm_trace_id, .source_ops = &stm_source_ops, }; diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c index c88c548e6d49..0633f04beb24 100644 --- a/drivers/hwtracing/coresight/coresight-tpda.c +++ b/drivers/hwtracing/coresight/coresight-tpda.c @@ -242,12 +242,23 @@ static void tpda_disable(struct coresight_device *csdev, dev_dbg(drvdata->dev, "TPDA inport %d disabled\n", in->dest_port); } +static int tpda_trace_id(struct coresight_device *csdev, __maybe_unused enum cs_mode mode, + __maybe_unused struct coresight_device *sink) +{ + struct tpda_drvdata *drvdata; + + drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->atid; +} + static const struct coresight_ops_link tpda_link_ops = { .enable = tpda_enable, .disable = tpda_disable, }; static const struct coresight_ops tpda_cs_ops = { + .trace_id = tpda_trace_id, .link_ops = &tpda_link_ops, }; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 2e493d5288d8..1396bda77f3f 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -335,6 +335,7 @@ enum cs_mode { CS_MODE_PERF, }; +#define coresight_ops(csdev) csdev->ops #define source_ops(csdev) csdev->ops->source_ops #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops @@ -421,6 +422,8 @@ struct coresight_ops_panic { }; struct coresight_ops { + int (*trace_id)(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; @@ -713,4 +716,6 @@ int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); +int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); #endif /* _LINUX_COREISGHT_H */ From 182e8c70791d68d332b33d5c6e849628f89cdf0d Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 3 Mar 2025 11:29:24 +0800 Subject: [PATCH 0559/2157] Coresight: Use coresight_etm_get_trace_id() in traceid_show() Use the new API, coresight_etm_get_trace_id, to read the traceid of the ETM device when call traceid_show via sysfs node. Signed-off-by: James Clark Reviewed-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-4-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-etm3x-sysfs.c | 3 +-- drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c index 68c644be9813..b9006451f515 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c @@ -1190,10 +1190,9 @@ static DEVICE_ATTR_RO(cpu); static ssize_t traceid_show(struct device *dev, struct device_attribute *attr, char *buf) { - int trace_id; struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + int trace_id = coresight_etm_get_trace_id(drvdata->csdev, CS_MODE_SYSFS, NULL); - trace_id = etm_read_alloc_trace_id(drvdata); if (trace_id < 0) return trace_id; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index c767f8ae4cf1..e5216c0f60da 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -4,6 +4,7 @@ * Author: Mathieu Poirier */ +#include #include #include #include @@ -2402,10 +2403,9 @@ static ssize_t trctraceid_show(struct device *dev, struct device_attribute *attr, char *buf) { - int trace_id; struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); + int trace_id = coresight_etm_get_trace_id(drvdata->csdev, CS_MODE_SYSFS, NULL); - trace_id = etm4_read_alloc_trace_id(drvdata); if (trace_id < 0) return trace_id; From fcd104f0ed9f620fc063d2edc5be08e239c43874 Mon Sep 17 00:00:00 2001 From: Folker Schwesinger Date: Thu, 27 Feb 2025 20:27:53 +0100 Subject: [PATCH 0560/2157] iio: buffer-dma: Fix docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a typo in the docstring of iio_dma_buffer_read() and fix what looks like a copy-and-paste error in the iio_dma_buffer_write() docstring. Signed-off-by: Folker Schwesinger Reviewed-by: Nuno Sá Link: https://patch.msgid.link/D83IPSTKYWNB.1PUBV1530XI86@folker-schwesinger.de Signed-off-by: Jonathan Cameron --- drivers/iio/buffer/industrialio-buffer-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c index 7ea784304ffb..ee294a775e8a 100644 --- a/drivers/iio/buffer/industrialio-buffer-dma.c +++ b/drivers/iio/buffer/industrialio-buffer-dma.c @@ -624,7 +624,7 @@ static int iio_dma_buffer_io(struct iio_buffer *buffer, size_t n, /** * iio_dma_buffer_read() - DMA buffer read callback - * @buffer: Buffer to read form + * @buffer: Buffer to read from * @n: Number of bytes to read * @user_buffer: Userspace buffer to copy the data to * @@ -640,7 +640,7 @@ EXPORT_SYMBOL_NS_GPL(iio_dma_buffer_read, "IIO_DMA_BUFFER"); /** * iio_dma_buffer_write() - DMA buffer write callback - * @buffer: Buffer to read form + * @buffer: Buffer to write to * @n: Number of bytes to read * @user_buffer: Userspace buffer to copy the data from * From 5017dcb8fcace47292b081cb729d392f908c0ef8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Feb 2025 09:02:28 +0000 Subject: [PATCH 0561/2157] iio: light: Fix spelling mistake "regist" -> "register" There are spelling mistakes in dev_err messages. Fix them. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250228090228.679535-1-colin.i.king@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm32181.c | 2 +- drivers/iio/light/cm36651.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c index aeae0566ec12..bb90f738312a 100644 --- a/drivers/iio/light/cm32181.c +++ b/drivers/iio/light/cm32181.c @@ -492,7 +492,7 @@ static int cm32181_probe(struct i2c_client *client) ret = devm_iio_device_register(dev, indio_dev); if (ret) { - dev_err(dev, "%s: regist device failed\n", __func__); + dev_err(dev, "%s: register device failed\n", __func__); return ret; } diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c index ae3fc3299eec..446dd54d5037 100644 --- a/drivers/iio/light/cm36651.c +++ b/drivers/iio/light/cm36651.c @@ -683,7 +683,7 @@ static int cm36651_probe(struct i2c_client *client) ret = iio_device_register(indio_dev); if (ret) { - dev_err(&client->dev, "%s: regist device failed\n", __func__); + dev_err(&client->dev, "%s: register device failed\n", __func__); goto error_free_irq; } From 15a007e7ae5b0680bc236b478c5c680512ec45bd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2025 12:35:40 +0300 Subject: [PATCH 0562/2157] iio: adc: ad4030: fix error pointer dereference in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intention here was obviously to return an error if devm_regmap_init() fails, but the return statement was accidentally left out. This leads to an error pointer dereference when we call: ret = ad4030_detect_chip_info(st); Add the return statement. Fixes: ec25cf6f1ee3 ("iio: adc: ad4030: add support for ad4632-16 and ad4632-24") Signed-off-by: Dan Carpenter Reviewed-by: Nuno Sá Link: https://patch.msgid.link/cc67cee7-9c65-46d2-aae3-f860fc3cc461@stanley.mountain Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4030.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c index 209cfc2e1785..9a020680885d 100644 --- a/drivers/iio/adc/ad4030.c +++ b/drivers/iio/adc/ad4030.c @@ -1011,8 +1011,8 @@ static int ad4030_probe(struct spi_device *spi) st->regmap = devm_regmap_init(dev, &ad4030_regmap_bus, st, &ad4030_regmap_config); if (IS_ERR(st->regmap)) - dev_err_probe(dev, PTR_ERR(st->regmap), - "Failed to initialize regmap\n"); + return dev_err_probe(dev, PTR_ERR(st->regmap), + "Failed to initialize regmap\n"); st->chip = spi_get_device_match_data(spi); if (!st->chip) From 94e4fc4004e492754489b4753a032840e8447351 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 27 Feb 2025 19:03:42 +0800 Subject: [PATCH 0563/2157] dt-bindings: iio: adc: Add rockchip,rk3562-saradc string Add rockchip,rk3562-saradc compatible string. The saradc on rk3562 is v2 controller, with 10bit width which is different with rk3588. Signed-off-by: Kever Yang Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250227110343.2342017-1-kever.yang@rock-chips.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml index 86eae9672c04..41e0c56ef8e3 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml @@ -16,6 +16,7 @@ properties: - const: rockchip,rk3066-tsadc - const: rockchip,rk3399-saradc - const: rockchip,rk3528-saradc + - const: rockchip,rk3562-saradc - const: rockchip,rk3588-saradc - items: - const: rockchip,rk3576-saradc From 1e30116da036b8c5969c6461b344fb1ee71edcf0 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Thu, 27 Feb 2025 19:03:43 +0800 Subject: [PATCH 0564/2157] iio: adc: rockchip_saradc: add rk3562 rk3562 is using v2 saradc with 8 channels. Signed-off-by: Simon Xue Signed-off-by: Kever Yang Link: https://patch.msgid.link/20250227110343.2342017-2-kever.yang@rock-chips.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rockchip_saradc.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 2619547d5d6f..9a099df79518 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Rockchip Successive Approximation Register (SAR) A/D Converter - * Copyright (C) 2014 ROCKCHIP, Inc. + * Copyright (C) 2014 Rockchip Electronics Co., Ltd. */ #include @@ -290,6 +290,25 @@ static const struct rockchip_saradc_data rk3528_saradc_data = { .read = rockchip_saradc_read_v2, }; +static const struct iio_chan_spec rockchip_rk3562_saradc_iio_channels[] = { + SARADC_CHANNEL(0, "adc0", 10), + SARADC_CHANNEL(1, "adc1", 10), + SARADC_CHANNEL(2, "adc2", 10), + SARADC_CHANNEL(3, "adc3", 10), + SARADC_CHANNEL(4, "adc4", 10), + SARADC_CHANNEL(5, "adc5", 10), + SARADC_CHANNEL(6, "adc6", 10), + SARADC_CHANNEL(7, "adc7", 10), +}; + +static const struct rockchip_saradc_data rk3562_saradc_data = { + .channels = rockchip_rk3562_saradc_iio_channels, + .num_channels = ARRAY_SIZE(rockchip_rk3562_saradc_iio_channels), + .clk_rate = 1000000, + .start = rockchip_saradc_start_v2, + .read = rockchip_saradc_read_v2, +}; + static const struct iio_chan_spec rockchip_rk3568_saradc_iio_channels[] = { SARADC_CHANNEL(0, "adc0", 10), SARADC_CHANNEL(1, "adc1", 10), @@ -342,6 +361,9 @@ static const struct of_device_id rockchip_saradc_match[] = { }, { .compatible = "rockchip,rk3528-saradc", .data = &rk3528_saradc_data, + }, { + .compatible = "rockchip,rk3562-saradc", + .data = &rk3562_saradc_data, }, { .compatible = "rockchip,rk3568-saradc", .data = &rk3568_saradc_data, From f66d625c4d48e58ad38cc1953092f8b632b99671 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 26 Feb 2025 15:50:03 +0100 Subject: [PATCH 0565/2157] iio: adc: ad7380: add adaq4381-4 support adaq4381-4 is the 14 bits version of adaq4380-1 chip. Add support for it. Signed-off-by: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20250226-ad7380-add-adaq4381-4-support-v1-1-f350ab872d37@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index f232ad1a4963..407930f1f5dd 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -15,6 +15,7 @@ * ad7386/7/8-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/ad7386-4-7387-4-7388-4.pdf * adaq4370-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4370-4.pdf * adaq4380-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4380-4.pdf + * adaq4381-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4381-4.pdf */ #include @@ -287,6 +288,7 @@ DEFINE_AD7380_2_CHANNEL(ad7381_channels, 14, 1, s); DEFINE_AD7380_4_CHANNEL(ad7380_4_channels, 16, 1, s); DEFINE_AD7380_4_CHANNEL(ad7381_4_channels, 14, 1, s); DEFINE_ADAQ4380_4_CHANNEL(adaq4380_4_channels, 16, 1, s); +DEFINE_ADAQ4380_4_CHANNEL(adaq4381_4_channels, 14, 1, s); /* pseudo differential */ DEFINE_AD7380_2_CHANNEL(ad7383_channels, 16, 0, s); DEFINE_AD7380_2_CHANNEL(ad7384_channels, 14, 0, s); @@ -599,6 +601,19 @@ static const struct ad7380_chip_info adaq4380_4_chip_info = { .timing_specs = &ad7380_4_timing, }; +static const struct ad7380_chip_info adaq4381_4_chip_info = { + .name = "adaq4381-4", + .channels = adaq4381_4_channels, + .num_channels = ARRAY_SIZE(adaq4381_4_channels), + .num_simult_channels = 4, + .supplies = adaq4380_supplies, + .num_supplies = ARRAY_SIZE(adaq4380_supplies), + .adaq_internal_ref_only = true, + .has_hardware_gain = true, + .available_scan_masks = ad7380_4_channel_scan_masks, + .timing_specs = &ad7380_4_timing, +}; + struct ad7380_state { const struct ad7380_chip_info *chip_info; struct spi_device *spi; @@ -1582,6 +1597,7 @@ static const struct of_device_id ad7380_of_match_table[] = { { .compatible = "adi,ad7388-4", .data = &ad7388_4_chip_info }, { .compatible = "adi,adaq4370-4", .data = &adaq4370_4_chip_info }, { .compatible = "adi,adaq4380-4", .data = &adaq4380_4_chip_info }, + { .compatible = "adi,adaq4381-4", .data = &adaq4381_4_chip_info }, { } }; @@ -1602,6 +1618,7 @@ static const struct spi_device_id ad7380_id_table[] = { { "ad7388-4", (kernel_ulong_t)&ad7388_4_chip_info }, { "adaq4370-4", (kernel_ulong_t)&adaq4370_4_chip_info }, { "adaq4380-4", (kernel_ulong_t)&adaq4380_4_chip_info }, + { "adaq4381-4", (kernel_ulong_t)&adaq4381_4_chip_info }, { } }; MODULE_DEVICE_TABLE(spi, ad7380_id_table); From 7131fcdba97ffe0aa33062414fc235c82502e6d8 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 26 Feb 2025 15:50:04 +0100 Subject: [PATCH 0566/2157] dt-bindings: iio: adc: ad7380: add adaq4381-4 compatible parts adaq4381-4 is the 14 bits version of adaq4380-1 chip. Add support for it. Signed-off-by: Julien Stephan Acked-by: Rob Herring (Arm) Reviewed-by: David Lechner Link: https://patch.msgid.link/20250226-ad7380-add-adaq4381-4-support-v1-2-f350ab872d37@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml index ada08005b3cd..ff4f5c21c548 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml @@ -27,6 +27,7 @@ description: | * https://www.analog.com/en/products/ad7388-4.html * https://www.analog.com/en/products/adaq4370-4.html * https://www.analog.com/en/products/adaq4380-4.html + * https://www.analog.com/en/products/adaq4381-4.html $ref: /schemas/spi/spi-peripheral-props.yaml# @@ -50,6 +51,7 @@ properties: - adi,ad7388-4 - adi,adaq4370-4 - adi,adaq4380-4 + - adi,adaq4381-4 reg: maxItems: 1 @@ -201,6 +203,7 @@ allOf: - adi,ad7380-4 - adi,adaq4370-4 - adi,adaq4380-4 + - adi,adaq4381-4 then: properties: refio-supply: false @@ -218,6 +221,7 @@ allOf: enum: - adi,adaq4370-4 - adi,adaq4380-4 + - adi,adaq4381-4 then: required: - vs-p-supply From aac287ec80d71a7ab7e44c936a434625417c3e30 Mon Sep 17 00:00:00 2001 From: Julien Stephan Date: Wed, 26 Feb 2025 15:50:05 +0100 Subject: [PATCH 0567/2157] docs: iio: ad7380: add adaq4381-4 adaq4381-4 is the 14 bits version of adaq4380-1 chip. Add it to the documentation. Signed-off-by: Julien Stephan Reviewed-by: David Lechner Link: https://patch.msgid.link/20250226-ad7380-add-adaq4381-4-support-v1-3-f350ab872d37@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad7380.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/iio/ad7380.rst b/Documentation/iio/ad7380.rst index cff688bcc2d9..35232a0e3ad7 100644 --- a/Documentation/iio/ad7380.rst +++ b/Documentation/iio/ad7380.rst @@ -29,6 +29,7 @@ The following chips are supported by this driver: * `AD7388-4 `_ * `ADAQ4370-4 `_ * `ADAQ4380-4 `_ +* `ADAQ4381-4 `_ Supported features @@ -52,8 +53,8 @@ declared in the device tree as ``refin-supply``. ADAQ devices ~~~~~~~~~~~~ -adaq4370-4 and adaq4380-4 don't have an external reference, but use a 3.3V -internal reference derived from one of its supplies (``refin-supply``) +ADAQ devices don't have an external reference, but use a 3.3V internal reference +derived from one of its supplies (``refin-supply``) All other devices from ad738x family ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 3c03c49b2fa59c4d456e2d673fe5847fa6cbcdf2 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:25 +0800 Subject: [PATCH 0568/2157] Coresight: Introduce a new struct coresight_path Introduce a new strcuture, 'struct coresight_path', to store the data that utilized by the devices in the path. The coresight_path will be built/released by coresight_build_path/coresight_release_path functions. Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-5-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 16 +++++----- .../hwtracing/coresight/coresight-etm-perf.c | 30 ++++++++++--------- .../hwtracing/coresight/coresight-etm-perf.h | 2 +- drivers/hwtracing/coresight/coresight-priv.h | 6 ++-- drivers/hwtracing/coresight/coresight-sysfs.c | 12 ++++---- include/linux/coresight.h | 10 +++++++ 6 files changed, 44 insertions(+), 32 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 44ad407817c1..b34b5e4776c4 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -670,7 +670,7 @@ static void coresight_drop_device(struct coresight_device *csdev) static int _coresight_build_path(struct coresight_device *csdev, struct coresight_device *source, struct coresight_device *sink, - struct list_head *path) + struct coresight_path *path) { int i, ret; bool found = false; @@ -723,25 +723,25 @@ static int _coresight_build_path(struct coresight_device *csdev, return -ENOMEM; node->csdev = csdev; - list_add(&node->link, path); + list_add(&node->link, &path->path_list); return 0; } -struct list_head *coresight_build_path(struct coresight_device *source, +struct coresight_path *coresight_build_path(struct coresight_device *source, struct coresight_device *sink) { - struct list_head *path; + struct coresight_path *path; int rc; if (!sink) return ERR_PTR(-EINVAL); - path = kzalloc(sizeof(struct list_head), GFP_KERNEL); + path = kzalloc(sizeof(struct coresight_path), GFP_KERNEL); if (!path) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(path); + INIT_LIST_HEAD(&path->path_list); rc = _coresight_build_path(source, source, sink, path); if (rc) { @@ -759,12 +759,12 @@ struct list_head *coresight_build_path(struct coresight_device *source, * Go through all the elements of a path and 1) removed it from the list and * 2) free the memory allocated for each node. */ -void coresight_release_path(struct list_head *path) +void coresight_release_path(struct coresight_path *path) { struct coresight_device *csdev; struct coresight_node *nd, *next; - list_for_each_entry_safe(nd, next, path, link) { + list_for_each_entry_safe(nd, next, &path->path_list, link) { csdev = nd->csdev; coresight_drop_device(csdev); diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index ad6a8f4b70b6..b0426792f08a 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -136,13 +136,13 @@ static const struct attribute_group *etm_pmu_attr_groups[] = { NULL, }; -static inline struct list_head ** +static inline struct coresight_path ** etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) { return per_cpu_ptr(data->path, cpu); } -static inline struct list_head * +static inline struct coresight_path * etm_event_cpu_path(struct etm_event_data *data, int cpu) { return *etm_event_cpu_path_ptr(data, cpu); @@ -197,6 +197,7 @@ static void free_sink_buffer(struct etm_event_data *event_data) int cpu; cpumask_t *mask = &event_data->mask; struct coresight_device *sink; + struct coresight_path *path; if (!event_data->snk_config) return; @@ -205,7 +206,8 @@ static void free_sink_buffer(struct etm_event_data *event_data) return; cpu = cpumask_first(mask); - sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); + path = etm_event_cpu_path(event_data, cpu); + sink = coresight_get_sink(&path->path_list); sink_ops(sink)->free_buffer(event_data->snk_config); } @@ -226,11 +228,11 @@ static void free_event_data(struct work_struct *work) cscfg_deactivate_config(event_data->cfg_hash); for_each_cpu(cpu, mask) { - struct list_head **ppath; + struct coresight_path **ppath; ppath = etm_event_cpu_path_ptr(event_data, cpu); if (!(IS_ERR_OR_NULL(*ppath))) { - struct coresight_device *sink = coresight_get_sink(*ppath); + struct coresight_device *sink = coresight_get_sink(&((*ppath)->path_list)); /* * Mark perf event as done for trace id allocator, but don't call @@ -276,7 +278,7 @@ static void *alloc_event_data(int cpu) * unused memory when dealing with single CPU trace scenarios is small * compared to the cost of searching through an optimized array. */ - event_data->path = alloc_percpu(struct list_head *); + event_data->path = alloc_percpu(struct coresight_path *); if (!event_data->path) { kfree(event_data); @@ -352,7 +354,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, * CPUs, we can handle it and fail the session. */ for_each_cpu(cpu, mask) { - struct list_head *path; + struct coresight_path *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); @@ -458,7 +460,7 @@ static void etm_event_start(struct perf_event *event, int flags) struct etm_ctxt *ctxt = this_cpu_ptr(&etm_ctxt); struct perf_output_handle *handle = &ctxt->handle; struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); - struct list_head *path; + struct coresight_path *path; u64 hw_id; u8 trace_id; @@ -494,12 +496,12 @@ static void etm_event_start(struct perf_event *event, int flags) path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ - sink = coresight_get_sink(path); + sink = coresight_get_sink(&path->path_list); if (WARN_ON_ONCE(!sink)) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(path, CS_MODE_PERF, handle)) + if (coresight_enable_path(&path->path_list, CS_MODE_PERF, handle)) goto fail_end_stop; /* Finally enable the tracer */ @@ -534,7 +536,7 @@ static void etm_event_start(struct perf_event *event, int flags) return; fail_disable_path: - coresight_disable_path(path); + coresight_disable_path(&path->path_list); fail_end_stop: /* * Check if the handle is still associated with the event, @@ -558,7 +560,7 @@ static void etm_event_stop(struct perf_event *event, int mode) struct etm_ctxt *ctxt = this_cpu_ptr(&etm_ctxt); struct perf_output_handle *handle = &ctxt->handle; struct etm_event_data *event_data; - struct list_head *path; + struct coresight_path *path; /* * If we still have access to the event_data via handle, @@ -599,7 +601,7 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!path) return; - sink = coresight_get_sink(path); + sink = coresight_get_sink(&path->path_list); if (!sink) return; @@ -643,7 +645,7 @@ static void etm_event_stop(struct perf_event *event, int mode) } /* Disabling the path make its elements available to other sessions */ - coresight_disable_path(path); + coresight_disable_path(&path->path_list); } static int etm_event_add(struct perf_event *event, int mode) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index 744531158d6b..5febbcdb8696 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -59,7 +59,7 @@ struct etm_event_data { cpumask_t aux_hwid_done; void *snk_config; u32 cfg_hash; - struct list_head * __percpu *path; + struct coresight_path * __percpu *path; }; int etm_perf_symlink(struct coresight_device *csdev, bool link); diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 76403530f33e..27b7dc348d4a 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -139,9 +139,9 @@ struct coresight_device *coresight_get_sink(struct list_head *path); struct coresight_device *coresight_get_sink_by_id(u32 id); struct coresight_device * coresight_find_default_sink(struct coresight_device *csdev); -struct list_head *coresight_build_path(struct coresight_device *csdev, - struct coresight_device *sink); -void coresight_release_path(struct list_head *path); +struct coresight_path *coresight_build_path(struct coresight_device *csdev, + struct coresight_device *sink); +void coresight_release_path(struct coresight_path *path); int coresight_add_sysfs_link(struct coresight_sysfs_link *info); void coresight_remove_sysfs_link(struct coresight_sysfs_link *info); int coresight_create_conns_sysfs_group(struct coresight_device *csdev); diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c index a01c9e54e2ed..cb4c39732d26 100644 --- a/drivers/hwtracing/coresight/coresight-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-sysfs.c @@ -22,7 +22,7 @@ static DEFINE_IDR(path_idr); * When operating Coresight drivers from the sysFS interface, only a single * path can exist from a tracer (associated to a CPU) to a sink. */ -static DEFINE_PER_CPU(struct list_head *, tracer_path); +static DEFINE_PER_CPU(struct coresight_path *, tracer_path); ssize_t coresight_simple_show_pair(struct device *_dev, struct device_attribute *attr, char *buf) @@ -167,7 +167,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) { int cpu, ret = 0; struct coresight_device *sink; - struct list_head *path; + struct coresight_path *path; enum coresight_dev_subtype_source subtype; u32 hash; @@ -209,7 +209,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) goto out; } - ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL); + ret = coresight_enable_path(&path->path_list, CS_MODE_SYSFS, NULL); if (ret) goto err_path; @@ -251,7 +251,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) return ret; err_source: - coresight_disable_path(path); + coresight_disable_path(&path->path_list); err_path: coresight_release_path(path); @@ -262,7 +262,7 @@ EXPORT_SYMBOL_GPL(coresight_enable_sysfs); void coresight_disable_sysfs(struct coresight_device *csdev) { int cpu, ret; - struct list_head *path = NULL; + struct coresight_path *path = NULL; u32 hash; mutex_lock(&coresight_mutex); @@ -297,7 +297,7 @@ void coresight_disable_sysfs(struct coresight_device *csdev) break; } - coresight_disable_path(path); + coresight_disable_path(&path->path_list); coresight_release_path(path); out: diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1396bda77f3f..509e1f256412 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -329,6 +329,16 @@ static struct coresight_dev_list (var) = { \ #define to_coresight_device(d) container_of(d, struct coresight_device, dev) +/** + * struct coresight_path - data needed by enable/disable path + * @path_list: path from source to sink. + * @trace_id: trace_id of the whole path. + */ +struct coresight_path { + struct list_head path_list; + u8 trace_id; +}; + enum cs_mode { CS_MODE_DISABLED, CS_MODE_SYSFS, From d87d76d823d111b09c842855f1d5171a45095d82 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:26 +0800 Subject: [PATCH 0569/2157] Coresight: Allocate trace ID after building the path The trace_id will be stored in coresight_path instead of being declared everywhere and allocated after building the path. Co-developed-by: James Clark Signed-off-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-6-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 44 +++++++++++++++++++ .../hwtracing/coresight/coresight-etm-perf.c | 5 +-- drivers/hwtracing/coresight/coresight-priv.h | 2 + drivers/hwtracing/coresight/coresight-sysfs.c | 4 ++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index b34b5e4776c4..58f5106e0f4b 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -655,6 +655,50 @@ static void coresight_drop_device(struct coresight_device *csdev) } } +/* + * coresight device will read their existing or alloc a trace ID, if their trace_id + * callback is set. + * + * Return 0 if the trace_id callback is not set. + * Return the result of the trace_id callback if it is set. The return value + * will be the trace_id if successful, and an error number if it fails. + */ +static int coresight_get_trace_id(struct coresight_device *csdev, + enum cs_mode mode, + struct coresight_device *sink) +{ + if (coresight_ops(csdev)->trace_id) + return coresight_ops(csdev)->trace_id(csdev, mode, sink); + + return 0; +} + +/* + * Call this after creating the path and before enabling it. This leaves + * the trace ID set on the path, or it remains 0 if it couldn't be assigned. + */ +void coresight_path_assign_trace_id(struct coresight_path *path, + enum cs_mode mode) +{ + struct coresight_device *sink = coresight_get_sink(&path->path_list); + struct coresight_node *nd; + int trace_id; + + list_for_each_entry(nd, &path->path_list, link) { + /* Assign a trace ID to the path for the first device that wants to do it */ + trace_id = coresight_get_trace_id(nd->csdev, mode, sink); + + /* + * 0 in this context is that it didn't want to assign so keep searching. + * Non 0 is either success or fail. + */ + if (trace_id != 0) { + path->trace_id = trace_id; + return; + } + } +} + /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index b0426792f08a..134290ab622e 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -319,7 +319,6 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, { u32 id, cfg_hash; int cpu = event->cpu; - int trace_id; cpumask_t *mask; struct coresight_device *sink = NULL; struct coresight_device *user_sink = NULL, *last_sink = NULL; @@ -409,8 +408,8 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, } /* ensure we can allocate a trace ID for this CPU */ - trace_id = coresight_trace_id_get_cpu_id_map(cpu, &sink->perf_sink_id_map); - if (!IS_VALID_CS_TRACE_ID(trace_id)) { + coresight_path_assign_trace_id(path, CS_MODE_PERF); + if (!IS_VALID_CS_TRACE_ID(path->trace_id)) { cpumask_clear_cpu(cpu, mask); coresight_release_path(path); continue; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 27b7dc348d4a..2bea35bae0d4 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -152,6 +152,8 @@ int coresight_make_links(struct coresight_device *orig, void coresight_remove_links(struct coresight_device *orig, struct coresight_connection *conn); u32 coresight_get_sink_id(struct coresight_device *csdev); +void coresight_path_assign_trace_id(struct coresight_path *path, + enum cs_mode mode); #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X) extern int etm_readl_cp14(u32 off, unsigned int *val); diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c index cb4c39732d26..d03751bf3d8a 100644 --- a/drivers/hwtracing/coresight/coresight-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-sysfs.c @@ -209,6 +209,10 @@ int coresight_enable_sysfs(struct coresight_device *csdev) goto out; } + coresight_path_assign_trace_id(path, CS_MODE_SYSFS); + if (!IS_VALID_CS_TRACE_ID(path->trace_id)) + goto err_path; + ret = coresight_enable_path(&path->path_list, CS_MODE_SYSFS, NULL); if (ret) goto err_path; From 7b365f056d8e02fc70c823bdf736e41a7236a54b Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:27 +0800 Subject: [PATCH 0570/2157] Coresight: Change to read the trace ID from coresight_path The source device can directly read the trace ID from the coresight_path which result in etm_read_alloc_trace_id and etm4_read_alloc_trace_id being deleted. Co-developed-by: James Clark Signed-off-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-7-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-dummy.c | 2 +- .../hwtracing/coresight/coresight-etm-perf.c | 8 +-- drivers/hwtracing/coresight/coresight-etm.h | 1 - .../coresight/coresight-etm3x-core.c | 54 +++---------------- .../coresight/coresight-etm4x-core.c | 54 +++---------------- drivers/hwtracing/coresight/coresight-etm4x.h | 1 - drivers/hwtracing/coresight/coresight-stm.c | 2 +- drivers/hwtracing/coresight/coresight-sysfs.c | 7 +-- drivers/hwtracing/coresight/coresight-tpdm.c | 2 +- include/linux/coresight.h | 2 +- 10 files changed, 25 insertions(+), 108 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c index b5692ba358c1..aaa92b5081e3 100644 --- a/drivers/hwtracing/coresight/coresight-dummy.c +++ b/drivers/hwtracing/coresight/coresight-dummy.c @@ -24,7 +24,7 @@ DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink"); static int dummy_source_enable(struct coresight_device *csdev, struct perf_event *event, enum cs_mode mode, - __maybe_unused struct coresight_trace_id_map *id_map) + __maybe_unused struct coresight_path *path) { if (!coresight_take_mode(csdev, mode)) return -EBUSY; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 134290ab622e..300305d67a1d 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -461,7 +461,6 @@ static void etm_event_start(struct perf_event *event, int flags) struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct coresight_path *path; u64 hw_id; - u8 trace_id; if (!csdev) goto fail; @@ -504,8 +503,7 @@ static void etm_event_start(struct perf_event *event, int flags) goto fail_end_stop; /* Finally enable the tracer */ - if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF, - &sink->perf_sink_id_map)) + if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF, path)) goto fail_disable_path; /* @@ -515,13 +513,11 @@ static void etm_event_start(struct perf_event *event, int flags) if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) { cpumask_set_cpu(cpu, &event_data->aux_hwid_done); - trace_id = coresight_trace_id_read_cpu_id_map(cpu, &sink->perf_sink_id_map); - hw_id = FIELD_PREP(CS_AUX_HW_ID_MAJOR_VERSION_MASK, CS_AUX_HW_ID_MAJOR_VERSION); hw_id |= FIELD_PREP(CS_AUX_HW_ID_MINOR_VERSION_MASK, CS_AUX_HW_ID_MINOR_VERSION); - hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_id); + hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, path->trace_id); hw_id |= FIELD_PREP(CS_AUX_HW_ID_SINK_ID_MASK, coresight_get_sink_id(sink)); perf_report_aux_output_id(event, hw_id); diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h index e02c3ea972c9..171f1384f7c0 100644 --- a/drivers/hwtracing/coresight/coresight-etm.h +++ b/drivers/hwtracing/coresight/coresight-etm.h @@ -284,6 +284,5 @@ extern const struct attribute_group *coresight_etm_groups[]; void etm_set_default(struct etm_config *config); void etm_config_trace_mode(struct etm_config *config); struct etm_config *get_etm_config(struct etm_drvdata *drvdata); -int etm_read_alloc_trace_id(struct etm_drvdata *drvdata); void etm_release_trace_id(struct etm_drvdata *drvdata); #endif diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c index c1dda4bc4a2f..8927bfaf3af2 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c @@ -455,26 +455,6 @@ static int etm_cpu_id(struct coresight_device *csdev) return drvdata->cpu; } -int etm_read_alloc_trace_id(struct etm_drvdata *drvdata) -{ - int trace_id; - - /* - * This will allocate a trace ID to the cpu, - * or return the one currently allocated. - * - * trace id function has its own lock - */ - trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu); - if (IS_VALID_CS_TRACE_ID(trace_id)) - drvdata->traceid = (u8)trace_id; - else - dev_err(&drvdata->csdev->dev, - "Failed to allocate trace ID for %s on CPU%d\n", - dev_name(&drvdata->csdev->dev), drvdata->cpu); - return trace_id; -} - void etm_release_trace_id(struct etm_drvdata *drvdata) { coresight_trace_id_put_cpu_id(drvdata->cpu); @@ -482,38 +462,22 @@ void etm_release_trace_id(struct etm_drvdata *drvdata) static int etm_enable_perf(struct coresight_device *csdev, struct perf_event *event, - struct coresight_trace_id_map *id_map) + struct coresight_path *path) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - int trace_id; if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) return -EINVAL; /* Configure the tracer based on the session's specifics */ etm_parse_event_config(drvdata, event); - - /* - * perf allocates cpu ids as part of _setup_aux() - device needs to use - * the allocated ID. This reads the current version without allocation. - * - * This does not use the trace id lock to prevent lock_dep issues - * with perf locks - we know the ID cannot change until perf shuts down - * the session - */ - trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map); - if (!IS_VALID_CS_TRACE_ID(trace_id)) { - dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n", - dev_name(&drvdata->csdev->dev), drvdata->cpu); - return -EINVAL; - } - drvdata->traceid = (u8)trace_id; + drvdata->traceid = path->trace_id; /* And enable it */ return etm_enable_hw(drvdata); } -static int etm_enable_sysfs(struct coresight_device *csdev) +static int etm_enable_sysfs(struct coresight_device *csdev, struct coresight_path *path) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct etm_enable_arg arg = { }; @@ -521,10 +485,7 @@ static int etm_enable_sysfs(struct coresight_device *csdev) spin_lock(&drvdata->spinlock); - /* sysfs needs to allocate and set a trace ID */ - ret = etm_read_alloc_trace_id(drvdata); - if (ret < 0) - goto unlock_enable_sysfs; + drvdata->traceid = path->trace_id; /* * Configure the ETM only if the CPU is online. If it isn't online @@ -545,7 +506,6 @@ static int etm_enable_sysfs(struct coresight_device *csdev) if (ret) etm_release_trace_id(drvdata); -unlock_enable_sysfs: spin_unlock(&drvdata->spinlock); if (!ret) @@ -554,7 +514,7 @@ static int etm_enable_sysfs(struct coresight_device *csdev) } static int etm_enable(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode, struct coresight_trace_id_map *id_map) + enum cs_mode mode, struct coresight_path *path) { int ret; struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -566,10 +526,10 @@ static int etm_enable(struct coresight_device *csdev, struct perf_event *event, switch (mode) { case CS_MODE_SYSFS: - ret = etm_enable_sysfs(csdev); + ret = etm_enable_sysfs(csdev, path); break; case CS_MODE_PERF: - ret = etm_enable_perf(csdev, event, id_map); + ret = etm_enable_perf(csdev, event, path); break; default: ret = -EINVAL; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 07e2dee54688..bb1e80df2914 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -232,25 +232,6 @@ static int etm4_cpu_id(struct coresight_device *csdev) return drvdata->cpu; } -int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata) -{ - int trace_id; - - /* - * This will allocate a trace ID to the cpu, - * or return the one currently allocated. - * The trace id function has its own lock - */ - trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu); - if (IS_VALID_CS_TRACE_ID(trace_id)) - drvdata->trcid = (u8)trace_id; - else - dev_err(&drvdata->csdev->dev, - "Failed to allocate trace ID for %s on CPU%d\n", - dev_name(&drvdata->csdev->dev), drvdata->cpu); - return trace_id; -} - void etm4_release_trace_id(struct etmv4_drvdata *drvdata) { coresight_trace_id_put_cpu_id(drvdata->cpu); @@ -810,9 +791,9 @@ static int etm4_parse_event_config(struct coresight_device *csdev, static int etm4_enable_perf(struct coresight_device *csdev, struct perf_event *event, - struct coresight_trace_id_map *id_map) + struct coresight_path *path) { - int ret = 0, trace_id; + int ret = 0; struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) { @@ -825,22 +806,7 @@ static int etm4_enable_perf(struct coresight_device *csdev, if (ret) goto out; - /* - * perf allocates cpu ids as part of _setup_aux() - device needs to use - * the allocated ID. This reads the current version without allocation. - * - * This does not use the trace id lock to prevent lock_dep issues - * with perf locks - we know the ID cannot change until perf shuts down - * the session - */ - trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map); - if (!IS_VALID_CS_TRACE_ID(trace_id)) { - dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n", - dev_name(&drvdata->csdev->dev), drvdata->cpu); - ret = -EINVAL; - goto out; - } - drvdata->trcid = (u8)trace_id; + drvdata->trcid = path->trace_id; /* And enable it */ ret = etm4_enable_hw(drvdata); @@ -849,7 +815,7 @@ static int etm4_enable_perf(struct coresight_device *csdev, return ret; } -static int etm4_enable_sysfs(struct coresight_device *csdev) +static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_path *path) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct etm4_enable_arg arg = { }; @@ -866,10 +832,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) spin_lock(&drvdata->spinlock); - /* sysfs needs to read and allocate a trace ID */ - ret = etm4_read_alloc_trace_id(drvdata); - if (ret < 0) - goto unlock_sysfs_enable; + drvdata->trcid = path->trace_id; /* * Executing etm4_enable_hw on the cpu whose ETM is being enabled @@ -886,7 +849,6 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) if (ret) etm4_release_trace_id(drvdata); -unlock_sysfs_enable: spin_unlock(&drvdata->spinlock); if (!ret) @@ -895,7 +857,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) } static int etm4_enable(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode, struct coresight_trace_id_map *id_map) + enum cs_mode mode, struct coresight_path *path) { int ret; @@ -906,10 +868,10 @@ static int etm4_enable(struct coresight_device *csdev, struct perf_event *event, switch (mode) { case CS_MODE_SYSFS: - ret = etm4_enable_sysfs(csdev); + ret = etm4_enable_sysfs(csdev, path); break; case CS_MODE_PERF: - ret = etm4_enable_perf(csdev, event, id_map); + ret = etm4_enable_perf(csdev, event, path); break; default: ret = -EINVAL; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 1119762b5cec..2b92de17b5a2 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -1066,6 +1066,5 @@ static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata) return drvdata->arch >= ETM_ARCH_ETE; } -int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata); void etm4_release_trace_id(struct etmv4_drvdata *drvdata); #endif diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index aca25b5e3be2..26f9339f38b9 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -195,7 +195,7 @@ static void stm_enable_hw(struct stm_drvdata *drvdata) static int stm_enable(struct coresight_device *csdev, struct perf_event *event, enum cs_mode mode, - __maybe_unused struct coresight_trace_id_map *trace_id) + __maybe_unused struct coresight_path *path) { struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c index d03751bf3d8a..3ac5b52413a6 100644 --- a/drivers/hwtracing/coresight/coresight-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-sysfs.c @@ -53,7 +53,8 @@ ssize_t coresight_simple_show32(struct device *_dev, EXPORT_SYMBOL_GPL(coresight_simple_show32); static int coresight_enable_source_sysfs(struct coresight_device *csdev, - enum cs_mode mode, void *data) + enum cs_mode mode, + struct coresight_path *path) { int ret; @@ -64,7 +65,7 @@ static int coresight_enable_source_sysfs(struct coresight_device *csdev, */ lockdep_assert_held(&coresight_mutex); if (coresight_get_mode(csdev) != CS_MODE_SYSFS) { - ret = source_ops(csdev)->enable(csdev, data, mode, NULL); + ret = source_ops(csdev)->enable(csdev, NULL, mode, path); if (ret) return ret; } @@ -217,7 +218,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) if (ret) goto err_path; - ret = coresight_enable_source_sysfs(csdev, CS_MODE_SYSFS, NULL); + ret = coresight_enable_source_sysfs(csdev, CS_MODE_SYSFS, path); if (ret) goto err_source; diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c index af806d2275d2..7214e65097ec 100644 --- a/drivers/hwtracing/coresight/coresight-tpdm.c +++ b/drivers/hwtracing/coresight/coresight-tpdm.c @@ -480,7 +480,7 @@ static void __tpdm_enable(struct tpdm_drvdata *drvdata) static int tpdm_enable(struct coresight_device *csdev, struct perf_event *event, enum cs_mode mode, - __maybe_unused struct coresight_trace_id_map *id_map) + __maybe_unused struct coresight_path *path) { struct tpdm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 509e1f256412..bc0c853ffa6d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -401,7 +401,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode, struct coresight_trace_id_map *id_map); + enum cs_mode mode, struct coresight_path *path); void (*disable)(struct coresight_device *csdev, struct perf_event *event); }; From 080ee83cc361451a7de7b5486c7f96ce454f7203 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:28 +0800 Subject: [PATCH 0571/2157] Coresight: Change functions to accept the coresight_path Modify following functions to accept the coresight_path. Devices in the path can read data from coresight_path if needed. - coresight_enable_path - coresight_disable_path - coresight_get_source - coresight_get_sink - coresight_enable_helpers - coresight_disable_helpers Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-8-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 37 ++++++++++--------- .../hwtracing/coresight/coresight-etm-perf.c | 16 ++++---- drivers/hwtracing/coresight/coresight-priv.h | 6 +-- drivers/hwtracing/coresight/coresight-sysfs.c | 6 +-- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 58f5106e0f4b..bd0a7edd38c9 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -77,14 +77,14 @@ struct coresight_device *coresight_get_percpu_sink(int cpu) } EXPORT_SYMBOL_GPL(coresight_get_percpu_sink); -static struct coresight_device *coresight_get_source(struct list_head *path) +static struct coresight_device *coresight_get_source(struct coresight_path *path) { struct coresight_device *csdev; if (!path) return NULL; - csdev = list_first_entry(path, struct coresight_node, link)->csdev; + csdev = list_first_entry(&path->path_list, struct coresight_node, link)->csdev; if (!coresight_is_device_source(csdev)) return NULL; @@ -333,12 +333,12 @@ static int coresight_enable_helper(struct coresight_device *csdev, return helper_ops(csdev)->enable(csdev, mode, data); } -static void coresight_disable_helper(struct coresight_device *csdev) +static void coresight_disable_helper(struct coresight_device *csdev, void *data) { - helper_ops(csdev)->disable(csdev, NULL); + helper_ops(csdev)->disable(csdev, data); } -static void coresight_disable_helpers(struct coresight_device *csdev) +static void coresight_disable_helpers(struct coresight_device *csdev, void *data) { int i; struct coresight_device *helper; @@ -346,7 +346,7 @@ static void coresight_disable_helpers(struct coresight_device *csdev) for (i = 0; i < csdev->pdata->nr_outconns; ++i) { helper = csdev->pdata->out_conns[i]->dest_dev; if (helper && coresight_is_helper(helper)) - coresight_disable_helper(helper); + coresight_disable_helper(helper, data); } } @@ -363,7 +363,7 @@ static void coresight_disable_helpers(struct coresight_device *csdev) void coresight_disable_source(struct coresight_device *csdev, void *data) { source_ops(csdev)->disable(csdev, data); - coresight_disable_helpers(csdev); + coresight_disable_helpers(csdev, NULL); } EXPORT_SYMBOL_GPL(coresight_disable_source); @@ -372,16 +372,16 @@ EXPORT_SYMBOL_GPL(coresight_disable_source); * @nd in the list. If @nd is NULL, all the components, except the SOURCE are * disabled. */ -static void coresight_disable_path_from(struct list_head *path, +static void coresight_disable_path_from(struct coresight_path *path, struct coresight_node *nd) { u32 type; struct coresight_device *csdev, *parent, *child; if (!nd) - nd = list_first_entry(path, struct coresight_node, link); + nd = list_first_entry(&path->path_list, struct coresight_node, link); - list_for_each_entry_continue(nd, path, link) { + list_for_each_entry_continue(nd, &path->path_list, link) { csdev = nd->csdev; type = csdev->type; @@ -419,11 +419,11 @@ static void coresight_disable_path_from(struct list_head *path, } /* Disable all helpers adjacent along the path last */ - coresight_disable_helpers(csdev); + coresight_disable_helpers(csdev, path); } } -void coresight_disable_path(struct list_head *path) +void coresight_disable_path(struct coresight_path *path) { coresight_disable_path_from(path, NULL); } @@ -448,7 +448,7 @@ static int coresight_enable_helpers(struct coresight_device *csdev, return 0; } -int coresight_enable_path(struct list_head *path, enum cs_mode mode, +int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, void *sink_data) { int ret = 0; @@ -458,12 +458,12 @@ int coresight_enable_path(struct list_head *path, enum cs_mode mode, struct coresight_device *source; source = coresight_get_source(path); - list_for_each_entry_reverse(nd, path, link) { + list_for_each_entry_reverse(nd, &path->path_list, link) { csdev = nd->csdev; type = csdev->type; /* Enable all helpers adjacent to the path first */ - ret = coresight_enable_helpers(csdev, mode, sink_data); + ret = coresight_enable_helpers(csdev, mode, path); if (ret) goto err; /* @@ -511,20 +511,21 @@ int coresight_enable_path(struct list_head *path, enum cs_mode mode, goto out; } -struct coresight_device *coresight_get_sink(struct list_head *path) +struct coresight_device *coresight_get_sink(struct coresight_path *path) { struct coresight_device *csdev; if (!path) return NULL; - csdev = list_last_entry(path, struct coresight_node, link)->csdev; + csdev = list_last_entry(&path->path_list, struct coresight_node, link)->csdev; if (csdev->type != CORESIGHT_DEV_TYPE_SINK && csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) return NULL; return csdev; } +EXPORT_SYMBOL_GPL(coresight_get_sink); u32 coresight_get_sink_id(struct coresight_device *csdev) { @@ -680,7 +681,7 @@ static int coresight_get_trace_id(struct coresight_device *csdev, void coresight_path_assign_trace_id(struct coresight_path *path, enum cs_mode mode) { - struct coresight_device *sink = coresight_get_sink(&path->path_list); + struct coresight_device *sink = coresight_get_sink(path); struct coresight_node *nd; int trace_id; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 300305d67a1d..f4cccd68e625 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -197,7 +197,6 @@ static void free_sink_buffer(struct etm_event_data *event_data) int cpu; cpumask_t *mask = &event_data->mask; struct coresight_device *sink; - struct coresight_path *path; if (!event_data->snk_config) return; @@ -206,8 +205,7 @@ static void free_sink_buffer(struct etm_event_data *event_data) return; cpu = cpumask_first(mask); - path = etm_event_cpu_path(event_data, cpu); - sink = coresight_get_sink(&path->path_list); + sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); sink_ops(sink)->free_buffer(event_data->snk_config); } @@ -232,7 +230,7 @@ static void free_event_data(struct work_struct *work) ppath = etm_event_cpu_path_ptr(event_data, cpu); if (!(IS_ERR_OR_NULL(*ppath))) { - struct coresight_device *sink = coresight_get_sink(&((*ppath)->path_list)); + struct coresight_device *sink = coresight_get_sink(*ppath); /* * Mark perf event as done for trace id allocator, but don't call @@ -494,12 +492,12 @@ static void etm_event_start(struct perf_event *event, int flags) path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ - sink = coresight_get_sink(&path->path_list); + sink = coresight_get_sink(path); if (WARN_ON_ONCE(!sink)) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(&path->path_list, CS_MODE_PERF, handle)) + if (coresight_enable_path(path, CS_MODE_PERF, handle)) goto fail_end_stop; /* Finally enable the tracer */ @@ -531,7 +529,7 @@ static void etm_event_start(struct perf_event *event, int flags) return; fail_disable_path: - coresight_disable_path(&path->path_list); + coresight_disable_path(path); fail_end_stop: /* * Check if the handle is still associated with the event, @@ -596,7 +594,7 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!path) return; - sink = coresight_get_sink(&path->path_list); + sink = coresight_get_sink(path); if (!sink) return; @@ -640,7 +638,7 @@ static void etm_event_stop(struct perf_event *event, int mode) } /* Disabling the path make its elements available to other sessions */ - coresight_disable_path(&path->path_list); + coresight_disable_path(path); } static int etm_event_add(struct perf_event *event, int mode) diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 2bea35bae0d4..82644aff8d2b 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -132,10 +132,10 @@ static inline void CS_UNLOCK(void __iomem *addr) } while (0); } -void coresight_disable_path(struct list_head *path); -int coresight_enable_path(struct list_head *path, enum cs_mode mode, +void coresight_disable_path(struct coresight_path *path); +int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, void *sink_data); -struct coresight_device *coresight_get_sink(struct list_head *path); +struct coresight_device *coresight_get_sink(struct coresight_path *path); struct coresight_device *coresight_get_sink_by_id(u32 id); struct coresight_device * coresight_find_default_sink(struct coresight_device *csdev); diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c index 3ac5b52413a6..feadaf065b53 100644 --- a/drivers/hwtracing/coresight/coresight-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-sysfs.c @@ -214,7 +214,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) if (!IS_VALID_CS_TRACE_ID(path->trace_id)) goto err_path; - ret = coresight_enable_path(&path->path_list, CS_MODE_SYSFS, NULL); + ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL); if (ret) goto err_path; @@ -256,7 +256,7 @@ int coresight_enable_sysfs(struct coresight_device *csdev) return ret; err_source: - coresight_disable_path(&path->path_list); + coresight_disable_path(path); err_path: coresight_release_path(path); @@ -302,7 +302,7 @@ void coresight_disable_sysfs(struct coresight_device *csdev) break; } - coresight_disable_path(&path->path_list); + coresight_disable_path(path); coresight_release_path(path); out: From 166df2a18dc9efa820bb2084d162f9b6650efac1 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:29 +0800 Subject: [PATCH 0572/2157] dt-bindings: arm: Add Coresight TMC Control Unit hardware Add binding file to specify how to define a Coresight TMC Control Unit device in device tree. It is responsible for controlling the data filter function based on the source device's Trace ID for TMC ETR device. The trace data with that Trace id can get into ETR's buffer while other trace data gets ignored. Reviewed-by: Rob Herring (Arm) Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-9-quic_jiegan@quicinc.com --- .../bindings/arm/qcom,coresight-ctcu.yaml | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml diff --git a/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml b/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml new file mode 100644 index 000000000000..843b52eaf872 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/qcom,coresight-ctcu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: CoreSight TMC Control Unit + +maintainers: + - Yuanfang Zhang + - Mao Jinlong + - Jie Gan + +description: | + The Trace Memory Controller(TMC) is used for Embedded Trace Buffer(ETB), + Embedded Trace FIFO(ETF) and Embedded Trace Router(ETR) configurations. + The configuration mode (ETB, ETF, ETR) is discovered at boot time when + the device is probed. + + The Coresight TMC Control unit controls various Coresight behaviors. + It works as a helper device when connected to TMC ETR device. + It is responsible for controlling the data filter function based on + the source device's Trace ID for TMC ETR device. The trace data with + that Trace id can get into ETR's buffer while other trace data gets + ignored. + +properties: + compatible: + enum: + - qcom,sa8775p-ctcu + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: apb + + in-ports: + $ref: /schemas/graph.yaml#/properties/ports + + patternProperties: + '^port(@[0-1])?$': + description: Input connections from CoreSight Trace bus + $ref: /schemas/graph.yaml#/properties/port + +required: + - compatible + - reg + - in-ports + +additionalProperties: false + +examples: + - | + ctcu@1001000 { + compatible = "qcom,sa8775p-ctcu"; + reg = <0x1001000 0x1000>; + + clocks = <&aoss_qmp>; + clock-names = "apb"; + + in-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + ctcu_in_port0: endpoint { + remote-endpoint = <&etr0_out_port>; + }; + }; + + port@1 { + reg = <1>; + ctcu_in_port1: endpoint { + remote-endpoint = <&etr1_out_port>; + }; + }; + }; + }; From f78d206f3d73446e4c3568946a26f903da2149b4 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 3 Mar 2025 11:29:30 +0800 Subject: [PATCH 0573/2157] Coresight: Add Coresight TMC Control Unit driver The Coresight TMC Control Unit hosts miscellaneous configuration registers which control various features related to TMC ETR sink. Based on the trace ID, which is programmed in the related CTCU ATID register of a specific ETR, trace data with that trace ID gets into the ETR buffer, while other trace data gets dropped. Enabling source device sets one bit of the ATID register based on source device's trace ID. Disabling source device resets the bit according to the source device's trace ID. Reviewed-by: James Clark Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250303032931.2500935-10-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/Kconfig | 12 + drivers/hwtracing/coresight/Makefile | 2 + .../hwtracing/coresight/coresight-ctcu-core.c | 326 ++++++++++++++++++ drivers/hwtracing/coresight/coresight-ctcu.h | 39 +++ include/linux/coresight.h | 3 +- 5 files changed, 381 insertions(+), 1 deletion(-) create mode 100644 drivers/hwtracing/coresight/coresight-ctcu-core.c create mode 100644 drivers/hwtracing/coresight/coresight-ctcu.h diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index 06f0a7594169..ecd7086a5b83 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -133,6 +133,18 @@ config CORESIGHT_STM To compile this driver as a module, choose M here: the module will be called coresight-stm. +config CORESIGHT_CTCU + tristate "CoreSight TMC Control Unit driver" + depends on CORESIGHT_LINK_AND_SINK_TMC + help + This driver provides support for CoreSight TMC Control Unit + that hosts miscellaneous configuration registers. This is + primarily used for controlling the behaviors of the TMC + ETR device. + + To compile this driver as a module, choose M here: the + module will be called coresight-ctcu. + config CORESIGHT_CPU_DEBUG tristate "CoreSight CPU Debug driver" depends on ARM || ARM64 diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index 46ce7f39d05f..8e62c3150aeb 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -51,3 +51,5 @@ coresight-cti-y := coresight-cti-core.o coresight-cti-platform.o \ coresight-cti-sysfs.o obj-$(CONFIG_ULTRASOC_SMB) += ultrasoc-smb.o obj-$(CONFIG_CORESIGHT_DUMMY) += coresight-dummy.o +obj-$(CONFIG_CORESIGHT_CTCU) += coresight-ctcu.o +coresight-ctcu-y := coresight-ctcu-core.o diff --git a/drivers/hwtracing/coresight/coresight-ctcu-core.c b/drivers/hwtracing/coresight/coresight-ctcu-core.c new file mode 100644 index 000000000000..da35d8b4d579 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-ctcu-core.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-ctcu.h" +#include "coresight-priv.h" + +DEFINE_CORESIGHT_DEVLIST(ctcu_devs, "ctcu"); + +#define ctcu_writel(drvdata, val, offset) __raw_writel((val), drvdata->base + offset) +#define ctcu_readl(drvdata, offset) __raw_readl(drvdata->base + offset) + +/* + * The TMC Coresight Control Unit utilizes four ATID registers to control the data + * filter function based on the trace ID for each TMC ETR sink. The length of each + * ATID register is 32 bits. Therefore, an ETR device has a 128-bit long field + * in CTCU. Each trace ID is represented by one bit in that filed. + * e.g. ETR0ATID0 layout, set bit 5 for traceid 5 + * bit5 + * ------------------------------------------------------ + * | |28| |24| |20| |16| |12| |8| 1|4| |0| + * ------------------------------------------------------ + * + * e.g. ETR0: + * 127 0 from ATID_offset for ETR0ATID0 + * ------------------------- + * |ATID3|ATID2|ATID1|ATID0| + */ +#define CTCU_ATID_REG_OFFSET(traceid, atid_offset) \ + ((traceid / 32) * 4 + atid_offset) + +#define CTCU_ATID_REG_BIT(traceid) (traceid % 32) +#define CTCU_ATID_REG_SIZE 0x10 +#define CTCU_ETR0_ATID0 0xf8 +#define CTCU_ETR1_ATID0 0x108 + +static const struct ctcu_etr_config sa8775p_etr_cfgs[] = { + { + .atid_offset = CTCU_ETR0_ATID0, + .port_num = 0, + }, + { + .atid_offset = CTCU_ETR1_ATID0, + .port_num = 1, + }, +}; + +static const struct ctcu_config sa8775p_cfgs = { + .etr_cfgs = sa8775p_etr_cfgs, + .num_etr_config = ARRAY_SIZE(sa8775p_etr_cfgs), +}; + +static void ctcu_program_atid_register(struct ctcu_drvdata *drvdata, u32 reg_offset, + u8 bit, bool enable) +{ + u32 val; + + CS_UNLOCK(drvdata->base); + val = ctcu_readl(drvdata, reg_offset); + if (enable) + val |= BIT(bit); + else + val &= ~BIT(bit); + + ctcu_writel(drvdata, val, reg_offset); + CS_LOCK(drvdata->base); +} + +/* + * __ctcu_set_etr_traceid: Set bit in the ATID register based on trace ID when enable is true. + * Reset the bit of the ATID register based on trace ID when enable is false. + * + * @csdev: coresight_device of CTCU. + * @traceid: trace ID of the source tracer. + * @port_num: port number connected to TMC ETR sink. + * @enable: True for set bit and false for reset bit. + * + * Returns 0 indicates success. Non-zero result means failure. + */ +static int __ctcu_set_etr_traceid(struct coresight_device *csdev, u8 traceid, int port_num, + bool enable) +{ + struct ctcu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + u32 atid_offset, reg_offset; + u8 refcnt, bit; + + atid_offset = drvdata->atid_offset[port_num]; + if (atid_offset == 0) + return -EINVAL; + + bit = CTCU_ATID_REG_BIT(traceid); + reg_offset = CTCU_ATID_REG_OFFSET(traceid, atid_offset); + if (reg_offset - atid_offset > CTCU_ATID_REG_SIZE) + return -EINVAL; + + guard(raw_spinlock_irqsave)(&drvdata->spin_lock); + refcnt = drvdata->traceid_refcnt[port_num][traceid]; + /* Only program the atid register when the refcnt value is 1 or 0 */ + if ((enable && !refcnt++) || (!enable && !--refcnt)) + ctcu_program_atid_register(drvdata, reg_offset, bit, enable); + + drvdata->traceid_refcnt[port_num][traceid] = refcnt; + + return 0; +} + +/* + * Searching the sink device from helper's view in case there are multiple helper devices + * connected to the sink device. + */ +static int ctcu_get_active_port(struct coresight_device *sink, struct coresight_device *helper) +{ + struct coresight_platform_data *pdata = helper->pdata; + int i; + + for (i = 0; i < pdata->nr_inconns; ++i) { + if (pdata->in_conns[i]->src_dev == sink) + return pdata->in_conns[i]->dest_port; + } + + return -EINVAL; +} + +static int ctcu_set_etr_traceid(struct coresight_device *csdev, struct coresight_path *path, + bool enable) +{ + struct coresight_device *sink = coresight_get_sink(path); + u8 traceid = path->trace_id; + int port_num; + + if ((sink == NULL) || !IS_VALID_CS_TRACE_ID(traceid)) { + dev_err(&csdev->dev, "Invalid sink device or trace ID\n"); + return -EINVAL; + } + + port_num = ctcu_get_active_port(sink, csdev); + if (port_num < 0) + return -EINVAL; + + dev_dbg(&csdev->dev, "traceid is %d\n", traceid); + + return __ctcu_set_etr_traceid(csdev, traceid, port_num, enable); +} + +static int ctcu_enable(struct coresight_device *csdev, enum cs_mode mode, void *data) +{ + struct coresight_path *path = (struct coresight_path *)data; + + return ctcu_set_etr_traceid(csdev, path, true); +} + +static int ctcu_disable(struct coresight_device *csdev, void *data) +{ + struct coresight_path *path = (struct coresight_path *)data; + + return ctcu_set_etr_traceid(csdev, path, false); +} + +static const struct coresight_ops_helper ctcu_helper_ops = { + .enable = ctcu_enable, + .disable = ctcu_disable, +}; + +static const struct coresight_ops ctcu_ops = { + .helper_ops = &ctcu_helper_ops, +}; + +static int ctcu_probe(struct platform_device *pdev) +{ + const struct ctcu_etr_config *etr_cfg; + struct coresight_platform_data *pdata; + struct coresight_desc desc = { 0 }; + struct device *dev = &pdev->dev; + const struct ctcu_config *cfgs; + struct ctcu_drvdata *drvdata; + void __iomem *base; + int i; + + desc.name = coresight_alloc_device_name(&ctcu_devs, dev); + if (!desc.name) + return -ENOMEM; + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + pdata = coresight_get_platform_data(dev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + dev->platform_data = pdata; + + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (!base) + return -ENOMEM; + + drvdata->apb_clk = coresight_get_enable_apb_pclk(dev); + if (IS_ERR(drvdata->apb_clk)) + return -ENODEV; + + cfgs = of_device_get_match_data(dev); + if (cfgs) { + if (cfgs->num_etr_config <= ETR_MAX_NUM) { + for (i = 0; i < cfgs->num_etr_config; i++) { + etr_cfg = &cfgs->etr_cfgs[i]; + drvdata->atid_offset[i] = etr_cfg->atid_offset; + } + } + } + + drvdata->base = base; + drvdata->dev = dev; + platform_set_drvdata(pdev, drvdata); + + desc.type = CORESIGHT_DEV_TYPE_HELPER; + desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CTCU; + desc.pdata = pdata; + desc.dev = dev; + desc.ops = &ctcu_ops; + desc.access = CSDEV_ACCESS_IOMEM(base); + + drvdata->csdev = coresight_register(&desc); + if (IS_ERR(drvdata->csdev)) { + if (!IS_ERR_OR_NULL(drvdata->apb_clk)) + clk_put(drvdata->apb_clk); + + return PTR_ERR(drvdata->csdev); + } + + return 0; +} + +static void ctcu_remove(struct platform_device *pdev) +{ + struct ctcu_drvdata *drvdata = platform_get_drvdata(pdev); + + coresight_unregister(drvdata->csdev); +} + +static int ctcu_platform_probe(struct platform_device *pdev) +{ + int ret; + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + ret = ctcu_probe(pdev); + pm_runtime_put(&pdev->dev); + if (ret) + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static void ctcu_platform_remove(struct platform_device *pdev) +{ + struct ctcu_drvdata *drvdata = platform_get_drvdata(pdev); + + if (WARN_ON(!drvdata)) + return; + + ctcu_remove(pdev); + pm_runtime_disable(&pdev->dev); + if (!IS_ERR_OR_NULL(drvdata->apb_clk)) + clk_put(drvdata->apb_clk); +} + +#ifdef CONFIG_PM +static int ctcu_runtime_suspend(struct device *dev) +{ + struct ctcu_drvdata *drvdata = dev_get_drvdata(dev); + + if (drvdata && !IS_ERR_OR_NULL(drvdata->apb_clk)) + clk_disable_unprepare(drvdata->apb_clk); + + return 0; +} + +static int ctcu_runtime_resume(struct device *dev) +{ + struct ctcu_drvdata *drvdata = dev_get_drvdata(dev); + + if (drvdata && !IS_ERR_OR_NULL(drvdata->apb_clk)) + clk_prepare_enable(drvdata->apb_clk); + + return 0; +} +#endif + +static const struct dev_pm_ops ctcu_dev_pm_ops = { + SET_RUNTIME_PM_OPS(ctcu_runtime_suspend, ctcu_runtime_resume, NULL) +}; + +static const struct of_device_id ctcu_match[] = { + {.compatible = "qcom,sa8775p-ctcu", .data = &sa8775p_cfgs}, + {} +}; + +static struct platform_driver ctcu_driver = { + .probe = ctcu_platform_probe, + .remove = ctcu_platform_remove, + .driver = { + .name = "coresight-ctcu", + .of_match_table = ctcu_match, + .pm = &ctcu_dev_pm_ops, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(ctcu_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CoreSight TMC Control Unit driver"); diff --git a/drivers/hwtracing/coresight/coresight-ctcu.h b/drivers/hwtracing/coresight/coresight-ctcu.h new file mode 100644 index 000000000000..e9594c38dd91 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-ctcu.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _CORESIGHT_CTCU_H +#define _CORESIGHT_CTCU_H +#include "coresight-trace-id.h" + +/* Maximum number of supported ETR devices for a single CTCU. */ +#define ETR_MAX_NUM 2 + +/** + * struct ctcu_etr_config + * @atid_offset: offset to the ATID0 Register. + * @port_num: in-port number of CTCU device that connected to ETR. + */ +struct ctcu_etr_config { + const u32 atid_offset; + const u32 port_num; +}; + +struct ctcu_config { + const struct ctcu_etr_config *etr_cfgs; + int num_etr_config; +}; + +struct ctcu_drvdata { + void __iomem *base; + struct clk *apb_clk; + struct device *dev; + struct coresight_device *csdev; + raw_spinlock_t spin_lock; + u32 atid_offset[ETR_MAX_NUM]; + /* refcnt for each traceid of each sink */ + u8 traceid_refcnt[ETR_MAX_NUM][CORESIGHT_TRACE_ID_RES_TOP]; +}; + +#endif diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bc0c853ffa6d..89b781e70fe7 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -71,7 +71,8 @@ enum coresight_dev_subtype_source { enum coresight_dev_subtype_helper { CORESIGHT_DEV_SUBTYPE_HELPER_CATU, - CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI + CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI, + CORESIGHT_DEV_SUBTYPE_HELPER_CTCU, }; /** From 3066a95d14aef3ad6a22b14aa911d09a271b5a0f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 27 Feb 2025 17:06:43 -0800 Subject: [PATCH 0574/2157] dt-bindings: input: matrix_keypad - add wakeup-source property The driver recognizes standard "wakeup-source" property and there are DTS files using it. Add the property to the binding. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502280105.REZ29MVg-lkp@intel.com/ Reviewed-by: Manuel Traut Acked-by: Conor Dooley Link: https://lore.kernel.org/r/Z8EMI9ALqYY72VBV@google.com Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/gpio-matrix-keypad.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml index 73bb153ed241..ebfff9e42a36 100644 --- a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml +++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml @@ -68,6 +68,8 @@ properties: Drive inactive columns during scan, default is to turn inactive columns into inputs. + wakeup-source: true + required: - compatible - row-gpios @@ -96,4 +98,6 @@ examples: 0x0001006A 0x0101001C 0x0201006C>; + + wakeup-source; }; From 7021d97fb89b216557561ca8cdf68144c016993b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Feb 2025 12:35:27 +0100 Subject: [PATCH 0575/2157] iio: adc: ad7173: Grab direct mode for calibration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While a calibration is running, better don't make the device do anything else. To enforce that, grab direct mode during calibration. Fixes: 031bdc8aee01 ("iio: adc: ad7173: add calibration support") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/8319fa2dc881c9899d60db4eba7fe8e984716617.1740655250.git.u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index ca2b41b16cc9..9ac8ea3cb499 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -345,6 +345,9 @@ static ssize_t ad7173_write_syscalib(struct iio_dev *indio_dev, if (ret) return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + mode = st->channels[chan->channel].syscalib_mode; if (sys_calib) { if (mode == AD7173_SYSCALIB_ZERO_SCALE) @@ -355,6 +358,8 @@ static ssize_t ad7173_write_syscalib(struct iio_dev *indio_dev, chan->address); } + iio_device_release_direct(indio_dev); + return ret ? : len; } From 08808b3ef384974b1eaf4975de707f93f8cda62d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Feb 2025 12:35:28 +0100 Subject: [PATCH 0576/2157] iio: adc: ad7192: Grab direct mode for calibration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While a calibration is running, better don't make the device do anything else. To enforce that, grab direct mode during calibration. Fixes: 42776c14c692 ("staging: iio: adc: ad7192: Add system calibration support") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/8aade802afca6a89641e24c1ae1d4b8d82cff58d.1740655250.git.u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index f6150b905286..15dc6e6854e8 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -257,6 +257,9 @@ static ssize_t ad7192_write_syscalib(struct iio_dev *indio_dev, if (ret) return ret; + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + temp = st->syscalib_mode[chan->channel]; if (sys_calib) { if (temp == AD7192_SYSCALIB_ZERO_SCALE) @@ -267,6 +270,8 @@ static ssize_t ad7192_write_syscalib(struct iio_dev *indio_dev, chan->address); } + iio_device_release_direct(indio_dev); + return ret ? ret : len; } From 21ce1ce04350fd46807622269018f9e2c09b7ac1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 22 Feb 2025 15:20:21 +0000 Subject: [PATCH 0577/2157] staging: iio: accel: adis16240: Drop driver for this impact sensor Whilst an interesting part, no one has done significant work on this driver since 2019. The recent changes are all as a result of adis library improvements having to incorporate this device. https://www.analog.com/en/products/adis16240.html now lists this part as obsolete so the chances of anyone working on it are likely to be greatly reduced. So drop it. We can always bring it back if anyone does have interest in this device and is willing to invest the time to make it suitable for a staging graduation. How to handle the hardware triggered short bursts of capture has never been resolved and is a somewhat challenging ABI design problem. Cc: Nuno Sa Cc: Rodrigo Carvalho Link: https://patch.msgid.link/20250222152021.1039675-1-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/staging/iio/accel/Kconfig | 12 - drivers/staging/iio/accel/Makefile | 1 - drivers/staging/iio/accel/adis16240.c | 443 -------------------------- 3 files changed, 456 deletions(-) delete mode 100644 drivers/staging/iio/accel/adis16240.c diff --git a/drivers/staging/iio/accel/Kconfig b/drivers/staging/iio/accel/Kconfig index 3318997a7009..cee51f64bc4b 100644 --- a/drivers/staging/iio/accel/Kconfig +++ b/drivers/staging/iio/accel/Kconfig @@ -16,16 +16,4 @@ config ADIS16203 To compile this driver as a module, say M here: the module will be called adis16203. -config ADIS16240 - tristate "Analog Devices ADIS16240 Programmable Impact Sensor and Recorder" - depends on SPI - select IIO_ADIS_LIB - select IIO_ADIS_LIB_BUFFER if IIO_BUFFER - help - Say Y here to build support for Analog Devices adis16240 programmable - impact Sensor and recorder. - - To compile this driver as a module, say M here: the module will be - called adis16240. - endmenu diff --git a/drivers/staging/iio/accel/Makefile b/drivers/staging/iio/accel/Makefile index 094cc9be35bd..acac7bc9b9c0 100644 --- a/drivers/staging/iio/accel/Makefile +++ b/drivers/staging/iio/accel/Makefile @@ -4,4 +4,3 @@ # obj-$(CONFIG_ADIS16203) += adis16203.o -obj-$(CONFIG_ADIS16240) += adis16240.o diff --git a/drivers/staging/iio/accel/adis16240.c b/drivers/staging/iio/accel/adis16240.c deleted file mode 100644 index 3be3eaf5d9d4..000000000000 --- a/drivers/staging/iio/accel/adis16240.c +++ /dev/null @@ -1,443 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * ADIS16240 Programmable Impact Sensor and Recorder driver - * - * Copyright 2010 Analog Devices Inc. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define ADIS16240_STARTUP_DELAY 220 /* ms */ - -/* Flash memory write count */ -#define ADIS16240_FLASH_CNT 0x00 - -/* Output, power supply */ -#define ADIS16240_SUPPLY_OUT 0x02 - -/* Output, x-axis accelerometer */ -#define ADIS16240_XACCL_OUT 0x04 - -/* Output, y-axis accelerometer */ -#define ADIS16240_YACCL_OUT 0x06 - -/* Output, z-axis accelerometer */ -#define ADIS16240_ZACCL_OUT 0x08 - -/* Output, auxiliary ADC input */ -#define ADIS16240_AUX_ADC 0x0A - -/* Output, temperature */ -#define ADIS16240_TEMP_OUT 0x0C - -/* Output, x-axis acceleration peak */ -#define ADIS16240_XPEAK_OUT 0x0E - -/* Output, y-axis acceleration peak */ -#define ADIS16240_YPEAK_OUT 0x10 - -/* Output, z-axis acceleration peak */ -#define ADIS16240_ZPEAK_OUT 0x12 - -/* Output, sum-of-squares acceleration peak */ -#define ADIS16240_XYZPEAK_OUT 0x14 - -/* Output, Capture Buffer 1, X and Y acceleration */ -#define ADIS16240_CAPT_BUF1 0x16 - -/* Output, Capture Buffer 2, Z acceleration */ -#define ADIS16240_CAPT_BUF2 0x18 - -/* Diagnostic, error flags */ -#define ADIS16240_DIAG_STAT 0x1A - -/* Diagnostic, event counter */ -#define ADIS16240_EVNT_CNTR 0x1C - -/* Diagnostic, check sum value from firmware test */ -#define ADIS16240_CHK_SUM 0x1E - -/* Calibration, x-axis acceleration offset adjustment */ -#define ADIS16240_XACCL_OFF 0x20 - -/* Calibration, y-axis acceleration offset adjustment */ -#define ADIS16240_YACCL_OFF 0x22 - -/* Calibration, z-axis acceleration offset adjustment */ -#define ADIS16240_ZACCL_OFF 0x24 - -/* Clock, hour and minute */ -#define ADIS16240_CLK_TIME 0x2E - -/* Clock, month and day */ -#define ADIS16240_CLK_DATE 0x30 - -/* Clock, year */ -#define ADIS16240_CLK_YEAR 0x32 - -/* Wake-up setting, hour and minute */ -#define ADIS16240_WAKE_TIME 0x34 - -/* Wake-up setting, month and day */ -#define ADIS16240_WAKE_DATE 0x36 - -/* Alarm 1 amplitude threshold */ -#define ADIS16240_ALM_MAG1 0x38 - -/* Alarm 2 amplitude threshold */ -#define ADIS16240_ALM_MAG2 0x3A - -/* Alarm control */ -#define ADIS16240_ALM_CTRL 0x3C - -/* Capture, external trigger control */ -#define ADIS16240_XTRIG_CTRL 0x3E - -/* Capture, address pointer */ -#define ADIS16240_CAPT_PNTR 0x40 - -/* Capture, configuration and control */ -#define ADIS16240_CAPT_CTRL 0x42 - -/* General-purpose digital input/output control */ -#define ADIS16240_GPIO_CTRL 0x44 - -/* Miscellaneous control */ -#define ADIS16240_MSC_CTRL 0x46 - -/* Internal sample period (rate) control */ -#define ADIS16240_SMPL_PRD 0x48 - -/* System command */ -#define ADIS16240_GLOB_CMD 0x4A - -/* MSC_CTRL */ - -/* Enables sum-of-squares output (XYZPEAK_OUT) */ -#define ADIS16240_MSC_CTRL_XYZPEAK_OUT_EN BIT(15) - -/* Enables peak tracking output (XPEAK_OUT, YPEAK_OUT, and ZPEAK_OUT) */ -#define ADIS16240_MSC_CTRL_X_Y_ZPEAK_OUT_EN BIT(14) - -/* Self-test enable: 1 = apply electrostatic force, 0 = disabled */ -#define ADIS16240_MSC_CTRL_SELF_TEST_EN BIT(8) - -/* Data-ready enable: 1 = enabled, 0 = disabled */ -#define ADIS16240_MSC_CTRL_DATA_RDY_EN BIT(2) - -/* Data-ready polarity: 1 = active high, 0 = active low */ -#define ADIS16240_MSC_CTRL_ACTIVE_HIGH BIT(1) - -/* Data-ready line selection: 1 = DIO2, 0 = DIO1 */ -#define ADIS16240_MSC_CTRL_DATA_RDY_DIO2 BIT(0) - -/* DIAG_STAT */ - -/* Alarm 2 status: 1 = alarm active, 0 = alarm inactive */ -#define ADIS16240_DIAG_STAT_ALARM2 BIT(9) - -/* Alarm 1 status: 1 = alarm active, 0 = alarm inactive */ -#define ADIS16240_DIAG_STAT_ALARM1 BIT(8) - -/* Capture buffer full: 1 = capture buffer is full */ -#define ADIS16240_DIAG_STAT_CPT_BUF_FUL BIT(7) - -/* Flash test, checksum flag: 1 = mismatch, 0 = match */ -#define ADIS16240_DIAG_STAT_CHKSUM BIT(6) - -/* Power-on, self-test flag: 1 = failure, 0 = pass */ -#define ADIS16240_DIAG_STAT_PWRON_FAIL_BIT 5 - -/* Power-on self-test: 1 = in-progress, 0 = complete */ -#define ADIS16240_DIAG_STAT_PWRON_BUSY BIT(4) - -/* SPI communications failure */ -#define ADIS16240_DIAG_STAT_SPI_FAIL_BIT 3 - -/* Flash update failure */ -#define ADIS16240_DIAG_STAT_FLASH_UPT_BIT 2 - -/* Power supply above 3.625 V */ -#define ADIS16240_DIAG_STAT_POWER_HIGH_BIT 1 - - /* Power supply below 2.225 V */ -#define ADIS16240_DIAG_STAT_POWER_LOW_BIT 0 - -/* GLOB_CMD */ - -#define ADIS16240_GLOB_CMD_RESUME BIT(8) -#define ADIS16240_GLOB_CMD_SW_RESET BIT(7) -#define ADIS16240_GLOB_CMD_STANDBY BIT(2) - -#define ADIS16240_ERROR_ACTIVE BIT(14) - -/* At the moment triggers are only used for ring buffer - * filling. This may change! - */ - -enum adis16240_scan { - ADIS16240_SCAN_ACC_X, - ADIS16240_SCAN_ACC_Y, - ADIS16240_SCAN_ACC_Z, - ADIS16240_SCAN_SUPPLY, - ADIS16240_SCAN_AUX_ADC, - ADIS16240_SCAN_TEMP, -}; - -static ssize_t adis16240_spi_read_signed(struct device *dev, - struct device_attribute *attr, - char *buf, - unsigned int bits) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct adis *st = iio_priv(indio_dev); - int ret; - s16 val = 0; - unsigned int shift = 16 - bits; - struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); - - ret = adis_read_reg_16(st, - this_attr->address, (u16 *)&val); - if (ret) - return ret; - - if (val & ADIS16240_ERROR_ACTIVE) - adis_check_status(st); - - val = (s16)(val << shift) >> shift; - return sprintf(buf, "%d\n", val); -} - -static ssize_t adis16240_read_12bit_signed(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return adis16240_spi_read_signed(dev, attr, buf, 12); -} - -static IIO_DEVICE_ATTR(in_accel_xyz_squared_peak_raw, 0444, - adis16240_read_12bit_signed, NULL, - ADIS16240_XYZPEAK_OUT); - -static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("4096"); - -static const u8 adis16240_addresses[][2] = { - [ADIS16240_SCAN_ACC_X] = { ADIS16240_XACCL_OFF, ADIS16240_XPEAK_OUT }, - [ADIS16240_SCAN_ACC_Y] = { ADIS16240_YACCL_OFF, ADIS16240_YPEAK_OUT }, - [ADIS16240_SCAN_ACC_Z] = { ADIS16240_ZACCL_OFF, ADIS16240_ZPEAK_OUT }, -}; - -static int adis16240_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, int *val2, - long mask) -{ - struct adis *st = iio_priv(indio_dev); - int ret; - u8 addr; - s16 val16; - - switch (mask) { - case IIO_CHAN_INFO_RAW: - return adis_single_conversion(indio_dev, chan, - ADIS16240_ERROR_ACTIVE, val); - case IIO_CHAN_INFO_SCALE: - switch (chan->type) { - case IIO_VOLTAGE: - if (chan->channel == 0) { - *val = 4; - *val2 = 880000; /* 4.88 mV */ - return IIO_VAL_INT_PLUS_MICRO; - } - return -EINVAL; - case IIO_TEMP: - *val = 244; /* 0.244 C */ - *val2 = 0; - return IIO_VAL_INT_PLUS_MICRO; - case IIO_ACCEL: - *val = 0; - *val2 = IIO_G_TO_M_S_2(51400); /* 51.4 mg */ - return IIO_VAL_INT_PLUS_MICRO; - default: - return -EINVAL; - } - break; - case IIO_CHAN_INFO_PEAK_SCALE: - *val = 0; - *val2 = IIO_G_TO_M_S_2(51400); /* 51.4 mg */ - return IIO_VAL_INT_PLUS_MICRO; - case IIO_CHAN_INFO_OFFSET: - *val = 25000 / 244 - 0x133; /* 25 C = 0x133 */ - return IIO_VAL_INT; - case IIO_CHAN_INFO_CALIBBIAS: - addr = adis16240_addresses[chan->scan_index][0]; - ret = adis_read_reg_16(st, addr, &val16); - if (ret) - return ret; - *val = sign_extend32(val16, 9); - return IIO_VAL_INT; - case IIO_CHAN_INFO_PEAK: - addr = adis16240_addresses[chan->scan_index][1]; - ret = adis_read_reg_16(st, addr, &val16); - if (ret) - return ret; - *val = sign_extend32(val16, 9); - return IIO_VAL_INT; - } - return -EINVAL; -} - -static int adis16240_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, - int val2, - long mask) -{ - struct adis *st = iio_priv(indio_dev); - u8 addr; - - switch (mask) { - case IIO_CHAN_INFO_CALIBBIAS: - addr = adis16240_addresses[chan->scan_index][0]; - return adis_write_reg_16(st, addr, val & GENMASK(9, 0)); - } - return -EINVAL; -} - -static const struct iio_chan_spec adis16240_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16240_SUPPLY_OUT, ADIS16240_SCAN_SUPPLY, 0, 10), - ADIS_AUX_ADC_CHAN(ADIS16240_AUX_ADC, ADIS16240_SCAN_AUX_ADC, 0, 10), - ADIS_ACCEL_CHAN(X, ADIS16240_XACCL_OUT, ADIS16240_SCAN_ACC_X, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), - 0, 10), - ADIS_ACCEL_CHAN(Y, ADIS16240_YACCL_OUT, ADIS16240_SCAN_ACC_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), - 0, 10), - ADIS_ACCEL_CHAN(Z, ADIS16240_ZACCL_OUT, ADIS16240_SCAN_ACC_Z, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), - 0, 10), - ADIS_TEMP_CHAN(ADIS16240_TEMP_OUT, ADIS16240_SCAN_TEMP, 0, 10), - IIO_CHAN_SOFT_TIMESTAMP(6) -}; - -static struct attribute *adis16240_attributes[] = { - &iio_dev_attr_in_accel_xyz_squared_peak_raw.dev_attr.attr, - &iio_const_attr_sampling_frequency_available.dev_attr.attr, - NULL -}; - -static const struct attribute_group adis16240_attribute_group = { - .attrs = adis16240_attributes, -}; - -static const struct iio_info adis16240_info = { - .attrs = &adis16240_attribute_group, - .read_raw = adis16240_read_raw, - .write_raw = adis16240_write_raw, - .update_scan_mode = adis_update_scan_mode, -}; - -static const char * const adis16240_status_error_msgs[] = { - [ADIS16240_DIAG_STAT_PWRON_FAIL_BIT] = "Power on, self-test failed", - [ADIS16240_DIAG_STAT_SPI_FAIL_BIT] = "SPI failure", - [ADIS16240_DIAG_STAT_FLASH_UPT_BIT] = "Flash update failed", - [ADIS16240_DIAG_STAT_POWER_HIGH_BIT] = "Power supply above 3.625V", - [ADIS16240_DIAG_STAT_POWER_LOW_BIT] = "Power supply below 2.225V", -}; - -static const struct adis_timeout adis16240_timeouts = { - .reset_ms = ADIS16240_STARTUP_DELAY, - .sw_reset_ms = ADIS16240_STARTUP_DELAY, - .self_test_ms = ADIS16240_STARTUP_DELAY, -}; - -static const struct adis_data adis16240_data = { - .write_delay = 35, - .read_delay = 35, - .msc_ctrl_reg = ADIS16240_MSC_CTRL, - .glob_cmd_reg = ADIS16240_GLOB_CMD, - .diag_stat_reg = ADIS16240_DIAG_STAT, - - .self_test_mask = ADIS16240_MSC_CTRL_SELF_TEST_EN, - .self_test_reg = ADIS16240_MSC_CTRL, - .self_test_no_autoclear = true, - .timeouts = &adis16240_timeouts, - - .status_error_msgs = adis16240_status_error_msgs, - .status_error_mask = BIT(ADIS16240_DIAG_STAT_PWRON_FAIL_BIT) | - BIT(ADIS16240_DIAG_STAT_SPI_FAIL_BIT) | - BIT(ADIS16240_DIAG_STAT_FLASH_UPT_BIT) | - BIT(ADIS16240_DIAG_STAT_POWER_HIGH_BIT) | - BIT(ADIS16240_DIAG_STAT_POWER_LOW_BIT), -}; - -static int adis16240_probe(struct spi_device *spi) -{ - int ret; - struct adis *st; - struct iio_dev *indio_dev; - - /* setup the industrialio driver allocated elements */ - indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st)); - if (!indio_dev) - return -ENOMEM; - st = iio_priv(indio_dev); - /* this is only used for removal purposes */ - spi_set_drvdata(spi, indio_dev); - - indio_dev->name = spi->dev.driver->name; - indio_dev->info = &adis16240_info; - indio_dev->channels = adis16240_channels; - indio_dev->num_channels = ARRAY_SIZE(adis16240_channels); - indio_dev->modes = INDIO_DIRECT_MODE; - - spi->mode = SPI_MODE_3; - ret = spi_setup(spi); - if (ret) { - dev_err(&spi->dev, "spi_setup failed!\n"); - return ret; - } - - ret = adis_init(st, indio_dev, spi, &adis16240_data); - if (ret) - return ret; - ret = devm_adis_setup_buffer_and_trigger(st, indio_dev, NULL); - if (ret) - return ret; - - /* Get the device into a sane initial state */ - ret = __adis_initial_startup(st); - if (ret) - return ret; - - return devm_iio_device_register(&spi->dev, indio_dev); -} - -static const struct of_device_id adis16240_of_match[] = { - { .compatible = "adi,adis16240" }, - { }, -}; -MODULE_DEVICE_TABLE(of, adis16240_of_match); - -static struct spi_driver adis16240_driver = { - .driver = { - .name = "adis16240", - .of_match_table = adis16240_of_match, - }, - .probe = adis16240_probe, -}; -module_spi_driver(adis16240_driver); - -MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); -MODULE_DESCRIPTION("Analog Devices Programmable Impact Sensor and Recorder"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("spi:adis16240"); -MODULE_IMPORT_NS("IIO_ADISLIB"); From 6c9405fd2581a1131c39fa9393d08a3cfe5d767b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:29 +0000 Subject: [PATCH 0578/2157] rtc: max77686: drop needless struct max77686_rtc_info::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this driver was converted to using the devres managed i2c device in commit 59a7f24fceb3 ("rtc: max77686: convert to devm_i2c_new_dummy_device()"), struct max77686_rtc_info::rtc became essentially unused. We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-1-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-max77686.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 7bb044d2ac25..6b0d02b44c80 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -85,7 +85,6 @@ struct max77686_rtc_driver_data { struct max77686_rtc_info { struct device *dev; - struct i2c_client *rtc; struct rtc_device *rtc_dev; struct mutex lock; @@ -691,6 +690,7 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info) { struct device *parent = info->dev->parent; struct i2c_client *parent_i2c = to_i2c_client(parent); + struct i2c_client *client; int ret; if (info->drv_data->rtc_irq_from_platform) { @@ -714,14 +714,14 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info) goto add_rtc_irq; } - info->rtc = devm_i2c_new_dummy_device(info->dev, parent_i2c->adapter, - info->drv_data->rtc_i2c_addr); - if (IS_ERR(info->rtc)) { + client = devm_i2c_new_dummy_device(info->dev, parent_i2c->adapter, + info->drv_data->rtc_i2c_addr); + if (IS_ERR(client)) { dev_err(info->dev, "Failed to allocate I2C device for RTC\n"); - return PTR_ERR(info->rtc); + return PTR_ERR(client); } - info->rtc_regmap = devm_regmap_init_i2c(info->rtc, + info->rtc_regmap = devm_regmap_init_i2c(client, info->drv_data->regmap_config); if (IS_ERR(info->rtc_regmap)) { ret = PTR_ERR(info->rtc_regmap); From afe5f9f94d1116e14c84908c64f589aa142745c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:30 +0000 Subject: [PATCH 0579/2157] rtc: s5m: drop needless struct s5m_rtc_info::i2c member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this driver was converted to using the devres managed i2c device in commit 7db7ad0817fe ("rtc: s5m: use devm_i2c_new_dummy_device()"), struct s5m_rtc_info::i2c became essentially unused. We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-2-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s5m.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index 36acca5b2639..88aed2766034 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -146,7 +146,6 @@ static const struct s5m_rtc_reg_config s2mps15_rtc_regs = { struct s5m_rtc_info { struct device *dev; - struct i2c_client *i2c; struct sec_pmic_dev *s5m87xx; struct regmap *regmap; struct rtc_device *rtc_dev; @@ -640,6 +639,7 @@ static int s5m_rtc_probe(struct platform_device *pdev) { struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent); struct s5m_rtc_info *info; + struct i2c_client *i2c; const struct regmap_config *regmap_cfg; int ret, alarm_irq; @@ -675,14 +675,14 @@ static int s5m_rtc_probe(struct platform_device *pdev) return -ENODEV; } - info->i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, - RTC_I2C_ADDR); - if (IS_ERR(info->i2c)) { + i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, + RTC_I2C_ADDR); + if (IS_ERR(i2c)) { dev_err(&pdev->dev, "Failed to allocate I2C for RTC\n"); - return PTR_ERR(info->i2c); + return PTR_ERR(i2c); } - info->regmap = devm_regmap_init_i2c(info->i2c, regmap_cfg); + info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); if (IS_ERR(info->regmap)) { ret = PTR_ERR(info->regmap); dev_err(&pdev->dev, "Failed to allocate RTC register map: %d\n", From d19111dff9c2dd0fcafaaea634236ecd0ec29c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:31 +0000 Subject: [PATCH 0580/2157] rtc: aspeed: drop needless struct aspeed_rtc::rtc_dev member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc_dev member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-3-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-aspeed.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-aspeed.c b/drivers/rtc/rtc-aspeed.c index 880b015eebaf..0d0053b52f9b 100644 --- a/drivers/rtc/rtc-aspeed.c +++ b/drivers/rtc/rtc-aspeed.c @@ -8,7 +8,6 @@ #include struct aspeed_rtc { - struct rtc_device *rtc_dev; void __iomem *base; }; @@ -85,6 +84,7 @@ static const struct rtc_class_ops aspeed_rtc_ops = { static int aspeed_rtc_probe(struct platform_device *pdev) { struct aspeed_rtc *rtc; + struct rtc_device *rtc_dev; rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) @@ -94,17 +94,17 @@ static int aspeed_rtc_probe(struct platform_device *pdev) if (IS_ERR(rtc->base)) return PTR_ERR(rtc->base); - rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(rtc->rtc_dev)) - return PTR_ERR(rtc->rtc_dev); + rtc_dev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc_dev)) + return PTR_ERR(rtc_dev); platform_set_drvdata(pdev, rtc); - rtc->rtc_dev->ops = &aspeed_rtc_ops; - rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; - rtc->rtc_dev->range_max = 38814989399LL; /* 3199-12-31 23:59:59 */ + rtc_dev->ops = &aspeed_rtc_ops; + rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; + rtc_dev->range_max = 38814989399LL; /* 3199-12-31 23:59:59 */ - return devm_rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc_dev); } static const struct of_device_id aspeed_rtc_match[] = { From 1b625aaf335ad2216b47b9b9d9416d66f0a54e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:32 +0000 Subject: [PATCH 0581/2157] rtc: ds2404: drop needless struct ds2404::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-4-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds2404.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index 3231fd9f61da..217694eca36c 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -31,7 +31,6 @@ struct ds2404 { struct gpio_desc *rst_gpiod; struct gpio_desc *clk_gpiod; struct gpio_desc *dq_gpiod; - struct rtc_device *rtc; }; static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev) @@ -182,6 +181,7 @@ static const struct rtc_class_ops ds2404_rtc_ops = { static int rtc_probe(struct platform_device *pdev) { struct ds2404 *chip; + struct rtc_device *rtc; int retval = -EBUSY; chip = devm_kzalloc(&pdev->dev, sizeof(struct ds2404), GFP_KERNEL); @@ -190,9 +190,9 @@ static int rtc_probe(struct platform_device *pdev) chip->dev = &pdev->dev; - chip->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(chip->rtc)) - return PTR_ERR(chip->rtc); + rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); retval = ds2404_gpio_map(chip, pdev); if (retval) @@ -200,10 +200,10 @@ static int rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, chip); - chip->rtc->ops = &ds2404_rtc_ops; - chip->rtc->range_max = U32_MAX; + rtc->ops = &ds2404_rtc_ops; + rtc->range_max = U32_MAX; - retval = devm_rtc_register_device(chip->rtc); + retval = devm_rtc_register_device(rtc); if (retval) return retval; From 5d0ad519f503b8432ebc396dfe87e6190dbdcbb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:33 +0000 Subject: [PATCH 0582/2157] rtc: ep93xx: drop needless struct ep93xx_rtc::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-5-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ep93xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 1fdd20d01560..dcdcdd06f30d 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -28,7 +28,6 @@ struct ep93xx_rtc { void __iomem *mmio_base; - struct rtc_device *rtc; }; static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, @@ -123,6 +122,7 @@ static const struct attribute_group ep93xx_rtc_sysfs_files = { static int ep93xx_rtc_probe(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc; + struct rtc_device *rtc; int err; ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL); @@ -135,18 +135,18 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ep93xx_rtc); - ep93xx_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(ep93xx_rtc->rtc)) - return PTR_ERR(ep93xx_rtc->rtc); + rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - ep93xx_rtc->rtc->ops = &ep93xx_rtc_ops; - ep93xx_rtc->rtc->range_max = U32_MAX; + rtc->ops = &ep93xx_rtc_ops; + rtc->range_max = U32_MAX; - err = rtc_add_group(ep93xx_rtc->rtc, &ep93xx_rtc_sysfs_files); + err = rtc_add_group(rtc, &ep93xx_rtc_sysfs_files); if (err) return err; - return devm_rtc_register_device(ep93xx_rtc->rtc); + return devm_rtc_register_device(rtc); } static const struct of_device_id ep93xx_rtc_of_ids[] = { From a55d44807b63f171f8079d770c9d82fa87046cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:34 +0000 Subject: [PATCH 0583/2157] rtc: ftrtc010: drop needless struct ftrtc010_rtc::rtc_dev member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc_dev member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Reviewed-by: Linus Walleij Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-6-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ftrtc010.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c index cb4a5d101f53..02608d378495 100644 --- a/drivers/rtc/rtc-ftrtc010.c +++ b/drivers/rtc/rtc-ftrtc010.c @@ -28,7 +28,6 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRV_NAME); struct ftrtc010_rtc { - struct rtc_device *rtc_dev; void __iomem *rtc_base; int rtc_irq; struct clk *pclk; @@ -113,6 +112,7 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev) struct ftrtc010_rtc *rtc; struct device *dev = &pdev->dev; struct resource *res; + struct rtc_device *rtc_dev; int ret; rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); @@ -160,29 +160,28 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev) goto err_disable_extclk; } - rtc->rtc_dev = devm_rtc_allocate_device(dev); - if (IS_ERR(rtc->rtc_dev)) { - ret = PTR_ERR(rtc->rtc_dev); + rtc_dev = devm_rtc_allocate_device(dev); + if (IS_ERR(rtc_dev)) { + ret = PTR_ERR(rtc_dev); goto err_disable_extclk; } - rtc->rtc_dev->ops = &ftrtc010_rtc_ops; + rtc_dev->ops = &ftrtc010_rtc_ops; sec = readl(rtc->rtc_base + FTRTC010_RTC_SECOND); min = readl(rtc->rtc_base + FTRTC010_RTC_MINUTE); hour = readl(rtc->rtc_base + FTRTC010_RTC_HOUR); days = readl(rtc->rtc_base + FTRTC010_RTC_DAYS); - rtc->rtc_dev->range_min = (u64)days * 86400 + hour * 3600 + - min * 60 + sec; - rtc->rtc_dev->range_max = U32_MAX + rtc->rtc_dev->range_min; + rtc_dev->range_min = (u64)days * 86400 + hour * 3600 + min * 60 + sec; + rtc_dev->range_max = U32_MAX + rtc_dev->range_min; ret = devm_request_irq(dev, rtc->rtc_irq, ftrtc010_rtc_interrupt, IRQF_SHARED, pdev->name, dev); if (unlikely(ret)) goto err_disable_extclk; - return devm_rtc_register_device(rtc->rtc_dev); + return devm_rtc_register_device(rtc_dev); err_disable_extclk: clk_disable_unprepare(rtc->extclk); From 013df5bdf8b469e8eb15f0a26743864a33217639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:35 +0000 Subject: [PATCH 0584/2157] rtc: m48t86: drop needless struct m48t86_rtc_info::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-7-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m48t86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index dd4a62e2d39c..10cd054fe86f 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -41,7 +41,6 @@ struct m48t86_rtc_info { void __iomem *index_reg; void __iomem *data_reg; - struct rtc_device *rtc; }; static unsigned char m48t86_readb(struct device *dev, unsigned long addr) @@ -219,6 +218,7 @@ static bool m48t86_verify_chip(struct platform_device *pdev) static int m48t86_rtc_probe(struct platform_device *pdev) { struct m48t86_rtc_info *info; + struct rtc_device *rtc; unsigned char reg; int err; struct nvmem_config m48t86_nvmem_cfg = { @@ -250,17 +250,17 @@ static int m48t86_rtc_probe(struct platform_device *pdev) return -ENODEV; } - info->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(info->rtc)) - return PTR_ERR(info->rtc); + rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - info->rtc->ops = &m48t86_rtc_ops; + rtc->ops = &m48t86_rtc_ops; - err = devm_rtc_register_device(info->rtc); + err = devm_rtc_register_device(rtc); if (err) return err; - devm_rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg); + devm_rtc_nvmem_register(rtc, &m48t86_nvmem_cfg); /* read battery status */ reg = m48t86_readb(&pdev->dev, M48T86_D); From a0470062748fc96bdd3eef9e0acf9465688db088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:36 +0000 Subject: [PATCH 0585/2157] rtc: meson: drop needless struct meson_rtc::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-8-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-meson.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-meson.c b/drivers/rtc/rtc-meson.c index db1d626edca5..47e9ebf58ffc 100644 --- a/drivers/rtc/rtc-meson.c +++ b/drivers/rtc/rtc-meson.c @@ -59,7 +59,6 @@ #define MESON_STATIC_DEFAULT (MESON_STATIC_BIAS_CUR | MESON_STATIC_VOLTAGE) struct meson_rtc { - struct rtc_device *rtc; /* rtc device we created */ struct device *dev; /* device we bound from */ struct reset_control *reset; /* reset source */ struct regulator *vdd; /* voltage input */ @@ -292,6 +291,7 @@ static int meson_rtc_probe(struct platform_device *pdev) }; struct device *dev = &pdev->dev; struct meson_rtc *rtc; + struct rtc_device *rtc_dev; void __iomem *base; int ret; u32 tm; @@ -300,16 +300,16 @@ static int meson_rtc_probe(struct platform_device *pdev) if (!rtc) return -ENOMEM; - rtc->rtc = devm_rtc_allocate_device(dev); - if (IS_ERR(rtc->rtc)) - return PTR_ERR(rtc->rtc); + rtc_dev = devm_rtc_allocate_device(dev); + if (IS_ERR(rtc_dev)) + return PTR_ERR(rtc_dev); platform_set_drvdata(pdev, rtc); rtc->dev = dev; - rtc->rtc->ops = &meson_rtc_ops; - rtc->rtc->range_max = U32_MAX; + rtc_dev->ops = &meson_rtc_ops; + rtc_dev->range_max = U32_MAX; base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) @@ -365,11 +365,11 @@ static int meson_rtc_probe(struct platform_device *pdev) } meson_rtc_nvmem_config.priv = rtc; - ret = devm_rtc_nvmem_register(rtc->rtc, &meson_rtc_nvmem_config); + ret = devm_rtc_nvmem_register(rtc_dev, &meson_rtc_nvmem_config); if (ret) goto out_disable_vdd; - ret = devm_rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc_dev); if (ret) goto out_disable_vdd; From 38c7aaeab8b888180abaec88c4304a95e1f74b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:37 +0000 Subject: [PATCH 0586/2157] rtc: meson-vrtc: drop needless struct meson_vrtc_data::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-9-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-meson-vrtc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 5849729f7d01..7d38258cbe37 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -13,7 +13,6 @@ struct meson_vrtc_data { void __iomem *io_alarm; - struct rtc_device *rtc; unsigned long alarm_time; bool enabled; }; @@ -65,6 +64,7 @@ static const struct rtc_class_ops meson_vrtc_ops = { static int meson_vrtc_probe(struct platform_device *pdev) { struct meson_vrtc_data *vrtc; + struct rtc_device *rtc; vrtc = devm_kzalloc(&pdev->dev, sizeof(*vrtc), GFP_KERNEL); if (!vrtc) @@ -78,12 +78,12 @@ static int meson_vrtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, vrtc); - vrtc->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(vrtc->rtc)) - return PTR_ERR(vrtc->rtc); + rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - vrtc->rtc->ops = &meson_vrtc_ops; - return devm_rtc_register_device(vrtc->rtc); + rtc->ops = &meson_vrtc_ops; + return devm_rtc_register_device(rtc); } static int __maybe_unused meson_vrtc_suspend(struct device *dev) From 3d5d0fe1cb82eea1831d33a8db08b142d208d7fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:38 +0000 Subject: [PATCH 0587/2157] rtc: pl030: drop needless struct pl030_rtc::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Reviewed-by: Linus Walleij Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-10-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pl030.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index 1326f310bf93..5caaa714f448 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -21,7 +21,6 @@ #define RTC_CR_MIE (1 << 0) struct pl030_rtc { - struct rtc_device *rtc; void __iomem *base; }; @@ -86,6 +85,7 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) { struct pl030_rtc *rtc; int ret; + struct rtc_device *rtc_dev; ret = amba_request_regions(dev, NULL); if (ret) @@ -97,14 +97,14 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) goto err_rtc; } - rtc->rtc = devm_rtc_allocate_device(&dev->dev); - if (IS_ERR(rtc->rtc)) { - ret = PTR_ERR(rtc->rtc); + rtc_dev = devm_rtc_allocate_device(&dev->dev); + if (IS_ERR(rtc_dev)) { + ret = PTR_ERR(rtc_dev); goto err_rtc; } - rtc->rtc->ops = &pl030_ops; - rtc->rtc->range_max = U32_MAX; + rtc_dev->ops = &pl030_ops; + rtc_dev->range_max = U32_MAX; rtc->base = ioremap(dev->res.start, resource_size(&dev->res)); if (!rtc->base) { ret = -ENOMEM; @@ -121,7 +121,7 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto err_irq; - ret = devm_rtc_register_device(rtc->rtc); + ret = devm_rtc_register_device(rtc_dev); if (ret) goto err_reg; From 3b87c6872aed6a7f4112ed6f6078e43035ce0026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:39 +0000 Subject: [PATCH 0588/2157] rtc: rx8581: drop needless struct rx8581 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Since this now means that the structure has just one member only left, there is no need anymore to allocate data for it and pass that around via the various callbacks, just to extract that one member. Instead, we can just pass that one member and avoid the extra memory allocation for the containing struct, reducing runtime memory consumption. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-11-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx8581.c | 85 +++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 49 deletions(-) diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index b18c12887bdc..20c2dff01bae 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -52,11 +52,6 @@ #define RX8571_USER_RAM 0x10 #define RX8571_NVRAM_SIZE 0x10 -struct rx8581 { - struct regmap *regmap; - struct rtc_device *rtc; -}; - struct rx85x1_config { struct regmap_config regmap; unsigned int num_nvram; @@ -72,14 +67,14 @@ static int rx8581_rtc_read_time(struct device *dev, struct rtc_time *tm) unsigned char date[7]; unsigned int data; int err; - struct rx8581 *rx8581 = i2c_get_clientdata(client); + struct regmap *regmap = i2c_get_clientdata(client); /* First we ensure that the "update flag" is not set, we read the * time and date then re-read the "update flag". If the update flag * has been set, we know that the time has changed during the read so * we repeat the whole process again. */ - err = regmap_read(rx8581->regmap, RX8581_REG_FLAG, &data); + err = regmap_read(regmap, RX8581_REG_FLAG, &data); if (err < 0) return err; @@ -92,20 +87,20 @@ static int rx8581_rtc_read_time(struct device *dev, struct rtc_time *tm) do { /* If update flag set, clear it */ if (data & RX8581_FLAG_UF) { - err = regmap_write(rx8581->regmap, RX8581_REG_FLAG, - data & ~RX8581_FLAG_UF); + err = regmap_write(regmap, RX8581_REG_FLAG, + data & ~RX8581_FLAG_UF); if (err < 0) return err; } /* Now read time and date */ - err = regmap_bulk_read(rx8581->regmap, RX8581_REG_SC, date, + err = regmap_bulk_read(regmap, RX8581_REG_SC, date, sizeof(date)); if (err < 0) return err; /* Check flag register */ - err = regmap_read(rx8581->regmap, RX8581_REG_FLAG, &data); + err = regmap_read(regmap, RX8581_REG_FLAG, &data); if (err < 0) return err; } while (data & RX8581_FLAG_UF); @@ -137,7 +132,7 @@ static int rx8581_rtc_set_time(struct device *dev, struct rtc_time *tm) struct i2c_client *client = to_i2c_client(dev); int err; unsigned char buf[7]; - struct rx8581 *rx8581 = i2c_get_clientdata(client); + struct regmap *regmap = i2c_get_clientdata(client); dev_dbg(dev, "%s: secs=%d, mins=%d, hours=%d, " "mday=%d, mon=%d, year=%d, wday=%d\n", @@ -160,25 +155,23 @@ static int rx8581_rtc_set_time(struct device *dev, struct rtc_time *tm) buf[RX8581_REG_DW] = (0x1 << tm->tm_wday); /* Stop the clock */ - err = regmap_update_bits(rx8581->regmap, RX8581_REG_CTRL, + err = regmap_update_bits(regmap, RX8581_REG_CTRL, RX8581_CTRL_STOP, RX8581_CTRL_STOP); if (err < 0) return err; /* write register's data */ - err = regmap_bulk_write(rx8581->regmap, RX8581_REG_SC, - buf, sizeof(buf)); + err = regmap_bulk_write(regmap, RX8581_REG_SC, buf, sizeof(buf)); if (err < 0) return err; /* get VLF and clear it */ - err = regmap_update_bits(rx8581->regmap, RX8581_REG_FLAG, - RX8581_FLAG_VLF, 0); + err = regmap_update_bits(regmap, RX8581_REG_FLAG, RX8581_FLAG_VLF, 0); if (err < 0) return err; /* Restart the clock */ - return regmap_update_bits(rx8581->regmap, RX8581_REG_CTRL, + return regmap_update_bits(regmap, RX8581_REG_CTRL, RX8581_CTRL_STOP, 0); } @@ -190,29 +183,27 @@ static const struct rtc_class_ops rx8581_rtc_ops = { static int rx8571_nvram_read(void *priv, unsigned int offset, void *val, size_t bytes) { - struct rx8581 *rx8581 = priv; + struct regmap *regmap = priv; - return regmap_bulk_read(rx8581->regmap, RX8571_USER_RAM + offset, - val, bytes); + return regmap_bulk_read(regmap, RX8571_USER_RAM + offset, val, bytes); } static int rx8571_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) { - struct rx8581 *rx8581 = priv; + struct regmap *regmap = priv; - return regmap_bulk_write(rx8581->regmap, RX8571_USER_RAM + offset, - val, bytes); + return regmap_bulk_write(regmap, RX8571_USER_RAM + offset, val, bytes); } static int rx85x1_nvram_read(void *priv, unsigned int offset, void *val, size_t bytes) { - struct rx8581 *rx8581 = priv; + struct regmap *regmap = priv; unsigned int tmp_val; int ret; - ret = regmap_read(rx8581->regmap, RX8581_REG_RAM, &tmp_val); + ret = regmap_read(regmap, RX8581_REG_RAM, &tmp_val); (*(unsigned char *)val) = (unsigned char) tmp_val; return ret; @@ -221,12 +212,11 @@ static int rx85x1_nvram_read(void *priv, unsigned int offset, void *val, static int rx85x1_nvram_write(void *priv, unsigned int offset, void *val, size_t bytes) { - struct rx8581 *rx8581 = priv; + struct regmap *regmap = priv; unsigned char tmp_val; tmp_val = *((unsigned char *)val); - return regmap_write(rx8581->regmap, RX8581_REG_RAM, - (unsigned int)tmp_val); + return regmap_write(regmap, RX8581_REG_RAM, (unsigned int)tmp_val); } static const struct rx85x1_config rx8581_config = { @@ -249,9 +239,10 @@ static const struct rx85x1_config rx8571_config = { static int rx8581_probe(struct i2c_client *client) { - struct rx8581 *rx8581; + struct regmap *regmap; const struct rx85x1_config *config = &rx8581_config; const void *data = of_device_get_match_data(&client->dev); + struct rtc_device *rtc; static struct nvmem_config nvmem_cfg[] = { { .name = "rx85x1-", @@ -276,31 +267,27 @@ static int rx8581_probe(struct i2c_client *client) if (data) config = data; - rx8581 = devm_kzalloc(&client->dev, sizeof(struct rx8581), GFP_KERNEL); - if (!rx8581) - return -ENOMEM; + regmap = devm_regmap_init_i2c(client, &config->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); - i2c_set_clientdata(client, rx8581); + i2c_set_clientdata(client, regmap); - rx8581->regmap = devm_regmap_init_i2c(client, &config->regmap); - if (IS_ERR(rx8581->regmap)) - return PTR_ERR(rx8581->regmap); + rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - rx8581->rtc = devm_rtc_allocate_device(&client->dev); - if (IS_ERR(rx8581->rtc)) - return PTR_ERR(rx8581->rtc); + rtc->ops = &rx8581_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; + rtc->start_secs = 0; + rtc->set_start_time = true; - rx8581->rtc->ops = &rx8581_rtc_ops; - rx8581->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - rx8581->rtc->range_max = RTC_TIMESTAMP_END_2099; - rx8581->rtc->start_secs = 0; - rx8581->rtc->set_start_time = true; - - ret = devm_rtc_register_device(rx8581->rtc); + ret = devm_rtc_register_device(rtc); for (i = 0; i < config->num_nvram; i++) { - nvmem_cfg[i].priv = rx8581; - devm_rtc_nvmem_register(rx8581->rtc, &nvmem_cfg[i]); + nvmem_cfg[i].priv = regmap; + devm_rtc_nvmem_register(rtc, &nvmem_cfg[i]); } return ret; From cd2a7052482e8a34348e30e6a6525ecb23f0d383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:40 +0000 Subject: [PATCH 0589/2157] rtc: s35390a: drop needless struct s35390a::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-12-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s35390a.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index e3dc18882f41..3408d2ab2741 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -63,7 +63,6 @@ MODULE_DEVICE_TABLE(of, s35390a_of_match); struct s35390a { struct i2c_client *client[8]; - struct rtc_device *rtc; int twentyfourhour; }; @@ -422,6 +421,7 @@ static int s35390a_probe(struct i2c_client *client) int err, err_read; unsigned int i; struct s35390a *s35390a; + struct rtc_device *rtc; char buf, status1; struct device *dev = &client->dev; @@ -447,9 +447,9 @@ static int s35390a_probe(struct i2c_client *client) } } - s35390a->rtc = devm_rtc_allocate_device(dev); - if (IS_ERR(s35390a->rtc)) - return PTR_ERR(s35390a->rtc); + rtc = devm_rtc_allocate_device(dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); err_read = s35390a_read_status(s35390a, &status1); if (err_read < 0) { @@ -480,17 +480,17 @@ static int s35390a_probe(struct i2c_client *client) device_set_wakeup_capable(dev, 1); - s35390a->rtc->ops = &s35390a_rtc_ops; - s35390a->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - s35390a->rtc->range_max = RTC_TIMESTAMP_END_2099; + rtc->ops = &s35390a_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; - set_bit(RTC_FEATURE_ALARM_RES_MINUTE, s35390a->rtc->features); - clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, s35390a->rtc->features ); + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); if (status1 & S35390A_FLAG_INT2) - rtc_update_irq(s35390a->rtc, 1, RTC_AF); + rtc_update_irq(rtc, 1, RTC_AF); - return devm_rtc_register_device(s35390a->rtc); + return devm_rtc_register_device(rtc); } static struct i2c_driver s35390a_driver = { From d94bc2bbf8d9c1319c64467cc5bf94ecb5388cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:41 +0000 Subject: [PATCH 0590/2157] rtc: sd2405al: drop needless struct sd2405al::rtc member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Tested-by: Tóth János Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-13-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sd2405al.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-sd2405al.c b/drivers/rtc/rtc-sd2405al.c index d2568c3e3876..00c3033e8079 100644 --- a/drivers/rtc/rtc-sd2405al.c +++ b/drivers/rtc/rtc-sd2405al.c @@ -42,7 +42,6 @@ struct sd2405al { struct device *dev; - struct rtc_device *rtc; struct regmap *regmap; }; @@ -167,6 +166,7 @@ static const struct regmap_config sd2405al_regmap_conf = { static int sd2405al_probe(struct i2c_client *client) { struct sd2405al *sd2405al; + struct rtc_device *rtc; int ret; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) @@ -182,17 +182,17 @@ static int sd2405al_probe(struct i2c_client *client) if (IS_ERR(sd2405al->regmap)) return PTR_ERR(sd2405al->regmap); - sd2405al->rtc = devm_rtc_allocate_device(&client->dev); - if (IS_ERR(sd2405al->rtc)) - return PTR_ERR(sd2405al->rtc); + rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - sd2405al->rtc->ops = &sd2405al_rtc_ops; - sd2405al->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - sd2405al->rtc->range_max = RTC_TIMESTAMP_END_2099; + rtc->ops = &sd2405al_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; dev_set_drvdata(&client->dev, sd2405al); - ret = devm_rtc_register_device(sd2405al->rtc); + ret = devm_rtc_register_device(rtc); if (ret < 0) return ret; From 6158c6b82444e628fdbde9ae6b76872e7ac4e7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:42 +0000 Subject: [PATCH 0591/2157] rtc: sd3078: drop needless struct sd3078 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory pointed to by the ::rtc member is managed via devres, and no code in this driver uses it past _probe(). We can drop it from the structure and just use a local temporary variable, reducing runtime memory consumption by a few bytes. Since this now means that the structure has just one member only left, there is no need anymore to allocate data for it and pass that around via the various callbacks, just to extract that one member. Instead, we can just pass that one member and avoid the extra memory allocation for the containing struct, reducing runtime memory consumption. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-14-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sd3078.c | 71 ++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/rtc/rtc-sd3078.c b/drivers/rtc/rtc-sd3078.c index fe27b54beaad..10cc1dcfc774 100644 --- a/drivers/rtc/rtc-sd3078.c +++ b/drivers/rtc/rtc-sd3078.c @@ -36,11 +36,6 @@ */ #define WRITE_PROTECT_EN 0 -struct sd3078 { - struct rtc_device *rtc; - struct regmap *regmap; -}; - /* * In order to prevent arbitrary modification of the time register, * when modification of the register, @@ -49,14 +44,11 @@ struct sd3078 { * 2. set WRITE2 bit * 3. set WRITE3 bit */ -static void sd3078_enable_reg_write(struct sd3078 *sd3078) +static void sd3078_enable_reg_write(struct regmap *regmap) { - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL2, - KEY_WRITE1, KEY_WRITE1); - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL1, - KEY_WRITE2, KEY_WRITE2); - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL1, - KEY_WRITE3, KEY_WRITE3); + regmap_update_bits(regmap, SD3078_REG_CTRL2, KEY_WRITE1, KEY_WRITE1); + regmap_update_bits(regmap, SD3078_REG_CTRL1, KEY_WRITE2, KEY_WRITE2); + regmap_update_bits(regmap, SD3078_REG_CTRL1, KEY_WRITE3, KEY_WRITE3); } #if WRITE_PROTECT_EN @@ -69,14 +61,11 @@ static void sd3078_enable_reg_write(struct sd3078 *sd3078) * 2. clear WRITE3 bit * 3. clear WRITE1 bit */ -static void sd3078_disable_reg_write(struct sd3078 *sd3078) +static void sd3078_disable_reg_write(struct regmap *regmap) { - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL1, - KEY_WRITE2, 0); - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL1, - KEY_WRITE3, 0); - regmap_update_bits(sd3078->regmap, SD3078_REG_CTRL2, - KEY_WRITE1, 0); + regmap_update_bits(regmap, SD3078_REG_CTRL1, KEY_WRITE2, 0); + regmap_update_bits(regmap, SD3078_REG_CTRL1, KEY_WRITE3, 0); + regmap_update_bits(regmap, SD3078_REG_CTRL2, KEY_WRITE1, 0); } #endif @@ -85,11 +74,10 @@ static int sd3078_rtc_read_time(struct device *dev, struct rtc_time *tm) unsigned char hour; unsigned char rtc_data[NUM_TIME_REGS] = {0}; struct i2c_client *client = to_i2c_client(dev); - struct sd3078 *sd3078 = i2c_get_clientdata(client); + struct regmap *regmap = i2c_get_clientdata(client); int ret; - ret = regmap_bulk_read(sd3078->regmap, SD3078_REG_SC, rtc_data, - NUM_TIME_REGS); + ret = regmap_bulk_read(regmap, SD3078_REG_SC, rtc_data, NUM_TIME_REGS); if (ret < 0) { dev_err(dev, "reading from RTC failed with err:%d\n", ret); return ret; @@ -123,7 +111,7 @@ static int sd3078_rtc_set_time(struct device *dev, struct rtc_time *tm) { unsigned char rtc_data[NUM_TIME_REGS]; struct i2c_client *client = to_i2c_client(dev); - struct sd3078 *sd3078 = i2c_get_clientdata(client); + struct regmap *regmap = i2c_get_clientdata(client); int ret; rtc_data[SD3078_REG_SC] = bin2bcd(tm->tm_sec); @@ -135,10 +123,10 @@ static int sd3078_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[SD3078_REG_YR] = bin2bcd(tm->tm_year - 100); #if WRITE_PROTECT_EN - sd3078_enable_reg_write(sd3078); + sd3078_enable_reg_write(regmap); #endif - ret = regmap_bulk_write(sd3078->regmap, SD3078_REG_SC, rtc_data, + ret = regmap_bulk_write(regmap, SD3078_REG_SC, rtc_data, NUM_TIME_REGS); if (ret < 0) { dev_err(dev, "writing to RTC failed with err:%d\n", ret); @@ -146,7 +134,7 @@ static int sd3078_rtc_set_time(struct device *dev, struct rtc_time *tm) } #if WRITE_PROTECT_EN - sd3078_disable_reg_write(sd3078); + sd3078_disable_reg_write(regmap); #endif return 0; @@ -166,36 +154,33 @@ static const struct regmap_config regmap_config = { static int sd3078_probe(struct i2c_client *client) { int ret; - struct sd3078 *sd3078; + struct regmap *regmap; + struct rtc_device *rtc; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - sd3078 = devm_kzalloc(&client->dev, sizeof(*sd3078), GFP_KERNEL); - if (!sd3078) - return -ENOMEM; - - sd3078->regmap = devm_regmap_init_i2c(client, ®map_config); - if (IS_ERR(sd3078->regmap)) { + regmap = devm_regmap_init_i2c(client, ®map_config); + if (IS_ERR(regmap)) { dev_err(&client->dev, "regmap allocation failed\n"); - return PTR_ERR(sd3078->regmap); + return PTR_ERR(regmap); } - i2c_set_clientdata(client, sd3078); + i2c_set_clientdata(client, regmap); - sd3078->rtc = devm_rtc_allocate_device(&client->dev); - if (IS_ERR(sd3078->rtc)) - return PTR_ERR(sd3078->rtc); + rtc = devm_rtc_allocate_device(&client->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - sd3078->rtc->ops = &sd3078_rtc_ops; - sd3078->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - sd3078->rtc->range_max = RTC_TIMESTAMP_END_2099; + rtc->ops = &sd3078_rtc_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2099; - ret = devm_rtc_register_device(sd3078->rtc); + ret = devm_rtc_register_device(rtc); if (ret) return ret; - sd3078_enable_reg_write(sd3078); + sd3078_enable_reg_write(regmap); return 0; } From e6403ae59ce1956beb8eaa9d622090bbf98f79aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:43 +0000 Subject: [PATCH 0592/2157] rtc: max77686: use dev_err_probe() where appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_err_probe() exists to simplify code and harmonise error messages, there's no reason not to use it here. Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-15-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-max77686.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 6b0d02b44c80..69ea3ce75b5a 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -704,10 +704,9 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info) } info->regmap = dev_get_regmap(parent, NULL); - if (!info->regmap) { - dev_err(info->dev, "Failed to get rtc regmap\n"); - return -ENODEV; - } + if (!info->regmap) + return dev_err_probe(info->dev, -ENODEV, + "Failed to get rtc regmap\n"); if (info->drv_data->rtc_i2c_addr == MAX77686_INVALID_I2C_ADDR) { info->rtc_regmap = info->regmap; @@ -716,28 +715,24 @@ static int max77686_init_rtc_regmap(struct max77686_rtc_info *info) client = devm_i2c_new_dummy_device(info->dev, parent_i2c->adapter, info->drv_data->rtc_i2c_addr); - if (IS_ERR(client)) { - dev_err(info->dev, "Failed to allocate I2C device for RTC\n"); - return PTR_ERR(client); - } + if (IS_ERR(client)) + return dev_err_probe(info->dev, PTR_ERR(client), + "Failed to allocate I2C device for RTC\n"); info->rtc_regmap = devm_regmap_init_i2c(client, info->drv_data->regmap_config); - if (IS_ERR(info->rtc_regmap)) { - ret = PTR_ERR(info->rtc_regmap); - dev_err(info->dev, "Failed to allocate RTC regmap: %d\n", ret); - return ret; - } + if (IS_ERR(info->rtc_regmap)) + return dev_err_probe(info->dev, PTR_ERR(info->rtc_regmap), + "Failed to allocate RTC regmap\n"); add_rtc_irq: ret = regmap_add_irq_chip(info->rtc_regmap, info->rtc_irq, IRQF_ONESHOT | IRQF_SHARED, 0, info->drv_data->rtc_irq_chip, &info->rtc_irq_data); - if (ret < 0) { - dev_err(info->dev, "Failed to add RTC irq chip: %d\n", ret); - return ret; - } + if (ret < 0) + return dev_err_probe(info->dev, ret, + "Failed to add RTC irq chip\n"); return 0; } From 0c57c2e72c5d9fb1b165eff65a6ff88511fd9a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 4 Mar 2025 17:05:44 +0000 Subject: [PATCH 0593/2157] rtc: s5m: convert to dev_err_probe() where appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_err_probe() exists to simplify code and harmonise error messages, there's no reason not to use it here. Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250304-rtc-cleanups-v2-16-d4689a71668c@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s5m.c | 50 ++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index 88aed2766034..db5c9b641277 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -626,11 +626,10 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) } info->rtc_24hr_mode = 1; - if (ret < 0) { - dev_err(info->dev, "%s: fail to write controlm reg(%d)\n", - __func__, ret); - return ret; - } + if (ret < 0) + return dev_err_probe(info->dev, ret, + "%s: fail to write controlm reg\n", + __func__); return ret; } @@ -669,26 +668,21 @@ static int s5m_rtc_probe(struct platform_device *pdev) alarm_irq = S5M8767_IRQ_RTCA1; break; default: - dev_err(&pdev->dev, - "Device type %lu is not supported by RTC driver\n", - platform_get_device_id(pdev)->driver_data); - return -ENODEV; + return dev_err_probe(&pdev->dev, -ENODEV, + "Device type %lu is not supported by RTC driver\n", + platform_get_device_id(pdev)->driver_data); } i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, RTC_I2C_ADDR); - if (IS_ERR(i2c)) { - dev_err(&pdev->dev, "Failed to allocate I2C for RTC\n"); - return PTR_ERR(i2c); - } + if (IS_ERR(i2c)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c), + "Failed to allocate I2C for RTC\n"); info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); - if (IS_ERR(info->regmap)) { - ret = PTR_ERR(info->regmap); - dev_err(&pdev->dev, "Failed to allocate RTC register map: %d\n", - ret); - return ret; - } + if (IS_ERR(info->regmap)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), + "Failed to allocate RTC register map\n"); info->dev = &pdev->dev; info->s5m87xx = s5m87xx; @@ -696,11 +690,10 @@ static int s5m_rtc_probe(struct platform_device *pdev) if (s5m87xx->irq_data) { info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq); - if (info->irq <= 0) { - dev_err(&pdev->dev, "Failed to get virtual IRQ %d\n", - alarm_irq); - return -EINVAL; - } + if (info->irq <= 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to get virtual IRQ %d\n", + alarm_irq); } platform_set_drvdata(pdev, info); @@ -724,11 +717,10 @@ static int s5m_rtc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, s5m_rtc_alarm_irq, 0, "rtc-alarm0", info); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", - info->irq, ret); - return ret; - } + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to request alarm IRQ %d\n", + info->irq); device_init_wakeup(&pdev->dev, true); } From c0c9c73434666dc99ee156b25e7e722150bee001 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 5 Mar 2025 19:01:19 +0900 Subject: [PATCH 0594/2157] counter: microchip-tcb-capture: Fix undefined counter channel state on probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardware initialize of the timer counter channel does not occur on probe thus leaving the Count in an undefined state until the first function_write() callback is executed. Fix this by performing the proper hardware initialization during probe. Fixes: 106b104137fd ("counter: Add microchip TCB capture counter") Reported-by: Csókás Bence Closes: https://lore.kernel.org/all/bfa70e78-3cc3-4295-820b-3925c26135cb@prolan.hu/ Link: https://lore.kernel.org/r/20250305-preset-capture-mode-microchip-tcb-capture-v1-1-632c95c6421e@kernel.org Signed-off-by: William Breathitt Gray --- drivers/counter/microchip-tcb-capture.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index 2f096a5b973d..c391ac38b990 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -368,6 +368,25 @@ static int mchp_tc_probe(struct platform_device *pdev) channel); } + /* Disable Quadrature Decoder and position measure */ + ret = regmap_update_bits(regmap, ATMEL_TC_BMR, ATMEL_TC_QDEN | ATMEL_TC_POSEN, 0); + if (ret) + return ret; + + /* Setup the period capture mode */ + ret = regmap_update_bits(regmap, ATMEL_TC_REG(priv->channel[0], CMR), + ATMEL_TC_WAVE | ATMEL_TC_ABETRG | ATMEL_TC_CMR_MASK | + ATMEL_TC_TCCLKS, + ATMEL_TC_CMR_MASK); + if (ret) + return ret; + + /* Enable clock and trigger counter */ + ret = regmap_write(regmap, ATMEL_TC_REG(priv->channel[0], CCR), + ATMEL_TC_CLKEN | ATMEL_TC_SWTRG); + if (ret) + return ret; + priv->tc_cfg = tcb_config; priv->regmap = regmap; counter->name = dev_name(&pdev->dev); From 5da692e2262b8f81993baa9592f57d12c2703dea Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Thu, 6 Mar 2025 16:41:50 +0800 Subject: [PATCH 0595/2157] dm cache: prevent BUG_ON by blocking retries on failed device resumes A cache device failing to resume due to mapping errors should not be retried, as the failure leaves a partially initialized policy object. Repeating the resume operation risks triggering BUG_ON when reloading cache mappings into the incomplete policy object. Reproduce steps: 1. create a cache metadata consisting of 512 or more cache blocks, with some mappings stored in the first array block of the mapping array. Here we use cache_restore v1.0 to build the metadata. cat <> cmeta.xml EOF dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" cache_restore -i cmeta.xml -o /dev/mapper/cmeta --metadata-version=2 dmsetup remove cmeta 2. wipe the second array block of the mapping array to simulate data degradations. mapping_root=$(dd if=/dev/sdc bs=1c count=8 skip=192 \ 2>/dev/null | hexdump -e '1/8 "%u\n"') ablock=$(dd if=/dev/sdc bs=1c count=8 skip=$((4096*mapping_root+2056)) \ 2>/dev/null | hexdump -e '1/8 "%u\n"') dd if=/dev/zero of=/dev/sdc bs=4k count=1 seek=$ablock 3. try bringing up the cache device. The resume is expected to fail due to the broken array block. dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dmsetup create cache --notable dmsetup load cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup resume cache 4. try resuming the cache again. An unexpected BUG_ON is triggered while loading cache mappings. dmsetup resume cache Kernel logs: (snip) ------------[ cut here ]------------ kernel BUG at drivers/md/dm-cache-policy-smq.c:752! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 UID: 0 PID: 332 Comm: dmsetup Not tainted 6.13.4 #3 RIP: 0010:smq_load_mapping+0x3e5/0x570 Fix by disallowing resume operations for devices that failed the initial attempt. Signed-off-by: Ming-Hung Tsai Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9cb797a561d6..6cee5eac8b9e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2899,6 +2899,27 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) return to_cblock(size); } +static bool can_resume(struct cache *cache) +{ + /* + * Disallow retrying the resume operation for devices that failed the + * first resume attempt, as the failure leaves the policy object partially + * initialized. Retrying could trigger BUG_ON when loading cache mappings + * into the incomplete policy object. + */ + if (cache->sized && !cache->loaded_mappings) { + if (get_cache_mode(cache) != CM_WRITE) + DMERR("%s: unable to resume a failed-loaded cache, please check metadata.", + cache_device_name(cache)); + else + DMERR("%s: unable to resume cache due to missing proper cache table reload", + cache_device_name(cache)); + return false; + } + + return true; +} + static bool can_resize(struct cache *cache, dm_cblock_t new_size) { if (from_cblock(new_size) > from_cblock(cache->cache_size)) { @@ -2947,6 +2968,9 @@ static int cache_preresume(struct dm_target *ti) struct cache *cache = ti->private; dm_cblock_t csize = get_cache_dev_size(cache); + if (!can_resume(cache)) + return -EINVAL; + /* * Check to see if the cache has resized. */ From c2662b1544cbd8ea3181381bb899b8e681dfedc7 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Thu, 6 Mar 2025 16:41:51 +0800 Subject: [PATCH 0596/2157] dm cache: support shrinking the origin device This patch introduces formal support for shrinking the cache origin by reducing the cache target length via table reloads. Cache blocks mapped beyond the new target length must be clean and are invalidated during preresume. If any dirty blocks exist in the area being removed, the preresume operation fails without setting the NEEDS_CHECK flag in superblock, and the resume ioctl returns EFBIG. The cache device remains suspended until a table reload with target length that fits existing mappings is performed. Without this patch, reducing the cache target length could result in io errors (RHBZ: 2134334), out-of-bounds memory access to the discard bitset, and security concerns regarding data leakage. Verification steps: 1. create a cache metadata with some cached blocks mapped to the tail of the origin device. Here we use cache_restore v1.0 to build a metadata with one clean block mapped to the last origin block. cat <> cmeta.xml EOF dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" cache_restore -i cmeta.xml -o /dev/mapper/cmeta --metadata-version=2 dmsetup remove cmeta 2. bring up the cache whilst shrinking the cache origin by one block: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524160 linear /dev/sdc 262144" dmsetup create cache --table "0 524160 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 3. check the number of cached data blocks via dmsetup status. It is expected to be zero. dmsetup status cache | cut -d ' ' -f 7 In addition to the script above, this patch can be verified using the "cache/resize" tests in dmtest-python: ./dmtest run --rx cache/resize/shrink_origin --result-set default Signed-off-by: Ming-Hung Tsai Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 72 ++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 6cee5eac8b9e..a10d75a562db 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -406,6 +406,12 @@ struct cache { mempool_t migration_pool; struct bio_set bs; + + /* + * Cache_size entries. Set bits indicate blocks mapped beyond the + * target length, which are marked for invalidation. + */ + unsigned long *invalid_bitset; }; struct per_bio_data { @@ -1922,6 +1928,9 @@ static void __destroy(struct cache *cache) if (cache->discard_bitset) free_bitset(cache->discard_bitset); + if (cache->invalid_bitset) + free_bitset(cache->invalid_bitset); + if (cache->copier) dm_kcopyd_client_destroy(cache->copier); @@ -2510,6 +2519,13 @@ static int cache_create(struct cache_args *ca, struct cache **result) } clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); + cache->invalid_bitset = alloc_bitset(from_cblock(cache->cache_size)); + if (!cache->invalid_bitset) { + *error = "could not allocate bitset for invalid blocks"; + goto bad; + } + clear_bitset(cache->invalid_bitset, from_cblock(cache->cache_size)); + cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(cache->copier)) { *error = "could not create kcopyd client"; @@ -2808,6 +2824,24 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); } +static int load_filtered_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, + bool dirty, uint32_t hint, bool hint_valid) +{ + struct cache *cache = context; + + if (from_oblock(oblock) >= from_oblock(cache->origin_blocks)) { + if (dirty) { + DMERR("%s: unable to shrink origin; cache block %u is dirty", + cache_device_name(cache), from_cblock(cblock)); + return -EFBIG; + } + set_bit(from_cblock(cblock), cache->invalid_bitset); + return 0; + } + + return load_mapping(context, oblock, cblock, dirty, hint, hint_valid); +} + /* * The discard block size in the on disk metadata is not * necessarily the same as we're currently using. So we have to @@ -2962,6 +2996,24 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) return 0; } +static int truncate_oblocks(struct cache *cache) +{ + uint32_t nr_blocks = from_cblock(cache->cache_size); + uint32_t i; + int r; + + for_each_set_bit(i, cache->invalid_bitset, nr_blocks) { + r = dm_cache_remove_mapping(cache->cmd, to_cblock(i)); + if (r) { + DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata", + cache_device_name(cache)); + return r; + } + } + + return 0; +} + static int cache_preresume(struct dm_target *ti) { int r = 0; @@ -2986,11 +3038,25 @@ static int cache_preresume(struct dm_target *ti) } if (!cache->loaded_mappings) { + /* + * The fast device could have been resized since the last + * failed preresume attempt. To be safe we start by a blank + * bitset for cache blocks. + */ + clear_bitset(cache->invalid_bitset, from_cblock(cache->cache_size)); + r = dm_cache_load_mappings(cache->cmd, cache->policy, - load_mapping, cache); + load_filtered_mapping, cache); if (r) { DMERR("%s: could not load cache mappings", cache_device_name(cache)); - metadata_operation_failed(cache, "dm_cache_load_mappings", r); + if (r != -EFBIG) + metadata_operation_failed(cache, "dm_cache_load_mappings", r); + return r; + } + + r = truncate_oblocks(cache); + if (r) { + metadata_operation_failed(cache, "dm_cache_remove_mapping", r); return r; } @@ -3450,7 +3516,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {2, 2, 0}, + .version = {2, 3, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, From b020761e8cbf6c0bb14b12095a61e85f77c6b8b9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 6 Mar 2025 16:49:40 +0200 Subject: [PATCH 0597/2157] xhci: show correct U1 and U2 timeout values in debug messages U2 value is encoded in 256 microsecond intervals, show in microseconds. U1 value is in microseconds. debug message incorrectly showed "ms" Unwrap debug messages while we anyway modify them. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 45653114ccd7..3f2cd546a7a2 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4755,8 +4755,8 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, */ if (timeout_ns <= USB3_LPM_U1_MAX_TIMEOUT) return timeout_ns; - dev_dbg(&udev->dev, "Hub-initiated U1 disabled " - "due to long timeout %llu ms\n", timeout_ns); + dev_dbg(&udev->dev, "Hub-initiated U1 disabled due to long timeout %lluus\n", + timeout_ns); return xhci_get_timeout_no_hub_lpm(udev, USB3_LPM_U1); } @@ -4813,8 +4813,8 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, */ if (timeout_ns <= USB3_LPM_U2_MAX_TIMEOUT) return timeout_ns; - dev_dbg(&udev->dev, "Hub-initiated U2 disabled " - "due to long timeout %llu ms\n", timeout_ns); + dev_dbg(&udev->dev, "Hub-initiated U2 disabled due to long timeout %lluus\n", + timeout_ns * 256); return xhci_get_timeout_no_hub_lpm(udev, USB3_LPM_U2); } From 856563be98b2e9cfc6ceaff12043763f50d089d2 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Thu, 6 Mar 2025 16:49:41 +0200 Subject: [PATCH 0598/2157] usb: xhci: remove redundant update_ring_for_set_deq_completion() function The function is a remnant from a previous implementation and is now redundant. There is no longer a need to search for the dequeue pointer, as both the TRB and segment dequeue pointers are saved within 'queued_deq_seg' and 'queued_deq_ptr'. Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 41 ++---------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 965bffce301e..23cf20026359 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1332,43 +1332,6 @@ void xhci_hc_died(struct xhci_hcd *xhci) usb_hc_died(xhci_to_hcd(xhci)); } -static void update_ring_for_set_deq_completion(struct xhci_hcd *xhci, - struct xhci_virt_device *dev, - struct xhci_ring *ep_ring, - unsigned int ep_index) -{ - union xhci_trb *dequeue_temp; - - dequeue_temp = ep_ring->dequeue; - - /* If we get two back-to-back stalls, and the first stalled transfer - * ends just before a link TRB, the dequeue pointer will be left on - * the link TRB by the code in the while loop. So we have to update - * the dequeue pointer one segment further, or we'll jump off - * the segment into la-la-land. - */ - if (trb_is_link(ep_ring->dequeue)) { - ep_ring->deq_seg = ep_ring->deq_seg->next; - ep_ring->dequeue = ep_ring->deq_seg->trbs; - } - - while (ep_ring->dequeue != dev->eps[ep_index].queued_deq_ptr) { - /* We have more usable TRBs */ - ep_ring->dequeue++; - if (trb_is_link(ep_ring->dequeue)) { - if (ep_ring->dequeue == - dev->eps[ep_index].queued_deq_ptr) - break; - ep_ring->deq_seg = ep_ring->deq_seg->next; - ep_ring->dequeue = ep_ring->deq_seg->trbs; - } - if (ep_ring->dequeue == dequeue_temp) { - xhci_dbg(xhci, "Unable to find new dequeue pointer\n"); - break; - } - } -} - /* * When we get a completion for a Set Transfer Ring Dequeue Pointer command, * we need to clear the set deq pending flag in the endpoint ring state, so that @@ -1473,8 +1436,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, /* Update the ring's dequeue segment and dequeue pointer * to reflect the new position. */ - update_ring_for_set_deq_completion(xhci, ep->vdev, - ep_ring, ep_index); + ep_ring->deq_seg = ep->queued_deq_seg; + ep_ring->dequeue = ep->queued_deq_ptr; } else { xhci_warn(xhci, "Mismatch between completed Set TR Deq Ptr command & xHCI internal state.\n"); xhci_warn(xhci, "ep deq seg = %p, deq ptr = %p\n", From 58d0a3fab5f4fdc112c16a4c6d382f62097afd1c Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:42 +0200 Subject: [PATCH 0599/2157] usb: xhci: Don't skip on Stopped - Length Invalid Up until commit d56b0b2ab142 ("usb: xhci: ensure skipped isoc TDs are returned when isoc ring is stopped") in v6.11, the driver didn't skip missed isochronous TDs when handling Stoppend and Stopped - Length Invalid events. Instead, it erroneously cleared the skip flag, which would cause the ring to get stuck, as future events won't match the missed TD which is never removed from the queue until it's cancelled. This buggy logic seems to have been in place substantially unchanged since the 3.x series over 10 years ago, which probably speaks first and foremost about relative rarity of this case in normal usage, but by the spec I see no reason why it shouldn't be possible. After d56b0b2ab142, TDs are immediately skipped when handling those Stopped events. This poses a potential problem in case of Stopped - Length Invalid, which occurs either on completed TDs (likely already given back) or Link and No-Op TRBs. Such event won't be recognized as matching any TD (unless it's the rare Link TRB inside a TD) and will result in skipping all pending TDs, giving them back possibly before they are done, risking isoc data loss and maybe UAF by HW. As a compromise, don't skip and don't clear the skip flag on this kind of event. Then the next event will skip missed TDs. A downside of not handling Stopped - Length Invalid on a Link inside a TD is that if the TD is cancelled, its actual length will not be updated to account for TRBs (silently) completed before the TD was stopped. I had no luck producing this sequence of completion events so there is no compelling demonstration of any resulting disaster. It may be a very rare, obscure condition. The sole motivation for this patch is that if such unlikely event does occur, I'd rather risk reporting a cancelled partially done isoc frame as empty than gamble with UAF. This will be fixed more properly by looking at Stopped event's TRB pointer when making skipping decisions, but such rework is unlikely to be backported to v6.12, which will stay around for a few years. Fixes: d56b0b2ab142 ("usb: xhci: ensure skipped isoc TDs are returned when isoc ring is stopped") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 23cf20026359..6fb48d30ec21 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2828,6 +2828,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (!ep_seg) { if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + /* this event is unlikely to match any TD, don't skip them all */ + if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID) + return 0; + skip_isoc_td(xhci, td, ep, status); if (!list_empty(&ep_ring->td_list)) continue; From bfa8459942822bdcc86f0e87f237c0723ae64948 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:43 +0200 Subject: [PATCH 0600/2157] usb: xhci: Complete 'error mid TD' transfers when handling Missed Service Missed Service Error after an error mid TD means that the failed TD has already been passed by the xHC without acknowledgment of the final TRB, a known hardware bug. So don't wait any more and give back the TD. Reproduced on NEC uPD720200 under conditions of ludicrously bad USB link quality, confirmed to behave as expected using dynamic debug. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6fb48d30ec21..47aaaf4eb92a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2752,7 +2752,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, xhci_dbg(xhci, "Miss service interval error for slot %u ep %u, set skip flag\n", slot_id, ep_index); - return 0; + break; case COMP_NO_PING_RESPONSE_ERROR: ep->skip = true; xhci_dbg(xhci, @@ -2800,6 +2800,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, xhci_dequeue_td(xhci, td, ep_ring, td->status); } + /* Missed TDs will be skipped on the next event */ + if (trb_comp_code == COMP_MISSED_SERVICE_ERROR) + return 0; + if (list_empty(&ep_ring->td_list)) { /* * Don't print wanings if ring is empty due to a stopped endpoint generating an From 906dec15b9b321b546fd31a3c99ffc13724c7af4 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:44 +0200 Subject: [PATCH 0601/2157] usb: xhci: Fix isochronous Ring Underrun/Overrun event handling The TRB pointer of these events points at enqueue at the time of error occurrence on xHCI 1.1+ HCs or it's NULL on older ones. By the time we are handling the event, a new TD may be queued at this ring position. I can trigger this race by rising interrupt moderation to increase IRQ handling delay. Similar delay may occur naturally due to system load. If this ever happens after a Missed Service Error, missed TDs will be skipped and the new TD processed as if it matched the event. It could be given back prematurely, risking data loss or buffer UAF by the xHC. Don't complete TDs on xrun events and don't warn if queued TDs don't match the event's TRB pointer, which can be NULL or a link/no-op TRB. Don't warn if there are no queued TDs at all. Now that it's safe, also handle xrun events if the skip flag is clear. This ensures completion of any TD stuck in 'error mid TD' state right before the xrun event, which could happen if a driver submits a finite number of URBs to a buggy HC and then an error occurs on the last TD. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 47aaaf4eb92a..d34f46b63006 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2627,6 +2627,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, int status = -EINPROGRESS; struct xhci_ep_ctx *ep_ctx; u32 trb_comp_code; + bool ring_xrun_event = false; slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1; @@ -2733,14 +2734,12 @@ static int handle_tx_event(struct xhci_hcd *xhci, * Underrun Event for OUT Isoch endpoint. */ xhci_dbg(xhci, "Underrun event on slot %u ep %u\n", slot_id, ep_index); - if (ep->skip) - break; - return 0; + ring_xrun_event = true; + break; case COMP_RING_OVERRUN: xhci_dbg(xhci, "Overrun event on slot %u ep %u\n", slot_id, ep_index); - if (ep->skip) - break; - return 0; + ring_xrun_event = true; + break; case COMP_MISSED_SERVICE_ERROR: /* * When encounter missed service error, one or more isoc tds @@ -2813,6 +2812,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ if (trb_comp_code != COMP_STOPPED && trb_comp_code != COMP_STOPPED_LENGTH_INVALID && + !ring_xrun_event && !ep_ring->last_td_was_short) { xhci_warn(xhci, "Event TRB for slot %u ep %u with no TDs queued\n", slot_id, ep_index); @@ -2847,6 +2847,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, goto check_endpoint_halted; } + /* TD was queued after xrun, maybe xrun was on a link, don't panic yet */ + if (ring_xrun_event) + return 0; + /* * Skip the Force Stopped Event. The 'ep_trb' of FSE is not in the current * TD pointed by 'ep_ring->dequeue' because that the hardware dequeue @@ -2893,6 +2897,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ } while (ep->skip); + /* Get out if a TD was queued at enqueue after the xrun occurred */ + if (ring_xrun_event) + return 0; + if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; else From d0b619599e52fe3e1454f7d151dedf2b194f61d8 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:45 +0200 Subject: [PATCH 0602/2157] usb: xhci: Expedite skipping missed isoch TDs on modern HCs xHCI spec rev. 1.0 allowed the TRB pointer of Missed Service events to be NULL. Having no idea which of the queued TDs were missed and which are waiting, we can only set a flag to skip missed TDs later. But HCs are also allowed to give us pointer to the last missed TRB, and this became mandatory in spec rev. 1.1 and later. Use this pointer, if available, to immediately skip all missed TDs. This reduces latency and risk of skipping-related bugs, because we can now leave the skip flag cleared for future events. Handle Missed Service Error events as 'error mid TD', if applicable, because rev. 1.0 spec excplicitly says so in notes to 4.10.3.2 and later revs in 4.10.3.2 and 4.11.2.5.2. Notes to 4.9.1 seem to apply. Tested on ASM1142 and ASM3142 v1.1 xHCs which provide TRB pointers. Tested on AMD, Etron, Renesas v1.0 xHCs which provide TRB pointers. Tested on NEC v0.96 and VIA v1.0 xHCs which send a NULL pointer. Change inspired by a discussion about realtime USB audio. Link: https://lore.kernel.org/linux-usb/76e1a191-020d-4a76-97f6-237f9bd0ede0@gmx.net/T/ Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-7-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d34f46b63006..e871dd61a636 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2439,6 +2439,12 @@ static void process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, if (ep_trb != td->end_trb) td->error_mid_td = true; break; + case COMP_MISSED_SERVICE_ERROR: + frame->status = -EXDEV; + sum_trbs_for_length = true; + if (ep_trb != td->end_trb) + td->error_mid_td = true; + break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: frame->status = -EPROTO; @@ -2749,8 +2755,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ ep->skip = true; xhci_dbg(xhci, - "Miss service interval error for slot %u ep %u, set skip flag\n", - slot_id, ep_index); + "Miss service interval error for slot %u ep %u, set skip flag%s\n", + slot_id, ep_index, ep_trb_dma ? ", skip now" : ""); break; case COMP_NO_PING_RESPONSE_ERROR: ep->skip = true; @@ -2799,8 +2805,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, xhci_dequeue_td(xhci, td, ep_ring, td->status); } - /* Missed TDs will be skipped on the next event */ - if (trb_comp_code == COMP_MISSED_SERVICE_ERROR) + /* If the TRB pointer is NULL, missed TDs will be skipped on the next event */ + if (trb_comp_code == COMP_MISSED_SERVICE_ERROR && !ep_trb_dma) return 0; if (list_empty(&ep_ring->td_list)) { From fe1ccba52a8d9e0ccc5d81ffe1938fc51d7a3e71 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:46 +0200 Subject: [PATCH 0603/2157] usb: xhci: Skip only one TD on Ring Underrun/Overrun If skipping is deferred to events other than Missed Service Error itsef, it means we are running on an xHCI 1.0 host and don't know how many TDs were missed until we reach some ordinary transfer completion event. And in case of ring xrun, we can't know where the xrun happened either. If we skip all pending TDs, we may prematurely give back TDs added after the xrun had occurred, risking data loss or buffer UAF by the xHC. If we skip none, a driver may become confused and stop working when all its URBs are missed and appear to be "in flight" forever. Skip exactly one TD on each xrun event - the first one that was missed, as we can now be sure that the HC has finished processing it. Provided that one more TD is queued before any subsequent doorbell ring, it will become safe to skip another TD by the time we get an xrun again. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-8-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e871dd61a636..70b896297494 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2843,8 +2843,21 @@ static int handle_tx_event(struct xhci_hcd *xhci, return 0; skip_isoc_td(xhci, td, ep, status); - if (!list_empty(&ep_ring->td_list)) + + if (!list_empty(&ep_ring->td_list)) { + if (ring_xrun_event) { + /* + * If we are here, we are on xHCI 1.0 host with no + * idea how many TDs were missed or where the xrun + * occurred. New TDs may have been added after the + * xrun, so skip only one TD to be safe. + */ + xhci_dbg(xhci, "Skipped one TD for slot %u ep %u", + slot_id, ep_index); + return 0; + } continue; + } xhci_dbg(xhci, "All TDs skipped for slot %u ep %u. Clear skip flag.\n", slot_id, ep_index); From 55741c723318905e6d5161bf1e12749020b161e3 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Thu, 6 Mar 2025 16:49:47 +0200 Subject: [PATCH 0604/2157] usb: xhci: correct debug message page size calculation The ffs() function returns the index of the first set bit, starting from 1. If no bits are set, it returns zero. This behavior causes an off-by-one page size in the debug message, as the page size calculation [1] is zero-based, while ffs() is one-based. Fix this by subtracting one from the result of ffs(). Note that since variable 'val' is unsigned, subtracting one from zero will result in the maximum unsigned integer value. Consequently, the condition 'if (val < 16)' will still function correctly. [1], Page size: (2^(n+12)), where 'n' is the set page size bit. Fixes: 81720ec5320c ("usb: host: xhci: use ffs() in xhci_mem_init()") Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-9-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 92703efda1f7..dc5bcd8db4c0 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2391,10 +2391,10 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Supported page size register = 0x%x", page_size); - i = ffs(page_size); - if (i < 16) + val = ffs(page_size) - 1; + if (val < 16) xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Supported page size of %iK", (1 << (i+12)) / 1024); + "Supported page size of %iK", (1 << (val + 12)) / 1024); else xhci_warn(xhci, "WARN: no supported page size\n"); /* Use 4K pages, since that's common and the minimum the HC supports */ From 68c1f1671650b49bbd26e6a65ddcf33f2565efa3 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Thu, 6 Mar 2025 16:49:48 +0200 Subject: [PATCH 0605/2157] usb: xhci: set page size to the xHCI-supported size The current xHCI driver does not validate whether a page size of 4096 bytes is supported. Address the issue by setting the page size to the value supported by the xHCI controller, as read from the Page Size register. In the event of an unexpected value; default to a 4K page size. Additionally, this commit removes unnecessary debug messages and instead prints the supported and used page size once. The xHCI controller supports page sizes of (2^{(n+12)}) bytes, where 'n' is the Page Size Bit. Only one page size is supported, with a maximum page size of 128 KB. Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-10-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 34 ++++++++++++++++++---------------- drivers/usb/host/xhci.h | 8 ++++---- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index dc5bcd8db4c0..a7fdfa00eb48 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1953,7 +1953,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->interrupters = NULL; xhci->page_size = 0; - xhci->page_shift = 0; xhci->usb2_rhub.bus_state.bus_suspended = 0; xhci->usb3_rhub.bus_state.bus_suspended = 0; } @@ -2372,6 +2371,22 @@ xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs, } EXPORT_SYMBOL_GPL(xhci_create_secondary_interrupter); +static void xhci_hcd_page_size(struct xhci_hcd *xhci) +{ + u32 page_size; + + page_size = readl(&xhci->op_regs->page_size) & XHCI_PAGE_SIZE_MASK; + if (!is_power_of_2(page_size)) { + xhci_warn(xhci, "Invalid page size register = 0x%x\n", page_size); + /* Fallback to 4K page size, since that's common */ + page_size = 1; + } + + xhci->page_size = page_size << 12; + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "HCD page size set to %iK", + xhci->page_size >> 10); +} + int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) { struct xhci_interrupter *ir; @@ -2379,7 +2394,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) dma_addr_t dma; unsigned int val, val2; u64 val_64; - u32 page_size, temp; + u32 temp; int i; INIT_LIST_HEAD(&xhci->cmd_list); @@ -2388,20 +2403,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout); init_completion(&xhci->cmd_ring_stop_completion); - page_size = readl(&xhci->op_regs->page_size); - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Supported page size register = 0x%x", page_size); - val = ffs(page_size) - 1; - if (val < 16) - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Supported page size of %iK", (1 << (val + 12)) / 1024); - else - xhci_warn(xhci, "WARN: no supported page size\n"); - /* Use 4K pages, since that's common and the minimum the HC supports */ - xhci->page_shift = 12; - xhci->page_size = 1 << xhci->page_shift; - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "HCD page size set to %iK", xhci->page_size / 1024); + xhci_hcd_page_size(xhci); /* * Program the Number of Device Slots Enabled field in the CONFIG diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8c164340a2c3..5b8751b86008 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -211,6 +211,9 @@ struct xhci_op_regs { #define CONFIG_CIE (1 << 9) /* bits 10:31 - reserved and should be preserved */ +/* bits 15:0 - HCD page shift bit */ +#define XHCI_PAGE_SIZE_MASK 0xffff + /** * struct xhci_intr_reg - Interrupt Register Set * @irq_pending: IMAN - Interrupt Management Register. Used to enable @@ -1514,10 +1517,7 @@ struct xhci_hcd { u16 max_interrupters; /* imod_interval in ns (I * 250ns) */ u32 imod_interval; - /* 4KB min, 128MB max */ - int page_size; - /* Valid values are 12 to 20, inclusive */ - int page_shift; + u32 page_size; /* MSI-X/MSI vectors */ int nvecs; /* optional clocks */ From d71cb7d6e1a261ef25c60056920d91d052144dd8 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Thu, 6 Mar 2025 16:49:49 +0200 Subject: [PATCH 0606/2157] usb: xhci: refactor trb_in_td() to be static Relocate trb_in_td() and marks it as static, as it's exclusively utilized in xhci-ring.c. This adjustment lays the groundwork for future rework of the function. The function's logic remains unchanged; only its access specifier is altered to static and a redundant "else" is removed on line 325 (due to checkpatch.pl complaining). Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-11-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 122 +++++++++++++++++------------------ drivers/usb/host/xhci.h | 2 - 2 files changed, 61 insertions(+), 63 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 70b896297494..8c7258afb6bf 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -277,6 +277,67 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, } } +/* + * If the suspect DMA address is a TRB in this TD, this function returns that + * TRB's segment. Otherwise it returns 0. + */ +static struct xhci_segment *trb_in_td(struct xhci_hcd *xhci, struct xhci_td *td, + dma_addr_t suspect_dma, bool debug) +{ + dma_addr_t start_dma; + dma_addr_t end_seg_dma; + dma_addr_t end_trb_dma; + struct xhci_segment *cur_seg; + + start_dma = xhci_trb_virt_to_dma(td->start_seg, td->start_trb); + cur_seg = td->start_seg; + + do { + if (start_dma == 0) + return NULL; + /* We may get an event for a Link TRB in the middle of a TD */ + end_seg_dma = xhci_trb_virt_to_dma(cur_seg, + &cur_seg->trbs[TRBS_PER_SEGMENT - 1]); + /* If the end TRB isn't in this segment, this is set to 0 */ + end_trb_dma = xhci_trb_virt_to_dma(cur_seg, td->end_trb); + + if (debug) + xhci_warn(xhci, + "Looking for event-dma %016llx trb-start %016llx trb-end %016llx seg-start %016llx seg-end %016llx\n", + (unsigned long long)suspect_dma, + (unsigned long long)start_dma, + (unsigned long long)end_trb_dma, + (unsigned long long)cur_seg->dma, + (unsigned long long)end_seg_dma); + + if (end_trb_dma > 0) { + /* The end TRB is in this segment, so suspect should be here */ + if (start_dma <= end_trb_dma) { + if (suspect_dma >= start_dma && suspect_dma <= end_trb_dma) + return cur_seg; + } else { + /* Case for one segment with + * a TD wrapped around to the top + */ + if ((suspect_dma >= start_dma && + suspect_dma <= end_seg_dma) || + (suspect_dma >= cur_seg->dma && + suspect_dma <= end_trb_dma)) + return cur_seg; + } + return NULL; + } + /* Might still be somewhere in this segment */ + if (suspect_dma >= start_dma && suspect_dma <= end_seg_dma) + return cur_seg; + + cur_seg = cur_seg->next; + start_dma = xhci_trb_virt_to_dma(cur_seg, &cur_seg->trbs[0]); + } while (cur_seg != td->start_seg); + + return NULL; +} + /* * Return number of free normal TRBs from enqueue to dequeue pointer on ring. * Not counting an assumed link TRB at end of each TRBS_PER_SEGMENT sized segment. @@ -2079,67 +2140,6 @@ static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event) spin_lock(&xhci->lock); } -/* - * If the suspect DMA address is a TRB in this TD, this function returns that - * TRB's segment. Otherwise it returns 0. - */ -struct xhci_segment *trb_in_td(struct xhci_hcd *xhci, struct xhci_td *td, dma_addr_t suspect_dma, - bool debug) -{ - dma_addr_t start_dma; - dma_addr_t end_seg_dma; - dma_addr_t end_trb_dma; - struct xhci_segment *cur_seg; - - start_dma = xhci_trb_virt_to_dma(td->start_seg, td->start_trb); - cur_seg = td->start_seg; - - do { - if (start_dma == 0) - return NULL; - /* We may get an event for a Link TRB in the middle of a TD */ - end_seg_dma = xhci_trb_virt_to_dma(cur_seg, - &cur_seg->trbs[TRBS_PER_SEGMENT - 1]); - /* If the end TRB isn't in this segment, this is set to 0 */ - end_trb_dma = xhci_trb_virt_to_dma(cur_seg, td->end_trb); - - if (debug) - xhci_warn(xhci, - "Looking for event-dma %016llx trb-start %016llx trb-end %016llx seg-start %016llx seg-end %016llx\n", - (unsigned long long)suspect_dma, - (unsigned long long)start_dma, - (unsigned long long)end_trb_dma, - (unsigned long long)cur_seg->dma, - (unsigned long long)end_seg_dma); - - if (end_trb_dma > 0) { - /* The end TRB is in this segment, so suspect should be here */ - if (start_dma <= end_trb_dma) { - if (suspect_dma >= start_dma && suspect_dma <= end_trb_dma) - return cur_seg; - } else { - /* Case for one segment with - * a TD wrapped around to the top - */ - if ((suspect_dma >= start_dma && - suspect_dma <= end_seg_dma) || - (suspect_dma >= cur_seg->dma && - suspect_dma <= end_trb_dma)) - return cur_seg; - } - return NULL; - } else { - /* Might still be somewhere in this segment */ - if (suspect_dma >= start_dma && suspect_dma <= end_seg_dma) - return cur_seg; - } - cur_seg = cur_seg->next; - start_dma = xhci_trb_virt_to_dma(cur_seg, &cur_seg->trbs[0]); - } while (cur_seg != td->start_seg); - - return NULL; -} - static void xhci_clear_hub_tt_buffer(struct xhci_hcd *xhci, struct xhci_td *td, struct xhci_virt_ep *ep) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5b8751b86008..cd96e0a8c593 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1884,8 +1884,6 @@ int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); -struct xhci_segment *trb_in_td(struct xhci_hcd *xhci, struct xhci_td *td, - dma_addr_t suspect_dma, bool debug); int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code); void xhci_ring_cmd_db(struct xhci_hcd *xhci); int xhci_queue_slot_control(struct xhci_hcd *xhci, struct xhci_command *cmd, From 9a7f4bc4c82b4dd8e57b13375d42aff6a52d1812 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Thu, 6 Mar 2025 16:49:50 +0200 Subject: [PATCH 0607/2157] usb: xhci: move debug capabilities from trb_in_td() to handle_tx_event() Function trb_in_td() currently includes debug capabilities that are triggered when its debug argument is set to true. The only consumer of these debug capabilities is handle_tx_event(), which calls trb_in_td() twice, once for its primary functionality and a second time solely for debugging purposes if the first call returns 'NULL'. This approach is inefficient and can lead to confusion, as trb_in_td() executes the same code with identical arguments twice, differing only in the debug output during the second execution. To enhance clarity and efficiency, move the debug capabilities out of trb_in_td() and integrates them directly into handle_tx_event(). This change reduces the argument count of trb_in_td() and ensures that debug steps are executed only when necessary, streamlining the function's operation. Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-12-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 38 ++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 8c7258afb6bf..c2e15a27338b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -281,8 +281,7 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, * If the suspect DMA address is a TRB in this TD, this function returns that * TRB's segment. Otherwise it returns 0. */ -static struct xhci_segment *trb_in_td(struct xhci_hcd *xhci, struct xhci_td *td, - dma_addr_t suspect_dma, bool debug) +static struct xhci_segment *trb_in_td(struct xhci_td *td, dma_addr_t suspect_dma) { dma_addr_t start_dma; dma_addr_t end_seg_dma; @@ -301,15 +300,6 @@ static struct xhci_segment *trb_in_td(struct xhci_hcd *xhci, struct xhci_td *td, /* If the end TRB isn't in this segment, this is set to 0 */ end_trb_dma = xhci_trb_virt_to_dma(cur_seg, td->end_trb); - if (debug) - xhci_warn(xhci, - "Looking for event-dma %016llx trb-start %016llx trb-end %016llx seg-start %016llx seg-end %016llx\n", - (unsigned long long)suspect_dma, - (unsigned long long)start_dma, - (unsigned long long)end_trb_dma, - (unsigned long long)cur_seg->dma, - (unsigned long long)end_seg_dma); - if (end_trb_dma > 0) { /* The end TRB is in this segment, so suspect should be here */ if (start_dma <= end_trb_dma) { @@ -1075,7 +1065,7 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) td->urb->stream_id); hw_deq &= ~0xf; - if (td->cancel_status == TD_HALTED || trb_in_td(xhci, td, hw_deq, false)) { + if (td->cancel_status == TD_HALTED || trb_in_td(td, hw_deq)) { switch (td->cancel_status) { case TD_CLEARED: /* TD is already no-op */ case TD_CLEARING_CACHE: /* set TR deq command already queued */ @@ -1165,7 +1155,7 @@ static struct xhci_td *find_halted_td(struct xhci_virt_ep *ep) hw_deq = xhci_get_hw_deq(ep->xhci, ep->vdev, ep->ep_index, 0); hw_deq &= ~0xf; td = list_first_entry(&ep->ring->td_list, struct xhci_td, td_list); - if (trb_in_td(ep->xhci, td, hw_deq, false)) + if (trb_in_td(td, hw_deq)) return td; } return NULL; @@ -2800,7 +2790,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ td = list_first_entry_or_null(&ep_ring->td_list, struct xhci_td, td_list); - if (td && td->error_mid_td && !trb_in_td(xhci, td, ep_trb_dma, false)) { + if (td && td->error_mid_td && !trb_in_td(td, ep_trb_dma)) { xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); xhci_dequeue_td(xhci, td, ep_ring, td->status); } @@ -2833,7 +2823,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, td_list); /* Is this a TRB in the currently executing TD? */ - ep_seg = trb_in_td(xhci, td, ep_trb_dma, false); + ep_seg = trb_in_td(td, ep_trb_dma); if (!ep_seg) { @@ -2893,12 +2883,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, } /* HC is busted, give up! */ - xhci_err(xhci, - "ERROR Transfer event TRB DMA ptr not part of current TD ep_index %d comp_code %u\n", - ep_index, trb_comp_code); - trb_in_td(xhci, td, ep_trb_dma, true); - - return -ESHUTDOWN; + goto debug_finding_td; } if (ep->skip) { @@ -2955,6 +2940,17 @@ static int handle_tx_event(struct xhci_hcd *xhci, return 0; +debug_finding_td: + xhci_err(xhci, "Event dma %pad for ep %d status %d not part of TD at %016llx - %016llx\n", + &ep_trb_dma, ep_index, trb_comp_code, + (unsigned long long)xhci_trb_virt_to_dma(td->start_seg, td->start_trb), + (unsigned long long)xhci_trb_virt_to_dma(td->end_seg, td->end_trb)); + + xhci_for_each_ring_seg(ep_ring->first_seg, ep_seg) + xhci_warn(xhci, "Ring seg %u dma %pad\n", ep_seg->num, &ep_seg->dma); + + return -ESHUTDOWN; + err_out: xhci_err(xhci, "@%016llx %08x %08x %08x %08x\n", (unsigned long long) xhci_trb_virt_to_dma( From 860f5d0d3594005d4588240028f42e8d2bfc725b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 6 Mar 2025 16:49:51 +0200 Subject: [PATCH 0608/2157] xhci: Prevent early endpoint restart when handling STALL errors. Ensure that an endpoint halted due to device STALL is not restarted before a Clear_Feature(ENDPOINT_HALT) request is sent to the device. The host side of the endpoint may otherwise be started early by the 'Set TR Deq' command completion handler which is called if dequeue is moved past a cancelled or halted TD. Prevent this with a new flag set for bulk and interrupt endpoints when a Stall Error is received. Clear it in hcd->endpoint_reset() which is called after Clear_Feature(ENDPOINT_HALT) is sent. Also add a debug message if a class driver queues a new URB after the STALL. Note that class driver might not be aware of the STALL yet when it submits the URB as URBs are given back in BH. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-13-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 7 +++++-- drivers/usb/host/xhci.c | 6 ++++++ drivers/usb/host/xhci.h | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c2e15a27338b..7643ab9ec3b4 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -556,8 +556,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, * pointer command pending because the device can choose to start any * stream once the endpoint is on the HW schedule. */ - if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) || - (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT)) + if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED | + EP_CLEARING_TT | EP_STALLED)) return; trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id)); @@ -2555,6 +2555,9 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET); return; + case COMP_STALL_ERROR: + ep->ep_state |= EP_STALLED; + break; default: /* do nothing */ break; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 3f2cd546a7a2..0c22b78358b9 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1604,6 +1604,11 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto free_priv; } + /* Class driver might not be aware ep halted due to async URB giveback */ + if (*ep_state & EP_STALLED) + dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n", + urb); + switch (usb_endpoint_type(&urb->ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: @@ -3202,6 +3207,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; ep = &vdev->eps[ep_index]; + ep->ep_state &= ~EP_STALLED; /* Bail out if toggle is already being cleared by a endpoint reset */ spin_lock_irqsave(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index cd96e0a8c593..4ee14f651d36 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -664,7 +664,7 @@ struct xhci_virt_ep { unsigned int err_count; unsigned int ep_state; #define SET_DEQ_PENDING (1 << 0) -#define EP_HALTED (1 << 1) /* For stall handling */ +#define EP_HALTED (1 << 1) /* Halted host ep handling */ #define EP_STOP_CMD_PENDING (1 << 2) /* For URB cancellation */ /* Transitioning the endpoint to using streams, don't enqueue URBs */ #define EP_GETTING_STREAMS (1 << 3) @@ -675,6 +675,7 @@ struct xhci_virt_ep { #define EP_SOFT_CLEAR_TOGGLE (1 << 7) /* usb_hub_clear_tt_buffer is in progress */ #define EP_CLEARING_TT (1 << 8) +#define EP_STALLED (1 << 9) /* For stall handling */ /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; struct xhci_hcd *xhci; From bb0ba4cb1065e87f9cc75db1fa454e56d0894d01 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:52 +0200 Subject: [PATCH 0609/2157] usb: xhci: Apply the link chain quirk on NEC isoc endpoints Two clearly different specimens of NEC uPD720200 (one with start/stop bug, one without) were seen to cause IOMMU faults after some Missed Service Errors. Faulting address is immediately after a transfer ring segment and patched dynamic debug messages revealed that the MSE was received when waiting for a TD near the end of that segment: [ 1.041954] xhci_hcd: Miss service interval error for slot 1 ep 2 expected TD DMA ffa08fe0 [ 1.042120] xhci_hcd: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0005 address=0xffa09000 flags=0x0000] [ 1.042146] xhci_hcd: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0005 address=0xffa09040 flags=0x0000] It gets even funnier if the next page is a ring segment accessible to the HC. Below, it reports MSE in segment at ff1e8000, plows through a zero-filled page at ff1e9000 and starts reporting events for TRBs in page at ff1ea000 every microframe, instead of jumping to seg ff1e6000. [ 7.041671] xhci_hcd: Miss service interval error for slot 1 ep 2 expected TD DMA ff1e8fe0 [ 7.041999] xhci_hcd: Miss service interval error for slot 1 ep 2 expected TD DMA ff1e8fe0 [ 7.042011] xhci_hcd: WARN: buffer overrun event for slot 1 ep 2 on endpoint [ 7.042028] xhci_hcd: All TDs skipped for slot 1 ep 2. Clear skip flag. [ 7.042134] xhci_hcd: WARN: buffer overrun event for slot 1 ep 2 on endpoint [ 7.042138] xhci_hcd: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 2 comp_code 31 [ 7.042144] xhci_hcd: Looking for event-dma 00000000ff1ea040 trb-start 00000000ff1e6820 trb-end 00000000ff1e6820 [ 7.042259] xhci_hcd: WARN: buffer overrun event for slot 1 ep 2 on endpoint [ 7.042262] xhci_hcd: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 2 comp_code 31 [ 7.042266] xhci_hcd: Looking for event-dma 00000000ff1ea050 trb-start 00000000ff1e6820 trb-end 00000000ff1e6820 At some point completion events change from Isoch Buffer Overrun to Short Packet and the HC finally finds cycle bit mismatch in ff1ec000. [ 7.098130] xhci_hcd: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 2 comp_code 13 [ 7.098132] xhci_hcd: Looking for event-dma 00000000ff1ecc50 trb-start 00000000ff1e6820 trb-end 00000000ff1e6820 [ 7.098254] xhci_hcd: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 2 comp_code 13 [ 7.098256] xhci_hcd: Looking for event-dma 00000000ff1ecc60 trb-start 00000000ff1e6820 trb-end 00000000ff1e6820 [ 7.098379] xhci_hcd: Overrun event on slot 1 ep 2 It's possible that data from the isochronous device were written to random buffers of pending TDs on other endpoints (either IN or OUT), other devices or even other HCs in the same IOMMU domain. Lastly, an error from a different USB device on another HC. Was it caused by the above? I don't know, but it may have been. The disk was working without any other issues and generated PCIe traffic to starve the NEC of upstream BW and trigger those MSEs. The two HCs shared one x1 slot by means of a commercial "PCIe splitter" board. [ 7.162604] usb 10-2: reset SuperSpeed USB device number 3 using xhci_hcd [ 7.178990] sd 9:0:0:0: [sdb] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x07 driverbyte=DRIVER_OK cmd_age=0s [ 7.179001] sd 9:0:0:0: [sdb] tag#0 CDB: opcode=0x28 28 00 04 02 ae 00 00 02 00 00 [ 7.179004] I/O error, dev sdb, sector 67284480 op 0x0:(READ) flags 0x80700 phys_seg 5 prio class 0 Fortunately, it appears that this ridiculous bug is avoided by setting the chain bit of Link TRBs on isochronous rings. Other ancient HCs are known which also expect the bit to be set and they ignore Link TRBs if it's not. Reportedly, 0.95 spec guaranteed that the bit is set. The bandwidth-starved NEC HC running a 32KB/uframe UVC endpoint reports tens of MSEs per second and runs into the bug within seconds. Chaining Link TRBs allows the same workload to run for many minutes, many times. No negative side effects seen in UVC recording and UAC playback with a few devices at full speed, high speed and SuperSpeed. The problem doesn't reproduce on the newer Renesas uPD720201/uPD720202 and on old Etron EJ168 and VIA VL805 (but the VL805 has other bug). [shorten line length of log snippets in commit messge -Mathias] Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-14-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 4ee14f651d36..d9d7cd1906f3 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1760,11 +1760,20 @@ static inline void xhci_write_64(struct xhci_hcd *xhci, } -/* Link TRB chain should always be set on 0.95 hosts, and AMD 0.96 ISOC rings */ +/* + * Reportedly, some chapters of v0.95 spec said that Link TRB always has its chain bit set. + * Other chapters and later specs say that it should only be set if the link is inside a TD + * which continues from the end of one segment to the next segment. + * + * Some 0.95 hardware was found to misbehave if any link TRB doesn't have the chain bit set. + * + * 0.96 hardware from AMD and NEC was found to ignore unchained isochronous link TRBs when + * "resynchronizing the pipe" after a Missed Service Error. + */ static inline bool xhci_link_chain_quirk(struct xhci_hcd *xhci, enum xhci_ring_type type) { return (xhci->quirks & XHCI_LINK_TRB_QUIRK) || - (type == TYPE_ISOC && (xhci->quirks & XHCI_AMD_0x96_HOST)); + (type == TYPE_ISOC && (xhci->quirks & (XHCI_AMD_0x96_HOST | XHCI_NEC_HOST))); } /* xHCI debugging */ From 118abe036c9a65d5249b24f55846667a1e5a71ee Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 6 Mar 2025 16:49:53 +0200 Subject: [PATCH 0610/2157] usb: xhci: Unify duplicate inc_enq() code Extract a block of code copied from inc_enq() into a separate function and call it from inc_enq() and the other function which used this code. Remove the pointless 'next' variable which only aliases ring->enqueue. Note: I don't know if any 0.95 xHC ever reached series production, but "AMD 0.96 host" appears to be the "Llano" family APU. Example dmesg at https://linux-hardware.org/?probe=79d5cfd4fd&log=dmesg pci 0000:00:10.0: [1022:7812] type 00 class 0x0c0330 hcc params 0x014042c3 hci version 0x96 quirks 0x0000000000000608 Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-15-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 132 +++++++++++++++-------------------- 1 file changed, 56 insertions(+), 76 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7643ab9ec3b4..2df94ed3152c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -203,6 +203,50 @@ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring) return; } +/* + * If enqueue points at a link TRB, follow links until an ordinary TRB is reached. + * Toggle the cycle bit of passed link TRBs and optionally chain them. + */ +static void inc_enq_past_link(struct xhci_hcd *xhci, struct xhci_ring *ring, u32 chain) +{ + unsigned int link_trb_count = 0; + + while (trb_is_link(ring->enqueue)) { + + /* + * Section 6.4.4.1 of the 0.95 spec says link TRBs cannot have the chain bit + * set, but other sections talk about dealing with the chain bit set. This was + * fixed in the 0.96 specification errata, but we have to assume that all 0.95 + * xHCI hardware can't handle the chain bit being cleared on a link TRB. + * + * On 0.95 and some 0.96 HCs the chain bit is set once at segment initalization + * and never changed here. On all others, modify it as requested by the caller. + */ + if (!xhci_link_chain_quirk(xhci, ring->type)) { + ring->enqueue->link.control &= cpu_to_le32(~TRB_CHAIN); + ring->enqueue->link.control |= cpu_to_le32(chain); + } + + /* Give this link TRB to the hardware */ + wmb(); + ring->enqueue->link.control ^= cpu_to_le32(TRB_CYCLE); + + /* Toggle the cycle bit after the last ring segment. */ + if (link_trb_toggles_cycle(ring->enqueue)) + ring->cycle_state ^= 1; + + ring->enq_seg = ring->enq_seg->next; + ring->enqueue = ring->enq_seg->trbs; + + trace_xhci_inc_enq(ring); + + if (link_trb_count++ > ring->num_segs) { + xhci_warn(xhci, "Link TRB loop at enqueue\n"); + break; + } + } +} + /* * See Cycle bit rules. SW is the consumer for the event ring only. * @@ -211,11 +255,6 @@ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring) * If we've enqueued the last TRB in a TD, make sure the following link TRBs * have their chain bit cleared (so that each Link TRB is a separate TD). * - * Section 6.4.4.1 of the 0.95 spec says link TRBs cannot have the chain bit - * set, but other sections talk about dealing with the chain bit set. This was - * fixed in the 0.96 specification errata, but we have to assume that all 0.95 - * xHCI hardware can't handle the chain bit being cleared on a link TRB. - * * @more_trbs_coming: Will you enqueue more TRBs before calling * prepare_transfer()? */ @@ -223,8 +262,6 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool more_trbs_coming) { u32 chain; - union xhci_trb *next; - unsigned int link_trb_count = 0; chain = le32_to_cpu(ring->enqueue->generic.field[3]) & TRB_CHAIN; @@ -233,48 +270,16 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, return; } - next = ++(ring->enqueue); + ring->enqueue++; - /* Update the dequeue pointer further if that was a link TRB */ - while (trb_is_link(next)) { - - /* - * If the caller doesn't plan on enqueueing more TDs before - * ringing the doorbell, then we don't want to give the link TRB - * to the hardware just yet. We'll give the link TRB back in - * prepare_ring() just before we enqueue the TD at the top of - * the ring. - */ - if (!chain && !more_trbs_coming) - break; - - /* If we're not dealing with 0.95 hardware or isoc rings on - * AMD 0.96 host, carry over the chain bit of the previous TRB - * (which may mean the chain bit is cleared). - */ - if (!xhci_link_chain_quirk(xhci, ring->type)) { - next->link.control &= cpu_to_le32(~TRB_CHAIN); - next->link.control |= cpu_to_le32(chain); - } - /* Give this link TRB to the hardware */ - wmb(); - next->link.control ^= cpu_to_le32(TRB_CYCLE); - - /* Toggle the cycle bit after the last ring segment. */ - if (link_trb_toggles_cycle(next)) - ring->cycle_state ^= 1; - - ring->enq_seg = ring->enq_seg->next; - ring->enqueue = ring->enq_seg->trbs; - next = ring->enqueue; - - trace_xhci_inc_enq(ring); - - if (link_trb_count++ > ring->num_segs) { - xhci_warn(xhci, "%s: Ring link TRB loop\n", __func__); - break; - } - } + /* + * If we are in the middle of a TD or the caller plans to enqueue more + * TDs as one transfer (eg. control), traverse any link TRBs right now. + * Otherwise, enqueue can stay on a link until the next prepare_ring(). + * This avoids enqueue entering deq_seg and simplifies ring expansion. + */ + if (trb_is_link(ring->enqueue) && (chain || more_trbs_coming)) + inc_enq_past_link(xhci, ring, chain); } /* @@ -3213,7 +3218,6 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, u32 ep_state, unsigned int num_trbs, gfp_t mem_flags) { - unsigned int link_trb_count = 0; unsigned int new_segs = 0; /* Make sure the endpoint has been added to xHC schedule */ @@ -3261,33 +3265,9 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, } } - while (trb_is_link(ep_ring->enqueue)) { - /* If we're not dealing with 0.95 hardware or isoc rings - * on AMD 0.96 host, clear the chain bit. - */ - if (!xhci_link_chain_quirk(xhci, ep_ring->type)) - ep_ring->enqueue->link.control &= - cpu_to_le32(~TRB_CHAIN); - else - ep_ring->enqueue->link.control |= - cpu_to_le32(TRB_CHAIN); - - wmb(); - ep_ring->enqueue->link.control ^= cpu_to_le32(TRB_CYCLE); - - /* Toggle the cycle bit after the last ring segment. */ - if (link_trb_toggles_cycle(ep_ring->enqueue)) - ep_ring->cycle_state ^= 1; - - ep_ring->enq_seg = ep_ring->enq_seg->next; - ep_ring->enqueue = ep_ring->enq_seg->trbs; - - /* prevent infinite loop if all first trbs are link trbs */ - if (link_trb_count++ > ep_ring->num_segs) { - xhci_warn(xhci, "Ring is an endless link TRB loop\n"); - return -EINVAL; - } - } + /* Ensure that new TRBs won't overwrite a link */ + if (trb_is_link(ep_ring->enqueue)) + inc_enq_past_link(xhci, ep_ring, 0); if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue)) { xhci_warn(xhci, "Missing link TRB at end of ring segment\n"); From b331a3d8097fad4e541d212684192f21fedbd6e5 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 6 Mar 2025 16:49:54 +0200 Subject: [PATCH 0611/2157] xhci: Handle spurious events on Etron host isoc enpoints Unplugging a USB3.0 webcam from Etron hosts while streaming results in errors like this: [ 2.646387] xhci_hcd 0000:03:00.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 18 comp_code 13 [ 2.646446] xhci_hcd 0000:03:00.0: Looking for event-dma 000000002fdf8630 trb-start 000000002fdf8640 trb-end 000000002fdf8650 [ 2.646560] xhci_hcd 0000:03:00.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 18 comp_code 13 [ 2.646568] xhci_hcd 0000:03:00.0: Looking for event-dma 000000002fdf8660 trb-start 000000002fdf8670 trb-end 000000002fdf8670 Etron xHC generates two transfer events for the TRB if an error is detected while processing the last TRB of an isoc TD. The first event can be any sort of error (like USB Transaction or Babble Detected, etc), and the final event is Success. The xHCI driver will handle the TD after the first event and remove it from its internal list, and then print an "Transfer event TRB DMA ptr not part of current TD" error message after the final event. Commit 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") is designed to address isoc transaction errors, but unfortunately it doesn't account for this scenario. This issue is similar to the XHCI_SPURIOUS_SUCCESS case where a success event follows a 'short transfer' event, but the TD the event points to is already given back. Expand the spurious success 'short transfer' event handling to cover the spurious success after error on Etron hosts. Kuangyi Chiang reported this issue and submitted a different solution based on using error_mid_td. This commit message is mostly taken from that patch. Reported-by: Kuangyi Chiang Closes: https://lore.kernel.org/linux-usb/20241028025337.6372-6-ki.chiang65@gmail.com/ Tested-by: Kuangyi Chiang Tested-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250306144954.3507700-16-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 38 ++++++++++++++++++++++++------------ drivers/usb/host/xhci.h | 2 +- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 2df94ed3152c..0f8acbb9cd21 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2611,6 +2611,22 @@ static int handle_transferless_tx_event(struct xhci_hcd *xhci, struct xhci_virt_ return 0; } +static bool xhci_spurious_success_tx_event(struct xhci_hcd *xhci, + struct xhci_ring *ring) +{ + switch (ring->old_trb_comp_code) { + case COMP_SHORT_PACKET: + return xhci->quirks & XHCI_SPURIOUS_SUCCESS; + case COMP_USB_TRANSACTION_ERROR: + case COMP_BABBLE_DETECTED_ERROR: + case COMP_ISOCH_BUFFER_OVERRUN: + return xhci->quirks & XHCI_ETRON_HOST && + ring->type == TYPE_ISOC; + default: + return false; + } +} + /* * If this function returns an error condition, it means it got a Transfer * event with a corrupted Slot ID, Endpoint ID, or TRB DMA address. @@ -2665,8 +2681,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, case COMP_SUCCESS: if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { trb_comp_code = COMP_SHORT_PACKET; - xhci_dbg(xhci, "Successful completion on short TX for slot %u ep %u with last td short %d\n", - slot_id, ep_index, ep_ring->last_td_was_short); + xhci_dbg(xhci, "Successful completion on short TX for slot %u ep %u with last td comp code %d\n", + slot_id, ep_index, ep_ring->old_trb_comp_code); } break; case COMP_SHORT_PACKET: @@ -2817,7 +2833,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (trb_comp_code != COMP_STOPPED && trb_comp_code != COMP_STOPPED_LENGTH_INVALID && !ring_xrun_event && - !ep_ring->last_td_was_short) { + !xhci_spurious_success_tx_event(xhci, ep_ring)) { xhci_warn(xhci, "Event TRB for slot %u ep %u with no TDs queued\n", slot_id, ep_index); } @@ -2882,11 +2898,12 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* * Some hosts give a spurious success event after a short - * transfer. Ignore it. + * transfer or error on last TRB. Ignore it. */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; + if (xhci_spurious_success_tx_event(xhci, ep_ring)) { + xhci_dbg(xhci, "Spurious event dma %pad, comp_code %u after %u\n", + &ep_trb_dma, trb_comp_code, ep_ring->old_trb_comp_code); + ep_ring->old_trb_comp_code = trb_comp_code; return 0; } @@ -2909,15 +2926,12 @@ static int handle_tx_event(struct xhci_hcd *xhci, */ } while (ep->skip); + ep_ring->old_trb_comp_code = trb_comp_code; + /* Get out if a TD was queued at enqueue after the xrun occurred */ if (ring_xrun_event) return 0; - if (trb_comp_code == COMP_SHORT_PACKET) - ep_ring->last_td_was_short = true; - else - ep_ring->last_td_was_short = false; - ep_trb = &ep_seg->trbs[(ep_trb_dma - ep_seg->dma) / sizeof(*ep_trb)]; trace_xhci_handle_transfer(ep_ring, (struct xhci_generic_trb *) ep_trb, ep_trb_dma); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index d9d7cd1906f3..6c00062a9acc 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1375,7 +1375,7 @@ struct xhci_ring { unsigned int num_trbs_free; /* used only by xhci DbC */ unsigned int bounce_buf_len; enum xhci_ring_type type; - bool last_td_was_short; + u32 old_trb_comp_code; struct radix_tree_root *trb_address_map; }; From 5ad414f4df2294b28836b5b7b69787659d6aa708 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 16 Feb 2025 23:52:00 +0300 Subject: [PATCH 0612/2157] fs/ntfs3: Fix a couple integer overflows on 32bit systems On 32bit systems the "off + sizeof(struct NTFS_DE)" addition can have an integer wrapping issue. Fix it by using size_add(). Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") Signed-off-by: Dan Carpenter Signed-off-by: Konstantin Komarov --- fs/ntfs3/index.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7eb9fae22f8d..78d20e4baa2c 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -618,7 +618,7 @@ static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes) u32 off = le32_to_cpu(hdr->de_off); if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot || - off + sizeof(struct NTFS_DE) > end) { + size_add(off, sizeof(struct NTFS_DE)) > end) { /* incorrect index buffer. */ return false; } @@ -736,7 +736,7 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, if (end > total) return NULL; - if (off + sizeof(struct NTFS_DE) > end) + if (size_add(off, sizeof(struct NTFS_DE)) > end) return NULL; e = Add2Ptr(hdr, off); From 6bb81b94f7a9cba6bde9a905cef52a65317a8b04 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 16 Feb 2025 23:52:10 +0300 Subject: [PATCH 0613/2157] fs/ntfs3: Prevent integer overflow in hdr_first_de() The "de_off" and "used" variables come from the disk so they both need to check. The problem is that on 32bit systems if they're both greater than UINT_MAX - 16 then the check does work as intended because of an integer overflow. Fixes: 60ce8dfde035 ("fs/ntfs3: Fix wrong if in hdr_first_de") Signed-off-by: Dan Carpenter Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 241f2ffdd920..1ff13b6f9613 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -717,7 +717,7 @@ static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr) struct NTFS_DE *e; u16 esize; - if (de_off >= used || de_off + sizeof(struct NTFS_DE) > used ) + if (de_off >= used || size_add(de_off, sizeof(struct NTFS_DE)) > used) return NULL; e = Add2Ptr(hdr, de_off); From 1b998c4cf0166eaab5e4194d25b922e8377aee14 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Feb 2025 01:45:54 +0000 Subject: [PATCH 0614/2157] fs/ntfs3: Remove unused ni_load_attr ni_load_attr() was added in 2021 by commit 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") but hasn't been used. Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 57 ---------------------------------------------- fs/ntfs3/ntfs_fs.h | 3 --- 2 files changed, 60 deletions(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 81271196c557..b7a83200f2cc 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -280,63 +280,6 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, return rec_find_attr_le(ni, mi2, le2); } -/* - * ni_load_attr - Load attribute that contains given VCN. - */ -struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, CLST vcn, - struct mft_inode **pmi) -{ - struct ATTR_LIST_ENTRY *le; - struct ATTRIB *attr; - struct mft_inode *mi; - struct ATTR_LIST_ENTRY *next; - - if (!ni->attr_list.size) { - if (pmi) - *pmi = &ni->mi; - return mi_find_attr(ni, &ni->mi, NULL, type, name, name_len, - NULL); - } - - le = al_find_ex(ni, NULL, type, name, name_len, NULL); - if (!le) - return NULL; - - /* - * Unfortunately ATTR_LIST_ENTRY contains only start VCN. - * So to find the ATTRIB segment that contains 'vcn' we should - * enumerate some entries. - */ - if (vcn) { - for (;; le = next) { - next = al_find_ex(ni, le, type, name, name_len, NULL); - if (!next || le64_to_cpu(next->vcn) > vcn) - break; - } - } - - if (ni_load_mi(ni, le, &mi)) - return NULL; - - if (pmi) - *pmi = mi; - - attr = mi_find_attr(ni, mi, NULL, type, name, name_len, &le->id); - if (!attr) - return NULL; - - if (!attr->non_res) - return attr; - - if (le64_to_cpu(attr->nres.svcn) <= vcn && - vcn <= le64_to_cpu(attr->nres.evcn)) - return attr; - - _ntfs_bad_inode(&ni->vfs_inode); - return NULL; -} - /* * ni_load_all_mi - Load all subrecords. */ diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 382820464dee..2034dca90a5e 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -530,9 +530,6 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, struct ATTR_LIST_ENTRY **le, struct mft_inode **mi); -struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, CLST vcn, - struct mft_inode **pmi); int ni_load_all_mi(struct ntfs_inode *ni); bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi); int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, From 1404580279f2386aac8246e1a366848589b26f9f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Feb 2025 01:45:55 +0000 Subject: [PATCH 0615/2157] fs/ntfs3: Remove unused ntfs_sb_read ntfs_sb_read() was added in 2021 by commit 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") but hasn't been used. Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 28 ---------------------------- fs/ntfs3/ntfs_fs.h | 1 - 2 files changed, 29 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 938d351ebac7..df81f1f7330c 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1035,34 +1035,6 @@ struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) return NULL; } -int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) -{ - struct block_device *bdev = sb->s_bdev; - u32 blocksize = sb->s_blocksize; - u64 block = lbo >> sb->s_blocksize_bits; - u32 off = lbo & (blocksize - 1); - u32 op = blocksize - off; - - for (; bytes; block += 1, off = 0, op = blocksize) { - struct buffer_head *bh = __bread(bdev, block, blocksize); - - if (!bh) - return -EIO; - - if (op > bytes) - op = bytes; - - memcpy(buffer, bh->b_data + off, op); - - put_bh(bh); - - bytes -= op; - buffer = Add2Ptr(buffer, op); - } - - return 0; -} - int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, const void *buf, int wait) { diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 2034dca90a5e..0e34c944b622 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -616,7 +616,6 @@ enum NTFS_DIRTY_FLAGS { NTFS_DIRTY_ERROR = 2, }; int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty); -int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer); int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, const void *buffer, int wait); int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, From 8b12017c1b9582db8c5833cf08d610e8f810f4b3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Feb 2025 01:45:56 +0000 Subject: [PATCH 0616/2157] fs/ntfs3: Remove unused ntfs_flush_inodes ntfs_flush_inodes() was added in 2021 by commit 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") but has remained unused. Remove it, and it's helper function. Note: There is a commented out call to ntfs_flush_inodes in ntfs_truncate() - I've left that in place. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 40 ---------------------------------------- fs/ntfs3/ntfs_fs.h | 2 -- 2 files changed, 42 deletions(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index a1e11228dafd..3e2957a1e360 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1024,46 +1024,6 @@ int ntfs_sync_inode(struct inode *inode) return _ni_write_inode(inode, 1); } -/* - * writeback_inode - Helper function for ntfs_flush_inodes(). - * - * This writes both the inode and the file data blocks, waiting - * for in flight data blocks before the start of the call. It - * does not wait for any io started during the call. - */ -static int writeback_inode(struct inode *inode) -{ - int ret = sync_inode_metadata(inode, 0); - - if (!ret) - ret = filemap_fdatawrite(inode->i_mapping); - return ret; -} - -/* - * ntfs_flush_inodes - * - * Write data and metadata corresponding to i1 and i2. The io is - * started but we do not wait for any of it to finish. - * - * filemap_flush() is used for the block device, so if there is a dirty - * page for a block already in flight, we will not wait and start the - * io over again. - */ -int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2) -{ - int ret = 0; - - if (i1) - ret = writeback_inode(i1); - if (!ret && i2) - ret = writeback_inode(i2); - if (!ret) - ret = filemap_flush(sb->s_bdev_file->f_mapping); - return ret; -} - /* * Helper function to read file. */ diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 0e34c944b622..d628977e2556 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -713,8 +713,6 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, u32 len, u32 copied, struct folio *folio, void *fsdata); int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); int ntfs_sync_inode(struct inode *inode); -int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); int inode_read_data(struct inode *inode, void *data, size_t bytes); int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, From 59f994e6e3325d5c9f2c5b6a2b834566a6750690 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 25 Feb 2025 14:37:48 +0100 Subject: [PATCH 0617/2157] dt-bindings: i3c: dw: Add power-domains Describe optional power-domains property. Signed-off-by: Michal Simek Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/fb8adcd318b1023ca6b90d294e46ae3b59dc1280.1740490666.git.michal.simek@amd.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml index 4fc13e3c0f75..5f6467375811 100644 --- a/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml @@ -34,6 +34,9 @@ properties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + required: - compatible - reg From c24a084ab6a2c6522f779acd2dfc1d5c062c7781 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 6 Mar 2025 15:54:25 +0800 Subject: [PATCH 0618/2157] dt-bindings: i3c: silvaco: Add npcm845 compatible string Nuvoton npcm845 SoC uses the same Silvico IP but an older version. Need to add a new compatible string to distinguish between different hardware versions. Reviewed-by: Frank Li Acked-by: Krzysztof Kozlowski Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250306075429.2265183-2-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml index c56ff77677f1..4fbdcdac0aee 100644 --- a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml @@ -14,7 +14,9 @@ allOf: properties: compatible: - const: silvaco,i3c-master-v1 + enum: + - nuvoton,npcm845-i3c + - silvaco,i3c-master-v1 reg: maxItems: 1 From 98d87600a04e42282797631aa6b98dd43999e274 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 6 Mar 2025 15:54:26 +0800 Subject: [PATCH 0619/2157] i3c: master: svc: Add support for Nuvoton npcm845 i3c Nuvoton npcm845 SoC uses an older IP version, which has specific hardware issues that need to be addressed with a different compatible string. Add driver data for different compatible strings to define platform specific quirks. Add compatible string for npcm845 to define its own driver data. Signed-off-by: Stanley Chu Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250306075429.2265183-3-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 98f800f99969..c0299e3f2cf8 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -158,6 +158,10 @@ struct svc_i3c_regs_save { u32 mdynaddr; }; +struct svc_i3c_drvdata { + u32 quirks; +}; + /** * struct svc_i3c_master - Silvaco I3C Master structure * @base: I3C master controller @@ -183,6 +187,7 @@ struct svc_i3c_regs_save { * @ibi.tbq_slot: To be queued IBI slot * @ibi.lock: IBI lock * @lock: Transfer lock, protect between IBI work thread and callbacks from master + * @drvdata: Driver data * @enabled_events: Bit masks for enable events (IBI, HotJoin). * @mctrl_config: Configuration value in SVC_I3C_MCTRL for setting speed back. */ @@ -214,6 +219,7 @@ struct svc_i3c_master { spinlock_t lock; } ibi; struct mutex lock; + const struct svc_i3c_drvdata *drvdata; u32 enabled_events; u32 mctrl_config; }; @@ -1819,6 +1825,10 @@ static int svc_i3c_master_probe(struct platform_device *pdev) if (!master) return -ENOMEM; + master->drvdata = of_device_get_match_data(dev); + if (!master->drvdata) + return -EINVAL; + master->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(master->regs)) return PTR_ERR(master->regs); @@ -1960,8 +1970,13 @@ static const struct dev_pm_ops svc_i3c_pm_ops = { svc_i3c_runtime_resume, NULL) }; +static const struct svc_i3c_drvdata npcm845_drvdata = {}; + +static const struct svc_i3c_drvdata svc_default_drvdata = {}; + static const struct of_device_id svc_i3c_master_of_match_tbl[] = { - { .compatible = "silvaco,i3c-master-v1"}, + { .compatible = "nuvoton,npcm845-i3c", .data = &npcm845_drvdata }, + { .compatible = "silvaco,i3c-master-v1", .data = &svc_default_drvdata }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, svc_i3c_master_of_match_tbl); From 4008a74e0f9b0ec25441028e324452933644ed2a Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 6 Mar 2025 15:54:27 +0800 Subject: [PATCH 0620/2157] i3c: master: svc: Fix npcm845 FIFO empty issue I3C HW stalls the write transfer if the transmit FIFO becomes empty, when new data is written to FIFO, I3C HW resumes the transfer but the first transmitted data bit may have the wrong value. Fill the FIFO in advance to prevent FIFO from becoming empty. Reviewed-by: Frank Li Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250306075429.2265183-4-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 71 +++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index c0299e3f2cf8..6b0ec74313d3 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -113,6 +113,7 @@ #define SVC_I3C_MWDATAHE 0x0BC #define SVC_I3C_MRDATAB 0x0C0 #define SVC_I3C_MRDATAH 0x0C8 +#define SVC_I3C_MWDATAB1 0x0CC #define SVC_I3C_MWMSG_SDR 0x0D0 #define SVC_I3C_MRMSG_SDR 0x0D4 #define SVC_I3C_MWMSG_DDR 0x0D8 @@ -133,6 +134,16 @@ #define SVC_I3C_EVENT_IBI GENMASK(7, 0) #define SVC_I3C_EVENT_HOTJOIN BIT(31) +/* + * SVC_I3C_QUIRK_FIFO_EMPTY: + * I3C HW stalls the write transfer if the transmit FIFO becomes empty, + * when new data is written to FIFO, I3C HW resumes the transfer but + * the first transmitted data bit may have the wrong value. + * Workaround: + * Fill the FIFO in advance to prevent FIFO from becoming empty. + */ +#define SVC_I3C_QUIRK_FIFO_EMPTY BIT(0) + struct svc_i3c_cmd { u8 addr; bool rnw; @@ -236,6 +247,11 @@ struct svc_i3c_i2c_dev_data { struct i3c_generic_ibi_pool *ibi_pool; }; +static inline bool svc_has_quirk(struct svc_i3c_master *master, u32 quirk) +{ + return (master->drvdata->quirks & quirk); +} + static inline bool is_events_enabled(struct svc_i3c_master *master, u32 mask) { return !!(master->enabled_events & mask); @@ -894,7 +910,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, u8 *addrs, unsigned int *count) { u64 prov_id[SVC_I3C_MAX_DEVS] = {}, nacking_prov_id = 0; - unsigned int dev_nb = 0, last_addr = 0; + unsigned int dev_nb = 0, last_addr = 0, dyn_addr; u32 reg; int ret, i; @@ -939,6 +955,25 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, if (SVC_I3C_MSTATUS_RXPEND(reg)) { u8 data[6]; + /* + * One slave sends its ID to request for address assignment, + * prefilling the dynamic address can reduce SCL clock stalls + * and also fix the SVC_I3C_QUIRK_FIFO_EMPTY quirk. + * + * Ideally, prefilling before the processDAA command is better. + * However, it requires an additional check to write the dyn_addr + * at the right time because the driver needs to write the processDAA + * command twice for one assignment. + * Prefilling here is safe and efficient because the FIFO starts + * filling within a few hundred nanoseconds, which is significantly + * faster compared to the 64 SCL clock cycles. + */ + dyn_addr = i3c_master_get_free_addr(&master->base, last_addr + 1); + if (dyn_addr < 0) + return -ENOSPC; + + writel(dyn_addr, master->regs + SVC_I3C_MWDATAB); + /* * We only care about the 48-bit provisioned ID yet to * be sure a device does not nack an address twice. @@ -1017,21 +1052,16 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, if (ret) break; - /* Give the slave device a suitable dynamic address */ - ret = i3c_master_get_free_addr(&master->base, last_addr + 1); - if (ret < 0) - break; - - addrs[dev_nb] = ret; + addrs[dev_nb] = dyn_addr; dev_dbg(master->dev, "DAA: device %d assigned to 0x%02x\n", dev_nb, addrs[dev_nb]); - - writel(addrs[dev_nb], master->regs + SVC_I3C_MWDATAB); last_addr = addrs[dev_nb++]; } /* Need manual issue STOP except for Complete condition */ svc_i3c_master_emit_stop(master); + svc_i3c_master_flush_fifo(master); + return ret; } @@ -1228,6 +1258,24 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, SVC_I3C_MCTRL_RDTERM(*actual_len), master->regs + SVC_I3C_MCTRL); + /* + * The entire transaction can consist of multiple write transfers. + * Prefilling before EmitStartAddr causes the data to be emitted + * immediately, becoming part of the previous transfer. + * The only way to work around this hardware issue is to let the + * FIFO start filling as soon as possible after EmitStartAddr. + */ + if (svc_has_quirk(master, SVC_I3C_QUIRK_FIFO_EMPTY) && !rnw && xfer_len) { + u32 end = xfer_len > SVC_I3C_FIFO_SIZE ? 0 : SVC_I3C_MWDATAB_END; + u32 len = min_t(u32, xfer_len, SVC_I3C_FIFO_SIZE); + + writesb(master->regs + SVC_I3C_MWDATAB1, out, len - 1); + /* Mark END bit if this is the last byte */ + writel(out[len - 1] | end, master->regs + SVC_I3C_MWDATAB); + xfer_len -= len; + out += len; + } + ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, SVC_I3C_MSTATUS_MCTRLDONE(reg), 0, 1000); if (ret) @@ -1316,6 +1364,7 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, emit_stop: svc_i3c_master_emit_stop(master); svc_i3c_master_clear_merrwarn(master); + svc_i3c_master_flush_fifo(master); return ret; } @@ -1970,7 +2019,9 @@ static const struct dev_pm_ops svc_i3c_pm_ops = { svc_i3c_runtime_resume, NULL) }; -static const struct svc_i3c_drvdata npcm845_drvdata = {}; +static const struct svc_i3c_drvdata npcm845_drvdata = { + .quirks = SVC_I3C_QUIRK_FIFO_EMPTY, +}; static const struct svc_i3c_drvdata svc_default_drvdata = {}; From 4dd12e944f07013ac280d7f46463c19157898bd1 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 6 Mar 2025 15:54:28 +0800 Subject: [PATCH 0621/2157] i3c: master: svc: Fix npcm845 invalid slvstart event I3C HW may generate an invalid SlvStart event when emitting a STOP. If it is a true SlvStart, the MSTATUS state is SLVREQ. Check the MSTATUS state to ignore the false event. Reviewed-by: Frank Li Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250306075429.2265183-5-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 6b0ec74313d3..25439f9ca2a5 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -58,6 +58,7 @@ #define SVC_I3C_MSTATUS 0x088 #define SVC_I3C_MSTATUS_STATE(x) FIELD_GET(GENMASK(2, 0), (x)) #define SVC_I3C_MSTATUS_STATE_DAA(x) (SVC_I3C_MSTATUS_STATE(x) == 5) +#define SVC_I3C_MSTATUS_STATE_SLVREQ(x) (SVC_I3C_MSTATUS_STATE(x) == 1) #define SVC_I3C_MSTATUS_STATE_IDLE(x) (SVC_I3C_MSTATUS_STATE(x) == 0) #define SVC_I3C_MSTATUS_BETWEEN(x) FIELD_GET(BIT(4), (x)) #define SVC_I3C_MSTATUS_NACKED(x) FIELD_GET(BIT(5), (x)) @@ -143,6 +144,12 @@ * Fill the FIFO in advance to prevent FIFO from becoming empty. */ #define SVC_I3C_QUIRK_FIFO_EMPTY BIT(0) +/* + * SVC_I3C_QUIRK_FLASE_SLVSTART: + * I3C HW may generate an invalid SlvStart event when emitting a STOP. + * If it is a true SlvStart, the MSTATUS state is SLVREQ. + */ +#define SVC_I3C_QUIRK_FALSE_SLVSTART BIT(1) struct svc_i3c_cmd { u8 addr; @@ -586,6 +593,11 @@ static irqreturn_t svc_i3c_master_irq_handler(int irq, void *dev_id) /* Clear the interrupt status */ writel(SVC_I3C_MINT_SLVSTART, master->regs + SVC_I3C_MSTATUS); + /* Ignore the false event */ + if (svc_has_quirk(master, SVC_I3C_QUIRK_FALSE_SLVSTART) && + !SVC_I3C_MSTATUS_STATE_SLVREQ(active)) + return IRQ_HANDLED; + svc_i3c_master_disable_interrupts(master); /* Handle the interrupt in a non atomic context */ @@ -2020,7 +2032,8 @@ static const struct dev_pm_ops svc_i3c_pm_ops = { }; static const struct svc_i3c_drvdata npcm845_drvdata = { - .quirks = SVC_I3C_QUIRK_FIFO_EMPTY, + .quirks = SVC_I3C_QUIRK_FIFO_EMPTY | + SVC_I3C_QUIRK_FALSE_SLVSTART, }; static const struct svc_i3c_drvdata svc_default_drvdata = {}; From 2a785307e41b5c621228e3a7ec3949af546a3e69 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 6 Mar 2025 15:54:29 +0800 Subject: [PATCH 0622/2157] i3c: master: svc: Fix npcm845 DAA process corruption When MCONFIG.SKEW=0 and MCONFIG.ODHPP=0, the ENTDAA transaction gets corrupted and results in a no repeated-start condition at the end of address assignment. Workaround: Set MCONFIG.SKEW to 1 before initiating the DAA process. After the DAA process is completed, return MCONFIG.SKEW to its previous value. Reviewed-by: Frank Li Signed-off-by: Stanley Chu Link: https://lore.kernel.org/r/20250306075429.2265183-6-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 25439f9ca2a5..f22fb9e75142 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -32,6 +32,7 @@ #define SVC_I3C_MCONFIG_ODBAUD(x) FIELD_PREP(GENMASK(23, 16), (x)) #define SVC_I3C_MCONFIG_ODHPP(x) FIELD_PREP(BIT(24), (x)) #define SVC_I3C_MCONFIG_SKEW(x) FIELD_PREP(GENMASK(27, 25), (x)) +#define SVC_I3C_MCONFIG_SKEW_MASK GENMASK(27, 25) #define SVC_I3C_MCONFIG_I2CBAUD(x) FIELD_PREP(GENMASK(31, 28), (x)) #define SVC_I3C_MCTRL 0x084 @@ -150,6 +151,16 @@ * If it is a true SlvStart, the MSTATUS state is SLVREQ. */ #define SVC_I3C_QUIRK_FALSE_SLVSTART BIT(1) +/* + * SVC_I3C_QUIRK_DAA_CORRUPT: + * When MCONFIG.SKEW=0 and MCONFIG.ODHPP=0, the ENTDAA transaction gets + * corrupted and results in a no repeated-start condition at the end of + * address assignment. + * Workaround: + * Set MCONFIG.SKEW to 1 before initiating the DAA process. After the DAA + * process is completed, return MCONFIG.SKEW to its previous value. + */ +#define SVC_I3C_QUIRK_DAA_CORRUPT BIT(2) struct svc_i3c_cmd { u8 addr; @@ -259,6 +270,13 @@ static inline bool svc_has_quirk(struct svc_i3c_master *master, u32 quirk) return (master->drvdata->quirks & quirk); } +static inline bool svc_has_daa_corrupt(struct svc_i3c_master *master) +{ + return ((master->drvdata->quirks & SVC_I3C_QUIRK_DAA_CORRUPT) && + !(master->mctrl_config & + (SVC_I3C_MCONFIG_SKEW_MASK | SVC_I3C_MCONFIG_ODHPP(1)))); +} + static inline bool is_events_enabled(struct svc_i3c_master *master, u32 mask) { return !!(master->enabled_events & mask); @@ -1146,7 +1164,16 @@ static int svc_i3c_master_do_daa(struct i3c_master_controller *m) } spin_lock_irqsave(&master->xferqueue.lock, flags); + + if (svc_has_daa_corrupt(master)) + writel(master->mctrl_config | SVC_I3C_MCONFIG_SKEW(1), + master->regs + SVC_I3C_MCONFIG); + ret = svc_i3c_master_do_daa_locked(master, addrs, &dev_nb); + + if (svc_has_daa_corrupt(master)) + writel(master->mctrl_config, master->regs + SVC_I3C_MCONFIG); + spin_unlock_irqrestore(&master->xferqueue.lock, flags); svc_i3c_master_clear_merrwarn(master); @@ -2033,7 +2060,8 @@ static const struct dev_pm_ops svc_i3c_pm_ops = { static const struct svc_i3c_drvdata npcm845_drvdata = { .quirks = SVC_I3C_QUIRK_FIFO_EMPTY | - SVC_I3C_QUIRK_FALSE_SLVSTART, + SVC_I3C_QUIRK_FALSE_SLVSTART | + SVC_I3C_QUIRK_DAA_CORRUPT, }; static const struct svc_i3c_drvdata svc_default_drvdata = {}; From dcec12617ee61beed928e889607bf37e145bf86b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 3 Mar 2025 23:37:44 +0100 Subject: [PATCH 0623/2157] rtc: ds1307: stop disabling alarms on probe It is a bad practice to disable alarms on probe or remove as this will prevent alarms across reboots. Link: https://lore.kernel.org/r/20250303223744.1135672-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 872e0b679be4..5efbe69bf5ca 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1807,10 +1807,8 @@ static int ds1307_probe(struct i2c_client *client) * For some variants, be sure alarms can trigger when we're * running on Vbackup (BBSQI/BBSQW) */ - if (want_irq || ds1307_can_wakeup_device) { + if (want_irq || ds1307_can_wakeup_device) regs[0] |= DS1337_BIT_INTCN | chip->bbsqi_bit; - regs[0] &= ~(DS1337_BIT_A2IE | DS1337_BIT_A1IE); - } regmap_write(ds1307->regmap, DS1337_REG_CONTROL, regs[0]); From 74fc34937d72d04e89e4f75ea66147cdc9b785f5 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 27 Feb 2025 13:22:31 +0000 Subject: [PATCH 0624/2157] rust: miscdevice: change how f_ops vtable is constructed I was helping someone with writing a new Rust abstraction, and we were using the miscdevice abstraction as an example. While doing this, it became clear to me that the way I implemented the f_ops vtable is confusing to new Rust users, and that the approach used by the block abstractions is less confusing. Thus, update the miscdevice abstractions to use the same approach as rust/kernel/block/mq/operations.rs. Sorry about the large diff. This changes the indentation of a large amount of code. Reviewed-by: Christian Schrefl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250227-miscdevice-fops-change-v1-1-c9e9b75d67eb@google.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/miscdevice.rs | 303 ++++++++++++++++++-------------------- 1 file changed, 146 insertions(+), 157 deletions(-) diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index e14433b2ab9d..fa9ecc42602a 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -35,7 +35,7 @@ pub const fn into_raw(self) -> bindings::miscdevice { let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() }; result.minor = bindings::MISC_DYNAMIC_MINOR as _; result.name = self.name.as_char_ptr(); - result.fops = create_vtable::(); + result.fops = MiscdeviceVTable::::build(); result } } @@ -160,171 +160,160 @@ fn show_fdinfo( } } -const fn create_vtable() -> &'static bindings::file_operations { - const fn maybe_fn(check: bool, func: T) -> Option { - if check { - Some(func) - } else { - None +/// A vtable for the file operations of a Rust miscdevice. +struct MiscdeviceVTable(PhantomData); + +impl MiscdeviceVTable { + /// # Safety + /// + /// `file` and `inode` must be the file and inode for a file that is undergoing initialization. + /// The file must be associated with a `MiscDeviceRegistration`. + unsafe extern "C" fn open(inode: *mut bindings::inode, raw_file: *mut bindings::file) -> c_int { + // SAFETY: The pointers are valid and for a file being opened. + let ret = unsafe { bindings::generic_file_open(inode, raw_file) }; + if ret != 0 { + return ret; + } + + // SAFETY: The open call of a file can access the private data. + let misc_ptr = unsafe { (*raw_file).private_data }; + + // SAFETY: This is a miscdevice, so `misc_open()` set the private data to a pointer to the + // associated `struct miscdevice` before calling into this method. Furthermore, + // `misc_open()` ensures that the miscdevice can't be unregistered and freed during this + // call to `fops_open`. + let misc = unsafe { &*misc_ptr.cast::>() }; + + // SAFETY: + // * This underlying file is valid for (much longer than) the duration of `T::open`. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(raw_file) }; + + let ptr = match T::open(file, misc) { + Ok(ptr) => ptr, + Err(err) => return err.to_errno(), + }; + + // This overwrites the private data with the value specified by the user, changing the type + // of this file's private data. All future accesses to the private data is performed by + // other fops_* methods in this file, which all correctly cast the private data to the new + // type. + // + // SAFETY: The open call of a file can access the private data. + unsafe { (*raw_file).private_data = ptr.into_foreign() }; + + 0 + } + + /// # Safety + /// + /// `file` and `inode` must be the file and inode for a file that is being released. The file + /// must be associated with a `MiscDeviceRegistration`. + unsafe extern "C" fn release(_inode: *mut bindings::inode, file: *mut bindings::file) -> c_int { + // SAFETY: The release call of a file owns the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: The release call of a file owns the private data. + let ptr = unsafe { ::from_foreign(private) }; + + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + T::release(ptr, unsafe { File::from_raw_file(file) }); + + 0 + } + + /// # Safety + /// + /// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. + unsafe extern "C" fn ioctl(file: *mut bindings::file, cmd: c_uint, arg: c_ulong) -> c_long { + // SAFETY: The ioctl call of a file can access the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: Ioctl calls can borrow the private data of the file. + let device = unsafe { ::borrow(private) }; + + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + + match T::ioctl(device, file, cmd, arg) { + Ok(ret) => ret as c_long, + Err(err) => err.to_errno() as c_long, } } - struct VtableHelper { - _t: PhantomData, - } - impl VtableHelper { - const VTABLE: bindings::file_operations = bindings::file_operations { - open: Some(fops_open::), - release: Some(fops_release::), - unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::), - #[cfg(CONFIG_COMPAT)] - compat_ioctl: if T::HAS_COMPAT_IOCTL { - Some(fops_compat_ioctl::) - } else if T::HAS_IOCTL { - Some(bindings::compat_ptr_ioctl) - } else { - None - }, - show_fdinfo: maybe_fn(T::HAS_SHOW_FDINFO, fops_show_fdinfo::), - // SAFETY: All zeros is a valid value for `bindings::file_operations`. - ..unsafe { MaybeUninit::zeroed().assume_init() } - }; + /// # Safety + /// + /// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. + #[cfg(CONFIG_COMPAT)] + unsafe extern "C" fn compat_ioctl( + file: *mut bindings::file, + cmd: c_uint, + arg: c_ulong, + ) -> c_long { + // SAFETY: The compat ioctl call of a file can access the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: Ioctl calls can borrow the private data of the file. + let device = unsafe { ::borrow(private) }; + + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + + match T::compat_ioctl(device, file, cmd, arg) { + Ok(ret) => ret as c_long, + Err(err) => err.to_errno() as c_long, + } } - &VtableHelper::::VTABLE -} + /// # Safety + /// + /// - `file` must be a valid file that is associated with a `MiscDeviceRegistration`. + /// - `seq_file` must be a valid `struct seq_file` that we can write to. + unsafe extern "C" fn show_fdinfo(seq_file: *mut bindings::seq_file, file: *mut bindings::file) { + // SAFETY: The release call of a file owns the private data. + let private = unsafe { (*file).private_data }; + // SAFETY: Ioctl calls can borrow the private data of the file. + let device = unsafe { ::borrow(private) }; + // SAFETY: + // * The file is valid for the duration of this call. + // * There is no active fdget_pos region on the file on this thread. + let file = unsafe { File::from_raw_file(file) }; + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in + // which this method is called. + let m = unsafe { SeqFile::from_raw(seq_file) }; -/// # Safety -/// -/// `file` and `inode` must be the file and inode for a file that is undergoing initialization. -/// The file must be associated with a `MiscDeviceRegistration`. -unsafe extern "C" fn fops_open( - inode: *mut bindings::inode, - raw_file: *mut bindings::file, -) -> c_int { - // SAFETY: The pointers are valid and for a file being opened. - let ret = unsafe { bindings::generic_file_open(inode, raw_file) }; - if ret != 0 { - return ret; + T::show_fdinfo(device, m, file); } - // SAFETY: The open call of a file can access the private data. - let misc_ptr = unsafe { (*raw_file).private_data }; - - // SAFETY: This is a miscdevice, so `misc_open()` set the private data to a pointer to the - // associated `struct miscdevice` before calling into this method. Furthermore, `misc_open()` - // ensures that the miscdevice can't be unregistered and freed during this call to `fops_open`. - let misc = unsafe { &*misc_ptr.cast::>() }; - - // SAFETY: - // * This underlying file is valid for (much longer than) the duration of `T::open`. - // * There is no active fdget_pos region on the file on this thread. - let file = unsafe { File::from_raw_file(raw_file) }; - - let ptr = match T::open(file, misc) { - Ok(ptr) => ptr, - Err(err) => return err.to_errno(), + const VTABLE: bindings::file_operations = bindings::file_operations { + open: Some(Self::open), + release: Some(Self::release), + unlocked_ioctl: if T::HAS_IOCTL { + Some(Self::ioctl) + } else { + None + }, + #[cfg(CONFIG_COMPAT)] + compat_ioctl: if T::HAS_COMPAT_IOCTL { + Some(Self::compat_ioctl) + } else if T::HAS_IOCTL { + Some(bindings::compat_ptr_ioctl) + } else { + None + }, + show_fdinfo: if T::HAS_SHOW_FDINFO { + Some(Self::show_fdinfo) + } else { + None + }, + // SAFETY: All zeros is a valid value for `bindings::file_operations`. + ..unsafe { MaybeUninit::zeroed().assume_init() } }; - // This overwrites the private data with the value specified by the user, changing the type of - // this file's private data. All future accesses to the private data is performed by other - // fops_* methods in this file, which all correctly cast the private data to the new type. - // - // SAFETY: The open call of a file can access the private data. - unsafe { (*raw_file).private_data = ptr.into_foreign() }; - - 0 -} - -/// # Safety -/// -/// `file` and `inode` must be the file and inode for a file that is being released. The file must -/// be associated with a `MiscDeviceRegistration`. -unsafe extern "C" fn fops_release( - _inode: *mut bindings::inode, - file: *mut bindings::file, -) -> c_int { - // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }; - // SAFETY: The release call of a file owns the private data. - let ptr = unsafe { ::from_foreign(private) }; - - // SAFETY: - // * The file is valid for the duration of this call. - // * There is no active fdget_pos region on the file on this thread. - T::release(ptr, unsafe { File::from_raw_file(file) }); - - 0 -} - -/// # Safety -/// -/// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. -unsafe extern "C" fn fops_ioctl( - file: *mut bindings::file, - cmd: c_uint, - arg: c_ulong, -) -> c_long { - // SAFETY: The ioctl call of a file can access the private data. - let private = unsafe { (*file).private_data }; - // SAFETY: Ioctl calls can borrow the private data of the file. - let device = unsafe { ::borrow(private) }; - - // SAFETY: - // * The file is valid for the duration of this call. - // * There is no active fdget_pos region on the file on this thread. - let file = unsafe { File::from_raw_file(file) }; - - match T::ioctl(device, file, cmd, arg) { - Ok(ret) => ret as c_long, - Err(err) => err.to_errno() as c_long, + const fn build() -> &'static bindings::file_operations { + &Self::VTABLE } } - -/// # Safety -/// -/// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. -#[cfg(CONFIG_COMPAT)] -unsafe extern "C" fn fops_compat_ioctl( - file: *mut bindings::file, - cmd: c_uint, - arg: c_ulong, -) -> c_long { - // SAFETY: The compat ioctl call of a file can access the private data. - let private = unsafe { (*file).private_data }; - // SAFETY: Ioctl calls can borrow the private data of the file. - let device = unsafe { ::borrow(private) }; - - // SAFETY: - // * The file is valid for the duration of this call. - // * There is no active fdget_pos region on the file on this thread. - let file = unsafe { File::from_raw_file(file) }; - - match T::compat_ioctl(device, file, cmd, arg) { - Ok(ret) => ret as c_long, - Err(err) => err.to_errno() as c_long, - } -} - -/// # Safety -/// -/// - `file` must be a valid file that is associated with a `MiscDeviceRegistration`. -/// - `seq_file` must be a valid `struct seq_file` that we can write to. -unsafe extern "C" fn fops_show_fdinfo( - seq_file: *mut bindings::seq_file, - file: *mut bindings::file, -) { - // SAFETY: The release call of a file owns the private data. - let private = unsafe { (*file).private_data }; - // SAFETY: Ioctl calls can borrow the private data of the file. - let device = unsafe { ::borrow(private) }; - // SAFETY: - // * The file is valid for the duration of this call. - // * There is no active fdget_pos region on the file on this thread. - let file = unsafe { File::from_raw_file(file) }; - // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which - // this method is called. - let m = unsafe { SeqFile::from_raw(seq_file) }; - - T::show_fdinfo(device, m, file); -} From 897008d0f7672c3510281e826232a32d62710323 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 5 Mar 2025 13:18:00 -0800 Subject: [PATCH 0625/2157] iommufd: Set domain->iommufd_hwpt in all hwpt->domain allocators Setting domain->iommufd_hwpt in iommufd_hwpt_alloc() only covers the HWPT allocations from user space, but not for an auto domain. This resulted in a NULL pointer access in the auto domain pathway: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 pc : iommufd_sw_msi+0x54/0x2b0 lr : iommufd_sw_msi+0x40/0x2b0 Call trace: iommufd_sw_msi+0x54/0x2b0 (P) iommu_dma_prepare_msi+0x64/0xa8 its_irq_domain_alloc+0xf0/0x2c0 irq_domain_alloc_irqs_parent+0x2c/0xa8 msi_domain_alloc+0xa0/0x1a8 Since iommufd_sw_msi() requires to access the domain->iommufd_hwpt, it is better to set that explicitly prior to calling iommu_domain_set_sw_msi(). Fixes: 748706d7ca06 ("iommu: Turn fault_data to iommufd private pointer") Link: https://patch.msgid.link/r/20250305211800.229465-1-nicolinc@nvidia.com Reported-by: Ankit Agrawal Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Tested-by: Ankit Agrawal Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 268315b1d8bc..1d4cfe3677dc 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -159,6 +159,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } } + hwpt->domain->iommufd_hwpt = hwpt; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* @@ -255,6 +256,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, goto out_abort; } hwpt->domain->owner = ops; + hwpt->domain->iommufd_hwpt = hwpt; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { @@ -311,6 +313,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt->domain = NULL; goto out_abort; } + hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); @@ -415,7 +418,6 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) refcount_inc(&fault->obj.users); iommufd_put_object(ucmd->ictx, &fault->obj); } - hwpt->domain->iommufd_hwpt = hwpt; cmd->out_hwpt_id = hwpt->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); From 55c85fa7579dc2e3f5399ef5bad67a44257c1a48 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 5 Mar 2025 19:48:42 -0800 Subject: [PATCH 0626/2157] iommufd: Fail replace if device has not been attached The current implementation of iommufd_device_do_replace() implicitly assumes that the input device has already been attached. However, there is no explicit check to verify this assumption. If another device within the same group has been attached, the replace operation might succeed, but the input device itself may not have been attached yet. As a result, the input device might not be tracked in the igroup->device_list, and its reserved IOVA might not be added. Despite this, the caller might incorrectly assume that the device has been successfully replaced, which could lead to unexpected behavior or errors. To address this issue, add a check to ensure that the input device has been attached before proceeding with the replace operation. This check will help maintain the integrity of the device tracking system and prevent potential issues arising from incorrect assumptions about the device's attachment status. Fixes: e88d4ec154a8 ("iommufd: Add iommufd_device_replace()") Link: https://patch.msgid.link/r/20250306034842.5950-1-yi.l.liu@intel.com Cc: stable@vger.kernel.org Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index b2f0cb909e6d..bd50146e2ad0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -471,6 +471,17 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, /* The device attach/detach/replace helpers for attach_handle */ +/* Check if idev is attached to igroup->hwpt */ +static bool iommufd_device_is_attached(struct iommufd_device *idev) +{ + struct iommufd_device *cur; + + list_for_each_entry(cur, &idev->igroup->device_list, group_item) + if (cur == idev) + return true; + return false; +} + static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { @@ -710,6 +721,11 @@ iommufd_device_do_replace(struct iommufd_device *idev, goto err_unlock; } + if (!iommufd_device_is_attached(idev)) { + rc = -EINVAL; + goto err_unlock; + } + if (hwpt == igroup->hwpt) { mutex_unlock(&idev->igroup->lock); return NULL; From 8a9b1751b26cf04dabd903e32d0cc9c765fb3116 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 5 Mar 2025 23:16:59 +0100 Subject: [PATCH 0627/2157] rtc: pl031: document struct pl031_vendor_data members Document the range related members of struct pl031_vendor_data. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503011015.SYvdddTc-lkp@intel.com/ Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250305221659.1153495-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pl031.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 47bfc5395e59..eab39dfa4e5f 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -74,6 +74,8 @@ * @st_weekday: if this is an ST Microelectronics silicon version that need * the weekday fix * @irqflags: special IRQ flags per variant + * @range_min: minimum date/time supported by the RTC + * @range_max: maximum date/time supported by the RTC */ struct pl031_vendor_data { struct rtc_class_ops ops; From e5d5813968217b99ef2b83f13353967b218e3841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Thu, 6 Mar 2025 14:44:36 +0100 Subject: [PATCH 0628/2157] counter: microchip-tcb-capture: Add IRQ handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add interrupt servicing to allow userspace to wait for the following: * Change-of-state caused by external trigger * Capture of timer value into RA/RB * Compare to RC register * Overflow Signed-off-by: Bence Csókás Link: https://lore.kernel.org/r/20250306134441.582819-2-csokas.bence@prolan.hu Signed-off-by: William Breathitt Gray --- MAINTAINERS | 1 + drivers/counter/microchip-tcb-capture.c | 74 +++++++++++++++++++ .../linux/counter/microchip-tcb-capture.h | 34 +++++++++ 3 files changed, 109 insertions(+) create mode 100644 include/uapi/linux/counter/microchip-tcb-capture.h diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..c7aa35994e14 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15642,6 +15642,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-iio@vger.kernel.org S: Maintained F: drivers/counter/microchip-tcb-capture.c +F: include/uapi/linux/counter/microchip-tcb-capture.h MICROCHIP USB251XB DRIVER M: Richard Leitner diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index 2f096a5b973d..16707099bd69 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -6,18 +6,24 @@ */ #include #include +#include #include #include #include #include +#include #include #include +#include #include #define ATMEL_TC_CMR_MASK (ATMEL_TC_LDRA_RISING | ATMEL_TC_LDRB_FALLING | \ ATMEL_TC_ETRGEDG_RISING | ATMEL_TC_LDBDIS | \ ATMEL_TC_LDBSTOP) +#define ATMEL_TC_DEF_IRQS (ATMEL_TC_ETRGS | ATMEL_TC_COVFS | \ + ATMEL_TC_LDRAS | ATMEL_TC_LDRBS | ATMEL_TC_CPCS) + #define ATMEL_TC_QDEN BIT(8) #define ATMEL_TC_POSEN BIT(9) @@ -294,6 +300,65 @@ static const struct of_device_id atmel_tc_of_match[] = { { /* sentinel */ } }; +static irqreturn_t mchp_tc_isr(int irq, void *dev_id) +{ + struct counter_device *const counter = dev_id; + struct mchp_tc_data *const priv = counter_priv(counter); + u32 sr, mask; + + regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], SR), &sr); + regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], IMR), &mask); + + sr &= mask; + if (!(sr & ATMEL_TC_ALL_IRQ)) + return IRQ_NONE; + + if (sr & ATMEL_TC_ETRGS) + counter_push_event(counter, COUNTER_EVENT_CHANGE_OF_STATE, + COUNTER_MCHP_EVCHN_CV); + if (sr & ATMEL_TC_LDRAS) + counter_push_event(counter, COUNTER_EVENT_CAPTURE, + COUNTER_MCHP_EVCHN_RA); + if (sr & ATMEL_TC_LDRBS) + counter_push_event(counter, COUNTER_EVENT_CAPTURE, + COUNTER_MCHP_EVCHN_RB); + if (sr & ATMEL_TC_CPCS) + counter_push_event(counter, COUNTER_EVENT_THRESHOLD, + COUNTER_MCHP_EVCHN_RC); + if (sr & ATMEL_TC_COVFS) + counter_push_event(counter, COUNTER_EVENT_OVERFLOW, + COUNTER_MCHP_EVCHN_CV); + + return IRQ_HANDLED; +} + +static void mchp_tc_irq_remove(void *ptr) +{ + struct mchp_tc_data *priv = ptr; + + regmap_write(priv->regmap, ATMEL_TC_REG(priv->channel[0], IDR), ATMEL_TC_DEF_IRQS); +} + +static int mchp_tc_irq_enable(struct counter_device *const counter, int irq) +{ + struct mchp_tc_data *const priv = counter_priv(counter); + int ret = devm_request_irq(counter->parent, irq, mchp_tc_isr, 0, + dev_name(counter->parent), counter); + + if (ret < 0) + return ret; + + ret = regmap_write(priv->regmap, ATMEL_TC_REG(priv->channel[0], IER), ATMEL_TC_DEF_IRQS); + if (ret < 0) + return ret; + + ret = devm_add_action_or_reset(counter->parent, mchp_tc_irq_remove, priv); + if (ret < 0) + return ret; + + return 0; +} + static void mchp_tc_clk_remove(void *ptr) { clk_disable_unprepare((struct clk *)ptr); @@ -378,6 +443,15 @@ static int mchp_tc_probe(struct platform_device *pdev) counter->num_signals = ARRAY_SIZE(mchp_tc_count_signals); counter->signals = mchp_tc_count_signals; + i = of_irq_get(np->parent, 0); + if (i == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (i > 0) { + ret = mchp_tc_irq_enable(counter, i); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "Failed to set up IRQ"); + } + ret = devm_counter_add(&pdev->dev, counter); if (ret < 0) return dev_err_probe(&pdev->dev, ret, "Failed to add counter\n"); diff --git a/include/uapi/linux/counter/microchip-tcb-capture.h b/include/uapi/linux/counter/microchip-tcb-capture.h new file mode 100644 index 000000000000..f3ef315fe9f6 --- /dev/null +++ b/include/uapi/linux/counter/microchip-tcb-capture.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Channel numbers used by the microchip-tcb-capture driver + * Copyright (C) 2025 Bence Csókás + */ +#ifndef _UAPI_COUNTER_MCHP_TCB_H_ +#define _UAPI_COUNTER_MCHP_TCB_H_ + +/* + * The driver defines the following components: + * + * Count 0 + * \__ Synapse 0 -- Signal 0 (Channel A, i.e. TIOA) + * \__ Synapse 1 -- Signal 1 (Channel B, i.e. TIOB) + * + * It also supports the following events: + * + * Channel 0: + * - CV register changed + * - CV overflowed + * - RA captured + * Channel 1: + * - RB captured + * Channel 2: + * - RC compare triggered + */ + +/* Event channels */ +#define COUNTER_MCHP_EVCHN_CV 0 +#define COUNTER_MCHP_EVCHN_RA 0 +#define COUNTER_MCHP_EVCHN_RB 1 +#define COUNTER_MCHP_EVCHN_RC 2 + +#endif /* _UAPI_COUNTER_MCHP_TCB_H_ */ From 1adc6240a80278c613f655b71c6c0d447b2d5932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Thu, 6 Mar 2025 14:44:37 +0100 Subject: [PATCH 0629/2157] counter: microchip-tcb-capture: Add capture extensions for registers RA/RB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCB hardware is capable of capturing the timer value to registers RA and RB. Add these registers as capture extensions. Signed-off-by: Bence Csókás Link: https://lore.kernel.org/r/20250306134441.582819-3-csokas.bence@prolan.hu Signed-off-by: William Breathitt Gray --- drivers/counter/microchip-tcb-capture.c | 58 +++++++++++++++++++ .../linux/counter/microchip-tcb-capture.h | 6 ++ 2 files changed, 64 insertions(+) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index 16707099bd69..aeaee6e0245e 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -253,6 +253,62 @@ static int mchp_tc_count_read(struct counter_device *counter, return 0; } +static int mchp_tc_count_cap_read(struct counter_device *counter, + struct counter_count *count, size_t idx, u64 *val) +{ + struct mchp_tc_data *const priv = counter_priv(counter); + u32 cnt; + int ret; + + switch (idx) { + case COUNTER_MCHP_EXCAP_RA: + ret = regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], RA), &cnt); + break; + case COUNTER_MCHP_EXCAP_RB: + ret = regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], RB), &cnt); + break; + default: + return -EINVAL; + } + + if (ret < 0) + return ret; + + *val = cnt; + + return 0; +} + +static int mchp_tc_count_cap_write(struct counter_device *counter, + struct counter_count *count, size_t idx, u64 val) +{ + struct mchp_tc_data *const priv = counter_priv(counter); + int ret; + + if (val > U32_MAX) + return -ERANGE; + + switch (idx) { + case COUNTER_MCHP_EXCAP_RA: + ret = regmap_write(priv->regmap, ATMEL_TC_REG(priv->channel[0], RA), val); + break; + case COUNTER_MCHP_EXCAP_RB: + ret = regmap_write(priv->regmap, ATMEL_TC_REG(priv->channel[0], RB), val); + break; + default: + return -EINVAL; + } + + return ret; +} + +static DEFINE_COUNTER_ARRAY_CAPTURE(mchp_tc_cnt_cap_array, 2); + +static struct counter_comp mchp_tc_count_ext[] = { + COUNTER_COMP_ARRAY_CAPTURE(mchp_tc_count_cap_read, mchp_tc_count_cap_write, + mchp_tc_cnt_cap_array), +}; + static struct counter_count mchp_tc_counts[] = { { .id = 0, @@ -261,6 +317,8 @@ static struct counter_count mchp_tc_counts[] = { .num_functions = ARRAY_SIZE(mchp_tc_count_functions), .synapses = mchp_tc_count_synapses, .num_synapses = ARRAY_SIZE(mchp_tc_count_synapses), + .ext = mchp_tc_count_ext, + .num_ext = ARRAY_SIZE(mchp_tc_count_ext), }, }; diff --git a/include/uapi/linux/counter/microchip-tcb-capture.h b/include/uapi/linux/counter/microchip-tcb-capture.h index f3ef315fe9f6..136e2faa7730 100644 --- a/include/uapi/linux/counter/microchip-tcb-capture.h +++ b/include/uapi/linux/counter/microchip-tcb-capture.h @@ -12,6 +12,8 @@ * Count 0 * \__ Synapse 0 -- Signal 0 (Channel A, i.e. TIOA) * \__ Synapse 1 -- Signal 1 (Channel B, i.e. TIOB) + * \__ Extension capture0 (RA register) + * \__ Extension capture1 (RB register) * * It also supports the following events: * @@ -25,6 +27,10 @@ * - RC compare triggered */ +/* Capture extensions */ +#define COUNTER_MCHP_EXCAP_RA 0 +#define COUNTER_MCHP_EXCAP_RB 1 + /* Event channels */ #define COUNTER_MCHP_EVCHN_CV 0 #define COUNTER_MCHP_EVCHN_RA 0 From 5c03f9f4d36292150c14ebd90788c4d3273ed9dc Mon Sep 17 00:00:00 2001 From: Chin-Ting Kuo Date: Mon, 13 Jan 2025 17:37:37 +0800 Subject: [PATCH 0630/2157] watchdog: aspeed: Update bootstatus handling The boot status in the watchdog device struct is updated during controller probe stage. Application layer can get the boot status through the command, cat /sys/class/watchdog/watchdogX/bootstatus. The bootstatus can be, WDIOF_CARDRESET => System is reset due to WDT timeout occurs. Others => Other reset events, e.g., power on reset. On ASPEED platforms, boot status is recorded in the SCU registers. - AST2400: Only a bit is used to represent system reset triggered by any WDT controller. - AST2500/AST2600: System reset triggered by different WDT controllers can be distinguished by different SCU bits. Besides, on AST2400 and AST2500, since alternating boot event is also triggered by using WDT timeout mechanism, it is classified as WDIOF_CARDRESET. Signed-off-by: Chin-Ting Kuo Reviewed-by: Andrew Jeffery Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250113093737.845097-2-chin-ting_kuo@aspeedtech.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 81 ++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index b4773a6aaf8c..369635b38ca0 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -11,21 +11,30 @@ #include #include #include +#include #include #include #include #include +#include #include static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +struct aspeed_wdt_scu { + const char *compatible; + u32 reset_status_reg; + u32 wdt_reset_mask; + u32 wdt_reset_mask_shift; +}; struct aspeed_wdt_config { u32 ext_pulse_width_mask; u32 irq_shift; u32 irq_mask; + struct aspeed_wdt_scu scu; }; struct aspeed_wdt { @@ -39,18 +48,36 @@ static const struct aspeed_wdt_config ast2400_config = { .ext_pulse_width_mask = 0xff, .irq_shift = 0, .irq_mask = 0, + .scu = { + .compatible = "aspeed,ast2400-scu", + .reset_status_reg = 0x3c, + .wdt_reset_mask = 0x1, + .wdt_reset_mask_shift = 1, + }, }; static const struct aspeed_wdt_config ast2500_config = { .ext_pulse_width_mask = 0xfffff, .irq_shift = 12, .irq_mask = GENMASK(31, 12), + .scu = { + .compatible = "aspeed,ast2500-scu", + .reset_status_reg = 0x3c, + .wdt_reset_mask = 0x1, + .wdt_reset_mask_shift = 2, + }, }; static const struct aspeed_wdt_config ast2600_config = { .ext_pulse_width_mask = 0xfffff, .irq_shift = 0, .irq_mask = GENMASK(31, 10), + .scu = { + .compatible = "aspeed,ast2600-scu", + .reset_status_reg = 0x74, + .wdt_reset_mask = 0xf, + .wdt_reset_mask_shift = 16, + }, }; static const struct of_device_id aspeed_wdt_of_table[] = { @@ -213,6 +240,56 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd, return 0; } +static void aspeed_wdt_update_bootstatus(struct platform_device *pdev, + struct aspeed_wdt *wdt) +{ + const struct resource *res; + struct aspeed_wdt_scu scu = wdt->cfg->scu; + struct regmap *scu_base; + u32 reset_mask_width; + u32 reset_mask_shift; + u32 idx = 0; + u32 status; + int ret; + + if (!of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2400-wdt")) { + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + idx = ((intptr_t)wdt->base & 0x00000fff) / resource_size(res); + } + + scu_base = syscon_regmap_lookup_by_compatible(scu.compatible); + if (IS_ERR(scu_base)) { + wdt->wdd.bootstatus = WDIOS_UNKNOWN; + return; + } + + ret = regmap_read(scu_base, scu.reset_status_reg, &status); + if (ret) { + wdt->wdd.bootstatus = WDIOS_UNKNOWN; + return; + } + + reset_mask_width = hweight32(scu.wdt_reset_mask); + reset_mask_shift = scu.wdt_reset_mask_shift + + reset_mask_width * idx; + + if (status & (scu.wdt_reset_mask << reset_mask_shift)) + wdt->wdd.bootstatus = WDIOF_CARDRESET; + + /* clear wdt reset event flag */ + if (of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2400-wdt") || + of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2500-wdt")) { + ret = regmap_read(scu_base, scu.reset_status_reg, &status); + if (!ret) { + status &= ~(scu.wdt_reset_mask << reset_mask_shift); + regmap_write(scu_base, scu.reset_status_reg, status); + } + } else { + regmap_write(scu_base, scu.reset_status_reg, + scu.wdt_reset_mask << reset_mask_shift); + } +} + /* access_cs0 shows if cs0 is accessible, hence the reverted bit */ static ssize_t access_cs0_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -458,10 +535,10 @@ static int aspeed_wdt_probe(struct platform_device *pdev) writel(duration - 1, wdt->base + WDT_RESET_WIDTH); } + aspeed_wdt_update_bootstatus(pdev, wdt); + status = readl(wdt->base + WDT_TIMEOUT_STATUS); if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) { - wdt->wdd.bootstatus = WDIOF_CARDRESET; - if (of_device_is_compatible(np, "aspeed,ast2400-wdt") || of_device_is_compatible(np, "aspeed,ast2500-wdt")) wdt->wdd.groups = bswitch_groups; From f285bd8c74d3deefd36dfee8bcbbdd781fa6fc21 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 16 Jan 2025 14:46:04 -0800 Subject: [PATCH 0631/2157] watchdog: cros-ec: Add newlines to printks Add newlines to printk messages so that the next record is more easily readable. Cc: Lukasz Majczak Cc: Benson Leung Signed-off-by: Stephen Boyd Reviewed-by: Benson Leung Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250116224605.110870-1-swboyd@chromium.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/cros_ec_wdt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c index ba045e29f9a5..716c23f4388c 100644 --- a/drivers/watchdog/cros_ec_wdt.c +++ b/drivers/watchdog/cros_ec_wdt.c @@ -58,7 +58,7 @@ static int cros_ec_wdt_ping(struct watchdog_device *wdd) arg.req.command = EC_HANG_DETECT_CMD_RELOAD; ret = cros_ec_wdt_send_cmd(cros_ec, &arg); if (ret < 0) - dev_dbg(wdd->parent, "Failed to ping watchdog (%d)", ret); + dev_dbg(wdd->parent, "Failed to ping watchdog (%d)\n", ret); return ret; } @@ -74,7 +74,7 @@ static int cros_ec_wdt_start(struct watchdog_device *wdd) arg.req.reboot_timeout_sec = wdd->timeout; ret = cros_ec_wdt_send_cmd(cros_ec, &arg); if (ret < 0) - dev_dbg(wdd->parent, "Failed to start watchdog (%d)", ret); + dev_dbg(wdd->parent, "Failed to start watchdog (%d)\n", ret); return ret; } @@ -88,7 +88,7 @@ static int cros_ec_wdt_stop(struct watchdog_device *wdd) arg.req.command = EC_HANG_DETECT_CMD_CANCEL; ret = cros_ec_wdt_send_cmd(cros_ec, &arg); if (ret < 0) - dev_dbg(wdd->parent, "Failed to stop watchdog (%d)", ret); + dev_dbg(wdd->parent, "Failed to stop watchdog (%d)\n", ret); return ret; } @@ -136,7 +136,7 @@ static int cros_ec_wdt_probe(struct platform_device *pdev) arg.req.command = EC_HANG_DETECT_CMD_GET_STATUS; ret = cros_ec_wdt_send_cmd(cros_ec, &arg); if (ret < 0) - return dev_err_probe(dev, ret, "Failed to get watchdog bootstatus"); + return dev_err_probe(dev, ret, "Failed to get watchdog bootstatus\n"); wdd->parent = &pdev->dev; wdd->info = &cros_ec_wdt_ident; @@ -150,7 +150,7 @@ static int cros_ec_wdt_probe(struct platform_device *pdev) arg.req.command = EC_HANG_DETECT_CMD_CLEAR_STATUS; ret = cros_ec_wdt_send_cmd(cros_ec, &arg); if (ret < 0) - return dev_err_probe(dev, ret, "Failed to clear watchdog bootstatus"); + return dev_err_probe(dev, ret, "Failed to clear watchdog bootstatus\n"); watchdog_stop_on_reboot(wdd); watchdog_stop_on_unregister(wdd); From 331c8349605c8fa2f9040c39fe8c40afe3fdc3c3 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sun, 26 Jan 2025 13:26:31 +0000 Subject: [PATCH 0632/2157] watchdog: Enable RZV2HWDT driver depend on ARCH_RENESAS RZ/G3E watchdog timer IP is similar to the one found on RZ/V2H. Both these SoCs belong to the ARCH_RENESAS family. So, it makes sense to use ARCH_RENESAS rather than ARCH_R9A09G057 to enable the RZV2HWDT driver. Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250126132633.31956-3-biju.das.jz@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f81705f8539a..b9d23f98a436 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -963,13 +963,14 @@ config RENESAS_RZG2LWDT Renesas RZ/G2L SoCs. These watchdogs can be used to reset a system. config RENESAS_RZV2HWDT - tristate "Renesas RZ/V2H(P) WDT Watchdog" - depends on ARCH_R9A09G057 || COMPILE_TEST + tristate "Renesas RZ/{G3E,V2H(P)} WDT Watchdog" + depends on ARCH_RENESAS || COMPILE_TEST depends on PM || COMPILE_TEST select WATCHDOG_CORE help This driver adds watchdog support for the integrated watchdogs in the - Renesas RZ/V2H(P) SoCs. These watchdogs can be used to reset a system. + Renesas RZ/{G3E,V2H(P)} SoCs. These watchdogs can be used to reset a + system. config ASPEED_WATCHDOG tristate "Aspeed BMC watchdog support" From c284153a2c5537db4fec51ac850c17d2eb1ffcfe Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Fri, 31 Jan 2025 15:48:55 -0500 Subject: [PATCH 0633/2157] watchdog: lenovo_se30_wdt: Watchdog driver for Lenovo SE30 platform Watchdog driver implementation for Lenovo SE30 platform. Signed-off-by: Mark Pearson Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250131204855.1827-1-mpearson-lenovo@squebb.ca Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 12 + drivers/watchdog/Makefile | 1 + drivers/watchdog/lenovo_se30_wdt.c | 393 +++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+) create mode 100644 drivers/watchdog/lenovo_se30_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b9d23f98a436..123d1f564f7c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -279,6 +279,18 @@ config LENOVO_SE10_WDT This driver can also be built as a module. If so, the module will be called lenovo-se10-wdt. +config LENOVO_SE30_WDT + tristate "Lenovo SE30 Watchdog" + depends on (X86 && DMI) || COMPILE_TEST + depends on HAS_IOPORT + select WATCHDOG_CORE + help + If you say yes here you get support for the watchdog + functionality for the Lenovo SE30 platform. + + This driver can also be built as a module. If so, the module + will be called lenovo-se30-wdt. + config MENF21BMC_WATCHDOG tristate "MEN 14F021P00 BMC Watchdog" depends on MFD_MENF21BMC || COMPILE_TEST diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 8411626fa162..c9482904bf87 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -124,6 +124,7 @@ obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o obj-$(CONFIG_IE6XX_WDT) += ie6xx_wdt.o obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o obj-$(CONFIG_LENOVO_SE10_WDT) += lenovo_se10_wdt.o +obj-$(CONFIG_LENOVO_SE30_WDT) += lenovo_se30_wdt.o ifeq ($(CONFIG_ITCO_VENDOR_SUPPORT),y) obj-$(CONFIG_ITCO_WDT) += iTCO_vendor_support.o endif diff --git a/drivers/watchdog/lenovo_se30_wdt.c b/drivers/watchdog/lenovo_se30_wdt.c new file mode 100644 index 000000000000..f25429da0cec --- /dev/null +++ b/drivers/watchdog/lenovo_se30_wdt.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * WDT driver for Lenovo SE30 device + */ + +#define dev_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOREGION_OFFSET 4 /* Use EC port 1 */ +#define IOREGION_LENGTH 4 + +#define WATCHDOG_TIMEOUT 60 + +#define MIN_TIMEOUT 1 +#define MAX_TIMEOUT 255 +#define MAX_WAIT 10 + +static int timeout; /* in seconds */ +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, + "Watchdog timeout in seconds. 1 <= timeout <= 255, default=" + __MODULE_STRING(WATCHDOG_TIMEOUT) "."); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define LNV_SE30_NAME "lenovo-se30-wdt" +#define LNV_SE30_ID 0x0110 +#define CHIPID_MASK 0xFFF0 + +#define CHIPID_REG 0x20 +#define SIO_REG 0x2e +#define LDN_REG 0x07 +#define UNLOCK_KEY 0x87 +#define LOCK_KEY 0xAA +#define LD_NUM_SHM 0x0F +#define LD_BASE_ADDR 0xF8 + +#define WDT_MODULE 0x10 +#define WDT_CFG_INDEX 0x15 /* WD configuration register */ +#define WDT_CNT_INDEX 0x16 /* WD timer count register */ +#define WDT_CFG_RESET 0x2 + +/* Host Interface WIN2 offset definition */ +#define SHM_WIN_SIZE 0xFF +#define SHM_WIN_MOD_OFFSET 0x01 +#define SHM_WIN_CMD_OFFSET 0x02 +#define SHM_WIN_SEL_OFFSET 0x03 +#define SHM_WIN_CTL_OFFSET 0x04 +#define VAL_SHM_WIN_CTRL_WR 0x40 +#define VAL_SHM_WIN_CTRL_RD 0x80 +#define SHM_WIN_ID_OFFSET 0x08 +#define SHM_WIN_DAT_OFFSET 0x10 + +struct nct6692_reg { + unsigned char mod; + unsigned char cmd; + unsigned char sel; + unsigned int idx; +}; + +/* Watchdog is based on NCT6692 device */ +struct lenovo_se30_wdt { + unsigned char __iomem *shm_base_addr; + struct nct6692_reg wdt_cfg; + struct nct6692_reg wdt_cnt; + struct watchdog_device wdt; +}; + +static inline void superio_outb(int ioreg, int reg, int val) +{ + outb(reg, ioreg); + outb(val, ioreg + 1); +} + +static inline int superio_inb(int ioreg, int reg) +{ + outb(reg, ioreg); + return inb(ioreg + 1); +} + +static inline int superio_enter(int key, int addr, const char *name) +{ + if (!request_muxed_region(addr, 2, name)) { + pr_err("I/O address 0x%04x already in use\n", addr); + return -EBUSY; + } + outb(key, addr); /* Enter extended function mode */ + outb(key, addr); /* Again according to manual */ + + return 0; +} + +static inline void superio_exit(int key, int addr) +{ + outb(key, addr); /* Leave extended function mode */ + release_region(addr, 2); +} + +static int shm_get_ready(unsigned char __iomem *shm_base_addr, + const struct nct6692_reg *reg) +{ + unsigned char pre_id, new_id; + int loop = 0; + + iowrite8(reg->mod, shm_base_addr + SHM_WIN_MOD_OFFSET); + iowrite8(reg->cmd, shm_base_addr + SHM_WIN_CMD_OFFSET); + iowrite8(reg->sel, shm_base_addr + SHM_WIN_SEL_OFFSET); + + pre_id = ioread8(shm_base_addr + SHM_WIN_ID_OFFSET); + iowrite8(VAL_SHM_WIN_CTRL_RD, shm_base_addr + SHM_WIN_CTL_OFFSET); + + /* Loop checking when interface is ready */ + while (loop < MAX_WAIT) { + new_id = ioread8(shm_base_addr + SHM_WIN_ID_OFFSET); + if (new_id != pre_id) + return 0; + loop++; + usleep_range(10, 125); + } + return -ETIMEDOUT; +} + +static int read_shm_win(unsigned char __iomem *shm_base_addr, + const struct nct6692_reg *reg, + unsigned char idx_offset, + unsigned char *data) +{ + int err = shm_get_ready(shm_base_addr, reg); + + if (err) + return err; + *data = ioread8(shm_base_addr + SHM_WIN_DAT_OFFSET + reg->idx + idx_offset); + return 0; +} + +static int write_shm_win(unsigned char __iomem *shm_base_addr, + const struct nct6692_reg *reg, + unsigned char idx_offset, + unsigned char val) +{ + int err = shm_get_ready(shm_base_addr, reg); + + if (err) + return err; + iowrite8(val, shm_base_addr + SHM_WIN_DAT_OFFSET + reg->idx + idx_offset); + iowrite8(VAL_SHM_WIN_CTRL_WR, shm_base_addr + SHM_WIN_CTL_OFFSET); + err = shm_get_ready(shm_base_addr, reg); + return err; +} + +static int lenovo_se30_wdt_enable(struct lenovo_se30_wdt *data, unsigned int timeout) +{ + if (timeout) { + int err = write_shm_win(data->shm_base_addr, &data->wdt_cfg, 0, WDT_CFG_RESET); + + if (err) + return err; + } + return write_shm_win(data->shm_base_addr, &data->wdt_cnt, 0, timeout); +} + +static int lenovo_se30_wdt_start(struct watchdog_device *wdog) +{ + struct lenovo_se30_wdt *data = watchdog_get_drvdata(wdog); + + return lenovo_se30_wdt_enable(data, wdog->timeout); +} + +static int lenovo_se30_wdt_stop(struct watchdog_device *wdog) +{ + struct lenovo_se30_wdt *data = watchdog_get_drvdata(wdog); + + return lenovo_se30_wdt_enable(data, 0); +} + +static unsigned int lenovo_se30_wdt_get_timeleft(struct watchdog_device *wdog) +{ + struct lenovo_se30_wdt *data = watchdog_get_drvdata(wdog); + unsigned char timeleft; + int err; + + err = read_shm_win(data->shm_base_addr, &data->wdt_cnt, 0, &timeleft); + if (err) + return 0; + return timeleft; +} + +static int lenovo_se30_wdt_ping(struct watchdog_device *wdt) +{ + struct lenovo_se30_wdt *data = watchdog_get_drvdata(wdt); + int err = 0; + + /* + * Device does not support refreshing WDT_TIMER_REG register when + * the watchdog is active. Need to disable, feed and enable again + */ + err = lenovo_se30_wdt_enable(data, 0); + if (err) + return err; + + err = write_shm_win(data->shm_base_addr, &data->wdt_cnt, 0, wdt->timeout); + if (!err) + err = lenovo_se30_wdt_enable(data, wdt->timeout); + + return err; +} + +static const struct watchdog_info lenovo_se30_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, + .identity = "Lenovo SE30 watchdog", +}; + +static const struct watchdog_ops lenovo_se30_wdt_ops = { + .owner = THIS_MODULE, + .start = lenovo_se30_wdt_start, + .stop = lenovo_se30_wdt_stop, + .ping = lenovo_se30_wdt_ping, + .get_timeleft = lenovo_se30_wdt_get_timeleft, +}; + +static int lenovo_se30_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct lenovo_se30_wdt *priv; + unsigned long base_phys; + unsigned short val; + int err; + + err = superio_enter(UNLOCK_KEY, SIO_REG, LNV_SE30_NAME); + if (err) + return err; + + val = superio_inb(SIO_REG, CHIPID_REG) << 8; + val |= superio_inb(SIO_REG, CHIPID_REG + 1); + + if ((val & CHIPID_MASK) != LNV_SE30_ID) { + superio_exit(LOCK_KEY, SIO_REG); + return -ENODEV; + } + + superio_outb(SIO_REG, LDN_REG, LD_NUM_SHM); + base_phys = (superio_inb(SIO_REG, LD_BASE_ADDR) | + (superio_inb(SIO_REG, LD_BASE_ADDR + 1) << 8) | + (superio_inb(SIO_REG, LD_BASE_ADDR + 2) << 16) | + (superio_inb(SIO_REG, LD_BASE_ADDR + 3) << 24)) & + 0xFFFFFFFF; + + superio_exit(LOCK_KEY, SIO_REG); + if (base_phys == 0xFFFFFFFF || base_phys == 0) + return -ENODEV; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + if (!devm_request_mem_region(dev, base_phys, SHM_WIN_SIZE, LNV_SE30_NAME)) + return -EBUSY; + + priv->shm_base_addr = devm_ioremap(dev, base_phys, SHM_WIN_SIZE); + + priv->wdt_cfg.mod = WDT_MODULE; + priv->wdt_cfg.idx = WDT_CFG_INDEX; + priv->wdt_cnt.mod = WDT_MODULE; + priv->wdt_cnt.idx = WDT_CNT_INDEX; + + priv->wdt.ops = &lenovo_se30_wdt_ops; + priv->wdt.info = &lenovo_se30_wdt_info; + priv->wdt.timeout = WATCHDOG_TIMEOUT; /* Set default timeout */ + priv->wdt.min_timeout = MIN_TIMEOUT; + priv->wdt.max_timeout = MAX_TIMEOUT; + priv->wdt.parent = dev; + + watchdog_init_timeout(&priv->wdt, timeout, dev); + watchdog_set_drvdata(&priv->wdt, priv); + watchdog_set_nowayout(&priv->wdt, nowayout); + watchdog_stop_on_reboot(&priv->wdt); + watchdog_stop_on_unregister(&priv->wdt); + + return devm_watchdog_register_device(dev, &priv->wdt); +} + +static struct platform_device *pdev; + +static struct platform_driver lenovo_se30_wdt_driver = { + .driver = { + .name = LNV_SE30_NAME, + }, + .probe = lenovo_se30_wdt_probe, +}; + +static int lenovo_se30_create_platform_device(const struct dmi_system_id *id) +{ + int err; + + pdev = platform_device_alloc(LNV_SE30_NAME, -1); + if (!pdev) + return -ENOMEM; + + err = platform_device_add(pdev); + if (err) + platform_device_put(pdev); + + return err; +} + +static const struct dmi_system_id lenovo_se30_wdt_dmi_table[] __initconst = { + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NA"), + }, + .callback = lenovo_se30_create_platform_device, + }, + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NB"), + }, + .callback = lenovo_se30_create_platform_device, + }, + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NC"), + }, + .callback = lenovo_se30_create_platform_device, + }, + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NH"), + }, + .callback = lenovo_se30_create_platform_device, + }, + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NJ"), + }, + .callback = lenovo_se30_create_platform_device, + }, + { + .ident = "LENOVO-SE30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "11NK"), + }, + .callback = lenovo_se30_create_platform_device, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, lenovo_se30_wdt_dmi_table); + +static int __init lenovo_se30_wdt_init(void) +{ + if (!dmi_check_system(lenovo_se30_wdt_dmi_table)) + return -ENODEV; + + return platform_driver_register(&lenovo_se30_wdt_driver); +} + +static void __exit lenovo_se30_wdt_exit(void) +{ + if (pdev) + platform_device_unregister(pdev); + platform_driver_unregister(&lenovo_se30_wdt_driver); +} + +module_init(lenovo_se30_wdt_init); +module_exit(lenovo_se30_wdt_exit); + +MODULE_AUTHOR("Mark Pearson "); +MODULE_AUTHOR("David Ober "); +MODULE_DESCRIPTION("Lenovo SE30 watchdog driver"); +MODULE_LICENSE("GPL"); From 480ee8a260e6f87cbcdaff77ac2cbf6dc03f0f4f Mon Sep 17 00:00:00 2001 From: Kyunghwan Seo Date: Thu, 13 Feb 2025 09:41:04 +0900 Subject: [PATCH 0634/2157] watchdog: s3c2410_wdt: Fix PMU register bits for ExynosAutoV920 SoC Fix the PMU register bits for the ExynosAutoV920 SoC. This SoC has different bit information compared to its previous version, ExynosAutoV9, and we have made the necessary adjustments. rst_stat_bit: - ExynosAutoV920 cl0 : 0 - ExynosAutoV920 cl1 : 1 cnt_en_bit: - ExynosAutoV920 cl0 : 8 - ExynosAutoV920 cl1 : 8 Signed-off-by: Kyunghwan Seo Signed-off-by: Sangwook Shin Reviewed-by: Alim Akhtar Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250213004104.3881711-1-sw617.shin@samsung.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/s3c2410_wdt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 30450e99e5e9..bdd81d8074b2 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -72,6 +72,8 @@ #define EXYNOS850_CLUSTER1_WDTRESET_BIT 23 #define EXYNOSAUTOV9_CLUSTER0_WDTRESET_BIT 25 #define EXYNOSAUTOV9_CLUSTER1_WDTRESET_BIT 24 +#define EXYNOSAUTOV920_CLUSTER0_WDTRESET_BIT 0 +#define EXYNOSAUTOV920_CLUSTER1_WDTRESET_BIT 1 #define GS_CLUSTER0_NONCPU_OUT 0x1220 #define GS_CLUSTER1_NONCPU_OUT 0x1420 @@ -312,9 +314,9 @@ static const struct s3c2410_wdt_variant drv_data_exynosautov920_cl0 = { .mask_bit = 2, .mask_reset_inv = true, .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, - .rst_stat_bit = EXYNOSAUTOV9_CLUSTER0_WDTRESET_BIT, + .rst_stat_bit = EXYNOSAUTOV920_CLUSTER0_WDTRESET_BIT, .cnt_en_reg = EXYNOSAUTOV920_CLUSTER0_NONCPU_OUT, - .cnt_en_bit = 7, + .cnt_en_bit = 8, .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN | QUIRK_HAS_DBGACK_BIT, @@ -325,9 +327,9 @@ static const struct s3c2410_wdt_variant drv_data_exynosautov920_cl1 = { .mask_bit = 2, .mask_reset_inv = true, .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET, - .rst_stat_bit = EXYNOSAUTOV9_CLUSTER1_WDTRESET_BIT, + .rst_stat_bit = EXYNOSAUTOV920_CLUSTER1_WDTRESET_BIT, .cnt_en_reg = EXYNOSAUTOV920_CLUSTER1_NONCPU_OUT, - .cnt_en_bit = 7, + .cnt_en_bit = 8, .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET | QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN | QUIRK_HAS_DBGACK_BIT, From f1c16aa612dcc5b03f5d6f39ca53f791d36850e1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Mar 2025 15:45:35 +0200 Subject: [PATCH 0635/2157] watchdog: nic7018_wdt: tidy up ACPI ID table Tidy up ACPI ID table: - drop ACPI_PTR() and hence replace acpi.h with mod_devicetable.h - remove explicit driver_data initializer - drop comma in the terminator entry With that done, extend compile test coverage. Signed-off-by: Andy Shevchenko Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250304140114.1812452-1-andriy.shevchenko@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 3 ++- drivers/watchdog/nic7018_wdt.c | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 123d1f564f7c..0d8d37f712e8 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1743,7 +1743,8 @@ config NI903X_WDT config NIC7018_WDT tristate "NIC7018 Watchdog" - depends on X86 && ACPI + depends on HAS_IOPORT + depends on ACPI || COMPILE_TEST select WATCHDOG_CORE help Support for National Instruments NIC7018 Watchdog. diff --git a/drivers/watchdog/nic7018_wdt.c b/drivers/watchdog/nic7018_wdt.c index 44982b37ba6f..44b5298f599a 100644 --- a/drivers/watchdog/nic7018_wdt.c +++ b/drivers/watchdog/nic7018_wdt.c @@ -3,12 +3,13 @@ * Copyright (C) 2016 National Instruments Corp. */ -#include #include #include #include +#include #include #include +#include #include #define LOCK 0xA5 @@ -229,8 +230,8 @@ static void nic7018_remove(struct platform_device *pdev) } static const struct acpi_device_id nic7018_device_ids[] = { - {"NIC7018", 0}, - {"", 0}, + { "NIC7018" }, + { } }; MODULE_DEVICE_TABLE(acpi, nic7018_device_ids); @@ -239,7 +240,7 @@ static struct platform_driver watchdog_driver = { .remove = nic7018_remove, .driver = { .name = KBUILD_MODNAME, - .acpi_match_table = ACPI_PTR(nic7018_device_ids), + .acpi_match_table = nic7018_device_ids, }, }; From d127d9ce2c8ee87d51fdcfff7895661a3c06fb24 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 7 Mar 2025 10:44:24 -0500 Subject: [PATCH 0636/2157] dt-bindings: watchdog: fsl-imx7ulp-wdt: Add i.MX94 support Add compatible string "fsl,imx94-wdt" for the i.MX94 chip, which is backward compatible with i.MX93. Set it to fall back to "fsl,imx93-wdt". Signed-off-by: Frank Li Acked-by: Conor Dooley Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250307154424.2613795-1-Frank.Li@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml index a09686b3030d..6ec391b9723a 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml @@ -22,6 +22,10 @@ properties: - const: fsl,imx8ulp-wdt - const: fsl,imx7ulp-wdt - const: fsl,imx93-wdt + - items: + - enum: + - fsl,imx94-wdt + - const: fsl,imx93-wdt reg: maxItems: 1 From 3641c6392695b0846e80a4c1245d7139c8ed7d48 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:00 +0800 Subject: [PATCH 0637/2157] Documentation: driver: add SoundWire BRA description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Bulk Register Access protocol was left as a TODO topic since 2018. It's time to document this protocol and the design of its Linux support. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Reviewed-by: Bagas Sanjaya Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- Documentation/driver-api/soundwire/bra.rst | 336 ++++++++++++++++++ .../driver-api/soundwire/bra_cadence.rst | 66 ++++ Documentation/driver-api/soundwire/index.rst | 2 + .../driver-api/soundwire/summary.rst | 8 - 4 files changed, 404 insertions(+), 8 deletions(-) create mode 100644 Documentation/driver-api/soundwire/bra.rst create mode 100644 Documentation/driver-api/soundwire/bra_cadence.rst diff --git a/Documentation/driver-api/soundwire/bra.rst b/Documentation/driver-api/soundwire/bra.rst new file mode 100644 index 000000000000..8500253fa3e8 --- /dev/null +++ b/Documentation/driver-api/soundwire/bra.rst @@ -0,0 +1,336 @@ +========================== +Bulk Register Access (BRA) +========================== + +Conventions +----------- + +Capitalized words used in this documentation are intentional and refer +to concepts of the SoundWire 1.x specification. + +Introduction +------------ + +The SoundWire 1.x specification provides a mechanism to speed-up +command/control transfers by reclaiming parts of the audio +bandwidth. The Bulk Register Access (BRA) protocol is a standard +solution based on the Bulk Payload Transport (BPT) definitions. + +The regular control channel uses Column 0 and can only send/retrieve +one byte per frame with write/read commands. With a typical 48kHz +frame rate, only 48kB/s can be transferred. + +The optional Bulk Register Access capability can transmit up to 12 +Mbits/s and reduce transfer times by several orders of magnitude, but +has multiple design constraints: + + (1) Each frame can only support a read or a write transfer, with a + 10-byte overhead per frame (header and footer response). + + (2) The read/writes SHALL be from/to contiguous register addresses + in the same frame. A fragmented register space decreases the + efficiency of the protocol by requiring multiple BRA transfers + scheduled in different frames. + + (3) The targeted Peripheral device SHALL support the optional Data + Port 0, and likewise the Manager SHALL expose audio-like Ports + to insert BRA packets in the audio payload using the concepts of + Sample Interval, HSTART, HSTOP, etc. + + (4) The BRA transport efficiency depends on the available + bandwidth. If there are no on-going audio transfers, the entire + frame minus Column 0 can be reclaimed for BRA. The frame shape + also impacts efficiency: since Column0 cannot be used for + BTP/BRA, the frame should rely on a large number of columns and + minimize the number of rows. The bus clock should be as high as + possible. + + (5) The number of bits transferred per frame SHALL be a multiple of + 8 bits. Padding bits SHALL be inserted if necessary at the end + of the data. + + (6) The regular read/write commands can be issued in parallel with + BRA transfers. This is convenient to e.g. deal with alerts, jack + detection or change the volume during firmware download, but + accessing the same address with two independent protocols has to + be avoided to avoid undefined behavior. + + (7) Some implementations may not be capable of handling the + bandwidth of the BRA protocol, e.g. in the case of a slow I2C + bus behind the SoundWire IP. In this case, the transfers may + need to be spaced in time or flow-controlled. + + (8) Each BRA packet SHALL be marked as 'Active' when valid data is + to be transmitted. This allows for software to allocate a BRA + stream but not transmit/discard data while processing the + results or preparing the next batch of data, or allowing the + peripheral to deal with the previous transfer. In addition BRA + transfer can be started early on without data being ready. + + (9) Up to 470 bytes may be transmitted per frame. + + (10) The address is represented with 32 bits and does not rely on + the paging registers used for the regular command/control + protocol in Column 0. + + +Error checking +-------------- + +Firmware download is one of the key usages of the Bulk Register Access +protocol. To make sure the binary data integrity is not compromised by +transmission or programming errors, each BRA packet provides: + + (1) A CRC on the 7-byte header. This CRC helps the Peripheral Device + check if it is addressed and set the start address and number of + bytes. The Peripheral Device provides a response in Byte 7. + + (2) A CRC on the data block (header excluded). This CRC is + transmitted as the last-but-one byte in the packet, prior to the + footer response. + +The header response can be one of: + (a) Ack + (b) Nak + (c) Not Ready + +The footer response can be one of: + (1) Ack + (2) Nak (CRC failure) + (3) Good (operation completed) + (4) Bad (operation failed) + +Example frame +------------- + +The example below is not to scale and makes simplifying assumptions +for clarity. The different chunks in the BRA packets are not required +to start on a new SoundWire Row, and the scale of data may vary. + + :: + + +---+--------------------------------------------+ + + | | + + | BRA HEADER | + + | | + + +--------------------------------------------+ + + C | HEADER CRC | + + O +--------------------------------------------+ + + M | HEADER RESPONSE | + + M +--------------------------------------------+ + + A | | + + N | | + + D | DATA | + + | | + + | | + + | | + + +--------------------------------------------+ + + | DATA CRC | + + +--------------------------------------------+ + + | FOOTER RESPONSE | + +---+--------------------------------------------+ + + +Assuming the frame uses N columns, the configuration shown above can +be programmed by setting the DP0 registers as: + + - HSTART = 1 + - HSTOP = N - 1 + - Sampling Interval = N + - WordLength = N - 1 + +Addressing restrictions +----------------------- + +The Device Number specified in the Header follows the SoundWire +definitions, and broadcast and group addressing are permitted. For now +the Linux implementation only allows for a single BPT transfer to a +single device at a time. This might be revisited at a later point as +an optimization to send the same firmware to multiple devices, but +this would only be beneficial for single-link solutions. + +In the case of multiple Peripheral devices attached to different +Managers, the broadcast and group addressing is not supported by the +SoundWire specification. Each device must be handled with separate BRA +streams, possibly in parallel - the links are really independent. + +Unsupported features +-------------------- + +The Bulk Register Access specification provides a number of +capabilities that are not supported in known implementations, such as: + + (1) Transfers initiated by a Peripheral Device. The BRA Initiator is + always the Manager Device. + + (2) Flow-control capabilities and retransmission based on the + 'NotReady' header response require extra buffering in the + SoundWire IP and are not implemented. + +Bi-directional handling +----------------------- + +The BRA protocol can handle writes as well as reads, and in each +packet the header and footer response are provided by the Peripheral +Target device. On the Peripheral device, the BRA protocol is handled +by a single DP0 data port, and at the low-level the bus ownership can +will change for header/footer response as well as the data transmitted +during a read. + +On the host side, most implementations rely on a Port-like concept, +with two FIFOs consuming/generating data transfers in parallel +(Host->Peripheral and Peripheral->Host). The amount of data +consumed/produced by these FIFOs is not symmetrical, as a result +hardware typically inserts markers to help software and hardware +interpret raw data + +Each packet will typically have: + + (1) a 'Start of Packet' indicator. + + (2) an 'End of Packet' indicator. + + (3) a packet identifier to correlate the data requested and + transmitted, and the error status for each frame + +Hardware implementations can check errors at the frame level, and +retry a transfer in case of errors. However, as for the flow-control +case, this requires extra buffering and intelligence in the +hardware. The Linux support assumes that the entire transfer is +cancelled if a single error is detected in one of the responses. + +Abstraction required +~~~~~~~~~~~~~~~~~~~~ + +There are no standard registers or mandatory implementation at the +Manager level, so the low-level BPT/BRA details must be hidden in +Manager-specific code. For example the Cadence IP format above is not +known to the codec drivers. + +Likewise, codec drivers should not have to know the frame size. The +computation of CRC and handling of responses is handled in helpers and +Manager-specific code. + +The host BRA driver may also have restrictions on pages allocated for +DMA, or other host-DSP communication protocols. The codec driver +should not be aware of any of these restrictions, since it might be +reused in combination with different implementations of Manager IPs. + +Concurrency between BRA and regular read/write +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The existing 'nread/nwrite' API already relies on a notion of start +address and number of bytes, so it would be possible to extend this +API with a 'hint' requesting BPT/BRA be used. + +However BRA transfers could be quite long, and the use of a single +mutex for regular read/write and BRA is a show-stopper. Independent +operation of the control/command and BRA transfers is a fundamental +requirement, e.g. to change the volume level with the existing regmap +interface while downloading firmware. The integration must however +ensure that there are no concurrent access to the same address with +the command/control protocol and the BRA protocol. + +In addition, the 'sdw_msg' structure hard-codes support for 16-bit +addresses and paging registers which are irrelevant for BPT/BRA +support based on native 32-bit addresses. A separate API with +'sdw_bpt_msg' makes more sense. + +One possible strategy to speed-up all initialization tasks would be to +start a BRA transfer for firmware download, then deal with all the +"regular" read/writes in parallel with the command channel, and last +to wait for the BRA transfers to complete. This would allow for a +degree of overlap instead of a purely sequential solution. As such, +the BRA API must support async transfers and expose a separate wait +function. + + +Peripheral/bus interface +------------------------ + +The bus interface for BPT/BRA is made of two functions: + + - sdw_bpt_send_async(bpt_message) + + This function sends the data using the Manager + implementation-defined capabilities (typically DMA or IPC + protocol). + + Queueing is currently not supported, the caller + needs to wait for completion of the requested transfer. + + - sdw_bpt_wait() + + This function waits for the entire message provided by the + codec driver in the 'send_async' stage. Intermediate status for + smaller chunks will not be provided back to the codec driver, + only a return code will be provided. + +Regmap use +~~~~~~~~~~ + +Existing codec drivers rely on regmap to download firmware to +Peripherals. regmap exposes an async interface similar to the +send/wait API suggested above, so at a high-level it would seem +natural to combine BRA and regmap. The regmap layer could check if BRA +is available or not, and use a regular read-write command channel in +the latter case. + +The regmap integration will be handled in a second step. + +BRA stream model +---------------- + +For regular audio transfers, the machine driver exposes a dailink +connecting CPU DAI(s) and Codec DAI(s). + +This model is not required BRA support: + + (1) The SoundWire DAIs are mainly wrappers for SoundWire Data + Ports, with possibly some analog or audio conversion + capabilities bolted behind the Data Port. In the context of + BRA, the DP0 is the destination. DP0 registers are standard and + can be programmed blindly without knowing what Peripheral is + connected to each link. In addition, if there are multiple + Peripherals on a link and some of them do not support DP0, the + write commands to program DP0 registers will generate harmless + COMMAND_IGNORED responses that will be wired-ORed with + responses from Peripherals which support DP0. In other words, + the DP0 programming can be done with broadcast commands, and + the information on the Target device can be added only in the + BRA Header. + + (2) At the CPU level, the DAI concept is not useful for BRA; the + machine driver will not create a dailink relying on DP0. The + only concept that is needed is the notion of port. + + (3) The stream concept relies on a set of master_rt and slave_rt + concepts. All of these entities represent ports and not DAIs. + + (4) With the assumption that a single BRA stream is used per link, + that stream can connect master ports as well as all peripheral + DP0 ports. + + (5) BRA transfers only make sense in the context of one + Manager/Link, so the BRA stream handling does not rely on the + concept of multi-link aggregation allowed by regular DAI links. + +Audio DMA support +----------------- + +Some DMAs, such as HDaudio, require an audio format field to be +set. This format is in turn used to define acceptable bursts. BPT/BRA +support is not fully compatible with these definitions in that the +format and bandwidth may vary between read and write commands. + +In addition, on Intel HDaudio Intel platforms the DMAs need to be +programmed with a PCM format matching the bandwidth of the BPT/BRA +transfer. The format is based on 192kHz 32-bit samples, and the number +of channels varies to adjust the bandwidth. The notion of channel is +completely notional since the data is not typical audio +PCM. Programming such channels helps reserve enough bandwidth and adjust +FIFO sizes to avoid xruns. + +Alignment requirements are currently not enforced at the core level +but at the platform-level, e.g. for Intel the data sizes must be +multiples of 32 bytes. diff --git a/Documentation/driver-api/soundwire/bra_cadence.rst b/Documentation/driver-api/soundwire/bra_cadence.rst new file mode 100644 index 000000000000..4560752e8f47 --- /dev/null +++ b/Documentation/driver-api/soundwire/bra_cadence.rst @@ -0,0 +1,66 @@ +Cadence IP BRA support +---------------------- + +Format requirements +~~~~~~~~~~~~~~~~~~~ + +The Cadence IP relies on PDI0 for TX and PDI1 for RX. The data needs +to be formatted with the following conventions: + + (1) all Data is stored in bits 15..0 of the 32-bit PDI FIFOs. + + (2) the start of packet is BIT(31). + + (3) the end of packet is BIT(30). + + (4) A packet ID is stored in bits 19..16. This packet ID is + determined by software and is typically a rolling counter. + + (5) Padding shall be inserted as needed so that the Header CRC, + Header response, Footer CRC, Footer response are always in + Byte0. Padding is inserted by software for writes, and on reads + software shall discard the padding added by the hardware. + +Example format +~~~~~~~~~~~~~~ + +The following table represents the sequence provided to PDI0 for a +write command followed by a read command. + +:: + + +---+---+--------+---------------+---------------+ + + 1 | 0 | ID = 0 | WR HDR[1] | WR HDR[0] | + + | | | WR HDR[3] | WR HDR[2] | + + | | | WR HDR[5] | WR HDR[4] | + + | | | pad | WR HDR CRC | + + | | | WR Data[1] | WR Data[0] | + + | | | WR Data[3] | WR Data[2] | + + | | | WR Data[n-2] | WR Data[n-3] | + + | | | pad | WR Data[n-1] | + + 0 | 1 | | pad | WR Data CRC | + +---+---+--------+---------------+---------------+ + + 1 | 0 | ID = 1 | RD HDR[1] | RD HDR[0] | + + | | | RD HDR[3] | RD HDR[2] | + + | | | RD HDR[5] | RD HDR[4] | + + 0 | 1 | | pad | RD HDR CRC | + +---+---+--------+---------------+---------------+ + + +The table below represents the data received on PDI1 for the same +write command followed by a read command. + +:: + + +---+---+--------+---------------+---------------+ + + 1 | 0 | ID = 0 | pad | WR Hdr Rsp | + + 0 | 1 | | pad | WR Ftr Rsp | + +---+---+--------+---------------+---------------+ + + 1 | 0 | ID = 0 | pad | Rd Hdr Rsp | + + | | | RD Data[1] | RD Data[0] | + + | | | RD Data[3] | RD Data[2] | + + | | | RD HDR[n-2] | RD Data[n-3] | + + | | | pad | RD Data[n-1] | + + | | | pad | RD Data CRC | + + 0 | 1 | | pad | RD Ftr Rsp | + +---+---+--------+---------------+---------------+ diff --git a/Documentation/driver-api/soundwire/index.rst b/Documentation/driver-api/soundwire/index.rst index 234911a0db99..ef8d90dfbdde 100644 --- a/Documentation/driver-api/soundwire/index.rst +++ b/Documentation/driver-api/soundwire/index.rst @@ -9,6 +9,8 @@ SoundWire Documentation stream error_handling locking + bra + bra_cadence .. only:: subproject and html diff --git a/Documentation/driver-api/soundwire/summary.rst b/Documentation/driver-api/soundwire/summary.rst index 01dcb954f6d7..df78053743b5 100644 --- a/Documentation/driver-api/soundwire/summary.rst +++ b/Documentation/driver-api/soundwire/summary.rst @@ -184,14 +184,6 @@ function that provides capabilities information. Bus needs to know a set of Slave capabilities to program Slave registers and to control the Bus reconfigurations. -Future enhancements to be done -============================== - - (1) Bulk Register Access (BRA) transfers. - - - (2) Multiple data lane support. - Links ===== From 3e3ae0c8fccc51021136b192ec88e94a1bc5704c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:01 +0800 Subject: [PATCH 0638/2157] soundwire: cadence: add BTP support for DP0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The register definitions are missing a BULK_ENABLE bitfield which must be set for DP0. In addition, the existing mapping from PDI to Data Port is 1:1. That's fine for PCM streams which are by construction in one direction only. The BTP/BRA protocol is bidirectional and relies on DP0 only, which breaks the 1:1 mapping. DP0 MUST be mapped to both PDI0 and PDI1, with PDI0 taking care of the TX direction and PDI1 of the RX direction. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 68be8ff3f02b..b29929b59510 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -184,6 +184,7 @@ MODULE_PARM_DESC(cdns_mcp_int_mask, "Cadence MCP IntMask"); #define CDNS_PORTCTRL_TEST_FAILED BIT(1) #define CDNS_PORTCTRL_DIRN BIT(7) #define CDNS_PORTCTRL_BANK_INVERT BIT(8) +#define CDNS_PORTCTRL_BULK_ENABLE BIT(16) #define CDNS_PORT_OFFSET 0x80 @@ -1917,13 +1918,20 @@ void sdw_cdns_config_stream(struct sdw_cdns *cdns, if (cdns->bus.params.m_data_mode != SDW_PORT_DATA_MODE_NORMAL) val |= CDNS_PORTCTRL_TEST_FAILED; + } else if (pdi->num == 0 || pdi->num == 1) { + val |= CDNS_PORTCTRL_BULK_ENABLE; } offset = CDNS_PORTCTRL + pdi->num * CDNS_PORT_OFFSET; cdns_updatel(cdns, offset, - CDNS_PORTCTRL_DIRN | CDNS_PORTCTRL_TEST_FAILED, + CDNS_PORTCTRL_DIRN | CDNS_PORTCTRL_TEST_FAILED | + CDNS_PORTCTRL_BULK_ENABLE, val); - val = pdi->num; + /* The DataPort0 needs to be mapped to both PDI0 and PDI1 ! */ + if (pdi->num == 1) + val = 0; + else + val = pdi->num; val |= CDNS_PDI_CONFIG_SOFT_RESET; val |= FIELD_PREP(CDNS_PDI_CONFIG_CHANNEL, (1 << ch) - 1); cdns_writel(cdns, CDNS_PDI_CONFIG(pdi->num), val); From df896e4f7cf5cc3abffb186e2b6815b785500b57 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:02 +0800 Subject: [PATCH 0639/2157] soundwire: extend sdw_stream_type to BPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BPT/BRA need to be special cased, i.e. there's no point in using the bandwidth allocation since the entire frame can be used. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 2d6c30317792..857b670eb9a5 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -150,12 +150,14 @@ enum sdw_dpn_pkg_mode { * * @SDW_STREAM_PCM: PCM data stream * @SDW_STREAM_PDM: PDM data stream + * @SDW_STREAM_BPT: BPT data stream * * spec doesn't define this, but is used in implementation */ enum sdw_stream_type { SDW_STREAM_PCM = 0, SDW_STREAM_PDM = 1, + SDW_STREAM_BPT = 2, }; /** @@ -879,7 +881,7 @@ struct sdw_port_config { * @ch_count: Channel count of the stream * @bps: Number of bits per audio sample * @direction: Data direction - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT */ struct sdw_stream_config { unsigned int frame_rate; @@ -929,7 +931,7 @@ struct sdw_stream_params { * @name: SoundWire stream name * @params: Stream parameters * @state: Current state of the stream - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance From dc90bbefa792031d89fe2af9ad4a6febd6be96a9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:03 +0800 Subject: [PATCH 0640/2157] soundwire: stream: extend sdw_alloc_stream() to take 'type' parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the existing definition of sdw_stream_runtime, the 'type' member is never set and defaults to PCM. To prepare for the BPT/BRA support, we need to special-case streams and make use of the 'type'. No functional change for now, the implicit PCM type is now explicit. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- Documentation/driver-api/soundwire/stream.rst | 2 +- drivers/soundwire/stream.c | 6 ++++-- include/linux/soundwire/sdw.h | 2 +- sound/soc/qcom/sdw.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst index 2a794484f62c..d66201299633 100644 --- a/Documentation/driver-api/soundwire/stream.rst +++ b/Documentation/driver-api/soundwire/stream.rst @@ -291,7 +291,7 @@ per stream. From ASoC DPCM framework, this stream state maybe linked to .. code-block:: c - int sdw_alloc_stream(char * stream_name); + int sdw_alloc_stream(char * stream_name, enum sdw_stream_type type); The SoundWire core provides a sdw_startup_stream() helper function, typically called during a dailink .startup() callback, which performs diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index dd8de88d8b46..ae6d1c767ab9 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1806,12 +1806,13 @@ static int set_stream(struct snd_pcm_substream *substream, * sdw_alloc_stream() - Allocate and return stream runtime * * @stream_name: SoundWire stream name + * @type: stream type (could be PCM ,PDM or BPT) * * Allocates a SoundWire stream runtime instance. * sdw_alloc_stream should be called only once per stream. Typically * invoked from ALSA/ASoC machine/platform driver. */ -struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name) +struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name, enum sdw_stream_type type) { struct sdw_stream_runtime *stream; @@ -1823,6 +1824,7 @@ struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name) INIT_LIST_HEAD(&stream->master_list); stream->state = SDW_STREAM_ALLOCATED; stream->m_rt_count = 0; + stream->type = type; return stream; } @@ -1851,7 +1853,7 @@ int sdw_startup_stream(void *sdw_substream) if (!name) return -ENOMEM; - sdw_stream = sdw_alloc_stream(name); + sdw_stream = sdw_alloc_stream(name, SDW_STREAM_PCM); if (!sdw_stream) { dev_err(rtd->dev, "alloc stream failed for substream DAI %s\n", substream->name); ret = -ENOMEM; diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 857b670eb9a5..ba58e2a52322 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1019,7 +1019,7 @@ struct sdw_bus { unsigned int lane_used_bandwidth[SDW_MAX_LANES]; }; -struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); +struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name, enum sdw_stream_type type); void sdw_release_stream(struct sdw_stream_runtime *stream); int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream); diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c index f2eda2ff46c0..875da4adb1d7 100644 --- a/sound/soc/qcom/sdw.c +++ b/sound/soc/qcom/sdw.c @@ -27,7 +27,7 @@ int qcom_snd_sdw_startup(struct snd_pcm_substream *substream) struct snd_soc_dai *codec_dai; int ret, i; - sruntime = sdw_alloc_stream(cpu_dai->name); + sruntime = sdw_alloc_stream(cpu_dai->name, SDW_STREAM_PCM); if (!sruntime) return -ENOMEM; From 00f57195f10fa7e2fa2ceeac0b2768ae544fee2e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:04 +0800 Subject: [PATCH 0641/2157] soundwire: stream: special-case the bus compute_params() routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For BPT support, we want to allocate the entire audio payload and bypass the allocation based on PCM/PDM parameters. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- .../soundwire/generic_bandwidth_allocation.c | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index 59965f43c2fb..b646c2ffe84a 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ b/drivers/soundwire/generic_bandwidth_allocation.c @@ -86,6 +86,49 @@ void sdw_compute_slave_ports(struct sdw_master_runtime *m_rt, } EXPORT_SYMBOL(sdw_compute_slave_ports); +static void sdw_compute_dp0_slave_ports(struct sdw_master_runtime *m_rt) +{ + struct sdw_bus *bus = m_rt->bus; + struct sdw_slave_runtime *s_rt; + struct sdw_port_runtime *p_rt; + + list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) { + list_for_each_entry(p_rt, &s_rt->port_list, port_node) { + sdw_fill_xport_params(&p_rt->transport_params, p_rt->num, false, + SDW_BLK_GRP_CNT_1, bus->params.col, 0, 0, 1, + bus->params.col - 1, SDW_BLK_PKG_PER_PORT, 0x0); + + sdw_fill_port_params(&p_rt->port_params, p_rt->num, bus->params.col - 1, + SDW_PORT_FLOW_MODE_ISOCH, SDW_PORT_DATA_MODE_NORMAL); + } + } +} + +static void sdw_compute_dp0_master_ports(struct sdw_master_runtime *m_rt) +{ + struct sdw_port_runtime *p_rt; + struct sdw_bus *bus = m_rt->bus; + + list_for_each_entry(p_rt, &m_rt->port_list, port_node) { + sdw_fill_xport_params(&p_rt->transport_params, p_rt->num, false, + SDW_BLK_GRP_CNT_1, bus->params.col, 0, 0, 1, + bus->params.col - 1, SDW_BLK_PKG_PER_PORT, 0x0); + + sdw_fill_port_params(&p_rt->port_params, p_rt->num, bus->params.col - 1, + SDW_PORT_FLOW_MODE_ISOCH, SDW_PORT_DATA_MODE_NORMAL); + } +} + +static void sdw_compute_dp0_port_params(struct sdw_bus *bus) +{ + struct sdw_master_runtime *m_rt; + + list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + sdw_compute_dp0_master_ports(m_rt); + sdw_compute_dp0_slave_ports(m_rt); + } +} + static void sdw_compute_master_ports(struct sdw_master_runtime *m_rt, struct sdw_group_params *params, int *port_bo, int hstop) @@ -618,6 +661,11 @@ int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream) if (ret < 0) return ret; + if (stream->type == SDW_STREAM_BPT) { + sdw_compute_dp0_port_params(bus); + return 0; + } + /* Compute transport and port params */ ret = sdw_compute_port_params(bus, stream); if (ret < 0) { From b422b7237ead30bfb90f52c7563ef518a5849cd9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:05 +0800 Subject: [PATCH 0642/2157] soundwire: stream: reuse existing code for BPT stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP0 (Data Port 0) is very similar to regular data ports, with minor tweaks we can reuse the same code. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-7-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 104 ++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 31 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index ae6d1c767ab9..d29d85d809c8 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -88,11 +88,14 @@ static int _sdw_program_slave_port_params(struct sdw_bus *bus, return ret; } - /* Program DPN_BlockCtrl3 register */ - ret = sdw_write_no_pm(slave, addr2, t_params->blk_pkg_mode); - if (ret < 0) { - dev_err(bus->dev, "DPN_BlockCtrl3 register write failed\n"); - return ret; + /* DP0 does not implement BlockCtrl3 */ + if (t_params->port_num) { + /* Program DPN_BlockCtrl3 register */ + ret = sdw_write_no_pm(slave, addr2, t_params->blk_pkg_mode); + if (ret < 0) { + dev_err(bus->dev, "DPN_BlockCtrl3 register write failed\n"); + return ret; + } } /* @@ -131,18 +134,28 @@ static int sdw_program_slave_port_params(struct sdw_bus *bus, struct sdw_port_params *p_params = &p_rt->port_params; struct sdw_slave_prop *slave_prop = &s_rt->slave->prop; u32 addr1, addr2, addr3, addr4, addr5, addr6; - struct sdw_dpn_prop *dpn_prop; + enum sdw_dpn_type port_type; + bool read_only_wordlength; int ret; u8 wbuf; if (s_rt->slave->is_mockup_device) return 0; - dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave, - s_rt->direction, - t_params->port_num); - if (!dpn_prop) - return -EINVAL; + if (t_params->port_num) { + struct sdw_dpn_prop *dpn_prop; + + dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave, s_rt->direction, + t_params->port_num); + if (!dpn_prop) + return -EINVAL; + + read_only_wordlength = dpn_prop->read_only_wordlength; + port_type = dpn_prop->type; + } else { + read_only_wordlength = false; + port_type = SDW_DPN_FULL; + } addr1 = SDW_DPN_PORTCTRL(t_params->port_num); addr2 = SDW_DPN_BLOCKCTRL1(t_params->port_num); @@ -172,7 +185,7 @@ static int sdw_program_slave_port_params(struct sdw_bus *bus, return ret; } - if (!dpn_prop->read_only_wordlength) { + if (!read_only_wordlength) { /* Program DPN_BlockCtrl1 register */ ret = sdw_write_no_pm(s_rt->slave, addr2, (p_params->bps - 1)); if (ret < 0) { @@ -224,9 +237,9 @@ static int sdw_program_slave_port_params(struct sdw_bus *bus, } } - if (dpn_prop->type != SDW_DPN_SIMPLE) { + if (port_type != SDW_DPN_SIMPLE) { ret = _sdw_program_slave_port_params(bus, s_rt->slave, - t_params, dpn_prop->type); + t_params, port_type); if (ret < 0) dev_err(&s_rt->slave->dev, "Transport reg write failed for port: %d\n", @@ -433,6 +446,9 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, struct completion *port_ready; struct sdw_dpn_prop *dpn_prop; struct sdw_prepare_ch prep_ch; + u32 imp_def_interrupts; + bool simple_ch_prep_sm; + u32 ch_prep_timeout; bool intr = false; int ret = 0, val; u32 addr; @@ -440,20 +456,35 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, prep_ch.num = p_rt->num; prep_ch.ch_mask = p_rt->ch_mask; - dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave, - s_rt->direction, - prep_ch.num); - if (!dpn_prop) { - dev_err(bus->dev, - "Slave Port:%d properties not found\n", prep_ch.num); - return -EINVAL; + if (p_rt->num) { + dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave, s_rt->direction, prep_ch.num); + if (!dpn_prop) { + dev_err(bus->dev, + "Slave Port:%d properties not found\n", prep_ch.num); + return -EINVAL; + } + + imp_def_interrupts = dpn_prop->imp_def_interrupts; + simple_ch_prep_sm = dpn_prop->simple_ch_prep_sm; + ch_prep_timeout = dpn_prop->ch_prep_timeout; + } else { + struct sdw_dp0_prop *dp0_prop = s_rt->slave->prop.dp0_prop; + + if (!dp0_prop) { + dev_err(bus->dev, + "Slave DP0 properties not found\n"); + return -EINVAL; + } + imp_def_interrupts = dp0_prop->imp_def_interrupts; + simple_ch_prep_sm = dp0_prop->simple_ch_prep_sm; + ch_prep_timeout = dp0_prop->ch_prep_timeout; } prep_ch.prepare = prep; prep_ch.bank = bus->params.next_bank; - if (dpn_prop->imp_def_interrupts || !dpn_prop->simple_ch_prep_sm || + if (imp_def_interrupts || !simple_ch_prep_sm || bus->params.s_data_mode != SDW_PORT_DATA_MODE_NORMAL) intr = true; @@ -464,7 +495,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, */ if (prep && intr) { ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep, - dpn_prop->imp_def_interrupts); + imp_def_interrupts); if (ret < 0) return ret; } @@ -473,7 +504,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, sdw_do_port_prep(s_rt, prep_ch, prep ? SDW_OPS_PORT_PRE_PREP : SDW_OPS_PORT_PRE_DEPREP); /* Prepare Slave port implementing CP_SM */ - if (!dpn_prop->simple_ch_prep_sm) { + if (!simple_ch_prep_sm) { addr = SDW_DPN_PREPARECTRL(p_rt->num); if (prep) @@ -490,7 +521,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, /* Wait for completion on port ready */ port_ready = &s_rt->slave->port_ready[prep_ch.num]; wait_for_completion_timeout(port_ready, - msecs_to_jiffies(dpn_prop->ch_prep_timeout)); + msecs_to_jiffies(ch_prep_timeout)); val = sdw_read_no_pm(s_rt->slave, SDW_DPN_PREPARESTATUS(p_rt->num)); if ((val < 0) || (val & p_rt->ch_mask)) { @@ -507,7 +538,7 @@ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, /* Disable interrupt after Port de-prepare */ if (!prep && intr) ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep, - dpn_prop->imp_def_interrupts); + imp_def_interrupts); return ret; } @@ -1008,7 +1039,8 @@ static int sdw_slave_port_is_valid_range(struct device *dev, int num) static int sdw_slave_port_config(struct sdw_slave *slave, struct sdw_slave_runtime *s_rt, - const struct sdw_port_config *port_config) + const struct sdw_port_config *port_config, + bool is_bpt_stream) { struct sdw_port_runtime *p_rt; int ret; @@ -1020,9 +1052,13 @@ static int sdw_slave_port_config(struct sdw_slave *slave, * TODO: Check valid port range as defined by DisCo/ * slave */ - ret = sdw_slave_port_is_valid_range(&slave->dev, port_config[i].num); - if (ret < 0) - return ret; + if (!is_bpt_stream) { + ret = sdw_slave_port_is_valid_range(&slave->dev, port_config[i].num); + if (ret < 0) + return ret; + } else if (port_config[i].num) { + return -EINVAL; + } ret = sdw_port_config(p_rt, port_config, i); if (ret < 0) @@ -1331,6 +1367,11 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave, u8 num_ports; int i; + if (!port_num) { + dev_err(&slave->dev, "%s: port_num is zero\n", __func__); + return NULL; + } + if (direction == SDW_DATA_DIR_TX) { num_ports = hweight32(slave->prop.source_ports); dpn_prop = slave->prop.src_dpn_prop; @@ -2116,7 +2157,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, if (ret) goto unlock; - ret = sdw_slave_port_config(slave, s_rt, port_config); + ret = sdw_slave_port_config(slave, s_rt, port_config, + stream->type == SDW_STREAM_BPT); if (ret) goto unlock; From 9a756289ac5a8517dc643555d784d830b61576ad Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:06 +0800 Subject: [PATCH 0643/2157] soundwire: bus: add send_async/wait APIs for BPT protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add definitions and helpers for the BPT/BRA protocol. Peripheral drivers (aka ASoC codec drivers) can use this API to send bulk data such as firmware or tables. The design intent is however NOT to directly use this API but to rely on an intermediate regmap layer. The API is only available when no other audio streams have been allocated, and only one BTP/BRA stream is allowed per link. To avoid the addition of yet another lock, the refcount tests are handled in the stream master_runtime alloc/free routines where the bus_lock is already held. Another benefit of this approach is that the same bus_lock is used to handle runtime and port linked lists, which reduces the potential for misaligned configurations. In addition to exclusion with audio streams, BPT transfers have a lot of overhead, specifically registers writes are needed to enable transport in DP0. Most DMAs don't handle too well very small data sets and they may have alignment limitations. The size and alignment requirements are for now not handled by the core but must be checked by platform-specific drivers. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-8-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 43 +++++++++++++++++++++++++++++++++++ drivers/soundwire/bus.h | 18 +++++++++++++++ drivers/soundwire/stream.c | 30 ++++++++++++++++++++++++ include/linux/soundwire/sdw.h | 23 +++++++++++++++++++ 4 files changed, 114 insertions(+) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 25120aa3bd67..6f8a20014e76 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -2039,3 +2039,46 @@ void sdw_clear_slave_status(struct sdw_bus *bus, u32 request) } } EXPORT_SYMBOL(sdw_clear_slave_status); + +int sdw_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg) +{ + if (msg->len > SDW_BPT_MSG_MAX_BYTES) { + dev_err(bus->dev, "Invalid BPT message length %d\n", msg->len); + return -EINVAL; + } + + /* check device is enumerated */ + if (slave->dev_num == SDW_ENUM_DEV_NUM || + slave->dev_num > SDW_MAX_DEVICES) { + dev_err(&slave->dev, "Invalid device number %d\n", slave->dev_num); + return -ENODEV; + } + + /* make sure all callbacks are defined */ + if (!bus->ops->bpt_send_async || + !bus->ops->bpt_wait) { + dev_err(bus->dev, "BPT callbacks not defined\n"); + return -EOPNOTSUPP; + } + + return bus->ops->bpt_send_async(bus, slave, msg); +} +EXPORT_SYMBOL(sdw_bpt_send_async); + +int sdw_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg) +{ + return bus->ops->bpt_wait(bus, slave, msg); +} +EXPORT_SYMBOL(sdw_bpt_wait); + +int sdw_bpt_send_sync(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg) +{ + int ret; + + ret = sdw_bpt_send_async(bus, slave, msg); + if (ret < 0) + return ret; + + return sdw_bpt_wait(bus, slave, msg); +} +EXPORT_SYMBOL(sdw_bpt_send_sync); diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h index fc990171b3f7..02651fbb683a 100644 --- a/drivers/soundwire/bus.h +++ b/drivers/soundwire/bus.h @@ -72,6 +72,24 @@ struct sdw_msg { bool page; }; +/** + * struct sdw_btp_msg - Message structure + * @addr: Start Register address accessed in the Slave + * @len: number of bytes to transfer. More than 64Kb can be transferred + * but a practical limit of SDW_BPT_MSG_MAX_BYTES is enforced. + * @dev_num: Slave device number + * @flags: transfer flags, indicate if xfer is read or write + * @buf: message data buffer (filled by host for write, filled + * by Peripheral hardware for reads) + */ +struct sdw_bpt_msg { + u32 addr; + u32 len; + u8 dev_num; + u8 flags; + u8 *buf; +}; + #define SDW_DOUBLE_RATE_FACTOR 2 #define SDW_STRM_RATE_GROUPING 1 diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index d29d85d809c8..a4bea742b5d9 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1227,6 +1227,20 @@ static struct sdw_master_runtime struct sdw_master_runtime *m_rt, *walk_m_rt; struct list_head *insert_after; + if (stream->type == SDW_STREAM_BPT) { + if (bus->stream_refcount > 0 || bus->bpt_stream_refcount > 0) { + dev_err(bus->dev, "%s: %d/%d audio/BPT stream already allocated\n", + __func__, bus->stream_refcount, bus->bpt_stream_refcount); + return ERR_PTR(-EBUSY); + } + } else { + if (bus->bpt_stream_refcount > 0) { + dev_err(bus->dev, "%s: BPT stream already allocated\n", + __func__); + return ERR_PTR(-EAGAIN); + } + } + m_rt = kzalloc(sizeof(*m_rt), GFP_KERNEL); if (!m_rt) return NULL; @@ -1255,6 +1269,8 @@ static struct sdw_master_runtime m_rt->stream = stream; bus->stream_refcount++; + if (stream->type == SDW_STREAM_BPT) + bus->bpt_stream_refcount++; return m_rt; } @@ -1303,6 +1319,8 @@ static void sdw_master_rt_free(struct sdw_master_runtime *m_rt, list_del(&m_rt->bus_node); kfree(m_rt); + if (stream->type == SDW_STREAM_BPT) + bus->bpt_stream_refcount--; bus->stream_refcount--; } @@ -2001,6 +2019,12 @@ int sdw_stream_add_master(struct sdw_bus *bus, m_rt = sdw_master_rt_find(bus, stream); if (!m_rt) { m_rt = sdw_master_rt_alloc(bus, stream); + if (IS_ERR(m_rt)) { + ret = PTR_ERR(m_rt); + dev_err(bus->dev, "%s: Master runtime alloc failed for stream:%s: %d\n", + __func__, stream->name, ret); + goto unlock; + } if (!m_rt) { dev_err(bus->dev, "%s: Master runtime alloc failed for stream:%s\n", __func__, stream->name); @@ -2116,6 +2140,12 @@ int sdw_stream_add_slave(struct sdw_slave *slave, * So, allocate m_rt and add Slave to it. */ m_rt = sdw_master_rt_alloc(slave->bus, stream); + if (IS_ERR(m_rt)) { + ret = PTR_ERR(m_rt); + dev_err(&slave->dev, "%s: Master runtime alloc failed for stream:%s: %d\n", + __func__, stream->name, ret); + goto unlock; + } if (!m_rt) { dev_err(&slave->dev, "%s: Master runtime alloc failed for stream:%s\n", __func__, stream->name); diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index ba58e2a52322..69f3e700796d 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -824,6 +824,15 @@ struct sdw_defer { struct completion complete; }; +/* + * Add a practical limit to BPT transfer sizes. BPT is typically used + * to transfer firmware, and larger firmware transfers will increase + * the cold latency beyond typical OS or user requirements. + */ +#define SDW_BPT_MSG_MAX_BYTES (1024 * 1024) + +struct sdw_bpt_msg; + /** * struct sdw_master_ops - Master driver ops * @read_prop: Read Master properties @@ -839,6 +848,10 @@ struct sdw_defer { * @get_device_num: Callback for vendor-specific device_number allocation * @put_device_num: Callback for vendor-specific device_number release * @new_peripheral_assigned: Callback to handle enumeration of new peripheral. + * @bpt_send_async: reserve resources for BPT stream and send message + * using BTP protocol + * @bpt_wait: wait for message completion using BTP protocol + * and release resources */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); @@ -855,6 +868,9 @@ struct sdw_master_ops { void (*new_peripheral_assigned)(struct sdw_bus *bus, struct sdw_slave *slave, int dev_num); + int (*bpt_send_async)(struct sdw_bus *bus, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, @@ -961,6 +977,8 @@ struct sdw_stream_runtime { * @defer_msg: Defer message * @params: Current bus parameters * @stream_refcount: number of streams currently using this bus + * @btp_stream_refcount: number of BTP streams currently using this bus (should + * be zero or one, multiple streams per link is not supported). * @ops: Master callback ops * @port_ops: Master port callback ops * @prop: Master properties @@ -998,6 +1016,7 @@ struct sdw_bus { struct sdw_defer defer_msg; struct sdw_bus_params params; int stream_refcount; + int bpt_stream_refcount; const struct sdw_master_ops *ops; const struct sdw_master_port_ops *port_ops; struct sdw_master_prop prop; @@ -1045,6 +1064,10 @@ int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave); +int sdw_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_send_sync(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); + #if IS_ENABLED(CONFIG_SOUNDWIRE) int sdw_stream_add_slave(struct sdw_slave *slave, From 8e4a239b403bd8aed8787798de8e4e42f79246c2 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:07 +0800 Subject: [PATCH 0644/2157] soundwire: bus: add bpt_stream pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a convenience pointer to the 'sdw_bus' structure. BPT is a dedicated stream which will typically not be handled by DAIs or dailinks. Since there's only one BPT stream per link, storing the pointer at the link level seems rather natural. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-9-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 69f3e700796d..2362f621d94c 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -979,6 +979,7 @@ struct sdw_stream_runtime { * @stream_refcount: number of streams currently using this bus * @btp_stream_refcount: number of BTP streams currently using this bus (should * be zero or one, multiple streams per link is not supported). + * @bpt_stream: pointer stored to handle BTP streams. * @ops: Master callback ops * @port_ops: Master port callback ops * @prop: Master properties @@ -1017,6 +1018,7 @@ struct sdw_bus { struct sdw_bus_params params; int stream_refcount; int bpt_stream_refcount; + struct sdw_stream_runtime *bpt_stream; const struct sdw_master_ops *ops; const struct sdw_master_port_ops *port_ops; struct sdw_master_prop prop; From 8eb5d7ade8b1ed1678cdc5340ef3f6d346eed9be Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:08 +0800 Subject: [PATCH 0645/2157] soundwire: cadence: add BTP/BRA helpers to format data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Cadence IP expects a specific format (detailed in the Documentation). Add helpers to copy the data into the DMA buffer. The crc8 table is for now only used by the Cadence driver. This table might be moved to a common module at a later point if needed by other controller implementations. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-10-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/Kconfig | 1 + drivers/soundwire/cadence_master.c | 634 +++++++++++++++++++++++++++++ drivers/soundwire/cadence_master.h | 20 + 3 files changed, 655 insertions(+) diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig index f66f869dff2e..ad56393e4c93 100644 --- a/drivers/soundwire/Kconfig +++ b/drivers/soundwire/Kconfig @@ -31,6 +31,7 @@ config SOUNDWIRE_AMD config SOUNDWIRE_CADENCE tristate + select CRC8 config SOUNDWIRE_INTEL tristate "Intel SoundWire Master driver" diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index b29929b59510..21bb491d026b 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -1976,5 +1977,638 @@ struct sdw_cdns_pdi *sdw_cdns_alloc_pdi(struct sdw_cdns *cdns, } EXPORT_SYMBOL(sdw_cdns_alloc_pdi); +/* + * the MIPI SoundWire CRC8 polynomial is X^8 + X^6 + X^3 + X^2 + 1, MSB first + * The value is (1)01001101 = 0x4D + * + * the table below was generated with + * + * u8 crc8_lookup_table[CRC8_TABLE_SIZE]; + * crc8_populate_msb(crc8_lookup_table, SDW_CRC8_POLY); + * + */ +#define SDW_CRC8_SEED 0xFF +#define SDW_CRC8_POLY 0x4D + +static const u8 sdw_crc8_lookup_msb[CRC8_TABLE_SIZE] = { + 0x00, 0x4d, 0x9a, 0xd7, 0x79, 0x34, 0xe3, 0xae, /* 0 - 7 */ + 0xf2, 0xbf, 0x68, 0x25, 0x8b, 0xc6, 0x11, 0x5c, /* 8 -15 */ + 0xa9, 0xe4, 0x33, 0x7e, 0xd0, 0x9d, 0x4a, 0x07, /* 16 - 23 */ + 0x5b, 0x16, 0xc1, 0x8c, 0x22, 0x6f, 0xb8, 0xf5, /* 24 - 31 */ + 0x1f, 0x52, 0x85, 0xc8, 0x66, 0x2b, 0xfc, 0xb1, /* 32 - 39 */ + 0xed, 0xa0, 0x77, 0x3a, 0x94, 0xd9, 0x0e, 0x43, /* 40 - 47 */ + 0xb6, 0xfb, 0x2c, 0x61, 0xcf, 0x82, 0x55, 0x18, /* 48 - 55 */ + 0x44, 0x09, 0xde, 0x93, 0x3d, 0x70, 0xa7, 0xea, /* 56 - 63 */ + 0x3e, 0x73, 0xa4, 0xe9, 0x47, 0x0a, 0xdd, 0x90, /* 64 - 71 */ + 0xcc, 0x81, 0x56, 0x1b, 0xb5, 0xf8, 0x2f, 0x62, /* 72 - 79 */ + 0x97, 0xda, 0x0d, 0x40, 0xee, 0xa3, 0x74, 0x39, /* 80 - 87 */ + 0x65, 0x28, 0xff, 0xb2, 0x1c, 0x51, 0x86, 0xcb, /* 88 - 95 */ + 0x21, 0x6c, 0xbb, 0xf6, 0x58, 0x15, 0xc2, 0x8f, /* 96 - 103 */ + 0xd3, 0x9e, 0x49, 0x04, 0xaa, 0xe7, 0x30, 0x7d, /* 104 - 111 */ + 0x88, 0xc5, 0x12, 0x5f, 0xf1, 0xbc, 0x6b, 0x26, /* 112 - 119 */ + 0x7a, 0x37, 0xe0, 0xad, 0x03, 0x4e, 0x99, 0xd4, /* 120 - 127 */ + 0x7c, 0x31, 0xe6, 0xab, 0x05, 0x48, 0x9f, 0xd2, /* 128 - 135 */ + 0x8e, 0xc3, 0x14, 0x59, 0xf7, 0xba, 0x6d, 0x20, /* 136 - 143 */ + 0xd5, 0x98, 0x4f, 0x02, 0xac, 0xe1, 0x36, 0x7b, /* 144 - 151 */ + 0x27, 0x6a, 0xbd, 0xf0, 0x5e, 0x13, 0xc4, 0x89, /* 152 - 159 */ + 0x63, 0x2e, 0xf9, 0xb4, 0x1a, 0x57, 0x80, 0xcd, /* 160 - 167 */ + 0x91, 0xdc, 0x0b, 0x46, 0xe8, 0xa5, 0x72, 0x3f, /* 168 - 175 */ + 0xca, 0x87, 0x50, 0x1d, 0xb3, 0xfe, 0x29, 0x64, /* 176 - 183 */ + 0x38, 0x75, 0xa2, 0xef, 0x41, 0x0c, 0xdb, 0x96, /* 184 - 191 */ + 0x42, 0x0f, 0xd8, 0x95, 0x3b, 0x76, 0xa1, 0xec, /* 192 - 199 */ + 0xb0, 0xfd, 0x2a, 0x67, 0xc9, 0x84, 0x53, 0x1e, /* 200 - 207 */ + 0xeb, 0xa6, 0x71, 0x3c, 0x92, 0xdf, 0x08, 0x45, /* 208 - 215 */ + 0x19, 0x54, 0x83, 0xce, 0x60, 0x2d, 0xfa, 0xb7, /* 216 - 223 */ + 0x5d, 0x10, 0xc7, 0x8a, 0x24, 0x69, 0xbe, 0xf3, /* 224 - 231 */ + 0xaf, 0xe2, 0x35, 0x78, 0xd6, 0x9b, 0x4c, 0x01, /* 232 - 239 */ + 0xf4, 0xb9, 0x6e, 0x23, 0x8d, 0xc0, 0x17, 0x5a, /* 240 - 247 */ + 0x06, 0x4b, 0x9c, 0xd1, 0x7f, 0x32, 0xe5, 0xa8 /* 248 - 255 */ +}; + +/* BPT/BRA helpers */ + +#define SDW_CDNS_BRA_HDR 6 /* defined by MIPI */ +#define SDW_CDNS_BRA_HDR_CRC 1 /* defined by MIPI */ +#define SDW_CDNS_BRA_HDR_CRC_PAD 1 /* Cadence only */ +#define SDW_CDNS_BRA_HDR_RESP 1 /* defined by MIPI */ +#define SDW_CDNS_BRA_HDR_RESP_PAD 1 /* Cadence only */ + +#define SDW_CDNS_BRA_DATA_PAD 1 /* Cadence only */ +#define SDW_CDNS_BRA_DATA_CRC 1 /* defined by MIPI */ +#define SDW_CDNS_BRA_DATA_CRC_PAD 1 /* Cadence only */ + +#define SDW_CDNS_BRA_FOOTER_RESP 1 /* defined by MIPI */ +#define SDW_CDNS_BRA_FOOTER_RESP_PAD 1 /* Cadence only */ + +#define SDW_CDNS_WRITE_PDI1_BUFFER_SIZE \ + ((SDW_CDNS_BRA_HDR_RESP + SDW_CDNS_BRA_HDR_RESP_PAD + \ + SDW_CDNS_BRA_FOOTER_RESP + SDW_CDNS_BRA_FOOTER_RESP_PAD) * 2) + +#define SDW_CDNS_READ_PDI0_BUFFER_SIZE \ + ((SDW_CDNS_BRA_HDR + SDW_CDNS_BRA_HDR_CRC + SDW_CDNS_BRA_HDR_CRC_PAD) * 2) + +static unsigned int sdw_cdns_bra_actual_data_size(unsigned int allocated_bytes_per_frame) +{ + unsigned int total; + + if (allocated_bytes_per_frame < (SDW_CDNS_BRA_HDR + SDW_CDNS_BRA_HDR_CRC + + SDW_CDNS_BRA_HDR_RESP + SDW_CDNS_BRA_DATA_CRC + + SDW_CDNS_BRA_FOOTER_RESP)) + return 0; + + total = allocated_bytes_per_frame - SDW_CDNS_BRA_HDR - SDW_CDNS_BRA_HDR_CRC - + SDW_CDNS_BRA_HDR_RESP - SDW_CDNS_BRA_DATA_CRC - SDW_CDNS_BRA_FOOTER_RESP; + + return total; +} + +static unsigned int sdw_cdns_write_pdi0_buffer_size(unsigned int actual_data_size) +{ + unsigned int total; + + total = SDW_CDNS_BRA_HDR + SDW_CDNS_BRA_HDR_CRC + SDW_CDNS_BRA_HDR_CRC_PAD; + + total += actual_data_size; + if (actual_data_size & 1) + total += SDW_CDNS_BRA_DATA_PAD; + + total += SDW_CDNS_BRA_DATA_CRC + SDW_CDNS_BRA_DATA_CRC_PAD; + + return total * 2; +} + +static unsigned int sdw_cdns_read_pdi1_buffer_size(unsigned int actual_data_size) +{ + unsigned int total; + + total = SDW_CDNS_BRA_HDR_RESP + SDW_CDNS_BRA_HDR_RESP_PAD; + + total += actual_data_size; + if (actual_data_size & 1) + total += SDW_CDNS_BRA_DATA_PAD; + + total += SDW_CDNS_BRA_HDR_CRC + SDW_CDNS_BRA_HDR_CRC_PAD; + + total += SDW_CDNS_BRA_FOOTER_RESP + SDW_CDNS_BRA_FOOTER_RESP_PAD; + + return total * 2; +} + +int sdw_cdns_bpt_find_buffer_sizes(int command, /* 0: write, 1: read */ + int row, int col, unsigned int data_bytes, + unsigned int requested_bytes_per_frame, + unsigned int *data_per_frame, unsigned int *pdi0_buffer_size, + unsigned int *pdi1_buffer_size, unsigned int *num_frames) +{ + unsigned int bpt_bits = row * (col - 1); + unsigned int bpt_bytes = bpt_bits >> 3; + unsigned int actual_bpt_bytes; + unsigned int pdi0_tx_size; + unsigned int pdi1_rx_size; + unsigned int remainder; + + if (!data_bytes) + return -EINVAL; + + actual_bpt_bytes = sdw_cdns_bra_actual_data_size(bpt_bytes); + if (!actual_bpt_bytes) + return -EINVAL; + + if (data_bytes < actual_bpt_bytes) + actual_bpt_bytes = data_bytes; + + /* + * the caller may want to set the number of bytes per frame, + * allow when possible + */ + if (requested_bytes_per_frame < actual_bpt_bytes) + actual_bpt_bytes = requested_bytes_per_frame; + + *data_per_frame = actual_bpt_bytes; + + if (command == 0) { + /* + * for writes we need to send all the data_bytes per frame, + * even for the last frame which may only transport fewer bytes + */ + + *num_frames = DIV_ROUND_UP(data_bytes, actual_bpt_bytes); + + pdi0_tx_size = sdw_cdns_write_pdi0_buffer_size(actual_bpt_bytes); + pdi1_rx_size = SDW_CDNS_WRITE_PDI1_BUFFER_SIZE; + + *pdi0_buffer_size = pdi0_tx_size * *num_frames; + *pdi1_buffer_size = pdi1_rx_size * *num_frames; + } else { + /* + * for reads we need to retrieve only what is requested in the BPT + * header, so the last frame needs to be special-cased + */ + *num_frames = data_bytes / actual_bpt_bytes; + + pdi0_tx_size = SDW_CDNS_READ_PDI0_BUFFER_SIZE; + pdi1_rx_size = sdw_cdns_read_pdi1_buffer_size(actual_bpt_bytes); + + *pdi0_buffer_size = pdi0_tx_size * *num_frames; + *pdi1_buffer_size = pdi1_rx_size * *num_frames; + + remainder = data_bytes % actual_bpt_bytes; + if (remainder) { + pdi0_tx_size = SDW_CDNS_READ_PDI0_BUFFER_SIZE; + pdi1_rx_size = sdw_cdns_read_pdi1_buffer_size(remainder); + + *num_frames = *num_frames + 1; + *pdi0_buffer_size += pdi0_tx_size; + *pdi1_buffer_size += pdi1_rx_size; + } + } + + return 0; +} +EXPORT_SYMBOL(sdw_cdns_bpt_find_buffer_sizes); + +static int sdw_cdns_copy_write_data(u8 *data, int data_size, u8 *dma_buffer, int dma_buffer_size) +{ + /* + * the implementation copies the data one byte at a time. Experiments with + * two bytes at a time did not seem to improve the performance + */ + int i, j; + + /* size check to prevent out of bounds access */ + i = data_size - 1; + j = (2 * i) - (i & 1); + if (data_size & 1) + j++; + j += 2; + if (j >= dma_buffer_size) + return -EINVAL; + + /* copy data */ + for (i = 0; i < data_size; i++) { + j = (2 * i) - (i & 1); + dma_buffer[j] = data[i]; + } + /* add required pad */ + if (data_size & 1) + dma_buffer[++j] = 0; + /* skip last two bytes */ + j += 2; + + /* offset and data are off-by-one */ + return j + 1; +} + +static int sdw_cdns_prepare_write_pd0_buffer(u8 *header, unsigned int header_size, + u8 *data, unsigned int data_size, + u8 *dma_buffer, unsigned int dma_buffer_size, + unsigned int *dma_data_written, + unsigned int frame_counter) +{ + int data_written; + u8 *last_byte; + u8 crc; + + *dma_data_written = 0; + + data_written = sdw_cdns_copy_write_data(header, header_size, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer[3] = BIT(7); + dma_buffer[3] |= frame_counter & GENMASK(3, 0); + + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + crc = SDW_CRC8_SEED; + crc = crc8(sdw_crc8_lookup_msb, header, header_size, crc); + + data_written = sdw_cdns_copy_write_data(&crc, 1, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + data_written = sdw_cdns_copy_write_data(data, data_size, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + crc = SDW_CRC8_SEED; + crc = crc8(sdw_crc8_lookup_msb, data, data_size, crc); + data_written = sdw_cdns_copy_write_data(&crc, 1, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + /* tag last byte */ + last_byte = dma_buffer - 1; + last_byte[0] = BIT(6); + + return 0; +} + +static int sdw_cdns_prepare_read_pd0_buffer(u8 *header, unsigned int header_size, + u8 *dma_buffer, unsigned int dma_buffer_size, + unsigned int *dma_data_written, + unsigned int frame_counter) +{ + int data_written; + u8 *last_byte; + u8 crc; + + *dma_data_written = 0; + + data_written = sdw_cdns_copy_write_data(header, header_size, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer[3] = BIT(7); + dma_buffer[3] |= frame_counter & GENMASK(3, 0); + + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + crc = SDW_CRC8_SEED; + crc = crc8(sdw_crc8_lookup_msb, header, header_size, crc); + + data_written = sdw_cdns_copy_write_data(&crc, 1, dma_buffer, dma_buffer_size); + if (data_written < 0) + return data_written; + dma_buffer += data_written; + dma_buffer_size -= data_written; + *dma_data_written += data_written; + + /* tag last byte */ + last_byte = dma_buffer - 1; + last_byte[0] = BIT(6); + + return 0; +} + +#define CDNS_BPT_ROLLING_COUNTER_START 1 + +int sdw_cdns_prepare_write_dma_buffer(u8 dev_num, u32 start_register, u8 *data, int data_size, + int data_per_frame, u8 *dma_buffer, int dma_buffer_size, + int *dma_buffer_total_bytes) +{ + int total_dma_data_written = 0; + u8 *p_dma_buffer = dma_buffer; + u8 header[SDW_CDNS_BRA_HDR]; + int dma_data_written; + u8 *p_data = data; + u8 counter; + int ret; + + counter = CDNS_BPT_ROLLING_COUNTER_START; + + header[0] = BIT(1); /* write command: BIT(1) set */ + header[0] |= GENMASK(7, 6); /* header is active */ + header[0] |= (dev_num << 2); + + while (data_size >= data_per_frame) { + header[1] = data_per_frame; + header[2] = start_register >> 24 & 0xFF; + header[3] = start_register >> 16 & 0xFF; + header[4] = start_register >> 8 & 0xFF; + header[5] = start_register >> 0 & 0xFF; + + ret = sdw_cdns_prepare_write_pd0_buffer(header, SDW_CDNS_BRA_HDR, + p_data, data_per_frame, + p_dma_buffer, dma_buffer_size, + &dma_data_written, counter); + if (ret < 0) + return ret; + + counter++; + + p_data += data_per_frame; + data_size -= data_per_frame; + + p_dma_buffer += dma_data_written; + dma_buffer_size -= dma_data_written; + total_dma_data_written += dma_data_written; + + start_register += data_per_frame; + } + + if (data_size) { + header[1] = data_size; + header[2] = start_register >> 24 & 0xFF; + header[3] = start_register >> 16 & 0xFF; + header[4] = start_register >> 8 & 0xFF; + header[5] = start_register >> 0 & 0xFF; + + ret = sdw_cdns_prepare_write_pd0_buffer(header, SDW_CDNS_BRA_HDR, + p_data, data_size, + p_dma_buffer, dma_buffer_size, + &dma_data_written, counter); + if (ret < 0) + return ret; + + total_dma_data_written += dma_data_written; + } + + *dma_buffer_total_bytes = total_dma_data_written; + + return 0; +} +EXPORT_SYMBOL(sdw_cdns_prepare_write_dma_buffer); + +int sdw_cdns_prepare_read_dma_buffer(u8 dev_num, u32 start_register, int data_size, + int data_per_frame, u8 *dma_buffer, int dma_buffer_size, + int *dma_buffer_total_bytes) +{ + int total_dma_data_written = 0; + u8 *p_dma_buffer = dma_buffer; + u8 header[SDW_CDNS_BRA_HDR]; + int dma_data_written; + u8 counter; + int ret; + + counter = CDNS_BPT_ROLLING_COUNTER_START; + + header[0] = 0; /* read command: BIT(1) cleared */ + header[0] |= GENMASK(7, 6); /* header is active */ + header[0] |= (dev_num << 2); + + while (data_size >= data_per_frame) { + header[1] = data_per_frame; + header[2] = start_register >> 24 & 0xFF; + header[3] = start_register >> 16 & 0xFF; + header[4] = start_register >> 8 & 0xFF; + header[5] = start_register >> 0 & 0xFF; + + ret = sdw_cdns_prepare_read_pd0_buffer(header, SDW_CDNS_BRA_HDR, p_dma_buffer, + dma_buffer_size, &dma_data_written, + counter); + if (ret < 0) + return ret; + + counter++; + + data_size -= data_per_frame; + + p_dma_buffer += dma_data_written; + dma_buffer_size -= dma_data_written; + total_dma_data_written += dma_data_written; + + start_register += data_per_frame; + } + + if (data_size) { + header[1] = data_size; + header[2] = start_register >> 24 & 0xFF; + header[3] = start_register >> 16 & 0xFF; + header[4] = start_register >> 8 & 0xFF; + header[5] = start_register >> 0 & 0xFF; + + ret = sdw_cdns_prepare_read_pd0_buffer(header, SDW_CDNS_BRA_HDR, p_dma_buffer, + dma_buffer_size, &dma_data_written, + counter); + if (ret < 0) + return ret; + + total_dma_data_written += dma_data_written; + } + + *dma_buffer_total_bytes = total_dma_data_written; + + return 0; +} +EXPORT_SYMBOL(sdw_cdns_prepare_read_dma_buffer); + +static int check_counter(u32 val, u8 counter) +{ + u8 frame; + + frame = (val >> 24) & GENMASK(3, 0); + if (counter != frame) + return -EIO; + return 0; +} + +static int check_response(u32 val) +{ + u8 response; + + response = (val >> 3) & GENMASK(1, 0); + if (response == 0) /* Ignored */ + return -ENODATA; + if (response != 1) /* ACK */ + return -EIO; + + return 0; +} + +static int check_frame_start(u32 header, u8 counter) +{ + int ret; + + /* check frame_start marker */ + if (!(header & BIT(31))) + return -EIO; + + ret = check_counter(header, counter); + if (ret < 0) + return ret; + + return check_response(header); +} + +static int check_frame_end(u32 footer) +{ + /* check frame_end marker */ + if (!(footer & BIT(30))) + return -EIO; + + return check_response(footer); +} + +int sdw_cdns_check_write_response(struct device *dev, u8 *dma_buffer, + int dma_buffer_size, int num_frames) +{ + u32 *p_data; + int counter; + u32 header; + u32 footer; + int ret; + int i; + + /* paranoia check on buffer size */ + if (dma_buffer_size != num_frames * 8) + return -EINVAL; + + counter = CDNS_BPT_ROLLING_COUNTER_START; + p_data = (u32 *)dma_buffer; + + for (i = 0; i < num_frames; i++) { + header = *p_data++; + footer = *p_data++; + + ret = check_frame_start(header, counter); + if (ret < 0) { + dev_err(dev, "%s: bad frame %d/%d start header %x\n", + __func__, i, num_frames, header); + return ret; + } + + ret = check_frame_end(footer); + if (ret < 0) { + dev_err(dev, "%s: bad frame %d/%d end footer %x\n", + __func__, i, num_frames, footer); + return ret; + } + + counter++; + counter &= GENMASK(3, 0); + } + return 0; +} +EXPORT_SYMBOL(sdw_cdns_check_write_response); + +static u8 extract_read_data(u32 *data, int num_bytes, u8 *buffer) +{ + u32 val; + int i; + u8 crc; + u8 b0; + u8 b1; + + crc = SDW_CRC8_SEED; + + /* process two bytes at a time */ + for (i = 0; i < num_bytes / 2; i++) { + val = *data++; + + b0 = val & 0xff; + b1 = (val >> 8) & 0xff; + + *buffer++ = b0; + crc = crc8(sdw_crc8_lookup_msb, &b0, 1, crc); + + *buffer++ = b1; + crc = crc8(sdw_crc8_lookup_msb, &b1, 1, crc); + } + /* handle remaining byte if it exists */ + if (num_bytes & 1) { + val = *data; + + b0 = val & 0xff; + + *buffer++ = b0; + crc = crc8(sdw_crc8_lookup_msb, &b0, 1, crc); + } + return crc; +} + +int sdw_cdns_check_read_response(struct device *dev, u8 *dma_buffer, int dma_buffer_size, + u8 *buffer, int buffer_size, int num_frames, int data_per_frame) +{ + int total_num_bytes = 0; + u32 *p_data; + u8 *p_buf; + int counter; + u32 header; + u32 footer; + u8 expected_crc; + u8 crc; + int len; + int ret; + int i; + + counter = CDNS_BPT_ROLLING_COUNTER_START; + p_data = (u32 *)dma_buffer; + p_buf = buffer; + + for (i = 0; i < num_frames; i++) { + header = *p_data++; + + ret = check_frame_start(header, counter); + if (ret < 0) { + dev_err(dev, "%s: bad frame %d/%d start header %x\n", + __func__, i, num_frames, header); + return ret; + } + + len = data_per_frame; + if (total_num_bytes + data_per_frame > buffer_size) + len = buffer_size - total_num_bytes; + + crc = extract_read_data(p_data, len, p_buf); + + p_data += (len + 1) / 2; + expected_crc = *p_data++ & 0xff; + + if (crc != expected_crc) { + dev_err(dev, "%s: bad frame %d/%d crc %#x expected %#x\n", + __func__, i, num_frames, crc, expected_crc); + return -EIO; + } + + p_buf += len; + total_num_bytes += len; + + footer = *p_data++; + ret = check_frame_end(footer); + if (ret < 0) { + dev_err(dev, "%s: bad frame %d/%d end footer %x\n", + __func__, i, num_frames, footer); + return ret; + } + + counter++; + counter &= GENMASK(3, 0); + } + return 0; +} +EXPORT_SYMBOL(sdw_cdns_check_read_response); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Cadence Soundwire Library"); diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index c34fb050fe4f..9373426c7f63 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -208,4 +208,24 @@ void sdw_cdns_check_self_clearing_bits(struct sdw_cdns *cdns, const char *string void sdw_cdns_config_update(struct sdw_cdns *cdns); int sdw_cdns_config_update_set_wait(struct sdw_cdns *cdns); +/* SoundWire BPT/BRA helpers to format data */ +int sdw_cdns_bpt_find_buffer_sizes(int command, /* 0: write, 1: read */ + int row, int col, unsigned int data_bytes, + unsigned int requested_bytes_per_frame, + unsigned int *data_per_frame, unsigned int *pdi0_buffer_size, + unsigned int *pdi1_buffer_size, unsigned int *num_frames); + +int sdw_cdns_prepare_write_dma_buffer(u8 dev_num, u32 start_register, u8 *data, int data_size, + int data_per_frame, u8 *dma_buffer, int dma_buffer_size, + int *dma_buffer_total_bytes); + +int sdw_cdns_prepare_read_dma_buffer(u8 dev_num, u32 start_register, int data_size, + int data_per_frame, u8 *dma_buffer, int dma_buffer_size, + int *dma_buffer_total_bytes); + +int sdw_cdns_check_write_response(struct device *dev, u8 *dma_buffer, + int dma_buffer_size, int num_frames); + +int sdw_cdns_check_read_response(struct device *dev, u8 *dma_buffer, int dma_buffer_size, + u8 *buffer, int buffer_size, int num_frames, int data_per_frame); #endif /* __SDW_CADENCE_H */ From 7f17a73a7dd8252aa88c6f5e23310861de3d5423 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:09 +0800 Subject: [PATCH 0646/2157] soundwire: intel_auxdevice: add indirection for BPT send_async/wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror abstraction added for master ops. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-11-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel_auxdevice.c | 24 ++++++++++++++++++++++++ include/linux/soundwire/sdw_intel.h | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index dee126f6d9d5..5ea6399e6c9b 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -79,6 +79,27 @@ static bool is_wake_capable(struct sdw_slave *slave) return false; } +static int generic_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, + struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + if (sdw->link_res->hw_ops->bpt_send_async) + return sdw->link_res->hw_ops->bpt_send_async(sdw, slave, msg); + return -EOPNOTSUPP; +} + +static int generic_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + if (sdw->link_res->hw_ops->bpt_wait) + return sdw->link_res->hw_ops->bpt_wait(sdw, slave, msg); + return -EOPNOTSUPP; +} + static int generic_pre_bank_switch(struct sdw_bus *bus) { struct sdw_cdns *cdns = bus_to_cdns(bus); @@ -267,6 +288,9 @@ static struct sdw_master_ops sdw_intel_ops = { .get_device_num = intel_get_device_num_ida, .put_device_num = intel_put_device_num_ida, .new_peripheral_assigned = generic_new_peripheral_assigned, + + .bpt_send_async = generic_bpt_send_async, + .bpt_wait = generic_bpt_wait, }; /* diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 580086417e4b..493d9de4e472 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -436,6 +436,10 @@ struct sdw_intel_hw_ops { bool (*sync_check_cmdsync_unlocked)(struct sdw_intel *sdw); void (*program_sdi)(struct sdw_intel *sdw, int dev_num); + + int (*bpt_send_async)(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_intel *sdw, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; From 5d5cb86fb46ea1c7efd3b894f63fe364e5847043 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:10 +0800 Subject: [PATCH 0647/2157] ASoC: SOF: Intel: hda-sdw-bpt: add helpers for SoundWire BPT DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SoundWire BPT DMA helpers as a separate module to avoid circular dependencies. For now this assumes no link DMA, only coupled mode. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Acked-by: Mark Brown Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-12-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/sound/hda-sdw-bpt.h | 69 +++++++ sound/soc/sof/intel/Kconfig | 7 + sound/soc/sof/intel/Makefile | 4 + sound/soc/sof/intel/hda-sdw-bpt.c | 319 ++++++++++++++++++++++++++++++ 4 files changed, 399 insertions(+) create mode 100644 include/sound/hda-sdw-bpt.h create mode 100644 sound/soc/sof/intel/hda-sdw-bpt.c diff --git a/include/sound/hda-sdw-bpt.h b/include/sound/hda-sdw-bpt.h new file mode 100644 index 000000000000..f649549b75d5 --- /dev/null +++ b/include/sound/hda-sdw-bpt.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * Copyright(c) 2025 Intel Corporation. + */ + +#ifndef __HDA_SDW_BPT_H +#define __HDA_SDW_BPT_H + +#include + +struct hdac_ext_stream; +struct snd_dma_buffer; + +#if IS_ENABLED(CONFIG_SND_SOF_SOF_HDA_SDW_BPT) +int hda_sdw_bpt_open(struct device *dev, int link_id, struct hdac_ext_stream **bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, u32 bpt_tx_num_bytes, + u32 tx_dma_bandwidth, struct hdac_ext_stream **bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl, u32 bpt_rx_num_bytes, + u32 rx_dma_bandwidth); + +int hda_sdw_bpt_send_async(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream); + +int hda_sdw_bpt_wait(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream); + +int hda_sdw_bpt_close(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, struct hdac_ext_stream *bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl); +#else +static inline int hda_sdw_bpt_open(struct device *dev, int link_id, + struct hdac_ext_stream **bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, u32 bpt_tx_num_bytes, + u32 tx_dma_bandwidth, struct hdac_ext_stream **bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl, u32 bpt_rx_num_bytes, + u32 rx_dma_bandwidth) +{ + WARN_ONCE(1, "SoundWire BPT is disabled"); + return -EOPNOTSUPP; +} + +static inline int hda_sdw_bpt_send_async(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream) +{ + WARN_ONCE(1, "SoundWire BPT is disabled"); + return -EOPNOTSUPP; +} + +static inline int hda_sdw_bpt_wait(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream) +{ + WARN_ONCE(1, "SoundWire BPT is disabled"); + return -EOPNOTSUPP; +} + +static inline int hda_sdw_bpt_close(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, + struct hdac_ext_stream *bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl) +{ + WARN_ONCE(1, "SoundWire BPT is disabled"); + return -EOPNOTSUPP; +} +#endif + +#endif /* __HDA_SDW_BPT_H */ diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 2c43558d96b9..fae3598fd601 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -268,6 +268,7 @@ config SND_SOC_SOF_INTEL_LNL tristate select SND_SOC_SOF_HDA_GENERIC select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE + select SND_SOF_SOF_HDA_SDW_BPT if SND_SOC_SOF_INTEL_SOUNDWIRE select SND_SOC_SOF_IPC4 select SND_SOC_SOF_INTEL_MTL @@ -342,6 +343,12 @@ config SND_SOC_SOF_HDA_AUDIO_CODEC endif ## SND_SOC_SOF_HDA_GENERIC +config SND_SOF_SOF_HDA_SDW_BPT + tristate + help + This option is not user-selectable but automagically handled by + 'select' statements at a higher level. + config SND_SOC_SOF_HDA_LINK_BASELINE tristate select SND_SOC_SOF_HDA if SND_SOC_SOF_HDA_LINK diff --git a/sound/soc/sof/intel/Makefile b/sound/soc/sof/intel/Makefile index f40daa616803..af30af92f21e 100644 --- a/sound/soc/sof/intel/Makefile +++ b/sound/soc/sof/intel/Makefile @@ -12,6 +12,8 @@ snd-sof-intel-hda-generic-y := hda.o hda-common-ops.o snd-sof-intel-hda-mlink-y := hda-mlink.o +snd-sof-intel-hda-sdw-bpt-objs := hda-sdw-bpt.o + snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-probes.o snd-sof-intel-hda-y := hda-codec.o @@ -26,6 +28,8 @@ obj-$(CONFIG_SND_SOC_SOF_HDA_GENERIC) += snd-sof-intel-hda-generic.o obj-$(CONFIG_SND_SOC_SOF_HDA_MLINK) += snd-sof-intel-hda-mlink.o obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o +obj-$(CONFIG_SND_SOF_SOF_HDA_SDW_BPT) += snd-sof-intel-hda-sdw-bpt.o + snd-sof-pci-intel-tng-y := pci-tng.o snd-sof-pci-intel-skl-y := pci-skl.o skl.o hda-loader-skl.o snd-sof-pci-intel-apl-y := pci-apl.o apl.o diff --git a/sound/soc/sof/intel/hda-sdw-bpt.c b/sound/soc/sof/intel/hda-sdw-bpt.c new file mode 100644 index 000000000000..bc7a3172656f --- /dev/null +++ b/sound/soc/sof/intel/hda-sdw-bpt.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2025 Intel Corporation. +// + +/* + * Hardware interface for SoundWire BPT support with HDA DMA + */ + +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-priv.h" +#include "hda.h" + +#define BPT_FREQUENCY 192000 /* The max rate defined in rate_bits[] hdac_device.c */ +#define BPT_MULTIPLIER ((BPT_FREQUENCY / 48000) - 1) + +static int hda_sdw_bpt_dma_prepare(struct device *dev, struct hdac_ext_stream **sdw_bpt_stream, + struct snd_dma_buffer *dmab_bdl, u32 bpt_num_bytes, + unsigned int num_channels, int direction) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + struct hdac_ext_stream *bpt_stream; + unsigned int format = HDA_CL_STREAM_FORMAT; + + /* + * the baseline format needs to be adjusted to + * bandwidth requirements + */ + format |= (num_channels - 1); + format |= BPT_MULTIPLIER << AC_FMT_MULT_SHIFT; + + dev_dbg(dev, "direction %d format_val %#x\n", direction, format); + + bpt_stream = hda_cl_prepare(dev, format, bpt_num_bytes, dmab_bdl, false, direction, false); + if (IS_ERR(bpt_stream)) { + dev_err(sdev->dev, "%s: SDW BPT DMA prepare failed: dir %d\n", + __func__, direction); + return PTR_ERR(bpt_stream); + } + *sdw_bpt_stream = bpt_stream; + + if (hdac_stream(bpt_stream)->direction == SNDRV_PCM_STREAM_PLAYBACK) { + struct hdac_bus *bus = sof_to_bus(sdev); + struct hdac_ext_link *hlink; + int stream_tag; + + stream_tag = hdac_stream(bpt_stream)->stream_tag; + hlink = hdac_bus_eml_sdw_get_hlink(bus); + + snd_hdac_ext_bus_link_set_stream_id(hlink, stream_tag); + } + return 0; +} + +static int hda_sdw_bpt_dma_deprepare(struct device *dev, struct hdac_ext_stream *sdw_bpt_stream, + struct snd_dma_buffer *dmab_bdl) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + int ret; + + ret = hda_cl_cleanup(sdev->dev, dmab_bdl, true, sdw_bpt_stream); + if (ret < 0) { + dev_err(sdev->dev, "%s: SDW BPT DMA cleanup failed\n", + __func__); + return ret; + } + + if (hdac_stream(sdw_bpt_stream)->direction == SNDRV_PCM_STREAM_PLAYBACK) { + struct hdac_bus *bus = sof_to_bus(sdev); + struct hdac_ext_link *hlink; + int stream_tag; + + stream_tag = hdac_stream(sdw_bpt_stream)->stream_tag; + hlink = hdac_bus_eml_sdw_get_hlink(bus); + + snd_hdac_ext_bus_link_clear_stream_id(hlink, stream_tag); + } + + return 0; +} + +static int hda_sdw_bpt_dma_enable(struct device *dev, struct hdac_ext_stream *sdw_bpt_stream) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + int ret; + + ret = hda_cl_trigger(sdev->dev, sdw_bpt_stream, SNDRV_PCM_TRIGGER_START); + if (ret < 0) + dev_err(sdev->dev, "%s: SDW BPT DMA trigger start failed\n", __func__); + + return ret; +} + +static int hda_sdw_bpt_dma_disable(struct device *dev, struct hdac_ext_stream *sdw_bpt_stream) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + int ret; + + ret = hda_cl_trigger(sdev->dev, sdw_bpt_stream, SNDRV_PCM_TRIGGER_STOP); + if (ret < 0) + dev_err(sdev->dev, "%s: SDW BPT DMA trigger stop failed\n", __func__); + + return ret; +} + +int hda_sdw_bpt_open(struct device *dev, int link_id, struct hdac_ext_stream **bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, u32 bpt_tx_num_bytes, + u32 tx_dma_bandwidth, struct hdac_ext_stream **bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl, u32 bpt_rx_num_bytes, + u32 rx_dma_bandwidth) +{ + struct snd_sof_dev *sdev = dev_get_drvdata(dev); + unsigned int num_channels_tx; + unsigned int num_channels_rx; + int ret1; + int ret; + + num_channels_tx = DIV_ROUND_UP(tx_dma_bandwidth, BPT_FREQUENCY * 32); + + ret = hda_sdw_bpt_dma_prepare(dev, bpt_tx_stream, dmab_tx_bdl, bpt_tx_num_bytes, + num_channels_tx, SNDRV_PCM_STREAM_PLAYBACK); + if (ret < 0) { + dev_err(dev, "%s: hda_sdw_bpt_dma_prepare failed for TX: %d\n", + __func__, ret); + return ret; + } + + num_channels_rx = DIV_ROUND_UP(rx_dma_bandwidth, BPT_FREQUENCY * 32); + + ret = hda_sdw_bpt_dma_prepare(dev, bpt_rx_stream, dmab_rx_bdl, bpt_rx_num_bytes, + num_channels_rx, SNDRV_PCM_STREAM_CAPTURE); + if (ret < 0) { + dev_err(dev, "%s: hda_sdw_bpt_dma_prepare failed for RX: %d\n", + __func__, ret); + + ret1 = hda_sdw_bpt_dma_deprepare(dev, *bpt_tx_stream, dmab_tx_bdl); + if (ret1 < 0) + dev_err(dev, "%s: hda_sdw_bpt_dma_deprepare failed for TX: %d\n", + __func__, ret1); + return ret; + } + + /* we need to map the channels in PCMSyCM registers */ + ret = hdac_bus_eml_sdw_map_stream_ch(sof_to_bus(sdev), link_id, + 0, /* cpu_dai->id -> PDI0 */ + GENMASK(num_channels_tx - 1, 0), + hdac_stream(*bpt_tx_stream)->stream_tag, + SNDRV_PCM_STREAM_PLAYBACK); + if (ret < 0) { + dev_err(dev, "%s: hdac_bus_eml_sdw_map_stream_ch failed for TX: %d\n", + __func__, ret); + goto close; + } + + ret = hdac_bus_eml_sdw_map_stream_ch(sof_to_bus(sdev), link_id, + 1, /* cpu_dai->id -> PDI1 */ + GENMASK(num_channels_rx - 1, 0), + hdac_stream(*bpt_rx_stream)->stream_tag, + SNDRV_PCM_STREAM_CAPTURE); + if (!ret) + return 0; + + dev_err(dev, "%s: hdac_bus_eml_sdw_map_stream_ch failed for RX: %d\n", + __func__, ret); + +close: + ret1 = hda_sdw_bpt_close(dev, *bpt_tx_stream, dmab_tx_bdl, *bpt_rx_stream, dmab_rx_bdl); + if (ret1 < 0) + dev_err(dev, "%s: hda_sdw_bpt_close failed: %d\n", + __func__, ret1); + + return ret; +} +EXPORT_SYMBOL_NS(hda_sdw_bpt_open, "SND_SOC_SOF_INTEL_HDA_SDW_BPT"); + +int hda_sdw_bpt_send_async(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream) +{ + int ret1; + int ret; + + ret = hda_sdw_bpt_dma_enable(dev, bpt_tx_stream); + if (ret < 0) { + dev_err(dev, "%s: hda_sdw_bpt_dma_enable failed for TX: %d\n", + __func__, ret); + return ret; + } + + ret = hda_sdw_bpt_dma_enable(dev, bpt_rx_stream); + if (ret < 0) { + dev_err(dev, "%s: hda_sdw_bpt_dma_enable failed for RX: %d\n", + __func__, ret); + + ret1 = hda_sdw_bpt_dma_disable(dev, bpt_tx_stream); + if (ret1 < 0) + dev_err(dev, "%s: hda_sdw_bpt_dma_disable failed for TX: %d\n", + __func__, ret1); + } + + return ret; +} +EXPORT_SYMBOL_NS(hda_sdw_bpt_send_async, "SND_SOC_SOF_INTEL_HDA_SDW_BPT"); + +/* + * 3s is several orders of magnitude larger than what is needed for a + * typical firmware download. + */ +#define HDA_BPT_IOC_TIMEOUT_MS 3000 + +int hda_sdw_bpt_wait(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct hdac_ext_stream *bpt_rx_stream) +{ + struct sof_intel_hda_stream *hda_tx_stream; + struct sof_intel_hda_stream *hda_rx_stream; + snd_pcm_uframes_t tx_position; + snd_pcm_uframes_t rx_position; + unsigned long time_tx_left; + unsigned long time_rx_left; + int ret = 0; + int ret1; + int i; + + hda_tx_stream = container_of(bpt_tx_stream, struct sof_intel_hda_stream, hext_stream); + hda_rx_stream = container_of(bpt_rx_stream, struct sof_intel_hda_stream, hext_stream); + + time_tx_left = wait_for_completion_timeout(&hda_tx_stream->ioc, + msecs_to_jiffies(HDA_BPT_IOC_TIMEOUT_MS)); + if (!time_tx_left) { + tx_position = hda_dsp_stream_get_position(hdac_stream(bpt_tx_stream), + SNDRV_PCM_STREAM_PLAYBACK, false); + dev_err(dev, "%s: SDW BPT TX DMA did not complete: %ld\n", + __func__, tx_position); + ret = -ETIMEDOUT; + goto dma_disable; + } + + /* Make sure the DMA is flushed */ + i = 0; + do { + tx_position = hda_dsp_stream_get_position(hdac_stream(bpt_tx_stream), + SNDRV_PCM_STREAM_PLAYBACK, false); + usleep_range(1000, 1010); + i++; + } while (tx_position && i < HDA_BPT_IOC_TIMEOUT_MS); + if (tx_position) { + dev_err(dev, "%s: SDW BPT TX DMA position %ld was not cleared\n", + __func__, tx_position); + ret = -ETIMEDOUT; + goto dma_disable; + } + + /* the wait should be minimal here */ + time_rx_left = wait_for_completion_timeout(&hda_rx_stream->ioc, + msecs_to_jiffies(HDA_BPT_IOC_TIMEOUT_MS)); + if (!time_rx_left) { + rx_position = hda_dsp_stream_get_position(hdac_stream(bpt_rx_stream), + SNDRV_PCM_STREAM_CAPTURE, false); + dev_err(dev, "%s: SDW BPT RX DMA did not complete: %ld\n", + __func__, rx_position); + ret = -ETIMEDOUT; + goto dma_disable; + } + + /* Make sure the DMA is flushed */ + i = 0; + do { + rx_position = hda_dsp_stream_get_position(hdac_stream(bpt_rx_stream), + SNDRV_PCM_STREAM_CAPTURE, false); + usleep_range(1000, 1010); + i++; + } while (rx_position && i < HDA_BPT_IOC_TIMEOUT_MS); + if (rx_position) { + dev_err(dev, "%s: SDW BPT RX DMA position %ld was not cleared\n", + __func__, rx_position); + ret = -ETIMEDOUT; + goto dma_disable; + } + +dma_disable: + ret1 = hda_sdw_bpt_dma_disable(dev, bpt_rx_stream); + if (!ret) + ret = ret1; + + ret1 = hda_sdw_bpt_dma_disable(dev, bpt_tx_stream); + if (!ret) + ret = ret1; + + return ret; +} +EXPORT_SYMBOL_NS(hda_sdw_bpt_wait, "SND_SOC_SOF_INTEL_HDA_SDW_BPT"); + +int hda_sdw_bpt_close(struct device *dev, struct hdac_ext_stream *bpt_tx_stream, + struct snd_dma_buffer *dmab_tx_bdl, struct hdac_ext_stream *bpt_rx_stream, + struct snd_dma_buffer *dmab_rx_bdl) +{ + int ret; + int ret1; + + ret = hda_sdw_bpt_dma_deprepare(dev, bpt_rx_stream, dmab_rx_bdl); + + ret1 = hda_sdw_bpt_dma_deprepare(dev, bpt_tx_stream, dmab_tx_bdl); + if (!ret) + ret = ret1; + + return ret; +} +EXPORT_SYMBOL_NS(hda_sdw_bpt_close, "SND_SOC_SOF_INTEL_HDA_SDW_BPT"); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("SOF helpers for HDaudio SoundWire BPT"); +MODULE_IMPORT_NS("SND_SOC_SOF_INTEL_HDA_COMMON"); +MODULE_IMPORT_NS("SND_SOC_SOF_HDA_MLINK"); From 5cdc23764da8be3c1b2c022ddce95dd8e49673da Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:11 +0800 Subject: [PATCH 0648/2157] soundwire: intel: add BPT context definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed to be shared between open/send_async/close. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-13-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index dddd29381441..d44e70d3c4e3 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -48,11 +48,34 @@ struct sdw_intel_link_res { struct hdac_bus *hbus; }; +/** + * struct sdw_intel_bpt - SoundWire Intel BPT context + * @bpt_tx_stream: BPT TX stream + * @dmab_tx_bdl: BPT TX buffer descriptor list + * @bpt_rx_stream: BPT RX stream + * @dmab_rx_bdl: BPT RX buffer descriptor list + * @pdi0_buffer_size: PDI0 buffer size + * @pdi1_buffer_size: PDI1 buffer size + * @num_frames: number of frames + * @data_per_frame: data per frame + */ +struct sdw_intel_bpt { + struct hdac_ext_stream *bpt_tx_stream; + struct snd_dma_buffer dmab_tx_bdl; + struct hdac_ext_stream *bpt_rx_stream; + struct snd_dma_buffer dmab_rx_bdl; + unsigned int pdi0_buffer_size; + unsigned int pdi1_buffer_size; + unsigned int num_frames; + unsigned int data_per_frame; +}; + struct sdw_intel { struct sdw_cdns cdns; int instance; struct sdw_intel_link_res *link_res; bool startup_done; + struct sdw_intel_bpt bpt_ctx; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif From 4c1ce9f37d8a809dcdfba082cc9003880efa0a63 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:12 +0800 Subject: [PATCH 0649/2157] soundwire: intel_ace2x: add BPT send_async/wait callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for BTP API using Cadence and hda-sdw-bpt helpers. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-14-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel_ace2x.c | 312 ++++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index e305c6258ca9..5b31e1f69591 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -13,12 +13,320 @@ #include #include #include +#include #include #include #include "cadence_master.h" #include "bus.h" #include "intel.h" +static int sdw_slave_bpt_stream_add(struct sdw_slave *slave, struct sdw_stream_runtime *stream) +{ + struct sdw_stream_config sconfig = {0}; + struct sdw_port_config pconfig = {0}; + int ret; + + /* arbitrary configuration */ + sconfig.frame_rate = 16000; + sconfig.ch_count = 1; + sconfig.bps = 32; /* this is required for BPT/BRA */ + sconfig.direction = SDW_DATA_DIR_RX; + sconfig.type = SDW_STREAM_BPT; + + pconfig.num = 0; + pconfig.ch_mask = BIT(0); + + ret = sdw_stream_add_slave(slave, &sconfig, &pconfig, 1, stream); + if (ret) + dev_err(&slave->dev, "%s: failed: %d\n", __func__, ret); + + return ret; +} + +static int intel_ace2x_bpt_open_stream(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = &sdw->cdns; + struct sdw_bus *bus = &cdns->bus; + struct sdw_master_prop *prop = &bus->prop; + struct sdw_stream_runtime *stream; + struct sdw_stream_config sconfig; + struct sdw_port_config *pconfig; + unsigned int pdi0_buffer_size; + unsigned int tx_dma_bandwidth; + unsigned int pdi1_buffer_size; + unsigned int rx_dma_bandwidth; + unsigned int data_per_frame; + unsigned int tx_total_bytes; + struct sdw_cdns_pdi *pdi0; + struct sdw_cdns_pdi *pdi1; + unsigned int num_frames; + int command; + int ret1; + int ret; + int dir; + int i; + + stream = sdw_alloc_stream("BPT", SDW_STREAM_BPT); + if (!stream) + return -ENOMEM; + + cdns->bus.bpt_stream = stream; + + ret = sdw_slave_bpt_stream_add(slave, stream); + if (ret < 0) + goto release_stream; + + /* handle PDI0 first */ + dir = SDW_DATA_DIR_TX; + + pdi0 = sdw_cdns_alloc_pdi(cdns, &cdns->pcm, 1, dir, 0); + if (!pdi0) { + dev_err(cdns->dev, "%s: sdw_cdns_alloc_pdi0 failed\n", __func__); + ret = -EINVAL; + goto remove_slave; + } + + sdw_cdns_config_stream(cdns, 1, dir, pdi0); + + /* handle PDI1 */ + dir = SDW_DATA_DIR_RX; + + pdi1 = sdw_cdns_alloc_pdi(cdns, &cdns->pcm, 1, dir, 1); + if (!pdi1) { + dev_err(cdns->dev, "%s: sdw_cdns_alloc_pdi1 failed\n", __func__); + ret = -EINVAL; + goto remove_slave; + } + + sdw_cdns_config_stream(cdns, 1, dir, pdi1); + + /* + * the port config direction, number of channels and frame + * rate is totally arbitrary + */ + sconfig.direction = dir; + sconfig.ch_count = 1; + sconfig.frame_rate = 16000; + sconfig.type = SDW_STREAM_BPT; + sconfig.bps = 32; /* this is required for BPT/BRA */ + + /* Port configuration */ + pconfig = kcalloc(2, sizeof(*pconfig), GFP_KERNEL); + if (!pconfig) { + ret = -ENOMEM; + goto remove_slave; + } + + for (i = 0; i < 2 /* num_pdi */; i++) { + pconfig[i].num = i; + pconfig[i].ch_mask = 1; + } + + ret = sdw_stream_add_master(&cdns->bus, &sconfig, pconfig, 2, stream); + kfree(pconfig); + + if (ret < 0) { + dev_err(cdns->dev, "add master to stream failed:%d\n", ret); + goto remove_slave; + } + + ret = sdw_prepare_stream(cdns->bus.bpt_stream); + if (ret < 0) + goto remove_master; + + command = (msg->flags & SDW_MSG_FLAG_WRITE) ? 0 : 1; + + ret = sdw_cdns_bpt_find_buffer_sizes(command, cdns->bus.params.row, cdns->bus.params.col, + msg->len, SDW_BPT_MSG_MAX_BYTES, &data_per_frame, + &pdi0_buffer_size, &pdi1_buffer_size, &num_frames); + if (ret < 0) + goto deprepare_stream; + + sdw->bpt_ctx.pdi0_buffer_size = pdi0_buffer_size; + sdw->bpt_ctx.pdi1_buffer_size = pdi1_buffer_size; + sdw->bpt_ctx.num_frames = num_frames; + sdw->bpt_ctx.data_per_frame = data_per_frame; + tx_dma_bandwidth = div_u64((u64)pdi0_buffer_size * 8 * (u64)prop->default_frame_rate, + num_frames); + rx_dma_bandwidth = div_u64((u64)pdi1_buffer_size * 8 * (u64)prop->default_frame_rate, + num_frames); + + dev_dbg(cdns->dev, "Message len %d transferred in %d frames (%d per frame)\n", + msg->len, num_frames, data_per_frame); + dev_dbg(cdns->dev, "sizes pdi0 %d pdi1 %d tx_bandwidth %d rx_bandwidth %d\n", + pdi0_buffer_size, pdi1_buffer_size, tx_dma_bandwidth, rx_dma_bandwidth); + + ret = hda_sdw_bpt_open(cdns->dev->parent, /* PCI device */ + sdw->instance, &sdw->bpt_ctx.bpt_tx_stream, + &sdw->bpt_ctx.dmab_tx_bdl, pdi0_buffer_size, tx_dma_bandwidth, + &sdw->bpt_ctx.bpt_rx_stream, &sdw->bpt_ctx.dmab_rx_bdl, + pdi1_buffer_size, rx_dma_bandwidth); + if (ret < 0) { + dev_err(cdns->dev, "%s: hda_sdw_bpt_open failed %d\n", __func__, ret); + goto deprepare_stream; + } + + if (!command) { + ret = sdw_cdns_prepare_write_dma_buffer(msg->dev_num, msg->addr, msg->buf, + msg->len, data_per_frame, + sdw->bpt_ctx.dmab_tx_bdl.area, + pdi0_buffer_size, &tx_total_bytes); + } else { + ret = sdw_cdns_prepare_read_dma_buffer(msg->dev_num, msg->addr, msg->len, + data_per_frame, + sdw->bpt_ctx.dmab_tx_bdl.area, + pdi0_buffer_size, &tx_total_bytes); + } + + if (!ret) + return 0; + + dev_err(cdns->dev, "%s: sdw_prepare_%s_dma_buffer failed %d\n", + __func__, command ? "read" : "write", ret); + + ret1 = hda_sdw_bpt_close(cdns->dev->parent, /* PCI device */ + sdw->bpt_ctx.bpt_tx_stream, &sdw->bpt_ctx.dmab_tx_bdl, + sdw->bpt_ctx.bpt_rx_stream, &sdw->bpt_ctx.dmab_rx_bdl); + if (ret1 < 0) + dev_err(cdns->dev, "%s: hda_sdw_bpt_close failed: ret %d\n", + __func__, ret1); + +deprepare_stream: + sdw_deprepare_stream(cdns->bus.bpt_stream); + +remove_master: + ret1 = sdw_stream_remove_master(&cdns->bus, cdns->bus.bpt_stream); + if (ret1 < 0) + dev_err(cdns->dev, "%s: remove master failed: %d\n", + __func__, ret1); + +remove_slave: + ret1 = sdw_stream_remove_slave(slave, cdns->bus.bpt_stream); + if (ret1 < 0) + dev_err(cdns->dev, "%s: remove slave failed: %d\n", + __func__, ret1); + +release_stream: + sdw_release_stream(cdns->bus.bpt_stream); + cdns->bus.bpt_stream = NULL; + + return ret; +} + +static void intel_ace2x_bpt_close_stream(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = &sdw->cdns; + int ret; + + ret = hda_sdw_bpt_close(cdns->dev->parent /* PCI device */, sdw->bpt_ctx.bpt_tx_stream, + &sdw->bpt_ctx.dmab_tx_bdl, sdw->bpt_ctx.bpt_rx_stream, + &sdw->bpt_ctx.dmab_rx_bdl); + if (ret < 0) + dev_err(cdns->dev, "%s: hda_sdw_bpt_close failed: ret %d\n", + __func__, ret); + + ret = sdw_deprepare_stream(cdns->bus.bpt_stream); + if (ret < 0) + dev_err(cdns->dev, "%s: sdw_deprepare_stream failed: ret %d\n", + __func__, ret); + + ret = sdw_stream_remove_master(&cdns->bus, cdns->bus.bpt_stream); + if (ret < 0) + dev_err(cdns->dev, "%s: remove master failed: %d\n", + __func__, ret); + + ret = sdw_stream_remove_slave(slave, cdns->bus.bpt_stream); + if (ret < 0) + dev_err(cdns->dev, "%s: remove slave failed: %d\n", + __func__, ret); + + cdns->bus.bpt_stream = NULL; +} + +#define INTEL_BPT_MSG_BYTE_ALIGNMENT 32 + +static int intel_ace2x_bpt_send_async(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = &sdw->cdns; + int ret; + + if (msg->len % INTEL_BPT_MSG_BYTE_ALIGNMENT) { + dev_err(cdns->dev, "BPT message length %d is not a multiple of %d bytes\n", + msg->len, INTEL_BPT_MSG_BYTE_ALIGNMENT); + return -EINVAL; + } + + dev_dbg(cdns->dev, "BPT Transfer start\n"); + + ret = intel_ace2x_bpt_open_stream(sdw, slave, msg); + if (ret < 0) + return ret; + + ret = hda_sdw_bpt_send_async(cdns->dev->parent, /* PCI device */ + sdw->bpt_ctx.bpt_tx_stream, sdw->bpt_ctx.bpt_rx_stream); + if (ret < 0) { + dev_err(cdns->dev, "%s: hda_sdw_bpt_send_async failed: %d\n", + __func__, ret); + + intel_ace2x_bpt_close_stream(sdw, slave, msg); + + return ret; + } + + ret = sdw_enable_stream(cdns->bus.bpt_stream); + if (ret < 0) { + dev_err(cdns->dev, "%s: sdw_stream_enable failed: %d\n", + __func__, ret); + intel_ace2x_bpt_close_stream(sdw, slave, msg); + } + + return ret; +} + +static int intel_ace2x_bpt_wait(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg) +{ + struct sdw_cdns *cdns = &sdw->cdns; + int ret; + + dev_dbg(cdns->dev, "BPT Transfer wait\n"); + + ret = hda_sdw_bpt_wait(cdns->dev->parent, /* PCI device */ + sdw->bpt_ctx.bpt_tx_stream, sdw->bpt_ctx.bpt_rx_stream); + if (ret < 0) + dev_err(cdns->dev, "%s: hda_sdw_bpt_wait failed: %d\n", __func__, ret); + + ret = sdw_disable_stream(cdns->bus.bpt_stream); + if (ret < 0) { + dev_err(cdns->dev, "%s: sdw_stream_enable failed: %d\n", + __func__, ret); + goto err; + } + + if (msg->flags & SDW_MSG_FLAG_WRITE) { + ret = sdw_cdns_check_write_response(cdns->dev, sdw->bpt_ctx.dmab_rx_bdl.area, + sdw->bpt_ctx.pdi1_buffer_size, + sdw->bpt_ctx.num_frames); + if (ret < 0) + dev_err(cdns->dev, "%s: BPT Write failed %d\n", __func__, ret); + } else { + ret = sdw_cdns_check_read_response(cdns->dev, sdw->bpt_ctx.dmab_rx_bdl.area, + sdw->bpt_ctx.pdi1_buffer_size, + msg->buf, msg->len, sdw->bpt_ctx.num_frames, + sdw->bpt_ctx.data_per_frame); + if (ret < 0) + dev_err(cdns->dev, "%s: BPT Read failed %d\n", __func__, ret); + } + +err: + intel_ace2x_bpt_close_stream(sdw, slave, msg); + + return ret; +} + /* * shim vendor-specific (vs) ops */ @@ -753,7 +1061,11 @@ const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops = { .sync_check_cmdsync_unlocked = intel_check_cmdsync_unlocked, .program_sdi = intel_program_sdi, + + .bpt_send_async = intel_ace2x_bpt_send_async, + .bpt_wait = intel_ace2x_bpt_wait, }; EXPORT_SYMBOL_NS(sdw_intel_lnl_hw_ops, "SOUNDWIRE_INTEL"); MODULE_IMPORT_NS("SND_SOC_SOF_HDA_MLINK"); +MODULE_IMPORT_NS("SND_SOC_SOF_INTEL_HDA_SDW_BPT"); From 3394e2b125043aeede344d28fc73b3c0d2a5c21f Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 27 Feb 2025 22:06:13 +0800 Subject: [PATCH 0650/2157] ASoC: SOF: Intel: hda-sdw-bpt: add CHAIN_DMA support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the firmware is involved, the data can be transferred with a CHAIN_DMA on LNL+. The CHAIN_DMA needs to be programmed before the DMAs per the documentation. The states are not exactly symmetrical, on stop we must do a PAUSE and RESET. The FIFO size of 10ms was determined experimentally. With the minimum of 2ms, errors were reported by the codec, likely because of xruns. The code flow deals with the two TX and RX CHAIN_DMAs in symmetrical ways, i.e. alloc TX alloc RX enable TX enable RX disable RX disable TX free RX free TX Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Acked-by: Mark Brown Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-15-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- sound/soc/sof/intel/hda-sdw-bpt.c | 126 ++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/sound/soc/sof/intel/hda-sdw-bpt.c b/sound/soc/sof/intel/hda-sdw-bpt.c index bc7a3172656f..1327f1cad0bc 100644 --- a/sound/soc/sof/intel/hda-sdw-bpt.c +++ b/sound/soc/sof/intel/hda-sdw-bpt.c @@ -14,12 +14,80 @@ #include #include #include +#include #include "../ops.h" #include "../sof-priv.h" +#include "../ipc4-priv.h" #include "hda.h" #define BPT_FREQUENCY 192000 /* The max rate defined in rate_bits[] hdac_device.c */ #define BPT_MULTIPLIER ((BPT_FREQUENCY / 48000) - 1) +#define BPT_CHAIN_DMA_FIFO_MS 10 +/* + * This routine is directly inspired by sof_ipc4_chain_dma_trigger(), + * with major simplifications since there are no pipelines defined + * and no dependency on ALSA hw_params + */ +static int chain_dma_trigger(struct snd_sof_dev *sdev, unsigned int stream_tag, + int direction, int state) +{ + struct sof_ipc4_fw_data *ipc4_data = sdev->private; + bool allocate, enable, set_fifo_size; + struct sof_ipc4_msg msg = {{ 0 }}; + int dma_id; + + if (sdev->pdata->ipc_type != SOF_IPC_TYPE_4) + return -EOPNOTSUPP; + + switch (state) { + case SOF_IPC4_PIPE_RUNNING: /* Allocate and start the chain */ + allocate = true; + enable = true; + set_fifo_size = true; + break; + case SOF_IPC4_PIPE_PAUSED: /* Stop the chain */ + allocate = true; + enable = false; + set_fifo_size = false; + break; + case SOF_IPC4_PIPE_RESET: /* Deallocate chain resources and remove the chain */ + allocate = false; + enable = false; + set_fifo_size = false; + break; + default: + dev_err(sdev->dev, "Unexpected state %d", state); + return -EINVAL; + } + + msg.primary = SOF_IPC4_MSG_TYPE_SET(SOF_IPC4_GLB_CHAIN_DMA); + msg.primary |= SOF_IPC4_MSG_DIR(SOF_IPC4_MSG_REQUEST); + msg.primary |= SOF_IPC4_MSG_TARGET(SOF_IPC4_FW_GEN_MSG); + + /* for BPT/BRA we can use the same stream tag for host and link */ + dma_id = stream_tag - 1; + if (direction == SNDRV_PCM_STREAM_CAPTURE) + dma_id += ipc4_data->num_playback_streams; + + msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_HOST_ID(dma_id); + msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(dma_id); + + /* For BPT/BRA we use 32 bits so SCS is not set */ + + /* CHAIN DMA needs at least 2ms */ + if (set_fifo_size) + msg.extension |= SOF_IPC4_GLB_EXT_CHAIN_DMA_FIFO_SIZE(BPT_FREQUENCY / 1000 * + BPT_CHAIN_DMA_FIFO_MS * + sizeof(u32)); + + if (allocate) + msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_ALLOCATE_MASK; + + if (enable) + msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_ENABLE_MASK; + + return sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0); +} static int hda_sdw_bpt_dma_prepare(struct device *dev, struct hdac_ext_stream **sdw_bpt_stream, struct snd_dma_buffer *dmab_bdl, u32 bpt_num_bytes, @@ -46,6 +114,21 @@ static int hda_sdw_bpt_dma_prepare(struct device *dev, struct hdac_ext_stream ** } *sdw_bpt_stream = bpt_stream; + if (!sdev->dspless_mode_selected) { + struct hdac_stream *hstream; + u32 mask; + + /* decouple host and link DMA if the DSP is used */ + hstream = &bpt_stream->hstream; + mask = BIT(hstream->index); + + snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, mask, mask); + + snd_hdac_ext_stream_reset(bpt_stream); + + snd_hdac_ext_stream_setup(bpt_stream, format); + } + if (hdac_stream(bpt_stream)->direction == SNDRV_PCM_STREAM_PLAYBACK) { struct hdac_bus *bus = sof_to_bus(sdev); struct hdac_ext_link *hlink; @@ -63,6 +146,8 @@ static int hda_sdw_bpt_dma_deprepare(struct device *dev, struct hdac_ext_stream struct snd_dma_buffer *dmab_bdl) { struct snd_sof_dev *sdev = dev_get_drvdata(dev); + struct hdac_stream *hstream; + u32 mask; int ret; ret = hda_cl_cleanup(sdev->dev, dmab_bdl, true, sdw_bpt_stream); @@ -83,6 +168,22 @@ static int hda_sdw_bpt_dma_deprepare(struct device *dev, struct hdac_ext_stream snd_hdac_ext_bus_link_clear_stream_id(hlink, stream_tag); } + if (!sdev->dspless_mode_selected) { + /* Release CHAIN_DMA resources */ + ret = chain_dma_trigger(sdev, hdac_stream(sdw_bpt_stream)->stream_tag, + hdac_stream(sdw_bpt_stream)->direction, + SOF_IPC4_PIPE_RESET); + if (ret < 0) + dev_err(sdev->dev, "%s: chain_dma_trigger PIPE_RESET failed: %d\n", + __func__, ret); + + /* couple host and link DMA */ + hstream = &sdw_bpt_stream->hstream; + mask = BIT(hstream->index); + + snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL, mask, 0); + } + return 0; } @@ -95,6 +196,20 @@ static int hda_sdw_bpt_dma_enable(struct device *dev, struct hdac_ext_stream *sd if (ret < 0) dev_err(sdev->dev, "%s: SDW BPT DMA trigger start failed\n", __func__); + if (!sdev->dspless_mode_selected) { + /* the chain DMA needs to be programmed before the DMAs */ + ret = chain_dma_trigger(sdev, hdac_stream(sdw_bpt_stream)->stream_tag, + hdac_stream(sdw_bpt_stream)->direction, + SOF_IPC4_PIPE_RUNNING); + if (ret < 0) { + dev_err(sdev->dev, "%s: chain_dma_trigger failed: %d\n", + __func__, ret); + hda_cl_trigger(sdev->dev, sdw_bpt_stream, SNDRV_PCM_TRIGGER_STOP); + return ret; + } + snd_hdac_ext_stream_start(sdw_bpt_stream); + } + return ret; } @@ -103,6 +218,17 @@ static int hda_sdw_bpt_dma_disable(struct device *dev, struct hdac_ext_stream *s struct snd_sof_dev *sdev = dev_get_drvdata(dev); int ret; + if (!sdev->dspless_mode_selected) { + snd_hdac_ext_stream_clear(sdw_bpt_stream); + + ret = chain_dma_trigger(sdev, hdac_stream(sdw_bpt_stream)->stream_tag, + hdac_stream(sdw_bpt_stream)->direction, + SOF_IPC4_PIPE_PAUSED); + if (ret < 0) + dev_err(sdev->dev, "%s: chain_dma_trigger PIPE_PAUSED failed: %d\n", + __func__, ret); + } + ret = hda_cl_trigger(sdev->dev, sdw_bpt_stream, SNDRV_PCM_TRIGGER_STOP); if (ret < 0) dev_err(sdev->dev, "%s: SDW BPT DMA trigger stop failed\n", __func__); From bb5cb09eedce756eaeb66c69b6dac0f16e464e24 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:14 +0800 Subject: [PATCH 0651/2157] soundwire: debugfs: add interface for BPT/BRA transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add code to show what codec drivers will need to do to enable BPT/BRA transfers. The only difference is to set the 'command_type' file to '1'. A zero-value will rely on regular read/write commands in Column0. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-16-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/debugfs.c | 88 +++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 18 deletions(-) diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index 5bf0d9552433..3099ea074f10 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -136,9 +136,10 @@ static int sdw_slave_reg_show(struct seq_file *s_file, void *data) } DEFINE_SHOW_ATTRIBUTE(sdw_slave_reg); -#define MAX_CMD_BYTES 256 +#define MAX_CMD_BYTES (1024 * 1024) static int cmd; +static int cmd_type; static u32 start_addr; static size_t num_bytes; static u8 read_buffer[MAX_CMD_BYTES]; @@ -162,6 +163,25 @@ static int set_command(void *data, u64 value) DEFINE_DEBUGFS_ATTRIBUTE(set_command_fops, NULL, set_command, "%llu\n"); +static int set_command_type(void *data, u64 value) +{ + struct sdw_slave *slave = data; + + if (value > 1) + return -EINVAL; + + /* Userspace changed the hardware state behind the kernel's back */ + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + + dev_dbg(&slave->dev, "command type: %s\n", value ? "BRA" : "Column0"); + + cmd_type = (int)value; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(set_command_type_fops, NULL, + set_command_type, "%llu\n"); + static int set_start_address(void *data, u64 value) { struct sdw_slave *slave = data; @@ -197,9 +217,28 @@ static int set_num_bytes(void *data, u64 value) DEFINE_DEBUGFS_ATTRIBUTE(set_num_bytes_fops, NULL, set_num_bytes, "%llu\n"); +static int do_bpt_sequence(struct sdw_slave *slave, bool write, u8 *buffer) +{ + struct sdw_bpt_msg msg = {0}; + + msg.addr = start_addr; + msg.len = num_bytes; + msg.dev_num = slave->dev_num; + if (write) + msg.flags = SDW_MSG_FLAG_WRITE; + else + msg.flags = SDW_MSG_FLAG_READ; + msg.buf = buffer; + + return sdw_bpt_send_sync(slave->bus, slave, &msg); +} + static int cmd_go(void *data, u64 value) { + const struct firmware *fw = NULL; struct sdw_slave *slave = data; + ktime_t start_t; + ktime_t finish_t; int ret; if (value != 1) @@ -216,40 +255,52 @@ static int cmd_go(void *data, u64 value) return ret; } - /* Userspace changed the hardware state behind the kernel's back */ - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - - dev_dbg(&slave->dev, "starting command\n"); - if (cmd == 0) { - const struct firmware *fw; - ret = request_firmware(&fw, firmware_file, &slave->dev); if (ret < 0) { dev_err(&slave->dev, "firmware %s not found\n", firmware_file); goto out; } - - if (fw->size != num_bytes) { + if (fw->size < num_bytes) { dev_err(&slave->dev, - "firmware %s: unexpected size %zd, desired %zd\n", + "firmware %s: firmware size %zd, desired %zd\n", firmware_file, fw->size, num_bytes); - release_firmware(fw); goto out; } - - ret = sdw_nwrite_no_pm(slave, start_addr, num_bytes, fw->data); - release_firmware(fw); - } else { - ret = sdw_nread_no_pm(slave, start_addr, num_bytes, read_buffer); } - dev_dbg(&slave->dev, "command completed %d\n", ret); + /* Userspace changed the hardware state behind the kernel's back */ + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + + dev_dbg(&slave->dev, "starting command\n"); + start_t = ktime_get(); + + if (cmd == 0) { + if (cmd_type) + ret = do_bpt_sequence(slave, true, (u8 *)fw->data); + else + ret = sdw_nwrite_no_pm(slave, start_addr, num_bytes, fw->data); + } else { + memset(read_buffer, 0, sizeof(read_buffer)); + + if (cmd_type) + ret = do_bpt_sequence(slave, false, read_buffer); + else + ret = sdw_nread_no_pm(slave, start_addr, num_bytes, read_buffer); + } + + finish_t = ktime_get(); out: + if (fw) + release_firmware(fw); + pm_runtime_mark_last_busy(&slave->dev); pm_runtime_put(&slave->dev); + dev_dbg(&slave->dev, "command completed, num_byte %zu status %d, time %lld ms\n", + num_bytes, ret, div_u64(finish_t - start_t, NSEC_PER_MSEC)); + return ret; } DEFINE_DEBUGFS_ATTRIBUTE(cmd_go_fops, NULL, @@ -291,6 +342,7 @@ void sdw_slave_debugfs_init(struct sdw_slave *slave) /* interface to send arbitrary commands */ debugfs_create_file("command", 0200, d, slave, &set_command_fops); + debugfs_create_file("command_type", 0200, d, slave, &set_command_type_fops); debugfs_create_file("start_address", 0200, d, slave, &set_start_address_fops); debugfs_create_file("num_bytes", 0200, d, slave, &set_num_bytes_fops); debugfs_create_file("go", 0200, d, slave, &cmd_go_fops); From 9452422fc321f105a38435bc72afe5fd2c51882b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 27 Feb 2025 22:06:15 +0800 Subject: [PATCH 0652/2157] ASoC: rt711-sdca: add DP0 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP0 is required for BPT/BRA transport. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Acked-by: Mark Brown Tested-by: shumingf@realtek.com Link: https://lore.kernel.org/r/20250227140615.8147-17-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- sound/soc/codecs/rt711-sdca-sdw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index f5933d2e085e..e87e2e1bfff7 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -225,6 +225,14 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) j++; } + prop->dp0_prop = devm_kzalloc(&slave->dev, sizeof(*prop->dp0_prop), + GFP_KERNEL); + if (!prop->dp0_prop) + return -ENOMEM; + + prop->dp0_prop->simple_ch_prep_sm = true; + prop->dp0_prop->ch_prep_timeout = 10; + /* set the timeout values */ prop->clk_stop_timeout = 700; From 4a8463ae8d871ccd491d48a371a6789eb7378243 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Thu, 6 Mar 2025 20:29:23 +0100 Subject: [PATCH 0653/2157] phy: phy-rockchip-samsung-hdptx: Add support for RK3576 Despite the compatible already being listed in the bindings, the PHY driver never gained explicit support for it. This is especially a problem because the explicitly listed PHY addresses need to be specified for each SoC. To solve this, add the compatible, and a PHY config, with the address gleaned from rk3576.dtsi. Signed-off-by: Nicolas Frattaroli Reviewed-by: Cristian Ciocaltea Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250306-rk3576-hdptx-phy-v1-1-288cc4b0611a@collabora.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index f88369864c50..fe7c05748356 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -2017,6 +2017,13 @@ static const struct dev_pm_ops rk_hdptx_phy_pm_ops = { rk_hdptx_phy_runtime_resume, NULL) }; +static const struct rk_hdptx_phy_cfg rk3576_hdptx_phy_cfgs = { + .num_phys = 1, + .phy_ids = { + 0x2b000000, + }, +}; + static const struct rk_hdptx_phy_cfg rk3588_hdptx_phy_cfgs = { .num_phys = 2, .phy_ids = { @@ -2026,6 +2033,10 @@ static const struct rk_hdptx_phy_cfg rk3588_hdptx_phy_cfgs = { }; static const struct of_device_id rk_hdptx_phy_of_match[] = { + { + .compatible = "rockchip,rk3576-hdptx-phy", + .data = &rk3576_hdptx_phy_cfgs + }, { .compatible = "rockchip,rk3588-hdptx-phy", .data = &rk3588_hdptx_phy_cfgs From 1f7af7f3c353ae3b384a66a82c5074ac28901160 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 7 Mar 2025 00:57:04 +0000 Subject: [PATCH 0654/2157] dt-bindings: phy: document Allwinner A523 USB-2.0 PHY The Allwinner A523 SoC contains a USB-2.0 PHY fully compatible to the one used in the D1/T113s SoCs. This PHY controls the two USB-2.0 ports, there is a separate and quite different PHY for the USB-3.0 port. Add the new compatible string, with a fallback to the D1 version. Signed-off-by: Andre Przywara Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250307005712.16828-8-andre.przywara@arm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml index 21209126ed00..580c3296a18d 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml @@ -20,7 +20,9 @@ properties: - allwinner,sun20i-d1-usb-phy - allwinner,sun50i-a64-usb-phy - items: - - const: allwinner,sun50i-a100-usb-phy + - enum: + - allwinner,sun50i-a100-usb-phy + - allwinner,sun55i-a523-usb-phy - const: allwinner,sun20i-d1-usb-phy reg: From b02d41d884f64d22d5d5a2a4b35550f5e80bdb3d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 6 Mar 2025 01:54:08 +0000 Subject: [PATCH 0655/2157] phy: core: Remove unused phy_pm_runtime_(allow|forbid) phy_pm_runtime_allow() and phy_pm_runtime_forbid() were added in 2013 as part of commit ff764963479a ("drivers: phy: add generic PHY framework") but have remained unused. Remove them. Fix up the (English) docs - I've left the Chinese translation. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250306015408.277729-1-linux@treblig.org Signed-off-by: Vinod Koul --- Documentation/driver-api/phy/phy.rst | 3 +-- drivers/phy/phy-core.c | 24 ------------------------ include/linux/phy/phy.h | 12 ------------ 3 files changed, 1 insertion(+), 38 deletions(-) diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst index 81785c084f3e..719a2b3fd2ab 100644 --- a/Documentation/driver-api/phy/phy.rst +++ b/Documentation/driver-api/phy/phy.rst @@ -198,8 +198,7 @@ pm_runtime_get_sync of PHY provider device because of parent-child relationship. It should also be noted that phy_power_on and phy_power_off performs phy_pm_runtime_get_sync and phy_pm_runtime_put respectively. There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync, -phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and -phy_pm_runtime_forbid for performing PM operations. +phy_pm_runtime_put and phy_pm_runtime_put_sync for performing PM operations. PHY Mappings ============ diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 067316dfcd83..8e2daea81666 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -214,30 +214,6 @@ int phy_pm_runtime_put_sync(struct phy *phy) } EXPORT_SYMBOL_GPL(phy_pm_runtime_put_sync); -void phy_pm_runtime_allow(struct phy *phy) -{ - if (!phy) - return; - - if (!pm_runtime_enabled(&phy->dev)) - return; - - pm_runtime_allow(&phy->dev); -} -EXPORT_SYMBOL_GPL(phy_pm_runtime_allow); - -void phy_pm_runtime_forbid(struct phy *phy) -{ - if (!phy) - return; - - if (!pm_runtime_enabled(&phy->dev)) - return; - - pm_runtime_forbid(&phy->dev); -} -EXPORT_SYMBOL_GPL(phy_pm_runtime_forbid); - /** * phy_init - phy internal initialization before phy operation * @phy: the phy returned by phy_get() diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 03cd5bae92d3..e63e6e70e860 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -227,8 +227,6 @@ int phy_pm_runtime_get(struct phy *phy); int phy_pm_runtime_get_sync(struct phy *phy); int phy_pm_runtime_put(struct phy *phy); int phy_pm_runtime_put_sync(struct phy *phy); -void phy_pm_runtime_allow(struct phy *phy); -void phy_pm_runtime_forbid(struct phy *phy); int phy_init(struct phy *phy); int phy_exit(struct phy *phy); int phy_power_on(struct phy *phy); @@ -321,16 +319,6 @@ static inline int phy_pm_runtime_put_sync(struct phy *phy) return -ENOSYS; } -static inline void phy_pm_runtime_allow(struct phy *phy) -{ - return; -} - -static inline void phy_pm_runtime_forbid(struct phy *phy) -{ - return; -} - static inline int phy_init(struct phy *phy) { if (!phy) From b5198201932635e19068cc2e83a99adf38f32c43 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 6 Mar 2025 16:05:43 +0900 Subject: [PATCH 0656/2157] counter: Introduce the compare component Compare registers are used in devices to compare a counter channel against a particular count value (e.g. to check if a threshold has been reached). A macro COUNTER_COMP_COMPARE() is introduced to facilitate the creation of compare components as Count extensions. Link: https://lore.kernel.org/r/20250306-introduce-compare-component-v1-1-93993b3dca9c@kernel.org Signed-off-by: William Breathitt Gray --- Documentation/ABI/testing/sysfs-bus-counter | 9 +++++++++ include/linux/counter.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter index 73ac84c0bca7..3e8259e56d38 100644 --- a/Documentation/ABI/testing/sysfs-bus-counter +++ b/Documentation/ABI/testing/sysfs-bus-counter @@ -34,6 +34,14 @@ Contact: linux-iio@vger.kernel.org Description: Count data of Count Y represented as a string. +What: /sys/bus/counter/devices/counterX/countY/compare +KernelVersion: 6.15 +Contact: linux-iio@vger.kernel.org +Description: + If the counter device supports compare registers -- registers + used to compare counter channels against a particular count -- + the compare count for channel Y is provided by this attribute. + What: /sys/bus/counter/devices/counterX/countY/capture KernelVersion: 6.1 Contact: linux-iio@vger.kernel.org @@ -301,6 +309,7 @@ Description: What: /sys/bus/counter/devices/counterX/cascade_counts_enable_component_id What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_component_id +What: /sys/bus/counter/devices/counterX/countY/compare_component_id What: /sys/bus/counter/devices/counterX/countY/capture_component_id What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id What: /sys/bus/counter/devices/counterX/countY/floor_component_id diff --git a/include/linux/counter.h b/include/linux/counter.h index 426b7d58a438..f208e867dd0f 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -580,6 +580,9 @@ struct counter_array { #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) +#define COUNTER_COMP_COMPARE(_read, _write) \ + COUNTER_COMP_COUNT_U64("compare", _read, _write) + #define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \ { \ .type = COUNTER_COMP_COUNT_MODE, \ From ba27a0247b7187af36cb0b1fe7f7a68067ccb555 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 6 Mar 2025 16:05:44 +0900 Subject: [PATCH 0657/2157] counter: microchip-tcb-capture: Add support for RC Compare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Capture mode, the RC register serves as a compare register for the Timer Counter Channel. When a the Counter Value reaches the RC value, a RC Compare event occurs (COUNTER_EVENT_THRESHOLD). This patch exposes the RC register to userspace as the 'compare' Count extension, thus allowing users to configure the threshold condition for these events. Acked-by: Bence Csókás Link: https://lore.kernel.org/r/20250306-introduce-compare-component-v1-2-93993b3dca9c@kernel.org Signed-off-by: William Breathitt Gray --- drivers/counter/microchip-tcb-capture.c | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index aeaee6e0245e..00a463b044b5 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -302,11 +302,39 @@ static int mchp_tc_count_cap_write(struct counter_device *counter, return ret; } +static int mchp_tc_count_compare_read(struct counter_device *counter, struct counter_count *count, + u64 *val) +{ + struct mchp_tc_data *const priv = counter_priv(counter); + u32 cnt; + int ret; + + ret = regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], RC), &cnt); + if (ret < 0) + return ret; + + *val = cnt; + + return 0; +} + +static int mchp_tc_count_compare_write(struct counter_device *counter, struct counter_count *count, + u64 val) +{ + struct mchp_tc_data *const priv = counter_priv(counter); + + if (val > U32_MAX) + return -ERANGE; + + return regmap_write(priv->regmap, ATMEL_TC_REG(priv->channel[0], RC), val); +} + static DEFINE_COUNTER_ARRAY_CAPTURE(mchp_tc_cnt_cap_array, 2); static struct counter_comp mchp_tc_count_ext[] = { COUNTER_COMP_ARRAY_CAPTURE(mchp_tc_count_cap_read, mchp_tc_count_cap_write, mchp_tc_cnt_cap_array), + COUNTER_COMP_COMPARE(mchp_tc_count_compare_read, mchp_tc_count_compare_write), }; static struct counter_count mchp_tc_counts[] = { From ab37128ad5ed872e10048fd83f1dd59a9067fc68 Mon Sep 17 00:00:00 2001 From: Jie Gan Date: Mon, 10 Mar 2025 18:27:24 +0800 Subject: [PATCH 0658/2157] coresight: add verification process for coresight_etm_get_trace_id The coresight_etm_get_trace_id function is a global function. The verification process for 'csdev' is required prior to its usage. Fixes: c367a89dec26 ("Coresight: Add trace_id function to retrieving the trace ID") Signed-off-by: Jie Gan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250310102724.2112905-1-quic_jiegan@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index bd0a7edd38c9..c915a055534e 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1616,9 +1616,12 @@ EXPORT_SYMBOL_GPL(coresight_remove_driver); int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, struct coresight_device *sink) { - int trace_id; - int cpu = source_ops(csdev)->cpu_id(csdev); + int cpu, trace_id; + if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE || !source_ops(csdev)->cpu_id) + return -EINVAL; + + cpu = source_ops(csdev)->cpu_id(csdev); switch (mode) { case CS_MODE_SYSFS: trace_id = coresight_trace_id_get_cpu_id(cpu); From 26f060c106f630e34876c096c1c8997a9e68c371 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:02 +0000 Subject: [PATCH 0659/2157] coresight: change coresight_device lock type to raw_spinlock_t coresight_device->cscfg_csdev_lock can be held during __schedule() by perf_event_task_sched_out()/in(). Since coresight->cscfg_csdev_lock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type of coresight_device->cscfg_csdev_lock from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-2-yeoreum.yun@arm.com --- .../hwtracing/coresight/coresight-syscfg.c | 26 +++++++++---------- include/linux/coresight.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index 11138a9762b0..a70c1454b410 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -89,9 +89,9 @@ static int cscfg_add_csdev_cfg(struct coresight_device *csdev, } /* if matched features, add config to device.*/ if (config_csdev) { - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); list_add(&config_csdev->node, &csdev->config_csdev_list); - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); } return 0; @@ -194,9 +194,9 @@ static int cscfg_load_feat_csdev(struct coresight_device *csdev, /* add to internal csdev feature list & initialise using reset call */ cscfg_reset_feat(feat_csdev); - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); list_add(&feat_csdev->node, &csdev->feature_csdev_list); - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); return 0; } @@ -765,7 +765,7 @@ static int cscfg_list_add_csdev(struct coresight_device *csdev, INIT_LIST_HEAD(&csdev->feature_csdev_list); INIT_LIST_HEAD(&csdev->config_csdev_list); - spin_lock_init(&csdev->cscfg_csdev_lock); + raw_spin_lock_init(&csdev->cscfg_csdev_lock); return 0; } @@ -855,7 +855,7 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev) struct cscfg_feature_csdev *feat_csdev; unsigned long flags; - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); if (list_empty(&csdev->feature_csdev_list)) goto unlock_exit; @@ -863,7 +863,7 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev) cscfg_reset_feat(feat_csdev); unlock_exit: - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); } EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats); @@ -1059,7 +1059,7 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, * Look for matching configuration - set the active configuration * context if found. */ - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) { config_desc = config_csdev_item->config_desc; if ((atomic_read(&config_desc->active_cnt)) && @@ -1069,7 +1069,7 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, break; } } - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); /* * If found, attempt to enable @@ -1090,12 +1090,12 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, * * Set enabled if OK, err if not. */ - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); if (csdev->active_cscfg_ctxt) config_csdev_active->enabled = true; else err = -EBUSY; - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); } } return err; @@ -1124,7 +1124,7 @@ void cscfg_csdev_disable_active_config(struct coresight_device *csdev) * If it was not enabled, we have no work to do, otherwise mark as disabled. * Clear the active config pointer. */ - spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); + raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); config_csdev = (struct cscfg_config_csdev *)csdev->active_cscfg_ctxt; if (config_csdev) { if (!config_csdev->enabled) @@ -1133,7 +1133,7 @@ void cscfg_csdev_disable_active_config(struct coresight_device *csdev) config_csdev->enabled = false; } csdev->active_cscfg_ctxt = NULL; - spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); + raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); /* true if there was an enabled active config */ if (config_csdev) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 89b781e70fe7..4541bfc1cc6b 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -302,7 +302,7 @@ struct coresight_device { /* system configuration and feature lists */ struct list_head feature_csdev_list; struct list_head config_csdev_list; - spinlock_t cscfg_csdev_lock; + raw_spinlock_t cscfg_csdev_lock; void *active_cscfg_ctxt; }; From 743c5a97c64d9a2fd7feb3cf9fe3651fba0359db Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:03 +0000 Subject: [PATCH 0660/2157] coresight-etm4x: change etmv4_drvdata spinlock type to raw_spinlock_t In coresight-etm4x drivers, etmv4_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since etmv4_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type etmv4_drvdata->spinlock in coresight-etm4x drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Signed-off-by: Yeoreum Yun Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-3-yeoreum.yun@arm.com --- .../hwtracing/coresight/coresight-config.c | 8 +- .../hwtracing/coresight/coresight-config.h | 2 +- .../coresight/coresight-etm4x-core.c | 18 +- .../coresight/coresight-etm4x-sysfs.c | 250 +++++++++--------- drivers/hwtracing/coresight/coresight-etm4x.h | 2 +- 5 files changed, 140 insertions(+), 140 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-config.c b/drivers/hwtracing/coresight/coresight-config.c index 4723bf7402a2..4f72ae71b696 100644 --- a/drivers/hwtracing/coresight/coresight-config.c +++ b/drivers/hwtracing/coresight/coresight-config.c @@ -76,10 +76,10 @@ static int cscfg_set_on_enable(struct cscfg_feature_csdev *feat_csdev) unsigned long flags; int i; - spin_lock_irqsave(feat_csdev->drv_spinlock, flags); + raw_spin_lock_irqsave(feat_csdev->drv_spinlock, flags); for (i = 0; i < feat_csdev->nr_regs; i++) cscfg_set_reg(&feat_csdev->regs_csdev[i]); - spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags); + raw_spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags); dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s", feat_csdev->feat_desc->name, "set on enable"); return 0; @@ -91,10 +91,10 @@ static void cscfg_save_on_disable(struct cscfg_feature_csdev *feat_csdev) unsigned long flags; int i; - spin_lock_irqsave(feat_csdev->drv_spinlock, flags); + raw_spin_lock_irqsave(feat_csdev->drv_spinlock, flags); for (i = 0; i < feat_csdev->nr_regs; i++) cscfg_save_reg(&feat_csdev->regs_csdev[i]); - spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags); + raw_spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags); dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s", feat_csdev->feat_desc->name, "save on disable"); } diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h index 6ba013975741..b9ebc9fcfb7f 100644 --- a/drivers/hwtracing/coresight/coresight-config.h +++ b/drivers/hwtracing/coresight/coresight-config.h @@ -206,7 +206,7 @@ struct cscfg_feature_csdev { const struct cscfg_feature_desc *feat_desc; struct coresight_device *csdev; struct list_head node; - spinlock_t *drv_spinlock; + raw_spinlock_t *drv_spinlock; int nr_params; struct cscfg_parameter_csdev *params_csdev; int nr_regs; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index bb1e80df2914..e5972f16abff 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -830,7 +830,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa return ret; } - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); drvdata->trcid = path->trace_id; @@ -849,7 +849,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa if (ret) etm4_release_trace_id(drvdata); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); if (!ret) dev_dbg(&csdev->dev, "ETM tracing enabled\n"); @@ -1011,7 +1011,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) * DYING hotplug callback is serviced by the ETM driver. */ cpus_read_lock(); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* * Executing etm4_disable_hw on the cpu whose ETM is being disabled @@ -1019,7 +1019,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) */ smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); cpus_read_unlock(); /* @@ -1698,13 +1698,13 @@ static int etm4_starting_cpu(unsigned int cpu) if (!etmdrvdata[cpu]) return 0; - spin_lock(&etmdrvdata[cpu]->spinlock); + raw_spin_lock(&etmdrvdata[cpu]->spinlock); if (!etmdrvdata[cpu]->os_unlock) etm4_os_unlock(etmdrvdata[cpu]); if (coresight_get_mode(etmdrvdata[cpu]->csdev)) etm4_enable_hw(etmdrvdata[cpu]); - spin_unlock(&etmdrvdata[cpu]->spinlock); + raw_spin_unlock(&etmdrvdata[cpu]->spinlock); return 0; } @@ -1713,10 +1713,10 @@ static int etm4_dying_cpu(unsigned int cpu) if (!etmdrvdata[cpu]) return 0; - spin_lock(&etmdrvdata[cpu]->spinlock); + raw_spin_lock(&etmdrvdata[cpu]->spinlock); if (coresight_get_mode(etmdrvdata[cpu]->csdev)) etm4_disable_hw(etmdrvdata[cpu]); - spin_unlock(&etmdrvdata[cpu]->spinlock); + raw_spin_unlock(&etmdrvdata[cpu]->spinlock); return 0; } @@ -2160,7 +2160,7 @@ static int etm4_probe(struct device *dev) return -ENOMEM; } - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); drvdata->cpu = coresight_get_cpu(dev); if (drvdata->cpu < 0) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index e5216c0f60da..fdd0956fecb3 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -175,7 +175,7 @@ static ssize_t reset_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if (val) config->mode = 0x0; @@ -267,7 +267,7 @@ static ssize_t reset_store(struct device *dev, config->vmid_mask0 = 0x0; config->vmid_mask1 = 0x0; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); /* for sysfs - only release trace id when resetting */ etm4_release_trace_id(drvdata); @@ -301,7 +301,7 @@ static ssize_t mode_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->mode = val & ETMv4_MODE_ALL; if (drvdata->instrp0 == true) { @@ -438,7 +438,7 @@ static ssize_t mode_store(struct device *dev, if (config->mode & (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER)) etm4_config_trace_mode(config); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } @@ -467,14 +467,14 @@ static ssize_t pe_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if (val > drvdata->nr_pe) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EINVAL; } config->pe_sel = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(pe); @@ -502,7 +502,7 @@ static ssize_t event_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); switch (drvdata->nr_event) { case 0x0: /* EVENT0, bits[7:0] */ @@ -523,7 +523,7 @@ static ssize_t event_store(struct device *dev, default: break; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(event); @@ -551,7 +551,7 @@ static ssize_t event_instren_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* start by clearing all instruction event enable bits */ config->eventctrl1 &= ~TRCEVENTCTL1R_INSTEN_MASK; switch (drvdata->nr_event) { @@ -579,7 +579,7 @@ static ssize_t event_instren_store(struct device *dev, default: break; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(event_instren); @@ -740,11 +740,11 @@ static ssize_t event_vinst_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val &= TRCVICTLR_EVENT_MASK >> __bf_shf(TRCVICTLR_EVENT_MASK); config->vinst_ctrl &= ~TRCVICTLR_EVENT_MASK; config->vinst_ctrl |= FIELD_PREP(TRCVICTLR_EVENT_MASK, val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(event_vinst); @@ -772,13 +772,13 @@ static ssize_t s_exlevel_vinst_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* clear all EXLEVEL_S bits */ config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_S_MASK; /* enable instruction tracing for corresponding exception level */ val &= drvdata->s_ex_level; config->vinst_ctrl |= val << __bf_shf(TRCVICTLR_EXLEVEL_S_MASK); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(s_exlevel_vinst); @@ -807,13 +807,13 @@ static ssize_t ns_exlevel_vinst_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* clear EXLEVEL_NS bits */ config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_NS_MASK; /* enable instruction tracing for corresponding exception level */ val &= drvdata->ns_ex_level; config->vinst_ctrl |= val << __bf_shf(TRCVICTLR_EXLEVEL_NS_MASK); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(ns_exlevel_vinst); @@ -847,9 +847,9 @@ static ssize_t addr_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->addr_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_idx); @@ -863,7 +863,7 @@ static ssize_t addr_instdatatype_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; val = FIELD_GET(TRCACATRn_TYPE_MASK, config->addr_acc[idx]); len = scnprintf(buf, PAGE_SIZE, "%s\n", @@ -871,7 +871,7 @@ static ssize_t addr_instdatatype_show(struct device *dev, (val == TRCACATRn_TYPE_DATA_LOAD_ADDR ? "data_load" : (val == TRCACATRn_TYPE_DATA_STORE_ADDR ? "data_store" : "data_load_store"))); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return len; } @@ -889,13 +889,13 @@ static ssize_t addr_instdatatype_store(struct device *dev, if (sscanf(buf, "%s", str) != 1) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!strcmp(str, "instr")) /* TYPE, bits[1:0] */ config->addr_acc[idx] &= ~TRCACATRn_TYPE_MASK; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_instdatatype); @@ -910,14 +910,14 @@ static ssize_t addr_single_show(struct device *dev, struct etmv4_config *config = &drvdata->config; idx = config->addr_idx; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } val = (unsigned long)config->addr_val[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -933,17 +933,17 @@ static ssize_t addr_single_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } config->addr_val[idx] = (u64)val; config->addr_type[idx] = ETM_ADDR_TYPE_SINGLE; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_single); @@ -957,23 +957,23 @@ static ssize_t addr_range_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (idx % 2 != 0) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE && config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE && config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } val1 = (unsigned long)config->addr_val[idx]; val2 = (unsigned long)config->addr_val[idx + 1]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); } @@ -996,10 +996,10 @@ static ssize_t addr_range_store(struct device *dev, if (val1 > val2) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (idx % 2 != 0) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } @@ -1007,7 +1007,7 @@ static ssize_t addr_range_store(struct device *dev, config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE && config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } @@ -1024,7 +1024,7 @@ static ssize_t addr_range_store(struct device *dev, exclude = config->mode & ETM_MODE_EXCLUDE; etm4_set_mode_exclude(drvdata, exclude ? true : false); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_range); @@ -1038,17 +1038,17 @@ static ssize_t addr_start_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_START)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } val = (unsigned long)config->addr_val[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1064,22 +1064,22 @@ static ssize_t addr_start_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!drvdata->nr_addr_cmp) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EINVAL; } if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_START)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } config->addr_val[idx] = (u64)val; config->addr_type[idx] = ETM_ADDR_TYPE_START; config->vissctlr |= BIT(idx); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_start); @@ -1093,17 +1093,17 @@ static ssize_t addr_stop_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } val = (unsigned long)config->addr_val[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1119,22 +1119,22 @@ static ssize_t addr_stop_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!drvdata->nr_addr_cmp) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EINVAL; } if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE || config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return -EPERM; } config->addr_val[idx] = (u64)val; config->addr_type[idx] = ETM_ADDR_TYPE_STOP; config->vissctlr |= BIT(idx + 16); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_stop); @@ -1148,14 +1148,14 @@ static ssize_t addr_ctxtype_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; /* CONTEXTTYPE, bits[3:2] */ val = FIELD_GET(TRCACATRn_CONTEXTTYPE_MASK, config->addr_acc[idx]); len = scnprintf(buf, PAGE_SIZE, "%s\n", val == ETM_CTX_NONE ? "none" : (val == ETM_CTX_CTXID ? "ctxid" : (val == ETM_CTX_VMID ? "vmid" : "all"))); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return len; } @@ -1173,7 +1173,7 @@ static ssize_t addr_ctxtype_store(struct device *dev, if (sscanf(buf, "%s", str) != 1) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; if (!strcmp(str, "none")) /* start by clearing context type bits */ @@ -1200,7 +1200,7 @@ static ssize_t addr_ctxtype_store(struct device *dev, if (drvdata->numvmidc) config->addr_acc[idx] |= TRCACATRn_CONTEXTTYPE_VMID; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_ctxtype); @@ -1214,11 +1214,11 @@ static ssize_t addr_context_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; /* context ID comparator bits[6:4] */ val = FIELD_GET(TRCACATRn_CONTEXT_MASK, config->addr_acc[idx]); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1239,12 +1239,12 @@ static ssize_t addr_context_store(struct device *dev, drvdata->numcidc : drvdata->numvmidc)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; /* clear context ID comparator bits[6:4] */ config->addr_acc[idx] &= ~TRCACATRn_CONTEXT_MASK; config->addr_acc[idx] |= val << __bf_shf(TRCACATRn_CONTEXT_MASK); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_context); @@ -1258,10 +1258,10 @@ static ssize_t addr_exlevel_s_ns_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; val = FIELD_GET(TRCACATRn_EXLEVEL_MASK, config->addr_acc[idx]); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1280,12 +1280,12 @@ static ssize_t addr_exlevel_s_ns_store(struct device *dev, if (val & ~(TRCACATRn_EXLEVEL_MASK >> __bf_shf(TRCACATRn_EXLEVEL_MASK))) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; /* clear Exlevel_ns & Exlevel_s bits[14:12, 11:8], bit[15] is res0 */ config->addr_acc[idx] &= ~TRCACATRn_EXLEVEL_MASK; config->addr_acc[idx] |= val << __bf_shf(TRCACATRn_EXLEVEL_MASK); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(addr_exlevel_s_ns); @@ -1308,7 +1308,7 @@ static ssize_t addr_cmp_view_show(struct device *dev, int size = 0; bool exclude = false; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->addr_idx; addr_v = config->addr_val[idx]; addr_ctrl = config->addr_acc[idx]; @@ -1323,7 +1323,7 @@ static ssize_t addr_cmp_view_show(struct device *dev, } exclude = config->viiectlr & BIT(idx / 2 + 16); } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); if (addr_type) { size = scnprintf(buf, PAGE_SIZE, "addr_cmp[%i] %s %#lx", idx, addr_type_names[addr_type], addr_v); @@ -1367,9 +1367,9 @@ static ssize_t vinst_pe_cmp_start_stop_store(struct device *dev, if (!drvdata->nr_pe_cmp) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->vipcssctlr = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(vinst_pe_cmp_start_stop); @@ -1403,9 +1403,9 @@ static ssize_t seq_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->seq_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(seq_idx); @@ -1449,10 +1449,10 @@ static ssize_t seq_event_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->seq_idx; val = config->seq_ctrl[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1468,11 +1468,11 @@ static ssize_t seq_event_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->seq_idx; /* Seq control has two masks B[15:8] F[7:0] */ config->seq_ctrl[idx] = val & 0xFFFF; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(seq_event); @@ -1536,9 +1536,9 @@ static ssize_t cntr_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->cntr_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(cntr_idx); @@ -1552,10 +1552,10 @@ static ssize_t cntrldvr_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; val = config->cntrldvr[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1573,10 +1573,10 @@ static ssize_t cntrldvr_store(struct device *dev, if (val > ETM_CNTR_MAX_VAL) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; config->cntrldvr[idx] = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(cntrldvr); @@ -1590,10 +1590,10 @@ static ssize_t cntr_val_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; val = config->cntr_val[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1611,10 +1611,10 @@ static ssize_t cntr_val_store(struct device *dev, if (val > ETM_CNTR_MAX_VAL) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; config->cntr_val[idx] = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(cntr_val); @@ -1628,10 +1628,10 @@ static ssize_t cntr_ctrl_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; val = config->cntr_ctrl[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1647,10 +1647,10 @@ static ssize_t cntr_ctrl_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->cntr_idx; config->cntr_ctrl[idx] = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(cntr_ctrl); @@ -1688,9 +1688,9 @@ static ssize_t res_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->res_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(res_idx); @@ -1704,10 +1704,10 @@ static ssize_t res_ctrl_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->res_idx; val = config->res_ctrl[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1723,7 +1723,7 @@ static ssize_t res_ctrl_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->res_idx; /* For odd idx pair inversal bit is RES0 */ if (idx % 2 != 0) @@ -1733,7 +1733,7 @@ static ssize_t res_ctrl_store(struct device *dev, TRCRSCTLRn_INV | TRCRSCTLRn_GROUP_MASK | TRCRSCTLRn_SELECT_MASK); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(res_ctrl); @@ -1762,9 +1762,9 @@ static ssize_t sshot_idx_store(struct device *dev, if (val >= drvdata->nr_ss_cmp) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->ss_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(sshot_idx); @@ -1777,9 +1777,9 @@ static ssize_t sshot_ctrl_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = config->ss_ctrl[config->ss_idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1795,12 +1795,12 @@ static ssize_t sshot_ctrl_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->ss_idx; config->ss_ctrl[idx] = FIELD_PREP(TRCSSCCRn_SAC_ARC_RST_MASK, val); /* must clear bit 31 in related status register on programming */ config->ss_status[idx] &= ~TRCSSCSRn_STATUS; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(sshot_ctrl); @@ -1812,9 +1812,9 @@ static ssize_t sshot_status_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = config->ss_status[config->ss_idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } static DEVICE_ATTR_RO(sshot_status); @@ -1827,9 +1827,9 @@ static ssize_t sshot_pe_ctrl_show(struct device *dev, struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etmv4_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = config->ss_pe_cmp[config->ss_idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1845,12 +1845,12 @@ static ssize_t sshot_pe_ctrl_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->ss_idx; config->ss_pe_cmp[idx] = FIELD_PREP(TRCSSPCICRn_PC_MASK, val); /* must clear bit 31 in related status register on programming */ config->ss_status[idx] &= ~TRCSSCSRn_STATUS; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(sshot_pe_ctrl); @@ -1884,9 +1884,9 @@ static ssize_t ctxid_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->ctxid_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(ctxid_idx); @@ -1907,10 +1907,10 @@ static ssize_t ctxid_pid_show(struct device *dev, if (task_active_pid_ns(current) != &init_pid_ns) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->ctxid_idx; val = (unsigned long)config->ctxid_pid[idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -1945,10 +1945,10 @@ static ssize_t ctxid_pid_store(struct device *dev, if (kstrtoul(buf, 16, &pid)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); idx = config->ctxid_idx; config->ctxid_pid[idx] = (u64)pid; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(ctxid_pid); @@ -1968,10 +1968,10 @@ static ssize_t ctxid_masks_show(struct device *dev, if (task_active_pid_ns(current) != &init_pid_ns) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val1 = config->ctxid_mask0; val2 = config->ctxid_mask1; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); } @@ -2004,7 +2004,7 @@ static ssize_t ctxid_masks_store(struct device *dev, if ((drvdata->numcidc > 4) && (nr_inputs != 2)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* * each byte[0..3] controls mask value applied to ctxid * comparator[0..3] @@ -2076,7 +2076,7 @@ static ssize_t ctxid_masks_store(struct device *dev, mask >>= 0x8; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(ctxid_masks); @@ -2110,9 +2110,9 @@ static ssize_t vmid_idx_store(struct device *dev, * Use spinlock to ensure index doesn't change while it gets * dereferenced multiple times within a spinlock block elsewhere. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->vmid_idx = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(vmid_idx); @@ -2132,9 +2132,9 @@ static ssize_t vmid_val_show(struct device *dev, if (!task_is_in_init_pid_ns(current)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = (unsigned long)config->vmid_val[config->vmid_idx]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx\n", val); } @@ -2162,9 +2162,9 @@ static ssize_t vmid_val_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->vmid_val[config->vmid_idx] = (u64)val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(vmid_val); @@ -2183,10 +2183,10 @@ static ssize_t vmid_masks_show(struct device *dev, if (!task_is_in_init_pid_ns(current)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val1 = config->vmid_mask0; val2 = config->vmid_mask1; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2); } @@ -2218,7 +2218,7 @@ static ssize_t vmid_masks_store(struct device *dev, if ((drvdata->numvmidc > 4) && (nr_inputs != 2)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* * each byte[0..3] controls mask value applied to vmid @@ -2291,7 +2291,7 @@ static ssize_t vmid_masks_store(struct device *dev, else mask >>= 0x8; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(vmid_masks); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 2b92de17b5a2..bd7db36ba197 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -989,7 +989,7 @@ struct etmv4_drvdata { struct clk *pclk; void __iomem *base; struct coresight_device *csdev; - spinlock_t spinlock; + raw_spinlock_t spinlock; int cpu; u8 arch; u8 nr_pe; From 4cf364ca57d851e192ce02e98d314d22fa514895 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:04 +0000 Subject: [PATCH 0661/2157] coresight: change coresight_trace_id_map's lock type to raw_spinlock_t coresight_trace_id_map->lock can be acquired while coresight devices' drvdata_lock. But the drvdata_lock can be raw_spinlock_t (i.e) coresight-etm4x. To address this, change type of coresight_trace_id_map->lock to raw_spinlock_t Signed-off-by: Yeoreum Yun Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-4-yeoreum.yun@arm.com --- drivers/hwtracing/coresight/coresight-core.c | 2 +- .../hwtracing/coresight/coresight-trace-id.c | 22 +++++++++---------- include/linux/coresight.h | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index c915a055534e..fb43ef6a3b1f 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1296,7 +1296,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) if (csdev->type == CORESIGHT_DEV_TYPE_SINK || csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { - spin_lock_init(&csdev->perf_sink_id_map.lock); + raw_spin_lock_init(&csdev->perf_sink_id_map.lock); csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t); if (!csdev->perf_sink_id_map.cpu_map) { kfree(csdev); diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c index 378af743be45..7ed337d54d3e 100644 --- a/drivers/hwtracing/coresight/coresight-trace-id.c +++ b/drivers/hwtracing/coresight/coresight-trace-id.c @@ -22,7 +22,7 @@ enum trace_id_flags { static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0); static struct coresight_trace_id_map id_map_default = { .cpu_map = &id_map_default_cpu_ids, - .lock = __SPIN_LOCK_UNLOCKED(id_map_default.lock) + .lock = __RAW_SPIN_LOCK_UNLOCKED(id_map_default.lock) }; /* #define TRACE_ID_DEBUG 1 */ @@ -131,11 +131,11 @@ static void coresight_trace_id_release_all(struct coresight_trace_id_map *id_map unsigned long flags; int cpu; - spin_lock_irqsave(&id_map->lock, flags); + raw_spin_lock_irqsave(&id_map->lock, flags); bitmap_zero(id_map->used_ids, CORESIGHT_TRACE_IDS_MAX); for_each_possible_cpu(cpu) atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0); - spin_unlock_irqrestore(&id_map->lock, flags); + raw_spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID_MAP(id_map); } @@ -144,7 +144,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map unsigned long flags; int id; - spin_lock_irqsave(&id_map->lock, flags); + raw_spin_lock_irqsave(&id_map->lock, flags); /* check for existing allocation for this CPU */ id = _coresight_trace_id_read_cpu_id(cpu, id_map); @@ -171,7 +171,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id); get_cpu_id_out_unlock: - spin_unlock_irqrestore(&id_map->lock, flags); + raw_spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID_CPU(cpu, id); DUMP_ID_MAP(id_map); @@ -188,12 +188,12 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma if (!id) return; - spin_lock_irqsave(&id_map->lock, flags); + raw_spin_lock_irqsave(&id_map->lock, flags); coresight_trace_id_free(id, id_map); atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0); - spin_unlock_irqrestore(&id_map->lock, flags); + raw_spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID_CPU(cpu, id); DUMP_ID_MAP(id_map); } @@ -204,9 +204,9 @@ static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *i unsigned long flags; int id; - spin_lock_irqsave(&id_map->lock, flags); + raw_spin_lock_irqsave(&id_map->lock, flags); id = coresight_trace_id_alloc_new_id(id_map, preferred_id, traceid_flags); - spin_unlock_irqrestore(&id_map->lock, flags); + raw_spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID(id); DUMP_ID_MAP(id_map); @@ -217,9 +217,9 @@ static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map * { unsigned long flags; - spin_lock_irqsave(&id_map->lock, flags); + raw_spin_lock_irqsave(&id_map->lock, flags); coresight_trace_id_free(id, id_map); - spin_unlock_irqrestore(&id_map->lock, flags); + raw_spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID(id); DUMP_ID_MAP(id_map); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 4541bfc1cc6b..d79a242b271d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -239,7 +239,7 @@ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; atomic_t perf_cs_etm_session_active; - spinlock_t lock; + raw_spinlock_t lock; }; /** From e3044065fc2cf148d278475d5d03465ebf01248c Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:05 +0000 Subject: [PATCH 0662/2157] coresight-cti: change cti_drvdata spinlock's type to raw_spinlock_t In coresight-cti drivers, cti_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since cti_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type cti_drvdata->spinlock in coresight-cti drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-5-yeoreum.yun@arm.com --- .../hwtracing/coresight/coresight-cti-core.c | 44 +++++------ .../hwtracing/coresight/coresight-cti-sysfs.c | 76 +++++++++---------- drivers/hwtracing/coresight/coresight-cti.h | 2 +- 3 files changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index d2b5a5718c29..80f6265e3740 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -93,7 +93,7 @@ static int cti_enable_hw(struct cti_drvdata *drvdata) unsigned long flags; int rc = 0; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* no need to do anything if enabled or unpowered*/ if (config->hw_enabled || !config->hw_powered) @@ -108,7 +108,7 @@ static int cti_enable_hw(struct cti_drvdata *drvdata) config->hw_enabled = true; drvdata->config.enable_req_count++; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return rc; cti_state_unchanged: @@ -116,7 +116,7 @@ static int cti_enable_hw(struct cti_drvdata *drvdata) /* cannot enable due to error */ cti_err_not_enabled: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return rc; } @@ -125,7 +125,7 @@ static void cti_cpuhp_enable_hw(struct cti_drvdata *drvdata) { struct cti_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); config->hw_powered = true; /* no need to do anything if no enable request */ @@ -138,12 +138,12 @@ static void cti_cpuhp_enable_hw(struct cti_drvdata *drvdata) cti_write_all_hw_regs(drvdata); config->hw_enabled = true; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return; /* did not re-enable due to no claim / no request */ cti_hp_not_enabled: - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); } /* disable hardware */ @@ -153,7 +153,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) struct coresight_device *csdev = drvdata->csdev; int ret = 0; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* don't allow negative refcounts, return an error */ if (!drvdata->config.enable_req_count) { @@ -177,12 +177,12 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return ret; /* not disabled this call */ cti_not_disabled: - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return ret; } @@ -198,11 +198,11 @@ void cti_write_intack(struct device *dev, u32 ackval) struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); struct cti_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* write if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, CTIINTACK, ackval); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); } /* @@ -369,7 +369,7 @@ int cti_channel_trig_op(struct device *dev, enum cti_chan_op op, reg_offset = (direction == CTI_TRIG_IN ? CTIINEN(trigger_idx) : CTIOUTEN(trigger_idx)); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* read - modify write - the trigger / channel enable value */ reg_value = direction == CTI_TRIG_IN ? config->ctiinen[trigger_idx] : @@ -388,7 +388,7 @@ int cti_channel_trig_op(struct device *dev, enum cti_chan_op op, /* write through if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, reg_offset, reg_value); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return 0; } @@ -406,7 +406,7 @@ int cti_channel_gate_op(struct device *dev, enum cti_chan_gate_op op, chan_bitmask = BIT(channel_idx); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); reg_value = config->ctigate; switch (op) { case CTI_GATE_CHAN_ENABLE: @@ -426,7 +426,7 @@ int cti_channel_gate_op(struct device *dev, enum cti_chan_gate_op op, if (cti_active(config)) cti_write_single_reg(drvdata, CTIGATE, reg_value); } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return err; } @@ -445,7 +445,7 @@ int cti_channel_setop(struct device *dev, enum cti_chan_set_op op, chan_bitmask = BIT(channel_idx); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); reg_value = config->ctiappset; switch (op) { case CTI_CHAN_SET: @@ -473,7 +473,7 @@ int cti_channel_setop(struct device *dev, enum cti_chan_set_op op, if ((err == 0) && cti_active(config)) cti_write_single_reg(drvdata, reg_offset, reg_value); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return err; } @@ -676,7 +676,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, if (WARN_ON_ONCE(drvdata->ctidev.cpu != cpu)) return NOTIFY_BAD; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); switch (cmd) { case CPU_PM_ENTER: @@ -716,7 +716,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, } cti_notify_exit: - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return notify_res; } @@ -743,11 +743,11 @@ static int cti_dying_cpu(unsigned int cpu) if (!drvdata) return 0; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); drvdata->config.hw_powered = false; if (drvdata->config.hw_enabled) coresight_disclaim_device(drvdata->csdev); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return 0; } @@ -888,7 +888,7 @@ static int cti_probe(struct amba_device *adev, const struct amba_id *id) drvdata->ctidev.ctm_id = 0; INIT_LIST_HEAD(&drvdata->ctidev.trig_cons); - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); /* initialise CTI driver config values */ cti_set_default_config(dev, drvdata); diff --git a/drivers/hwtracing/coresight/coresight-cti-sysfs.c b/drivers/hwtracing/coresight/coresight-cti-sysfs.c index d25dd2737b49..572b80ee96fb 100644 --- a/drivers/hwtracing/coresight/coresight-cti-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-cti-sysfs.c @@ -84,11 +84,11 @@ static ssize_t enable_show(struct device *dev, bool enabled, powered; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); enable_req = drvdata->config.enable_req_count; powered = drvdata->config.hw_powered; enabled = drvdata->config.hw_enabled; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); if (powered) return sprintf(buf, "%d\n", enabled); @@ -134,9 +134,9 @@ static ssize_t powered_show(struct device *dev, bool powered; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); powered = drvdata->config.hw_powered; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%d\n", powered); } @@ -181,10 +181,10 @@ static ssize_t coresight_cti_reg_show(struct device *dev, u32 val = 0; pm_runtime_get_sync(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if (drvdata->config.hw_powered) val = readl_relaxed(drvdata->base + cti_attr->off); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); pm_runtime_put_sync(dev->parent); return sysfs_emit(buf, "0x%x\n", val); } @@ -202,10 +202,10 @@ static __maybe_unused ssize_t coresight_cti_reg_store(struct device *dev, return -EINVAL; pm_runtime_get_sync(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if (drvdata->config.hw_powered) cti_write_single_reg(drvdata, cti_attr->off, val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); pm_runtime_put_sync(dev->parent); return size; } @@ -264,7 +264,7 @@ static ssize_t cti_reg32_show(struct device *dev, char *buf, struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); struct cti_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); if ((reg_offset >= 0) && cti_active(config)) { CS_UNLOCK(drvdata->base); val = readl_relaxed(drvdata->base + reg_offset); @@ -274,7 +274,7 @@ static ssize_t cti_reg32_show(struct device *dev, char *buf, } else if (pcached_val) { val = *pcached_val; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%#x\n", val); } @@ -293,7 +293,7 @@ static ssize_t cti_reg32_store(struct device *dev, const char *buf, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* local store */ if (pcached_val) *pcached_val = (u32)val; @@ -301,7 +301,7 @@ static ssize_t cti_reg32_store(struct device *dev, const char *buf, /* write through if offset and enabled */ if ((reg_offset >= 0) && cti_active(config)) cti_write_single_reg(drvdata, reg_offset, val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } @@ -349,9 +349,9 @@ static ssize_t inout_sel_store(struct device *dev, if (val > (CTIINOUTEN_MAX - 1)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); drvdata->config.ctiinout_sel = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(inout_sel); @@ -364,10 +364,10 @@ static ssize_t inen_show(struct device *dev, int index; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); index = drvdata->config.ctiinout_sel; val = drvdata->config.ctiinen[index]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%#lx\n", val); } @@ -383,14 +383,14 @@ static ssize_t inen_store(struct device *dev, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); index = config->ctiinout_sel; config->ctiinen[index] = val; /* write through if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, CTIINEN(index), val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(inen); @@ -403,10 +403,10 @@ static ssize_t outen_show(struct device *dev, int index; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); index = drvdata->config.ctiinout_sel; val = drvdata->config.ctiouten[index]; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%#lx\n", val); } @@ -422,14 +422,14 @@ static ssize_t outen_store(struct device *dev, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); index = config->ctiinout_sel; config->ctiouten[index] = val; /* write through if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, CTIOUTEN(index), val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(outen); @@ -463,7 +463,7 @@ static ssize_t appclear_store(struct device *dev, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* a 1'b1 in appclr clears down the same bit in appset*/ config->ctiappset &= ~val; @@ -471,7 +471,7 @@ static ssize_t appclear_store(struct device *dev, /* write through if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, CTIAPPCLEAR, val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_WO(appclear); @@ -487,12 +487,12 @@ static ssize_t apppulse_store(struct device *dev, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* write through if enabled */ if (cti_active(config)) cti_write_single_reg(drvdata, CTIAPPPULSE, val); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_WO(apppulse); @@ -681,9 +681,9 @@ static ssize_t trig_filter_enable_show(struct device *dev, u32 val; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = drvdata->config.trig_filter_enable; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%d\n", val); } @@ -697,9 +697,9 @@ static ssize_t trig_filter_enable_store(struct device *dev, if (kstrtoul(buf, 0, &val)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); drvdata->config.trig_filter_enable = !!val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_RW(trig_filter_enable); @@ -728,7 +728,7 @@ static ssize_t chan_xtrigs_reset_store(struct device *dev, struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); struct cti_config *config = &drvdata->config; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); /* clear the CTI trigger / channel programming registers */ for (i = 0; i < config->nr_trig_max; i++) { @@ -747,7 +747,7 @@ static ssize_t chan_xtrigs_reset_store(struct device *dev, if (cti_active(config)) cti_write_all_hw_regs(drvdata); - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } static DEVICE_ATTR_WO(chan_xtrigs_reset); @@ -768,9 +768,9 @@ static ssize_t chan_xtrigs_sel_store(struct device *dev, if (val > (drvdata->config.nr_ctm_channels - 1)) return -EINVAL; - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); drvdata->config.xtrig_rchan_sel = val; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return size; } @@ -781,9 +781,9 @@ static ssize_t chan_xtrigs_sel_show(struct device *dev, unsigned long val; struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); val = drvdata->config.xtrig_rchan_sel; - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); return sprintf(buf, "%ld\n", val); } @@ -838,12 +838,12 @@ static ssize_t print_chan_list(struct device *dev, unsigned long inuse_bits = 0, chan_mask; /* scan regs to get bitmap of channels in use. */ - spin_lock(&drvdata->spinlock); + raw_spin_lock(&drvdata->spinlock); for (i = 0; i < config->nr_trig_max; i++) { inuse_bits |= config->ctiinen[i]; inuse_bits |= config->ctiouten[i]; } - spin_unlock(&drvdata->spinlock); + raw_spin_unlock(&drvdata->spinlock); /* inverse bits if printing free channels */ if (!inuse) diff --git a/drivers/hwtracing/coresight/coresight-cti.h b/drivers/hwtracing/coresight/coresight-cti.h index cb9ee616d01f..16e310e7e9d4 100644 --- a/drivers/hwtracing/coresight/coresight-cti.h +++ b/drivers/hwtracing/coresight/coresight-cti.h @@ -175,7 +175,7 @@ struct cti_drvdata { void __iomem *base; struct coresight_device *csdev; struct cti_device ctidev; - spinlock_t spinlock; + raw_spinlock_t spinlock; struct cti_config config; struct list_head node; void (*csdev_release)(struct device *dev); From 6b80c0abe475ed1017c5e862636049aa1cc17a1a Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:06 +0000 Subject: [PATCH 0663/2157] coresight-etb10: change etb_drvdata spinlock's type to raw_spinlock_t In coresight-etb10 drivers, etb_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since etb_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type etb_drvdata->spinlock in coresight-etb10 drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-6-yeoreum.yun@arm.com --- drivers/hwtracing/coresight/coresight-etb10.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index aea9ac9c4bd0..7948597d483d 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -84,7 +84,7 @@ struct etb_drvdata { struct clk *atclk; struct coresight_device *csdev; struct miscdevice miscdev; - spinlock_t spinlock; + raw_spinlock_t spinlock; local_t reading; pid_t pid; u8 *buf; @@ -145,7 +145,7 @@ static int etb_enable_sysfs(struct coresight_device *csdev) unsigned long flags; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Don't messup with perf sessions. */ if (coresight_get_mode(csdev) == CS_MODE_PERF) { @@ -163,7 +163,7 @@ static int etb_enable_sysfs(struct coresight_device *csdev) csdev->refcnt++; out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } @@ -176,7 +176,7 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data) struct perf_output_handle *handle = data; struct cs_buffers *buf = etm_perf_sink_config(handle); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* No need to continue if the component is already in used by sysFS. */ if (coresight_get_mode(drvdata->csdev) == CS_MODE_SYSFS) { @@ -219,7 +219,7 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data) } out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } @@ -352,11 +352,11 @@ static int etb_disable(struct coresight_device *csdev) struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); unsigned long flags; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); csdev->refcnt--; if (csdev->refcnt) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } @@ -366,7 +366,7 @@ static int etb_disable(struct coresight_device *csdev) /* Dissociate from monitored process. */ drvdata->pid = -1; coresight_set_mode(csdev, CS_MODE_DISABLED); - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(&csdev->dev, "ETB disabled\n"); return 0; @@ -443,7 +443,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Don't do anything if another tracer is using this sink */ if (csdev->refcnt != 1) @@ -566,7 +566,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, __etb_enable_hw(drvdata); CS_LOCK(drvdata->base); out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return to_read; } @@ -587,13 +587,13 @@ static void etb_dump(struct etb_drvdata *drvdata) { unsigned long flags; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (coresight_get_mode(drvdata->csdev) == CS_MODE_SYSFS) { __etb_disable_hw(drvdata); etb_dump_hw(drvdata); __etb_enable_hw(drvdata); } - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(&drvdata->csdev->dev, "ETB dumped\n"); } @@ -746,7 +746,7 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id) drvdata->base = base; desc.access = CSDEV_ACCESS_IOMEM(base); - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); drvdata->buffer_depth = etb_get_buffer_depth(drvdata); From 56eb02f0b04fce3b5c4e3224b659e342cc7a8c56 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:07 +0000 Subject: [PATCH 0664/2157] coresight-funnel: change funnel_drvdata spinlock's type to raw_spinlock_t In coresight-funnel drivers, cti_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since funnel_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type funnel_drvdata->spinlock in coresight-funnel drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Signed-off-by: Yeoreum Yun Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-7-yeoreum.yun@arm.com --- drivers/hwtracing/coresight/coresight-funnel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 8faf51469bb8..0541712b2bcb 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -47,7 +47,7 @@ struct funnel_drvdata { struct clk *pclk; struct coresight_device *csdev; unsigned long priority; - spinlock_t spinlock; + raw_spinlock_t spinlock; }; static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port) @@ -85,7 +85,7 @@ static int funnel_enable(struct coresight_device *csdev, unsigned long flags; bool first_enable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (in->dest_refcnt == 0) { if (drvdata->base) rc = dynamic_funnel_enable_hw(drvdata, in->dest_port); @@ -94,7 +94,7 @@ static int funnel_enable(struct coresight_device *csdev, } if (!rc) in->dest_refcnt++; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (first_enable) dev_dbg(&csdev->dev, "FUNNEL inport %d enabled\n", @@ -129,13 +129,13 @@ static void funnel_disable(struct coresight_device *csdev, unsigned long flags; bool last_disable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (--in->dest_refcnt == 0) { if (drvdata->base) dynamic_funnel_disable_hw(drvdata, in->dest_port); last_disable = true; } - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (last_disable) dev_dbg(&csdev->dev, "FUNNEL inport %d disabled\n", @@ -266,7 +266,7 @@ static int funnel_probe(struct device *dev, struct resource *res) } dev->platform_data = pdata; - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG; desc.ops = &funnel_cs_ops; From 982d0a0e08db46865cfbb901444b192c528fc741 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:08 +0000 Subject: [PATCH 0665/2157] coresight-replicator: change replicator_drvdata spinlock's type to raw_spinlock_t In coresight-replicator drivers, replicator_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since replicator_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type replicator_drvdata->spinlock in coresight-replicator drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-8-yeoreum.yun@arm.com --- drivers/hwtracing/coresight/coresight-replicator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index a1181c9048c0..ee7ee79f6cf7 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -41,7 +41,7 @@ struct replicator_drvdata { struct clk *atclk; struct clk *pclk; struct coresight_device *csdev; - spinlock_t spinlock; + raw_spinlock_t spinlock; bool check_idfilter_val; }; @@ -125,7 +125,7 @@ static int replicator_enable(struct coresight_device *csdev, unsigned long flags; bool first_enable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (out->src_refcnt == 0) { if (drvdata->base) rc = dynamic_replicator_enable(drvdata, in->dest_port, @@ -135,7 +135,7 @@ static int replicator_enable(struct coresight_device *csdev, } if (!rc) out->src_refcnt++; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (first_enable) dev_dbg(&csdev->dev, "REPLICATOR enabled\n"); @@ -179,14 +179,14 @@ static void replicator_disable(struct coresight_device *csdev, unsigned long flags; bool last_disable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (--out->src_refcnt == 0) { if (drvdata->base) dynamic_replicator_disable(drvdata, in->dest_port, out->src_port); last_disable = true; } - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (last_disable) dev_dbg(&csdev->dev, "REPLICATOR disabled\n"); @@ -277,7 +277,7 @@ static int replicator_probe(struct device *dev, struct resource *res) } dev->platform_data = pdata; - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT; desc.ops = &replicator_cs_ops; From db11f75bc29c843a70d4af1921f05d50fc72f49b Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:09 +0000 Subject: [PATCH 0666/2157] coresight-tmc: change tmc_drvdata spinlock's type to raw_spinlock_t In coresight-tmc drivers, tmc_drvdata->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since tmc_drvdata->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type tmc_drvdata->spinlock in coresight-tmc drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-9-yeoreum.yun@arm.com --- .../hwtracing/coresight/coresight-tmc-core.c | 14 +++--- .../hwtracing/coresight/coresight-tmc-etf.c | 48 +++++++++---------- .../hwtracing/coresight/coresight-tmc-etr.c | 44 ++++++++--------- drivers/hwtracing/coresight/coresight-tmc.h | 2 +- 4 files changed, 54 insertions(+), 54 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index d5122e12daa7..a7814e8e657b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -358,12 +358,12 @@ static int tmc_crashdata_open(struct inode *inode, struct file *file) mdata = drvdata->crash_mdata.vaddr; rbuf = &drvdata->resrv_buf; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (mdata->valid) rbuf->reading = true; else err = -ENOENT; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (err) goto exit; @@ -408,9 +408,9 @@ static int tmc_crashdata_release(struct inode *inode, struct file *file) crashdev); rbuf = &drvdata->resrv_buf; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); rbuf->reading = false; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__); return ret; @@ -801,7 +801,7 @@ static int __tmc_probe(struct device *dev, struct resource *res) drvdata->base = base; desc.access = CSDEV_ACCESS_IOMEM(base); - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); drvdata->config_type = BMVAL(devid, 6, 7); @@ -913,7 +913,7 @@ static void tmc_shutdown(struct amba_device *adev) unsigned long flags; struct tmc_drvdata *drvdata = amba_get_drvdata(adev); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (coresight_get_mode(drvdata->csdev) == CS_MODE_DISABLED) goto out; @@ -927,7 +927,7 @@ static void tmc_shutdown(struct amba_device *adev) * the system is going down after this. */ out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); } static void __tmc_remove(struct device *dev) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index bdc3a7e9ba06..d858740001c2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -185,9 +185,9 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) * If we don't have a buffer release the lock and allocate memory. * Otherwise keep the lock and move along. */ - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (!drvdata->buf) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Allocating the memory here while outside of the spinlock */ buf = kzalloc(drvdata->size, GFP_KERNEL); @@ -195,7 +195,7 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) return -ENOMEM; /* Let's try again */ - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); } if (drvdata->reading) { @@ -237,7 +237,7 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) used = false; } out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free memory outside the spinlock if need be */ if (!used) @@ -255,7 +255,7 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) struct perf_output_handle *handle = data; struct cs_buffers *buf = etm_perf_sink_config(handle); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); do { ret = -EINVAL; if (drvdata->reading) @@ -298,7 +298,7 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) csdev->refcnt++; } } while (0); - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } @@ -333,16 +333,16 @@ static int tmc_disable_etf_sink(struct coresight_device *csdev) unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } csdev->refcnt--; if (csdev->refcnt) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } @@ -353,7 +353,7 @@ static int tmc_disable_etf_sink(struct coresight_device *csdev) drvdata->pid = -1; coresight_set_mode(csdev, CS_MODE_DISABLED); - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(&csdev->dev, "TMC-ETB/ETF disabled\n"); return 0; @@ -368,9 +368,9 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); bool first_enable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } @@ -383,7 +383,7 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, } if (!ret) csdev->refcnt++; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (first_enable) dev_dbg(&csdev->dev, "TMC-ETF enabled\n"); @@ -398,9 +398,9 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); bool last_disable = false; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return; } @@ -410,7 +410,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, coresight_set_mode(csdev, CS_MODE_DISABLED); last_disable = true; } - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (last_disable) dev_dbg(&csdev->dev, "TMC-ETF disabled\n"); @@ -490,7 +490,7 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, if (WARN_ON_ONCE(coresight_get_mode(csdev) != CS_MODE_PERF)) return 0; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Don't do anything if another tracer is using this sink */ if (csdev->refcnt != 1) @@ -587,7 +587,7 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, */ CS_LOCK(drvdata->base); out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return to_read; } @@ -705,7 +705,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) drvdata->config_type != TMC_CONFIG_TYPE_ETF)) return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { ret = -EBUSY; @@ -737,7 +737,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) drvdata->reading = true; out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } @@ -754,14 +754,14 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) drvdata->config_type != TMC_CONFIG_TYPE_ETF)) return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Re-enable the TMC if need be */ if (coresight_get_mode(drvdata->csdev) == CS_MODE_SYSFS) { /* There is no point in reading a TMC in HW FIFO mode */ mode = readl_relaxed(drvdata->base + TMC_MODE); if (mode != TMC_MODE_CIRCULAR_BUFFER) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EINVAL; } /* @@ -775,7 +775,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) memset(drvdata->buf, 0, drvdata->size); rc = __tmc_etb_enable_hw(drvdata); if (rc) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return rc; } } else { @@ -788,7 +788,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) } drvdata->reading = false; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* * Free allocated memory outside of the spinlock. There is no need diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index eda7cdad0e2b..76a8cb29b68a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1251,10 +1251,10 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) * buffer, provided the size matches. Any allocation has to be done * with the lock released. */ - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); sysfs_buf = READ_ONCE(drvdata->sysfs_buf); if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Allocate memory with the locks released */ free_buf = new_buf = tmc_etr_setup_sysfs_buf(drvdata); @@ -1262,7 +1262,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) return new_buf; /* Let's try again */ - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); } if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) { @@ -1281,7 +1281,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) } out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free memory outside the spinlock if need be */ if (free_buf) @@ -1299,7 +1299,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) if (IS_ERR(sysfs_buf)) return PTR_ERR(sysfs_buf); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* * In sysFS mode we can have multiple writers per sink. Since this @@ -1318,7 +1318,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) } out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); if (!ret) dev_dbg(&csdev->dev, "TMC-ETR enabled\n"); @@ -1637,17 +1637,17 @@ tmc_update_etr_buffer(struct coresight_device *csdev, struct etr_perf_buffer *etr_perf = config; struct etr_buf *etr_buf = etr_perf->etr_buf; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Don't do anything if another tracer is using this sink */ if (csdev->refcnt != 1) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); goto out; } if (WARN_ON(drvdata->perf_buf != etr_buf)) { lost = true; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); goto out; } @@ -1657,7 +1657,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev, tmc_sync_etr_buf(drvdata); CS_LOCK(drvdata->base); - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); lost = etr_buf->full; offset = etr_buf->offset; @@ -1726,7 +1726,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) struct perf_output_handle *handle = data; struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* Don't use this sink if it is already claimed by sysFS */ if (coresight_get_mode(csdev) == CS_MODE_SYSFS) { rc = -EBUSY; @@ -1766,7 +1766,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) } unlock_out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return rc; } @@ -1788,16 +1788,16 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } csdev->refcnt--; if (csdev->refcnt) { - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } @@ -1810,7 +1810,7 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) /* Reset perf specific data */ drvdata->perf_buf = NULL; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(&csdev->dev, "TMC-ETR disabled\n"); return 0; @@ -1910,7 +1910,7 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR)) return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { ret = -EBUSY; goto out; @@ -1932,7 +1932,7 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) drvdata->reading = true; out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } @@ -1946,7 +1946,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR)) return -EINVAL; - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); /* RE-enable the TMC if need be */ if (coresight_get_mode(drvdata->csdev) == CS_MODE_SYSFS) { @@ -1966,7 +1966,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) } drvdata->reading = false; - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free allocated memory out side of the spinlock */ if (sysfs_buf) @@ -2023,14 +2023,14 @@ static int buf_mode_set_resrv(struct tmc_drvdata *drvdata) rbuf = &drvdata->resrv_buf; /* Ensure there are no active crashdata read sessions */ - spin_lock_irqsave(&drvdata->spinlock, flags); + raw_spin_lock_irqsave(&drvdata->spinlock, flags); if (!rbuf->reading) { tmc_crashdata_set_invalid(drvdata); rbuf->len = 0; drvdata->etr_mode = ETR_MODE_RESRV; err = 0; } - spin_unlock_irqrestore(&drvdata->spinlock, flags); + raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); return err; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index b48bc9a01cc0..6541a27a018e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -249,7 +249,7 @@ struct tmc_drvdata { struct coresight_device *csdev; struct miscdevice miscdev; struct miscdevice crashdev; - spinlock_t spinlock; + raw_spinlock_t spinlock; pid_t pid; bool reading; bool stop_on_flush; From d11eb31db26912e6e3882a2d253e5a79374c412e Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Thu, 6 Mar 2025 12:11:10 +0000 Subject: [PATCH 0667/2157] coresight/ultrasoc: change smb_drv_data spinlock's type to raw_spinlock_t In ultrasoc-smb drivers, smb_drv_data->spinlock can be held during __schedule() by perf_event_task_sched_out()/in(). Since smb__drv_data->spinlock type is spinlock_t and perf_event_task_sched_out()/in() is called after acquiring rq_lock, which is raw_spinlock_t (an unsleepable lock), this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable. To address this, change type smb_drv_data->spinlock in ultrasoc-smb drivers, which can be called by perf_event_task_sched_out()/in(), from spinlock_t to raw_spinlock_t. Reviewed-by: James Clark Signed-off-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250306121110.1647948-10-yeoreum.yun@arm.com --- drivers/hwtracing/coresight/ultrasoc-smb.c | 12 ++++++------ drivers/hwtracing/coresight/ultrasoc-smb.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index dc3c9504dd7c..26cfc939e5bd 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -98,7 +98,7 @@ static int smb_open(struct inode *inode, struct file *file) struct smb_drv_data *drvdata = container_of(file->private_data, struct smb_drv_data, miscdev); - guard(spinlock)(&drvdata->spinlock); + guard(raw_spinlock)(&drvdata->spinlock); if (drvdata->reading) return -EBUSY; @@ -152,7 +152,7 @@ static int smb_release(struct inode *inode, struct file *file) struct smb_drv_data *drvdata = container_of(file->private_data, struct smb_drv_data, miscdev); - guard(spinlock)(&drvdata->spinlock); + guard(raw_spinlock)(&drvdata->spinlock); drvdata->reading = false; return 0; @@ -245,7 +245,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode, struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); int ret = 0; - guard(spinlock)(&drvdata->spinlock); + guard(raw_spinlock)(&drvdata->spinlock); /* Do nothing, the trace data is reading by other interface now */ if (drvdata->reading) @@ -280,7 +280,7 @@ static int smb_disable(struct coresight_device *csdev) { struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); - guard(spinlock)(&drvdata->spinlock); + guard(raw_spinlock)(&drvdata->spinlock); if (drvdata->reading) return -EBUSY; @@ -378,7 +378,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev, if (!buf) return 0; - guard(spinlock)(&drvdata->spinlock); + guard(raw_spinlock)(&drvdata->spinlock); /* Don't do anything if another tracer is using this sink. */ if (csdev->refcnt != 1) @@ -563,7 +563,7 @@ static int smb_probe(struct platform_device *pdev) smb_reset_buffer(drvdata); platform_set_drvdata(pdev, drvdata); - spin_lock_init(&drvdata->spinlock); + raw_spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; ret = smb_register_sink(pdev, drvdata); diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.h b/drivers/hwtracing/coresight/ultrasoc-smb.h index a91d39cfccb8..c4c111275627 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.h +++ b/drivers/hwtracing/coresight/ultrasoc-smb.h @@ -115,7 +115,7 @@ struct smb_drv_data { struct coresight_device *csdev; struct smb_data_buffer sdb; struct miscdevice miscdev; - spinlock_t spinlock; + raw_spinlock_t spinlock; bool reading; pid_t pid; }; From 8d2764251ffec556261efbc7a3e19d9149ec95a7 Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Sun, 9 Mar 2025 07:23:14 +0100 Subject: [PATCH 0668/2157] dt-bindings: input: goodix,gt9916: Document gt9897 compatible Document the Goodix GT9897 which is a Berlin-A series touchscreen controller IC by Goodix. Acked-by: Rob Herring (Arm) Signed-off-by: Jens Reidel Link: https://lore.kernel.org/r/20250309062315.35720-2-adrian@mainlining.org Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/goodix,gt9916.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml index d90f045ac06c..c40d92b7f4af 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml @@ -19,6 +19,7 @@ allOf: properties: compatible: enum: + - goodix,gt9897 - goodix,gt9916 reg: From 4d395cb071a343196ca524d3694790f06978fe91 Mon Sep 17 00:00:00 2001 From: Jens Reidel Date: Sun, 9 Mar 2025 07:23:15 +0100 Subject: [PATCH 0669/2157] Input: goodix_berlin - add support for Berlin-A series The current implementation of the goodix_berlin driver lacks support for revisions A and B of the Berlin IC. This change adds support for the gt9897 IC, which is a Berlin-A revision part. The differences between revision D and A are rather minor, a handful of address changes and a slightly larger read buffer. They were taken from the driver published by Goodix, which does a few more things that don't appear to be necessary for the touchscreen to work properly. Reviewed-by: Neil Armstrong Tested-by: Luca Weiss Signed-off-by: Jens Reidel Link: https://lore.kernel.org/r/20250309062315.35720-3-adrian@mainlining.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_berlin.h | 16 ++++++- .../input/touchscreen/goodix_berlin_core.c | 21 ++++---- drivers/input/touchscreen/goodix_berlin_i2c.c | 14 ++++-- drivers/input/touchscreen/goodix_berlin_spi.c | 48 ++++++++++++++----- 4 files changed, 73 insertions(+), 26 deletions(-) diff --git a/drivers/input/touchscreen/goodix_berlin.h b/drivers/input/touchscreen/goodix_berlin.h index 38b6f9ddbdef..d8bbd4853206 100644 --- a/drivers/input/touchscreen/goodix_berlin.h +++ b/drivers/input/touchscreen/goodix_berlin.h @@ -12,12 +12,26 @@ #include +#define GOODIX_BERLIN_FW_VERSION_INFO_ADDR_A 0x1000C +#define GOODIX_BERLIN_FW_VERSION_INFO_ADDR_D 0x10014 + +#define GOODIX_BERLIN_IC_INFO_ADDR_A 0x10068 +#define GOODIX_BERLIN_IC_INFO_ADDR_D 0x10070 + +struct goodix_berlin_ic_data { + int fw_version_info_addr; + int ic_info_addr; + ssize_t read_dummy_len; + ssize_t read_prefix_len; +}; + struct device; struct input_id; struct regmap; int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id, - struct regmap *regmap); + struct regmap *regmap, + const struct goodix_berlin_ic_data *ic_data); extern const struct dev_pm_ops goodix_berlin_pm_ops; extern const struct attribute_group *goodix_berlin_groups[]; diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 3fc03cf0ca23..bc58a1d535e7 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -12,7 +12,7 @@ * to the previous generations. * * Currently the driver only handles Multitouch events with already - * programmed firmware and "config" for "Revision D" Berlin IC. + * programmed firmware and "config" for "Revision A/D" Berlin IC. * * Support is missing for: * - ESD Management @@ -20,7 +20,7 @@ * - "Config" update/flashing * - Stylus Events * - Gesture Events - * - Support for older revisions (A & B) + * - Support for revision B */ #include @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -53,10 +54,8 @@ #define GOODIX_BERLIN_DEV_CONFIRM_VAL 0xAA #define GOODIX_BERLIN_BOOTOPTION_ADDR 0x10000 -#define GOODIX_BERLIN_FW_VERSION_INFO_ADDR 0x10014 #define GOODIX_BERLIN_IC_INFO_MAX_LEN SZ_1K -#define GOODIX_BERLIN_IC_INFO_ADDR 0x10070 #define GOODIX_BERLIN_CHECKSUM_SIZE sizeof(u16) @@ -175,6 +174,8 @@ struct goodix_berlin_core { /* Runtime parameters extracted from IC_INFO buffer */ u32 touch_data_addr; + const struct goodix_berlin_ic_data *ic_data; + struct goodix_berlin_event event; }; @@ -299,7 +300,7 @@ static int goodix_berlin_read_version(struct goodix_berlin_core *cd) { int error; - error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_FW_VERSION_INFO_ADDR, + error = regmap_raw_read(cd->regmap, cd->ic_data->fw_version_info_addr, &cd->fw_version, sizeof(cd->fw_version)); if (error) { dev_err(cd->dev, "error reading fw version, %d\n", error); @@ -367,7 +368,7 @@ static int goodix_berlin_get_ic_info(struct goodix_berlin_core *cd) if (!afe_data) return -ENOMEM; - error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_IC_INFO_ADDR, + error = regmap_raw_read(cd->regmap, cd->ic_data->ic_info_addr, &length_raw, sizeof(length_raw)); if (error) { dev_err(cd->dev, "failed get ic info length, %d\n", error); @@ -380,8 +381,8 @@ static int goodix_berlin_get_ic_info(struct goodix_berlin_core *cd) return -EINVAL; } - error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_IC_INFO_ADDR, - afe_data, length); + error = regmap_raw_read(cd->regmap, cd->ic_data->ic_info_addr, afe_data, + length); if (error) { dev_err(cd->dev, "failed get ic info data, %d\n", error); return error; @@ -716,7 +717,8 @@ const struct attribute_group *goodix_berlin_groups[] = { EXPORT_SYMBOL_GPL(goodix_berlin_groups); int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id, - struct regmap *regmap) + struct regmap *regmap, + const struct goodix_berlin_ic_data *ic_data) { struct goodix_berlin_core *cd; int error; @@ -733,6 +735,7 @@ int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id, cd->dev = dev; cd->regmap = regmap; cd->irq = irq; + cd->ic_data = ic_data; cd->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(cd->reset_gpio)) diff --git a/drivers/input/touchscreen/goodix_berlin_i2c.c b/drivers/input/touchscreen/goodix_berlin_i2c.c index ad7a60d94338..929090a094bf 100644 --- a/drivers/input/touchscreen/goodix_berlin_i2c.c +++ b/drivers/input/touchscreen/goodix_berlin_i2c.c @@ -31,6 +31,8 @@ static const struct input_id goodix_berlin_i2c_input_id = { static int goodix_berlin_i2c_probe(struct i2c_client *client) { + const struct goodix_berlin_ic_data *ic_data = + i2c_get_match_data(client); struct regmap *regmap; int error; @@ -39,22 +41,28 @@ static int goodix_berlin_i2c_probe(struct i2c_client *client) return PTR_ERR(regmap); error = goodix_berlin_probe(&client->dev, client->irq, - &goodix_berlin_i2c_input_id, regmap); + &goodix_berlin_i2c_input_id, regmap, + ic_data); if (error) return error; return 0; } +static const struct goodix_berlin_ic_data gt9916_data = { + .fw_version_info_addr = GOODIX_BERLIN_FW_VERSION_INFO_ADDR_D, + .ic_info_addr = GOODIX_BERLIN_IC_INFO_ADDR_D, +}; + static const struct i2c_device_id goodix_berlin_i2c_id[] = { - { "gt9916" }, + { .name = "gt9916", .driver_data = (long)>9916_data }, { } }; MODULE_DEVICE_TABLE(i2c, goodix_berlin_i2c_id); static const struct of_device_id goodix_berlin_i2c_of_match[] = { - { .compatible = "goodix,gt9916", }, + { .compatible = "goodix,gt9916", .data = >9916_data }, { } }; MODULE_DEVICE_TABLE(of, goodix_berlin_i2c_of_match); diff --git a/drivers/input/touchscreen/goodix_berlin_spi.c b/drivers/input/touchscreen/goodix_berlin_spi.c index 0662e87b8692..01f850f484c2 100644 --- a/drivers/input/touchscreen/goodix_berlin_spi.c +++ b/drivers/input/touchscreen/goodix_berlin_spi.c @@ -18,10 +18,14 @@ #define GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN 1 #define GOODIX_BERLIN_REGISTER_WIDTH 4 -#define GOODIX_BERLIN_SPI_READ_DUMMY_LEN 3 -#define GOODIX_BERLIN_SPI_READ_PREFIX_LEN (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \ +#define GOODIX_BERLIN_SPI_READ_DUMMY_LEN_A 4 +#define GOODIX_BERLIN_SPI_READ_DUMMY_LEN_D 3 +#define GOODIX_BERLIN_SPI_READ_PREFIX_LEN_A (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \ GOODIX_BERLIN_REGISTER_WIDTH + \ - GOODIX_BERLIN_SPI_READ_DUMMY_LEN) + GOODIX_BERLIN_SPI_READ_DUMMY_LEN_A) +#define GOODIX_BERLIN_SPI_READ_PREFIX_LEN_D (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \ + GOODIX_BERLIN_REGISTER_WIDTH + \ + GOODIX_BERLIN_SPI_READ_DUMMY_LEN_D) #define GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \ GOODIX_BERLIN_REGISTER_WIDTH) @@ -33,6 +37,7 @@ static int goodix_berlin_spi_read(void *context, const void *reg_buf, size_t val_size) { struct spi_device *spi = context; + const struct goodix_berlin_ic_data *ic_data = spi_get_device_match_data(spi); struct spi_transfer xfers; struct spi_message spi_msg; const u32 *reg = reg_buf; /* reg is stored as native u32 at start of buffer */ @@ -42,23 +47,22 @@ static int goodix_berlin_spi_read(void *context, const void *reg_buf, return -EINVAL; u8 *buf __free(kfree) = - kzalloc(GOODIX_BERLIN_SPI_READ_PREFIX_LEN + val_size, - GFP_KERNEL); + kzalloc(ic_data->read_prefix_len + val_size, GFP_KERNEL); if (!buf) return -ENOMEM; spi_message_init(&spi_msg); memset(&xfers, 0, sizeof(xfers)); - /* buffer format: 0xF1 + addr(4bytes) + dummy(3bytes) + data */ + /* buffer format: 0xF1 + addr(4bytes) + dummy(3/4bytes) + data */ buf[0] = GOODIX_BERLIN_SPI_READ_FLAG; put_unaligned_be32(*reg, buf + GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN); memset(buf + GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + GOODIX_BERLIN_REGISTER_WIDTH, - 0xff, GOODIX_BERLIN_SPI_READ_DUMMY_LEN); + 0xff, ic_data->read_dummy_len); xfers.tx_buf = buf; xfers.rx_buf = buf; - xfers.len = GOODIX_BERLIN_SPI_READ_PREFIX_LEN + val_size; + xfers.len = ic_data->read_prefix_len + val_size; xfers.cs_change = 0; spi_message_add_tail(&xfers, &spi_msg); @@ -68,7 +72,7 @@ static int goodix_berlin_spi_read(void *context, const void *reg_buf, return error; } - memcpy(val_buf, buf + GOODIX_BERLIN_SPI_READ_PREFIX_LEN, val_size); + memcpy(val_buf, buf + ic_data->read_prefix_len, val_size); return error; } @@ -123,6 +127,7 @@ static const struct input_id goodix_berlin_spi_input_id = { static int goodix_berlin_spi_probe(struct spi_device *spi) { + const struct goodix_berlin_ic_data *ic_data = spi_get_device_match_data(spi); struct regmap_config regmap_config; struct regmap *regmap; size_t max_size; @@ -137,7 +142,7 @@ static int goodix_berlin_spi_probe(struct spi_device *spi) max_size = spi_max_transfer_size(spi); regmap_config = goodix_berlin_spi_regmap_conf; - regmap_config.max_raw_read = max_size - GOODIX_BERLIN_SPI_READ_PREFIX_LEN; + regmap_config.max_raw_read = max_size - ic_data->read_prefix_len; regmap_config.max_raw_write = max_size - GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN; regmap = devm_regmap_init(&spi->dev, NULL, spi, ®map_config); @@ -145,21 +150,38 @@ static int goodix_berlin_spi_probe(struct spi_device *spi) return PTR_ERR(regmap); error = goodix_berlin_probe(&spi->dev, spi->irq, - &goodix_berlin_spi_input_id, regmap); + &goodix_berlin_spi_input_id, regmap, + ic_data); if (error) return error; return 0; } +static const struct goodix_berlin_ic_data gt9897_data = { + .fw_version_info_addr = GOODIX_BERLIN_FW_VERSION_INFO_ADDR_A, + .ic_info_addr = GOODIX_BERLIN_IC_INFO_ADDR_A, + .read_dummy_len = GOODIX_BERLIN_SPI_READ_DUMMY_LEN_A, + .read_prefix_len = GOODIX_BERLIN_SPI_READ_PREFIX_LEN_A, +}; + +static const struct goodix_berlin_ic_data gt9916_data = { + .fw_version_info_addr = GOODIX_BERLIN_FW_VERSION_INFO_ADDR_D, + .ic_info_addr = GOODIX_BERLIN_IC_INFO_ADDR_D, + .read_dummy_len = GOODIX_BERLIN_SPI_READ_DUMMY_LEN_D, + .read_prefix_len = GOODIX_BERLIN_SPI_READ_PREFIX_LEN_D, +}; + static const struct spi_device_id goodix_berlin_spi_ids[] = { - { "gt9916" }, + { .name = "gt9897", .driver_data = (long)>9897_data }, + { .name = "gt9916", .driver_data = (long)>9916_data }, { }, }; MODULE_DEVICE_TABLE(spi, goodix_berlin_spi_ids); static const struct of_device_id goodix_berlin_spi_of_match[] = { - { .compatible = "goodix,gt9916", }, + { .compatible = "goodix,gt9897", .data = >9897_data }, + { .compatible = "goodix,gt9916", .data = >9916_data }, { } }; MODULE_DEVICE_TABLE(of, goodix_berlin_spi_of_match); From 97e8a2ff28a3dc36ca3cdc94f37359852d7e1c8b Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 5 Mar 2025 15:43:15 +0100 Subject: [PATCH 0670/2157] phy: freescale: imx8m-pcie: cleanup reset logic Remove the switch statement and base perst release on whether it is found in the device tree. The probe function fails without the reset property, making it mandatory. Therefore, always release reset independent of the variant. This does not change the behavior of the driver but reduces driver complexity and allows for easier future modifications. Signed-off-by: Stefan Eichenberger Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250305144355.20364-2-eichest@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index e98361dcdead..5b505e34ca36 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -141,15 +141,9 @@ static int imx8_pcie_phy_power_on(struct phy *phy) IMX8MM_GPR_PCIE_REF_CLK_PLL); usleep_range(100, 200); - switch (imx8_phy->drvdata->variant) { - case IMX8MP: - reset_control_deassert(imx8_phy->perst); - fallthrough; - case IMX8MM: - reset_control_deassert(imx8_phy->reset); - usleep_range(200, 500); - break; - } + reset_control_deassert(imx8_phy->perst); + reset_control_deassert(imx8_phy->reset); + usleep_range(200, 500); /* Do the PHY common block reset */ regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, From aecb63e88c5e5fb9afb782a1577264c76f179af9 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 5 Mar 2025 15:43:16 +0100 Subject: [PATCH 0671/2157] phy: freescale: imx8m-pcie: assert phy reset and perst in power off Ensure the PHY reset and perst is asserted during power-off to guarantee it is in a reset state upon repeated power-on calls. This resolves an issue where the PHY may not properly initialize during subsequent power-on cycles. Power-on will deassert the reset at the appropriate time after tuning the PHY parameters. During suspend/resume cycles, we observed that the PHY PLL failed to lock during resume when the CPU temperature increased from 65C to 75C. The observed errors were: phy phy-32f00000.pcie-phy.3: phy poweron failed --> -110 imx6q-pcie 33800000.pcie: waiting for PHY ready timeout! imx6q-pcie 33800000.pcie: PM: dpm_run_callback(): genpd_resume_noirq+0x0/0x80 returns -110 imx6q-pcie 33800000.pcie: PM: failed to resume noirq: error -110 This resulted in a complete CPU freeze, which is resolved by ensuring the PHY is in reset during power-on, thus preventing PHY PLL failures. Cc: stable@vger.kernel.org Fixes: 1aa97b002258 ("phy: freescale: pcie: Initialize the imx8 pcie standalone phy driver") Signed-off-by: Stefan Eichenberger Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250305144355.20364-3-eichest@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 5b505e34ca36..7355d9921b64 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -156,6 +156,16 @@ static int imx8_pcie_phy_power_on(struct phy *phy) return ret; } +static int imx8_pcie_phy_power_off(struct phy *phy) +{ + struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy); + + reset_control_assert(imx8_phy->reset); + reset_control_assert(imx8_phy->perst); + + return 0; +} + static int imx8_pcie_phy_init(struct phy *phy) { struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy); @@ -176,6 +186,7 @@ static const struct phy_ops imx8_pcie_phy_ops = { .init = imx8_pcie_phy_init, .exit = imx8_pcie_phy_exit, .power_on = imx8_pcie_phy_power_on, + .power_off = imx8_pcie_phy_power_off, .owner = THIS_MODULE, }; From e45cc62c23428eefbae18a9b4d88d10749741bdd Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 10 Mar 2025 16:33:59 +0530 Subject: [PATCH 0672/2157] phy: qcom: qmp-usbc: Add qmp configuration for QCS615 Provide PHY configuration for the USB QMP PHY for QCS615 Platform. Signed-off-by: Krishna Kurapati Reviewed-by: Dmitry Baryshkov Unreviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250310110359.210990-1-krishna.kurapati@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usbc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c index cf12a6f12134..5e7fcb26744a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c @@ -1124,6 +1124,9 @@ static const struct of_device_id qmp_usbc_of_match_table[] = { }, { .compatible = "qcom,qcm2290-qmp-usb3-phy", .data = &qcm2290_usb3phy_cfg, + }, { + .compatible = "qcom,qcs615-qmp-usb3-phy", + .data = &qcm2290_usb3phy_cfg, }, { .compatible = "qcom,sdm660-qmp-usb3-phy", .data = &sdm660_usb3phy_cfg, From 08ae0d61c3d79bb5d52ae30ad4fc12442e966a23 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 10 Mar 2025 15:36:53 +0800 Subject: [PATCH 0673/2157] soundwire: take in count the bandwidth of a prepared stream When a stream's state is marked as prepared, it is ready for playback/capture. Therefore, we need to include the stream's bandwidth when we calculate the required bandwidth of a bus. Fixes: 25befdf32aa40 ("soundwire: generic_bandwidth_allocation: count the bandwidth of active streams only") Signed-off-by: Bard Liao Link: https://github.com/thesofproject/linux/issues/5334 Reviewed-by: Richard Fitzgerald Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20250310073653.56476-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/generic_bandwidth_allocation.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index b646c2ffe84a..1cfaccf43eac 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ b/drivers/soundwire/generic_bandwidth_allocation.c @@ -237,10 +237,11 @@ static int sdw_compute_group_params(struct sdw_bus *bus, continue; } else { /* - * Include runtimes with running (ENABLED state) and paused (DISABLED state) - * streams + * Include runtimes with running (ENABLED/PREPARED state) and + * paused (DISABLED state) streams */ if (m_rt->stream->state != SDW_STREAM_ENABLED && + m_rt->stream->state != SDW_STREAM_PREPARED && m_rt->stream->state != SDW_STREAM_DISABLED) continue; } From 964c032d1d5d1d896746b361416b84cef078dba5 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 7 Mar 2025 16:50:59 -0500 Subject: [PATCH 0674/2157] dt-bindings: dma: fsl-mxs-dma: Add compatible string for i.MX8 chips Add compatible string for all i.MX8 chips, which is backward compatible with i.MX28. Set it to fall back to "fsl,imx28-dma-apbh". Signed-off-by: Frank Li Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250307215100.3257649-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml b/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml index a17cf2360dd4..75a7d9556699 100644 --- a/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml @@ -31,6 +31,12 @@ properties: - fsl,imx6q-dma-apbh - fsl,imx6sx-dma-apbh - fsl,imx7d-dma-apbh + - fsl,imx8dxl-dma-apbh + - fsl,imx8mm-dma-apbh + - fsl,imx8mn-dma-apbh + - fsl,imx8mp-dma-apbh + - fsl,imx8mq-dma-apbh + - fsl,imx8qm-dma-apbh - fsl,imx8qxp-dma-apbh - const: fsl,imx28-dma-apbh - enum: From 1fe283e850d6659dc3ccc295c6a0b470dd461047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Sat, 8 Mar 2025 19:33:39 +0100 Subject: [PATCH 0675/2157] dt-bindings: dma: Convert fsl,elo*-dma to YAML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devicetree bindings for Freescale DMA engines have so far existed as a text file. This patch converts them to YAML, and specifies all the compatible strings currently in use in arch/powerpc/boot/dts. Signed-off-by: J. Neuschäfer Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250308-ppcyaml-dma-v4-1-20392ea81ec6@posteo.net Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/fsl,elo-dma.yaml | 137 ++++++++++++ .../devicetree/bindings/dma/fsl,elo3-dma.yaml | 125 +++++++++++ .../bindings/dma/fsl,eloplus-dma.yaml | 132 ++++++++++++ .../devicetree/bindings/powerpc/fsl/dma.txt | 204 ------------------ 4 files changed, 394 insertions(+), 204 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml create mode 100644 Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml create mode 100644 Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml delete mode 100644 Documentation/devicetree/bindings/powerpc/fsl/dma.txt diff --git a/Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml b/Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml new file mode 100644 index 000000000000..92288d76d51b --- /dev/null +++ b/Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/fsl,elo-dma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Elo DMA Controller + +maintainers: + - J. Neuschäfer + +description: + This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx + series chips such as mpc8315, mpc8349, mpc8379 etc. + +properties: + compatible: + items: + - enum: + - fsl,mpc8313-dma + - fsl,mpc8315-dma + - fsl,mpc8323-dma + - fsl,mpc8347-dma + - fsl,mpc8349-dma + - fsl,mpc8360-dma + - fsl,mpc8377-dma + - fsl,mpc8378-dma + - fsl,mpc8379-dma + - const: fsl,elo-dma + + reg: + items: + - description: + DMA General Status Register, i.e. DGSR which contains status for + all the 4 DMA channels. + + cell-index: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Controller index. 0 for controller @ 0x8100. + + ranges: true + + "#address-cells": + const: 1 + + "#size-cells": + const: 1 + + interrupts: + maxItems: 1 + description: Controller interrupt. + +required: + - compatible + - reg + +patternProperties: + "^dma-channel@[0-9a-f]+$": + type: object + additionalProperties: false + + properties: + compatible: + oneOf: + # native DMA channel + - items: + - enum: + - fsl,mpc8315-dma-channel + - fsl,mpc8323-dma-channel + - fsl,mpc8347-dma-channel + - fsl,mpc8349-dma-channel + - fsl,mpc8360-dma-channel + - fsl,mpc8377-dma-channel + - fsl,mpc8378-dma-channel + - fsl,mpc8379-dma-channel + - const: fsl,elo-dma-channel + + # audio DMA channel, see fsl,ssi.yaml + - const: fsl,ssi-dma-channel + + reg: + maxItems: 1 + + cell-index: + description: DMA channel index starts at 0. + + interrupts: + maxItems: 1 + description: + Per-channel interrupt. Only necessary if no controller interrupt has + been provided. + +additionalProperties: false + +examples: + - | + #include + + dma@82a8 { + compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; + reg = <0x82a8 4>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x8100 0x1a4>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; + cell-index = <0>; + + dma-channel@0 { + compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; + reg = <0 0x80>; + cell-index = <0>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; + }; + + dma-channel@80 { + compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; + }; + + dma-channel@100 { + compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; + }; + + dma-channel@180 { + compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml b/Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml new file mode 100644 index 000000000000..0f5e475657a7 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml @@ -0,0 +1,125 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/fsl,elo3-dma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Elo3 DMA Controller + +maintainers: + - J. Neuschäfer + +description: + DMA controller which has same function as EloPlus except that Elo3 has 8 + channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx + series chips, such as t1040, t4240, b4860. + +properties: + compatible: + const: fsl,elo3-dma + + reg: + items: + - description: + DMA General Status Registers starting from DGSR0, for channel 1~4 + - description: + DMA General Status Registers starting from DGSR1, for channel 5~8 + + ranges: true + + "#address-cells": + const: 1 + + "#size-cells": + const: 1 + + interrupts: + maxItems: 1 + +patternProperties: + "^dma-channel@[0-9a-f]+$": + type: object + additionalProperties: false + + properties: + compatible: + enum: + # native DMA channel + - fsl,eloplus-dma-channel + + # audio DMA channel, see fsl,ssi.yaml + - fsl,ssi-dma-channel + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + description: + Per-channel interrupt. Only necessary if no controller interrupt has + been provided. + +additionalProperties: false + +examples: + - | + #include + + dma@100300 { + compatible = "fsl,elo3-dma"; + reg = <0x100300 0x4>, + <0x100600 0x4>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x100100 0x500>; + + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + interrupts = <28 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + interrupts = <29 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + interrupts = <30 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + interrupts = <31 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@300 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x300 0x80>; + interrupts = <76 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@380 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x380 0x80>; + interrupts = <77 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@400 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x400 0x80>; + interrupts = <78 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + + dma-channel@480 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x480 0x80>; + interrupts = <79 IRQ_TYPE_EDGE_FALLING 0 0>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml b/Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml new file mode 100644 index 000000000000..8992f244c4db --- /dev/null +++ b/Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/fsl,eloplus-dma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale EloPlus DMA Controller + +maintainers: + - J. Neuschäfer + +description: + This is a 4-channel DMA controller with extended addresses and chaining, + mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as + mpc8540, mpc8641 p4080, bsc9131 etc. + +properties: + compatible: + oneOf: + - items: + - enum: + - fsl,mpc8540-dma + - fsl,mpc8541-dma + - fsl,mpc8548-dma + - fsl,mpc8555-dma + - fsl,mpc8560-dma + - fsl,mpc8572-dma + - fsl,mpc8641-dma + - const: fsl,eloplus-dma + - const: fsl,eloplus-dma + + reg: + items: + - description: + DMA General Status Register, i.e. DGSR which contains + status for all the 4 DMA channels + + cell-index: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + controller index. 0 for controller @ 0x21000, 1 for controller @ 0xc000 + + ranges: true + + "#address-cells": + const: 1 + + "#size-cells": + const: 1 + + interrupts: + maxItems: 1 + description: Controller interrupt. + +patternProperties: + "^dma-channel@[0-9a-f]+$": + type: object + additionalProperties: false + + properties: + compatible: + oneOf: + # native DMA channel + - items: + - enum: + - fsl,mpc8540-dma-channel + - fsl,mpc8541-dma-channel + - fsl,mpc8548-dma-channel + - fsl,mpc8555-dma-channel + - fsl,mpc8560-dma-channel + - fsl,mpc8572-dma-channel + - const: fsl,eloplus-dma-channel + + # audio DMA channel, see fsl,ssi.yaml + - const: fsl,ssi-dma-channel + + reg: + maxItems: 1 + + cell-index: + description: DMA channel index starts at 0. + + interrupts: + maxItems: 1 + description: + Per-channel interrupt. Only necessary if no controller interrupt has + been provided. + +additionalProperties: false + +examples: + - | + #include + + dma@21300 { + compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; + reg = <0x21300 4>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x21100 0x200>; + cell-index = <0>; + + dma-channel@0 { + compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; + reg = <0 0x80>; + cell-index = <0>; + interrupts = <20 IRQ_TYPE_EDGE_FALLING>; + }; + + dma-channel@80 { + compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupts = <21 IRQ_TYPE_EDGE_FALLING>; + }; + + dma-channel@100 { + compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupts = <22 IRQ_TYPE_EDGE_FALLING>; + }; + + dma-channel@180 { + compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupts = <23 IRQ_TYPE_EDGE_FALLING>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt deleted file mode 100644 index c11ad5c6db21..000000000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt +++ /dev/null @@ -1,204 +0,0 @@ -* Freescale DMA Controllers - -** Freescale Elo DMA Controller - This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx - series chips such as mpc8315, mpc8349, mpc8379 etc. - -Required properties: - -- compatible : must include "fsl,elo-dma" -- reg : DMA General Status Register, i.e. DGSR which contains - status for all the 4 DMA channels -- ranges : describes the mapping between the address space of the - DMA channels and the address space of the DMA controller -- cell-index : controller index. 0 for controller @ 0x8100 -- interrupts : interrupt specifier for DMA IRQ - -- DMA channel nodes: - - compatible : must include "fsl,elo-dma-channel" - However, see note below. - - reg : DMA channel specific registers - - cell-index : DMA channel index starts at 0. - -Optional properties: - - interrupts : interrupt specifier for DMA channel IRQ - (on 83xx this is expected to be identical to - the interrupts property of the parent node) - -Example: - dma@82a8 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; - reg = <0x82a8 4>; - ranges = <0 0x8100 0x1a4>; - interrupt-parent = <&ipic>; - interrupts = <71 8>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; - cell-index = <0>; - reg = <0 0x80>; - interrupt-parent = <&ipic>; - interrupts = <71 8>; - }; - dma-channel@80 { - compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; - cell-index = <1>; - reg = <0x80 0x80>; - interrupt-parent = <&ipic>; - interrupts = <71 8>; - }; - dma-channel@100 { - compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; - cell-index = <2>; - reg = <0x100 0x80>; - interrupt-parent = <&ipic>; - interrupts = <71 8>; - }; - dma-channel@180 { - compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; - cell-index = <3>; - reg = <0x180 0x80>; - interrupt-parent = <&ipic>; - interrupts = <71 8>; - }; - }; - -** Freescale EloPlus DMA Controller - This is a 4-channel DMA controller with extended addresses and chaining, - mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as - mpc8540, mpc8641 p4080, bsc9131 etc. - -Required properties: - -- compatible : must include "fsl,eloplus-dma" -- reg : DMA General Status Register, i.e. DGSR which contains - status for all the 4 DMA channels -- cell-index : controller index. 0 for controller @ 0x21000, - 1 for controller @ 0xc000 -- ranges : describes the mapping between the address space of the - DMA channels and the address space of the DMA controller - -- DMA channel nodes: - - compatible : must include "fsl,eloplus-dma-channel" - However, see note below. - - cell-index : DMA channel index starts at 0. - - reg : DMA channel specific registers - - interrupts : interrupt specifier for DMA channel IRQ - -Example: - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; - reg = <0x21300 4>; - ranges = <0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; - reg = <0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - -** Freescale Elo3 DMA Controller - DMA controller which has same function as EloPlus except that Elo3 has 8 - channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx - series chips, such as t1040, t4240, b4860. - -Required properties: - -- compatible : must include "fsl,elo3-dma" -- reg : contains two entries for DMA General Status Registers, - i.e. DGSR0 which includes status for channel 1~4, and - DGSR1 for channel 5~8 -- ranges : describes the mapping between the address space of the - DMA channels and the address space of the DMA controller - -- DMA channel nodes: - - compatible : must include "fsl,eloplus-dma-channel" - - reg : DMA channel specific registers - - interrupts : interrupt specifier for DMA channel IRQ - -Example: -dma@100300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,elo3-dma"; - reg = <0x100300 0x4>, - <0x100600 0x4>; - ranges = <0x0 0x100100 0x500>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - interrupts = <28 2 0 0>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - interrupts = <29 2 0 0>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - interrupts = <30 2 0 0>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - interrupts = <31 2 0 0>; - }; - dma-channel@300 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x300 0x80>; - interrupts = <76 2 0 0>; - }; - dma-channel@380 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x380 0x80>; - interrupts = <77 2 0 0>; - }; - dma-channel@400 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x400 0x80>; - interrupts = <78 2 0 0>; - }; - dma-channel@480 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x480 0x80>; - interrupts = <79 2 0 0>; - }; -}; - -Note on DMA channel compatible properties: The compatible property must say -"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA -driver (fsldma). Any DMA channel used by fsldma cannot be used by another -DMA driver, such as the SSI sound drivers for the MPC8610. Therefore, any DMA -channel that should be used for another driver should not use -"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel". For the SSI drivers, for -example, the compatible property should be "fsl,ssi-dma-channel". See ssi.txt -for more information. From 384a530111c682d4a6c4c9ec305254c323a1379a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 2 Mar 2025 18:27:23 +0200 Subject: [PATCH 0676/2157] vfio/virtio: Enable support for virtio-block live migration With a functional and tested backend for virtio-block live migration, add the virtio-block device ID to the pci_device_id table. Currently, the driver supports legacy IO functionality only for virtio-net, and it is accounted for in specific parts of the code. To enforce this limitation, an explicit check for virtio-net, has been added in virtiovf_support_legacy_io(). Once a backend implements legacy IO functionality for virtio-block, the necessary support will be added to the driver, and this additional check should be removed. The module description was updated accordingly. Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20250302162723.82578-1-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/virtio/Kconfig | 6 +++--- drivers/vfio/pci/virtio/legacy_io.c | 4 +++- drivers/vfio/pci/virtio/main.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig index 2770f7eb702c..33e04e65bec6 100644 --- a/drivers/vfio/pci/virtio/Kconfig +++ b/drivers/vfio/pci/virtio/Kconfig @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config VIRTIO_VFIO_PCI - tristate "VFIO support for VIRTIO NET PCI VF devices" + tristate "VFIO support for VIRTIO PCI VF devices" depends on VIRTIO_PCI select VFIO_PCI_CORE help - This provides migration support for VIRTIO NET PCI VF devices - using the VFIO framework. Migration support requires the + This provides migration support for VIRTIO NET and BLOCK PCI VF + devices using the VFIO framework. Migration support requires the SR-IOV PF device to support specific VIRTIO extensions, otherwise this driver provides no additional functionality beyond vfio-pci. diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c index 20382ee15fac..832af5ba267c 100644 --- a/drivers/vfio/pci/virtio/legacy_io.c +++ b/drivers/vfio/pci/virtio/legacy_io.c @@ -382,7 +382,9 @@ static bool virtiovf_bar0_exists(struct pci_dev *pdev) bool virtiovf_support_legacy_io(struct pci_dev *pdev) { - return virtio_pci_admin_has_legacy_io(pdev) && !virtiovf_bar0_exists(pdev); + /* For now, the legacy IO functionality is supported only for virtio-net */ + return pdev->device == 0x1041 && virtio_pci_admin_has_legacy_io(pdev) && + !virtiovf_bar0_exists(pdev); } int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev) diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c index d534d48c4163..515fe1b9f94d 100644 --- a/drivers/vfio/pci/virtio/main.c +++ b/drivers/vfio/pci/virtio/main.c @@ -187,8 +187,9 @@ static void virtiovf_pci_remove(struct pci_dev *pdev) } static const struct pci_device_id virtiovf_pci_table[] = { - /* Only virtio-net is supported/tested so far */ + /* Only virtio-net and virtio-block are supported/tested so far */ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042) }, {} }; @@ -221,4 +222,4 @@ module_pci_driver(virtiovf_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yishai Hadas "); MODULE_DESCRIPTION( - "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices"); + "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET and BLOCK devices"); From e87ca16e99118ab4e130a41bdf12abbf6a87656c Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 5 Mar 2025 15:00:06 -0800 Subject: [PATCH 0677/2157] dmaengine: dmatest: Fix dmatest waiting less when interrupted Change the "wait for operation finish" logic to take interrupts into account. When using dmatest with idxd DMA engine, it's possible that during longer tests, the interrupt notifying the finish of an operation happens during wait_event_freezable_timeout(), which causes dmatest to cleanup all the resources, some of which might still be in use. This fix ensures that the wait logic correctly handles interrupts, preventing premature cleanup of resources. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202502171134.8c403348-lkp@intel.com Signed-off-by: Vinicius Costa Gomes Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250305230007.590178-1-vinicius.gomes@intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmatest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 91b2fbc0b864..d891dfca358e 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -841,9 +841,9 @@ static int dmatest_func(void *data) } else { dma_async_issue_pending(chan); - wait_event_freezable_timeout(thread->done_wait, - done->done, - msecs_to_jiffies(params->timeout)); + wait_event_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); From 6ec29d4086ed8b951fa794ac6c0e7cd7ae3762d9 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Mon, 3 Mar 2025 14:56:48 +0800 Subject: [PATCH 0678/2157] dt-bindings: dma: snps,dw-axi-dmac: Allow devices to be marked as noncoherent A RISC-V platform can have both DMA coherent/noncoherent devices. Since the RISC-V architecture is marked coherent, devices should be marked as noncoherent when coherent devices exist. Add dma-noncoherent property for snps,dw-axi-dmac device. It will be used on SG2044, and it has other coherent devices. Signed-off-by: Inochi Amaoto Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250303065649.937233-1-inochiama@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml index 525f5f3932f5..935735a59afd 100644 --- a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml @@ -59,6 +59,8 @@ properties: minimum: 1 maximum: 8 + dma-noncoherent: true + resets: minItems: 1 maxItems: 2 From c9c59da76ce9cb3f215b66eb3708cda1134a5206 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 28 Feb 2025 15:17:19 +0800 Subject: [PATCH 0679/2157] dmaengine: fsl-edma: cleanup chan after dma_async_device_unregister There is kernel dump when do module test: sysfs: cannot create duplicate filename /devices/platform/soc@0/44000000.bus/44000000.dma-controller/dma/dma0chan0 __dma_async_device_channel_register+0x128/0x19c dma_async_device_register+0x150/0x454 fsl_edma_probe+0x6cc/0x8a0 platform_probe+0x68/0xc8 fsl_edma_cleanup_vchan will unlink vchan.chan.device_node, while dma_async_device_unregister needs the link to do __dma_async_device_channel_unregister. So need move fsl_edma_cleanup_vchan after dma_async_device_unregister to make sure channel could be freed. So clean up chan after dma_async_device_unregister to address this. Fixes: 6f93b93b2a1b ("dmaengine: fsl-edma: kill the tasklets upon exit") Reviewed-by: Frank Li Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250228071720.3780479-1-peng.fan@oss.nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 760c9e3e374c..7eb4d898434b 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -802,9 +802,9 @@ static void fsl_edma_remove(struct platform_device *pdev) struct fsl_edma_engine *fsl_edma = platform_get_drvdata(pdev); fsl_edma_irq_exit(pdev, fsl_edma); - fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); of_dma_controller_free(np); dma_async_device_unregister(&fsl_edma->dma_dev); + fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs); } From fa70c4c3c580c239a0f9e83a14770ab026e8d820 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 28 Feb 2025 15:17:20 +0800 Subject: [PATCH 0680/2157] dmaengine: fsl-edma: free irq correctly in remove path Add fsl_edma->txirq/errirq check to avoid below warning because no errirq at i.MX9 platform. Otherwise there will be kernel dump: WARNING: CPU: 0 PID: 11 at kernel/irq/devres.c:144 devm_free_irq+0x74/0x80 Modules linked in: CPU: 0 UID: 0 PID: 11 Comm: kworker/u8:0 Not tainted 6.12.0-rc7#18 Hardware name: NXP i.MX93 11X11 EVK board (DT) Workqueue: events_unbound deferred_probe_work_func pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : devm_free_irq+0x74/0x80 lr : devm_free_irq+0x48/0x80 Call trace: devm_free_irq+0x74/0x80 (P) devm_free_irq+0x48/0x80 (L) fsl_edma_remove+0xc4/0xc8 platform_remove+0x28/0x44 device_remove+0x4c/0x80 Fixes: 44eb827264de ("dmaengine: fsl-edma: request per-channel IRQ only when channel is allocated") Reviewed-by: Frank Li Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20250228071720.3780479-2-peng.fan@oss.nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 7eb4d898434b..756d67325db5 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -401,6 +401,7 @@ fsl_edma2_irq_init(struct platform_device *pdev, /* The last IRQ is for eDMA err */ if (i == count - 1) { + fsl_edma->errirq = irq; ret = devm_request_irq(&pdev->dev, irq, fsl_edma_err_handler, 0, "eDMA2-ERR", fsl_edma); @@ -420,10 +421,13 @@ static void fsl_edma_irq_exit( struct platform_device *pdev, struct fsl_edma_engine *fsl_edma) { if (fsl_edma->txirq == fsl_edma->errirq) { - devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); + if (fsl_edma->txirq >= 0) + devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); } else { - devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); - devm_free_irq(&pdev->dev, fsl_edma->errirq, fsl_edma); + if (fsl_edma->txirq >= 0) + devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma); + if (fsl_edma->errirq >= 0) + devm_free_irq(&pdev->dev, fsl_edma->errirq, fsl_edma); } } @@ -620,6 +624,8 @@ static int fsl_edma_probe(struct platform_device *pdev) if (!fsl_edma) return -ENOMEM; + fsl_edma->errirq = -EINVAL; + fsl_edma->txirq = -EINVAL; fsl_edma->drvdata = drvdata; fsl_edma->n_chans = chans; mutex_init(&fsl_edma->fsl_edma_mutex); From 566beb347eded7a860511164a7a163bc882dc4d0 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 5 Feb 2025 17:48:01 +0530 Subject: [PATCH 0681/2157] dmaengine: ti: k3-udma: Enable second resource range for BCDMA and PKTDMA The SoC DMA resources for UDMA, BCDMA and PKTDMA can be described via a combination of up to two resource ranges. The first resource range handles the default partitioning wherein all resources belonging to that range are allocated to a single entity and form a continuous range. For use-cases where the resources are shared across multiple entities and require to be described via discontinuous ranges, a second resource range is required. Currently, udma_setup_resources() supports handling resources that belong to the second range. Extend bcdma_setup_resources() and pktdma_setup_resources() to support the same. Signed-off-by: Siddharth Vadapalli Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20250205121805.316792-1-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 7ed1956b4642..b223a7aacb0c 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4886,6 +4886,12 @@ static int bcdma_setup_resources(struct udma_dev *ud) irq_res.desc[i].start = rm_res->desc[i].start + oes->bcdma_bchan_ring; irq_res.desc[i].num = rm_res->desc[i].num; + + if (rm_res->desc[i].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec + + oes->bcdma_bchan_ring; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } } } } else { @@ -4909,6 +4915,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) irq_res.desc[i + 1].start = rm_res->desc[j].start + oes->bcdma_tchan_ring; irq_res.desc[i + 1].num = rm_res->desc[j].num; + + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + oes->bcdma_tchan_data; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + irq_res.desc[i + 1].start_sec = rm_res->desc[j].start_sec + + oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num_sec = rm_res->desc[j].num_sec; + } } } } @@ -4929,6 +4944,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) irq_res.desc[i + 1].start = rm_res->desc[j].start + oes->bcdma_rchan_ring; irq_res.desc[i + 1].num = rm_res->desc[j].num; + + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + oes->bcdma_rchan_data; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + irq_res.desc[i + 1].start_sec = rm_res->desc[j].start_sec + + oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num_sec = rm_res->desc[j].num_sec; + } } } } @@ -5063,6 +5087,12 @@ static int pktdma_setup_resources(struct udma_dev *ud) irq_res.desc[i].start = rm_res->desc[i].start + oes->pktdma_tchan_flow; irq_res.desc[i].num = rm_res->desc[i].num; + + if (rm_res->desc[i].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec + + oes->pktdma_tchan_flow; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; @@ -5074,6 +5104,12 @@ static int pktdma_setup_resources(struct udma_dev *ud) irq_res.desc[i].start = rm_res->desc[j].start + oes->pktdma_rchan_flow; irq_res.desc[i].num = rm_res->desc[j].num; + + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + oes->pktdma_rchan_flow; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + } } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); From e7240aba2053d437a661d946b3d413f310138a45 Mon Sep 17 00:00:00 2001 From: Matthew Majewski Date: Sun, 16 Feb 2025 16:47:41 -0500 Subject: [PATCH 0682/2157] dmaengine: ti: edma: support sw triggered chans in of_edma_xlate() The .of_edma_xlate() function always sets the hw_triggered flag to true. This causes sw triggered channels consumed via the device-tree to not function properly, as the driver incorrectly assumes they are hw triggered. Modify the xlate() function to correctly set the hw_triggered flag to false for channels reserved for memcpy operation (ie, sw triggered). Signed-off-by: Matthew Majewski Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20250216214741.207538-1-mattwmajewski@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index b1a54655e6ce..3ed406f08c44 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2259,8 +2259,12 @@ static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, return NULL; out: - /* The channel is going to be used as HW synchronized */ - echan->hw_triggered = true; + /* + * The channel is going to be HW synchronized, unless it was + * reserved as a memcpy channel + */ + echan->hw_triggered = + !edma_is_memcpy_channel(i, ecc->info->memcpy_channels); return dma_get_slave_channel(chan); } #else From b9014a10bdb8e967d85819f5ef61e6f80d0b46c9 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 10 Feb 2025 17:29:11 -0600 Subject: [PATCH 0683/2157] dmaengine: Remove device_prep_dma_imm_data from struct dma_device The device_prep_dma_imm_data() method isn't implemented or invoked by any code since commit 80ade22c06ca ("misc: mic: remove the MIC drivers"). Remove it, shrinking struct dma_device by a few bytes. Signed-off-by: Nathan Lynch Link: https://lore.kernel.org/r/20250210-dmaengine-drop-imm-data-v1-1-e017766da2fa@amd.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index a360e0330436..bb146c5ac3e4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -839,7 +839,6 @@ struct dma_filter { * The function takes a buffer of size buf_len. The callback function will * be called after period_len bytes have been transferred. * @device_prep_interleaved_dma: Transfer expression in a generic way. - * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address * @device_caps: May be used to override the generic DMA slave capabilities * with per-channel specific ones * @device_config: Pushes a new configuration to a channel, return 0 or an error @@ -942,9 +941,6 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)( - struct dma_chan *chan, dma_addr_t dst, u64 data, - unsigned long flags); void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps); int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); From b87c29c007e80f4737a056b3c5c21b5b5106b7f7 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 3 Feb 2025 21:55:09 +0530 Subject: [PATCH 0684/2157] dmaengine: ae4dma: Remove deprecated PCI IDs Two previously used PCI IDs are deprecated and should not be used for AE4DMA. Hence, remove as they are unsupported for AE4DMA. Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver") Signed-off-by: Basavaraj Natikar Link: https://lore.kernel.org/r/20250203162511.911946-2-Basavaraj.Natikar@amd.com Signed-off-by: Vinod Koul --- drivers/dma/amd/ae4dma/ae4dma-pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c index aad0dc4294a3..7f96843f5215 100644 --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c @@ -137,8 +137,6 @@ static void ae4_pci_remove(struct pci_dev *pdev) } static const struct pci_device_id ae4_pci_table[] = { - { PCI_VDEVICE(AMD, 0x14C8), }, - { PCI_VDEVICE(AMD, 0x14DC), }, { PCI_VDEVICE(AMD, 0x149B), }, /* Last entry must be zero */ { 0, } From feba04e6fdf4daccc83fc09d499a3e32c178edb4 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 3 Feb 2025 21:55:10 +0530 Subject: [PATCH 0685/2157] dmaengine: ae4dma: Use the MSI count and its corresponding IRQ number Instead of using the defined maximum hardware queue, which can lead to incorrect values if the counts mismatch, use the exact supported MSI count and its corresponding IRQ number. Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver") Signed-off-by: Basavaraj Natikar Link: https://lore.kernel.org/r/20250203162511.911946-3-Basavaraj.Natikar@amd.com Signed-off-by: Vinod Koul --- drivers/dma/amd/ae4dma/ae4dma-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c index 7f96843f5215..2c63907db228 100644 --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c @@ -46,8 +46,8 @@ static int ae4_get_irqs(struct ae4_device *ae4) } else { ae4_msix->msix_count = ret; - for (i = 0; i < MAX_AE4_HW_QUEUES; i++) - ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector; + for (i = 0; i < ae4_msix->msix_count; i++) + ae4->ae4_irq[i] = pci_irq_vector(pdev, i); } return ret; From 6565439894570a07b00dba0b739729fe6b56fba4 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 3 Feb 2025 21:55:11 +0530 Subject: [PATCH 0686/2157] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As AE4DMA offers multi-channel functionality compared to PTDMA’s single queue, utilize multi-queue, which supports higher speeds than PTDMA, to achieve higher performance using the AE4DMA workqueue based mechanism. Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version") Signed-off-by: Basavaraj Natikar Link: https://lore.kernel.org/r/20250203162511.911946-4-Basavaraj.Natikar@amd.com Signed-off-by: Vinod Koul --- drivers/dma/amd/ae4dma/ae4dma.h | 2 + drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h index 265c5d436008..57f6048726bb 100644 --- a/drivers/dma/amd/ae4dma/ae4dma.h +++ b/drivers/dma/amd/ae4dma/ae4dma.h @@ -37,6 +37,8 @@ #define AE4_DMA_VERSION 4 #define CMD_AE4_DESC_DW0_VAL 2 +#define AE4_TIME_OUT 5000 + struct ae4_msix { int msix_count; struct msix_entry msix_entry[MAX_AE4_HW_QUEUES]; diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index 35c84ec9608b..715ac3ae067b 100644 --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, { struct dma_async_tx_descriptor *tx_desc; struct virt_dma_desc *vd; + struct pt_device *pt; unsigned long flags; + pt = chan->pt; /* Loop over descriptors until one is found with commands */ do { if (desc) { @@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, spin_lock_irqsave(&chan->vc.lock, flags); - if (desc) { + if (pt->ver != AE4_DMA_VERSION && desc) { if (desc->status != DMA_COMPLETE) { if (desc->status != DMA_ERROR) desc->status = DMA_COMPLETE; @@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, spin_unlock_irqrestore(&chan->vc.lock, flags); - if (tx_desc) { + if (pt->ver != AE4_DMA_VERSION && tx_desc) { dmaengine_desc_get_callback_invoke(tx_desc, NULL); dma_run_dependencies(tx_desc); vchan_vdesc_fini(vd); @@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, return NULL; } +static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q) +{ + u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF); + u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF); + + if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1)) + return true; + + return false; +} + static void pt_cmd_callback(void *data, int err) { struct pt_dma_desc *desc = data; + struct ae4_cmd_queue *ae4cmd_q; struct dma_chan *dma_chan; struct pt_dma_chan *chan; + struct ae4_device *ae4; + struct pt_device *pt; int ret; if (err == -EINPROGRESS) @@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err) dma_chan = desc->vd.tx.chan; chan = to_pt_chan(dma_chan); + pt = chan->pt; if (err) desc->status = DMA_ERROR; while (true) { + if (pt->ver == AE4_DMA_VERSION) { + ae4 = container_of(pt, struct ae4_device, pt); + ae4cmd_q = &ae4->ae4cmd_q[chan->id]; + + if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) || + ae4_core_queue_full(&ae4cmd_q->cmd_q)) { + wake_up(&ae4cmd_q->q_w); + + if (wait_for_completion_timeout(&ae4cmd_q->cmp, + msecs_to_jiffies(AE4_TIME_OUT)) + == 0) { + dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id); + break; + } + + reinit_completion(&ae4cmd_q->cmp); + continue; + } + } + /* Check for DMA descriptor completion */ desc = pt_handle_active_desc(chan, desc); @@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan, return desc; } +static void pt_cmd_callback_work(void *data, int err) +{ + struct dma_async_tx_descriptor *tx_desc; + struct pt_dma_desc *desc = data; + struct dma_chan *dma_chan; + struct virt_dma_desc *vd; + struct pt_dma_chan *chan; + unsigned long flags; + + dma_chan = desc->vd.tx.chan; + chan = to_pt_chan(dma_chan); + + if (err == -EINPROGRESS) + return; + + tx_desc = &desc->vd.tx; + vd = &desc->vd; + + if (err) + desc->status = DMA_ERROR; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (desc) { + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; + + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } else { + tx_desc = NULL; + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + if (tx_desc) { + dmaengine_desc_get_callback_invoke(tx_desc, NULL); + dma_run_dependencies(tx_desc); + list_del(&desc->vd.node); + vchan_vdesc_fini(vd); + } +} + static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, dma_addr_t dst, dma_addr_t src, @@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, desc->len = len; if (pt->ver == AE4_DMA_VERSION) { + pt_cmd->pt_cmd_callback = pt_cmd_callback_work; ae4 = container_of(pt, struct ae4_device, pt); ae4cmd_q = &ae4->ae4cmd_q[chan->id]; mutex_lock(&ae4cmd_q->cmd_lock); @@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan) { struct pt_dma_chan *chan = to_pt_chan(dma_chan); struct pt_dma_desc *desc; + struct pt_device *pt; unsigned long flags; bool engine_is_idle = true; + pt = chan->pt; + spin_lock_irqsave(&chan->vc.lock, flags); desc = pt_next_dma_desc(chan); - if (desc) + if (desc && pt->ver != AE4_DMA_VERSION) engine_is_idle = false; vchan_issue_pending(&chan->vc); From 10b20f2d1bbeddcb6c1f6c01b6eb1b8d2828b663 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Mon, 10 Mar 2025 15:51:38 -0400 Subject: [PATCH 0687/2157] rust/kernel/faux: mark Registration methods inline When building the kernel on Arch Linux using on x86_64 with tools: $ rustc --version rustc 1.84.0 (9fc6b4312 2025-01-07) $ clang --version clang version 19.1.7 Target: x86_64-pc-linux-gnu The following symbols are generated: $ nm vmlinux | rg ' _R' | rustfilt | rg faux ffffffff81959ae0 T ::new ffffffff81959b40 T ::drop However, these Rust symbols are wrappers around bindings in the C faux code. Inlining these functions removes the middle-man wrapper function After applying this patch, the above function signatures disappear. Link: https://github.com/Rust-for-Linux/linux/issues/1145 Signed-off-by: Ethan Carter Edwards Acked-by: Danilo Krummrich Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/jesg4yu7m6fvzmgg5tlsktrrjm36l4qsranto5mdmnucx4pvf3@nhvt4juw5es3 Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 3277f35c3f79..8a50fcd4c9bb 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -23,6 +23,7 @@ impl Registration { /// Create and register a new faux device with the given name. + #[inline] pub fn new(name: &CStr, parent: Option<&device::Device>) -> Result { // SAFETY: // - `name` is copied by this function into its own storage @@ -58,6 +59,7 @@ fn as_ref(&self) -> &device::Device { } impl Drop for Registration { + #[inline] fn drop(&mut self) { // SAFETY: `self.0` is a valid registered faux_device via our type invariants. unsafe { bindings::faux_device_destroy(self.as_raw()) } From a2e934885c82912caf3f72b9511ef626f3619e3d Mon Sep 17 00:00:00 2001 From: Nitheesh Sekar Date: Thu, 20 Feb 2025 15:12:45 +0530 Subject: [PATCH 0688/2157] dt-bindings: phy: qcom,uniphy-pcie: Document PCIe uniphy Document the Qualcomm UNIPHY PCIe 28LP present in IPQ5332. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Nitheesh Sekar Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20250220094251.230936-2-quic_varada@quicinc.com Signed-off-by: Vinod Koul --- .../phy/qcom,ipq5332-uniphy-pcie-phy.yaml | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml new file mode 100644 index 000000000000..e39168d55d23 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,ipq5332-uniphy-pcie-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm UNIPHY PCIe 28LP PHY + +maintainers: + - Nitheesh Sekar + - Varadarajan Narayanan + +description: + PCIe and USB combo PHY found in Qualcomm IPQ5332 SoC + +properties: + compatible: + enum: + - qcom,ipq5332-uniphy-pcie-phy + + reg: + maxItems: 1 + + clocks: + items: + - description: pcie pipe clock + - description: pcie ahb clock + + resets: + items: + - description: phy reset + - description: ahb reset + - description: cfg reset + + "#phy-cells": + const: 0 + + "#clock-cells": + const: 0 + + num-lanes: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 2] + +required: + - compatible + - reg + - clocks + - resets + - "#phy-cells" + - "#clock-cells" + - num-lanes + +additionalProperties: false + +examples: + - | + #include + + pcie0_phy: phy@4b0000 { + compatible = "qcom,ipq5332-uniphy-pcie-phy"; + reg = <0x004b0000 0x800>; + + clocks = <&gcc GCC_PCIE3X1_0_PIPE_CLK>, + <&gcc GCC_PCIE3X1_PHY_AHB_CLK>; + + resets = <&gcc GCC_PCIE3X1_0_PHY_BCR>, + <&gcc GCC_PCIE3X1_PHY_AHB_CLK_ARES>, + <&gcc GCC_PCIE3X1_0_PHY_PHY_BCR>; + + #clock-cells = <0>; + + #phy-cells = <0>; + + num-lanes = <1>; + }; From 74badb8b0b146668cc6c03eb58e2a814f9463d02 Mon Sep 17 00:00:00 2001 From: Nitheesh Sekar Date: Thu, 20 Feb 2025 15:12:46 +0530 Subject: [PATCH 0689/2157] phy: qcom: Introduce PCIe UNIPHY 28LP driver Add Qualcomm PCIe UNIPHY 28LP driver support present in Qualcomm IPQ5332 SoC and the phy init sequence. Reviewed-by: Dmitry Baryshkov Signed-off-by: Nitheesh Sekar Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20250220094251.230936-3-quic_varada@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 12 + drivers/phy/qualcomm/Makefile | 1 + .../phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c | 286 ++++++++++++++++++ 3 files changed, 299 insertions(+) create mode 100644 drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 846f8c99547f..a6b71fda1b9c 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -154,6 +154,18 @@ config PHY_QCOM_M31_USB management. This driver is required even for peripheral only or host only mode configurations. +config PHY_QCOM_UNIPHY_PCIE_28LP + bool "PCIE UNIPHY 28LP PHY driver" + depends on ARCH_QCOM + depends on HAS_IOMEM + depends on OF + select GENERIC_PHY + help + Enable this to support the PCIe UNIPHY 28LP phy transceiver that + is used with PCIe controllers on Qualcomm IPQ5332 chips. It + handles PHY initialization, clock management required after + resetting the hardware and power management. + config PHY_QCOM_USB_HS tristate "Qualcomm USB HS PHY module" depends on USB_ULPI_BUS diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile index eb60e950ad53..42038bc30974 100644 --- a/drivers/phy/qualcomm/Makefile +++ b/drivers/phy/qualcomm/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_PHY_QCOM_QMP_USB_LEGACY) += phy-qcom-qmp-usb-legacy.o obj-$(CONFIG_PHY_QCOM_QUSB2) += phy-qcom-qusb2.o obj-$(CONFIG_PHY_QCOM_SNPS_EUSB2) += phy-qcom-snps-eusb2.o obj-$(CONFIG_PHY_QCOM_EUSB2_REPEATER) += phy-qcom-eusb2-repeater.o +obj-$(CONFIG_PHY_QCOM_UNIPHY_PCIE_28LP) += phy-qcom-uniphy-pcie-28lp.o obj-$(CONFIG_PHY_QCOM_USB_HS) += phy-qcom-usb-hs.o obj-$(CONFIG_PHY_QCOM_USB_HSIC) += phy-qcom-usb-hsic.o obj-$(CONFIG_PHY_QCOM_USB_HS_28NM) += phy-qcom-usb-hs-28nm.o diff --git a/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c b/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c new file mode 100644 index 000000000000..c8b2a3818880 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2025, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RST_ASSERT_DELAY_MIN_US 100 +#define RST_ASSERT_DELAY_MAX_US 150 +#define PIPE_CLK_DELAY_MIN_US 5000 +#define PIPE_CLK_DELAY_MAX_US 5100 +#define CLK_EN_DELAY_MIN_US 30 +#define CLK_EN_DELAY_MAX_US 50 +#define CDR_CTRL_REG_1 0x80 +#define CDR_CTRL_REG_2 0x84 +#define CDR_CTRL_REG_3 0x88 +#define CDR_CTRL_REG_4 0x8c +#define CDR_CTRL_REG_5 0x90 +#define CDR_CTRL_REG_6 0x94 +#define CDR_CTRL_REG_7 0x98 +#define SSCG_CTRL_REG_1 0x9c +#define SSCG_CTRL_REG_2 0xa0 +#define SSCG_CTRL_REG_3 0xa4 +#define SSCG_CTRL_REG_4 0xa8 +#define SSCG_CTRL_REG_5 0xac +#define SSCG_CTRL_REG_6 0xb0 +#define PCS_INTERNAL_CONTROL_2 0x2d8 + +#define PHY_CFG_PLLCFG 0x220 +#define PHY_CFG_EIOS_DTCT_REG 0x3e4 +#define PHY_CFG_GEN3_ALIGN_HOLDOFF_TIME 0x3e8 + +enum qcom_uniphy_pcie_type { + PHY_TYPE_PCIE = 1, + PHY_TYPE_PCIE_GEN2, + PHY_TYPE_PCIE_GEN3, +}; + +struct qcom_uniphy_pcie_regs { + u32 offset; + u32 val; +}; + +struct qcom_uniphy_pcie_data { + int lane_offset; /* offset between the lane register bases */ + u32 phy_type; + const struct qcom_uniphy_pcie_regs *init_seq; + u32 init_seq_num; + u32 pipe_clk_rate; +}; + +struct qcom_uniphy_pcie { + struct phy phy; + struct device *dev; + const struct qcom_uniphy_pcie_data *data; + struct clk_bulk_data *clks; + int num_clks; + struct reset_control *resets; + void __iomem *base; + int lanes; +}; + +#define phy_to_dw_phy(x) container_of((x), struct qca_uni_pcie_phy, phy) + +static const struct qcom_uniphy_pcie_regs ipq5332_regs[] = { + { + .offset = PHY_CFG_PLLCFG, + .val = 0x30, + }, { + .offset = PHY_CFG_EIOS_DTCT_REG, + .val = 0x53ef, + }, { + .offset = PHY_CFG_GEN3_ALIGN_HOLDOFF_TIME, + .val = 0xcf, + }, +}; + +static const struct qcom_uniphy_pcie_data ipq5332_data = { + .lane_offset = 0x800, + .phy_type = PHY_TYPE_PCIE_GEN3, + .init_seq = ipq5332_regs, + .init_seq_num = ARRAY_SIZE(ipq5332_regs), + .pipe_clk_rate = 250 * MEGA, +}; + +static void qcom_uniphy_pcie_init(struct qcom_uniphy_pcie *phy) +{ + const struct qcom_uniphy_pcie_data *data = phy->data; + const struct qcom_uniphy_pcie_regs *init_seq; + void __iomem *base = phy->base; + int lane, i; + + for (lane = 0; lane < phy->lanes; lane++) { + init_seq = data->init_seq; + + for (i = 0; i < data->init_seq_num; i++) + writel(init_seq[i].val, base + init_seq[i].offset); + + base += data->lane_offset; + } +} + +static int qcom_uniphy_pcie_power_off(struct phy *x) +{ + struct qcom_uniphy_pcie *phy = phy_get_drvdata(x); + + clk_bulk_disable_unprepare(phy->num_clks, phy->clks); + + return reset_control_assert(phy->resets); +} + +static int qcom_uniphy_pcie_power_on(struct phy *x) +{ + struct qcom_uniphy_pcie *phy = phy_get_drvdata(x); + int ret; + + ret = reset_control_assert(phy->resets); + if (ret) { + dev_err(phy->dev, "reset assert failed (%d)\n", ret); + return ret; + } + + usleep_range(RST_ASSERT_DELAY_MIN_US, RST_ASSERT_DELAY_MAX_US); + + ret = reset_control_deassert(phy->resets); + if (ret) { + dev_err(phy->dev, "reset deassert failed (%d)\n", ret); + return ret; + } + + usleep_range(PIPE_CLK_DELAY_MIN_US, PIPE_CLK_DELAY_MAX_US); + + ret = clk_bulk_prepare_enable(phy->num_clks, phy->clks); + if (ret) { + dev_err(phy->dev, "clk prepare and enable failed %d\n", ret); + return ret; + } + + usleep_range(CLK_EN_DELAY_MIN_US, CLK_EN_DELAY_MAX_US); + + qcom_uniphy_pcie_init(phy); + + return 0; +} + +static inline int qcom_uniphy_pcie_get_resources(struct platform_device *pdev, + struct qcom_uniphy_pcie *phy) +{ + struct resource *res; + + phy->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + phy->num_clks = devm_clk_bulk_get_all(phy->dev, &phy->clks); + if (phy->num_clks < 0) + return phy->num_clks; + + phy->resets = devm_reset_control_array_get_exclusive(phy->dev); + if (IS_ERR(phy->resets)) + return PTR_ERR(phy->resets); + + return 0; +} + +/* + * Register a fixed rate pipe clock. + * + * The _pipe_clksrc generated by PHY goes to the GCC that gate + * controls it. The _pipe_clk coming out of the GCC is requested + * by the PHY driver for its operations. + * We register the _pipe_clksrc here. The gcc driver takes care + * of assigning this _pipe_clksrc as parent to _pipe_clk. + * Below picture shows this relationship. + * + * +---------------+ + * | PHY block |<<---------------------------------------+ + * | | | + * | +-------+ | +-----+ | + * I/P---^-->| PLL |---^--->pipe_clksrc--->| GCC |--->pipe_clk---+ + * clk | +-------+ | +-----+ + * +---------------+ + */ +static inline int phy_pipe_clk_register(struct qcom_uniphy_pcie *phy, int id) +{ + const struct qcom_uniphy_pcie_data *data = phy->data; + struct clk_hw *hw; + char name[64]; + + snprintf(name, sizeof(name), "phy%d_pipe_clk_src", id); + hw = devm_clk_hw_register_fixed_rate(phy->dev, name, NULL, 0, + data->pipe_clk_rate); + if (IS_ERR(hw)) + return dev_err_probe(phy->dev, PTR_ERR(hw), + "Unable to register %s\n", name); + + return devm_of_clk_add_hw_provider(phy->dev, of_clk_hw_simple_get, hw); +} + +static const struct of_device_id qcom_uniphy_pcie_id_table[] = { + { + .compatible = "qcom,ipq5332-uniphy-pcie-phy", + .data = &ipq5332_data, + }, { + /* Sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, qcom_uniphy_pcie_id_table); + +static const struct phy_ops pcie_ops = { + .power_on = qcom_uniphy_pcie_power_on, + .power_off = qcom_uniphy_pcie_power_off, + .owner = THIS_MODULE, +}; + +static int qcom_uniphy_pcie_probe(struct platform_device *pdev) +{ + struct phy_provider *phy_provider; + struct device *dev = &pdev->dev; + struct qcom_uniphy_pcie *phy; + struct phy *generic_phy; + int ret; + + phy = devm_kzalloc(&pdev->dev, sizeof(*phy), GFP_KERNEL); + if (!phy) + return -ENOMEM; + + platform_set_drvdata(pdev, phy); + phy->dev = &pdev->dev; + + phy->data = of_device_get_match_data(dev); + if (!phy->data) + return -EINVAL; + + ret = of_property_read_u32(dev_of_node(dev), "num-lanes", &phy->lanes); + if (ret) + return dev_err_probe(dev, ret, "Couldn't read num-lanes\n"); + + ret = qcom_uniphy_pcie_get_resources(pdev, phy); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to get resources: %d\n", ret); + + generic_phy = devm_phy_create(phy->dev, NULL, &pcie_ops); + if (IS_ERR(generic_phy)) + return PTR_ERR(generic_phy); + + phy_set_drvdata(generic_phy, phy); + + ret = phy_pipe_clk_register(phy, generic_phy->id); + if (ret) + dev_err(&pdev->dev, "failed to register phy pipe clk\n"); + + phy_provider = devm_of_phy_provider_register(phy->dev, + of_phy_simple_xlate); + if (IS_ERR(phy_provider)) + return PTR_ERR(phy_provider); + + return 0; +} + +static struct platform_driver qcom_uniphy_pcie_driver = { + .probe = qcom_uniphy_pcie_probe, + .driver = { + .name = "qcom-uniphy-pcie", + .of_match_table = qcom_uniphy_pcie_id_table, + }, +}; + +module_platform_driver(qcom_uniphy_pcie_driver); + +MODULE_DESCRIPTION("PCIE QCOM UNIPHY driver"); +MODULE_LICENSE("GPL"); From 12185bc38f7667b1d895b2165a8a47335a4cf31b Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 10 Mar 2025 14:12:29 -0700 Subject: [PATCH 0690/2157] dt-bindings: phy: qcom,sc8280xp-qmp-ufs-phy: document the SM8750 QMP UFS PHY Document the QMP UFS PHY on the SM8750 Platform. Acked-by: Krzysztof Kozlowski Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250310-sm8750_ufs_master-v2-1-0dfdd6823161@quicinc.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml index 72bed2933b03..a58370a6a5d3 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -44,6 +44,7 @@ properties: - qcom,sm8475-qmp-ufs-phy - qcom,sm8550-qmp-ufs-phy - qcom,sm8650-qmp-ufs-phy + - qcom,sm8750-qmp-ufs-phy reg: maxItems: 1 @@ -111,6 +112,7 @@ allOf: - qcom,sm8475-qmp-ufs-phy - qcom,sm8550-qmp-ufs-phy - qcom,sm8650-qmp-ufs-phy + - qcom,sm8750-qmp-ufs-phy then: properties: clocks: From b02cc9a176793b207e959701af1ec26222093b05 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 10 Mar 2025 14:12:30 -0700 Subject: [PATCH 0691/2157] phy: qcom-qmp-ufs: Add PHY Configuration support for sm8750 Add SM8750 specific register layout and table configs. The serdes TX RX register offset has changed for SM8750 and hence keep UFS specific serdes offsets in a dedicated header file. Reviewed-by: Neil Armstrong Co-developed-by: Manish Pandey Signed-off-by: Manish Pandey Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250310-sm8750_ufs_master-v2-2-0dfdd6823161@quicinc.com Signed-off-by: Vinod Koul --- .../qualcomm/phy-qcom-qmp-qserdes-com-v6.h | 7 + .../phy-qcom-qmp-qserdes-txrx-ufs-v7.h | 67 +++++++ drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 180 +++++++++++++++++- 3 files changed, 246 insertions(+), 8 deletions(-) create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v7.h diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h index 328c6c0b0b09..258f3d30742e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h @@ -86,4 +86,11 @@ #define QSERDES_V6_COM_CMN_STATUS 0x1d0 #define QSERDES_V6_COM_C_READY_STATUS 0x1f8 +#define QSERDES_V6_COM_ADAPTIVE_ANALOG_CONFIG 0x268 +#define QSERDES_V6_COM_CP_CTRL_ADAPTIVE_MODE0 0x26c +#define QSERDES_V6_COM_PLL_RCCTRL_ADAPTIVE_MODE0 0x270 +#define QSERDES_V6_COM_PLL_CCTRL_ADAPTIVE_MODE0 0x274 +#define QSERDES_V6_COM_CP_CTRL_ADAPTIVE_MODE1 0x278 +#define QSERDES_V6_COM_PLL_RCCTRL_ADAPTIVE_MODE1 0x27c +#define QSERDES_V6_COM_PLL_CCTRL_ADAPTIVE_MODE1 0x280 #endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v7.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v7.h new file mode 100644 index 000000000000..3f0522492f85 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v7.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_QSERDES_TXRX_UFS_V7_H_ +#define QCOM_PHY_QMP_QSERDES_TXRX_UFS_V7_H_ + +#define QSERDES_UFS_V7_TX_RES_CODE_LANE_TX 0x28 +#define QSERDES_UFS_V7_TX_RES_CODE_LANE_RX 0x2c +#define QSERDES_UFS_V7_TX_RES_CODE_LANE_OFFSET_TX 0x30 +#define QSERDES_UFS_V7_TX_RES_CODE_LANE_OFFSET_RX 0x34 +#define QSERDES_UFS_V7_TX_LANE_MODE_1 0x7c +#define QSERDES_UFS_V7_TX_FR_DCC_CTRL 0x108 + +#define QSERDES_UFS_V7_RX_UCDR_FASTLOCK_FO_GAIN_RATE4 0x10 +#define QSERDES_UFS_V7_RX_UCDR_FASTLOCK_SO_GAIN_RATE4 0x24 +#define QSERDES_UFS_V7_RX_UCDR_SO_SATURATION 0x28 +#define QSERDES_UFS_V7_RX_UCDR_FASTLOCK_COUNT_HIGH_RATE4 0x54 +#define QSERDES_UFS_V7_RX_UCDR_PI_CTRL1 0x58 +#define QSERDES_UFS_V7_RX_TERM_BW_CTRL0 0xc4 +#define QSERDES_UFS_V7_RX_UCDR_FO_GAIN_RATE2 0xd4 +#define QSERDES_UFS_V7_RX_UCDR_FO_GAIN_RATE4 0xdc +#define QSERDES_UFS_V7_RX_UCDR_SO_GAIN_RATE4 0xf0 +#define QSERDES_UFS_V7_RX_UCDR_PI_CONTROLS 0xf4 +#define QSERDES_UFS_V7_RX_VGA_CAL_MAN_VAL 0x178 +#define QSERDES_UFS_V7_RX_EQU_ADAPTOR_CNTRL4 0x1b4 +#define QSERDES_UFS_V7_RX_EQ_OFFSET_ADAPTOR_CNTRL1 0x1cc +#define QSERDES_UFS_V7_RX_OFFSET_ADAPTOR_CNTRL3 0x1d4 +#define QSERDES_UFS_V7_RX_INTERFACE_MODE 0x1f0 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B0 0x218 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B1 0x21C +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B2 0x220 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B3 0x224 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B4 0x228 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B6 0x230 +#define QSERDES_UFS_V7_RX_MODE_RATE_0_1_B7 0x234 +#define QSERDES_UFS_V7_RX_MODE_RATE2_B3 0x248 +#define QSERDES_UFS_V7_RX_MODE_RATE2_B6 0x254 +#define QSERDES_UFS_V7_RX_MODE_RATE2_B7 0x258 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B0 0x260 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B1 0x264 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B2 0x268 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B3 0x26c +#define QSERDES_UFS_V7_RX_MODE_RATE3_B4 0x270 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B5 0x274 +#define QSERDES_UFS_V7_RX_MODE_RATE3_B7 0x27c +#define QSERDES_UFS_V7_RX_MODE_RATE3_B8 0x280 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B0 0x284 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B1 0x288 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B2 0x28c +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B3 0x290 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B4 0x294 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B5 0x298 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B6 0x29c +#define QSERDES_UFS_V7_RX_MODE_RATE4_SA_B7 0x2a0 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B0 0x2a8 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B1 0x2ac +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B2 0x2b0 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B3 0x2b4 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B4 0x2b8 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B5 0x2bc +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B6 0x2c0 +#define QSERDES_UFS_V7_RX_MODE_RATE4_SB_B7 0x2c4 +#define QSERDES_UFS_V7_RX_DLL0_FTUNE_CTRL 0x348 +#define QSERDES_UFS_V7_RX_SIGDET_CAL_TRIM 0x380 +#endif diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index d964bdfe8700..45b3b792696e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -31,6 +31,7 @@ #include "phy-qcom-qmp-pcs-ufs-v6.h" #include "phy-qcom-qmp-qserdes-txrx-ufs-v6.h" +#include "phy-qcom-qmp-qserdes-txrx-ufs-v7.h" /* QPHY_PCS_READY_STATUS bit */ #define PCS_READY BIT(0) @@ -949,6 +950,124 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_g5_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSG5_SYNC_WAIT_TIME, 0x9e), }; +static const struct qmp_phy_init_tbl sm8750_ufsphy_serdes[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0xd9), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_HS_SWITCH_SEL_1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_CFG, 0x60), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO_MODE1, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_IETRIM, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_IPTRIM, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_CTRL, 0x40), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_ADAPTIVE_ANALOG_CONFIG, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x41), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_ADAPTIVE_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCCTRL_ADAPTIVE_MODE0, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_ADAPTIVE_MODE0, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x4c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_ADAPTIVE_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCCTRL_ADAPTIVE_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_ADAPTIVE_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x99), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xbe), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x23), +}; + +static const struct qmp_phy_init_tbl sm8750_ufsphy_tx[] = { + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_TX_LANE_MODE_1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_TX_RES_CODE_LANE_OFFSET_TX, 0x07), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_TX_RES_CODE_LANE_OFFSET_RX, 0x17), +}; + +static const struct qmp_phy_init_tbl sm8750_ufsphy_rx[] = { + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_FO_GAIN_RATE2, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_FO_GAIN_RATE4, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_SO_GAIN_RATE4, 0x04), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_PI_CONTROLS, 0x07), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_OFFSET_ADAPTOR_CNTRL3, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_FASTLOCK_COUNT_HIGH_RATE4, 0x02), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_FASTLOCK_FO_GAIN_RATE4, 0x1c), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_FASTLOCK_SO_GAIN_RATE4, 0x06), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_VGA_CAL_MAN_VAL, 0x8e), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_EQU_ADAPTOR_CNTRL4, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B0, 0xce), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B1, 0xce), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B2, 0x18), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B3, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B4, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B6, 0x60), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE_0_1_B7, 0x62), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE2_B3, 0x9a), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE2_B6, 0xe2), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE2_B7, 0x06), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B0, 0x1b), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B1, 0x1b), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B2, 0x98), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B3, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B4, 0x2a), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B5, 0x12), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B7, 0x06), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE3_B8, 0x01), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B0, 0x93), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B1, 0x93), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B2, 0x60), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B3, 0x99), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B4, 0x5f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B5, 0x92), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B6, 0xe3), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SA_B7, 0x06), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B0, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B1, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B2, 0x60), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B3, 0x99), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B4, 0x5f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B5, 0x92), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B6, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_MODE_RATE4_SB_B7, 0x06), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_SO_SATURATION, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_UCDR_PI_CTRL1, 0x94), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_TERM_BW_CTRL0, 0xfa), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_DLL0_FTUNE_CTRL, 0x30), + QMP_PHY_INIT_CFG(QSERDES_UFS_V7_RX_SIGDET_CAL_TRIM, 0x77), +}; + +static const struct qmp_phy_init_tbl sm8750_ufsphy_pcs[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0x40), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S5, 0x12), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S6, 0x15), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S7, 0x19), +}; + +static const struct qmp_phy_init_tbl sm8750_ufsphy_g4_pcs[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04), +}; + +static const struct qmp_phy_init_tbl sm8750_ufsphy_hs_b_pcs[] = { + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0x41), +}; + struct qmp_ufs_offsets { u16 serdes; u16 pcs; @@ -1523,6 +1642,45 @@ static const struct qmp_phy_cfg sm8650_ufsphy_cfg = { .regs = ufsphy_v6_regs_layout, }; +static const struct qmp_phy_cfg sm8750_ufsphy_cfg = { + .lanes = 2, + + .offsets = &qmp_ufs_offsets_v6, + .max_supported_gear = UFS_HS_G5, + + .tbls = { + .serdes = sm8750_ufsphy_serdes, + .serdes_num = ARRAY_SIZE(sm8750_ufsphy_serdes), + .tx = sm8750_ufsphy_tx, + .tx_num = ARRAY_SIZE(sm8750_ufsphy_tx), + .rx = sm8750_ufsphy_rx, + .rx_num = ARRAY_SIZE(sm8750_ufsphy_rx), + .pcs = sm8750_ufsphy_pcs, + .pcs_num = ARRAY_SIZE(sm8750_ufsphy_pcs), + }, + + .tbls_hs_b = { + .pcs = sm8750_ufsphy_hs_b_pcs, + .pcs_num = ARRAY_SIZE(sm8750_ufsphy_hs_b_pcs), + }, + + .tbls_hs_overlay[0] = { + .pcs = sm8750_ufsphy_g4_pcs, + .pcs_num = ARRAY_SIZE(sm8750_ufsphy_g4_pcs), + .max_gear = UFS_HS_G4, + }, + .tbls_hs_overlay[1] = { + .pcs = sm8650_ufsphy_g5_pcs, + .pcs_num = ARRAY_SIZE(sm8650_ufsphy_g5_pcs), + .max_gear = UFS_HS_G5, + }, + + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = ufsphy_v6_regs_layout, + +}; + static void qmp_ufs_serdes_init(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbls *tbls) { void __iomem *serdes = qmp->serdes; @@ -1578,23 +1736,25 @@ static int qmp_ufs_get_gear_overlay(struct qmp_ufs *qmp, const struct qmp_phy_cf return ret; } +static void qmp_ufs_init_all(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbls *tbls) +{ + qmp_ufs_serdes_init(qmp, tbls); + qmp_ufs_lanes_init(qmp, tbls); + qmp_ufs_pcs_init(qmp, tbls); +} + static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg *cfg) { int i; - qmp_ufs_serdes_init(qmp, &cfg->tbls); - qmp_ufs_lanes_init(qmp, &cfg->tbls); - qmp_ufs_pcs_init(qmp, &cfg->tbls); + qmp_ufs_init_all(qmp, &cfg->tbls); i = qmp_ufs_get_gear_overlay(qmp, cfg); if (i >= 0) { - qmp_ufs_serdes_init(qmp, &cfg->tbls_hs_overlay[i]); - qmp_ufs_lanes_init(qmp, &cfg->tbls_hs_overlay[i]); - qmp_ufs_pcs_init(qmp, &cfg->tbls_hs_overlay[i]); + qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]); } - if (qmp->mode == PHY_MODE_UFS_HS_B) - qmp_ufs_serdes_init(qmp, &cfg->tbls_hs_b); + qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } static int qmp_ufs_com_init(struct qmp_ufs *qmp) @@ -2061,7 +2221,11 @@ static const struct of_device_id qmp_ufs_of_match_table[] = { }, { .compatible = "qcom,sm8650-qmp-ufs-phy", .data = &sm8650_ufsphy_cfg, + }, { + .compatible = "qcom,sm8750-qmp-ufs-phy", + .data = &sm8750_ufsphy_cfg, }, + { }, }; MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table); From e46e59b77a9e6f322ef1ad08a8874211f389cf47 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Mon, 10 Mar 2025 14:30:56 +0800 Subject: [PATCH 0692/2157] dt-bindings: phy: qcom,sc8280xp-qmp-pcie-phy: Document the QCS8300 QMP PCIe PHY Gen4 x2 Document the QMP PCIe PHY on the QCS8300 platform. Acked-by: Rob Herring (Arm) Signed-off-by: Ziyue Zhang Link: https://lore.kernel.org/r/20250310063103.3924525-2-quic_ziyuzhan@quicinc.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index af8c4aa4f43d..2c6c9296e4c0 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -17,6 +17,7 @@ properties: compatible: enum: - qcom,qcs615-qmp-gen3x1-pcie-phy + - qcom,qcs8300-qmp-gen4x2-pcie-phy - qcom,sa8775p-qmp-gen4x2-pcie-phy - qcom,sa8775p-qmp-gen4x4-pcie-phy - qcom,sar2130p-qmp-gen3x2-pcie-phy @@ -195,6 +196,7 @@ allOf: compatible: contains: enum: + - qcom,qcs8300-qmp-gen4x2-pcie-phy - qcom,sa8775p-qmp-gen4x2-pcie-phy - qcom,sa8775p-qmp-gen4x4-pcie-phy then: From ebf198f17b5ac967db6256f4083bbcbdcc2a3100 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Mon, 10 Mar 2025 14:30:57 +0800 Subject: [PATCH 0693/2157] phy: qcom-qmp-pcie: add dual lane PHY support for QCS8300 The PCIe Gen4x2 PHY for qcs8300 has a lot of difference with sa8775p. So the qcs8300_qmp_gen4x2_pcie_rx_alt_tbl for qcs8300 is added. Reviewed-by: Dmitry Baryshkov Signed-off-by: Ziyue Zhang Link: https://lore.kernel.org/r/20250310063103.3924525-3-quic_ziyuzhan@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 89 ++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 6726bbe4ad15..c232b8fe9846 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -805,6 +805,58 @@ static const struct qmp_phy_init_tbl qcs615_pcie_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M3P5DB_V0, 0xe), }; +static const struct qmp_phy_init_tbl qcs8300_qmp_gen4x2_pcie_rx_alt_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_PI_CONTROLS, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B0, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B1, 0xb0), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B2, 0xd2), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B3, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B4, 0x42), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B5, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE_0_1_B6, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B0, 0x9b), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B1, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B2, 0xd2), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B3, 0xec), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B4, 0x43), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B5, 0xdd), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE2_B6, 0x0d), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B0, 0xf3), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B1, 0xf8), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B2, 0xec), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B3, 0xd6), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B4, 0x83), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B5, 0xf5), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MODE_RATE3_B6, 0x5e), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_PHPRE_CTRL, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_0_1, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_AUX_DATA_THRESH_BIN_RATE_2_3, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_3, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH4_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH5_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH6_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH1_RATE210, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH2_RATE210, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_MARG_COARSE_THRESH3_RATE210, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_Q_PI_INTRINSIC_BIAS_RATE32, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE2, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_FO_GAIN_RATE3, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_UCDR_SO_GAIN_RATE3, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_CNTRL1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_VGA_CAL_MAN_VAL, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x7c), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_RX_IDAC_SAOFFSET, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_DFE_DAC_ENABLE1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_GM_CAL, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_20_RX_TX_ADAPT_POST_THRESH2, 0x1f), +}; + static const struct qmp_phy_init_tbl sdm845_qmp_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x14), QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30), @@ -3336,6 +3388,40 @@ static const struct qmp_phy_cfg qcs615_pciephy_cfg = { .phy_status = PHYSTATUS, }; +static const struct qmp_phy_cfg qcs8300_qmp_gen4x2_pciephy_cfg = { + .lanes = 2, + .offsets = &qmp_pcie_offsets_v5_20, + + .tbls = { + .serdes = sa8775p_qmp_gen4x2_pcie_serdes_alt_tbl, + .serdes_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_serdes_alt_tbl), + .tx = sa8775p_qmp_gen4_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_tx_tbl), + .rx = qcs8300_qmp_gen4x2_pcie_rx_alt_tbl, + .rx_num = ARRAY_SIZE(qcs8300_qmp_gen4x2_pcie_rx_alt_tbl), + .pcs = sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl, + .pcs_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_pcs_alt_tbl), + .pcs_misc = sa8775p_qmp_gen4_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sa8775p_qmp_gen4x2_pcie_rc_serdes_alt_tbl, + .serdes_num = ARRAY_SIZE(sa8775p_qmp_gen4x2_pcie_rc_serdes_alt_tbl), + .pcs_misc = sa8775p_qmp_gen4_pcie_rc_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sa8775p_qmp_gen4_pcie_rc_pcs_misc_tbl), + }, + + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = pciephy_v5_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS_4_20, +}; + static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, @@ -4891,6 +4977,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,qcs615-qmp-gen3x1-pcie-phy", .data = &qcs615_pciephy_cfg, + }, { + .compatible = "qcom,qcs8300-qmp-gen4x2-pcie-phy", + .data = &qcs8300_qmp_gen4x2_pciephy_cfg, }, { .compatible = "qcom,sa8775p-qmp-gen4x2-pcie-phy", .data = &sa8775p_qmp_gen4x2_pciephy_cfg, From 27b2fcbd6b98204b0dce62e9aa9540ca0a2b70f1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 5 Feb 2025 11:55:19 +0200 Subject: [PATCH 0694/2157] rtc: renesas-rtca3: Disable interrupts only if the RTC is enabled If the RTC is not enabled and the code attempts to disable the interrupt, the readb_poll_timeout_atomic() function in the rtca3_alarm_irq_set_helper() may timeout, leading to probe failures. This issue is reproducible on some devices because the initial values of the PIE and AIE bits in the RCR1 register are undefined. To prevent probe failures in this scenario, disable RTC interrupts only when the RTC is actually enabled. Fixes: d4488377609e ("rtc: renesas-rtca3: Add driver for RTCA-3 available on Renesas RZ/G3S SoC") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250205095519.2031742-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-renesas-rtca3.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-renesas-rtca3.c b/drivers/rtc/rtc-renesas-rtca3.c index a056291d3887..ab816bdf0d77 100644 --- a/drivers/rtc/rtc-renesas-rtca3.c +++ b/drivers/rtc/rtc-renesas-rtca3.c @@ -586,17 +586,14 @@ static int rtca3_initial_setup(struct clk *clk, struct rtca3_priv *priv) */ usleep_range(sleep_us, sleep_us + 10); - /* Disable all interrupts. */ - mask = RTCA3_RCR1_AIE | RTCA3_RCR1_CIE | RTCA3_RCR1_PIE; - ret = rtca3_alarm_irq_set_helper(priv, mask, 0); - if (ret) - return ret; - mask = RTCA3_RCR2_START | RTCA3_RCR2_HR24; val = readb(priv->base + RTCA3_RCR2); - /* Nothing to do if already started in 24 hours and calendar count mode. */ - if ((val & mask) == mask) - return 0; + /* Only disable the interrupts if already started in 24 hours and calendar count mode. */ + if ((val & mask) == mask) { + /* Disable all interrupts. */ + mask = RTCA3_RCR1_AIE | RTCA3_RCR1_CIE | RTCA3_RCR1_PIE; + return rtca3_alarm_irq_set_helper(priv, mask, 0); + } /* Reconfigure the RTC in 24 hours and calendar count mode. */ mask = RTCA3_RCR2_START | RTCA3_RCR2_CNTMD; From 0ccd5d56e6b2f342096a362ac24785d4be9c64a2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 11 Mar 2025 20:50:05 +1100 Subject: [PATCH 0695/2157] watchdog: lenovo_se30_wdt: include io.h for devm_ioremap() After merging the watchdog tree, today's linux-next build (x86_64 allmodconfig) failed like this: drivers/watchdog/lenovo_se30_wdt.c: In function 'lenovo_se30_wdt_probe': drivers/watchdog/lenovo_se30_wdt.c:272:31: error: implicit declaration of function 'devm_ioremap' [-Wimplicit-function-declaration] 272 | priv->shm_base_addr = devm_ioremap(dev, base_phys, SHM_WIN_SIZE); | ^~~~~~~~~~~~ drivers/watchdog/lenovo_se30_wdt.c:272:29: error: assignment to 'unsigned char *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 272 | priv->shm_base_addr = devm_ioremap(dev, base_phys, SHM_WIN_SIZE); | ^ Caused by commit c284153a2c55 ("watchdog: lenovo_se30_wdt: Watchdog driver for Lenovo SE30 platform") Somewhere alogn the way a change to some include file means that linux/io.h is no longer implicitly included. I have added the following patch for today. Fixes: c284153a2c55 ("watchdog: lenovo_se30_wdt: Watchdog driver for Lenovo SE30 platform") Signed-off-by: Stephen Rothwell Reviewed-by: Mark Pearson Reviewed-by: Wim Van Sebroeck Link: https://lore.kernel.org/r/20250311210305.3c5a2313@canb.auug.org.au Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/lenovo_se30_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/lenovo_se30_wdt.c b/drivers/watchdog/lenovo_se30_wdt.c index f25429da0cec..024b842499b3 100644 --- a/drivers/watchdog/lenovo_se30_wdt.c +++ b/drivers/watchdog/lenovo_se30_wdt.c @@ -5,6 +5,7 @@ #define dev_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include From 594e1e368f09cea198f991ab122dd72ed559a383 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 11 Mar 2025 07:10:09 -0700 Subject: [PATCH 0696/2157] watchdog: Convert to use device property Use device_property_read_u32() instead of of_property_read_u32() to support reading the timeout from non-devicetree sources. Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20250311141009.756975-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/watchdog_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index d46d8c8c01f2..6152dba4b52c 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -33,7 +33,8 @@ #include /* For __init/__exit/... */ #include /* For ida_* macros */ #include /* For IS_ERR macros */ -#include /* For of_get_timeout_sec */ +#include /* For of_alias_get_id */ +#include /* For device_property_read_u32 */ #include #include "watchdog_core.h" /* For watchdog_dev_register/... */ @@ -137,8 +138,7 @@ int watchdog_init_timeout(struct watchdog_device *wdd, } /* try to get the timeout_sec property */ - if (dev && dev->of_node && - of_property_read_u32(dev->of_node, "timeout-sec", &t) == 0) { + if (dev && device_property_read_u32(dev, "timeout-sec", &t) == 0) { if (t && !watchdog_timeout_invalid(wdd, t)) { wdd->timeout = t; return 0; From 4a29fa2626a1daea1753a6f7cdcd7ae456e0335e Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 10 Dec 2024 14:49:28 +0000 Subject: [PATCH 0697/2157] coresight: docs: Remove target sink from examples Previously the sink had to be specified, but now it auto selects one by default. Including a sink in the examples causes issues when copy pasting the command because it might not work if that sink isn't present. Remove the sink from all the basic examples and create a new section specifically about overriding the default one. Make the text a but more concise now that it's in the advanced section, and similarly for removing the old kernel advice. Signed-off-by: James Clark Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20241210144933.295798-1-james.clark@linaro.org --- Documentation/trace/coresight/coresight.rst | 41 ++++++++----------- .../userspace-api/perf_ring_buffer.rst | 4 +- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst index d4f93d6a2d63..806699871b80 100644 --- a/Documentation/trace/coresight/coresight.rst +++ b/Documentation/trace/coresight/coresight.rst @@ -462,44 +462,35 @@ queried by the perf command line tool: cs_etm// [Kernel PMU event] - linaro@linaro-nano:~$ - Regardless of the number of tracers available in a system (usually equal to the amount of processor cores), the "cs_etm" PMU will be listed only once. A Coresight PMU works the same way as any other PMU, i.e the name of the PMU is -listed along with configuration options within forward slashes '/'. Since a -Coresight system will typically have more than one sink, the name of the sink to -work with needs to be specified as an event option. -On newer kernels the available sinks are listed in sysFS under +provided along with configuration options within forward slashes '/' (see +`Config option formats`_). + +Advanced Perf framework usage +----------------------------- + +Sink selection +~~~~~~~~~~~~~~ + +An appropriate sink will be selected automatically for use with Perf, but since +there will typically be more than one sink, the name of the sink to use may be +specified as a special config option prefixed with '@'. + +The available sinks are listed in sysFS under ($SYSFS)/bus/event_source/devices/cs_etm/sinks/:: root@localhost:/sys/bus/event_source/devices/cs_etm/sinks# ls tmc_etf0 tmc_etr0 tpiu0 -On older kernels, this may need to be found from the list of coresight devices, -available under ($SYSFS)/bus/coresight/devices/:: - - root:~# ls /sys/bus/coresight/devices/ - etm0 etm1 etm2 etm3 etm4 etm5 funnel0 - funnel1 funnel2 replicator0 stm0 tmc_etf0 tmc_etr0 tpiu0 root@linaro-nano:~# perf record -e cs_etm/@tmc_etr0/u --per-thread program -As mentioned above in section "Device Naming scheme", the names of the devices could -look different from what is used in the example above. One must use the device names -as it appears under the sysFS. - -The syntax within the forward slashes '/' is important. The '@' character -tells the parser that a sink is about to be specified and that this is the sink -to use for the trace session. - More information on the above and other example on how to use Coresight with the perf tools can be found in the "HOWTO.md" file of the openCSD gitHub repository [#third]_. -Advanced perf framework usage ------------------------------ - AutoFDO analysis using the perf tools ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -508,7 +499,7 @@ perf can be used to record and analyze trace of programs. Execution can be recorded using 'perf record' with the cs_etm event, specifying the name of the sink to record to, e.g:: - perf record -e cs_etm/@tmc_etr0/u --per-thread + perf record -e cs_etm//u --per-thread The 'perf report' and 'perf script' commands can be used to analyze execution, synthesizing instruction and branch events from the instruction trace. @@ -572,7 +563,7 @@ sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tuto Bubble sorting array of 30000 elements 5910 ms - $ perf record -e cs_etm/@tmc_etr0/u --per-thread taskset -c 2 ./sort + $ perf record -e cs_etm//u --per-thread taskset -c 2 ./sort Bubble sorting array of 30000 elements 12543 ms [ perf record: Woken up 35 times to write data ] diff --git a/Documentation/userspace-api/perf_ring_buffer.rst b/Documentation/userspace-api/perf_ring_buffer.rst index bde9d8cbc106..dc71544532ce 100644 --- a/Documentation/userspace-api/perf_ring_buffer.rst +++ b/Documentation/userspace-api/perf_ring_buffer.rst @@ -627,7 +627,7 @@ regular ring buffer. AUX events and AUX trace data are two different things. Let's see an example:: - perf record -a -e cycles -e cs_etm/@tmc_etr0/ -- sleep 2 + perf record -a -e cycles -e cs_etm// -- sleep 2 The above command enables two events: one is the event *cycles* from PMU and another is the AUX event *cs_etm* from Arm CoreSight, both are saved @@ -766,7 +766,7 @@ only record AUX trace data at a specific time point which users are interested in. E.g. below gives an example of how to take snapshots with 1 second interval with Arm CoreSight:: - perf record -e cs_etm/@tmc_etr0/u -S -a program & + perf record -e cs_etm//u -S -a program & PERFPID=$! while true; do kill -USR2 $PERFPID From 0c74d232578b1a7071e0312312811cb75b26b202 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 11 Mar 2025 17:45:49 +0200 Subject: [PATCH 0698/2157] xhci: Avoid queuing redundant Stop Endpoint command for stalled endpoint If EP_STALLED flag is set in xhci_urb_dequeue(), without EP_HALTED or SET_DEQ_PENDING flags, then the endpoint is in stopped state and the cancelled URB can be given back immediately withouth queueing a 'stop endpoint' command. Without this change the cancelled URB would eventually be given back in the 'context state error' completion path of the 'stop endpoint' command. This is not optimal. For this improvement to work the EP_STALLED flag must be cleared with xhci lock held. Suggested-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250311154551.4035726-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1852175f48c1..19e308f4fc06 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1773,8 +1773,8 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) goto done; } - /* In this case no commands are pending but the endpoint is stopped */ - if (ep->ep_state & EP_CLEARING_TT) { + /* In these cases no commands are pending but the endpoint is stopped */ + if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) { /* and cancelled TDs can be given back right away */ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n", urb->dev->slot_id, ep_index, ep->ep_state); @@ -3211,10 +3211,12 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; ep = &vdev->eps[ep_index]; + + spin_lock_irqsave(&xhci->lock, flags); + ep->ep_state &= ~EP_STALLED; /* Bail out if toggle is already being cleared by a endpoint reset */ - spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; spin_unlock_irqrestore(&xhci->lock, flags); From dfc88357b6b6356dadea06b2c0bc8041f5e11720 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 11 Mar 2025 17:45:50 +0200 Subject: [PATCH 0699/2157] usb: xhci: Don't change the status of stalled TDs on failed Stop EP When the device stalls an endpoint, current TD is assigned -EPIPE status and Reset Endpoint is queued. If a Stop Endpoint is pending at the time, it will run before Reset Endpoint and fail due to the stall. Its handler will change TD's status to -EPROTO before Reset Endpoint handler runs and initiates giveback. Check if the stall has already been handled and don't try to do it again. Since xhci_handle_halted_endpoint() performs this check too, not overwriting td->status is the only difference. I haven't seen this case yet, but I have seen a related one where the xHC has already executed Reset Endpoint, EP Context state is now Stopped and EP_HALTED is set. If the xHC took a bit longer to execute Reset Endpoint, said case would become this one. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250311154551.4035726-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 0f8acbb9cd21..303d66df271e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1220,7 +1220,14 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, */ switch (GET_EP_CTX_STATE(ep_ctx)) { case EP_STATE_HALTED: - xhci_dbg(xhci, "Stop ep completion raced with stall, reset ep\n"); + xhci_dbg(xhci, "Stop ep completion raced with stall\n"); + /* + * If the halt happened before Stop Endpoint failed, its transfer event + * should have already been handled and Reset Endpoint should be pending. + */ + if (ep->ep_state & EP_HALTED) + goto reset_done; + if (ep->ep_state & EP_HAS_STREAMS) { reset_type = EP_SOFT_RESET; } else { @@ -1231,8 +1238,11 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, } /* reset ep, reset handler cleans up cancelled tds */ err = xhci_handle_halted_endpoint(xhci, ep, td, reset_type); + xhci_dbg(xhci, "Stop ep completion resetting ep, status %d\n", err); if (err) break; +reset_done: + /* Reset EP handler will clean up cancelled TDs */ ep->ep_state &= ~EP_STOP_CMD_PENDING; return; case EP_STATE_STOPPED: From 28a76fcc4c85dd39633fb96edb643c91820133e3 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 11 Mar 2025 17:45:51 +0200 Subject: [PATCH 0700/2157] usb: xhci: Avoid Stop Endpoint retry loop if the endpoint seems Running Nothing prevents a broken HC from claiming that an endpoint is Running and repeatedly rejecting Stop Endpoint with Context State Error. Avoid infinite retries and give back cancelled TDs. No such cases known so far, but HCs have bugs. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250311154551.4035726-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 303d66df271e..5d64c297721c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1264,16 +1264,19 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, * Stopped state, but it will soon change to Running. * * Assume this bug on unexpected Stop Endpoint failures. - * Keep retrying until the EP starts and stops again, on - * chips where this is known to help. Wait for 100ms. + * Keep retrying until the EP starts and stops again. */ - if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100))) - break; fallthrough; case EP_STATE_RUNNING: /* Race, HW handled stop ep cmd before ep was running */ xhci_dbg(xhci, "Stop ep completion ctx error, ctx_state %d\n", GET_EP_CTX_STATE(ep_ctx)); + /* + * Don't retry forever if we guessed wrong or a defective HC never starts + * the EP or says 'Running' but fails the command. We must give back TDs. + */ + if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100))) + break; command = xhci_alloc_command(xhci, false, GFP_ATOMIC); if (!command) { From f52b5daf392166c449b4c9d3015c06683dba0ef5 Mon Sep 17 00:00:00 2001 From: Alisa-Dariana Roman Date: Fri, 28 Feb 2025 16:06:00 +0200 Subject: [PATCH 0701/2157] dt-bindings: iio: adc: add AD7191 AD7191 is a pin-programmable, ultra-low noise 24-bit sigma-delta ADC designed for precision bridge sensor measurements. It features two differential analog input channels, selectable output rates, programmable gain, internal temperature sensor and simultaneous 50Hz/60Hz rejection. Signed-off-by: Alisa-Dariana Roman Reviewed-by: Rob Herring (Arm) Reviewed-by: David Lechner Link: https://patch.msgid.link/20250228141327.262488-2-alisa.roman@analog.com Signed-off-by: Jonathan Cameron --- .../bindings/iio/adc/adi,ad7191.yaml | 149 ++++++++++++++++++ MAINTAINERS | 7 + 2 files changed, 156 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml new file mode 100644 index 000000000000..801ed319ee82 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2025 Analog Devices Inc. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/adi,ad7191.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD7191 ADC + +maintainers: + - Alisa-Dariana Roman + +description: | + Bindings for the Analog Devices AD7191 ADC device. Datasheet can be + found here: + https://www.analog.com/media/en/technical-documentation/data-sheets/AD7191.pdf + The device's PDOWN pin must be connected to the SPI controller's chip select + pin. + +properties: + compatible: + enum: + - adi,ad7191 + + reg: + maxItems: 1 + + spi-cpol: true + + spi-cpha: true + + clocks: + maxItems: 1 + description: + Must be present when CLKSEL pin is tied HIGH to select external clock + source (either a crystal between MCLK1 and MCLK2 pins, or a + CMOS-compatible clock driving MCLK2 pin). Must be absent when CLKSEL pin + is tied LOW to use the internal 4.92MHz clock. + + interrupts: + maxItems: 1 + + avdd-supply: + description: AVdd voltage supply + + dvdd-supply: + description: DVdd voltage supply + + vref-supply: + description: Vref voltage supply + + odr-gpios: + description: + ODR1 and ODR2 pins for output data rate selection. Should be defined if + adi,odr-value is absent. + minItems: 2 + maxItems: 2 + + adi,odr-value: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Should be present if ODR pins are pin-strapped. Possible values: + 120 Hz (ODR1=0, ODR2=0) + 60 Hz (ODR1=0, ODR2=1) + 50 Hz (ODR1=1, ODR2=0) + 10 Hz (ODR1=1, ODR2=1) + If defined, odr-gpios must be absent. + enum: [120, 60, 50, 10] + + pga-gpios: + description: + PGA1 and PGA2 pins for gain selection. Should be defined if adi,pga-value + is absent. + minItems: 2 + maxItems: 2 + + adi,pga-value: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Should be present if PGA pins are pin-strapped. Possible values: + Gain 1 (PGA1=0, PGA2=0) + Gain 8 (PGA1=0, PGA2=1) + Gain 64 (PGA1=1, PGA2=0) + Gain 128 (PGA1=1, PGA2=1) + If defined, pga-gpios must be absent. + enum: [1, 8, 64, 128] + + temp-gpios: + description: TEMP pin for temperature sensor enable. + maxItems: 1 + + chan-gpios: + description: CHAN pin for input channel selection. + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - avdd-supply + - dvdd-supply + - vref-supply + - spi-cpol + - spi-cpha + - temp-gpios + - chan-gpios + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - oneOf: + - required: + - adi,odr-value + - required: + - odr-gpios + - oneOf: + - required: + - adi,pga-value + - required: + - pga-gpios + +unevaluatedProperties: false + +examples: + - | + #include + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + compatible = "adi,ad7191"; + reg = <0>; + spi-max-frequency = <1000000>; + spi-cpol; + spi-cpha; + clocks = <&ad7191_mclk>; + interrupts = <25 IRQ_TYPE_EDGE_FALLING>; + interrupt-parent = <&gpio>; + avdd-supply = <&avdd>; + dvdd-supply = <&dvdd>; + vref-supply = <&vref>; + adi,pga-value = <1>; + odr-gpios = <&gpio 23 GPIO_ACTIVE_HIGH>, <&gpio 24 GPIO_ACTIVE_HIGH>; + temp-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; + chan-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 06f122cb8bbd..636b77833825 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1346,6 +1346,13 @@ W: http://ez.analog.com/community/linux-device-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad7091r* F: drivers/iio/adc/ad7091r* +ANALOG DEVICES INC AD7191 DRIVER +M: Alisa-Dariana Roman +L: linux-iio@vger.kernel.org +S: Supported +W: https://ez.analog.com/linux-software-drivers +F: Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml + ANALOG DEVICES INC AD7192 DRIVER M: Alisa-Dariana Roman L: linux-iio@vger.kernel.org From 2e3ae10c3591642ca04389f75caa2126559c8af6 Mon Sep 17 00:00:00 2001 From: Alisa-Dariana Roman Date: Fri, 28 Feb 2025 16:06:01 +0200 Subject: [PATCH 0702/2157] iio: adc: ad7191: add AD7191 AD7191 is a pin-programmable, ultra-low noise 24-bit sigma-delta ADC designed for precision bridge sensor measurements. It features two differential analog input channels, selectable output rates, programmable gain, internal temperature sensor and simultaneous 50Hz/60Hz rejection. Signed-off-by: Alisa-Dariana Roman Reviewed-by: David Lechner Link: https://patch.msgid.link/20250228141327.262488-3-alisa.roman@analog.com Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 + drivers/iio/adc/Kconfig | 10 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ad7191.c | 554 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 566 insertions(+) create mode 100644 drivers/iio/adc/ad7191.c diff --git a/MAINTAINERS b/MAINTAINERS index 636b77833825..8e33b084db17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1352,6 +1352,7 @@ L: linux-iio@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml +F: drivers/iio/adc/ad7191.c ANALOG DEVICES INC AD7192 DRIVER M: Alisa-Dariana Roman diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 27413516216c..b7ae6e0ae0df 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -142,6 +142,16 @@ config AD7173 To compile this driver as a module, choose M here: the module will be called ad7173. +config AD7191 + tristate "Analog Devices AD7191 ADC driver" + depends on SPI + select AD_SIGMA_DELTA + help + Say yes here to build support for Analog Devices AD7191. + + To compile this driver as a module, choose M here: the + module will be called ad7191. + config AD7192 tristate "Analog Devices AD7192 and similar ADC driver" depends on SPI diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 9f26d5eca822..3e918c3eec69 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_AD7091R5) += ad7091r5.o obj-$(CONFIG_AD7091R8) += ad7091r8.o obj-$(CONFIG_AD7124) += ad7124.o obj-$(CONFIG_AD7173) += ad7173.o +obj-$(CONFIG_AD7191) += ad7191.o obj-$(CONFIG_AD7192) += ad7192.o obj-$(CONFIG_AD7266) += ad7266.o obj-$(CONFIG_AD7280) += ad7280a.o diff --git a/drivers/iio/adc/ad7191.c b/drivers/iio/adc/ad7191.c new file mode 100644 index 000000000000..d9cd903ffdd2 --- /dev/null +++ b/drivers/iio/adc/ad7191.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AD7191 ADC driver + * + * Copyright 2025 Analog Devices Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define ad_sigma_delta_to_ad7191(sigmad) \ + container_of((sigmad), struct ad7191_state, sd) + +#define AD7191_TEMP_CODES_PER_DEGREE 2815 + +#define AD7191_CHAN_MASK BIT(0) +#define AD7191_TEMP_MASK BIT(1) + +enum ad7191_channel { + AD7191_CH_AIN1_AIN2, + AD7191_CH_AIN3_AIN4, + AD7191_CH_TEMP, +}; + +/* + * NOTE: + * The AD7191 features a dual-use data out ready DOUT/RDY output. + * In order to avoid contentions on the SPI bus, it's therefore necessary + * to use SPI bus locking. + * + * The DOUT/RDY output must also be wired to an interrupt-capable GPIO. + * + * The SPI controller's chip select must be connected to the PDOWN pin + * of the ADC. When CS (PDOWN) is high, it powers down the device and + * resets the internal circuitry. + */ + +struct ad7191_state { + struct ad_sigma_delta sd; + struct mutex lock; /* Protect device state */ + + struct gpio_descs *odr_gpios; + struct gpio_descs *pga_gpios; + struct gpio_desc *temp_gpio; + struct gpio_desc *chan_gpio; + + u16 int_vref_mv; + const u32 (*scale_avail)[2]; + size_t scale_avail_size; + u32 scale_index; + const u32 *samp_freq_avail; + size_t samp_freq_avail_size; + u32 samp_freq_index; + + struct clk *mclk; +}; + +static int ad7191_set_channel(struct ad_sigma_delta *sd, unsigned int address) +{ + struct ad7191_state *st = ad_sigma_delta_to_ad7191(sd); + u8 temp_gpio_val, chan_gpio_val; + + if (!FIELD_FIT(AD7191_CHAN_MASK | AD7191_TEMP_MASK, address)) + return -EINVAL; + + chan_gpio_val = FIELD_GET(AD7191_CHAN_MASK, address); + temp_gpio_val = FIELD_GET(AD7191_TEMP_MASK, address); + + gpiod_set_value(st->chan_gpio, chan_gpio_val); + gpiod_set_value(st->temp_gpio, temp_gpio_val); + + return 0; +} + +static int ad7191_set_cs(struct ad_sigma_delta *sigma_delta, int assert) +{ + struct spi_transfer t = { + .len = 0, + .cs_change = assert, + }; + struct spi_message m; + + spi_message_init_with_transfers(&m, &t, 1); + + return spi_sync_locked(sigma_delta->spi, &m); +} + +static int ad7191_set_mode(struct ad_sigma_delta *sd, + enum ad_sigma_delta_mode mode) +{ + struct ad7191_state *st = ad_sigma_delta_to_ad7191(sd); + + switch (mode) { + case AD_SD_MODE_CONTINUOUS: + case AD_SD_MODE_SINGLE: + return ad7191_set_cs(&st->sd, 1); + case AD_SD_MODE_IDLE: + return ad7191_set_cs(&st->sd, 0); + default: + return -EINVAL; + } +} + +static const struct ad_sigma_delta_info ad7191_sigma_delta_info = { + .set_channel = ad7191_set_channel, + .set_mode = ad7191_set_mode, + .has_registers = false, +}; + +static int ad7191_init_regulators(struct iio_dev *indio_dev) +{ + struct ad7191_state *st = iio_priv(indio_dev); + struct device *dev = &st->sd.spi->dev; + int ret; + + ret = devm_regulator_get_enable(dev, "avdd"); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable specified AVdd supply\n"); + + ret = devm_regulator_get_enable(dev, "dvdd"); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable specified DVdd supply\n"); + + ret = devm_regulator_get_enable_read_voltage(dev, "vref"); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to get Vref voltage\n"); + + st->int_vref_mv = ret / 1000; + + return 0; +} + +static int ad7191_config_setup(struct iio_dev *indio_dev) +{ + struct ad7191_state *st = iio_priv(indio_dev); + struct device *dev = &st->sd.spi->dev; + /* Sampling frequencies in Hz, see Table 5 */ + static const u32 samp_freq[4] = { 120, 60, 50, 10 }; + /* Gain options, see Table 7 */ + const u32 gain[4] = { 1, 8, 64, 128 }; + static u32 scale_buffer[4][2]; + int odr_value, odr_index = 0, pga_value, pga_index = 0, i, ret; + u64 scale_uv; + + st->samp_freq_index = 0; + st->scale_index = 0; + + ret = device_property_read_u32(dev, "adi,odr-value", &odr_value); + if (ret && ret != -EINVAL) + return dev_err_probe(dev, ret, "Failed to get odr value.\n"); + + if (ret == -EINVAL) { + st->odr_gpios = devm_gpiod_get_array(dev, "odr", GPIOD_OUT_LOW); + if (IS_ERR(st->odr_gpios)) + return dev_err_probe(dev, PTR_ERR(st->odr_gpios), + "Failed to get odr gpios.\n"); + + if (st->odr_gpios->ndescs != 2) + return dev_err_probe(dev, -EINVAL, "Expected 2 odr gpio pins.\n"); + + st->samp_freq_avail = samp_freq; + st->samp_freq_avail_size = ARRAY_SIZE(samp_freq); + } else { + for (i = 0; i < ARRAY_SIZE(samp_freq); i++) { + if (odr_value != samp_freq[i]) + continue; + odr_index = i; + break; + } + + st->samp_freq_avail = &samp_freq[odr_index]; + st->samp_freq_avail_size = 1; + + st->odr_gpios = NULL; + } + + mutex_lock(&st->lock); + + for (i = 0; i < ARRAY_SIZE(scale_buffer); i++) { + scale_uv = ((u64)st->int_vref_mv * NANO) >> + (indio_dev->channels[0].scan_type.realbits - 1); + do_div(scale_uv, gain[i]); + scale_buffer[i][1] = do_div(scale_uv, NANO); + scale_buffer[i][0] = scale_uv; + } + + mutex_unlock(&st->lock); + + ret = device_property_read_u32(dev, "adi,pga-value", &pga_value); + if (ret && ret != -EINVAL) + return dev_err_probe(dev, ret, "Failed to get pga value.\n"); + + if (ret == -EINVAL) { + st->pga_gpios = devm_gpiod_get_array(dev, "pga", GPIOD_OUT_LOW); + if (IS_ERR(st->pga_gpios)) + return dev_err_probe(dev, PTR_ERR(st->pga_gpios), + "Failed to get pga gpios.\n"); + + if (st->pga_gpios->ndescs != 2) + return dev_err_probe(dev, -EINVAL, "Expected 2 pga gpio pins.\n"); + + st->scale_avail = scale_buffer; + st->scale_avail_size = ARRAY_SIZE(scale_buffer); + } else { + for (i = 0; i < ARRAY_SIZE(gain); i++) { + if (pga_value != gain[i]) + continue; + pga_index = i; + break; + } + + st->scale_avail = &scale_buffer[pga_index]; + st->scale_avail_size = 1; + + st->pga_gpios = NULL; + } + + st->temp_gpio = devm_gpiod_get(dev, "temp", GPIOD_OUT_LOW); + if (IS_ERR(st->temp_gpio)) + return dev_err_probe(dev, PTR_ERR(st->temp_gpio), + "Failed to get temp gpio.\n"); + + st->chan_gpio = devm_gpiod_get(dev, "chan", GPIOD_OUT_LOW); + if (IS_ERR(st->chan_gpio)) + return dev_err_probe(dev, PTR_ERR(st->chan_gpio), + "Failed to get chan gpio.\n"); + + return 0; +} + +static int ad7191_clock_setup(struct ad7191_state *st) +{ + struct device *dev = &st->sd.spi->dev; + + st->mclk = devm_clk_get_optional_enabled(dev, "mclk"); + if (IS_ERR(st->mclk)) + return dev_err_probe(dev, PTR_ERR(st->mclk), + "Failed to get mclk.\n"); + + return 0; +} + +static int ad7191_setup(struct iio_dev *indio_dev) +{ + struct ad7191_state *st = iio_priv(indio_dev); + int ret; + + ret = ad7191_init_regulators(indio_dev); + if (ret) + return ret; + + ret = ad7191_config_setup(indio_dev); + if (ret) + return ret; + + return ad7191_clock_setup(st); +} + +static int ad7191_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long m) +{ + struct ad7191_state *st = iio_priv(indio_dev); + + switch (m) { + case IIO_CHAN_INFO_RAW: + return ad_sigma_delta_single_conversion(indio_dev, chan, val); + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_VOLTAGE: { + guard(mutex)(&st->lock); + *val = st->scale_avail[st->scale_index][0]; + *val2 = st->scale_avail[st->scale_index][1]; + return IIO_VAL_INT_PLUS_NANO; + } + case IIO_TEMP: + *val = 0; + *val2 = NANO / AD7191_TEMP_CODES_PER_DEGREE; + return IIO_VAL_INT_PLUS_NANO; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_OFFSET: + *val = -(1 << (chan->scan_type.realbits - 1)); + switch (chan->type) { + case IIO_VOLTAGE: + return IIO_VAL_INT; + case IIO_TEMP: + *val -= 273 * AD7191_TEMP_CODES_PER_DEGREE; + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->samp_freq_avail[st->samp_freq_index]; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7191_set_gain(struct ad7191_state *st, int gain_index) +{ + DECLARE_BITMAP(bitmap, 2) = { }; + + st->scale_index = gain_index; + + bitmap_write(bitmap, gain_index, 0, 2); + + return gpiod_multi_set_value_cansleep(st->pga_gpios, bitmap); +} + +static int ad7191_set_samp_freq(struct ad7191_state *st, int samp_freq_index) +{ + DECLARE_BITMAP(bitmap, 2) = {}; + + st->samp_freq_index = samp_freq_index; + + bitmap_write(bitmap, samp_freq_index, 0, 2); + + return gpiod_multi_set_value_cansleep(st->odr_gpios, bitmap); +} + +static int __ad7191_write_raw(struct ad7191_state *st, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int i; + + switch (mask) { + case IIO_CHAN_INFO_SCALE: { + if (!st->pga_gpios) + return -EPERM; + guard(mutex)(&st->lock); + for (i = 0; i < st->scale_avail_size; i++) { + if (val2 == st->scale_avail[i][1]) + return ad7191_set_gain(st, i); + } + return -EINVAL; + } + case IIO_CHAN_INFO_SAMP_FREQ: { + if (!st->odr_gpios) + return -EPERM; + guard(mutex)(&st->lock); + for (i = 0; i < st->samp_freq_avail_size; i++) { + if (val == st->samp_freq_avail[i]) + return ad7191_set_samp_freq(st, i); + } + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static int ad7191_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, int val2, + long mask) +{ + struct ad7191_state *st = iio_priv(indio_dev); + int ret; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = __ad7191_write_raw(st, chan, val, val2, mask); + + iio_device_release_direct(indio_dev); + + return ret; +} + +static int ad7191_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7191_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, const int **vals, + int *type, int *length, long mask) +{ + struct ad7191_state *st = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_SCALE: + *vals = (int *)st->scale_avail; + *type = IIO_VAL_INT_PLUS_NANO; + *length = st->scale_avail_size * 2; + return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = (int *)st->samp_freq_avail; + *type = IIO_VAL_INT; + *length = st->samp_freq_avail_size; + return IIO_AVAIL_LIST; + } + + return -EINVAL; +} + +static const struct iio_info ad7191_info = { + .read_raw = ad7191_read_raw, + .write_raw = ad7191_write_raw, + .write_raw_get_fmt = ad7191_write_raw_get_fmt, + .read_avail = ad7191_read_avail, + .validate_trigger = ad_sd_validate_trigger, +}; + +static const struct iio_chan_spec ad7191_channels[] = { + { + .type = IIO_TEMP, + .address = AD7191_CH_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .scan_type = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_BE, + }, + }, + { + .type = IIO_VOLTAGE, + .differential = 1, + .indexed = 1, + .channel = 1, + .channel2 = 2, + .address = AD7191_CH_AIN1_AIN2, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type_available = BIT(IIO_CHAN_INFO_SCALE), + .scan_index = 1, + .scan_type = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_BE, + }, + }, + { + .type = IIO_VOLTAGE, + .differential = 1, + .indexed = 1, + .channel = 3, + .channel2 = 4, + .address = AD7191_CH_AIN3_AIN4, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .info_mask_shared_by_type_available = BIT(IIO_CHAN_INFO_SCALE), + .scan_index = 2, + .scan_type = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_BE, + }, + }, + IIO_CHAN_SOFT_TIMESTAMP(3), +}; + +static int ad7191_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct ad7191_state *st; + struct iio_dev *indio_dev; + int ret; + + indio_dev = devm_iio_device_alloc(dev, sizeof(*st)); + if (!indio_dev) + return -ENOMEM; + + st = iio_priv(indio_dev); + + ret = devm_mutex_init(dev, &st->lock); + if (ret) + return ret; + + indio_dev->name = "ad7191"; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = ad7191_channels; + indio_dev->num_channels = ARRAY_SIZE(ad7191_channels); + indio_dev->info = &ad7191_info; + + ret = ad_sd_init(&st->sd, indio_dev, spi, &ad7191_sigma_delta_info); + if (ret) + return ret; + + ret = devm_ad_sd_setup_buffer_and_trigger(dev, indio_dev); + if (ret) + return ret; + + ret = ad7191_setup(indio_dev); + if (ret) + return ret; + + return devm_iio_device_register(dev, indio_dev); +} + +static const struct of_device_id ad7191_of_match[] = { + { .compatible = "adi,ad7191", }, + { } +}; +MODULE_DEVICE_TABLE(of, ad7191_of_match); + +static const struct spi_device_id ad7191_id_table[] = { + { "ad7191" }, + { } +}; +MODULE_DEVICE_TABLE(spi, ad7191_id_table); + +static struct spi_driver ad7191_driver = { + .driver = { + .name = "ad7191", + .of_match_table = ad7191_of_match, + }, + .probe = ad7191_probe, + .id_table = ad7191_id_table, +}; +module_spi_driver(ad7191_driver); + +MODULE_AUTHOR("Alisa-Dariana Roman "); +MODULE_DESCRIPTION("Analog Devices AD7191 ADC"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("IIO_AD_SIGMA_DELTA"); From 9525c66de334caa85ed3ca20eabe96eff18a85d4 Mon Sep 17 00:00:00 2001 From: Alisa-Dariana Roman Date: Fri, 28 Feb 2025 16:06:02 +0200 Subject: [PATCH 0703/2157] docs: iio: add AD7191 Add documentation for AD7191 driver. Signed-off-by: Alisa-Dariana Roman Reviewed-by: David Lechner Link: https://patch.msgid.link/20250228141327.262488-4-alisa.roman@analog.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad7191.rst | 119 +++++++++++++++++++++++++++++++++++ Documentation/iio/index.rst | 1 + MAINTAINERS | 1 + 3 files changed, 121 insertions(+) create mode 100644 Documentation/iio/ad7191.rst diff --git a/Documentation/iio/ad7191.rst b/Documentation/iio/ad7191.rst new file mode 100644 index 000000000000..977d4fea14b0 --- /dev/null +++ b/Documentation/iio/ad7191.rst @@ -0,0 +1,119 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +============= +AD7191 driver +============= + +Device driver for Analog Devices AD7191 ADC. + +Supported devices +================= + +* `AD7191 `_ + +The AD7191 is a high precision, low noise, 24-bit Σ-Δ ADC with integrated PGA. +It features two differential input channels, an internal temperature sensor, and +configurable sampling rates. + +Devicetree +========== + +Pin Configuration +----------------- + +The driver supports both pin-strapped and GPIO-controlled configurations for ODR +(Output Data Rate) and PGA (Programmable Gain Amplifier) settings. These +configurations are mutually exclusive - you must use either pin-strapped or GPIO +control for each setting, not both. + +ODR Configuration +^^^^^^^^^^^^^^^^^ + +The ODR can be configured either through GPIO control or pin-strapping: + +- When using GPIO control, specify the "odr-gpios" property in the device tree +- For pin-strapped configuration, specify the "adi,odr-value" property in the + device tree + +Available ODR settings: + + - 120 Hz (ODR1=0, ODR2=0) + - 60 Hz (ODR1=0, ODR2=1) + - 50 Hz (ODR1=1, ODR2=0) + - 10 Hz (ODR1=1, ODR2=1) + +PGA Configuration +^^^^^^^^^^^^^^^^^ + +The PGA can be configured either through GPIO control or pin-strapping: + +- When using GPIO control, specify the "pga-gpios" property in the device tree +- For pin-strapped configuration, specify the "adi,pga-value" property in the + device tree + +Available PGA gain settings: + + - 1x (PGA1=0, PGA2=0) + - 8x (PGA1=0, PGA2=1) + - 64x (PGA1=1, PGA2=0) + - 128x (PGA1=1, PGA2=1) + +Clock Configuration +------------------- + +The AD7191 supports both internal and external clock sources: + +- When CLKSEL pin is tied LOW: Uses internal 4.92MHz clock (no clock property + needed) +- When CLKSEL pin is tied HIGH: Requires external clock source + - Can be a crystal between MCLK1 and MCLK2 pins + - Or a CMOS-compatible clock driving MCLK2 pin + - Must specify the "clocks" property in device tree when using external clock + +SPI Interface Requirements +-------------------------- + +The AD7191 has specific SPI interface requirements: + +- The DOUT/RDY output is dual-purpose and requires SPI bus locking +- DOUT/RDY must be connected to an interrupt-capable GPIO +- The SPI controller's chip select must be connected to the PDOWN pin of the ADC +- When CS (PDOWN) is high, the device powers down and resets internal circuitry +- SPI mode 3 operation (CPOL=1, CPHA=1) is required + +Power Supply Requirements +------------------------- + +The device requires the following power supplies: + +- AVdd: Analog power supply +- DVdd: Digital power supply +- Vref: Reference voltage supply (external) + +All power supplies must be specified in the device tree. + +Channel Configuration +===================== + +The device provides three channels: + +1. Temperature Sensor + - 24-bit unsigned + - Internal temperature measurement + - Temperature in millidegrees Celsius + +2. Differential Input (AIN1-AIN2) + - 24-bit unsigned + - Differential voltage measurement + - Configurable gain via PGA + +3. Differential Input (AIN3-AIN4) + - 24-bit unsigned + - Differential voltage measurement + - Configurable gain via PGA + +Buffer Support +============== + +This driver supports IIO triggered buffers. See Documentation/iio/iio_devbuf.rst +for more information about IIO triggered buffers. diff --git a/Documentation/iio/index.rst b/Documentation/iio/index.rst index ea35f2a06496..bbb2edce8272 100644 --- a/Documentation/iio/index.rst +++ b/Documentation/iio/index.rst @@ -22,6 +22,7 @@ Industrial I/O Kernel Drivers ad4000 ad4030 ad4695 + ad7191 ad7380 ad7606 ad7625 diff --git a/MAINTAINERS b/MAINTAINERS index 8e33b084db17..ffdb3f21fc4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1352,6 +1352,7 @@ L: linux-iio@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml +F: Documentation/iio/ad7191.rst F: drivers/iio/adc/ad7191.c ANALOG DEVICES INC AD7192 DRIVER From fb3a0811a7bc12d51cba4b75d6b123ab62e2fe4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:46:59 +0100 Subject: [PATCH 0704/2157] iio: adc: ad_sigma_delta: Disable channel after calibration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ad_sd_calibrate() enables the channel to calibrate at function entry but doesn't disable it on exit. This is problematic because if two (or more) channels are calibrated in a row, the second calibration isn't executed as intended as the first (still enabled) channel is recalibrated and after the first irq (i.e. when the calibration of the first channel completed) the calibration is aborted. This currently affects ad7173 only, as the other drivers using ad_sd_calibrate() never have more than one channel enabled at a time. To fix this, disable the calibrated channel after calibration. Fixes: 031bdc8aee01 ("iio: adc: ad7173: add calibration support") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-11-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 5907c35b98e5..d91a3ba127e3 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -339,6 +339,7 @@ int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, out: sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); + ad_sigma_delta_disable_one(sigma_delta, channel); sigma_delta->bus_locked = false; spi_bus_unlock(sigma_delta->spi->controller); From 280acb19824663d55a3f4d09087c76fabe86fa3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:00 +0100 Subject: [PATCH 0705/2157] iio: adc: ad4130: Fix comparison of channel setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking the binary representation of two structs (of the same type) for equality doesn't have the same semantic as comparing all members for equality. The former might find a difference where the latter doesn't in the presence of padding or when ambiguous types like float or bool are involved. (Floats typically have different representations for single values, like -0.0 vs +0.0, or 0.5 * 2² vs 0.25 * 2³. The type bool has at least 8 bits and the raw values 1 and 2 (probably) both evaluate to true, but memcmp finds a difference.) When searching for a channel that already has the configuration we need, the comparison by member is the one that is needed. Convert the comparison accordingly to compare the members one after another. Also add a static_assert guard to (somewhat) ensure that when struct ad4130_setup_info is expanded, the comparison is adapted, too. This issue is somewhat theoretic, but using memcmp() on a struct is a bad pattern that is worth fixing. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-12-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 41 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index f3ae41214326..af0b2be1bab0 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -224,6 +224,10 @@ enum ad4130_pin_function { AD4130_PIN_FN_VBIAS = BIT(3), }; +/* + * If you make adaptations in this struct, you most likely also have to adapt + * ad4130_setup_info_eq(), too. + */ struct ad4130_setup_info { unsigned int iout0_val; unsigned int iout1_val; @@ -592,6 +596,40 @@ static irqreturn_t ad4130_irq_handler(int irq, void *private) return IRQ_HANDLED; } +static bool ad4130_setup_info_eq(struct ad4130_setup_info *a, + struct ad4130_setup_info *b) +{ + /* + * This is just to make sure that the comparison is adapted after + * struct ad4130_setup_info was changed. + */ + static_assert(sizeof(*a) == + sizeof(struct { + unsigned int iout0_val; + unsigned int iout1_val; + unsigned int burnout; + unsigned int pga; + unsigned int fs; + u32 ref_sel; + enum ad4130_filter_mode filter_mode; + bool ref_bufp; + bool ref_bufm; + })); + + if (a->iout0_val != b->iout0_val || + a->iout1_val != b->iout1_val || + a->burnout != b->burnout || + a->pga != b->pga || + a->fs != b->fs || + a->ref_sel != b->ref_sel || + a->filter_mode != b->filter_mode || + a->ref_bufp != b->ref_bufp || + a->ref_bufm != b->ref_bufm) + return false; + + return true; +} + static int ad4130_find_slot(struct ad4130_state *st, struct ad4130_setup_info *target_setup_info, unsigned int *slot, bool *overwrite) @@ -605,8 +643,7 @@ static int ad4130_find_slot(struct ad4130_state *st, struct ad4130_slot_info *slot_info = &st->slots_info[i]; /* Immediately accept a matching setup info. */ - if (!memcmp(target_setup_info, &slot_info->setup, - sizeof(*target_setup_info))) { + if (ad4130_setup_info_eq(target_setup_info, &slot_info->setup)) { *slot = i; return 0; } From 05a5d874f7327b75e9bc4359618017e047cc129c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:01 +0100 Subject: [PATCH 0706/2157] iio: adc: ad7124: Fix comparison of channel configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking the binary representation of two structs (of the same type) for equality doesn't have the same semantic as comparing all members for equality. The former might find a difference where the latter doesn't in the presence of padding or when ambiguous types like float or bool are involved. (Floats typically have different representations for single values, like -0.0 vs +0.0, or 0.5 * 2² vs 0.25 * 2³. The type bool has at least 8 bits and the raw values 1 and 2 (probably) both evaluate to true, but memcmp finds a difference.) When searching for a channel that already has the configuration we need, the comparison by member is the one that is needed. Convert the comparison accordingly to compare the members one after another. Also add a static_assert guard to (somewhat) ensure that when struct ad7124_channel_config::config_props is expanded, the comparison is adapted, too. This issue is somewhat theoretic, but using memcmp() on a struct is a bad pattern that is worth fixing. Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-13-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 6bc418d38820..de90ecb5f630 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -151,7 +151,11 @@ struct ad7124_chip_info { struct ad7124_channel_config { bool live; unsigned int cfg_slot; - /* Following fields are used to compare equality. */ + /* + * Following fields are used to compare for equality. If you + * make adaptations in it, you most likely also have to adapt + * ad7124_find_similar_live_cfg(), too. + */ struct_group(config_props, enum ad7124_ref_sel refsel; bool bipolar; @@ -338,15 +342,38 @@ static struct ad7124_channel_config *ad7124_find_similar_live_cfg(struct ad7124_ struct ad7124_channel_config *cfg) { struct ad7124_channel_config *cfg_aux; - ptrdiff_t cmp_size; int i; - cmp_size = sizeof_field(struct ad7124_channel_config, config_props); + /* + * This is just to make sure that the comparison is adapted after + * struct ad7124_channel_config was changed. + */ + static_assert(sizeof_field(struct ad7124_channel_config, config_props) == + sizeof(struct { + enum ad7124_ref_sel refsel; + bool bipolar; + bool buf_positive; + bool buf_negative; + unsigned int vref_mv; + unsigned int pga_bits; + unsigned int odr; + unsigned int odr_sel_bits; + unsigned int filter_type; + })); + for (i = 0; i < st->num_channels; i++) { cfg_aux = &st->channels[i].cfg; if (cfg_aux->live && - !memcmp(&cfg->config_props, &cfg_aux->config_props, cmp_size)) + cfg->refsel == cfg_aux->refsel && + cfg->bipolar == cfg_aux->bipolar && + cfg->buf_positive == cfg_aux->buf_positive && + cfg->buf_negative == cfg_aux->buf_negative && + cfg->vref_mv == cfg_aux->vref_mv && + cfg->pga_bits == cfg_aux->pga_bits && + cfg->odr == cfg_aux->odr && + cfg->odr_sel_bits == cfg_aux->odr_sel_bits && + cfg->filter_type == cfg_aux->filter_type) return cfg_aux; } From 7b6033ed5a9e1a369a9cf58018388ae4c5f17e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:02 +0100 Subject: [PATCH 0707/2157] iio: adc: ad7173: Fix comparison of channel configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking the binary representation of two structs (of the same type) for equality doesn't have the same semantic as comparing all members for equality. The former might find a difference where the latter doesn't in the presence of padding or when ambiguous types like float or bool are involved. (Floats typically have different representations for single values, like -0.0 vs +0.0, or 0.5 * 2² vs 0.25 * 2³. The type bool has at least 8 bits and the raw values 1 and 2 (probably) both evaluate to true, but memcmp finds a difference.) When searching for a channel that already has the configuration we need, the comparison by member is the one that is needed. Convert the comparison accordingly to compare the members one after another. Also add a static_assert guard to (somewhat) ensure that when struct ad7173_channel_config::config_props is expanded, the comparison is adapted, too. This issue is somewhat theoretic, but using memcmp() on a struct is a bad pattern that is worth fixing. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-14-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 9ac8ea3cb499..69de5886474c 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -197,7 +197,11 @@ struct ad7173_channel_config { u8 cfg_slot; bool live; - /* Following fields are used to compare equality. */ + /* + * Following fields are used to compare equality. If you + * make adaptations in it, you most likely also have to adapt + * ad7173_find_live_config(), too. + */ struct_group(config_props, bool bipolar; bool input_buf; @@ -573,15 +577,28 @@ static struct ad7173_channel_config * ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg) { struct ad7173_channel_config *cfg_aux; - ptrdiff_t cmp_size; int i; - cmp_size = sizeof_field(struct ad7173_channel_config, config_props); + /* + * This is just to make sure that the comparison is adapted after + * struct ad7173_channel_config was changed. + */ + static_assert(sizeof_field(struct ad7173_channel_config, config_props) == + sizeof(struct { + bool bipolar; + bool input_buf; + u8 odr; + u8 ref_sel; + })); + for (i = 0; i < st->num_channels; i++) { cfg_aux = &st->channels[i].cfg; if (cfg_aux->live && - !memcmp(&cfg->config_props, &cfg_aux->config_props, cmp_size)) + cfg->bipolar == cfg_aux->bipolar && + cfg->input_buf == cfg_aux->input_buf && + cfg->odr == cfg_aux->odr && + cfg->ref_sel == cfg_aux->ref_sel) return cfg_aux; } return NULL; From 7d33bdabf30413ebc9f2e305cfb341223b4a8c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:03 +0100 Subject: [PATCH 0708/2157] iio: adc: ad4130: Adapt internal names to match official filter_type ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently the interface to to select a filter was officially blessed to use "filter_type". Adapt the naming of several functions accordingly to make the new standard more present and so make the driver a better template for other drivers. Apart from the comment update this is just s/filter_mode/filter_type/. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-15-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 84 ++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index af0b2be1bab0..0f4c9cd6c102 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -204,7 +204,7 @@ enum ad4130_mode { AD4130_MODE_IDLE = 0b0100, }; -enum ad4130_filter_mode { +enum ad4130_filter_type { AD4130_FILTER_SINC4, AD4130_FILTER_SINC4_SINC1, AD4130_FILTER_SINC3, @@ -235,7 +235,7 @@ struct ad4130_setup_info { unsigned int pga; unsigned int fs; u32 ref_sel; - enum ad4130_filter_mode filter_mode; + enum ad4130_filter_type filter_type; bool ref_bufp; bool ref_bufm; }; @@ -256,7 +256,7 @@ struct ad4130_chan_info { }; struct ad4130_filter_config { - enum ad4130_filter_mode filter_mode; + enum ad4130_filter_type filter_type; unsigned int odr_div; unsigned int fs_max; enum iio_available_type samp_freq_avail_type; @@ -342,9 +342,9 @@ static const unsigned int ad4130_burnout_current_na_tbl[AD4130_BURNOUT_MAX] = { [AD4130_BURNOUT_4000NA] = 4000, }; -#define AD4130_VARIABLE_ODR_CONFIG(_filter_mode, _odr_div, _fs_max) \ +#define AD4130_VARIABLE_ODR_CONFIG(_filter_type, _odr_div, _fs_max) \ { \ - .filter_mode = (_filter_mode), \ + .filter_type = (_filter_type), \ .odr_div = (_odr_div), \ .fs_max = (_fs_max), \ .samp_freq_avail_type = IIO_AVAIL_RANGE, \ @@ -355,9 +355,9 @@ static const unsigned int ad4130_burnout_current_na_tbl[AD4130_BURNOUT_MAX] = { }, \ } -#define AD4130_FIXED_ODR_CONFIG(_filter_mode, _odr_div) \ +#define AD4130_FIXED_ODR_CONFIG(_filter_type, _odr_div) \ { \ - .filter_mode = (_filter_mode), \ + .filter_type = (_filter_type), \ .odr_div = (_odr_div), \ .fs_max = AD4130_FILTER_SELECT_MIN, \ .samp_freq_avail_type = IIO_AVAIL_LIST, \ @@ -379,7 +379,7 @@ static const struct ad4130_filter_config ad4130_filter_configs[] = { AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF4, 148), }; -static const char * const ad4130_filter_modes_str[] = { +static const char * const ad4130_filter_types_str[] = { [AD4130_FILTER_SINC4] = "sinc4", [AD4130_FILTER_SINC4_SINC1] = "sinc4+sinc1", [AD4130_FILTER_SINC3] = "sinc3", @@ -611,7 +611,7 @@ static bool ad4130_setup_info_eq(struct ad4130_setup_info *a, unsigned int pga; unsigned int fs; u32 ref_sel; - enum ad4130_filter_mode filter_mode; + enum ad4130_filter_type filter_type; bool ref_bufp; bool ref_bufm; })); @@ -622,7 +622,7 @@ static bool ad4130_setup_info_eq(struct ad4130_setup_info *a, a->pga != b->pga || a->fs != b->fs || a->ref_sel != b->ref_sel || - a->filter_mode != b->filter_mode || + a->filter_type != b->filter_type || a->ref_bufp != b->ref_bufp || a->ref_bufm != b->ref_bufm) return false; @@ -729,7 +729,7 @@ static int ad4130_write_slot_setup(struct ad4130_state *st, if (ret) return ret; - val = FIELD_PREP(AD4130_FILTER_MODE_MASK, setup_info->filter_mode) | + val = FIELD_PREP(AD4130_FILTER_MODE_MASK, setup_info->filter_type) | FIELD_PREP(AD4130_FILTER_SELECT_MASK, setup_info->fs); ret = regmap_write(st->regmap, AD4130_FILTER_X_REG(slot), val); @@ -873,11 +873,11 @@ static int ad4130_set_channel_enable(struct ad4130_state *st, * (used in ad4130_fs_to_freq) */ -static void ad4130_freq_to_fs(enum ad4130_filter_mode filter_mode, +static void ad4130_freq_to_fs(enum ad4130_filter_type filter_type, int val, int val2, unsigned int *fs) { const struct ad4130_filter_config *filter_config = - &ad4130_filter_configs[filter_mode]; + &ad4130_filter_configs[filter_type]; u64 dividend, divisor; int temp; @@ -896,11 +896,11 @@ static void ad4130_freq_to_fs(enum ad4130_filter_mode filter_mode, *fs = temp; } -static void ad4130_fs_to_freq(enum ad4130_filter_mode filter_mode, +static void ad4130_fs_to_freq(enum ad4130_filter_type filter_type, unsigned int fs, int *val, int *val2) { const struct ad4130_filter_config *filter_config = - &ad4130_filter_configs[filter_mode]; + &ad4130_filter_configs[filter_type]; unsigned int dividend, divisor; u64 temp; @@ -912,7 +912,7 @@ static void ad4130_fs_to_freq(enum ad4130_filter_mode filter_mode, *val = div_u64_rem(temp, NANO, val2); } -static int ad4130_set_filter_mode(struct iio_dev *indio_dev, +static int ad4130_set_filter_type(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int val) { @@ -920,17 +920,17 @@ static int ad4130_set_filter_mode(struct iio_dev *indio_dev, unsigned int channel = chan->scan_index; struct ad4130_chan_info *chan_info = &st->chans_info[channel]; struct ad4130_setup_info *setup_info = &chan_info->setup; - enum ad4130_filter_mode old_filter_mode; + enum ad4130_filter_type old_filter_type; int freq_val, freq_val2; unsigned int old_fs; int ret = 0; guard(mutex)(&st->lock); - if (setup_info->filter_mode == val) + if (setup_info->filter_type == val) return 0; old_fs = setup_info->fs; - old_filter_mode = setup_info->filter_mode; + old_filter_type = setup_info->filter_type; /* * When switching between filter modes, try to match the ODR as @@ -938,55 +938,55 @@ static int ad4130_set_filter_mode(struct iio_dev *indio_dev, * using the old filter mode, then convert it back into FS using * the new filter mode. */ - ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs, + ad4130_fs_to_freq(setup_info->filter_type, setup_info->fs, &freq_val, &freq_val2); ad4130_freq_to_fs(val, freq_val, freq_val2, &setup_info->fs); - setup_info->filter_mode = val; + setup_info->filter_type = val; ret = ad4130_write_channel_setup(st, channel, false); if (ret) { setup_info->fs = old_fs; - setup_info->filter_mode = old_filter_mode; + setup_info->filter_type = old_filter_type; return ret; } return 0; } -static int ad4130_get_filter_mode(struct iio_dev *indio_dev, +static int ad4130_get_filter_type(struct iio_dev *indio_dev, const struct iio_chan_spec *chan) { struct ad4130_state *st = iio_priv(indio_dev); unsigned int channel = chan->scan_index; struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup; - enum ad4130_filter_mode filter_mode; + enum ad4130_filter_type filter_type; guard(mutex)(&st->lock); - filter_mode = setup_info->filter_mode; + filter_type = setup_info->filter_type; - return filter_mode; + return filter_type; } -static const struct iio_enum ad4130_filter_mode_enum = { - .items = ad4130_filter_modes_str, - .num_items = ARRAY_SIZE(ad4130_filter_modes_str), - .set = ad4130_set_filter_mode, - .get = ad4130_get_filter_mode, +static const struct iio_enum ad4130_filter_type_enum = { + .items = ad4130_filter_types_str, + .num_items = ARRAY_SIZE(ad4130_filter_types_str), + .set = ad4130_set_filter_type, + .get = ad4130_get_filter_type, }; -static const struct iio_chan_spec_ext_info ad4130_filter_mode_ext_info[] = { +static const struct iio_chan_spec_ext_info ad4130_ext_info[] = { /* - * Intentional duplication of attributes to keep backwards compatibility - * while standardizing over the main IIO ABI for digital filtering. + * `filter_type` is the standardized IIO ABI for digital filtering. + * `filter_mode` is just kept for backwards compatibility. */ - IIO_ENUM("filter_mode", IIO_SEPARATE, &ad4130_filter_mode_enum), + IIO_ENUM("filter_mode", IIO_SEPARATE, &ad4130_filter_type_enum), IIO_ENUM_AVAILABLE("filter_mode", IIO_SHARED_BY_TYPE, - &ad4130_filter_mode_enum), - IIO_ENUM("filter_type", IIO_SEPARATE, &ad4130_filter_mode_enum), + &ad4130_filter_type_enum), + IIO_ENUM("filter_type", IIO_SEPARATE, &ad4130_filter_type_enum), IIO_ENUM_AVAILABLE("filter_type", IIO_SHARED_BY_TYPE, - &ad4130_filter_mode_enum), + &ad4130_filter_type_enum), { } }; @@ -1000,7 +1000,7 @@ static const struct iio_chan_spec ad4130_channel_template = { BIT(IIO_CHAN_INFO_SAMP_FREQ), .info_mask_separate_available = BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_SAMP_FREQ), - .ext_info = ad4130_filter_mode_ext_info, + .ext_info = ad4130_ext_info, .scan_type = { .sign = 'u', .endianness = IIO_BE, @@ -1050,7 +1050,7 @@ static int ad4130_set_channel_freq(struct ad4130_state *st, guard(mutex)(&st->lock); old_fs = setup_info->fs; - ad4130_freq_to_fs(setup_info->filter_mode, val, val2, &fs); + ad4130_freq_to_fs(setup_info->filter_type, val, val2, &fs); if (fs == setup_info->fs) return 0; @@ -1142,7 +1142,7 @@ static int ad4130_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SAMP_FREQ: { guard(mutex)(&st->lock); - ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs, + ad4130_fs_to_freq(setup_info->filter_type, setup_info->fs, val, val2); return IIO_VAL_INT_PLUS_NANO; @@ -1172,7 +1172,7 @@ static int ad4130_read_avail(struct iio_dev *indio_dev, return IIO_AVAIL_LIST; case IIO_CHAN_INFO_SAMP_FREQ: scoped_guard(mutex, &st->lock) { - filter_config = &ad4130_filter_configs[setup_info->filter_mode]; + filter_config = &ad4130_filter_configs[setup_info->filter_type]; } *vals = (int *)filter_config->samp_freq_avail; From 780c9dbb160f442ed460ee091fbf646f6d7c3b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:04 +0100 Subject: [PATCH 0709/2157] iio: adc: ad_sigma_delta: Add error checking for ad_sigma_delta_set_channel() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All other calls to ad_sigma_delta_set_channel() in ad_sigma_delta.c check the return value afterwards. Do it for all calls. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-16-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index d91a3ba127e3..6c37f8e21120 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -390,7 +390,9 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, if (!iio_device_claim_direct(indio_dev)) return -EBUSY; - ad_sigma_delta_set_channel(sigma_delta, chan->address); + ret = ad_sigma_delta_set_channel(sigma_delta, chan->address); + if (ret) + goto out_release; spi_bus_lock(sigma_delta->spi->controller); sigma_delta->bus_locked = true; @@ -431,6 +433,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, sigma_delta->keep_cs_asserted = false; sigma_delta->bus_locked = false; spi_bus_unlock(sigma_delta->spi->controller); +out_release: iio_device_release_direct(indio_dev); if (ret) From 47036a03a303b5aa4ebd5fa42a9db805983f72b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:05 +0100 Subject: [PATCH 0710/2157] iio: adc: ad7124: Implement internal calibration at probe time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the calibration function provided by the ad_sigma_delta shim to calibrate all channels at probe time. For measurements with gain 1 (i.e. if CONFIG_x.PGA = 0) full-scale calibrations are not supported and the reset default value of the GAIN register is supposed to be used then. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-17-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 129 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index de90ecb5f630..382f46ff2b51 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -53,6 +53,11 @@ #define AD7124_ADC_CTRL_MODE_MSK GENMASK(5, 2) #define AD7124_ADC_CTRL_MODE(x) FIELD_PREP(AD7124_ADC_CTRL_MODE_MSK, x) +#define AD7124_MODE_CAL_INT_ZERO 0x5 /* Internal Zero-Scale Calibration */ +#define AD7124_MODE_CAL_INT_FULL 0x6 /* Internal Full-Scale Calibration */ +#define AD7124_MODE_CAL_SYS_ZERO 0x7 /* System Zero-Scale Calibration */ +#define AD7124_MODE_CAL_SYS_FULL 0x8 /* System Full-Scale Calibration */ + /* AD7124 ID */ #define AD7124_DEVICE_ID_MSK GENMASK(7, 4) #define AD7124_DEVICE_ID_GET(x) FIELD_GET(AD7124_DEVICE_ID_MSK, x) @@ -166,6 +171,8 @@ struct ad7124_channel_config { unsigned int odr; unsigned int odr_sel_bits; unsigned int filter_type; + unsigned int calibration_offset; + unsigned int calibration_gain; ); }; @@ -186,6 +193,12 @@ struct ad7124_state { unsigned int num_channels; struct mutex cfgs_lock; /* lock for configs access */ unsigned long cfg_slots_status; /* bitmap with slot status (1 means it is used) */ + + /* + * Stores the power-on reset value for the GAIN(x) registers which are + * needed for measurements at gain 1 (i.e. CONFIG(x).PGA == 0) + */ + unsigned int gain_default; DECLARE_KFIFO(live_cfgs_fifo, struct ad7124_channel_config *, AD7124_MAX_CONFIGS); }; @@ -359,6 +372,8 @@ static struct ad7124_channel_config *ad7124_find_similar_live_cfg(struct ad7124_ unsigned int odr; unsigned int odr_sel_bits; unsigned int filter_type; + unsigned int calibration_offset; + unsigned int calibration_gain; })); for (i = 0; i < st->num_channels; i++) { @@ -373,7 +388,9 @@ static struct ad7124_channel_config *ad7124_find_similar_live_cfg(struct ad7124_ cfg->pga_bits == cfg_aux->pga_bits && cfg->odr == cfg_aux->odr && cfg->odr_sel_bits == cfg_aux->odr_sel_bits && - cfg->filter_type == cfg_aux->filter_type) + cfg->filter_type == cfg_aux->filter_type && + cfg->calibration_offset == cfg_aux->calibration_offset && + cfg->calibration_gain == cfg_aux->calibration_gain) return cfg_aux; } @@ -429,6 +446,14 @@ static int ad7124_write_config(struct ad7124_state *st, struct ad7124_channel_co cfg->cfg_slot = cfg_slot; + ret = ad_sd_write_reg(&st->sd, AD7124_OFFSET(cfg->cfg_slot), 3, cfg->calibration_offset); + if (ret) + return ret; + + ret = ad_sd_write_reg(&st->sd, AD7124_GAIN(cfg->cfg_slot), 3, cfg->calibration_gain); + if (ret) + return ret; + tmp = (cfg->buf_positive << 1) + cfg->buf_negative; val = AD7124_CONFIG_BIPOLAR(cfg->bipolar) | AD7124_CONFIG_REF_SEL(cfg->refsel) | AD7124_CONFIG_IN_BUFF(tmp) | AD7124_CONFIG_PGA(cfg->pga_bits); @@ -835,13 +860,22 @@ static int ad7124_soft_reset(struct ad7124_state *st) return dev_err_probe(dev, ret, "Error reading status register\n"); if (!(readval & AD7124_STATUS_POR_FLAG_MSK)) - return 0; + break; /* The AD7124 requires typically 2ms to power up and settle */ usleep_range(100, 2000); } while (--timeout); - return dev_err_probe(dev, -EIO, "Soft reset failed\n"); + if (readval & AD7124_STATUS_POR_FLAG_MSK) + return dev_err_probe(dev, -EIO, "Soft reset failed\n"); + + ret = ad_sd_read_reg(&st->sd, AD7124_GAIN(0), 3, &st->gain_default); + if (ret < 0) + return dev_err_probe(dev, ret, "Error reading gain register\n"); + + dev_dbg(dev, "Reset value of GAIN register is 0x%x\n", st->gain_default); + + return 0; } static int ad7124_check_chip_id(struct ad7124_state *st) @@ -1054,6 +1088,91 @@ static int ad7124_setup(struct ad7124_state *st) return ret; } +static int __ad7124_calibrate_all(struct ad7124_state *st, struct iio_dev *indio_dev) +{ + struct device *dev = &st->sd.spi->dev; + int ret, i; + + for (i = 0; i < st->num_channels; i++) { + + if (indio_dev->channels[i].type != IIO_VOLTAGE) + continue; + + /* + * For calibration the OFFSET register should hold its reset default + * value. For the GAIN register there is no such requirement but + * for gain 1 it should hold the reset default value, too. So to + * simplify matters use the reset default value for both. + */ + st->channels[i].cfg.calibration_offset = 0x800000; + st->channels[i].cfg.calibration_gain = st->gain_default; + + /* + * Full-scale calibration isn't supported at gain 1, so skip in + * that case. Note that untypically full-scale calibration has + * to happen before zero-scale calibration. This only applies to + * the internal calibration. For system calibration it's as + * usual: first zero-scale then full-scale calibration. + */ + if (st->channels[i].cfg.pga_bits > 0) { + ret = ad_sd_calibrate(&st->sd, AD7124_MODE_CAL_INT_FULL, i); + if (ret < 0) + return ret; + + /* + * read out the resulting value of GAIN + * after full-scale calibration because the next + * ad_sd_calibrate() call overwrites this via + * ad_sigma_delta_set_channel() -> ad7124_set_channel() + * ... -> ad7124_enable_channel(). + */ + ret = ad_sd_read_reg(&st->sd, AD7124_GAIN(st->channels[i].cfg.cfg_slot), 3, + &st->channels[i].cfg.calibration_gain); + if (ret < 0) + return ret; + } + + ret = ad_sd_calibrate(&st->sd, AD7124_MODE_CAL_INT_ZERO, i); + if (ret < 0) + return ret; + + ret = ad_sd_read_reg(&st->sd, AD7124_OFFSET(st->channels[i].cfg.cfg_slot), 3, + &st->channels[i].cfg.calibration_offset); + if (ret < 0) + return ret; + + dev_dbg(dev, "offset and gain for channel %d = 0x%x + 0x%x\n", i, + st->channels[i].cfg.calibration_offset, + st->channels[i].cfg.calibration_gain); + } + + return 0; +} + +static int ad7124_calibrate_all(struct ad7124_state *st, struct iio_dev *indio_dev) +{ + int ret; + unsigned int adc_control = st->adc_control; + + /* + * Calibration isn't supported at full power, so speed down a bit. + * Setting .adc_control is enough here because the control register is + * written as part of ad_sd_calibrate() -> ad_sigma_delta_set_mode(). + * The resulting calibration is then also valid for high-speed, so just + * restore adc_control afterwards. + */ + if (FIELD_GET(AD7124_ADC_CTRL_PWR_MSK, adc_control) >= AD7124_FULL_POWER) { + st->adc_control &= ~AD7124_ADC_CTRL_PWR_MSK; + st->adc_control |= AD7124_ADC_CTRL_PWR(AD7124_MID_POWER); + } + + ret = __ad7124_calibrate_all(st, indio_dev); + + st->adc_control = adc_control; + + return ret; +} + static void ad7124_reg_disable(void *r) { regulator_disable(r); @@ -1132,6 +1251,10 @@ static int ad7124_probe(struct spi_device *spi) if (ret < 0) return dev_err_probe(dev, ret, "Failed to setup triggers\n"); + ret = ad7124_calibrate_all(st, indio_dev); + if (ret) + return ret; + ret = devm_iio_device_register(&spi->dev, indio_dev); if (ret < 0) return dev_err_probe(dev, ret, "Failed to register iio device\n"); From df1f2e1470fa52d89288758283db46a47efcf3c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2025 12:47:06 +0100 Subject: [PATCH 0711/2157] iio: adc: ad7124: Implement system calibration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow triggering both zero-scale and full-scale calibration via sysfs in the same way as it's done for ad7173. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/20250303114659.1672695-18-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 152 ++++++++++++++++++++++++++++++++++----- 1 file changed, 135 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 382f46ff2b51..ad27522f429f 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -181,6 +181,7 @@ struct ad7124_channel { struct ad7124_channel_config cfg; unsigned int ain; unsigned int slot; + u8 syscalib_mode; }; struct ad7124_state { @@ -202,23 +203,6 @@ struct ad7124_state { DECLARE_KFIFO(live_cfgs_fifo, struct ad7124_channel_config *, AD7124_MAX_CONFIGS); }; -static const struct iio_chan_spec ad7124_channel_template = { - .type = IIO_VOLTAGE, - .indexed = 1, - .differential = 1, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_SCALE) | - BIT(IIO_CHAN_INFO_OFFSET) | - BIT(IIO_CHAN_INFO_SAMP_FREQ) | - BIT(IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY), - .scan_type = { - .sign = 'u', - .realbits = 24, - .storagebits = 32, - .endianness = IIO_BE, - }, -}; - static struct ad7124_chip_info ad7124_chip_info_tbl[] = { [ID_AD7124_4] = { .name = "ad7124-4", @@ -903,6 +887,140 @@ static int ad7124_check_chip_id(struct ad7124_state *st) return 0; } +enum { + AD7124_SYSCALIB_ZERO_SCALE, + AD7124_SYSCALIB_FULL_SCALE, +}; + +static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan_spec *chan) +{ + struct device *dev = &st->sd.spi->dev; + struct ad7124_channel *ch = &st->channels[chan->channel]; + int ret; + + if (ch->syscalib_mode == AD7124_SYSCALIB_ZERO_SCALE) { + ch->cfg.calibration_offset = 0x800000; + + ret = ad_sd_calibrate(&st->sd, AD7124_MODE_CAL_SYS_ZERO, + chan->address); + if (ret < 0) + return ret; + + ret = ad_sd_read_reg(&st->sd, AD7124_OFFSET(ch->cfg.cfg_slot), 3, + &ch->cfg.calibration_offset); + if (ret < 0) + return ret; + + dev_dbg(dev, "offset for channel %d after zero-scale calibration: 0x%x\n", + chan->channel, ch->cfg.calibration_offset); + } else { + ch->cfg.calibration_gain = st->gain_default; + + ret = ad_sd_calibrate(&st->sd, AD7124_MODE_CAL_SYS_FULL, + chan->address); + if (ret < 0) + return ret; + + ret = ad_sd_read_reg(&st->sd, AD7124_GAIN(ch->cfg.cfg_slot), 3, + &ch->cfg.calibration_gain); + if (ret < 0) + return ret; + + dev_dbg(dev, "gain for channel %d after full-scale calibration: 0x%x\n", + chan->channel, ch->cfg.calibration_gain); + } + + return 0; +} + +static ssize_t ad7124_write_syscalib(struct iio_dev *indio_dev, + uintptr_t private, + const struct iio_chan_spec *chan, + const char *buf, size_t len) +{ + struct ad7124_state *st = iio_priv(indio_dev); + bool sys_calib; + int ret; + + ret = kstrtobool(buf, &sys_calib); + if (ret) + return ret; + + if (!sys_calib) + return len; + + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + + ret = ad7124_syscalib_locked(st, chan); + + iio_device_release_direct(indio_dev); + + return ret ?: len; +} + +static const char * const ad7124_syscalib_modes[] = { + [AD7124_SYSCALIB_ZERO_SCALE] = "zero_scale", + [AD7124_SYSCALIB_FULL_SCALE] = "full_scale", +}; + +static int ad7124_set_syscalib_mode(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + unsigned int mode) +{ + struct ad7124_state *st = iio_priv(indio_dev); + + st->channels[chan->channel].syscalib_mode = mode; + + return 0; +} + +static int ad7124_get_syscalib_mode(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct ad7124_state *st = iio_priv(indio_dev); + + return st->channels[chan->channel].syscalib_mode; +} + +static const struct iio_enum ad7124_syscalib_mode_enum = { + .items = ad7124_syscalib_modes, + .num_items = ARRAY_SIZE(ad7124_syscalib_modes), + .set = ad7124_set_syscalib_mode, + .get = ad7124_get_syscalib_mode +}; + +static const struct iio_chan_spec_ext_info ad7124_calibsys_ext_info[] = { + { + .name = "sys_calibration", + .write = ad7124_write_syscalib, + .shared = IIO_SEPARATE, + }, + IIO_ENUM("sys_calibration_mode", IIO_SEPARATE, + &ad7124_syscalib_mode_enum), + IIO_ENUM_AVAILABLE("sys_calibration_mode", IIO_SHARED_BY_TYPE, + &ad7124_syscalib_mode_enum), + { } +}; + +static const struct iio_chan_spec ad7124_channel_template = { + .type = IIO_VOLTAGE, + .indexed = 1, + .differential = 1, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ) | + BIT(IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY), + .scan_type = { + .sign = 'u', + .realbits = 24, + .storagebits = 32, + .endianness = IIO_BE, + }, + .ext_info = ad7124_calibsys_ext_info, +}; + /* * Input specifiers 8 - 15 are explicitly reserved for ad7124-4 * while they are fine for ad7124-8. Values above 31 don't fit From ecd5b508c10b1e4c6b5196ee7064e4d014044d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 4 Mar 2025 10:41:09 +0100 Subject: [PATCH 0712/2157] iio: adc: ad7124: Benefit of dev = indio_dev->dev.parent in ad7124_parse_channel_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a6eaf02b8274 ("iio: adc: ad7124: Switch from of specific to fwnode based property handling") the function ad7124_parse_channel_config() has a parameter `dev` that holds the value `indio_dev->dev.parent`. Make use of that to shorten two code lines. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/v7l2skqj65vbku3ebjsfndfj3atl6iqpodamios2do6q6kcagf@whmuir6fwede Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index ad27522f429f..3ea81a98e455 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -1060,12 +1060,12 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev, /* Add one for temperature */ st->num_channels = min(num_channels + 1, AD7124_MAX_CHANNELS); - chan = devm_kcalloc(indio_dev->dev.parent, st->num_channels, + chan = devm_kcalloc(dev, st->num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - channels = devm_kcalloc(indio_dev->dev.parent, st->num_channels, sizeof(*channels), + channels = devm_kcalloc(dev, st->num_channels, sizeof(*channels), GFP_KERNEL); if (!channels) return -ENOMEM; From 8236644f5ecb180e80ad92d691c22bc509b747bb Mon Sep 17 00:00:00 2001 From: Sergiu Cuciurean Date: Thu, 6 Mar 2025 18:00:29 -0300 Subject: [PATCH 0713/2157] iio: adc: ad7768-1: Fix conversion result sign The ad7768-1 ADC output code is two's complement, meaning that the voltage conversion result is a signed value.. Since the value is a 24 bit one, stored in a 32 bit variable, the sign should be extended in order to get the correct representation. Also the channel description has been updated to signed representation, to match the ADC specifications. Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support") Reviewed-by: David Lechner Reviewed-by: Marcelo Schmitt Signed-off-by: Sergiu Cuciurean Signed-off-by: Jonathan Santos Cc: Link: https://patch.msgid.link/505994d3b71c2aa38ba714d909a68e021f12124c.1741268122.git.Jonathan.Santos@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index ea829c51e80b..09e7cccfd51c 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -142,7 +142,7 @@ static const struct iio_chan_spec ad7768_channels[] = { .channel = 0, .scan_index = 0, .scan_type = { - .sign = 'u', + .sign = 's', .realbits = 24, .storagebits = 32, .shift = 8, @@ -373,7 +373,7 @@ static int ad7768_read_raw(struct iio_dev *indio_dev, iio_device_release_direct(indio_dev); if (ret < 0) return ret; - *val = ret; + *val = sign_extend32(ret, chan->scan_type.realbits - 1); return IIO_VAL_INT; From 2416cec859299be04d021b4cf98eff814f345af7 Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Thu, 6 Mar 2025 18:00:43 -0300 Subject: [PATCH 0714/2157] iio: adc: ad7768-1: set MOSI idle state to prevent accidental reset Datasheet recommends Setting the MOSI idle state to high in order to prevent accidental reset of the device when SCLK is free running. This happens when the controller clocks out a 1 followed by 63 zeros while the CS is held low. Check if SPI controller supports SPI_MOSI_IDLE_HIGH flag and set it. Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support") Signed-off-by: Jonathan Santos Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/c2a2b0f3d54829079763a5511359a1fa80516cfb.1741268122.git.Jonathan.Santos@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 09e7cccfd51c..a75247cdcaa7 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -572,6 +572,21 @@ static int ad7768_probe(struct spi_device *spi) return -ENOMEM; st = iio_priv(indio_dev); + /* + * Datasheet recommends SDI line to be kept high when data is not being + * clocked out of the controller and the spi clock is free running, + * to prevent accidental reset. + * Since many controllers do not support the SPI_MOSI_IDLE_HIGH flag + * yet, only request the MOSI idle state to enable if the controller + * supports it. + */ + if (spi->controller->mode_bits & SPI_MOSI_IDLE_HIGH) { + spi->mode |= SPI_MOSI_IDLE_HIGH; + ret = spi_setup(spi); + if (ret < 0) + return ret; + } + st->spi = spi; st->vref = devm_regulator_get(&spi->dev, "vref"); From 20182142045fc44ffd883e266d9497396cc0115c Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Thu, 6 Mar 2025 18:01:37 -0300 Subject: [PATCH 0715/2157] Documentation: ABI: add wideband filter type to sysfs-bus-iio The Wideband Low Ripple filter is used for AD7768-1 Driver. Document wideband filter option into filter_type_available attribute. Signed-off-by: Jonathan Santos Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/b390ec6d92dd742ace93bd8e40a0df4379b98e23.1741268122.git.Jonathan.Santos@analog.com Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index b8838cb92d38..722aa989baac 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -2290,6 +2290,8 @@ Description: * "sinc3+pf2" - Sinc3 + device specific Post Filter 2. * "sinc3+pf3" - Sinc3 + device specific Post Filter 3. * "sinc3+pf4" - Sinc3 + device specific Post Filter 4. + * "wideband" - filter with wideband low ripple passband + and sharp transition band. What: /sys/bus/iio/devices/iio:deviceX/filter_type What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_type From 809d605d138032d63013838cf28188414d842919 Mon Sep 17 00:00:00 2001 From: Jonathan Santos Date: Thu, 6 Mar 2025 18:01:51 -0300 Subject: [PATCH 0716/2157] iio: adc: ad7768-1: remove unnecessary locking The current locking is only preventing a triggered buffer Transfer and a debugfs register access from happening at the same time. If a register access happens during a buffered read, the action is doomed to fail anyway, since we need to write a magic value to exit continuous read mode. Remove locking from the trigger handler and use iio_device_claim_direct() instead in the register access function. Reviewed-by: David Lechner Signed-off-by: Jonathan Santos Link: https://patch.msgid.link/d0450b7c5d8467e54913ef905f6147baa2b866b3.1741268122.git.Jonathan.Santos@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index a75247cdcaa7..5a863005aca6 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -154,7 +154,6 @@ static const struct iio_chan_spec ad7768_channels[] = { struct ad7768_state { struct spi_device *spi; struct regulator *vref; - struct mutex lock; struct clk *mclk; unsigned int mclk_freq; unsigned int samp_freq; @@ -256,18 +255,20 @@ static int ad7768_reg_access(struct iio_dev *indio_dev, struct ad7768_state *st = iio_priv(indio_dev); int ret; - mutex_lock(&st->lock); + if (!iio_device_claim_direct(indio_dev)) + return -EBUSY; + if (readval) { ret = ad7768_spi_reg_read(st, reg, 1); if (ret < 0) - goto err_unlock; + goto err_release; *readval = ret; ret = 0; } else { ret = ad7768_spi_reg_write(st, reg, writeval); } -err_unlock: - mutex_unlock(&st->lock); +err_release: + iio_device_release_direct(indio_dev); return ret; } @@ -469,18 +470,15 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p) struct ad7768_state *st = iio_priv(indio_dev); int ret; - mutex_lock(&st->lock); - ret = spi_read(st->spi, &st->data.scan.chan, 3); if (ret < 0) - goto err_unlock; + goto out; iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan, iio_get_time_ns(indio_dev)); -err_unlock: +out: iio_trigger_notify_done(indio_dev->trig); - mutex_unlock(&st->lock); return IRQ_HANDLED; } @@ -609,8 +607,6 @@ static int ad7768_probe(struct spi_device *spi) st->mclk_freq = clk_get_rate(st->mclk); - mutex_init(&st->lock); - indio_dev->channels = ad7768_channels; indio_dev->num_channels = ARRAY_SIZE(ad7768_channels); indio_dev->name = spi_get_device_id(spi)->name; From 352f4a6bb1d819c9890317ab6325bfa67f6b9a73 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 6 Mar 2025 12:08:25 -0500 Subject: [PATCH 0717/2157] dt-bindings: iio: adc: Add i.MX94 and i.MX95 support Add compatible string "nxp,imx94-adc" and "nxp,imx95-adc", which is backward compatible with i.MX93. Set it to fall back to "nxp,imx93-adc". Signed-off-by: Frank Li Acked-by: Conor Dooley Link: https://patch.msgid.link/20250306170825.239933-1-Frank.Li@nxp.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/nxp,imx93-adc.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml index dfc3f512918f..c2e5ff418920 100644 --- a/Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml @@ -19,7 +19,14 @@ description: properties: compatible: - const: nxp,imx93-adc + oneOf: + - enum: + - nxp,imx93-adc + - items: + - enum: + - nxp,imx94-adc + - nxp,imx95-adc + - const: nxp,imx93-adc reg: maxItems: 1 From 85a1159e4c0eba614b17cddb929f871de3a8bff5 Mon Sep 17 00:00:00 2001 From: Jun Yan Date: Thu, 6 Mar 2025 22:57:40 +0800 Subject: [PATCH 0718/2157] iio: gyro: bmg160_spi: add of_match_table Add the missing of_match_table to bmg160_spi driver to enhance devicetree compatibility. Signed-off-by: Jun Yan Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250306145740.32687-1-jerrysteve1101@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_spi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/gyro/bmg160_spi.c b/drivers/iio/gyro/bmg160_spi.c index fc2e453527b9..ac04b3b1b554 100644 --- a/drivers/iio/gyro/bmg160_spi.c +++ b/drivers/iio/gyro/bmg160_spi.c @@ -41,9 +41,19 @@ static const struct spi_device_id bmg160_spi_id[] = { MODULE_DEVICE_TABLE(spi, bmg160_spi_id); +static const struct of_device_id bmg160_of_match[] = { + { .compatible = "bosch,bmg160" }, + { .compatible = "bosch,bmi055_gyro" }, + { .compatible = "bosch,bmi088_gyro" }, + { } +}; + +MODULE_DEVICE_TABLE(of, bmg160_of_match); + static struct spi_driver bmg160_spi_driver = { .driver = { .name = "bmg160_spi", + .of_match_table = bmg160_of_match, .pm = &bmg160_pm_ops, }, .probe = bmg160_spi_probe, From 33220e15ecc713e7aa5e1f8f91a46d0a548904c7 Mon Sep 17 00:00:00 2001 From: Saalim Quadri Date: Thu, 6 Mar 2025 05:30:40 +0530 Subject: [PATCH 0719/2157] staging: iio: ad9832: Use devm_regulator_get_enable() The regulators are only enabled at probe(), hence replace the boilerplate code by making use of devm_regulator_get_enable() helper. Signed-off-by: Saalim Quadri Link: https://patch.msgid.link/20250306000040.1550656-1-danascape@gmail.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/frequency/ad9832.c | 37 +++----------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/staging/iio/frequency/ad9832.c b/drivers/staging/iio/frequency/ad9832.c index 140ee4f9c137..db42810c7664 100644 --- a/drivers/staging/iio/frequency/ad9832.c +++ b/drivers/staging/iio/frequency/ad9832.c @@ -74,8 +74,6 @@ /** * struct ad9832_state - driver instance specific data * @spi: spi_device - * @avdd: supply regulator for the analog section - * @dvdd: supply regulator for the digital section * @mclk: external master clock * @ctrl_fp: cached frequency/phase control word * @ctrl_ss: cached sync/selsrc control word @@ -94,8 +92,6 @@ struct ad9832_state { struct spi_device *spi; - struct regulator *avdd; - struct regulator *dvdd; struct clk *mclk; unsigned short ctrl_fp; unsigned short ctrl_ss; @@ -297,11 +293,6 @@ static const struct iio_info ad9832_info = { .attrs = &ad9832_attribute_group, }; -static void ad9832_reg_disable(void *reg) -{ - regulator_disable(reg); -} - static int ad9832_probe(struct spi_device *spi) { struct ad9832_platform_data *pdata = dev_get_platdata(&spi->dev); @@ -320,33 +311,13 @@ static int ad9832_probe(struct spi_device *spi) st = iio_priv(indio_dev); - st->avdd = devm_regulator_get(&spi->dev, "avdd"); - if (IS_ERR(st->avdd)) - return PTR_ERR(st->avdd); - - ret = regulator_enable(st->avdd); - if (ret) { - dev_err(&spi->dev, "Failed to enable specified AVDD supply\n"); - return ret; - } - - ret = devm_add_action_or_reset(&spi->dev, ad9832_reg_disable, st->avdd); + ret = devm_regulator_get_enable(&spi->dev, "avdd"); if (ret) - return ret; + return dev_err_probe(&spi->dev, ret, "failed to enable specified AVDD voltage\n"); - st->dvdd = devm_regulator_get(&spi->dev, "dvdd"); - if (IS_ERR(st->dvdd)) - return PTR_ERR(st->dvdd); - - ret = regulator_enable(st->dvdd); - if (ret) { - dev_err(&spi->dev, "Failed to enable specified DVDD supply\n"); - return ret; - } - - ret = devm_add_action_or_reset(&spi->dev, ad9832_reg_disable, st->dvdd); + ret = devm_regulator_get_enable(&spi->dev, "dvdd"); if (ret) - return ret; + return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVDD supply\n"); st->mclk = devm_clk_get_enabled(&spi->dev, "mclk"); if (IS_ERR(st->mclk)) From cc26591e7942f1beb04f3fa6309cb6ee3de1e1b1 Mon Sep 17 00:00:00 2001 From: Saalim Quadri Date: Thu, 6 Mar 2025 05:34:59 +0530 Subject: [PATCH 0720/2157] staging: iio: ad9834: Use devm_regulator_get_enable() The regulators are only enabled at probe(), hence replace the boilerplate code by making use of devm_regulator_get_enable() helper. Signed-off-by: Saalim Quadri Link: https://patch.msgid.link/20250306000459.1554007-1-danascape@gmail.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/frequency/ad9834.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c index 6e99e008c5f4..50413da2aa65 100644 --- a/drivers/staging/iio/frequency/ad9834.c +++ b/drivers/staging/iio/frequency/ad9834.c @@ -387,33 +387,15 @@ static const struct iio_info ad9833_info = { .attrs = &ad9833_attribute_group, }; -static void ad9834_disable_reg(void *data) -{ - struct regulator *reg = data; - - regulator_disable(reg); -} - static int ad9834_probe(struct spi_device *spi) { struct ad9834_state *st; struct iio_dev *indio_dev; - struct regulator *reg; int ret; - reg = devm_regulator_get(&spi->dev, "avdd"); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - ret = regulator_enable(reg); - if (ret) { - dev_err(&spi->dev, "Failed to enable specified AVDD supply\n"); - return ret; - } - - ret = devm_add_action_or_reset(&spi->dev, ad9834_disable_reg, reg); + ret = devm_regulator_get_enable(&spi->dev, "avdd"); if (ret) - return ret; + return dev_err_probe(&spi->dev, ret, "Failed to enable specified AVDD supply\n"); indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st)); if (!indio_dev) { From 8d534275f7400d490d16ed81d08f5ae6bda09c04 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 3 Mar 2025 13:33:21 +0200 Subject: [PATCH 0721/2157] iio: adc: ti-ads7924 Drop unnecessary function parameters Device pointer is the only variable which is used by the ads7924_get_channels_config() and which is declared outside this function. Still, the function gets the iio_device and i2c_client as parameters. The sole caller of this function (probe) already has the device pointer which it can directly pass to the function. Simplify code by passing the device pointer directly as a parameter instead of digging it from the iio_device's private data. Signed-off-by: Matti Vaittinen Link: https://patch.msgid.link/2bb4c61122eca2f3a35f6087e7d9815675013f66.1740993491.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads7924.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/ti-ads7924.c b/drivers/iio/adc/ti-ads7924.c index 66b54c0d75aa..b1f745f75dbe 100644 --- a/drivers/iio/adc/ti-ads7924.c +++ b/drivers/iio/adc/ti-ads7924.c @@ -251,11 +251,8 @@ static const struct iio_info ads7924_info = { .read_raw = ads7924_read_raw, }; -static int ads7924_get_channels_config(struct i2c_client *client, - struct iio_dev *indio_dev) +static int ads7924_get_channels_config(struct device *dev) { - struct ads7924_data *priv = iio_priv(indio_dev); - struct device *dev = priv->dev; struct fwnode_handle *node; int num_channels = 0; @@ -380,7 +377,7 @@ static int ads7924_probe(struct i2c_client *client) indio_dev->num_channels = ARRAY_SIZE(ads7924_channels); indio_dev->info = &ads7924_info; - ret = ads7924_get_channels_config(client, indio_dev); + ret = ads7924_get_channels_config(dev); if (ret < 0) return dev_err_probe(dev, ret, "failed to get channels configuration\n"); From ee735aa33db16c1fb5ebccbaf84ad38f5583f3cc Mon Sep 17 00:00:00 2001 From: Karan Sanghavi Date: Thu, 20 Feb 2025 17:34:36 +0000 Subject: [PATCH 0722/2157] iio: light: Add check for array bounds in veml6075_read_int_time_ms The array contains only 5 elements, but the index calculated by veml6075_read_int_time_index can range from 0 to 7, which could lead to out-of-bounds access. The check prevents this issue. Coverity Issue CID 1574309: (#1 of 1): Out-of-bounds read (OVERRUN) overrun-local: Overrunning array veml6075_it_ms of 5 4-byte elements at element index 7 (byte offset 31) using index int_index (which evaluates to 7) This is hardening against potentially broken hardware. Good to have but not necessary to backport. Fixes: 3b82f43238ae ("iio: light: add VEML6075 UVA and UVB light sensor driver") Signed-off-by: Karan Sanghavi Reviewed-by: Javier Carrasco Link: https://patch.msgid.link/Z7dnrEpKQdRZ2qFU@Emma Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6075.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/veml6075.c b/drivers/iio/light/veml6075.c index 05d4c0e9015d..859891e8f115 100644 --- a/drivers/iio/light/veml6075.c +++ b/drivers/iio/light/veml6075.c @@ -195,13 +195,17 @@ static int veml6075_read_uv_direct(struct veml6075_data *data, int chan, static int veml6075_read_int_time_index(struct veml6075_data *data) { - int ret, conf; + int ret, conf, int_index; ret = regmap_read(data->regmap, VEML6075_CMD_CONF, &conf); if (ret < 0) return ret; - return FIELD_GET(VEML6075_CONF_IT, conf); + int_index = FIELD_GET(VEML6075_CONF_IT, conf); + if (int_index >= ARRAY_SIZE(veml6075_it_ms)) + return -EINVAL; + + return int_index; } static int veml6075_read_int_time_ms(struct veml6075_data *data, int *val) From bbeaec81a03e71a34ebb0a61239c1aebb7b106df Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 10 Mar 2025 18:39:52 +0100 Subject: [PATCH 0723/2157] iio: ad7380: add support for SPI offload Add support for SPI offload to the ad7380 driver. SPI offload allows sampling data at the max sample rate (2MSPS with one SDO line). This is developed and tested against the ADI example FPGA design for this family of ADCs [1]. [1]: http://analogdevicesinc.github.io/hdl/projects/ad738x_fmc/index.html Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250310-wip-bl-spi-offload-ad7380-v4-1-b184b37b7c72@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 + drivers/iio/adc/ad7380.c | 515 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 479 insertions(+), 38 deletions(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index b7ae6e0ae0df..6529df1a498c 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -228,7 +228,9 @@ config AD7298 config AD7380 tristate "Analog Devices AD7380 ADC driver" depends on SPI_MASTER + select SPI_OFFLOAD select IIO_BUFFER + select IIO_BUFFER_DMAENGINE select IIO_TRIGGER select IIO_TRIGGERED_BUFFER help diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 407930f1f5dd..4fcb49fdf566 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -16,6 +16,9 @@ * adaq4370-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4370-4.pdf * adaq4380-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4380-4.pdf * adaq4381-4 : https://www.analog.com/media/en/technical-documentation/data-sheets/adaq4381-4.pdf + * + * HDL ad738x_fmc: https://analogdevicesinc.github.io/hdl/projects/ad738x_fmc/index.html + * */ #include @@ -30,11 +33,13 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -93,6 +98,12 @@ #define AD7380_NUM_SDO_LINES 1 #define AD7380_DEFAULT_GAIN_MILLI 1000 +/* + * Using SPI offload, storagebits is always 32, so can't be used to compute struct + * spi_transfer.len. Using realbits instead. + */ +#define AD7380_SPI_BYTES(scan_type) ((scan_type)->realbits > 16 ? 4 : 2) + struct ad7380_timing_specs { const unsigned int t_csh_ns; /* CS minimum high time */ }; @@ -100,6 +111,7 @@ struct ad7380_timing_specs { struct ad7380_chip_info { const char *name; const struct iio_chan_spec *channels; + const struct iio_chan_spec *offload_channels; unsigned int num_channels; unsigned int num_simult_channels; bool has_hardware_gain; @@ -112,6 +124,7 @@ struct ad7380_chip_info { unsigned int num_vcm_supplies; const unsigned long *available_scan_masks; const struct ad7380_timing_specs *timing_specs; + u32 max_conversion_rate_hz; }; static const struct iio_event_spec ad7380_events[] = { @@ -217,6 +230,91 @@ static const struct iio_scan_type ad7380_scan_type_16_u[] = { }, }; +/* + * Defining here scan types for offload mode, since with current available HDL + * only a value of 32 for storagebits is supported. + */ + +/* Extended scan types for 12-bit unsigned chips, offload support. */ +static const struct iio_scan_type ad7380_scan_type_12_u_offload[] = { + [AD7380_SCAN_TYPE_NORMAL] = { + .sign = 'u', + .realbits = 12, + .storagebits = 32, + .endianness = IIO_CPU, + }, + [AD7380_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 'u', + .realbits = 14, + .storagebits = 32, + .endianness = IIO_CPU, + }, +}; + +/* Extended scan types for 14-bit signed chips, offload support. */ +static const struct iio_scan_type ad7380_scan_type_14_s_offload[] = { + [AD7380_SCAN_TYPE_NORMAL] = { + .sign = 's', + .realbits = 14, + .storagebits = 32, + .endianness = IIO_CPU, + }, + [AD7380_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 's', + .realbits = 16, + .storagebits = 32, + .endianness = IIO_CPU, + }, +}; + +/* Extended scan types for 14-bit unsigned chips, offload support. */ +static const struct iio_scan_type ad7380_scan_type_14_u_offload[] = { + [AD7380_SCAN_TYPE_NORMAL] = { + .sign = 'u', + .realbits = 14, + .storagebits = 32, + .endianness = IIO_CPU, + }, + [AD7380_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 'u', + .realbits = 16, + .storagebits = 32, + .endianness = IIO_CPU, + }, +}; + +/* Extended scan types for 16-bit signed_chips, offload support. */ +static const struct iio_scan_type ad7380_scan_type_16_s_offload[] = { + [AD7380_SCAN_TYPE_NORMAL] = { + .sign = 's', + .realbits = 16, + .storagebits = 32, + .endianness = IIO_CPU, + }, + [AD7380_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 's', + .realbits = 18, + .storagebits = 32, + .endianness = IIO_CPU, + }, +}; + +/* Extended scan types for 16-bit unsigned chips, offload support. */ +static const struct iio_scan_type ad7380_scan_type_16_u_offload[] = { + [AD7380_SCAN_TYPE_NORMAL] = { + .sign = 'u', + .realbits = 16, + .storagebits = 32, + .endianness = IIO_CPU, + }, + [AD7380_SCAN_TYPE_RESOLUTION_BOOST] = { + .sign = 'u', + .realbits = 18, + .storagebits = 32, + .endianness = IIO_CPU, + }, +}; + #define _AD7380_CHANNEL(index, bits, diff, sign, gain) { \ .type = IIO_VOLTAGE, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ @@ -238,48 +336,123 @@ static const struct iio_scan_type ad7380_scan_type_16_u[] = { .num_event_specs = ARRAY_SIZE(ad7380_events), \ } +#define _AD7380_OFFLOAD_CHANNEL(index, bits, diff, sign, gain) { \ + .type = IIO_VOLTAGE, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + ((gain) ? BIT(IIO_CHAN_INFO_SCALE) : 0) | \ + ((diff) ? 0 : BIT(IIO_CHAN_INFO_OFFSET)), \ + .info_mask_shared_by_type = ((gain) ? 0 : BIT(IIO_CHAN_INFO_SCALE)) | \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .info_mask_shared_by_type_available = \ + BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .indexed = 1, \ + .differential = (diff), \ + .channel = (diff) ? (2 * (index)) : (index), \ + .channel2 = (diff) ? (2 * (index) + 1) : 0, \ + .scan_index = (index), \ + .has_ext_scan_type = 1, \ + .ext_scan_type = ad7380_scan_type_##bits##_##sign##_offload, \ + .num_ext_scan_type = \ + ARRAY_SIZE(ad7380_scan_type_##bits##_##sign##_offload), \ + .event_spec = ad7380_events, \ + .num_event_specs = ARRAY_SIZE(ad7380_events), \ +} + +/* + * Notes on the offload channels: + * - There is no soft timestamp since everything is done in hardware. + * - There is a sampling frequency attribute added. This controls the SPI + * offload trigger. + * - The storagebits value depends on the SPI offload provider. Currently there + * is only one supported provider, namely the ADI PULSAR ADC HDL project, + * which always uses 32-bit words for data values, even for <= 16-bit ADCs. + * So the value is just hardcoded to 32 for now. + */ + #define AD7380_CHANNEL(index, bits, diff, sign) \ _AD7380_CHANNEL(index, bits, diff, sign, false) #define ADAQ4380_CHANNEL(index, bits, diff, sign) \ _AD7380_CHANNEL(index, bits, diff, sign, true) -#define DEFINE_AD7380_2_CHANNEL(name, bits, diff, sign) \ -static const struct iio_chan_spec name[] = { \ - AD7380_CHANNEL(0, bits, diff, sign), \ - AD7380_CHANNEL(1, bits, diff, sign), \ - IIO_CHAN_SOFT_TIMESTAMP(2), \ +#define DEFINE_AD7380_2_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_CHANNEL(0, bits, diff, sign), \ + AD7380_CHANNEL(1, bits, diff, sign), \ + IIO_CHAN_SOFT_TIMESTAMP(2), \ } -#define DEFINE_AD7380_4_CHANNEL(name, bits, diff, sign) \ -static const struct iio_chan_spec name[] = { \ - AD7380_CHANNEL(0, bits, diff, sign), \ - AD7380_CHANNEL(1, bits, diff, sign), \ - AD7380_CHANNEL(2, bits, diff, sign), \ - AD7380_CHANNEL(3, bits, diff, sign), \ - IIO_CHAN_SOFT_TIMESTAMP(4), \ +#define DEFINE_AD7380_4_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_CHANNEL(0, bits, diff, sign), \ + AD7380_CHANNEL(1, bits, diff, sign), \ + AD7380_CHANNEL(2, bits, diff, sign), \ + AD7380_CHANNEL(3, bits, diff, sign), \ + IIO_CHAN_SOFT_TIMESTAMP(4), \ } -#define DEFINE_ADAQ4380_4_CHANNEL(name, bits, diff, sign) \ -static const struct iio_chan_spec name[] = { \ - ADAQ4380_CHANNEL(0, bits, diff, sign), \ - ADAQ4380_CHANNEL(1, bits, diff, sign), \ - ADAQ4380_CHANNEL(2, bits, diff, sign), \ - ADAQ4380_CHANNEL(3, bits, diff, sign), \ - IIO_CHAN_SOFT_TIMESTAMP(4), \ +#define DEFINE_ADAQ4380_4_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + ADAQ4380_CHANNEL(0, bits, diff, sign), \ + ADAQ4380_CHANNEL(1, bits, diff, sign), \ + ADAQ4380_CHANNEL(2, bits, diff, sign), \ + ADAQ4380_CHANNEL(3, bits, diff, sign), \ + IIO_CHAN_SOFT_TIMESTAMP(4), \ } -#define DEFINE_AD7380_8_CHANNEL(name, bits, diff, sign) \ +#define DEFINE_AD7380_8_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_CHANNEL(0, bits, diff, sign), \ + AD7380_CHANNEL(1, bits, diff, sign), \ + AD7380_CHANNEL(2, bits, diff, sign), \ + AD7380_CHANNEL(3, bits, diff, sign), \ + AD7380_CHANNEL(4, bits, diff, sign), \ + AD7380_CHANNEL(5, bits, diff, sign), \ + AD7380_CHANNEL(6, bits, diff, sign), \ + AD7380_CHANNEL(7, bits, diff, sign), \ + IIO_CHAN_SOFT_TIMESTAMP(8), \ +} + +#define AD7380_OFFLOAD_CHANNEL(index, bits, diff, sign) \ +_AD7380_OFFLOAD_CHANNEL(index, bits, diff, sign, false) + +#define ADAQ4380_OFFLOAD_CHANNEL(index, bits, diff, sign) \ +_AD7380_OFFLOAD_CHANNEL(index, bits, diff, sign, true) + +#define DEFINE_AD7380_2_OFFLOAD_CHANNEL(name, bits, diff, sign) \ static const struct iio_chan_spec name[] = { \ - AD7380_CHANNEL(0, bits, diff, sign), \ - AD7380_CHANNEL(1, bits, diff, sign), \ - AD7380_CHANNEL(2, bits, diff, sign), \ - AD7380_CHANNEL(3, bits, diff, sign), \ - AD7380_CHANNEL(4, bits, diff, sign), \ - AD7380_CHANNEL(5, bits, diff, sign), \ - AD7380_CHANNEL(6, bits, diff, sign), \ - AD7380_CHANNEL(7, bits, diff, sign), \ - IIO_CHAN_SOFT_TIMESTAMP(8), \ + AD7380_OFFLOAD_CHANNEL(0, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(1, bits, diff, sign), \ +} + +#define DEFINE_AD7380_4_OFFLOAD_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_OFFLOAD_CHANNEL(0, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(1, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(2, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(3, bits, diff, sign), \ +} + +#define DEFINE_ADAQ4380_4_OFFLOAD_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_OFFLOAD_CHANNEL(0, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(1, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(2, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(3, bits, diff, sign), \ +} + +#define DEFINE_AD7380_8_OFFLOAD_CHANNEL(name, bits, diff, sign) \ +static const struct iio_chan_spec name[] = { \ + AD7380_OFFLOAD_CHANNEL(0, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(1, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(2, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(3, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(4, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(5, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(6, bits, diff, sign), \ + AD7380_OFFLOAD_CHANNEL(7, bits, diff, sign), \ } /* fully differential */ @@ -303,6 +476,28 @@ DEFINE_AD7380_8_CHANNEL(ad7386_4_channels, 16, 0, u); DEFINE_AD7380_8_CHANNEL(ad7387_4_channels, 14, 0, u); DEFINE_AD7380_8_CHANNEL(ad7388_4_channels, 12, 0, u); +/* offload channels */ +DEFINE_AD7380_2_OFFLOAD_CHANNEL(ad7380_offload_channels, 16, 1, s); +DEFINE_AD7380_2_OFFLOAD_CHANNEL(ad7381_offload_channels, 14, 1, s); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7380_4_offload_channels, 16, 1, s); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7381_4_offload_channels, 14, 1, s); +DEFINE_ADAQ4380_4_OFFLOAD_CHANNEL(adaq4380_4_offload_channels, 16, 1, s); +DEFINE_ADAQ4380_4_OFFLOAD_CHANNEL(adaq4381_4_offload_channels, 14, 1, s); + +/* pseudo differential */ +DEFINE_AD7380_2_OFFLOAD_CHANNEL(ad7383_offload_channels, 16, 0, s); +DEFINE_AD7380_2_OFFLOAD_CHANNEL(ad7384_offload_channels, 14, 0, s); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7383_4_offload_channels, 16, 0, s); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7384_4_offload_channels, 14, 0, s); + +/* Single ended */ +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7386_offload_channels, 16, 0, u); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7387_offload_channels, 14, 0, u); +DEFINE_AD7380_4_OFFLOAD_CHANNEL(ad7388_offload_channels, 12, 0, u); +DEFINE_AD7380_8_OFFLOAD_CHANNEL(ad7386_4_offload_channels, 16, 0, u); +DEFINE_AD7380_8_OFFLOAD_CHANNEL(ad7387_4_offload_channels, 14, 0, u); +DEFINE_AD7380_8_OFFLOAD_CHANNEL(ad7388_4_offload_channels, 12, 0, u); + static const char * const ad7380_supplies[] = { "vcc", "vlogic", }; @@ -409,28 +604,33 @@ static const int ad7380_gains[] = { static const struct ad7380_chip_info ad7380_chip_info = { .name = "ad7380", .channels = ad7380_channels, + .offload_channels = ad7380_offload_channels, .num_channels = ARRAY_SIZE(ad7380_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, .num_supplies = ARRAY_SIZE(ad7380_supplies), .available_scan_masks = ad7380_2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7381_chip_info = { .name = "ad7381", .channels = ad7381_channels, + .offload_channels = ad7381_offload_channels, .num_channels = ARRAY_SIZE(ad7381_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, .num_supplies = ARRAY_SIZE(ad7380_supplies), .available_scan_masks = ad7380_2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7383_chip_info = { .name = "ad7383", .channels = ad7383_channels, + .offload_channels = ad7383_offload_channels, .num_channels = ARRAY_SIZE(ad7383_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, @@ -439,11 +639,13 @@ static const struct ad7380_chip_info ad7383_chip_info = { .num_vcm_supplies = ARRAY_SIZE(ad7380_2_channel_vcm_supplies), .available_scan_masks = ad7380_2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7384_chip_info = { .name = "ad7384", .channels = ad7384_channels, + .offload_channels = ad7384_offload_channels, .num_channels = ARRAY_SIZE(ad7384_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, @@ -452,11 +654,13 @@ static const struct ad7380_chip_info ad7384_chip_info = { .num_vcm_supplies = ARRAY_SIZE(ad7380_2_channel_vcm_supplies), .available_scan_masks = ad7380_2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7386_chip_info = { .name = "ad7386", .channels = ad7386_channels, + .offload_channels = ad7386_offload_channels, .num_channels = ARRAY_SIZE(ad7386_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, @@ -464,11 +668,13 @@ static const struct ad7380_chip_info ad7386_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7387_chip_info = { .name = "ad7387", .channels = ad7387_channels, + .offload_channels = ad7387_offload_channels, .num_channels = ARRAY_SIZE(ad7387_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, @@ -476,11 +682,13 @@ static const struct ad7380_chip_info ad7387_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7388_chip_info = { .name = "ad7388", .channels = ad7388_channels, + .offload_channels = ad7388_offload_channels, .num_channels = ARRAY_SIZE(ad7388_channels), .num_simult_channels = 2, .supplies = ad7380_supplies, @@ -488,11 +696,13 @@ static const struct ad7380_chip_info ad7388_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x2_channel_scan_masks, .timing_specs = &ad7380_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7380_4_chip_info = { .name = "ad7380-4", .channels = ad7380_4_channels, + .offload_channels = ad7380_4_offload_channels, .num_channels = ARRAY_SIZE(ad7380_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -500,22 +710,26 @@ static const struct ad7380_chip_info ad7380_4_chip_info = { .external_ref_only = true, .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7381_4_chip_info = { .name = "ad7381-4", .channels = ad7381_4_channels, + .offload_channels = ad7381_4_offload_channels, .num_channels = ARRAY_SIZE(ad7381_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, .num_supplies = ARRAY_SIZE(ad7380_supplies), .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7383_4_chip_info = { .name = "ad7383-4", .channels = ad7383_4_channels, + .offload_channels = ad7383_4_offload_channels, .num_channels = ARRAY_SIZE(ad7383_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -524,11 +738,13 @@ static const struct ad7380_chip_info ad7383_4_chip_info = { .num_vcm_supplies = ARRAY_SIZE(ad7380_4_channel_vcm_supplies), .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7384_4_chip_info = { .name = "ad7384-4", .channels = ad7384_4_channels, + .offload_channels = ad7384_4_offload_channels, .num_channels = ARRAY_SIZE(ad7384_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -537,11 +753,13 @@ static const struct ad7380_chip_info ad7384_4_chip_info = { .num_vcm_supplies = ARRAY_SIZE(ad7380_4_channel_vcm_supplies), .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7386_4_chip_info = { .name = "ad7386-4", .channels = ad7386_4_channels, + .offload_channels = ad7386_4_offload_channels, .num_channels = ARRAY_SIZE(ad7386_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -549,11 +767,13 @@ static const struct ad7380_chip_info ad7386_4_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7387_4_chip_info = { .name = "ad7387-4", .channels = ad7387_4_channels, + .offload_channels = ad7387_4_offload_channels, .num_channels = ARRAY_SIZE(ad7387_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -561,11 +781,13 @@ static const struct ad7380_chip_info ad7387_4_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info ad7388_4_chip_info = { .name = "ad7388-4", .channels = ad7388_4_channels, + .offload_channels = ad7388_4_offload_channels, .num_channels = ARRAY_SIZE(ad7388_4_channels), .num_simult_channels = 4, .supplies = ad7380_supplies, @@ -573,11 +795,13 @@ static const struct ad7380_chip_info ad7388_4_chip_info = { .has_mux = true, .available_scan_masks = ad7380_2x4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info adaq4370_4_chip_info = { .name = "adaq4370-4", .channels = adaq4380_4_channels, + .offload_channels = adaq4380_4_offload_channels, .num_channels = ARRAY_SIZE(adaq4380_4_channels), .num_simult_channels = 4, .supplies = adaq4380_supplies, @@ -586,11 +810,13 @@ static const struct ad7380_chip_info adaq4370_4_chip_info = { .has_hardware_gain = true, .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 2 * MEGA, }; static const struct ad7380_chip_info adaq4380_4_chip_info = { .name = "adaq4380-4", .channels = adaq4380_4_channels, + .offload_channels = adaq4380_4_offload_channels, .num_channels = ARRAY_SIZE(adaq4380_4_channels), .num_simult_channels = 4, .supplies = adaq4380_supplies, @@ -599,11 +825,13 @@ static const struct ad7380_chip_info adaq4380_4_chip_info = { .has_hardware_gain = true, .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct ad7380_chip_info adaq4381_4_chip_info = { .name = "adaq4381-4", .channels = adaq4381_4_channels, + .offload_channels = adaq4381_4_offload_channels, .num_channels = ARRAY_SIZE(adaq4381_4_channels), .num_simult_channels = 4, .supplies = adaq4380_supplies, @@ -614,6 +842,11 @@ static const struct ad7380_chip_info adaq4381_4_chip_info = { .timing_specs = &ad7380_4_timing, }; +static const struct spi_offload_config ad7380_offload_config = { + .capability_flags = SPI_OFFLOAD_CAP_TRIGGER | + SPI_OFFLOAD_CAP_RX_STREAM_DMA, +}; + struct ad7380_state { const struct ad7380_chip_info *chip_info; struct spi_device *spi; @@ -629,6 +862,13 @@ struct ad7380_state { struct spi_message normal_msg; struct spi_transfer seq_xfer[4]; struct spi_message seq_msg; + struct spi_transfer offload_xfer; + struct spi_message offload_msg; + struct spi_offload *offload; + struct spi_offload_trigger *offload_trigger; + unsigned long offload_trigger_hz; + + int sample_freq_range[3]; /* * DMA (thus cache coherency maintenance) requires the transfer buffers * to live in their own cache lines. @@ -848,7 +1088,7 @@ static int ad7380_update_xfers(struct ad7380_state *st, xfer[2].bits_per_word = xfer[3].bits_per_word = scan_type->realbits; xfer[2].len = xfer[3].len = - BITS_TO_BYTES(scan_type->storagebits) * + AD7380_SPI_BYTES(scan_type) * st->chip_info->num_simult_channels; xfer[3].rx_buf = xfer[2].rx_buf + xfer[2].len; /* Additional delay required here when oversampling is enabled */ @@ -861,13 +1101,139 @@ static int ad7380_update_xfers(struct ad7380_state *st, xfer[0].delay.value = t_convert; xfer[0].delay.unit = SPI_DELAY_UNIT_NSECS; xfer[1].bits_per_word = scan_type->realbits; - xfer[1].len = BITS_TO_BYTES(scan_type->storagebits) * + xfer[1].len = AD7380_SPI_BYTES(scan_type) * st->chip_info->num_simult_channels; } return 0; } +static int ad7380_set_sample_freq(struct ad7380_state *st, int val) +{ + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = val, + }, + }; + int ret; + + ret = spi_offload_trigger_validate(st->offload_trigger, &config); + if (ret) + return ret; + + st->offload_trigger_hz = config.periodic.frequency_hz; + + return 0; +} + +static int ad7380_init_offload_msg(struct ad7380_state *st, + struct iio_dev *indio_dev) +{ + struct spi_transfer *xfer = &st->offload_xfer; + struct device *dev = &st->spi->dev; + const struct iio_scan_type *scan_type; + int oversampling_ratio; + int ret; + + scan_type = iio_get_current_scan_type(indio_dev, + &indio_dev->channels[0]); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + if (st->chip_info->has_mux) { + int index; + + ret = iio_active_scan_mask_index(indio_dev); + if (ret < 0) + return ret; + + index = ret; + if (index == AD7380_SCAN_MASK_SEQ) { + ret = regmap_set_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, + AD7380_CONFIG1_SEQ); + if (ret) + return ret; + + st->seq = true; + } else { + ret = ad7380_set_ch(st, index); + if (ret) + return ret; + } + } + + ret = ad7380_get_osr(st, &oversampling_ratio); + if (ret) + return ret; + + xfer->bits_per_word = scan_type->realbits; + xfer->offload_flags = SPI_OFFLOAD_XFER_RX_STREAM; + xfer->len = AD7380_SPI_BYTES(scan_type) * st->chip_info->num_simult_channels; + + spi_message_init_with_transfers(&st->offload_msg, xfer, 1); + st->offload_msg.offload = st->offload; + + ret = spi_optimize_message(st->spi, &st->offload_msg); + if (ret) { + dev_err(dev, "failed to prepare offload msg, err: %d\n", + ret); + return ret; + } + + return 0; +} + +static int ad7380_offload_buffer_postenable(struct iio_dev *indio_dev) +{ + struct ad7380_state *st = iio_priv(indio_dev); + struct spi_offload_trigger_config config = { + .type = SPI_OFFLOAD_TRIGGER_PERIODIC, + .periodic = { + .frequency_hz = st->offload_trigger_hz, + }, + }; + int ret; + + ret = ad7380_init_offload_msg(st, indio_dev); + if (ret) + return ret; + + ret = spi_offload_trigger_enable(st->offload, st->offload_trigger, &config); + if (ret) + spi_unoptimize_message(&st->offload_msg); + + return ret; +} + +static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) +{ + struct ad7380_state *st = iio_priv(indio_dev); + int ret; + + if (st->seq) { + ret = regmap_update_bits(st->regmap, + AD7380_REG_ADDR_CONFIG1, + AD7380_CONFIG1_SEQ, + FIELD_PREP(AD7380_CONFIG1_SEQ, 0)); + if (ret) + return ret; + + st->seq = false; + } + + spi_offload_trigger_disable(st->offload, st->offload_trigger); + + spi_unoptimize_message(&st->offload_msg); + + return 0; +} + +static const struct iio_buffer_setup_ops ad7380_offload_buffer_setup_ops = { + .postenable = ad7380_offload_buffer_postenable, + .predisable = ad7380_offload_buffer_predisable, +}; + static int ad7380_triggered_buffer_preenable(struct iio_dev *indio_dev) { struct ad7380_state *st = iio_priv(indio_dev); @@ -998,7 +1364,7 @@ static int ad7380_read_direct(struct ad7380_state *st, unsigned int scan_index, if (ret < 0) return ret; - if (scan_type->storagebits > 16) { + if (scan_type->realbits > 16) { if (scan_type->sign == 's') *val = sign_extend32(*(u32 *)(st->scan_data + 4 * index), scan_type->realbits - 1); @@ -1079,6 +1445,9 @@ static int ad7380_read_raw(struct iio_dev *indio_dev, if (ret) return ret; + return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->offload_trigger_hz; return IIO_VAL_INT; default: return -EINVAL; @@ -1090,6 +1459,8 @@ static int ad7380_read_avail(struct iio_dev *indio_dev, const int **vals, int *type, int *length, long mask) { + struct ad7380_state *st = iio_priv(indio_dev); + switch (mask) { case IIO_CHAN_INFO_OVERSAMPLING_RATIO: *vals = ad7380_oversampling_ratios; @@ -1097,6 +1468,10 @@ static int ad7380_read_avail(struct iio_dev *indio_dev, *type = IIO_VAL_INT; return IIO_AVAIL_LIST; + case IIO_CHAN_INFO_SAMP_FREQ: + *vals = st->sample_freq_range; + *type = IIO_VAL_INT; + return IIO_AVAIL_RANGE; default: return -EINVAL; } @@ -1166,6 +1541,10 @@ static int ad7380_write_raw(struct iio_dev *indio_dev, int ret; switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (val < 1) + return -EINVAL; + return ad7380_set_sample_freq(st, val); case IIO_CHAN_INFO_OVERSAMPLING_RATIO: if (!iio_device_claim_direct(indio_dev)) return -EBUSY; @@ -1395,6 +1774,53 @@ static int ad7380_init(struct ad7380_state *st, bool external_ref_en) AD7380_NUM_SDO_LINES)); } +static int ad7380_probe_spi_offload(struct iio_dev *indio_dev, + struct ad7380_state *st) +{ + struct spi_device *spi = st->spi; + struct device *dev = &spi->dev; + struct dma_chan *rx_dma; + int sample_rate, ret; + + indio_dev->setup_ops = &ad7380_offload_buffer_setup_ops; + indio_dev->channels = st->chip_info->offload_channels; + /* Just removing the timestamp channel. */ + indio_dev->num_channels--; + + st->offload_trigger = devm_spi_offload_trigger_get(dev, st->offload, + SPI_OFFLOAD_TRIGGER_PERIODIC); + if (IS_ERR(st->offload_trigger)) + return dev_err_probe(dev, PTR_ERR(st->offload_trigger), + "failed to get offload trigger\n"); + + sample_rate = st->chip_info->max_conversion_rate_hz * + AD7380_NUM_SDO_LINES / st->chip_info->num_simult_channels; + + st->sample_freq_range[0] = 1; /* min */ + st->sample_freq_range[1] = 1; /* step */ + st->sample_freq_range[2] = sample_rate; /* max */ + + /* + * Starting with a quite low frequency, to allow oversampling x32, + * user is then reponsible to adjust the frequency for the specific case. + */ + ret = ad7380_set_sample_freq(st, sample_rate / 32); + if (ret) + return ret; + + rx_dma = devm_spi_offload_rx_stream_request_dma_chan(dev, st->offload); + if (IS_ERR(rx_dma)) + return dev_err_probe(dev, PTR_ERR(rx_dma), + "failed to get offload RX DMA\n"); + + ret = devm_iio_dmaengine_buffer_setup_with_handle(dev, indio_dev, + rx_dma, IIO_BUFFER_DIRECTION_IN); + if (ret) + return dev_err_probe(dev, ret, "cannot setup dma buffer\n"); + + return 0; +} + static int ad7380_probe(struct spi_device *spi) { struct device *dev = &spi->dev; @@ -1566,12 +1992,24 @@ static int ad7380_probe(struct spi_device *spi) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->available_scan_masks = st->chip_info->available_scan_masks; - ret = devm_iio_triggered_buffer_setup(dev, indio_dev, - iio_pollfunc_store_time, - ad7380_trigger_handler, - &ad7380_buffer_setup_ops); - if (ret) - return ret; + st->offload = devm_spi_offload_get(dev, spi, &ad7380_offload_config); + ret = PTR_ERR_OR_ZERO(st->offload); + if (ret && ret != -ENODEV) + return dev_err_probe(dev, ret, "failed to get offload\n"); + + /* If no SPI offload, fall back to low speed usage. */ + if (ret == -ENODEV) { + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, + iio_pollfunc_store_time, + ad7380_trigger_handler, + &ad7380_buffer_setup_ops); + if (ret) + return ret; + } else { + ret = ad7380_probe_spi_offload(indio_dev, st); + if (ret) + return ret; + } ret = ad7380_init(st, external_ref_en); if (ret) @@ -1636,3 +2074,4 @@ module_spi_driver(ad7380_driver); MODULE_AUTHOR("Stefan Popa "); MODULE_DESCRIPTION("Analog Devices AD738x ADC driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("IIO_DMAENGINE_BUFFER"); From 8dbeb413806f9f810d97d25284f585b201aa3bdc Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 10 Mar 2025 18:39:53 +0100 Subject: [PATCH 0724/2157] doc: iio: ad7380: describe offload support Add a section to the ad7380 documentation describing how to use the driver with SPI offloading. Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250310-wip-bl-spi-offload-ad7380-v4-2-b184b37b7c72@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad7380.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/iio/ad7380.rst b/Documentation/iio/ad7380.rst index 35232a0e3ad7..24a92a1c4371 100644 --- a/Documentation/iio/ad7380.rst +++ b/Documentation/iio/ad7380.rst @@ -178,6 +178,24 @@ Unimplemented features - Power down mode - CRC indication +SPI offload support +=================== + +To be able to achieve the maximum sample rate, the driver can be used with the +`AXI SPI Engine`_ to provide SPI offload support. + +.. _AXI SPI Engine: http://analogdevicesinc.github.io/hdl/projects/pulsar_adc/index.html + +When SPI offload is being used, some attributes will be different. + +* ``trigger`` directory is removed. +* ``in_voltage0_sampling_frequency`` attribute is added for setting the sample + rate. +* ``in_voltage0_sampling_frequency_available`` attribute is added for querying + the max sample rate. +* ``timestamp`` channel is removed. +* Buffer data format may be different compared to when offload is not used, + e.g. the ``in_voltage0_type`` attribute. Device buffers ============== From b5060c17f9dc13ee740ae940733ec03fdc9847f9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Aug 2024 11:30:20 +0200 Subject: [PATCH 0725/2157] coresight: configfs: Constify struct config_item_type 'struct config_item_type' is not modified in this driver. These structures are only used with config_group_init_type_name() which takes a "const struct config_item_type *" as a 3rd argument or with struct config_group.cg_item.ci_type which is also a "const struct config_item_type *". Constifying this structure moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 4904 1376 136 6416 1910 drivers/hwtracing/coresight/coresight-syscfg-configfs.o After: ===== text data bss dec hex filename 5264 1120 16 6400 1900 drivers/hwtracing/coresight/coresight-syscfg-configfs.o Signed-off-by: Christophe JAILLET Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1011717e5ed35ec12113a0d8c233823e820fb524.1723368522.git.christophe.jaillet@wanadoo.fr --- .../hwtracing/coresight/coresight-syscfg-configfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c index 433ede94dd63..213b4159b062 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c +++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c @@ -160,7 +160,7 @@ static struct configfs_attribute *cscfg_config_view_attrs[] = { NULL, }; -static struct config_item_type cscfg_config_view_type = { +static const struct config_item_type cscfg_config_view_type = { .ct_owner = THIS_MODULE, .ct_attrs = cscfg_config_view_attrs, }; @@ -170,7 +170,7 @@ static struct configfs_attribute *cscfg_config_preset_attrs[] = { NULL, }; -static struct config_item_type cscfg_config_preset_type = { +static const struct config_item_type cscfg_config_preset_type = { .ct_owner = THIS_MODULE, .ct_attrs = cscfg_config_preset_attrs, }; @@ -272,7 +272,7 @@ static struct configfs_attribute *cscfg_feature_view_attrs[] = { NULL, }; -static struct config_item_type cscfg_feature_view_type = { +static const struct config_item_type cscfg_feature_view_type = { .ct_owner = THIS_MODULE, .ct_attrs = cscfg_feature_view_attrs, }; @@ -309,7 +309,7 @@ static struct configfs_attribute *cscfg_param_view_attrs[] = { NULL, }; -static struct config_item_type cscfg_param_view_type = { +static const struct config_item_type cscfg_param_view_type = { .ct_owner = THIS_MODULE, .ct_attrs = cscfg_param_view_attrs, }; @@ -380,7 +380,7 @@ static struct config_group *cscfg_create_feature_group(struct cscfg_feature_desc return &feat_view->group; } -static struct config_item_type cscfg_configs_type = { +static const struct config_item_type cscfg_configs_type = { .ct_owner = THIS_MODULE, }; @@ -414,7 +414,7 @@ void cscfg_configfs_del_config(struct cscfg_config_desc *config_desc) } } -static struct config_item_type cscfg_features_type = { +static const struct config_item_type cscfg_features_type = { .ct_owner = THIS_MODULE, }; From c40524d1615a72548adc30dbfb15c981fd03dacb Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sun, 26 Jan 2025 13:26:30 +0000 Subject: [PATCH 0726/2157] dt-bindings: watchdog: renesas,wdt: Document RZ/G3E support Document the support for the watchdog IP available on RZ/G3E SoC. The watchdog IP available on RZ/G3E SoC is identical to the one found on RZ/V2H SoC. Reviewed-by: Geert Uytterhoeven Acked-by: Krzysztof Kozlowski Reviewed-by: Tommaso Merciai Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20250126132633.31956-2-biju.das.jz@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml index 29ada89fdcdc..3e0a8747a357 100644 --- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml @@ -75,6 +75,10 @@ properties: - renesas,r8a779h0-wdt # R-Car V4M - const: renesas,rcar-gen4-wdt # R-Car Gen4 + - items: + - const: renesas,r9a09g047-wdt # RZ/G3E + - const: renesas,r9a09g057-wdt # RZ/V2H(P) + - const: renesas,r9a09g057-wdt # RZ/V2H(P) reg: From 969a38be437b68dc9e12e3c3f08911c9f9c8be73 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Wed, 12 Mar 2025 16:00:07 +0800 Subject: [PATCH 0727/2157] phy: rockchip: usbdp: Only verify link rates/lanes/voltage when the corresponding set flags are set According documentation of phy_configure_opts_dp, at the configure stage, link rates should only be verify/configure when set_rate flag is set, the same applies to lanes and voltage. So do it as the documentation says. Because voltage setting depends on the lanes, link rates set previously, so record the link rates and lanes at it's verify stage. Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20250312080041.524546-1-andyshrk@163.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-usbdp.c | 105 +++++++++++++--------- 1 file changed, 62 insertions(+), 43 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-usbdp.c b/drivers/phy/rockchip/phy-rockchip-usbdp.c index 5b1e8a3806ed..77561e347c86 100644 --- a/drivers/phy/rockchip/phy-rockchip-usbdp.c +++ b/drivers/phy/rockchip/phy-rockchip-usbdp.c @@ -187,6 +187,8 @@ struct rk_udphy { u32 dp_aux_din_sel; bool dp_sink_hpd_sel; bool dp_sink_hpd_cfg; + unsigned int link_rate; + unsigned int lanes; u8 bw; int id; @@ -1103,13 +1105,35 @@ static int rk_udphy_dp_phy_power_off(struct phy *phy) return 0; } -static int rk_udphy_dp_phy_verify_link_rate(unsigned int link_rate) +/* + * Verify link rate + */ +static int rk_udphy_dp_phy_verify_link_rate(struct rk_udphy *udphy, + struct phy_configure_opts_dp *dp) { - switch (link_rate) { + switch (dp->link_rate) { case 1620: case 2700: case 5400: case 8100: + udphy->link_rate = dp->link_rate; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int rk_udphy_dp_phy_verify_lanes(struct rk_udphy *udphy, + struct phy_configure_opts_dp *dp) +{ + switch (dp->lanes) { + case 1: + case 2: + case 4: + /* valid lane count. */ + udphy->lanes = dp->lanes; break; default: @@ -1119,45 +1143,26 @@ static int rk_udphy_dp_phy_verify_link_rate(unsigned int link_rate) return 0; } -static int rk_udphy_dp_phy_verify_config(struct rk_udphy *udphy, - struct phy_configure_opts_dp *dp) +/* + * If changing voltages is required, check swing and pre-emphasis + * levels, per-lane. + */ +static int rk_udphy_dp_phy_verify_voltages(struct rk_udphy *udphy, + struct phy_configure_opts_dp *dp) { - int i, ret; + int i; - /* If changing link rate was required, verify it's supported. */ - ret = rk_udphy_dp_phy_verify_link_rate(dp->link_rate); - if (ret) - return ret; + /* Lane count verified previously. */ + for (i = 0; i < udphy->lanes; i++) { + if (dp->voltage[i] > 3 || dp->pre[i] > 3) + return -EINVAL; - /* Verify lane count. */ - switch (dp->lanes) { - case 1: - case 2: - case 4: - /* valid lane count. */ - break; - - default: - return -EINVAL; - } - - /* - * If changing voltages is required, check swing and pre-emphasis - * levels, per-lane. - */ - if (dp->set_voltages) { - /* Lane count verified previously. */ - for (i = 0; i < dp->lanes; i++) { - if (dp->voltage[i] > 3 || dp->pre[i] > 3) - return -EINVAL; - - /* - * Sum of voltage swing and pre-emphasis levels cannot - * exceed 3. - */ - if (dp->voltage[i] + dp->pre[i] > 3) - return -EINVAL; - } + /* + * Sum of voltage swing and pre-emphasis levels cannot + * exceed 3. + */ + if (dp->voltage[i] + dp->pre[i] > 3) + return -EINVAL; } return 0; @@ -1197,9 +1202,23 @@ static int rk_udphy_dp_phy_configure(struct phy *phy, u32 i, val, lane; int ret; - ret = rk_udphy_dp_phy_verify_config(udphy, dp); - if (ret) - return ret; + if (dp->set_rate) { + ret = rk_udphy_dp_phy_verify_link_rate(udphy, dp); + if (ret) + return ret; + } + + if (dp->set_lanes) { + ret = rk_udphy_dp_phy_verify_lanes(udphy, dp); + if (ret) + return ret; + } + + if (dp->set_voltages) { + ret = rk_udphy_dp_phy_verify_voltages(udphy, dp); + if (ret) + return ret; + } if (dp->set_rate) { regmap_update_bits(udphy->pma_regmap, CMN_DP_RSTN_OFFSET, @@ -1244,9 +1263,9 @@ static int rk_udphy_dp_phy_configure(struct phy *phy, } if (dp->set_voltages) { - for (i = 0; i < dp->lanes; i++) { + for (i = 0; i < udphy->lanes; i++) { lane = udphy->dp_lane_sel[i]; - switch (dp->link_rate) { + switch (udphy->link_rate) { case 1620: case 2700: regmap_update_bits(udphy->pma_regmap, From 28dc672a1a877c77b000c896abd8f15afcdc1b0c Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Sun, 2 Mar 2025 19:52:25 +0800 Subject: [PATCH 0728/2157] phy: rockchip: usbdp: Avoid call hpd_event_trigger in dp_phy_init Function rk_udphy_dp_hpd_event_trigger will set vogrf let it trigger HPD interrupt to DP by Type-C. This configuration is only required when the DP work in Alternate Mode, and called by typec_mux_set. In standard DP mode, such settings will prevent the DP from receiving HPD interrupts. Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20250302115257.188774-1-andyshrk@163.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-usbdp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-usbdp.c b/drivers/phy/rockchip/phy-rockchip-usbdp.c index 77561e347c86..fff04e0fbd80 100644 --- a/drivers/phy/rockchip/phy-rockchip-usbdp.c +++ b/drivers/phy/rockchip/phy-rockchip-usbdp.c @@ -1047,7 +1047,6 @@ static int rk_udphy_dp_phy_init(struct phy *phy) mutex_lock(&udphy->mutex); udphy->dp_in_use = true; - rk_udphy_dp_hpd_event_trigger(udphy, udphy->dp_sink_hpd_cfg); mutex_unlock(&udphy->mutex); From b52b330046d16f6e4ce94ae6fa349cc30af027b3 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 24 Feb 2025 23:03:39 +0100 Subject: [PATCH 0729/2157] phy: rockchip: usbdp: Remove unnecessary bool conversion Remove the unnecessary bool conversion and simplify the code. Signed-off-by: Thorsten Blum Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250224220339.199180-2-thorsten.blum@linux.dev Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-usbdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-usbdp.c b/drivers/phy/rockchip/phy-rockchip-usbdp.c index fff04e0fbd80..c066cc0a7b4f 100644 --- a/drivers/phy/rockchip/phy-rockchip-usbdp.c +++ b/drivers/phy/rockchip/phy-rockchip-usbdp.c @@ -980,7 +980,7 @@ static int rk_udphy_parse_dt(struct rk_udphy *udphy) if (device_property_present(dev, "maximum-speed")) { maximum_speed = usb_get_maximum_speed(dev); - udphy->hs = maximum_speed <= USB_SPEED_HIGH ? true : false; + udphy->hs = maximum_speed <= USB_SPEED_HIGH; } ret = rk_udphy_clk_init(udphy, dev); From d1ca8698ca1332625d83ea0d753747be66f9906d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Mar 2025 19:26:31 -0500 Subject: [PATCH 0730/2157] spufs: fix a leak on spufs_new_file() failure It's called from spufs_fill_dir(), and caller of that will do spufs_rmdir() in case of failure. That does remove everything we'd managed to create, but... the problem dentry is still negative. IOW, it needs to be explicitly dropped. Fixes: 3f51dd91c807 "[PATCH] spufs: fix spufs_fill_dir error path" Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 70236d1df3d3..793c005607cf 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -192,8 +192,10 @@ static int spufs_fill_dir(struct dentry *dir, return -ENOMEM; ret = spufs_new_file(dir->d_sb, dentry, files->ops, files->mode & mode, files->size, ctx); - if (ret) + if (ret) { + dput(dentry); return ret; + } files++; } return 0; From c134deabf4784e155d360744d4a6a835b9de4dd4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Mar 2025 19:18:39 -0400 Subject: [PATCH 0731/2157] spufs: fix gang directory lifetimes prior to "[POWERPC] spufs: Fix gang destroy leaks" we used to have a problem with gang lifetimes - creation of a gang returns opened gang directory, which normally gets removed when that gets closed, but if somebody has created a context belonging to that gang and kept it alive until the gang got closed, removal failed and we ended up with a leak. Unfortunately, it had been fixed the wrong way. Dentry of gang directory was no longer pinned, and rmdir on close was gone. One problem was that failure of open kept calling simple_rmdir() as cleanup, which meant an unbalanced dput(). Another bug was in the success case - gang creation incremented link count on root directory, but that was no longer undone when gang got destroyed. Fix consists of * reverting the commit in question * adding a counter to gang, protected by ->i_rwsem of gang directory inode. * having it set to 1 at creation time, dropped in both spufs_dir_close() and spufs_gang_close() and bumped in spufs_create_context(), provided that it's not 0. * using simple_recursive_removal() to take the gang directory out when counter reaches zero. Fixes: 877907d37da9 "[POWERPC] spufs: Fix gang destroy leaks" Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/gang.c | 1 + arch/powerpc/platforms/cell/spufs/inode.c | 54 +++++++++++++++++++---- arch/powerpc/platforms/cell/spufs/spufs.h | 2 + 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c index 827d338deaf4..2c2999de6bfa 100644 --- a/arch/powerpc/platforms/cell/spufs/gang.c +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -25,6 +25,7 @@ struct spu_gang *alloc_spu_gang(void) mutex_init(&gang->aff_mutex); INIT_LIST_HEAD(&gang->list); INIT_LIST_HEAD(&gang->aff_list_head); + gang->alive = 1; out: return gang; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 793c005607cf..c566e7997f2c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -201,6 +201,23 @@ static int spufs_fill_dir(struct dentry *dir, return 0; } +static void unuse_gang(struct dentry *dir) +{ + struct inode *inode = dir->d_inode; + struct spu_gang *gang = SPUFS_I(inode)->i_gang; + + if (gang) { + bool dead; + + inode_lock(inode); // exclusion with spufs_create_context() + dead = !--gang->alive; + inode_unlock(inode); + + if (dead) + simple_recursive_removal(dir, NULL); + } +} + static int spufs_dir_close(struct inode *inode, struct file *file) { struct inode *parent; @@ -215,6 +232,7 @@ static int spufs_dir_close(struct inode *inode, struct file *file) inode_unlock(parent); WARN_ON(ret); + unuse_gang(dir->d_parent); return dcache_dir_close(inode, file); } @@ -407,7 +425,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, { int ret; int affinity; - struct spu_gang *gang; + struct spu_gang *gang = SPUFS_I(inode)->i_gang; struct spu_context *neighbor; struct path path = {.mnt = mnt, .dentry = dentry}; @@ -422,11 +440,15 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) return -ENODEV; - gang = NULL; + if (gang) { + if (!gang->alive) + return -ENOENT; + gang->alive++; + } + neighbor = NULL; affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); if (affinity) { - gang = SPUFS_I(inode)->i_gang; if (!gang) return -EINVAL; mutex_lock(&gang->aff_mutex); @@ -455,6 +477,8 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, out_aff_unlock: if (affinity) mutex_unlock(&gang->aff_mutex); + if (ret && gang) + gang->alive--; // can't reach 0 return ret; } @@ -484,6 +508,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_fop = &simple_dir_operations; d_instantiate(dentry, inode); + dget(dentry); inc_nlink(dir); inc_nlink(d_inode(dentry)); return ret; @@ -494,6 +519,21 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) return ret; } +static int spufs_gang_close(struct inode *inode, struct file *file) +{ + unuse_gang(file->f_path.dentry); + return dcache_dir_close(inode, file); +} + +static const struct file_operations spufs_gang_fops = { + .open = dcache_dir_open, + .release = spufs_gang_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .iterate_shared = dcache_readdir, + .fsync = noop_fsync, +}; + static int spufs_gang_open(const struct path *path) { int ret; @@ -513,7 +553,7 @@ static int spufs_gang_open(const struct path *path) return PTR_ERR(filp); } - filp->f_op = &simple_dir_operations; + filp->f_op = &spufs_gang_fops; fd_install(ret, filp); return ret; } @@ -528,10 +568,8 @@ static int spufs_create_gang(struct inode *inode, ret = spufs_mkgang(inode, dentry, mode & 0777); if (!ret) { ret = spufs_gang_open(&path); - if (ret < 0) { - int err = simple_rmdir(inode, dentry); - WARN_ON(err); - } + if (ret < 0) + unuse_gang(dentry); } return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 84958487f696..d33787c57c39 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -151,6 +151,8 @@ struct spu_gang { int aff_flags; struct spu *aff_ref_spu; atomic_t aff_sched_count; + + int alive; }; /* Flag bits for spu_gang aff_flags */ From 0f5cce3fc55b08ee4da3372baccf4bcd36a98396 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Mar 2025 19:38:28 -0400 Subject: [PATCH 0732/2157] spufs: fix a leak in spufs_create_context() Leak fixes back in 2008 missed one case - if we are trying to set affinity and spufs_mkdir() fails, we need to drop the reference to neighbor. Fixes: 58119068cb27 "[POWERPC] spufs: Fix memory leak on SPU affinity" Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index c566e7997f2c..9f9e4b871627 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -460,8 +460,11 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, } ret = spufs_mkdir(inode, dentry, flags, mode & 0777); - if (ret) + if (ret) { + if (neighbor) + put_spu_context(neighbor); goto out_aff_unlock; + } if (affinity) { spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, From bdb43af4fdb39f844ede401bdb1258f67a580a27 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 13 May 2024 17:50:34 -0600 Subject: [PATCH 0733/2157] qibfs: fix _another_ leak failure to allocate inode => leaked dentry... this one had been there since the initial merge; to be fair, if we are that far OOM, the odds of failing at that particular allocation are low... Signed-off-by: Al Viro --- drivers/infiniband/hw/qib/qib_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index b27791029fa9..b9f4a2937c3a 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode = new_inode(dir->i_sb); if (!inode) { + dput(dentry); error = -EPERM; goto bail; } From 0686a818d77a431fc3ba2fab4b46bbb04e8c9380 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Thu, 6 Mar 2025 10:29:13 -0700 Subject: [PATCH 0734/2157] bus: mhi: host: Fix race between unprepare and queue_buf A client driver may use mhi_unprepare_from_transfer() to quiesce incoming data during the client driver's tear down. The client driver might also be processing data at the same time, resulting in a call to mhi_queue_buf() which will invoke mhi_gen_tre(). If mhi_gen_tre() runs after mhi_unprepare_from_transfer() has torn down the channel, a panic will occur due to an invalid dereference leading to a page fault. This occurs because mhi_gen_tre() does not verify the channel state after locking it. Fix this by having mhi_gen_tre() confirm the channel state is valid, or return error to avoid accessing deinitialized data. Cc: stable@vger.kernel.org # 6.8 Fixes: b89b6a863dd5 ("bus: mhi: host: Add spinlock to protect WP access when queueing TREs") Signed-off-by: Jeffrey Hugo Signed-off-by: Jeff Hugo Reviewed-by: Krishna Chaitanya Chundru Reviewed-by: Youssef Samir Reviewed-by: Manivannan Sadhasivam Reviewed-by: Troy Hanson Link: https://lore.kernel.org/r/20250306172913.856982-1-jeff.hugo@oss.qualcomm.com [mani: added stable tag] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/main.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index 4c91ffd6ed0e..9bb0df43ceef 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -1188,11 +1188,16 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, struct mhi_ring_element *mhi_tre; struct mhi_buf_info *buf_info; int eot, eob, chain, bei; - int ret; + int ret = 0; /* Protect accesses for reading and incrementing WP */ write_lock_bh(&mhi_chan->lock); + if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED) { + ret = -ENODEV; + goto out; + } + buf_ring = &mhi_chan->buf_ring; tre_ring = &mhi_chan->tre_ring; @@ -1210,10 +1215,8 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, if (!info->pre_mapped) { ret = mhi_cntrl->map_single(mhi_cntrl, buf_info); - if (ret) { - write_unlock_bh(&mhi_chan->lock); - return ret; - } + if (ret) + goto out; } eob = !!(flags & MHI_EOB); @@ -1231,9 +1234,10 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, mhi_add_ring_element(mhi_cntrl, tre_ring); mhi_add_ring_element(mhi_cntrl, buf_ring); +out: write_unlock_bh(&mhi_chan->lock); - return 0; + return ret; } int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, From dcd2a9a5550ef556c8fc11601a0f729fb71ead5d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2025 13:30:27 +0100 Subject: [PATCH 0735/2157] regulator: dummy: convert to use the faux device interface The dummy regulator driver does not need to create a platform device, it only did so because it was simple to do. Change it over to use the faux bus instead as this is NOT a real platform device, and it makes the code even smaller than before. Reviewed-by: Mark Brown Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/2025021027-outclass-stress-59dd@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/dummy.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c index 5b9b9e4e762d..e5197ec7234d 100644 --- a/drivers/regulator/dummy.c +++ b/drivers/regulator/dummy.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include @@ -37,15 +37,15 @@ static const struct regulator_desc dummy_desc = { .ops = &dummy_ops, }; -static int dummy_regulator_probe(struct platform_device *pdev) +static int dummy_regulator_probe(struct faux_device *fdev) { struct regulator_config config = { }; int ret; - config.dev = &pdev->dev; + config.dev = &fdev->dev; config.init_data = &dummy_initdata; - dummy_regulator_rdev = devm_regulator_register(&pdev->dev, &dummy_desc, + dummy_regulator_rdev = devm_regulator_register(&fdev->dev, &dummy_desc, &config); if (IS_ERR(dummy_regulator_rdev)) { ret = PTR_ERR(dummy_regulator_rdev); @@ -56,36 +56,17 @@ static int dummy_regulator_probe(struct platform_device *pdev) return 0; } -static struct platform_driver dummy_regulator_driver = { - .probe = dummy_regulator_probe, - .driver = { - .name = "reg-dummy", - .probe_type = PROBE_PREFER_ASYNCHRONOUS, - }, +struct faux_device_ops dummy_regulator_driver = { + .probe = dummy_regulator_probe, }; -static struct platform_device *dummy_pdev; +static struct faux_device *dummy_fdev; void __init regulator_dummy_init(void) { - int ret; - - dummy_pdev = platform_device_alloc("reg-dummy", -1); - if (!dummy_pdev) { + dummy_fdev = faux_device_create("reg-dummy", NULL, &dummy_regulator_driver); + if (!dummy_fdev) { pr_err("Failed to allocate dummy regulator device\n"); return; } - - ret = platform_device_add(dummy_pdev); - if (ret != 0) { - pr_err("Failed to register dummy regulator device: %d\n", ret); - platform_device_put(dummy_pdev); - return; - } - - ret = platform_driver_register(&dummy_regulator_driver); - if (ret != 0) { - pr_err("Failed to register dummy regulator driver: %d\n", ret); - platform_device_unregister(dummy_pdev); - } } From 72239a78f9f5b9f05ea4bb7a15b92807906dab71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2025 13:30:30 +0100 Subject: [PATCH 0736/2157] tlclk: convert to use faux_device The tlclk driver does not need to create a platform device, it only did so because it was simple to do that in order to get a place in sysfs to hang some device-specific attributes. Change it over to use the faux device instead as this is NOT a real platform device, and it makes the code even smaller than before. Cc: Mark Gross Cc: Arnd Bergmann Link: https://lore.kernel.org/r/2025021028-askew-smashing-4ff9@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/char/tlclk.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 377bebf6c925..6c1d94eda5a2 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include /* inb/outb */ #include @@ -742,7 +742,7 @@ static ssize_t store_reset (struct device *d, static DEVICE_ATTR(reset, (S_IWUSR|S_IWGRP), NULL, store_reset); -static struct attribute *tlclk_sysfs_entries[] = { +static struct attribute *tlclk_attrs[] = { &dev_attr_current_ref.attr, &dev_attr_telclock_version.attr, &dev_attr_alarms.attr, @@ -766,13 +766,9 @@ static struct attribute *tlclk_sysfs_entries[] = { &dev_attr_reset.attr, NULL }; +ATTRIBUTE_GROUPS(tlclk); -static const struct attribute_group tlclk_attribute_group = { - .name = NULL, /* put in device directory */ - .attrs = tlclk_sysfs_entries, -}; - -static struct platform_device *tlclk_device; +static struct faux_device *tlclk_device; static int __init tlclk_init(void) { @@ -817,24 +813,13 @@ static int __init tlclk_init(void) goto out3; } - tlclk_device = platform_device_register_simple("telco_clock", - -1, NULL, 0); - if (IS_ERR(tlclk_device)) { - printk(KERN_ERR "tlclk: platform_device_register failed.\n"); - ret = PTR_ERR(tlclk_device); + tlclk_device = faux_device_create_with_groups("telco_clock", NULL, NULL, tlclk_groups); + if (!tlclk_device) { + ret = -ENODEV; goto out4; } - ret = sysfs_create_group(&tlclk_device->dev.kobj, - &tlclk_attribute_group); - if (ret) { - printk(KERN_ERR "tlclk: failed to create sysfs device attributes.\n"); - goto out5; - } - return 0; -out5: - platform_device_unregister(tlclk_device); out4: misc_deregister(&tlclk_miscdev); out3: @@ -848,8 +833,7 @@ static int __init tlclk_init(void) static void __exit tlclk_cleanup(void) { - sysfs_remove_group(&tlclk_device->dev.kobj, &tlclk_attribute_group); - platform_device_unregister(tlclk_device); + faux_device_destroy(tlclk_device); misc_deregister(&tlclk_miscdev); unregister_chrdev(tlclk_major, "telco_clock"); From 3b18ccb5472b78c48b03b98e8a0ef467c3e0a175 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2025 13:30:31 +0100 Subject: [PATCH 0737/2157] misc: lis3lv02d: convert to use faux_device The lis3lv02d driver does not need to create a platform device, it only did so because it was simple to do that in order to get a place in sysfs to hang some device-specific attributes. Change it over to use the faux device instead as this is NOT a real platform device, and it makes the code even smaller than before. Cc: Eric Piel Cc: Arnd Bergmann Link: https://lore.kernel.org/r/2025021028-shortcake-stained-b0a8@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lis3lv02d/lis3lv02d.c | 26 ++++++++++---------------- drivers/misc/lis3lv02d/lis3lv02d.h | 4 ++-- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 4233dc4cc7d6..6957091ab6de 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -230,7 +229,7 @@ static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) return 0; } - dev_err(&lis3->pdev->dev, "Error unknown odrs-index: %d\n", odr_idx); + dev_err(&lis3->fdev->dev, "Error unknown odrs-index: %d\n", odr_idx); return -ENXIO; } @@ -694,7 +693,7 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) input_dev->phys = DRIVER_NAME "/input0"; input_dev->id.bustype = BUS_HOST; input_dev->id.vendor = 0; - input_dev->dev.parent = &lis3->pdev->dev; + input_dev->dev.parent = &lis3->fdev->dev; input_dev->open = lis3lv02d_joystick_open; input_dev->close = lis3lv02d_joystick_close; @@ -855,32 +854,27 @@ static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL); static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show, lis3lv02d_rate_set); -static struct attribute *lis3lv02d_attributes[] = { +static struct attribute *lis3lv02d_attrs[] = { &dev_attr_selftest.attr, &dev_attr_position.attr, &dev_attr_rate.attr, NULL }; - -static const struct attribute_group lis3lv02d_attribute_group = { - .attrs = lis3lv02d_attributes -}; - +ATTRIBUTE_GROUPS(lis3lv02d); static int lis3lv02d_add_fs(struct lis3lv02d *lis3) { - lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); - if (IS_ERR(lis3->pdev)) - return PTR_ERR(lis3->pdev); + lis3->fdev = faux_device_create_with_groups(DRIVER_NAME, NULL, NULL, lis3lv02d_groups); + if (!lis3->fdev) + return -ENODEV; - platform_set_drvdata(lis3->pdev, lis3); - return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); + faux_device_set_drvdata(lis3->fdev, lis3); + return 0; } void lis3lv02d_remove_fs(struct lis3lv02d *lis3) { - sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); - platform_device_unregister(lis3->pdev); + faux_device_destroy(lis3->fdev); if (lis3->pm_dev) { /* Barrier after the sysfs remove */ pm_runtime_barrier(lis3->pm_dev); diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index 195bd2fd2eb5..989a49e57554 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -5,7 +5,7 @@ * Copyright (C) 2007-2008 Yan Burman * Copyright (C) 2008-2009 Eric Piel */ -#include +#include #include #include #include @@ -282,7 +282,7 @@ struct lis3lv02d { */ struct input_dev *idev; /* input device */ - struct platform_device *pdev; /* platform device */ + struct faux_device *fdev; /* faux device */ struct regulator_bulk_data regulators[2]; atomic_t count; /* interrupt count after last read */ union axis_conversion ac; /* hw -> logical axis */ From bcb71579db5e9ce0c1332a7574948ddacfaabfa9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Feb 2025 00:21:36 +0100 Subject: [PATCH 0738/2157] usb: common: usb-conn-gpio: switch psy_cfg from of_node to fwnode In order to remove .of_node from the power_supply_config struct, use .fwnode instead. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250225-psy-core-convert-to-fwnode-v1-3-d5e4369936bb@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/usb-conn-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c index aa710b50791b..1e36be2a28fd 100644 --- a/drivers/usb/common/usb-conn-gpio.c +++ b/drivers/usb/common/usb-conn-gpio.c @@ -158,7 +158,7 @@ static int usb_conn_psy_register(struct usb_conn_info *info) struct device *dev = info->dev; struct power_supply_desc *desc = &info->desc; struct power_supply_config cfg = { - .of_node = dev->of_node, + .fwnode = dev_fwnode(dev), }; desc->name = "usb-charger"; From 440ca0cfdfd0d7cf575e9176c07358a1ed2477ba Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 8 Mar 2025 17:24:15 +0100 Subject: [PATCH 0739/2157] dt-bindings: usb: qcom,dwc3: Synchronize minItems for interrupts and -names It makes sense that ARRAY_SIZE(prop) should == ARRAY_SIZE(prop-names), so allow that to happen with interrupts. Fixes bogus warnings such as: usb@c2f8800: interrupt-names: ['pwr_event', 'qusb2_phy', 'hs_phy_irq'] is too short Fixes: 53c6d854be4e ("dt-bindings: usb: dwc3: Clean up hs_phy_irq in binding") Signed-off-by: Konrad Dybcio Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250308-topic-dt_bindings_fixes_usb-v2-1-3169a3394d5b@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index a2b3cf625e5b..64137c1619a6 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -404,6 +404,7 @@ allOf: minItems: 2 maxItems: 3 interrupt-names: + minItems: 2 items: - const: pwr_event - const: qusb2_phy @@ -425,6 +426,7 @@ allOf: minItems: 3 maxItems: 4 interrupt-names: + minItems: 3 items: - const: pwr_event - const: qusb2_phy From 8c75f3e6a433d92084ad4e78b029ae680865420f Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Mon, 10 Mar 2025 20:27:05 -0500 Subject: [PATCH 0740/2157] usb: gadget: aspeed: Add NULL pointer check in ast_vhub_init_dev() The variable d->name, returned by devm_kasprintf(), could be NULL. A pointer check is added to prevent potential NULL pointer dereference. This is similar to the fix in commit 3027e7b15b02 ("ice: Fix some null pointer dereference issues in ice_ptp.c"). This issue is found by our static analysis tool Signed-off-by: Chenyuan Yang Link: https://lore.kernel.org/r/20250311012705.1233829-1-chenyuan0y@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/aspeed-vhub/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/udc/aspeed-vhub/dev.c b/drivers/usb/gadget/udc/aspeed-vhub/dev.c index 573109ca5b79..a09f72772e6e 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/dev.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/dev.c @@ -548,6 +548,9 @@ int ast_vhub_init_dev(struct ast_vhub *vhub, unsigned int idx) d->vhub = vhub; d->index = idx; d->name = devm_kasprintf(parent, GFP_KERNEL, "port%d", idx+1); + if (!d->name) + return -ENOMEM; + d->regs = vhub->regs + 0x100 + 0x10 * idx; ast_vhub_init_ep0(vhub, &d->ep0, d); From f4aa6caa8b420bcc0813fccecbc35f43203297df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 10 Mar 2025 16:38:23 +0100 Subject: [PATCH 0741/2157] usb: core: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a revert of commit 2f964780c03b ("USB: core: replace %p with %pK"). When the formatting was changed from %p to %pK that was a security improvement, as %p would leak raw pointer values to the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. On the other hand, restricted pointers ("%pK") were never meant to be used through printk(). They can unintentionally still leak raw pointers or acquire sleeping looks in atomic contexts. Switch back to regular %p again. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250310-restricted-pointers-usb-v2-1-a7598e2d47d1@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 4 ++-- drivers/usb/core/urb.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index a75cf1f6d741..46026b331267 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1609,7 +1609,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) if (retval == 0) retval = -EINPROGRESS; else if (retval != -EIDRM && retval != -EBUSY) - dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n", + dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n", urb, retval); usb_put_dev(udev); } @@ -1786,7 +1786,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* kick hcd */ unlink1(hcd, urb, -ESHUTDOWN); dev_dbg (hcd->self.controller, - "shutdown urb %pK ep%d%s-%s\n", + "shutdown urb %p ep%d%s-%s\n", urb, usb_endpoint_num(&ep->desc), is_in ? "in" : "out", usb_ep_type_string(usb_endpoint_type(&ep->desc))); diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 7576920e2d5a..5e52a35486af 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -376,7 +376,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { - WARN_ONCE(1, "URB %pK submitted while active\n", urb); + WARN_ONCE(1, "URB %p submitted while active\n", urb); return -EBUSY; } From 042c3dd1f2d3f8a27c5bb9c2e7866a674607d4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 10 Mar 2025 16:38:24 +0100 Subject: [PATCH 0742/2157] usb: dwc3: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a revert of commit 04fb365c453e ("usb: dwc3: replace %p with %pK") When the formatting was changed from %p to %pK that was a security improvement, as %p would leak raw pointer values to the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. On the other hand, restricted pointers ("%pK") were never meant to be used through printk(). They can unintentionally still leak raw pointers or acquire sleeping looks in atomic contexts. Switch back to regular %p again. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Acked-by: Thinh Nguyen Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250310-restricted-pointers-usb-v2-2-a7598e2d47d1@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-st.c | 2 +- drivers/usb/dwc3/gadget.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index ef7c43008946..5d513decaacd 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -225,7 +225,7 @@ static int st_dwc3_probe(struct platform_device *pdev) dwc3_data->syscfg_reg_off = res->start; - dev_vdbg(dev, "glue-logic addr 0x%pK, syscfg-reg offset 0x%x\n", + dev_vdbg(dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n", dwc3_data->glue_base, dwc3_data->syscfg_reg_off); struct device_node *child __free(device_node) = of_get_compatible_child(node, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a2f45b21c6d7..47e73c4ed62d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1979,12 +1979,12 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) return -ESHUTDOWN; } - if (WARN(req->dep != dep, "request %pK belongs to '%s'\n", + if (WARN(req->dep != dep, "request %p belongs to '%s'\n", &req->request, req->dep->name)) return -EINVAL; if (WARN(req->status < DWC3_REQUEST_STATUS_COMPLETED, - "%s: request %pK already in flight\n", + "%s: request %p already in flight\n", dep->name, &req->request)) return -EINVAL; @@ -2173,7 +2173,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, } } - dev_err(dwc->dev, "request %pK was not queued to %s\n", + dev_err(dwc->dev, "request %p was not queued to %s\n", request, ep->name); ret = -EINVAL; out: From 64eb182d5f7a5ec30227bce4f6922ff663432f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:46 +0100 Subject: [PATCH 0743/2157] usb: host: xhci-plat: mvebu: use ->quirks instead of ->init_quirk() func MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compatible "marvell,armada3700-xhci" match data uses the struct xhci_plat_priv::init_quirk() function pointer to add XHCI_RESET_ON_RESUME as quirk on XHCI. Instead, use the struct xhci_plat_priv::quirks field. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-1-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mvebu.c | 10 ---------- drivers/usb/host/xhci-mvebu.h | 6 ------ drivers/usb/host/xhci-plat.c | 2 +- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-mvebu.c b/drivers/usb/host/xhci-mvebu.c index 87f1597a0e5a..257e4d79971f 100644 --- a/drivers/usb/host/xhci-mvebu.c +++ b/drivers/usb/host/xhci-mvebu.c @@ -73,13 +73,3 @@ int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) return 0; } - -int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) -{ - struct xhci_hcd *xhci = hcd_to_xhci(hcd); - - /* Without reset on resume, the HC won't work at all */ - xhci->quirks |= XHCI_RESET_ON_RESUME; - - return 0; -} diff --git a/drivers/usb/host/xhci-mvebu.h b/drivers/usb/host/xhci-mvebu.h index 3be021793cc8..9d26e22c4842 100644 --- a/drivers/usb/host/xhci-mvebu.h +++ b/drivers/usb/host/xhci-mvebu.h @@ -12,16 +12,10 @@ struct usb_hcd; #if IS_ENABLED(CONFIG_USB_XHCI_MVEBU) int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd); -int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd); #else static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) { return 0; } - -static inline int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) -{ - return 0; -} #endif #endif /* __LINUX_XHCI_MVEBU_H */ diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index d85ffa9ffaa7..ff813dca2d1d 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -106,7 +106,7 @@ static const struct xhci_plat_priv xhci_plat_marvell_armada = { }; static const struct xhci_plat_priv xhci_plat_marvell_armada3700 = { - .init_quirk = xhci_mvebu_a3700_init_quirk, + .quirks = XHCI_RESET_ON_RESUME, }; static const struct xhci_plat_priv xhci_plat_brcm = { From 7aad3a42fee569ad141798a958c22eb683c37b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:47 +0100 Subject: [PATCH 0744/2157] usb: xhci: tegra: rename `runtime` boolean to `is_auto_runtime` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify naming convention: use `is_auto_runtime` in xhci-tegra, to be in phase with (future) drivers/usb/host/xhci.c. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-2-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 22dc86fb5254..107a95b59541 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -2162,11 +2162,11 @@ static void tegra_xhci_program_utmi_power_lp0_exit(struct tegra_xusb *tegra) } } -static int tegra_xusb_enter_elpg(struct tegra_xusb *tegra, bool runtime) +static int tegra_xusb_enter_elpg(struct tegra_xusb *tegra, bool is_auto_resume) { struct xhci_hcd *xhci = hcd_to_xhci(tegra->hcd); struct device *dev = tegra->dev; - bool wakeup = runtime ? true : device_may_wakeup(dev); + bool wakeup = is_auto_resume ? true : device_may_wakeup(dev); unsigned int i; int err; u32 usbcmd; @@ -2232,11 +2232,11 @@ static int tegra_xusb_enter_elpg(struct tegra_xusb *tegra, bool runtime) return err; } -static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool runtime) +static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool is_auto_resume) { struct xhci_hcd *xhci = hcd_to_xhci(tegra->hcd); struct device *dev = tegra->dev; - bool wakeup = runtime ? true : device_may_wakeup(dev); + bool wakeup = is_auto_resume ? true : device_may_wakeup(dev); unsigned int i; u32 usbcmd; int err; @@ -2287,7 +2287,7 @@ static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool runtime) if (wakeup) tegra_xhci_disable_phy_sleepwalk(tegra); - err = xhci_resume(xhci, runtime ? PMSG_AUTO_RESUME : PMSG_RESUME); + err = xhci_resume(xhci, is_auto_resume ? PMSG_AUTO_RESUME : PMSG_RESUME); if (err < 0) { dev_err(tegra->dev, "failed to resume XHCI: %d\n", err); goto disable_phy; From 0bde749c58c72405c54a1eabf6266a0273377226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:48 +0100 Subject: [PATCH 0745/2157] usb: cdns3: rename hibernated argument of role->resume() to lost_power MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cdns_role_driver->resume() callback takes a second boolean argument named `hibernated` in its implementations. This is mistaken; the only potential caller is: int cdns_resume(struct cdns *cdns) { /* ... */ if (cdns->roles[cdns->role]->resume) cdns->roles[cdns->role]->resume(cdns, cdns_power_is_lost(cdns)); return 0; } The argument can be true in cases outside of return from hibernation. Reflect the true meaning by renaming both arguments to `lost_power`. Signed-off-by: Théo Lebrun Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-3-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 4 ++-- drivers/usb/cdns3/cdnsp-gadget.c | 2 +- drivers/usb/cdns3/core.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index fd1beb10bba7..694aa1457739 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -3468,7 +3468,7 @@ __must_hold(&cdns->lock) return 0; } -static int cdns3_gadget_resume(struct cdns *cdns, bool hibernated) +static int cdns3_gadget_resume(struct cdns *cdns, bool lost_power) { struct cdns3_device *priv_dev = cdns->gadget_dev; @@ -3476,7 +3476,7 @@ static int cdns3_gadget_resume(struct cdns *cdns, bool hibernated) return 0; cdns3_gadget_config(priv_dev); - if (hibernated) + if (lost_power) writel(USB_CONF_DEVEN, &priv_dev->regs->usb_conf); return 0; diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 97edf767ecee..87f310841735 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1974,7 +1974,7 @@ static int cdnsp_gadget_suspend(struct cdns *cdns, bool do_wakeup) return 0; } -static int cdnsp_gadget_resume(struct cdns *cdns, bool hibernated) +static int cdnsp_gadget_resume(struct cdns *cdns, bool lost_power) { struct cdnsp_device *pdev = cdns->gadget_dev; enum usb_device_speed max_speed; diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 57d47348dc19..921cccf1ca9d 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -30,7 +30,7 @@ struct cdns_role_driver { int (*start)(struct cdns *cdns); void (*stop)(struct cdns *cdns); int (*suspend)(struct cdns *cdns, bool do_wakeup); - int (*resume)(struct cdns *cdns, bool hibernated); + int (*resume)(struct cdns *cdns, bool lost_power); const char *name; #define CDNS_ROLE_STATE_INACTIVE 0 #define CDNS_ROLE_STATE_ACTIVE 1 From 17c6526b333cfd89a4c888a6f7c876c8c326e5ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:49 +0100 Subject: [PATCH 0746/2157] usb: cdns3: call cdns_power_is_lost() only once in cdns_resume() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cdns_power_is_lost() does a register read. Call it only once rather than twice. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-4-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 98980a23e1c2..1243a5cea91b 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -524,11 +524,12 @@ EXPORT_SYMBOL_GPL(cdns_suspend); int cdns_resume(struct cdns *cdns) { + bool power_lost = cdns_power_is_lost(cdns); enum usb_role real_role; bool role_changed = false; int ret = 0; - if (cdns_power_is_lost(cdns)) { + if (power_lost) { if (!cdns->role_sw) { real_role = cdns_hw_role_state_machine(cdns); if (real_role != cdns->role) { @@ -551,7 +552,7 @@ int cdns_resume(struct cdns *cdns) } if (cdns->roles[cdns->role]->resume) - cdns->roles[cdns->role]->resume(cdns, cdns_power_is_lost(cdns)); + cdns->roles[cdns->role]->resume(cdns, power_lost); return 0; } From 24346dc29174632237ad3f6f920fbe0765061cf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:50 +0100 Subject: [PATCH 0747/2157] usb: cdns3-ti: move reg writes to separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device probe function mixes management code and hardware initialisation code. Extract the latter into an explicitly named cdns_ti_reset_and_init_hw() function to clarify intent. It also will allow easier transition to using runtime PM for triggering HW init. Reviewed-by: Roger Quadros Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-5-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-ti.c | 82 +++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-ti.c b/drivers/usb/cdns3/cdns3-ti.c index 040bb91e9c01..d573a259f1ae 100644 --- a/drivers/usb/cdns3/cdns3-ti.c +++ b/drivers/usb/cdns3/cdns3-ti.c @@ -58,6 +58,7 @@ struct cdns_ti { unsigned vbus_divider:1; struct clk *usb2_refclk; struct clk *lpm_clk; + int usb2_refclk_rate_code; }; static const int cdns_ti_rate_table[] = { /* in KHZ */ @@ -98,15 +99,50 @@ static const struct of_dev_auxdata cdns_ti_auxdata[] = { {}, }; +static void cdns_ti_reset_and_init_hw(struct cdns_ti *data) +{ + u32 reg; + + /* assert RESET */ + reg = cdns_ti_readl(data, USBSS_W1); + reg &= ~USBSS_W1_PWRUP_RST; + cdns_ti_writel(data, USBSS_W1, reg); + + /* set static config */ + reg = cdns_ti_readl(data, USBSS_STATIC_CONFIG); + reg &= ~USBSS1_STATIC_PLL_REF_SEL_MASK; + reg |= data->usb2_refclk_rate_code << USBSS1_STATIC_PLL_REF_SEL_SHIFT; + + reg &= ~USBSS1_STATIC_VBUS_SEL_MASK; + if (data->vbus_divider) + reg |= 1 << USBSS1_STATIC_VBUS_SEL_SHIFT; + + cdns_ti_writel(data, USBSS_STATIC_CONFIG, reg); + reg = cdns_ti_readl(data, USBSS_STATIC_CONFIG); + + /* set USB2_ONLY mode if requested */ + reg = cdns_ti_readl(data, USBSS_W1); + if (data->usb2_only) + reg |= USBSS_W1_USB2_ONLY; + + /* set default modestrap */ + reg |= USBSS_W1_MODESTRAP_SEL; + reg &= ~USBSS_W1_MODESTRAP_MASK; + reg |= USBSS_MODESTRAP_MODE_NONE << USBSS_W1_MODESTRAP_SHIFT; + cdns_ti_writel(data, USBSS_W1, reg); + + /* de-assert RESET */ + reg |= USBSS_W1_PWRUP_RST; + cdns_ti_writel(data, USBSS_W1, reg); +} + static int cdns_ti_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = pdev->dev.of_node; struct cdns_ti *data; - int error; - u32 reg; - int rate_code, i; unsigned long rate; + int error, i; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -146,7 +182,11 @@ static int cdns_ti_probe(struct platform_device *pdev) return -EINVAL; } - rate_code = i; + data->usb2_refclk_rate_code = i; + data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); + data->usb2_only = device_property_read_bool(dev, "ti,usb2-only"); + + cdns_ti_reset_and_init_hw(data); pm_runtime_enable(dev); error = pm_runtime_get_sync(dev); @@ -155,40 +195,6 @@ static int cdns_ti_probe(struct platform_device *pdev) goto err; } - /* assert RESET */ - reg = cdns_ti_readl(data, USBSS_W1); - reg &= ~USBSS_W1_PWRUP_RST; - cdns_ti_writel(data, USBSS_W1, reg); - - /* set static config */ - reg = cdns_ti_readl(data, USBSS_STATIC_CONFIG); - reg &= ~USBSS1_STATIC_PLL_REF_SEL_MASK; - reg |= rate_code << USBSS1_STATIC_PLL_REF_SEL_SHIFT; - - reg &= ~USBSS1_STATIC_VBUS_SEL_MASK; - data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); - if (data->vbus_divider) - reg |= 1 << USBSS1_STATIC_VBUS_SEL_SHIFT; - - cdns_ti_writel(data, USBSS_STATIC_CONFIG, reg); - reg = cdns_ti_readl(data, USBSS_STATIC_CONFIG); - - /* set USB2_ONLY mode if requested */ - reg = cdns_ti_readl(data, USBSS_W1); - data->usb2_only = device_property_read_bool(dev, "ti,usb2-only"); - if (data->usb2_only) - reg |= USBSS_W1_USB2_ONLY; - - /* set default modestrap */ - reg |= USBSS_W1_MODESTRAP_SEL; - reg &= ~USBSS_W1_MODESTRAP_MASK; - reg |= USBSS_MODESTRAP_MODE_NONE << USBSS_W1_MODESTRAP_SHIFT; - cdns_ti_writel(data, USBSS_W1, reg); - - /* de-assert RESET */ - reg |= USBSS_W1_PWRUP_RST; - cdns_ti_writel(data, USBSS_W1, reg); - error = of_platform_populate(node, NULL, cdns_ti_auxdata, dev); if (error) { dev_err(dev, "failed to create children: %d\n", error); From 9925aa4b025e46cffc9fc98c5fb28a4a303a739b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:51 +0100 Subject: [PATCH 0748/2157] usb: cdns3-ti: run HW init at resume() if HW was reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At runtime_resume(), read the W1 (Wrapper Register 1) register to detect if an hardware reset occurred. If it did, run the hardware init sequence. This callback will be called at system-wide resume. Previously, if a reset occurred during suspend, we would crash. The wrapper config had not been written, leading to invalid register accesses inside cdns3. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-6-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-ti.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-ti.c b/drivers/usb/cdns3/cdns3-ti.c index d573a259f1ae..302ebf6d8e53 100644 --- a/drivers/usb/cdns3/cdns3-ti.c +++ b/drivers/usb/cdns3/cdns3-ti.c @@ -186,6 +186,12 @@ static int cdns_ti_probe(struct platform_device *pdev) data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); data->usb2_only = device_property_read_bool(dev, "ti,usb2-only"); + /* + * The call below to pm_runtime_get_sync() MIGHT reset hardware, if it + * detects it as uninitialised. We want to enforce a reset at probe, + * and so do it manually here. This means the first runtime_resume() + * will be a no-op. + */ cdns_ti_reset_and_init_hw(data); pm_runtime_enable(dev); @@ -230,6 +236,24 @@ static void cdns_ti_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); } +static int cdns_ti_runtime_resume(struct device *dev) +{ + const u32 mask = USBSS_W1_PWRUP_RST | USBSS_W1_MODESTRAP_SEL; + struct cdns_ti *data = dev_get_drvdata(dev); + u32 w1; + + w1 = cdns_ti_readl(data, USBSS_W1); + if ((w1 & mask) != mask) + cdns_ti_reset_and_init_hw(data); + + return 0; +} + +static const struct dev_pm_ops cdns_ti_pm_ops = { + RUNTIME_PM_OPS(NULL, cdns_ti_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) +}; + static const struct of_device_id cdns_ti_of_match[] = { { .compatible = "ti,j721e-usb", }, { .compatible = "ti,am64-usb", }, @@ -243,6 +267,7 @@ static struct platform_driver cdns_ti_driver = { .driver = { .name = "cdns3-ti", .of_match_table = cdns_ti_of_match, + .pm = pm_ptr(&cdns_ti_pm_ops), }, }; From 34cca0ceab5ba34a7cfaa32bbca5eb7abb85622b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:52 +0100 Subject: [PATCH 0749/2157] usb: xhci: change xhci_resume() parameters to explicit the desired info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous signature was: int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg); Internally, it extracted two information out of the message: - whether we are after hibernation: msg.event == PM_EVENT_RESTORE, - whether this is an auto resume: msg.event == PM_EVENT_AUTO_RESUME. First bulletpoint is somewhat wrong: driver wants to know if the device did lose power, it doesn't care about hibernation per se. Knowing that, refactor to ask upper layers the right questions: (1) "did we lose power?" and, (2) "is this an auto resume?". Change the signature to: int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume); The goal is to allow some upper layers (cdns3-plat) to tell us when power was lost after system-wise suspend. Note that lost_power is ORed at the start of xhci_resume() to xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend. It is simpler to keep those checks inside of xhci_resume() instead of doing them at each caller of xhci_resume(). Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-7-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-histb.c | 2 +- drivers/usb/host/xhci-pci.c | 8 +++++--- drivers/usb/host/xhci-plat.c | 10 +++++----- drivers/usb/host/xhci-tegra.c | 2 +- drivers/usb/host/xhci.c | 19 ++++++++----------- drivers/usb/host/xhci.h | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 8a7d46dae62c..02396c8721dc 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -355,7 +355,7 @@ static int __maybe_unused xhci_histb_resume(struct device *dev) if (!device_may_wakeup(dev)) xhci_histb_host_enable(histb); - return xhci_resume(xhci, PMSG_RESUME); + return xhci_resume(xhci, false, false); } static const struct dev_pm_ops xhci_histb_pm_ops = { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 54460d11f7ee..0c481cbc8f08 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -807,8 +807,10 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) static int xhci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) { - struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + bool power_lost = msg.event == PM_EVENT_RESTORE; + bool is_auto_resume = msg.event == PM_EVENT_AUTO_RESUME; reset_control_reset(xhci->reset); @@ -839,7 +841,7 @@ static int xhci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_quirk(hcd); - return xhci_resume(xhci, msg); + return xhci_resume(xhci, power_lost, is_auto_resume); } static int xhci_pci_poweroff_late(struct usb_hcd *hcd, bool do_wakeup) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index ff813dca2d1d..7c2d7c4e6011 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -479,7 +479,7 @@ static int xhci_plat_suspend(struct device *dev) return 0; } -static int xhci_plat_resume_common(struct device *dev, struct pm_message pmsg) +static int xhci_plat_resume_common(struct device *dev, bool power_lost) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -501,7 +501,7 @@ static int xhci_plat_resume_common(struct device *dev, struct pm_message pmsg) if (ret) goto disable_clks; - ret = xhci_resume(xhci, pmsg); + ret = xhci_resume(xhci, power_lost, false); if (ret) goto disable_clks; @@ -522,12 +522,12 @@ static int xhci_plat_resume_common(struct device *dev, struct pm_message pmsg) static int xhci_plat_resume(struct device *dev) { - return xhci_plat_resume_common(dev, PMSG_RESUME); + return xhci_plat_resume_common(dev, false); } static int xhci_plat_restore(struct device *dev) { - return xhci_plat_resume_common(dev, PMSG_RESTORE); + return xhci_plat_resume_common(dev, true); } static int __maybe_unused xhci_plat_runtime_suspend(struct device *dev) @@ -548,7 +548,7 @@ static int __maybe_unused xhci_plat_runtime_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - return xhci_resume(xhci, PMSG_AUTO_RESUME); + return xhci_resume(xhci, false, true); } const struct dev_pm_ops xhci_plat_pm_ops = { diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 107a95b59541..b5c362c2051d 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -2287,7 +2287,7 @@ static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool is_auto_resume) if (wakeup) tegra_xhci_disable_phy_sleepwalk(tegra); - err = xhci_resume(xhci, is_auto_resume ? PMSG_AUTO_RESUME : PMSG_RESUME); + err = xhci_resume(xhci, false, is_auto_resume); if (err < 0) { dev_err(tegra->dev, "failed to resume XHCI: %d\n", err); goto disable_phy; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 19e308f4fc06..83a4adf57bae 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -994,16 +994,14 @@ EXPORT_SYMBOL_GPL(xhci_suspend); * This is called when the machine transition from S3/S4 mode. * */ -int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) +int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume) { - bool hibernated = (msg.event == PM_EVENT_RESTORE); u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; bool suspended_usb3_devs = false; - bool reinit_xhc = false; if (!hcd->state) return 0; @@ -1022,10 +1020,10 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) spin_lock_irq(&xhci->lock); - if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend) - reinit_xhc = true; + if (xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend) + power_lost = true; - if (!reinit_xhc) { + if (!power_lost) { /* * Some controllers might lose power during suspend, so wait * for controller not ready bit to clear, just as in xHC init. @@ -1065,12 +1063,12 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) /* re-initialize the HC on Restore Error, or Host Controller Error */ if ((temp & (STS_SRE | STS_HCE)) && !(xhci->xhc_state & XHCI_STATE_REMOVING)) { - reinit_xhc = true; - if (!xhci->broken_suspend) + if (!power_lost) xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + power_lost = true; } - if (reinit_xhc) { + if (power_lost) { if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { del_timer_sync(&xhci->comp_mode_recovery_timer); @@ -1168,8 +1166,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) pending_portevent = xhci_pending_portevent(xhci); - if (suspended_usb3_devs && !pending_portevent && - msg.event == PM_EVENT_AUTO_RESUME) { + if (suspended_usb3_devs && !pending_portevent && is_auto_resume) { msleep(120); pending_portevent = xhci_pending_portevent(xhci); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 3c4da37e1694..37860f1e3aba 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1880,7 +1880,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); -int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg); +int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume); irqreturn_t xhci_irq(struct usb_hcd *hcd); irqreturn_t xhci_msi_irq(int irq, void *hcd); From 668cc6bc1178ac94a6ff0eba3a9e8be59f89c318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:53 +0100 Subject: [PATCH 0750/2157] usb: host: xhci-plat: allow upper layers to signal power loss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that xhci_resume() exposes a power_lost boolean argument, expose that to all xhci-plat implementations. They are free to set it from wherever they want: - Their own resume() callback. - The xhci_plat_priv::resume_quirk() callback. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-8-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 ++- drivers/usb/host/xhci-plat.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 7c2d7c4e6011..3155e3a842da 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -482,6 +482,7 @@ static int xhci_plat_suspend(struct device *dev) static int xhci_plat_resume_common(struct device *dev, bool power_lost) { struct usb_hcd *hcd = dev_get_drvdata(dev); + struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; @@ -501,7 +502,7 @@ static int xhci_plat_resume_common(struct device *dev, bool power_lost) if (ret) goto disable_clks; - ret = xhci_resume(xhci, power_lost, false); + ret = xhci_resume(xhci, power_lost || priv->power_lost, false); if (ret) goto disable_clks; diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 6475130eac4b..fe4f95e690fa 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -15,6 +15,7 @@ struct usb_hcd; struct xhci_plat_priv { const char *firmware_name; unsigned long long quirks; + bool power_lost; void (*plat_start)(struct usb_hcd *); int (*init_quirk)(struct usb_hcd *); int (*suspend_quirk)(struct usb_hcd *); From 3a85c10115407588fad696d6c121d54cd5ba5d72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 5 Feb 2025 18:36:54 +0100 Subject: [PATCH 0751/2157] usb: host: cdns3: forward lost power information to xhci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cdns3-plat can know if power was lost across system-wide suspend. Forward that information: - Grab the lost_power bool from cdns_role_driver::resume(). Store it into the power_lost field in struct xhci_plat_priv. - xhci-plat will call xhci_resume() with that value (ORed to whether we are in a hibernation restore). Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20250205-s2r-cdns-v7-9-13658a271c3c@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 7ba760ee62e3..f0df114c2b53 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -138,6 +138,16 @@ static void cdns_host_exit(struct cdns *cdns) cdns_drd_host_off(cdns); } +static int cdns_host_resume(struct cdns *cdns, bool power_lost) +{ + struct usb_hcd *hcd = platform_get_drvdata(cdns->host_dev); + struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); + + priv->power_lost = power_lost; + + return 0; +} + int cdns_host_init(struct cdns *cdns) { struct cdns_role_driver *rdrv; @@ -148,6 +158,7 @@ int cdns_host_init(struct cdns *cdns) rdrv->start = __cdns_host_init; rdrv->stop = cdns_host_exit; + rdrv->resume = cdns_host_resume; rdrv->state = CDNS_ROLE_STATE_INACTIVE; rdrv->name = "host"; From 5442d22da7dbff3ba8c6720fc6f23ea4934d402d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Mar 2025 13:55:10 +0300 Subject: [PATCH 0752/2157] Coresight: Fix a NULL vs IS_ERR() bug in probe The devm_platform_get_and_ioremap_resource() function doesn't return NULL, it returns error pointers. Update the checking to match. Fixes: f78d206f3d73 ("Coresight: Add Coresight TMC Control Unit driver") Signed-off-by: Dan Carpenter Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/dab039b9-d58a-41be-92f0-ff209cfabfe2@stanley.mountain --- drivers/hwtracing/coresight/coresight-ctcu-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-ctcu-core.c b/drivers/hwtracing/coresight/coresight-ctcu-core.c index da35d8b4d579..c6bafc96db96 100644 --- a/drivers/hwtracing/coresight/coresight-ctcu-core.c +++ b/drivers/hwtracing/coresight/coresight-ctcu-core.c @@ -204,8 +204,8 @@ static int ctcu_probe(struct platform_device *pdev) dev->platform_data = pdata; base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); - if (!base) - return -ENOMEM; + if (IS_ERR(base)) + return PTR_ERR(base); drvdata->apb_clk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->apb_clk)) From 45fc728515c14f53f6205789de5bfd72a95af3b8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 14 Mar 2025 13:51:32 +0100 Subject: [PATCH 0753/2157] dm: restrict dm device size to 2^63-512 bytes The devices with size >= 2^63 bytes can't be used reliably by userspace because the type off_t is a signed 64-bit integer. Therefore, we limit the maximum size of a device mapper device to 2^63-512 bytes. Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0ef5203387b2..04b3e9758e53 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -697,6 +697,10 @@ int dm_table_add_target(struct dm_table *t, const char *type, DMERR("%s: zero-length target", dm_device_name(t->md)); return -EINVAL; } + if (start + len < start || start + len > LLONG_MAX >> SECTOR_SHIFT) { + DMERR("%s: too large device", dm_device_name(t->md)); + return -EINVAL; + } ti->type = dm_get_target_type(type); if (!ti->type) { From ee20c69c789b6cb2179a535cf440d72b98f4a134 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Mar 2025 12:30:58 +0300 Subject: [PATCH 0754/2157] drm: adp: Fix NULL vs IS_ERR() check in adp_plane_new() The __drmm_universal_plane_alloc() function doesn't return NULL, it returns error pointers. Update the check to match. Fixes: 332122eba628 ("drm: adp: Add Apple Display Pipe driver") Signed-off-by: Dan Carpenter Acked-by: Sasha Finkelstein Link: https://patchwork.freedesktop.org/patch/msgid/14a845e8-54d0-45f8-b8b9-927609d92ede@stanley.mountain Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 0eeb9e5fab26..c98c647f981d 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -232,9 +232,9 @@ static struct drm_plane *adp_plane_new(struct adp_drv_private *adp) ALL_CRTCS, &adp_plane_funcs, plane_formats, ARRAY_SIZE(plane_formats), NULL, DRM_PLANE_TYPE_PRIMARY, "plane"); - if (!plane) { + if (IS_ERR(plane)) { drm_err(drm, "failed to allocate plane"); - return ERR_PTR(-ENOMEM); + return plane; } drm_plane_helper_add(plane, &adp_plane_helper_funcs); From 36f257e3b0ba904f5a4e7fa8dafaa60e88cdd28c Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Mon, 10 Mar 2025 22:38:38 +0000 Subject: [PATCH 0755/2157] acpi/ghes, cxl/pci: Process CXL CPER Protocol Errors When PCIe AER is in FW-First, OS should process CXL Protocol errors from CPER records. Introduce support for handling and logging CXL Protocol errors. The defined trace events cxl_aer_uncorrectable_error and cxl_aer_correctable_error trace native CXL AER endpoint errors. Reuse them to trace FW-First Protocol errors. Since the CXL code is required to be called from process context and GHES is in interrupt context, use workqueues for processing. Similar to CXL CPER event handling, use kfifo to handle errors as it simplifies queue processing by providing lock free fifo operations. Add the ability for the CXL sub-system to register a workqueue to process CXL CPER protocol errors. [DJ: return cxl_cper_register_prot_err_work() directly in cxl_ras_init()] Signed-off-by: Smita Koralahalli Reviewed-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Reviewed-by: Tony Luck Link: https://patch.msgid.link/20250310223839.31342-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/acpi/apei/ghes.c | 49 +++++++++++++++++++++++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/port.c | 7 ++++ drivers/cxl/core/ras.c | 82 +++++++++++++++++++++++++++++++++++++++ include/cxl/event.h | 15 +++++++ tools/testing/cxl/Kbuild | 1 + 7 files changed, 158 insertions(+) create mode 100644 drivers/cxl/core/ras.c diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 4d725d988c43..289e365f84b2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -674,6 +674,15 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +/* Room for 8 entries */ +#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8 +static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data, + CXL_CPER_PROT_ERR_FIFO_DEPTH); + +/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */ +static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock); +struct work_struct *cxl_cper_prot_err_work; + static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, int severity) { @@ -700,6 +709,11 @@ static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) pr_warn(FW_WARN "CXL CPER no device serial number\n"); + guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock); + + if (!cxl_cper_prot_err_work) + return; + switch (prot_err->agent_type) { case RCD: case DEVICE: @@ -721,9 +735,44 @@ static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, prot_err->agent_type); return; } + + if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) { + pr_err_ratelimited("CXL CPER kfifo overflow\n"); + return; + } + + schedule_work(cxl_cper_prot_err_work); #endif } +int cxl_cper_register_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = work; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL"); + +int cxl_cper_unregister_prot_err_work(struct work_struct *work) +{ + if (cxl_cper_prot_err_work != work) + return -EINVAL; + + guard(spinlock)(&cxl_cper_prot_err_work_lock); + cxl_cper_prot_err_work = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL"); + +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) +{ + return kfifo_get(&cxl_cper_prot_err_fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL"); + /* Room for 8 entries for each of the 4 event log queues */ #define CXL_CPER_FIFO_DEPTH 32 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9259bcc6773c..ba5f0916d379 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,5 +14,6 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o +cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..c87dbfcc9403 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -115,4 +115,7 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_ras_init(void); +void cxl_ras_exit(void); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..7ed6e8f374af 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2339,8 +2339,14 @@ static __init int cxl_core_init(void) if (rc) goto err_region; + rc = cxl_ras_init(); + if (rc) + goto err_ras; + return 0; +err_ras: + cxl_region_exit(); err_region: bus_unregister(&cxl_bus_type); err_bus: @@ -2352,6 +2358,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { + cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); destroy_workqueue(cxl_bus_wq); diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c new file mode 100644 index 000000000000..91273af6ccbc --- /dev/null +++ b/drivers/cxl/core/ras.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include +#include "trace.h" + +static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; + struct cxl_dev_state *cxlds; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); +} + +static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + struct cxl_dev_state *cxlds; + u32 fe; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + if (hweight32(status) > 1) + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + ras_cap.cap_control)); + else + fe = status; + + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, + ras_cap.header_log); +} + +static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) +{ + unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, + data->prot_err.agent_addr.function); + struct pci_dev *pdev __free(pci_dev_put) = + pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, + data->prot_err.agent_addr.bus, + devfn); + + if (!pdev) + return; + + guard(device)(&pdev->dev); + + if (data->severity == AER_CORRECTABLE) + cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); + else + cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); +} + +static void cxl_cper_prot_err_work_fn(struct work_struct *work) +{ + struct cxl_cper_prot_err_work_data wd; + + while (cxl_cper_prot_err_kfifo_get(&wd)) + cxl_cper_handle_prot_err(&wd); +} +static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); + +int cxl_ras_init(void) +{ + return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); +} + +void cxl_ras_exit(void) +{ + cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); + cancel_work_sync(&cxl_cper_prot_err_work); +} diff --git a/include/cxl/event.h b/include/cxl/event.h index 8381a07052d0..f9ae1796da85 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -254,6 +254,9 @@ struct cxl_cper_prot_err_work_data { int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work); int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); +int cxl_cper_register_prot_err_work(struct work_struct *work); +int cxl_cper_unregister_prot_err_work(struct work_struct *work); +int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd); #else static inline int cxl_cper_register_work(struct work_struct *work) { @@ -268,6 +271,18 @@ static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) { return 0; } +static inline int cxl_cper_register_prot_err_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_unregister_prot_err_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) +{ + return 0; +} #endif #endif /* _LINUX_CXL_EVENT_H */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index b1256fee3567..3d71447c0bd8 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -61,6 +61,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o +cxl_core-y += $(CXL_CORE_SRC)/ras.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o From 02f4f0177d8e7647016fc29f11c1a7bb75bc2182 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Mon, 10 Mar 2025 22:38:39 +0000 Subject: [PATCH 0756/2157] cxl/pci: Add trace logging for CXL PCIe Port RAS errors The CXL drivers use kernel trace functions for logging endpoint and Restricted CXL host (RCH) Downstream Port RAS errors. Similar functionality is required for CXL Root Ports, CXL Downstream Switch Ports, and CXL Upstream Switch Ports. Introduce trace logging functions for both RAS correctable and uncorrectable errors specific to CXL PCIe Ports. Use them to trace FW-First Protocol errors. Co-developed-by: Terry Bowman Signed-off-by: Terry Bowman Signed-off-by: Smita Koralahalli Reviewed-by: Ira Weiny Reviewed-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Tony Luck Link: https://patch.msgid.link/20250310223839.31342-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/cxl/core/ras.c | 37 +++++++++++++++++++++++++++++++ drivers/cxl/core/trace.h | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 91273af6ccbc..485a831695c7 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -7,6 +7,30 @@ #include #include "trace.h" +static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; + + trace_cxl_port_aer_correctable_error(&pdev->dev, status); +} + +static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + u32 fe; + + if (hweight32(status) > 1) + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + ras_cap.cap_control)); + else + fe = status; + + trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, + ras_cap.header_log); +} + static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, struct cxl_ras_capability_regs ras_cap) { @@ -49,12 +73,25 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, data->prot_err.agent_addr.bus, devfn); + int port_type; if (!pdev) return; guard(device)(&pdev->dev); + port_type = pci_pcie_type(pdev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT || + port_type == PCI_EXP_TYPE_DOWNSTREAM || + port_type == PCI_EXP_TYPE_UPSTREAM) { + if (data->severity == AER_CORRECTABLE) + cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap); + else + cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap); + + return; + } + if (data->severity == AER_CORRECTABLE) cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); else diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index cea706b683b5..342ae8d50f32 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -48,6 +48,34 @@ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ ) +TRACE_EVENT(cxl_port_aer_uncorrectable_error, + TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), + TP_ARGS(dev, status, fe, hl), + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __string(host, dev_name(dev->parent)) + __field(u32, status) + __field(u32, first_error) + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) + ), + TP_fast_assign( + __assign_str(device); + __assign_str(host); + __entry->status = status; + __entry->first_error = fe; + /* + * Embed the 512B headerlog data for user app retrieval and + * parsing, but no need to print this in the trace buffer. + */ + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); + ), + TP_printk("device=%s host=%s status: '%s' first_error: '%s'", + __get_str(device), __get_str(host), + show_uc_errs(__entry->status), + show_uc_errs(__entry->first_error) + ) +); + TRACE_EVENT(cxl_aer_uncorrectable_error, TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), TP_ARGS(cxlmd, status, fe, hl), @@ -96,6 +124,25 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ ) +TRACE_EVENT(cxl_port_aer_correctable_error, + TP_PROTO(struct device *dev, u32 status), + TP_ARGS(dev, status), + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __string(host, dev_name(dev->parent)) + __field(u32, status) + ), + TP_fast_assign( + __assign_str(device); + __assign_str(host); + __entry->status = status; + ), + TP_printk("device=%s host=%s status='%s'", + __get_str(device), __get_str(host), + show_ce_errs(__entry->status) + ) +); + TRACE_EVENT(cxl_aer_correctable_error, TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), TP_ARGS(cxlmd, status), From eeba74747a6634c59887750efcf534b335899993 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:47 +0800 Subject: [PATCH 0757/2157] cxl/core: Use guard() to replace open-coded down_read/write() Some down/up_read() and down/up_write() cases can be replaced by a guard() simply to drop explicit unlock invoked. It helps to align coding style with current CXL subsystem's. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-2-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 15 +++++---------- drivers/cxl/core/memdev.c | 9 +++------ drivers/cxl/core/port.c | 8 ++------ drivers/cxl/core/region.c | 8 +++----- 4 files changed, 13 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 50e6a45b30ba..4a578092377e 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -213,13 +213,12 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) { struct resource *p1, *p2; - down_read(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_dpa_rwsem); for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { __cxl_dpa_debug(file, p1, 0); for (p2 = p1->child; p2; p2 = p2->sibling) __cxl_dpa_debug(file, p2, 1); } - up_read(&cxl_dpa_rwsem); } EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL"); @@ -250,9 +249,8 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) static void cxl_dpa_release(void *cxled) { - down_write(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_dpa_rwsem); __cxl_dpa_release(cxled); - up_write(&cxl_dpa_rwsem); } /* @@ -362,14 +360,11 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL"); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { - resource_size_t size = 0; - - down_read(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_dpa_rwsem); if (cxled->dpa_res) - size = resource_size(cxled->dpa_res); - up_read(&cxl_dpa_rwsem); + return resource_size(cxled->dpa_res); - return size; + return 0; } resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index ae3dfcbe8938..98c05426aa4a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -564,10 +564,9 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { - down_write(&cxl_memdev_rwsem); + guard(rwsem_write)(&cxl_memdev_rwsem); bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds, CXL_MEM_COMMAND_ID_MAX); - up_write(&cxl_memdev_rwsem); } EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); @@ -579,10 +578,9 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { - down_write(&cxl_memdev_rwsem); + guard(rwsem_write)(&cxl_memdev_rwsem); bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds, CXL_MEM_COMMAND_ID_MAX); - up_write(&cxl_memdev_rwsem); } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL"); @@ -590,9 +588,8 @@ static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - down_write(&cxl_memdev_rwsem); + guard(rwsem_write)(&cxl_memdev_rwsem); cxlmd->cxlds = NULL; - up_write(&cxl_memdev_rwsem); } static void cxl_memdev_unregister(void *_cxlmd) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..2c59d65bc18b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -549,13 +549,9 @@ static ssize_t decoders_committed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_port *port = to_cxl_port(dev); - int rc; - down_read(&cxl_region_rwsem); - rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); - up_read(&cxl_region_rwsem); - - return rc; + guard(rwsem_read)(&cxl_region_rwsem); + return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); } static DEVICE_ATTR_RO(decoders_committed); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e8d11a988fd9..d8a71f9f9fa5 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3208,7 +3208,6 @@ static int match_region_by_range(struct device *dev, const void *data) struct cxl_region_params *p; struct cxl_region *cxlr; const struct range *r = data; - int rc = 0; if (!is_cxl_region(dev)) return 0; @@ -3216,12 +3215,11 @@ static int match_region_by_range(struct device *dev, const void *data) cxlr = to_cxl_region(dev); p = &cxlr->params; - down_read(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_region_rwsem); if (p->res && p->res->start == r->start && p->res->end == r->end) - rc = 1; - up_read(&cxl_region_rwsem); + return 1; - return rc; + return 0; } /* Establish an empty region covering the given HPA range */ From 3ad4f59f38965071e429ace93c75a94a2ede5456 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:48 +0800 Subject: [PATCH 0758/2157] cxl/core: cxl_mem_sanitize() cleanup In cxl_mem_sanitize(), the down_read() and up_read() for cxl_region_rwsem can be simply replaced by a guard(rwsem_read), and the local variable 'rc' can be removed. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-3-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..0601297af0c9 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1222,23 +1222,19 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) { struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_port *endpoint; - int rc; /* synchronize with cxl_mem_probe() and decoder write operations */ guard(device)(&cxlmd->dev); endpoint = cxlmd->endpoint; - down_read(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_region_rwsem); /* * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. */ if (endpoint && cxl_num_decoders_committed(endpoint) == 0) - rc = __cxl_mem_sanitize(mds, cmd); - else - rc = -EBUSY; - up_read(&cxl_region_rwsem); + return __cxl_mem_sanitize(mds, cmd); - return rc; + return -EBUSY; } static int add_dpa_res(struct device *dev, struct resource *parent, From a58afda8bfd4a113295f0e12c0697c35a69614b2 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:49 +0800 Subject: [PATCH 0759/2157] cxl/memdev: cxl_memdev_ioctl() cleanup In cxl_memdev_ioctl(), the down_read(&cxl_memdev_rwsem) and up_read(&cxl_memdev_rwsem) can be replaced by a guard(rwsem_read)(&cxl_memdev_rwsem), it helps to remove the open-coded up_read(&cxl_memdev_rwsem). Besides, the local var 'rc' can be also removed to make the code more cleaner. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-4-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/memdev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 98c05426aa4a..6f90dcd626f9 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -668,15 +668,13 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, { struct cxl_memdev *cxlmd = file->private_data; struct cxl_dev_state *cxlds; - int rc = -ENXIO; - down_read(&cxl_memdev_rwsem); + guard(rwsem_read)(&cxl_memdev_rwsem); cxlds = cxlmd->cxlds; if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM) - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); - up_read(&cxl_memdev_rwsem); + return __cxl_memdev_ioctl(cxlmd, cmd, arg); - return rc; + return -ENXIO; } static int cxl_memdev_open(struct inode *inode, struct file *file) From 16fe6ec4ac3d828b3976bd36e4d99af73f8e43d2 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:50 +0800 Subject: [PATCH 0760/2157] cxl/core: Use guard() to drop the goto pattern of cxl_dpa_free() cxl_dpa_free() has a goto pattern to call up_write() for cxl_dpa_rwsem, it can be removed by using a guard() to replace the down_write() and up_write(). Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-5-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 4a578092377e..874a791f8292 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -382,35 +382,27 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); struct device *dev = &cxled->cxld.dev; - int rc; - down_write(&cxl_dpa_rwsem); - if (!cxled->dpa_res) { - rc = 0; - goto out; - } + guard(rwsem_write)(&cxl_dpa_rwsem); + if (!cxled->dpa_res) + return 0; if (cxled->cxld.region) { dev_dbg(dev, "decoder assigned to: %s\n", dev_name(&cxled->cxld.region->dev)); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { dev_dbg(dev, "decoder enabled\n"); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.id != port->hdm_end) { dev_dbg(dev, "expected decoder%d.%d\n", port->id, port->hdm_end); - rc = -EBUSY; - goto out; + return -EBUSY; } + devm_cxl_dpa_release(cxled); - rc = 0; -out: - up_write(&cxl_dpa_rwsem); - return rc; + return 0; } int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, From a81ebe7d19b6fdc6de0159878fbed9945120813e Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:51 +0800 Subject: [PATCH 0761/2157] cxl/core: Use guard() to drop goto pattern of cxl_dpa_alloc() In cxl_dpa_alloc(), some checking and operations need to be protected by a rwsem called cxl_dpa_rwsem, so there is a goto pattern in cxl_dpa_alloc() to release the rwsem. The goto pattern can be optimized by using guard() to hold the rwsem. Creating a new function called __cxl_dpa_alloc() to include all checking and operations needed to be protected by cxl_dpa_rwsem. Using guard(rwsem_write()) to hold cxl_dpa_rwsem at the beginning of the new function. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-6-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 874a791f8292..1edbf7873471 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -442,29 +442,25 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, return 0; } -int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) +static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); resource_size_t free_ram_start, free_pmem_start; - struct cxl_port *port = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; resource_size_t start, avail, skip; struct resource *p, *last; - int rc; - down_write(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_dpa_rwsem); if (cxled->cxld.region) { dev_dbg(dev, "decoder attached to %s\n", dev_name(&cxled->cxld.region->dev)); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { dev_dbg(dev, "decoder enabled\n"); - rc = -EBUSY; - goto out; + return -EBUSY; } for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling) @@ -504,21 +500,24 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) skip = skip_end - skip_start + 1; } else { dev_dbg(dev, "mode not set\n"); - rc = -EINVAL; - goto out; + return -EINVAL; } if (size > avail) { dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, cxl_decoder_mode_name(cxled->mode), &avail); - rc = -ENOSPC; - goto out; + return -ENOSPC; } - rc = __cxl_dpa_reserve(cxled, start, size, skip); -out: - up_write(&cxl_dpa_rwsem); + return __cxl_dpa_reserve(cxled, start, size, skip); +} +int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) +{ + struct cxl_port *port = cxled_to_port(cxled); + int rc; + + rc = __cxl_dpa_alloc(cxled, size); if (rc) return rc; From 9e7b7ab5af69aaa5cd027656529663407da61e6f Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:24:52 +0800 Subject: [PATCH 0762/2157] cxl/region: Drop goto pattern in cxl_dax_region_alloc() In cxl_dax_region_alloc(), there is a goto pattern to release the rwsem cxl_region_rwsem when the function returns, the down_read() and up_read can be replaced by a guard(rwsem_read) then the goto pattern can be removed. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221012453.126366-7-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d8a71f9f9fa5..320a3f218131 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3038,17 +3038,13 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) struct cxl_dax_region *cxlr_dax; struct device *dev; - down_read(&cxl_region_rwsem); - if (p->state != CXL_CONFIG_COMMIT) { - cxlr_dax = ERR_PTR(-ENXIO); - goto out; - } + guard(rwsem_read)(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) + return ERR_PTR(-ENXIO); cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); - if (!cxlr_dax) { - cxlr_dax = ERR_PTR(-ENOMEM); - goto out; - } + if (!cxlr_dax) + return ERR_PTR(-ENOMEM); cxlr_dax->hpa_range.start = p->res->start; cxlr_dax->hpa_range.end = p->res->end; @@ -3061,8 +3057,6 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) dev->parent = &cxlr->dev; dev->bus = &cxl_bus_type; dev->type = &cxl_dax_region_type; -out: - up_read(&cxl_region_rwsem); return cxlr_dax; } From 5ec67596e368cdddddd6770fc2dd2e577e82fbe8 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 21 Feb 2025 09:32:05 +0800 Subject: [PATCH 0763/2157] cxl/region: Drop goto pattern of construct_region() Some operations need to be protected by the cxl_region_rwsem in construct_region(). Currently, construct_region() uses down_write() and up_write() for the cxl_region_rwsem locking, so there is a goto pattern after down_write() invoked to release cxl_region_rwsem. construct region() can be optimized to remove the goto pattern. The changes are creating a new function called __construct_region() which will include all checking and operations protected by the cxl_region_rwsem, and using guard(rwsem_write) to replace down_write() and up_write() in __construct_region(). Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Acked-by: Davidlohr Bueso Signed-off-by: Li Ming Link: https://patch.msgid.link/20250221013205.126419-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 71 +++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 320a3f218131..a750107a3bff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3216,49 +3216,31 @@ static int match_region_by_range(struct device *dev, const void *data) return 0; } -/* Establish an empty region covering the given HPA range */ -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, - struct cxl_endpoint_decoder *cxled) +static int __construct_region(struct cxl_region *cxlr, + struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct range *hpa = &cxled->cxld.hpa_range; struct cxl_region_params *p; - struct cxl_region *cxlr; struct resource *res; int rc; - do { - cxlr = __create_region(cxlrd, cxled->mode, - atomic_read(&cxlrd->region_id)); - } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); - - if (IS_ERR(cxlr)) { - dev_err(cxlmd->dev.parent, - "%s:%s: %s failed assign region: %ld\n", - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), - __func__, PTR_ERR(cxlr)); - return cxlr; - } - - down_write(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_region_rwsem); p = &cxlr->params; if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_err(cxlmd->dev.parent, "%s:%s: %s autodiscovery interrupted\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__); - rc = -EBUSY; - goto err; + return -EBUSY; } set_bit(CXL_REGION_F_AUTO, &cxlr->flags); res = kmalloc(sizeof(*res), GFP_KERNEL); - if (!res) { - rc = -ENOMEM; - goto err; - } + if (!res) + return -ENOMEM; *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), dev_name(&cxlr->dev)); @@ -3281,7 +3263,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); if (rc) - goto err; + return rc; dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, @@ -3290,14 +3272,39 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, /* ...to match put_device() in cxl_add_to_region() */ get_device(&cxlr->dev); - up_write(&cxl_region_rwsem); + + return 0; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_region *cxlr; + int rc; + + do { + cxlr = __create_region(cxlrd, cxled->mode, + atomic_read(&cxlrd->region_id)); + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); + + if (IS_ERR(cxlr)) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s failed assign region: %ld\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, PTR_ERR(cxlr)); + return cxlr; + } + + rc = __construct_region(cxlr, cxlrd, cxled); + if (rc) { + devm_release_action(port->uport_dev, unregister_region, cxlr); + return ERR_PTR(rc); + } return cxlr; - -err: - up_write(&cxl_region_rwsem); - devm_release_action(port->uport_dev, unregister_region, cxlr); - return ERR_PTR(rc); } int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) From 16ca2f5431ee7c003ae3ba3b0c4d4ddc57670b44 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 6 Feb 2025 13:34:00 -0600 Subject: [PATCH 0764/2157] cxl/memdev: Remove unused partition values The next volatile and next persistent values are unused and are cluttering the cxl_memdev_state. Remove these values. Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Reviewed-by: Fan Ni Signed-off-by: Ira Weiny Link: https://patch.msgid.link/20250206-cxl-cleanup-v1-1-9ddf26dd8433@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 4 ---- drivers/cxl/cxlmem.h | 4 ---- 2 files changed, 8 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 62bb3653362f..998e1df36db6 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1097,10 +1097,6 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER; mds->active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER; - mds->next_volatile_bytes = - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; - mds->next_persistent_bytes = - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; return 0; } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 36a091597066..2baf29a0afce 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -506,8 +506,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @partition_align_bytes: alignment size for partition-able capacity * @active_volatile_bytes: sum of hard + soft volatile * @active_persistent_bytes: sum of hard + soft persistent - * @next_volatile_bytes: volatile capacity change pending device reset - * @next_persistent_bytes: persistent capacity change pending device reset * @event: event log driver state * @poison: poison driver state info * @security: security driver state info @@ -528,8 +526,6 @@ struct cxl_memdev_state { u64 partition_align_bytes; u64 active_volatile_bytes; u64 active_persistent_bytes; - u64 next_volatile_bytes; - u64 next_persistent_bytes; struct cxl_event_state event; struct cxl_poison_state poison; From e0feac20d150949dc8b74c1c5998dea70d19bf35 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Tue, 11 Feb 2025 14:20:54 +0800 Subject: [PATCH 0765/2157] cxl/cdat: Remove redundant gp_port initialization gp_port is already pointed to the grandparent port during its definition, remove a redundant code to let gp_port point to the grandparent port again. Signed-off-by: Li Ming Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/20250211062054.300108-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/cdat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index bfba7f5019cf..edb4f41eeacc 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -658,7 +658,6 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, if (IS_ERR(perf)) return PTR_ERR(perf); - gp_port = to_cxl_port(parent_port->dev.parent); *gp_is_root = is_cxl_root(gp_port); /* From 2da9ad027e8094c0944b7dfc28c9e3db368d61cc Mon Sep 17 00:00:00 2001 From: Yuquan Wang Date: Wed, 19 Feb 2025 12:00:29 +0800 Subject: [PATCH 0766/2157] cxl/pmem: debug invalid serial number data In a nvdimm interleave-set each device with an invalid or zero serial number may cause pmem region initialization to fail, but in cxl case such device could still set cookies of nd_interleave_set and create a nvdimm pmem region. This adds the validation of serial number in cxl pmem region creation. The event of no serial number would cause to fail to set the cookie and pmem region. For cxl-test to work properly, always +1 on mock device's serial number. Signed-off-by: Yuquan Wang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250219040029.515451-2-wangyuquan1236@phytium.com.cn Signed-off-by: Dave Jiang --- drivers/cxl/pmem.c | 12 ++++++++++-- tools/testing/cxl/test/mem.c | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index f9c95996e937..11c5a65acacf 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -375,6 +375,16 @@ static int cxl_pmem_region_probe(struct device *dev) goto out_nvd; } + if (cxlds->serial == 0) { + /* include missing alongside invalid in this error message. */ + dev_err(dev, "%s: invalid or missing serial number\n", + dev_name(&cxlmd->dev)); + rc = -ENXIO; + goto out_nvd; + } + info[i].serial = cxlds->serial; + info[i].offset = m->start; + m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -382,8 +392,6 @@ static int cxl_pmem_region_probe(struct device *dev) .size = m->size, .position = i, }; - info[i].offset = m->start; - info[i].serial = cxlds->serial; } ndr_desc.num_mappings = cxlr_pmem->nr_mappings; ndr_desc.mapping = mappings; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 495199238335..4cbfafdf5371 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1534,7 +1534,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id; + cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; From eb8081bcc53fdf951676ee199ed2a8f60d16f0ce Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 18 Feb 2025 14:48:52 -0800 Subject: [PATCH 0767/2157] cxl: Plug typos in ABI doc Trivially update where necessary. Reviewed-by: Jonathan Cameron Signed-off-by: Davidlohr Bueso Reviewed-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250218224853.67457-2-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 3f5627a1210a..7bdf0eb79d7c 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -1,5 +1,5 @@ What: /sys/bus/cxl/flush -Date: Januarry, 2022 +Date: January, 2022 KernelVersion: v5.18 Contact: linux-cxl@vger.kernel.org Description: @@ -33,7 +33,7 @@ Date: May, 2023 KernelVersion: v6.8 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" + (RO) For CXL host platforms that support "QoS Telemetry" this attribute conveys a comma delimited list of platform specific cookies that identifies a QoS performance class for the volatile partition of the CXL mem device. These @@ -60,7 +60,7 @@ Date: May, 2023 KernelVersion: v6.8 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" + (RO) For CXL host platforms that support "QoS Telemetry" this attribute conveys a comma delimited list of platform specific cookies that identifies a QoS performance class for the persistent partition of the CXL mem device. These @@ -423,7 +423,7 @@ Date: May, 2023 KernelVersion: v6.5 Contact: linux-cxl@vger.kernel.org Description: - (RO) For CXL host platforms that support "QoS Telemmetry" this + (RO) For CXL host platforms that support "QoS Telemetry" this root-decoder-only attribute conveys a platform specific cookie that identifies a QoS performance class for the CXL Window. This class-id can be compared against a similar "qos_class" From 17218b02283a8b0de199cfbad079417c8e9de5c9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 18 Feb 2025 14:48:53 -0800 Subject: [PATCH 0768/2157] cxl: Document missing sysfs files Add to the ABI documentation the payload_max and label_storage_size read-only files, which have been there since the early days. Signed-off-by: Davidlohr Bueso Reviewed-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250218224853.67457-3-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 7bdf0eb79d7c..04a880bd1dde 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -18,6 +18,24 @@ Description: specification. +What: /sys/bus/cxl/devices/memX/payload_max +Date: December, 2020 +KernelVersion: v5.12 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) Maximum size (in bytes) of the mailbox command payload + registers. Linux caps this at 1MB if the device reports a + larger size. + + +What: /sys/bus/cxl/devices/memX/label_storage_size +Date: May, 2021 +KernelVersion: v5.13 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) Size (in bytes) of the Label Storage Area (LSA). + + What: /sys/bus/cxl/devices/memX/ram/size Date: December, 2020 KernelVersion: v5.12 From a52b6a2c1c997b5047a724ccde955910f6150a97 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 24 Jan 2025 15:35:33 -0800 Subject: [PATCH 0769/2157] cxl/pci: Support Global Persistent Flush (GPF) Add support for GPF flows. It is found that the CXL specification around this to be a bit too involved from the driver side. And while this should really all handled by the hardware, this patch takes things with a grain of salt. Upon respective port enumeration, both phase timeouts are set to a max of 20 seconds, which is the NMI watchdog default for lockup detection. The premise is that the kernel does not have enough information to set anything better than a max across the board and hope devices finish their GPF flows within the platform energy budget. Timeout detection is based on dirty Shutdown semantics. The driver will mark it as dirty, expecting that the device clear it upon a successful GPF event. The admin may consult the device Health and check the dirty shutdown counter to see if there was a problem with data integrity. [ davej: Explicitly set return to 0 in update_gpf_port_dvsec() ] [ davej: Add spec reference for 'struct cxl_mbox_set_shutdown_state_in ] [ davej: Fix 0-day reported issue ] Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250124233533.910535-1-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/maturity-map.rst | 2 +- drivers/cxl/core/core.h | 2 + drivers/cxl/core/mbox.c | 18 ++++ drivers/cxl/core/pci.c | 87 +++++++++++++++++++ drivers/cxl/core/port.c | 2 + drivers/cxl/cxl.h | 2 + drivers/cxl/cxlmem.h | 6 ++ drivers/cxl/cxlpci.h | 6 ++ drivers/cxl/pmem.c | 8 ++ 9 files changed, 132 insertions(+), 1 deletion(-) diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index df8e2ac2a320..99dd2c841e69 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels -* [0] PMEM GPF / Dirty Shutdown +* [1] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..8f2eb76a3c8c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -115,4 +115,6 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..5b89ae5c5e28 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,6 +1308,24 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_set_shutdown_state_in in = { + .state = 1 + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, + .size_in = sizeof(in), + .payload_in = &in, + }; + + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_dirty_shutdown_state, "CXL"); + int cxl_set_timestamp(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 013b869b66cb..a5c65f79db18 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1054,3 +1054,90 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) return 0; } + +/* + * Set max timeout such that platforms will optimize GPF flow to avoid + * the implied worst-case scenario delays. On a sane platform, all + * devices should always complete GPF within the energy budget of + * the GPF flow. The kernel does not have enough information to pick + * anything better than "maximize timeouts and hope it works". + * + * A misbehaving device could block forward progress of GPF for all + * the other devices, exhausting the energy budget of the platform. + * However, the spec seems to assume that moving on from slow to respond + * devices is a virtue. It is not possible to know that, in actuality, + * the slow to respond device is *the* most critical device in the + * system to wait. + */ +#define GPF_TIMEOUT_BASE_MAX 2 +#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ + +static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) +{ + u64 base, scale; + int rc, offset; + u16 ctrl; + + switch (phase) { + case 1: + offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + break; + case 2: + offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + break; + default: + return -EINVAL; + } + + rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); + if (rc) + return rc; + + if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && + FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) + return 0; + + ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); + ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); + + rc = pci_write_config_word(pdev, dvsec + offset, ctrl); + if (!rc) + pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", + phase, GPF_TIMEOUT_BASE_MAX); + + return rc; +} + +int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dport_dev)) + return 0; + + pdev = to_pci_dev(dport_dev); + if (!pdev || !port) + return -EINVAL; + + if (!port->gpf_dvsec) { + int dvsec; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PORT_GPF); + if (!dvsec) { + pci_warn(pdev, "Port GPF DVSEC not present\n"); + return -EINVAL; + } + + port->gpf_dvsec = dvsec; + } + + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); + + return 0; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..95cd6f11bbfa 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1672,6 +1672,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) if (rc && rc != -EBUSY) return rc; + cxl_gpf_port_setup(dport_dev, port); + /* Any more ports to add between this one and the root? */ if (!dev_is_cxl_root_child(&port->dev)) continue; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index bbbaa0d0a670..55af041df7b2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -610,6 +610,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -633,6 +634,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + int gpf_dvsec; }; /** diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..5d49e0a93426 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -693,6 +693,11 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ +struct cxl_mbox_set_shutdown_state_in { + u8 state; +} __packed; + /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ struct cxl_mbox_set_timestamp_in { __le64 timestamp; @@ -829,6 +834,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); +int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 4da07727ab9c..54e219b0049e 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -40,6 +40,12 @@ /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ #define CXL_DVSEC_PORT_GPF 4 +#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) +#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ #define CXL_DVSEC_DEVICE_GPF 5 diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index f9c95996e937..a39e2c52d7ab 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -85,6 +85,14 @@ static int cxl_nvdimm_probe(struct device *dev) if (!nvdimm) return -ENOMEM; + /* + * Set dirty shutdown now, with the expectation that the device + * clear it upon a successful GPF flow. The exception to this + * is upon Viral detection, per CXL 3.2 section 12.4.2. + */ + if (cxl_dirty_shutdown_state(mds)) + dev_warn(dev, "GPF: could not dirty shutdown state\n"); + dev_set_drvdata(dev, nvdimm); return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); } From 021b7e42fa7bc2c30a4bf676355f1079aa0fe6be Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:32 -0800 Subject: [PATCH 0770/2157] cxl/pci: Introduce cxl_gpf_get_dvsec() Add a helper to fetch the port/device GPF dvsecs. This is currently only used for ports, but a later patch to export dirty count to users will make use of the device one. Signed-off-by: Davidlohr Bueso Reviewed-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-2-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 30 ++++++++++++++++++++---------- drivers/cxl/cxl.h | 2 ++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a5c65f79db18..96fecb799cbc 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,6 +1072,22 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ +u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +{ + u16 dvsec; + + if (!dev_is_pci(dev)) + return 0; + + dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + if (!dvsec) + dev_warn(dev, "%s GPF DVSEC not present\n", + is_port ? "Port" : "Device"); + return dvsec; +} +EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); + static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) { u64 base, scale; @@ -1116,26 +1132,20 @@ int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) { struct pci_dev *pdev; - if (!dev_is_pci(dport_dev)) - return 0; - - pdev = to_pci_dev(dport_dev); - if (!pdev || !port) + if (!port) return -EINVAL; if (!port->gpf_dvsec) { int dvsec; - dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PORT_GPF); - if (!dvsec) { - pci_warn(pdev, "Port GPF DVSEC not present\n"); + dvsec = cxl_gpf_get_dvsec(dport_dev, true); + if (!dvsec) return -EINVAL; - } port->gpf_dvsec = dvsec; } + pdev = to_pci_dev(dport_dev); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 55af041df7b2..fc4edd5536b9 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -922,4 +922,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif +u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); + #endif /* __CXL_H__ */ From 86349aaaeacd6855914ee1b5a76ef0952fa134eb Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:33 -0800 Subject: [PATCH 0771/2157] cxl/pmem: Rename cxl_dirty_shutdown_state() ... to a better suited 'cxl_arm_dirty_shutdown()'. Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Li Ming Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-3-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 4 ++-- drivers/cxl/cxlmem.h | 2 +- drivers/cxl/pmem.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 5b89ae5c5e28..3687c8da1927 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,7 +1308,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); -int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; @@ -1324,7 +1324,7 @@ int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } -EXPORT_SYMBOL_NS_GPL(cxl_dirty_shutdown_state, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d49e0a93426..0e2df6904454 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -834,7 +834,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); -int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds); +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index a39e2c52d7ab..6b284962592f 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -90,7 +90,7 @@ static int cxl_nvdimm_probe(struct device *dev) * clear it upon a successful GPF flow. The exception to this * is upon Viral detection, per CXL 3.2 section 12.4.2. */ - if (cxl_dirty_shutdown_state(mds)) + if (cxl_arm_dirty_shutdown(mds)) dev_warn(dev, "GPF: could not dirty shutdown state\n"); dev_set_drvdata(dev, nvdimm); From 7d0ecc0bd83dc2b2f46087f955c9572073e45aca Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:34 -0800 Subject: [PATCH 0772/2157] cxl/pmem: Export dirty shutdown count via sysfs Similar to how the acpi_nfit driver exports Optane dirty shutdown count, introduce: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown Under the conditions that 1) dirty shutdown can be set, 2) Device GPF DVSEC exists, and 3) the count itself can be retrieved. Suggested-by: Dan Williams Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-4-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 12 +++ Documentation/driver-api/cxl/maturity-map.rst | 2 +- drivers/cxl/core/mbox.c | 21 +++++ drivers/cxl/cxl.h | 1 + drivers/cxl/cxlmem.h | 13 ++++ drivers/cxl/pmem.c | 77 +++++++++++++++++-- 6 files changed, 117 insertions(+), 9 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 3f5627a1210a..a7491d214098 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -586,3 +586,15 @@ Description: See Documentation/ABI/stable/sysfs-devices-node. access0 provides the number to the closest initiator and access1 provides the number to the closest CPU. + + +What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown +Date: Feb, 2025 +KernelVersion: v6.15 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The device dirty shutdown count value, which is the number + of times the device could have incurred in potential data loss. + The count is persistent across power loss and wraps back to 0 + upon overflow. If this file is not present, the device does not + have the necessary support for dirty tracking. diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index 99dd2c841e69..a2288f9df658 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels -* [1] PMEM GPF / Dirty Shutdown +* [3] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 3687c8da1927..ec7b70ae5c06 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,6 +1308,27 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mbox_get_health_info_out hi; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, + .size_out = sizeof(hi), + .payload_out = &hi, + }; + + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (!rc) + *count = le32_to_cpu(hi.dirty_shutdown_cnt); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); + int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index fc4edd5536b9..b265347cedce 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -563,6 +563,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ + u64 dirty_shutdowns; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 0e2df6904454..236e418d26f4 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -693,6 +693,18 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */ +struct cxl_mbox_get_health_info_out { + u8 health_status; + u8 media_status; + u8 additional_status; + u8 life_used; + __le16 device_temperature; + __le32 dirty_shutdown_cnt; + __le32 corrected_volatile_error_cnt; + __le32 corrected_persistent_error_cnt; +} __packed; + /* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ struct cxl_mbox_set_shutdown_state_in { u8 state; @@ -834,6 +846,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count); int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 6b284962592f..81db5d629049 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -42,15 +42,44 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char * } static DEVICE_ATTR_RO(id); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns); +} +static DEVICE_ATTR_RO(dirty_shutdown); + static struct attribute *cxl_dimm_attributes[] = { &dev_attr_id.attr, &dev_attr_provider.attr, + &dev_attr_dirty_shutdown.attr, NULL }; +#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX +static umode_t cxl_dimm_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + if (a == &dev_attr_dirty_shutdown.attr) { + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + if (cxl_nvd->dirty_shutdowns == + CXL_INVALID_DIRTY_SHUTDOWN_COUNT) + return 0; + } + + return a->mode; +} + static const struct attribute_group cxl_dimm_attribute_group = { .name = "cxl", .attrs = cxl_dimm_attributes, + .is_visible = cxl_dimm_visible }; static const struct attribute_group *cxl_dimm_attribute_groups[] = { @@ -58,6 +87,38 @@ static const struct attribute_group *cxl_dimm_attribute_groups[] = { NULL }; +static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) +{ + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = &cxl_nvd->dev; + u32 count; + + /* + * Dirty tracking is enabled and exposed to the user, only when: + * - dirty shutdown on the device can be set, and, + * - the device has a Device GPF DVSEC (albeit unused), and, + * - the Get Health Info cmd can retrieve the device's dirty count. + */ + cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT; + + if (cxl_arm_dirty_shutdown(mds)) { + dev_warn(dev, "GPF: could not set dirty shutdown state\n"); + return; + } + + if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + return; + + if (cxl_get_dirty_count(mds, &count)) { + dev_warn(dev, "GPF: could not retrieve dirty count\n"); + return; + } + + cxl_nvd->dirty_shutdowns = count; +} + static int cxl_nvdimm_probe(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); @@ -78,20 +139,20 @@ static int cxl_nvdimm_probe(struct device *dev) set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); - nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, - cxl_dimm_attribute_groups, flags, - cmd_mask, 0, NULL, cxl_nvd->dev_id, - cxl_security_ops, NULL); - if (!nvdimm) - return -ENOMEM; /* * Set dirty shutdown now, with the expectation that the device * clear it upon a successful GPF flow. The exception to this * is upon Viral detection, per CXL 3.2 section 12.4.2. */ - if (cxl_arm_dirty_shutdown(mds)) - dev_warn(dev, "GPF: could not dirty shutdown state\n"); + cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd); + + nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, + cxl_dimm_attribute_groups, flags, + cmd_mask, 0, NULL, cxl_nvd->dev_id, + cxl_security_ops, NULL); + if (!nvdimm) + return -ENOMEM; dev_set_drvdata(dev, nvdimm); return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); From 6eb52f63ea47c6aa7f820262911be47602e12da6 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:35 -0800 Subject: [PATCH 0773/2157] tools/testing/cxl: Set Shutdown State support Add support to emulate the CXL Set Shutdown State operation. Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Li Ming Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-5-dave@stgolabs.net Signed-off-by: Dave Jiang --- tools/testing/cxl/test/mem.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8d731bd63988..c99105dabe30 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -65,6 +65,10 @@ static struct cxl_cel_entry mock_cel[] = { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), .effect = CXL_CMD_EFFECT_NONE, }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE), + .effect = POLICY_CHANGE_IMMEDIATE, + }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), .effect = CXL_CMD_EFFECT_NONE, @@ -161,6 +165,7 @@ struct cxl_mockmem_data { u8 event_buf[SZ_4K]; u64 timestamp; unsigned long sanitize_timeout; + u8 shutdown_state; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -1088,6 +1093,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd) return 0; } +static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in; + + if (cmd->size_in != sizeof(*ss)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + mdata->shutdown_state = ss->state; + return 0; +} + static struct mock_poison { struct cxl_dev_state *cxlds; u64 dpa; @@ -1421,6 +1441,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: rc = mock_passphrase_secure_erase(mdata, cmd); break; + case CXL_MBOX_OP_SET_SHUTDOWN_STATE: + rc = mock_set_shutdown_state(mdata, cmd); + break; case CXL_MBOX_OP_GET_POISON: rc = mock_get_poison(cxlds, cmd); break; From 84f8b6e242deb997f2b32b4fc0895e8703704029 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Thu, 27 Feb 2025 18:18:48 +0800 Subject: [PATCH 0774/2157] cxl/mem: Do not return error if CONFIG_CXL_MCE unset CONFIG_CXL_MCE depends on CONFIG_MEMORY_FAILURE, if CONFIG_CXL_MCE is not set, devm_cxl_register_mce_notifier() will return an -EOPNOTSUPP, it will cause cxl_mem_state_create() failure , and then cxl pci device probing failed. In this case, it should not break cxl pci device probing. Add a checking in cxl_mem_state_create() to check if the returned value of devm_cxl_register_mce_notifier() is -EOPNOTSUPP, if yes, just output a warning log, do not let cxl_mem_state_create() return an error. Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250227101848.388595-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index e088c6ba1705..7450c4df522e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1473,7 +1473,9 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); - if (rc) + if (rc == -EOPNOTSUPP) + dev_warn(dev, "CXL MCE unsupported\n"); + else if (rc) return ERR_PTR(rc); return mds; From 114a89b433aa899cdcc867bb26806a41aae505ee Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 26 Feb 2025 14:19:27 -0800 Subject: [PATCH 0775/2157] cxl/test: Define a CFMWS capable of a 3 way HB interleave The CXL unit test cxl-xor-region.sh is skipping a 1+1+1 region interleave test case because the window is not defined. Additionally, upcoming expansion of 3 way HB interleave test cases (like 2+2+2) require the same window. Replace an unused CFMWS with a 3-way capable CFMWS in the set of CFMWS's loaded when interleave_arithmetic=1. Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Tested-by: Li Zhijian Link: https://patch.msgid.link/20250226221931.2352061-1-alison.schofield@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/cxl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 083a66a52731..1c3336095923 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -155,7 +155,7 @@ static struct { } cfmws7; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[3]; } cfmws8; struct { struct acpi_cedt_cxims cxims; @@ -331,14 +331,14 @@ static struct { .length = sizeof(mock_cedt.cfmws8), }, .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, - .interleave_ways = 2, - .granularity = 0, + .interleave_ways = 8, + .granularity = 1, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, - .window_size = SZ_256M * 16UL, + .window_size = SZ_512M * 6UL, }, - .target = { 0, 1, 0, 1, }, + .target = { 0, 1, 2, }, }, .cxims0 = { .cxims = { From 3d3e3b94440631179b7b6ffb1a64b944b27519c1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 28 Feb 2025 13:47:39 -0700 Subject: [PATCH 0776/2157] cxl: Fix warning from emitting resource_size_t as long long int on 32bit systems Reported by kernel test bot from an ARM build: drivers/cxl/core/region.c:3263:26: warning: format '%lld' expects argument of type 'long long int', but argument 3 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=] On a 32bit system, resource_size_t is defined as 32bit long vs on a 64bit system it is defined as 64bit long. Use %pa format to deal with resource_size_t. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503010252.mIDhZ5kY-lkp@intel.com/ Fixes: 0ec9849b6333 ("acpi/hmat / cxl: Add extended linear cache support for CXL") Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250228204739.3849309-1-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 80ba19cf3094..a09ae21a26c6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3251,8 +3251,8 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, if (size != cache_size) { dev_warn(&cxlr->dev, - "Extended Linear Cache size %lld != CXL size %lld. No Support!", - cache_size, size); + "Extended Linear Cache size %pa != CXL size %pa. No Support!", + &cache_size, &size); return -EOPNOTSUPP; } From 962ac4c83e81e38b0761f31b500b398cfbc33857 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 24 Feb 2025 12:29:29 -0600 Subject: [PATCH 0777/2157] cxl/Documentation: Remove 'mixed' from sysfs mode doc Commit 188e9529a606 ("cxl: Remove the CXL_DECODER_MIXED mistake") removed the mixed mode. Remove it from the sysfs documentation. Cc: Dan Williams Signed-off-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250224-remove-mixed-sysfs-v1-1-a329db313dac@intel.com Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 6d911f046a78..99bb3faf7a0e 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -339,14 +339,13 @@ KernelVersion: v6.0 Contact: linux-cxl@vger.kernel.org Description: (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it - translates from a host physical address range, to a device local - address range. Device-local address ranges are further split - into a 'ram' (volatile memory) range and 'pmem' (persistent - memory) range. The 'mode' attribute emits one of 'ram', 'pmem', - 'mixed', or 'none'. The 'mixed' indication is for error cases - when a decoder straddles the volatile/persistent partition - boundary, and 'none' indicates the decoder is not actively - decoding, or no DPA allocation policy has been set. + translates from a host physical address range, to a device + local address range. Device-local address ranges are further + split into a 'ram' (volatile memory) range and 'pmem' + (persistent memory) range. The 'mode' attribute emits one of + 'ram', 'pmem', or 'none'. The 'none' indicates the decoder is + not actively decoding, or no DPA allocation policy has been + set. 'mode' can be written, when the decoder is in the 'disabled' state, with either 'ram' or 'pmem' to set the boundaries for the From 74d9c59658e4d3b06f163da0c5ed7647656705c1 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Thu, 6 Mar 2025 13:36:51 -0800 Subject: [PATCH 0778/2157] cxl/region: Quiet some dev_warn()s in extended linear cache setup Extended Linear Cache (ELC) setup code emits a dev_warn(), "Extended linear cache calculation failed." for issues found while setting up the ELC. For platforms without CONFIG_ACPI_HMAT, every auto region setup will emit the warning because the default !ACPI_HMAT return value is EOPNOTSUPP. Suppress it by skipping the warn for EOPNOTSUPP. Change the EOPNOTSUPP in the actual ELC failure path to ENXIO. Remove the check and enusing dev_warn() when region resource size is NULL. The endpoint decoders hpa_range used to create the resource is checked in init_hdm_decoder(), so it cannot be NULL here. For good measure, add the rc value to the dev_warn(). It will either be the -ENOENT returned by HMAT if the mem target is not found, or the -ENXIO from the region driver calculation. Reviewed-by: Li Ming Signed-off-by: Alison Schofield Link: https://patch.msgid.link/20250306213700.2606304-1-alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index a09ae21a26c6..6d8bdb53f258 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3235,13 +3235,10 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; int nid = phys_to_target_node(res->start); - resource_size_t size, cache_size, start; + resource_size_t size = resource_size(res); + resource_size_t cache_size, start; int rc; - size = resource_size(res); - if (!size) - return -EINVAL; - rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); if (rc) return rc; @@ -3253,7 +3250,7 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, dev_warn(&cxlr->dev, "Extended Linear Cache size %pa != CXL size %pa. No Support!", &cache_size, &size); - return -EOPNOTSUPP; + return -ENXIO; } /* @@ -3305,14 +3302,14 @@ static int __construct_region(struct cxl_region *cxlr, dev_name(&cxlr->dev)); rc = cxl_extended_linear_cache_resize(cxlr, res); - if (rc) { + if (rc && rc != -EOPNOTSUPP) { /* * Failing to support extended linear cache region resize does not * prevent the region from functioning. Only causes cxl list showing * incorrect region size. */ dev_warn(cxlmd->dev.parent, - "Extended linear cache calculation failed.\n"); + "Extended linear cache calculation failed rc:%d\n", rc); } rc = insert_resource(cxlrd->res, res); From 10e9510a6d238a8e6c994b81748b00b9c696c48b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 3 Feb 2025 21:26:32 +0000 Subject: [PATCH 0779/2157] gendwarfksyms: Add a separate pass to resolve FQNs Using dwarf_getscopes_die to resolve fully-qualified names turns out to be rather slow, and also results in duplicate scopes being processed, which doesn't help. Simply adding an extra pass to resolve names for all DIEs before processing exports is noticeably faster. For the object files with the most exports in a defconfig+Rust build, the performance improvement is consistently >50%: rust/bindings.o: 1038 exports before: 9.5980 +- 0.0183 seconds time elapsed ( +- 0.19% ) after: 4.3116 +- 0.0287 seconds time elapsed ( +- 0.67% ) rust/core.o: 424 exports before: 5.3584 +- 0.0204 seconds time elapsed ( +- 0.38% ) after: 0.05348 +- 0.00129 seconds time elapsed ( +- 2.42% ) ^ Not a mistake. net/core/dev.o: 190 exports before: 9.0507 +- 0.0297 seconds time elapsed ( +- 0.33% ) after: 3.2882 +- 0.0165 seconds time elapsed ( +- 0.50% ) rust/kernel.o: 129 exports before: 6.8571 +- 0.0317 seconds time elapsed ( +- 0.46% ) after: 2.9096 +- 0.0316 seconds time elapsed ( +- 1.09% ) net/core/skbuff.o: 120 exports before: 5.4805 +- 0.0291 seconds time elapsed ( +- 0.53% ) after: 2.0339 +- 0.0231 seconds time elapsed ( +- 1.14% ) drivers/gpu/drm/display/drm_dp_helper.o: 101 exports before: 1.7877 +- 0.0187 seconds time elapsed ( +- 1.05% ) after: 0.69245 +- 0.00994 seconds time elapsed ( +- 1.44% ) net/core/sock.o: 97 exports before: 5.8327 +- 0.0653 seconds time elapsed ( +- 1.12% ) after: 2.0784 +- 0.0291 seconds time elapsed ( +- 1.40% ) drivers/net/phy/phy_device.o: 95 exports before: 3.0671 +- 0.0371 seconds time elapsed ( +- 1.21% ) after: 1.2127 +- 0.0207 seconds time elapsed ( +- 1.70% ) drivers/pci/pci.o: 93 exports before: 1.1130 +- 0.0113 seconds time elapsed ( +- 1.01% ) after: 0.4848 +- 0.0127 seconds time elapsed ( +- 2.63% ) kernel/sched/core.o: 83 exports before: 3.5092 +- 0.0223 seconds time elapsed ( +- 0.64% ) after: 1.1231 +- 0.0145 seconds time elapsed ( +- 1.29% ) Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8% faster with this patch applied on my test system. Without Rust, there's still a 10.4% improvement in build time when gendwarfksyms is used. Note that symbol versions are unchanged with this patch. Suggested-by: Giuliano Procida Signed-off-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/die.c | 2 +- scripts/gendwarfksyms/dwarf.c | 154 ++++++++++++++------------ scripts/gendwarfksyms/gendwarfksyms.h | 2 + scripts/gendwarfksyms/types.c | 2 +- 4 files changed, 87 insertions(+), 73 deletions(-) diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index 66bd4c9bc952..6183bbbe7b54 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -6,7 +6,7 @@ #include #include "gendwarfksyms.h" -#define DIE_HASH_BITS 15 +#define DIE_HASH_BITS 16 /* {die->addr, state} -> struct die * */ static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS); diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 534d9aa7c114..eed247d8abfc 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -3,6 +3,7 @@ * Copyright (C) 2024 Google LLC */ +#define _GNU_SOURCE #include #include #include @@ -193,79 +194,17 @@ static void process_fmt(struct die *cache, const char *fmt, ...) va_end(args); } -#define MAX_FQN_SIZE 64 - -/* Get a fully qualified name from DWARF scopes */ -static char *get_fqn(Dwarf_Die *die) -{ - const char *list[MAX_FQN_SIZE]; - Dwarf_Die *scopes = NULL; - bool has_name = false; - char *fqn = NULL; - char *p; - int count = 0; - int len = 0; - int res; - int i; - - res = checkp(dwarf_getscopes_die(die, &scopes)); - if (!res) { - list[count] = get_name_attr(die); - - if (!list[count]) - return NULL; - - len += strlen(list[count]); - count++; - - goto done; - } - - for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { - if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) - continue; - - list[count] = get_name_attr(&scopes[i]); - - if (list[count]) { - has_name = true; - } else { - list[count] = ""; - has_name = false; - } - - len += strlen(list[count]); - count++; - - if (i > 0) { - list[count++] = "::"; - len += 2; - } - } - - free(scopes); - - if (count == MAX_FQN_SIZE) - warn("increase MAX_FQN_SIZE: reached the maximum"); - - /* Consider the DIE unnamed if the last scope doesn't have a name */ - if (!has_name) - return NULL; -done: - fqn = xmalloc(len + 1); - *fqn = '\0'; - - p = fqn; - for (i = 0; i < count; i++) - p = stpcpy(p, list[i]); - - return fqn; -} - static void update_fqn(struct die *cache, Dwarf_Die *die) { - if (!cache->fqn) - cache->fqn = get_fqn(die) ?: ""; + struct die *fqn; + + if (!cache->fqn) { + if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) && + *fqn->fqn) + cache->fqn = xstrdup(fqn->fqn); + else + cache->fqn = ""; + } } static void process_fqn(struct die *cache, Dwarf_Die *die) @@ -1148,8 +1087,81 @@ static void process_symbol_ptr(struct symbol *sym, void *arg) cache_free(&state.expansion_cache); } +static int resolve_fqns(struct state *parent, struct die *unused, + Dwarf_Die *die) +{ + struct state state; + struct die *cache; + const char *name; + bool use_prefix; + char *prefix = NULL; + char *fqn = ""; + int tag; + + if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &cache)) + return 0; + + tag = dwarf_tag(die); + + /* + * Only namespaces and structures need to pass a prefix to the next + * scope. + */ + use_prefix = tag == DW_TAG_namespace || tag == DW_TAG_class_type || + tag == DW_TAG_structure_type; + + state.expand.current_fqn = NULL; + name = get_name_attr(die); + + if (parent && parent->expand.current_fqn && (use_prefix || name)) { + /* + * The fqn for the current DIE, and if needed, a prefix for the + * next scope. + */ + if (asprintf(&prefix, "%s::%s", parent->expand.current_fqn, + name ? name : "") < 0) + error("asprintf failed"); + + if (use_prefix) + state.expand.current_fqn = prefix; + + /* + * Use fqn only if the DIE has a name. Otherwise fqn will + * remain empty. + */ + if (name) { + fqn = prefix; + /* prefix will be freed by die_map. */ + prefix = NULL; + } + } else if (name) { + /* No prefix from the previous scope. Use only the name. */ + fqn = xstrdup(name); + + if (use_prefix) + state.expand.current_fqn = fqn; + } + + /* If the DIE has a non-empty name, cache it. */ + if (*fqn) { + cache = die_map_get(die, DIE_FQN); + /* Move ownership of fqn to die_map. */ + cache->fqn = fqn; + cache->state = DIE_FQN; + } + + check(process_die_container(&state, NULL, die, resolve_fqns, + match_all)); + + free(prefix); + return 0; +} + void process_cu(Dwarf_Die *cudie) { + check(process_die_container(NULL, NULL, cudie, resolve_fqns, + match_all)); + check(process_die_container(NULL, NULL, cudie, process_exported_symbols, match_all)); diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 197a1a8123c6..2feec168bf73 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -139,6 +139,7 @@ void symbol_free(void); enum die_state { DIE_INCOMPLETE, + DIE_FQN, DIE_UNEXPANDED, DIE_COMPLETE, DIE_SYMBOL, @@ -170,6 +171,7 @@ static inline const char *die_state_name(enum die_state state) { switch (state) { CASE_CONST_TO_STR(DIE_INCOMPLETE) + CASE_CONST_TO_STR(DIE_FQN) CASE_CONST_TO_STR(DIE_UNEXPANDED) CASE_CONST_TO_STR(DIE_COMPLETE) CASE_CONST_TO_STR(DIE_SYMBOL) diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 6c03265f4d10..6f37289104ff 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -248,7 +248,7 @@ static char *get_type_name(struct die *cache) warn("found incomplete cache entry: %p", cache); return NULL; } - if (cache->state == DIE_SYMBOL) + if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN) return NULL; if (!cache->fqn || !*cache->fqn) return NULL; From e966ad0edd0056c7491b8f23992c11734ab61ddf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Feb 2025 01:39:38 +0900 Subject: [PATCH 0780/2157] kbuild: remove EXTRA_*FLAGS support Commit f77bf01425b1 ("kbuild: introduce ccflags-y, asflags-y and ldflags-y") deprecated these in 2007. The migration should have been completed by now. Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor --- Documentation/dev-tools/checkpatch.rst | 18 ------------------ Documentation/kbuild/makefiles.rst | 3 --- scripts/Makefile.build | 4 ---- scripts/Makefile.lib | 5 ----- scripts/checkpatch.pl | 14 -------------- 5 files changed, 44 deletions(-) diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst index abb3ff682076..76bd0ddb0041 100644 --- a/Documentation/dev-tools/checkpatch.rst +++ b/Documentation/dev-tools/checkpatch.rst @@ -342,24 +342,6 @@ API usage See: https://www.kernel.org/doc/html/latest/RCU/whatisRCU.html#full-list-of-rcu-apis - **DEPRECATED_VARIABLE** - EXTRA_{A,C,CPP,LD}FLAGS are deprecated and should be replaced by the new - flags added via commit f77bf01425b1 ("kbuild: introduce ccflags-y, - asflags-y and ldflags-y"). - - The following conversion scheme maybe used:: - - EXTRA_AFLAGS -> asflags-y - EXTRA_CFLAGS -> ccflags-y - EXTRA_CPPFLAGS -> cppflags-y - EXTRA_LDFLAGS -> ldflags-y - - See: - - 1. https://lore.kernel.org/lkml/20070930191054.GA15876@uranus.ravnborg.org/ - 2. https://lore.kernel.org/lkml/1313384834-24433-12-git-send-email-lacombar@gmail.com/ - 3. https://www.kernel.org/doc/html/latest/kbuild/makefiles.html#compilation-flags - **DEVICE_ATTR_FUNCTIONS** The function names used in DEVICE_ATTR is unusual. Typically, the store and show functions are used with _store and diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d36519f194dc..25e04e47faff 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -318,9 +318,6 @@ ccflags-y, asflags-y and ldflags-y These three flags apply only to the kbuild makefile in which they are assigned. They are used for all the normal cc, as and ld invocations happening during a recursive build. - Note: Flags with the same behaviour were previously named: - EXTRA_CFLAGS, EXTRA_AFLAGS and EXTRA_LDFLAGS. - They are still supported but their usage is deprecated. ccflags-y specifies options for compiling with $(CC). diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 993708d11874..a59650ba140b 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -20,10 +20,6 @@ always-m := targets := subdir-y := subdir-m := -EXTRA_AFLAGS := -EXTRA_CFLAGS := -EXTRA_CPPFLAGS := -EXTRA_LDFLAGS := asflags-y := ccflags-y := rustflags-y := diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index cad20f0e66ee..47f56ef71937 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# Backward compatibility -asflags-y += $(EXTRA_AFLAGS) -ccflags-y += $(EXTRA_CFLAGS) -cppflags-y += $(EXTRA_CPPFLAGS) -ldflags-y += $(EXTRA_LDFLAGS) # flags that take effect in current and sub directories KBUILD_AFLAGS += $(subdir-asflags-y) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7b28ad331742..8f70bedc18be 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3689,20 +3689,6 @@ sub process { } } - if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) && - ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) { - my $flag = $1; - my $replacement = { - 'EXTRA_AFLAGS' => 'asflags-y', - 'EXTRA_CFLAGS' => 'ccflags-y', - 'EXTRA_CPPFLAGS' => 'cppflags-y', - 'EXTRA_LDFLAGS' => 'ldflags-y', - }; - - WARN("DEPRECATED_VARIABLE", - "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag}); - } - # check for DT compatible documentation if (defined $root && (($realfile =~ /\.dtsi?$/ && $line =~ /^\+\s*compatible\s*=\s*\"/) || From 90efe2b9119ff8a83a67eb5323e52311a1a49de9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Feb 2025 02:18:02 +0900 Subject: [PATCH 0781/2157] gen_compile_commands.py: remove code for '\#' replacement Since commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files for future Make"), '#' in the build command is replaced with $(pound) rather than '\#'. Calling .replace(r'\#', '#') is only necessary when this tool is used to parse .*.cmd files generated by Linux 4.16 or earlier, which is unlikely to happen. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/clang-tools/gen_compile_commands.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index e4fb686dfaa9..96e6e46ad1a7 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -167,10 +167,10 @@ def process_line(root_directory, command_prefix, file_path): root_directory or file_directory. """ # The .cmd files are intended to be included directly by Make, so they - # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the - # kernel version). The compile_commands.json file is not interepreted - # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') + # escape the pound sign '#' as '$(pound)'. The compile_commands.json file + # is not interepreted by Make, so this code replaces the escaped version + # with '#'. + prefix = command_prefix.replace('$(pound)', '#') # Return the canonical path, eliminating any symbolic links encountered in the path. abs_path = os.path.realpath(os.path.join(root_directory, file_path)) From 6c3fb0bb4d4f1173f32cc26d077b151d72226a2f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Feb 2025 02:45:28 +0900 Subject: [PATCH 0782/2157] genksyms: factor out APP for the ST_NORMAL state For the ST_NORMAL state, APP is called regardless of the token type. Factor it out. Signed-off-by: Masahiro Yamada --- scripts/genksyms/lex.l | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index 22aeb57649d9..f81033af1528 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -176,10 +176,10 @@ repeat: switch (lexstate) { case ST_NORMAL: + APP; switch (token) { case IDENT: - APP; { int r = is_reserved_word(yytext, yyleng); if (r >= 0) @@ -224,13 +224,11 @@ repeat: break; case '[': - APP; lexstate = ST_BRACKET; count = 1; goto repeat; case '{': - APP; if (dont_want_brace_phrase) break; lexstate = ST_BRACE; @@ -238,12 +236,10 @@ repeat: goto repeat; case '=': case ':': - APP; lexstate = ST_EXPRESSION; break; default: - APP; break; } break; From 226ac19c217f24f0927d0a73cf9ee613971a188d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Feb 2025 03:41:55 +0900 Subject: [PATCH 0783/2157] kconfig: do not clear SYMBOL_VALID when reading include/config/auto.conf When conf_read_simple() is called with S_DEF_AUTO, it is meant to read previous symbol values from include/config/auto.conf to determine which include/config/* files should be touched. This process should not modify the current symbol status in any way. However, conf_touch_deps() currently invalidates all symbol values and recalculates them, which is totally unneeded. Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 3b55e7a4131d..ac95661a1c9d 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -385,7 +385,7 @@ int conf_read_simple(const char *name, int def) def_flags = SYMBOL_DEF << def; for_all_symbols(sym) { - sym->flags &= ~(def_flags|SYMBOL_VALID); + sym->flags &= ~def_flags; switch (sym->type) { case S_INT: case S_HEX: @@ -398,7 +398,11 @@ int conf_read_simple(const char *name, int def) } } - expr_invalidate_all(); + if (def == S_DEF_USER) { + for_all_symbols(sym) + sym->flags &= ~SYMBOL_VALID; + expr_invalidate_all(); + } while (getline_stripped(&line, &line_asize, in) != -1) { struct menu *choice; @@ -464,6 +468,9 @@ int conf_read_simple(const char *name, int def) if (conf_set_sym_val(sym, def, def_flags, val)) continue; + if (def != S_DEF_USER) + continue; + /* * If this is a choice member, give it the highest priority. * If conflicting CONFIG options are given from an input file, @@ -967,10 +974,8 @@ static int conf_touch_deps(void) depfile_path[depfile_prefix_len] = 0; conf_read_simple(name, S_DEF_AUTO); - sym_calc_value(modules_sym); for_all_symbols(sym) { - sym_calc_value(sym); if (sym_is_choice(sym)) continue; if (sym->flags & SYMBOL_WRITE) { @@ -1084,12 +1089,12 @@ int conf_write_autoconf(int overwrite) if (ret) return -1; - if (conf_touch_deps()) - return 1; - for_all_symbols(sym) sym_calc_value(sym); + if (conf_touch_deps()) + return 1; + ret = __conf_write_autoconf(conf_get_autoheader_name(), print_symbol_for_c, &comment_style_c); From ab5bc764bdc270d1c55aaf9d238e0986ff301355 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Feb 2025 03:42:48 +0900 Subject: [PATCH 0784/2157] kconfig: remove unnecessary cast in sym_get_string() The explicit casting from (char *) to (const char *) is unneeded. Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 7beb59dec5a0..d57f8cbba291 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -879,7 +879,7 @@ const char *sym_get_string_value(struct symbol *sym) default: ; } - return (const char *)sym->curr.val; + return sym->curr.val; } bool sym_is_changeable(const struct symbol *sym) From 59d60d26a58be75e9e3b813104c2dfd3b58eb662 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Feb 2025 02:50:13 +0900 Subject: [PATCH 0785/2157] modpost: introduce get_basename() helper The logic to retrieve the basename appears multiple times. Factor out the common pattern into a helper function. I copied kbasename() from include/linux/string.h and renamed it to get_basename(). Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/mod/modpost.c | 41 ++++++++++++++++++---------------------- scripts/mod/modpost.h | 1 + scripts/mod/sumversion.c | 13 ++++--------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index c35d22607978..7f4c72eca72c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -98,6 +98,18 @@ static inline bool strends(const char *str, const char *postfix) return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; } +/** + * get_basename - return the last part of a pathname. + * + * @path: path to extract the filename from. + */ +const char *get_basename(const char *path) +{ + const char *tail = strrchr(path, '/'); + + return tail ? tail + 1 : path; +} + char *read_text_file(const char *filename) { struct stat st; @@ -1461,14 +1473,8 @@ static void extract_crcs_for_object(const char *object, struct module *mod) const char *base; int dirlen, ret; - base = strrchr(object, '/'); - if (base) { - base++; - dirlen = base - object; - } else { - dirlen = 0; - base = object; - } + base = get_basename(object); + dirlen = base - object; ret = snprintf(cmd_file, sizeof(cmd_file), "%.*s.%s.cmd", dirlen, object, base); @@ -1703,11 +1709,7 @@ static void check_exports(struct module *mod) s->crc_valid = exp->crc_valid; s->crc = exp->crc; - basename = strrchr(mod->name, '/'); - if (basename) - basename++; - else - basename = mod->name; + basename = get_basename(mod->name); if (!contains_namespace(&mod->imported_namespaces, exp->namespace)) { modpost_log(!allow_missing_ns_imports, @@ -1765,11 +1767,8 @@ static void check_modname_len(struct module *mod) { const char *mod_name; - mod_name = strrchr(mod->name, '/'); - if (mod_name == NULL) - mod_name = mod->name; - else - mod_name++; + mod_name = get_basename(mod->name); + if (strlen(mod_name) >= MODULE_NAME_LEN) error("module name is too long [%s.ko]\n", mod->name); } @@ -1946,11 +1945,7 @@ static void add_depends(struct buffer *b, struct module *mod) continue; s->module->seen = true; - p = strrchr(s->module->name, '/'); - if (p) - p++; - else - p = s->module->name; + p = get_basename(s->module->name); buf_printf(b, "%s%s", first ? "" : ",", p); first = 0; } diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 59366f456b76..9133e4c3803f 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -216,6 +216,7 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen); /* from modpost.c */ extern bool target_is_big_endian; extern bool host_is_big_endian; +const char *get_basename(const char *path); char *read_text_file(const char *filename); char *get_line(char **stringp); void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym); diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6de9af17599d..e79fc40d852f 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -309,15 +309,10 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) cmd = xmalloc(strlen(objfile) + sizeof("..cmd")); - base = strrchr(objfile, '/'); - if (base) { - base++; - dirlen = base - objfile; - sprintf(cmd, "%.*s.%s.cmd", dirlen, objfile, base); - } else { - dirlen = 0; - sprintf(cmd, ".%s.cmd", objfile); - } + base = get_basename(objfile); + dirlen = base - objfile; + sprintf(cmd, "%.*s.%s.cmd", dirlen, objfile, base); + dir = xmalloc(dirlen + 1); strncpy(dir, objfile, dirlen); dir[dirlen] = '\0'; From 144fced6852ba11c90560c996e986b4563f089af Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Feb 2025 02:50:55 +0900 Subject: [PATCH 0786/2157] modpost: use strstarts() to clean up parse_source_files() No functional changes are intended. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/mod/sumversion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index e79fc40d852f..3dd28b4d0099 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -330,7 +330,7 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) line++; p = line; - if (strncmp(line, "source_", sizeof("source_")-1) == 0) { + if (strstarts(line, "source_")) { p = strrchr(line, ' '); if (!p) { warn("malformed line: %s\n", line); @@ -344,7 +344,7 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) } continue; } - if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) { + if (strstarts(line, "deps_")) { check_files = 1; continue; } From ac954145e1ee3f72033161cbe4ac0b16b5354ae7 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 10 Feb 2025 17:42:45 +0100 Subject: [PATCH 0787/2157] kbuild: rust: add rustc-min-version support function Introduce `rustc-min-version` support function that mimics `{gcc,clang}-min-version` ones, following commit 88b61e3bff93 ("Makefile.compiler: replace cc-ifversion with compiler-specific macros"). In addition, use it in the first use case we have in the kernel (which was done independently to minimize the changes needed for the fix). Signed-off-by: Miguel Ojeda Reviewed-by: Fiona Behrens Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 14 ++++++++++++++ arch/arm64/Makefile | 2 +- scripts/Makefile.compiler | 4 ++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 25e04e47faff..3b9a8bc671e2 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -667,6 +667,20 @@ cc-cross-prefix endif endif +$(RUSTC) support functions +-------------------------- + +rustc-min-version + rustc-min-version tests if the value of $(CONFIG_RUSTC_VERSION) is greater + than or equal to the provided value and evaluates to y if so. + + Example:: + + rustflags-$(call rustc-min-version, 108500) := -Cfoo + + In this example, rustflags-y will be assigned the value -Cfoo if + $(CONFIG_RUSTC_VERSION) is >= 1.85.0. + $(LD) support functions ----------------------- diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2b25d671365f..1d5dfcd1c13e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -48,7 +48,7 @@ KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \ KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) -ifeq ($(call test-ge, $(CONFIG_RUSTC_VERSION), 108500),y) +ifeq ($(call rustc-min-version, 108500),y) KBUILD_RUSTFLAGS += --target=aarch64-unknown-none-softfloat else KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon" diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 8c1029687e2e..8956587b8547 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -67,6 +67,10 @@ gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1) # Usage: cflags-$(call clang-min-version, 110000) += -foo clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1) +# rustc-min-version +# Usage: rustc-$(call rustc-min-version, 108500) += -Cfoo +rustc-min-version = $(call test-ge, $(CONFIG_RUSTC_VERSION), $1) + # ld-option # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) From 9d702bb1d3c03bb78d4fd2b3424169e3ef4cd402 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Mon, 10 Feb 2025 18:11:54 -0700 Subject: [PATCH 0788/2157] scripts: make python shebangs specific about desired version The RPM packaging tools like to make sure that all packaged python scripts have version-unambiguous shebangs. Be more specific about the desired python version in a couple of places to avoid having to disable these checks in make rpm-pkg. Signed-off-by: Uday Shankar Signed-off-by: Masahiro Yamada --- scripts/show_delta | 2 +- scripts/tracing/draw_functrace.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/show_delta b/scripts/show_delta index 291ad65e3089..3755b6c6e557 100755 --- a/scripts/show_delta +++ b/scripts/show_delta @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only # # show_deltas: Read list of printk messages instrumented with diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 42fa87300941..97594b65f8ce 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only """ From c15253494fd98cd76250c9faaebbc8b45f7d0072 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Feb 2025 01:15:52 +0900 Subject: [PATCH 0789/2157] kbuild: move -fzero-init-padding-bits=all to the top-level Makefile The -fzero-init-padding-bits=all option is not a warning flag, so defining it in scripts/Makefile.extrawarn is inconsistent. Move it to the top-level Makefile for consistency. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook --- Makefile | 3 +++ scripts/Makefile.extrawarn | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1d6a9ec8a2ac..7cd80ff2d69b 100644 --- a/Makefile +++ b/Makefile @@ -928,6 +928,9 @@ KBUILD_CFLAGS += $(CC_AUTO_VAR_INIT_ZERO_ENABLER) endif endif +# Explicitly clear padding bits during variable initialization +KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all) + # While VLAs have been removed, GCC produces unreachable stack probes # for the randomize_kstack_offset feature. Disable it for all compilers. KBUILD_CFLAGS += $(call cc-option, -fno-stack-clash-protection) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index dc081cf46d21..d75897559d18 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -82,9 +82,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -# Explicitly clear padding bits during variable initialization -KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all) - KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused From d0beb73d1d8a89c6c6830d8d873ac9d3163132dc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Feb 2025 20:36:48 +0900 Subject: [PATCH 0790/2157] kbuild: remove KBUILD_ENABLE_EXTRA_GCC_CHECKS support Commit e27128db6283 ("kbuild: rename KBUILD_ENABLE_EXTRA_GCC_CHECKS to KBUILD_EXTRA_WARN") renamed KBUILD_ENABLE_EXTRA_GCC_CHECKS in 2019. The migration in downstream code should be complete. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile b/Makefile index 7cd80ff2d69b..3428e4630069 100644 --- a/Makefile +++ b/Makefile @@ -151,9 +151,6 @@ endif export KBUILD_EXTMOD -# backward compatibility -KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS) - ifeq ("$(origin W)", "command line") KBUILD_EXTRA_WARN := $(W) endif From 700bd25bd4f47a0f4e02e0a25dde05f1a6b16eea Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Feb 2025 12:31:53 +0100 Subject: [PATCH 0791/2157] docs: kconfig: Mention IS_REACHABLE as way for optional dependency Several drivers express optional Kconfig dependency with FOO || !FOO, but for many choices this is not suitable: lack of stubs for !FOO like in HWMON. Describe the second, less favorable way of optional dependency with IS_REACHABLE by moving the code from "imply" chapter to "Optional dependencies". Signed-off-by: Krzysztof Kozlowski Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.rst | 29 ++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst index 2619fdf56e68..a91abb8f6840 100644 --- a/Documentation/kbuild/kconfig-language.rst +++ b/Documentation/kbuild/kconfig-language.rst @@ -194,16 +194,6 @@ applicable everywhere (see syntax). ability to hook into a secondary subsystem while allowing the user to configure that subsystem out without also having to unset these drivers. - Note: If the combination of FOO=y and BAZ=m causes a link error, - you can guard the function call with IS_REACHABLE():: - - foo_init() - { - if (IS_REACHABLE(CONFIG_BAZ)) - baz_register(&foo); - ... - } - Note: If the feature provided by BAZ is highly desirable for FOO, FOO should imply not only BAZ, but also its dependency BAR:: @@ -588,7 +578,9 @@ uses the slightly counterintuitive:: depends on BAR || !BAR This means that there is either a dependency on BAR that disallows -the combination of FOO=y with BAR=m, or BAR is completely disabled. +the combination of FOO=y with BAR=m, or BAR is completely disabled. The BAR +module must provide all the stubs for !BAR case. + For a more formalized approach if there are multiple drivers that have the same dependency, a helper symbol can be used, like:: @@ -599,6 +591,21 @@ the same dependency, a helper symbol can be used, like:: config BAR_OPTIONAL def_tristate BAR || !BAR +Much less favorable way to express optional dependency is IS_REACHABLE() within +the module code, useful for example when the module BAR does not provide +!BAR stubs:: + + foo_init() + { + if (IS_REACHABLE(CONFIG_BAR)) + bar_register(&foo); + ... + } + +IS_REACHABLE() is generally discouraged, because the code will be silently +discarded, when CONFIG_BAR=m and this code is built-in. This is not what users +usually expect when enabling BAR as module. + Kconfig recursive dependency limitations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 268d191abc57a89f4ed92efcb276aae54f86c88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 11:59:21 +0100 Subject: [PATCH 0792/2157] kbuild: implement CONFIG_HEADERS_INSTALL for Usermode Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit userprogs sometimes need access to UAPI headers. This is currently not possible for Usermode Linux, as UM is only a pseudo architecture built on top of a regular architecture and does not have its own UAPI. Instead use the UAPI headers from the underlying regular architecture. Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- Makefile | 5 ++++- lib/Kconfig.debug | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3428e4630069..f2ec8d05c0f5 100644 --- a/Makefile +++ b/Makefile @@ -1361,9 +1361,12 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj PHONY += headers headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts - $(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML)) +ifdef HEADER_ARCH + $(Q)$(MAKE) -f $(srctree)/Makefile HEADER_ARCH= SRCARCH=$(HEADER_ARCH) headers +else $(Q)$(MAKE) $(hdr-inst)=include/uapi $(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi +endif ifdef CONFIG_HEADERS_INSTALL prepare: headers diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 35796c290ca3..17ccd913975d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -473,7 +473,6 @@ config READABLE_ASM config HEADERS_INSTALL bool "Install uapi headers to usr/include" - depends on !UML help This option will install uapi headers (headers exported to user-space) into the usr/include directory for use during the kernel build. From dbdffaf50ff9cee3259a7cef8a7bd9e0f0ba9f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 19 Feb 2025 22:22:13 +0100 Subject: [PATCH 0793/2157] kbuild, rust: use -fremap-path-prefix to make paths relative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remap source path prefixes in all output, including compiler diagnostics, debug information, macro expansions, etc. This removes a few absolute paths from the binary and also makes it possible to use core::panic::Location properly. Equivalent to the same configuration done for C sources in commit 1d3730f0012f ("kbuild: support -fmacro-prefix-map for external modules") and commit a73619a845d5 ("kbuild: use -fmacro-prefix-map to make __FILE__ a relative path"). Link: https://doc.rust-lang.org/rustc/command-line-arguments.html#--remap-path-prefix-remap-source-names-in-output Acked-by: Miguel Ojeda Signed-off-by: Thomas Weißschuh Tested-by: Gary Guo Signed-off-by: Masahiro Yamada --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index f2ec8d05c0f5..30242e731a0d 100644 --- a/Makefile +++ b/Makefile @@ -1068,6 +1068,7 @@ endif # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) +KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= endif # include additional Makefiles when needed From 1195306ee359ced2cb9d7a192e973872571cb93a Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 25 Feb 2025 02:26:19 +0800 Subject: [PATCH 0794/2157] kbuild: deb-pkg: add debarch for ARCH=loongarch64 Fix follow warning when 'make ARCH=loongarch64 bindeb-pkg': ** ** ** WARNING ** ** ** Your architecture doesn't have its equivalent Debian userspace architecture defined! Falling back to the current host architecture (loong64). Please add support for loongarch64 to ./scripts/package/mkdebian ... Reported-by: Shiwei Liu Signed-off-by: WangYuli Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/package/mkdebian | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index b6dd98ca860b..0178000197fe 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -77,6 +77,8 @@ set_debarch() { debarch=i386 fi ;; + loongarch64) + debarch=loong64 ;; esac if [ -z "$debarch" ]; then debarch=$(dpkg-architecture -qDEB_HOST_ARCH) From 82c09de2d4c472ab1b973e6e033671020691e637 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Wed, 26 Feb 2025 21:30:14 +0800 Subject: [PATCH 0795/2157] kbuild: add dependency from vmlinux to sorttable Without this dependency it's really puzzling when we bisect for a "bad" commit in a series of sorttable change: when "git bisect" switches to another commit, "make" just does nothing to vmlinux. Signed-off-by: Xi Ruoyao Acked-by: Steven Rostedt (Google) Signed-off-by: Masahiro Yamada --- scripts/Makefile.vmlinux | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 873caaa55313..fb79fd6b2465 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -79,6 +79,10 @@ ifdef CONFIG_DEBUG_INFO_BTF vmlinux: $(RESOLVE_BTFIDS) endif +ifdef CONFIG_BUILDTIME_TABLE_SORT +vmlinux: scripts/sorttable +endif + # module.builtin.ranges # --------------------------------------------------------------------------- ifdef CONFIG_BUILTIN_MODULE_RANGES From f757f6011c92b5a01db742c39149bed9e526478f Mon Sep 17 00:00:00 2001 From: Seyediman Seyedarab Date: Sat, 1 Mar 2025 17:21:37 -0500 Subject: [PATCH 0796/2157] kbuild: fix argument parsing in scripts/config The script previously assumed --file was always the first argument, which caused issues when it appeared later. This patch updates the parsing logic to scan all arguments to find --file, sets the config file correctly, and resets the argument list with the remaining commands. It also fixes --refresh to respect --file by passing KCONFIG_CONFIG=$FN to make oldconfig. Signed-off-by: Seyediman Seyedarab Signed-off-by: Masahiro Yamada --- scripts/config | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/config b/scripts/config index ff88e2faefd3..ea475c07de28 100755 --- a/scripts/config +++ b/scripts/config @@ -32,6 +32,7 @@ commands: Disable option directly after other option --module-after|-M beforeopt option Turn option into module directly after other option + --refresh Refresh the config using old settings commands can be repeated multiple times @@ -124,16 +125,22 @@ undef_var() { txt_delete "^# $name is not set" "$FN" } -if [ "$1" = "--file" ]; then - FN="$2" - if [ "$FN" = "" ] ; then - usage +FN=.config +CMDS=() +while [[ $# -gt 0 ]]; do + if [ "$1" = "--file" ]; then + if [ "$2" = "" ]; then + usage + fi + FN="$2" + shift 2 + else + CMDS+=("$1") + shift fi - shift 2 -else - FN=.config -fi +done +set -- "${CMDS[@]}" if [ "$1" = "" ] ; then usage fi @@ -217,9 +224,8 @@ while [ "$1" != "" ] ; do set_var "${CONFIG_}$B" "${CONFIG_}$B=m" "${CONFIG_}$A" ;; - # undocumented because it ignores --file (fixme) --refresh) - yes "" | make oldconfig + yes "" | make oldconfig KCONFIG_CONFIG=$FN ;; *) From eb47ee018173f144f10eb38a3f7bd9f17ec6329e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Mar 2025 00:56:22 +0900 Subject: [PATCH 0797/2157] kbuild: add Kbuild bash completion Kernel build commands can sometimes be long, particularly when cross-compiling, making them tedious to type and prone to mistypes. This commit introduces bash completion support for common variables and targets in Kbuild. For installation instructions, please refer to the documentation in Documentation/kbuild/bash-completion.rst. The following examples demonstrate how this saves typing. [Example 1] a long command line for cross-compiling $ make A -> completes 'A' to 'ARCH=' $ make ARCH= -> displays all supported architectures $ make ARCH=arm64 CR -> completes 'CR' to 'CROSS_COMPILE=' $ make ARCH=arm64 CROSS_COMPILE= -> displays installed toolchains $ make ARCH=arm64 CROSS_COMPILE=aa -> completes 'CROSS_COMPILE=aa' to 'CROSS_COMPILE=aarch64-linux-gnu-' $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- def -> completes 'def' to 'defconfig' [Example 2] a single build target $ make f -> completes 'f' to 'fs/' $ make fs/ -> displays objects and sub-directories in fs/ $ make fs/xf -> completes 'fs/xf' to 'fs/xfs/' $ make fs/xfs/l -> completes 'fs/xfs/l' to 'fs/xfs/libxfs/xfs_' $ make fs/xfs/libxfs/xfs_g -> completes 'fs/xfs/libxfs/xfs_g' to 'fs/xfs/libxfs/xfs_group.o' This does not aim to provide a complete list of variables and targets, as there are too many. However, it covers variables and targets used in common scenarios, and I hope this is useful enough. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Tested-by: Nicolas Schier --- Documentation/kbuild/bash-completion.rst | 65 ++++ Documentation/kbuild/index.rst | 2 + MAINTAINERS | 1 + scripts/bash-completion/make | 451 +++++++++++++++++++++++ 4 files changed, 519 insertions(+) create mode 100644 Documentation/kbuild/bash-completion.rst create mode 100644 scripts/bash-completion/make diff --git a/Documentation/kbuild/bash-completion.rst b/Documentation/kbuild/bash-completion.rst new file mode 100644 index 000000000000..2b52dbcd0933 --- /dev/null +++ b/Documentation/kbuild/bash-completion.rst @@ -0,0 +1,65 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +========================== +Bash completion for Kbuild +========================== + +The kernel build system is written using Makefiles, and Bash completion +for the `make` command is available through the `bash-completion`_ project. + +However, the Makefiles for the kernel build are complex. The generic completion +rules for the `make` command do not provide meaningful suggestions for the +kernel build system, except for the options of the `make` command itself. + +To enhance completion for various variables and targets, the kernel source +includes its own completion script at `scripts/bash-completion/make`. + +This script provides additional completions when working within the kernel tree. +Outside the kernel tree, it defaults to the generic completion rules for the +`make` command. + +Prerequisites +============= + +The script relies on helper functions provided by `bash-completion`_ project. +Please ensure it is installed on your system. On most distributions, you can +install the `bash-completion` package through the standard package manager. + +How to use +========== + +You can source the script directly:: + + $ source scripts/bash-completion/make + +Or, you can copy it into the search path for Bash completion scripts. +For example:: + + $ mkdir -p ~/.local/share/bash-completion/completions + $ cp scripts/bash-completion/make ~/.local/share/bash-completion/completions/ + +Details +======= + +The additional completion for Kbuild is enabled in the following cases: + + - You are in the root directory of the kernel source. + - You are in the top-level build directory created by the O= option + (checked via the `source` symlink pointing to the kernel source). + - The -C make option specifies the kernel source or build directory. + - The -f make option specifies a file in the kernel source or build directory. + +If none of the above are met, it falls back to the generic completion rules. + +The completion supports: + + - Commonly used targets, such as `all`, `menuconfig`, `dtbs`, etc. + - Make (or environment) variables, such as `ARCH`, `LLVM`, etc. + - Single-target builds (`foo/bar/baz.o`) + - Configuration files (`*_defconfig` and `*.config`) + +Some variables offer intelligent behavior. For instance, `CROSS_COMPILE=` +followed by a TAB displays installed toolchains. The list of defconfig files +shown depends on the value of the `ARCH=` variable. + +.. _bash-completion: https://github.com/scop/bash-completion/ diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index e82af05cd652..3731ab22bfe7 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -23,6 +23,8 @@ Kernel Build System llvm gendwarfksyms + bash-completion + .. only:: subproject and html Indices diff --git a/MAINTAINERS b/MAINTAINERS index ed7aa6867674..e4849f6ebc98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12566,6 +12566,7 @@ F: Makefile F: scripts/*vmlinux* F: scripts/Kbuild* F: scripts/Makefile* +F: scripts/bash-completion/ F: scripts/basic/ F: scripts/clang-tools/ F: scripts/dummy-tools/ diff --git a/scripts/bash-completion/make b/scripts/bash-completion/make new file mode 100644 index 000000000000..42e8dcead25a --- /dev/null +++ b/scripts/bash-completion/make @@ -0,0 +1,451 @@ +# SPDX-License-Identifier: GPL-2.0-only +# bash completion for GNU make with kbuild extension -*- shell-script -*- + +# Load the default completion script for make. It is typically located at +# /usr/share/bash-completion/completions/make, but we do not rely on it. +__kbuild_load_default_make_completion() +{ + local -a dirs=("${BASH_COMPLETION_USER_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/bash-completion}/completions") + local ifs=$IFS IFS=: dir compfile this_dir + + for dir in ${XDG_DATA_DIRS:-/usr/local/share:/usr/share}; do + dirs+=("$dir"/bash-completion/completions) + done + IFS=$ifs + + this_dir="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" + + for dir in "${dirs[@]}"; do + if [[ ! -d ${dir} || ${dir} = "${this_dir}" ]]; then + continue + fi + + for compfile in make make.bash _make; do + compfile=$dir/$compfile + # Avoid trying to source dirs; https://bugzilla.redhat.com/903540 + if [[ -f ${compfile} ]] && . "${compfile}" &>/dev/null; then + + __kbuild_default_make_completion=$( + # shellcheck disable=SC2046 # word splitting is the point here + set -- $(complete -p make) + + while [[ $# -gt 1 && "$1" != -F ]]; do + shift + done + + if [[ "$1" = -F ]]; then + echo "$2" + fi + ) + + return + fi + done + done +} + +__kbuild_load_default_make_completion + +__kbuild_handle_variable() +{ + local var=${1%%=*} + local cur=${cur#"${var}"=} + local srctree=$2 + local keywords=() + + case $var in + ARCH) + # sub-directories under arch/ + keywords+=($(find "${srctree}/arch" -mindepth 1 -maxdepth 1 -type d -printf '%P\n')) + # architectures hard-coded in the top Makefile + keywords+=(i386 x86_64 sparc32 sparc64 parisc64) + ;; + CROSS_COMPILE) + # toolchains with a full path + local cross_compile=() + local c c2 + _filedir + + for c in "${COMPREPLY[@]}"; do + # eval for tilde expansion + # suppress error, as this fails when it contains a space + eval "c2=${c}" 2>/dev/null || continue + if [[ ${c} == *-elfedit && ! -d ${c2} && -x ${c2} ]]; then + cross_compile+=("${c%elfedit}") + fi + done + + # toolchains in the PATH environment + while read -r c; do + if [[ ${c} == *-elfedit ]]; then + keywords+=("${c%elfedit}") + fi + done < <(compgen -c) + + COMPREPLY=() + _filedir -d + + # Add cross_compile directly without passing it to compgen. + # Otherwise, toolchain paths with a tilde do not work. + # e.g.) + # CROSS_COMPILE=~/0day/gcc-14.2.0-nolibc/aarch64-linux/bin/aarch64-linux- + COMPREPLY+=("${cross_compile[@]}") + ;; + LLVM) + # LLVM=1 uses the default 'clang' etc. + keywords+=(1) + + # suffix for a particular version. LLVM=-18 uses 'clang-18' etc. + while read -r c; do + if [[ ${c} == clang-[0-9]* ]]; then + keywords+=("${c#clang}") + fi + done < <(compgen -c) + + # directory path to LLVM toolchains + _filedir -d + ;; + KCONFIG_ALLCONFIG) + # KCONFIG_ALLCONFIG=1 selects the default fragment + keywords+=(1) + # or the path to a fragment file + _filedir + ;; + C | KBUILD_CHECKSRC) + keywords+=(1 2) + ;; + V | KBUILD_VERBOSE) + keywords+=({,1}{,2}) + ;; + W | KBUILD_EXTRA_WARN) + keywords+=({,1}{,2}{,3}{,c}{,e}) + ;; + KBUILD_ABS_SRCTREE | KBUILD_MODPOST_NOFINAL | KBUILD_MODPOST_WARN | \ + CLIPPY | KBUILD_CLIPPY | KCONFIG_NOSILENTUPDATE | \ + KCONFIG_OVERWRITECONFIG | KCONFIG_WARN_UNKNOWN_SYMBOL | \ + KCONFIG_WERROR ) + keywords+=(1) + ;; + INSTALL_MOD_STRIP) + keywords+=(1 --strip-debug --strip-unneeded) + ;; + O | KBUILD_OUTPUT | M | KBUILD_EXTMOD | MO | KBUILD_EXTMOD_OUTPUT | *_PATH) + # variables that take a directory. + _filedir -d + return + ;; + KBUILD_EXTRA_SYMBOL | KBUILD_KCONFIG | KCONFIG_CONFIG) + # variables that take a file. + _filedir + return + esac + + COMPREPLY+=($(compgen -W "${keywords[*]}" -- "${cur}")) +} + +# Check the -C, -f options and 'source' symlink. Return the source tree we are +# working in. +__kbuild_get_srctree() +{ + local words=("$@") + local cwd makef_dir + + # see if a path was specified with -C/--directory + for ((i = 1; i < ${#words[@]}; i++)); do + if [[ ${words[i]} == -@(C|-directory) ]]; then + # eval for tilde expansion. + # suppress error, as this fails when it contains a space + eval "cwd=${words[i + 1]}" 2>/dev/null + break + fi + done + + if [[ -z ${cwd} ]]; then + cwd=. + fi + + # see if a Makefile was specified with -f/--file/--makefile + for ((i = 1; i < ${#words[@]}; i++)); do + if [[ ${words[i]} == -@(f|-?(make)file) ]]; then + # eval for tilde expansion + # suppress error, as this fails when it contains a space + eval "makef_dir=${words[i + 1]%/*}" 2>/dev/null + break + fi + done + + if [ -z "${makef_dir}" ]; then + makef_dir=${cwd} + elif [[ ${makef_dir} != /* ]]; then + makef_dir=${cwd}/${makef_dir} + fi + + # If ${makef_dir} is a build directory created by the O= option, there + # is a symbolic link 'source', which points to the kernel source tree. + if [[ -L ${makef_dir}/source ]]; then + makef_dir=$(readlink "${makef_dir}/source") + fi + + echo "${makef_dir}" +} + +# Get SRCARCH to do a little more clever things +__kbuild_get_srcarch() +{ + local words=("$@") + local arch srcarch uname_m + + # see if ARCH= is explicitly specified + for ((i = 1; i < ${#words[@]}; i++)); do + if [[ ${words[i]} == ARCH=* ]]; then + arch=${words[i]#ARCH=} + break + fi + done + + # If ARCH= is not specified, check the build marchine's architecture + if [[ -z ${arch} ]]; then + uname_m=$(uname -m) + + # shellcheck disable=SC2209 # 'sh' is SuperH, not a shell command + case ${uname_m} in + arm64 | aarch64*) arch=arm64 ;; + arm* | sa110) arch=arm ;; + i?86 | x86_64) arch=x86 ;; + loongarch*) arch=loongarch ;; + mips*) arch=mips ;; + ppc*) arch=powerpc ;; + riscv*) arch=riscv ;; + s390x) arch=s390 ;; + sh[234]*) arch=sh ;; + sun4u) arch=sparc64 ;; + *) arch=${uname_m} ;; + esac + fi + + case ${arch} in + parisc64) srcarch=parisc ;; + sparc32 | sparc64) srcarch=sparc ;; + i386 | x86_64) srcarch=x86 ;; + *) srcarch=${arch} ;; + esac + + echo "$srcarch" +} + +# small Makefile to parse obj-* syntax +__kbuild_tmp_makefile() +{ +cat <<'EOF' +.PHONY: __default +__default: + $(foreach m,$(obj-y) $(obj-m) $(obj-),$(foreach s, -objs -y -m -,$($(m:%.o=%$s))) $(m)) +EOF +echo "include ${1}" +} + +_make_for_kbuild () +{ + # shellcheck disable=SC2034 # these are set by _init_completion + local cur prev words cword split + _init_completion -s || return + + local srctree + srctree=$(__kbuild_get_srctree "${words[@]}") + + # If 'kernel' and 'Documentation' directories are found, we assume this + # is a kernel tree. Otherwise, we fall back to the generic rule provided + # by the bash-completion project. + if [[ ! -d ${srctree}/kernel || ! -d ${srctree}/Documentation ]]; then + if [ -n "${__kbuild_default_make_completion}" ]; then + "${__kbuild_default_make_completion}" "$@" + fi + return + fi + + # make options with a parameter (copied from the bash-completion project) + case ${prev} in + --file | --makefile | --old-file | --assume-old | --what-if | --new-file | \ + --assume-new | -!(-*)[foW]) + _filedir + return + ;; + --include-dir | --directory | -!(-*)[ICm]) + _filedir -d + return + ;; + -!(-*)E) + COMPREPLY=($(compgen -v -- "$cur")) + return + ;; + --eval | -!(-*)[DVx]) + return + ;; + --jobs | -!(-*)j) + COMPREPLY=($(compgen -W "{1..$(($(_ncpus) * 2))}" -- "$cur")) + return + ;; + esac + + local keywords=() + + case ${cur} in + -*) + # make options (copied from the bash-completion project) + local opts + opts="$(_parse_help "$1")" + COMPREPLY=($(compgen -W "${opts:-$(_parse_usage "$1")}" -- "$cur")) + if [[ ${COMPREPLY-} == *= ]]; then + compopt -o nospace + fi + return + ;; + *=*) + __kbuild_handle_variable "${cur}" "${srctree}" + return + ;; + KBUILD_*) + # There are many variables prefixed with 'KBUILD_'. + # Display them only when 'KBUILD_' is entered. + # shellcheck disable=SC2191 # '=' is appended for variables + keywords+=( + KBUILD_{CHECKSRC,EXTMOD,EXTMOD_OUTPUT,OUTPUT,VERBOSE,EXTRA_WARN,CLIPPY}= + KBUILD_BUILD_{USER,HOST,TIMESTAMP}= + KBUILD_MODPOST_{NOFINAL,WARN}= + KBUILD_{ABS_SRCTREE,EXTRA_SYMBOLS,KCONFIG}= + ) + ;; + KCONFIG_*) + # There are many variables prefixed with 'KCONFIG_'. + # Display them only when 'KCONFIG_' is entered. + # shellcheck disable=SC2191 # '=' is appended for variables + keywords+=( + KCONFIG_{CONFIG,ALLCONFIG,NOSILENTUPDATE,OVERWRITECONFIG}= + KCONFIG_{SEED,PROBABILITY}= + KCONFIG_WARN_UNKNOWN_SYMBOL= + KCONFIG_WERROR= + ) + ;; + *) + # By default, hide KBUILD_* and KCONFIG_* variables. + # Instead, display only the prefix parts. + keywords+=(KBUILD_ KCONFIG_) + ;; + esac + + if [[ ${cur} != /* && ${cur} != *//* ]]; then + local dir srcarch kbuild_file tmp + srcarch=$(__kbuild_get_srcarch "${words[@]}") + + # single build + dir=${cur} + while true; do + if [[ ${dir} == */* ]]; then + dir=${dir%/*} + else + dir=. + fi + + # Search for 'Kbuild' or 'Makefile' in the parent + # directories (may not be a direct parent) + if [[ -f ${srctree}/${dir}/Kbuild ]]; then + kbuild_file=${srctree}/${dir}/Kbuild + break + fi + if [[ -f ${srctree}/${dir}/Makefile ]]; then + kbuild_file=${srctree}/${dir}/Makefile + break + fi + + if [[ ${dir} == . ]]; then + break + fi + done + + if [[ -n ${kbuild_file} ]]; then + tmp=($(__kbuild_tmp_makefile "${kbuild_file}" | + SRCARCH=${srcarch} obj=${dir} src=${srctree}/${dir} \ + "${1}" -n -f - 2>/dev/null)) + + # Add $(obj)/ prefix + if [[ ${dir} != . ]]; then + tmp=("${tmp[@]/#/${dir}\/}") + fi + + keywords+=("${tmp[@]}") + fi + + # *_defconfig and *.config files. These might be grouped into + # subdirectories, e.g., arch/powerpc/configs/*/*_defconfig. + if [[ ${cur} == */* ]]; then + dir=${cur%/*} + else + dir=. + fi + + tmp=($(find "${srctree}/arch/${srcarch}/configs/${dir}" \ + "${srctree}/kernel/configs/${dir}" \ + -mindepth 1 -maxdepth 1 -type d -printf '%P/\n' \ + -o -printf '%P\n' 2>/dev/null)) + + if [[ ${dir} != . ]]; then + tmp=("${tmp[@]/#/${dir}\/}") + fi + + keywords+=("${tmp[@]}") + fi + + # shellcheck disable=SC2191 # '=' is appended for variables + keywords+=( + # + # variables (append =) + # + ARCH= + CROSS_COMPILE= + LLVM= + C= M= MO= O= V= W= + INSTALL{,_MOD,_HDR,_DTBS}_PATH= + KERNELRELEASE= + + # + # targets + # + all help + clean mrproper distclean + clang-{tidy,analyzer} compile_commands.json + coccicheck + dtbs{,_check,_install} dt_binding_{check,schemas} + headers{,_install} + vmlinux install + modules{,_prepare,_install,_sign} + vdso_install + tags TAGS cscope gtags + rust{available,fmt,fmtcheck} + kernel{version,release} image_name + kselftest{,-all,-install,-clean,-merge} + + # configuration + {,old,olddef,sync,def,savedef,rand,listnew,helpnew,test,tiny}config + {,build_}{menu,n,g,x}config + local{mod,yes}config + all{no,yes,mod,def}config + {yes2mod,mod2yes,mod2no}config + + # docs + {html,textinfo,info,latex,pdf,epub,xml,linkcheck,refcheck,clean}docs + + # package + {,bin,src}{rpm,deb}-pkg + {pacman,dir,tar}-pkg + tar{,gz,bz2,xz,zst}-pkg + perf-tar{,gz,bz2,xz,zst}-src-pkg + ) + + COMPREPLY=($(compgen -W "${keywords[*]}" -- "${cur}")) + + # Do not append a space for variables, subdirs, "KBUILD_", "KCONFIG_". + if [[ ${COMPREPLY-} == *[=/] || ${COMPREPLY-} =~ ^(KBUILD|KCONFIG)_$ ]]; then + compopt -o nospace + fi + +} && complete -F _make_for_kbuild make From 87bb368d0637c466a8a77433837056f981d01991 Mon Sep 17 00:00:00 2001 From: Kris Van Hees Date: Fri, 7 Mar 2025 11:53:28 -0500 Subject: [PATCH 0798/2157] kbuild: exclude .rodata.(cst|str)* when building ranges The .rodata.(cst|str)* sections are often resized during the final linking and since these sections do not cover actual symbols there is no need to include them in the modules.builtin.ranges data. When these sections were included in processing and resizing occurred, modules were reported with ranges that extended beyond their true end, causing subsequent symbols (in address order) to be associated with the wrong module. Fixes: 5f5e7344322f ("kbuild: generate offset range data for builtin modules") Cc: stable@vger.kernel.org Signed-off-by: Kris Van Hees Reviewed-by: Jack Vogel Signed-off-by: Masahiro Yamada --- scripts/generate_builtin_ranges.awk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/generate_builtin_ranges.awk b/scripts/generate_builtin_ranges.awk index b9ec761b3bef..d4bd5c2b998c 100755 --- a/scripts/generate_builtin_ranges.awk +++ b/scripts/generate_builtin_ranges.awk @@ -282,6 +282,11 @@ ARGIND == 2 && !anchor && NF == 2 && $1 ~ /^0x/ && $2 !~ /^0x/ { # section. # ARGIND == 2 && sect && NF == 4 && /^ [^ \*]/ && !($1 in sect_addend) { + # There are a few sections with constant data (without symbols) that + # can get resized during linking, so it is best to ignore them. + if ($1 ~ /^\.rodata\.(cst|str)[0-9]/) + next; + if (!($1 in sect_base)) { sect_base[$1] = base; From 479fde496586efa1105496c536c4c65bed43fe2b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 3 Mar 2025 01:42:54 +0900 Subject: [PATCH 0799/2157] Revert "kheaders: Ignore silly-rename files" This reverts commit 973b710b8821c3401ad7a25360c89e94b26884ac. As I mentioned in the review [1], I do not believe this was the correct fix. Commit 41a00051283e ("kheaders: prevent `find` from seeing perl temp files") addressed the root cause of the issue. I asked David to test it but received no response. Commit 973b710b8821 ("kheaders: Ignore silly-rename files") merely worked around the issue by excluding such files, rather than preventing their creation. I have reverted the latter commit, hoping the issue has already been resolved by the former. If the silly-rename files come back, I will restore this change (or preferably, investigate the root cause). [1]: https://lore.kernel.org/lkml/CAK7LNAQndCMudAtVRAbfSfnV+XhSMDcnP-s1_GAQh8UiEdLBSg@mail.gmail.com/ Signed-off-by: Masahiro Yamada --- kernel/gen_kheaders.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 00529c81cc40..c9e5dc068e85 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -89,7 +89,6 @@ rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ - --exclude=".__afs*" --exclude=".nfs*" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null From ba4d705046fb568bad4aeffeb79db78d4e835a1f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Mar 2025 19:26:03 +0900 Subject: [PATCH 0800/2157] kbuild: do not generate .tmp_vmlinux*.map when CONFIG_VMLINUX_MAP=y Commit 5cc124720461 ("kbuild: add CONFIG_VMLINUX_MAP expert option") mentioned that "the .map file can be rather large (several MB), and that's a waste of space when one isn't interested in these things." If that is the case, generating map files for the intermediate tmp_vmlinux* files is also a waste of space. It is unlikely that anyone would be interested in the .tmp_vmlinux*.map files. This commit stops passing the -Map= option when linking the .tmp_vmlinux* intermediates. I also hard-coded the file name 'vmlinux.map' instead of ${output}.map because a later commit will introduce vmlinux.unstripped but I want to keep the current name of the map file. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel --- scripts/link-vmlinux.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 56a077d204cf..96ae0bb65308 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -97,8 +97,8 @@ vmlinux_link() ldflags="${ldflags} ${wl}--strip-debug" fi - if is_enabled CONFIG_VMLINUX_MAP; then - ldflags="${ldflags} ${wl}-Map=${output}.map" + if [ -n "${generate_map}" ]; then + ldflags="${ldflags} ${wl}-Map=vmlinux.map" fi ${ld} ${ldflags} -o ${output} \ @@ -210,6 +210,7 @@ fi btf_vmlinux_bin_o= kallsymso= strip_debug= +generate_map= if is_enabled CONFIG_KALLSYMS; then true > .tmp_vmlinux0.syms @@ -278,6 +279,10 @@ fi strip_debug= +if is_enabled CONFIG_VMLINUX_MAP; then + generate_map=1 +fi + vmlinux_link vmlinux # fill in BTF IDs From ca2c736554c105897d67a015a97973af315e1c32 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 14 Mar 2025 08:58:23 +1030 Subject: [PATCH 0801/2157] firewire: core: avoid -Wflex-array-member-not-at-end warning Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of a flexible structure where the size of the flexible-array member is known at compile-time, and refactor the rest of the code, accordingly. So, with these changes, fix the following warning: drivers/firewire/core-cdev.c:1141:38: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/Z9NcB81yfPo-8o0h@kspp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-cdev.c | 42 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index b360dca2c69e..bd04980009a4 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1137,10 +1137,7 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg) unsigned long payload, buffer_end, transmit_header_bytes = 0; u32 control; int count; - struct { - struct fw_iso_packet packet; - u8 header[256]; - } u; + DEFINE_RAW_FLEX(struct fw_iso_packet, u, header, 64); if (ctx == NULL || a->handle != 0) return -EINVAL; @@ -1172,29 +1169,29 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg) while (p < end) { if (get_user(control, &p->control)) return -EFAULT; - u.packet.payload_length = GET_PAYLOAD_LENGTH(control); - u.packet.interrupt = GET_INTERRUPT(control); - u.packet.skip = GET_SKIP(control); - u.packet.tag = GET_TAG(control); - u.packet.sy = GET_SY(control); - u.packet.header_length = GET_HEADER_LENGTH(control); + u->payload_length = GET_PAYLOAD_LENGTH(control); + u->interrupt = GET_INTERRUPT(control); + u->skip = GET_SKIP(control); + u->tag = GET_TAG(control); + u->sy = GET_SY(control); + u->header_length = GET_HEADER_LENGTH(control); switch (ctx->type) { case FW_ISO_CONTEXT_TRANSMIT: - if (u.packet.header_length & 3) + if (u->header_length & 3) return -EINVAL; - transmit_header_bytes = u.packet.header_length; + transmit_header_bytes = u->header_length; break; case FW_ISO_CONTEXT_RECEIVE: - if (u.packet.header_length == 0 || - u.packet.header_length % ctx->header_size != 0) + if (u->header_length == 0 || + u->header_length % ctx->header_size != 0) return -EINVAL; break; case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL: - if (u.packet.payload_length == 0 || - u.packet.payload_length & 3) + if (u->payload_length == 0 || + u->payload_length & 3) return -EINVAL; break; } @@ -1204,20 +1201,19 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg) if (next > end) return -EINVAL; if (copy_from_user - (u.packet.header, p->header, transmit_header_bytes)) + (u->header, p->header, transmit_header_bytes)) return -EFAULT; - if (u.packet.skip && ctx->type == FW_ISO_CONTEXT_TRANSMIT && - u.packet.header_length + u.packet.payload_length > 0) + if (u->skip && ctx->type == FW_ISO_CONTEXT_TRANSMIT && + u->header_length + u->payload_length > 0) return -EINVAL; - if (payload + u.packet.payload_length > buffer_end) + if (payload + u->payload_length > buffer_end) return -EINVAL; - if (fw_iso_context_queue(ctx, &u.packet, - &client->buffer, payload)) + if (fw_iso_context_queue(ctx, u, &client->buffer, payload)) break; p = next; - payload += u.packet.payload_length; + payload += u->payload_length; count++; } fw_iso_context_queue_flush(ctx); From ad3746700ae238a96a9a4244d02a2e8eb00f157e Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Thu, 13 Mar 2025 16:44:19 +0800 Subject: [PATCH 0802/2157] watchdog: npcm: Remove unnecessary NULL check before clk_prepare_enable/clk_disable_unprepare clk_prepare_enable() and clk_disable_unprepare() already checked NULL clock parameter.Remove unneeded NULL check for clk here. Signed-off-by: Chen Ni Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250313084420.2481763-1-nichen@iscas.ac.cn Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/npcm_wdt.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/watchdog/npcm_wdt.c b/drivers/watchdog/npcm_wdt.c index a5dd1c230137..e62ea054bc61 100644 --- a/drivers/watchdog/npcm_wdt.c +++ b/drivers/watchdog/npcm_wdt.c @@ -68,8 +68,7 @@ static int npcm_wdt_start(struct watchdog_device *wdd) struct npcm_wdt *wdt = to_npcm_wdt(wdd); u32 val; - if (wdt->clk) - clk_prepare_enable(wdt->clk); + clk_prepare_enable(wdt->clk); if (wdd->timeout < 2) val = 0x800; @@ -105,8 +104,7 @@ static int npcm_wdt_stop(struct watchdog_device *wdd) writel(0, wdt->reg); - if (wdt->clk) - clk_disable_unprepare(wdt->clk); + clk_disable_unprepare(wdt->clk); return 0; } @@ -156,8 +154,7 @@ static int npcm_wdt_restart(struct watchdog_device *wdd, struct npcm_wdt *wdt = to_npcm_wdt(wdd); /* For reset, we start the WDT clock and leave it running. */ - if (wdt->clk) - clk_prepare_enable(wdt->clk); + clk_prepare_enable(wdt->clk); writel(NPCM_WTR | NPCM_WTRE | NPCM_WTE, wdt->reg); udelay(1000); From 48a136639ec233614a61653e19f559977d5da2b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 17:02:44 +0100 Subject: [PATCH 0803/2157] watchdog: aspeed: fix 64-bit division On 32-bit architectures, the new calculation causes a build failure: ld.lld-21: error: undefined symbol: __aeabi_uldivmod Since neither value is ever larger than a register, cast both sides into a uintptr_t. Fixes: 5c03f9f4d362 ("watchdog: aspeed: Update bootstatus handling") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250314160248.502324-1-arnd@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index 369635b38ca0..837e15701c0e 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -254,7 +254,7 @@ static void aspeed_wdt_update_bootstatus(struct platform_device *pdev, if (!of_device_is_compatible(pdev->dev.of_node, "aspeed,ast2400-wdt")) { res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - idx = ((intptr_t)wdt->base & 0x00000fff) / resource_size(res); + idx = ((intptr_t)wdt->base & 0x00000fff) / (uintptr_t)resource_size(res); } scu_base = syscon_regmap_lookup_by_compatible(scu.compatible); From 5a0fcb0ef5584caf7da3f31896e08650c532e4c1 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 19 Feb 2025 18:00:06 +1100 Subject: [PATCH 0804/2157] cxl: Remove driver Remove the cxl driver that provides support for the IBM Coherent Accelerator Processor Interface. Revert or clean up associated code in arch/powerpc that is no longer necessary. cxl has received minimal maintenance for several years, and is not supported on the Power10 processor. We aren't aware of any users who are likely to be using recent kernels. Thanks to Mikey Neuling, Ian Munsie, Daniel Axtens, Frederic Barrat, Christophe Lombard, Philippe Bergheaud, Vaibhav Jain and Alastair D'Silva for their work on this driver over the years. Signed-off-by: Andrew Donnellan Acked-by: Frederic Barrat Acked-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20250219070007.177725-2-ajd@linux.ibm.com --- .../ABI/{obsolete => removed}/sysfs-class-cxl | 55 +- Documentation/arch/powerpc/cxl.rst | 470 ---- Documentation/arch/powerpc/index.rst | 1 - .../userspace-api/ioctl/ioctl-number.rst | 2 +- MAINTAINERS | 12 - arch/powerpc/configs/skiroot_defconfig | 1 - arch/powerpc/include/asm/copro.h | 6 - arch/powerpc/include/asm/device.h | 3 - arch/powerpc/include/asm/pnv-pci.h | 17 - arch/powerpc/mm/book3s64/hash_native.c | 13 +- arch/powerpc/mm/book3s64/hash_utils.c | 10 +- arch/powerpc/mm/book3s64/pgtable.c | 1 - arch/powerpc/mm/book3s64/slice.c | 6 +- arch/powerpc/mm/copro_fault.c | 11 - arch/powerpc/platforms/powernv/Makefile | 1 - arch/powerpc/platforms/powernv/pci-cxl.c | 153 -- arch/powerpc/platforms/powernv/pci-ioda.c | 43 - arch/powerpc/platforms/powernv/pci.c | 61 - arch/powerpc/platforms/powernv/pci.h | 2 - drivers/misc/Kconfig | 1 - drivers/misc/Makefile | 1 - drivers/misc/cxl/Kconfig | 28 - drivers/misc/cxl/Makefile | 14 - drivers/misc/cxl/api.c | 532 ----- drivers/misc/cxl/base.c | 126 - drivers/misc/cxl/context.c | 362 --- drivers/misc/cxl/cxl.h | 1135 --------- drivers/misc/cxl/cxllib.c | 271 --- drivers/misc/cxl/debugfs.c | 134 -- drivers/misc/cxl/fault.c | 341 --- drivers/misc/cxl/file.c | 699 ------ drivers/misc/cxl/flash.c | 538 ----- drivers/misc/cxl/guest.c | 1208 ---------- drivers/misc/cxl/hcalls.c | 643 ----- drivers/misc/cxl/hcalls.h | 200 -- drivers/misc/cxl/irq.c | 450 ---- drivers/misc/cxl/main.c | 383 --- drivers/misc/cxl/native.c | 1592 ------------- drivers/misc/cxl/of.c | 346 --- drivers/misc/cxl/pci.c | 2103 ----------------- drivers/misc/cxl/sysfs.c | 771 ------ drivers/misc/cxl/trace.c | 9 - drivers/misc/cxl/trace.h | 691 ------ drivers/misc/cxl/vphb.c | 309 --- include/misc/cxl-base.h | 48 - include/misc/cxl.h | 265 --- include/misc/cxllib.h | 129 - include/uapi/misc/cxl.h | 156 -- 48 files changed, 42 insertions(+), 14311 deletions(-) rename Documentation/ABI/{obsolete => removed}/sysfs-class-cxl (87%) delete mode 100644 Documentation/arch/powerpc/cxl.rst delete mode 100644 arch/powerpc/platforms/powernv/pci-cxl.c delete mode 100644 drivers/misc/cxl/Kconfig delete mode 100644 drivers/misc/cxl/Makefile delete mode 100644 drivers/misc/cxl/api.c delete mode 100644 drivers/misc/cxl/base.c delete mode 100644 drivers/misc/cxl/context.c delete mode 100644 drivers/misc/cxl/cxl.h delete mode 100644 drivers/misc/cxl/cxllib.c delete mode 100644 drivers/misc/cxl/debugfs.c delete mode 100644 drivers/misc/cxl/fault.c delete mode 100644 drivers/misc/cxl/file.c delete mode 100644 drivers/misc/cxl/flash.c delete mode 100644 drivers/misc/cxl/guest.c delete mode 100644 drivers/misc/cxl/hcalls.c delete mode 100644 drivers/misc/cxl/hcalls.h delete mode 100644 drivers/misc/cxl/irq.c delete mode 100644 drivers/misc/cxl/main.c delete mode 100644 drivers/misc/cxl/native.c delete mode 100644 drivers/misc/cxl/of.c delete mode 100644 drivers/misc/cxl/pci.c delete mode 100644 drivers/misc/cxl/sysfs.c delete mode 100644 drivers/misc/cxl/trace.c delete mode 100644 drivers/misc/cxl/trace.h delete mode 100644 drivers/misc/cxl/vphb.c delete mode 100644 include/misc/cxl-base.h delete mode 100644 include/misc/cxl.h delete mode 100644 include/misc/cxllib.h delete mode 100644 include/uapi/misc/cxl.h diff --git a/Documentation/ABI/obsolete/sysfs-class-cxl b/Documentation/ABI/removed/sysfs-class-cxl similarity index 87% rename from Documentation/ABI/obsolete/sysfs-class-cxl rename to Documentation/ABI/removed/sysfs-class-cxl index 8cba1b626985..15756a49eba2 100644 --- a/Documentation/ABI/obsolete/sysfs-class-cxl +++ b/Documentation/ABI/removed/sysfs-class-cxl @@ -1,5 +1,4 @@ -The cxl driver is no longer maintained, and will be removed from the kernel in -the near future. +The cxl driver was removed in 6.14. Please note that attributes that are shared between devices are stored in the directory pointed to by the symlink device/. @@ -10,7 +9,7 @@ For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is Slave contexts (eg. /sys/class/cxl/afu0.0s): What: /sys/class/cxl//afu_err_buf -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only AFU Error Buffer contents. The contents of this file are @@ -21,7 +20,7 @@ Description: read only What: /sys/class/cxl//irqs_max -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write Decimal value of maximum number of interrupts that can be @@ -32,7 +31,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//irqs_min -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the minimum number of interrupts that @@ -42,7 +41,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//mmio_size -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the size of the MMIO space that may be mmapped @@ -50,7 +49,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//modes_supported -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only List of the modes this AFU supports. One per line. @@ -58,7 +57,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//mode -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write The current mode the AFU is using. Will be one of the modes @@ -68,7 +67,7 @@ Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//prefault_mode -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write Set the mode for prefaulting in segments into the segment table @@ -88,7 +87,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//reset -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: write only Writing 1 here will reset the AFU provided there are not @@ -96,14 +95,14 @@ Description: write only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//api_version -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the current version of the kernel/user API. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//api_version_compatible -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the lowest version of the userspace API @@ -117,7 +116,7 @@ An AFU may optionally export one or more PCIe like configuration records, known as AFU configuration records, which will show up here (if present). What: /sys/class/cxl//cr/vendor -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the vendor ID found in this AFU @@ -125,7 +124,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/device -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the device ID found in this AFU @@ -133,7 +132,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/class -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Hexadecimal value of the class code found in this AFU @@ -141,7 +140,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//cr/config -Date: February 2015 +Date: February 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only This binary file provides raw access to the AFU configuration @@ -155,7 +154,7 @@ Users: https://github.com/ibm-capi/libcxl Master contexts (eg. /sys/class/cxl/afu0.0m) What: /sys/class/cxl/m/mmio_size -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the size of the MMIO space that may be mmapped @@ -163,14 +162,14 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl/m/pp_mmio_len -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the Per Process MMIO space length. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl/m/pp_mmio_off -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -181,21 +180,21 @@ Users: https://github.com/ibm-capi/libcxl Card info (eg. /sys/class/cxl/card0) What: /sys/class/cxl//caia_version -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Identifies the CAIA Version the card implements. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//psl_revision -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Identifies the revision level of the PSL. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//base_image -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -206,7 +205,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//image_loaded -Date: September 2014 +Date: September 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only (not in a guest) @@ -215,7 +214,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//load_image_on_perst -Date: December 2014 +Date: December 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write (not in a guest) @@ -232,7 +231,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//reset -Date: October 2014 +Date: October 2014, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: write only Writing 1 will issue a PERST to card provided there are no @@ -243,7 +242,7 @@ Description: write only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//perst_reloads_same_image -Date: July 2015 +Date: July 2015, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write (not in a guest) @@ -257,7 +256,7 @@ Description: read/write Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//psl_timebase_synced -Date: March 2016 +Date: March 2016, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Returns 1 if the psl timebase register is synchronized @@ -265,7 +264,7 @@ Description: read only Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//tunneled_ops_supported -Date: May 2018 +Date: May 2018, removed February 2025 Contact: linuxppc-dev@lists.ozlabs.org Description: read only Returns 1 if tunneled operations are supported in capi mode, diff --git a/Documentation/arch/powerpc/cxl.rst b/Documentation/arch/powerpc/cxl.rst deleted file mode 100644 index 778adda740d2..000000000000 --- a/Documentation/arch/powerpc/cxl.rst +++ /dev/null @@ -1,470 +0,0 @@ -==================================== -Coherent Accelerator Interface (CXL) -==================================== - -Introduction -============ - - The coherent accelerator interface is designed to allow the - coherent connection of accelerators (FPGAs and other devices) to a - POWER system. These devices need to adhere to the Coherent - Accelerator Interface Architecture (CAIA). - - IBM refers to this as the Coherent Accelerator Processor Interface - or CAPI. In the kernel it's referred to by the name CXL to avoid - confusion with the ISDN CAPI subsystem. - - Coherent in this context means that the accelerator and CPUs can - both access system memory directly and with the same effective - addresses. - - **This driver is deprecated and will be removed in a future release.** - -Hardware overview -================= - - :: - - POWER8/9 FPGA - +----------+ +---------+ - | | | | - | CPU | | AFU | - | | | | - | | | | - | | | | - +----------+ +---------+ - | PHB | | | - | +------+ | PSL | - | | CAPP |<------>| | - +---+------+ PCIE +---------+ - - The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP) - unit which is part of the PCIe Host Bridge (PHB). This is managed - by Linux by calls into OPAL. Linux doesn't directly program the - CAPP. - - The FPGA (or coherently attached device) consists of two parts. - The POWER Service Layer (PSL) and the Accelerator Function Unit - (AFU). The AFU is used to implement specific functionality behind - the PSL. The PSL, among other things, provides memory address - translation services to allow each AFU direct access to userspace - memory. - - The AFU is the core part of the accelerator (eg. the compression, - crypto etc function). The kernel has no knowledge of the function - of the AFU. Only userspace interacts directly with the AFU. - - The PSL provides the translation and interrupt services that the - AFU needs. This is what the kernel interacts with. For example, if - the AFU needs to read a particular effective address, it sends - that address to the PSL, the PSL then translates it, fetches the - data from memory and returns it to the AFU. If the PSL has a - translation miss, it interrupts the kernel and the kernel services - the fault. The context to which this fault is serviced is based on - who owns that acceleration function. - - - POWER8 and PSL Version 8 are compliant to the CAIA Version 1.0. - - POWER9 and PSL Version 9 are compliant to the CAIA Version 2.0. - - This PSL Version 9 provides new features such as: - - * Interaction with the nest MMU on the P9 chip. - * Native DMA support. - * Supports sending ASB_Notify messages for host thread wakeup. - * Supports Atomic operations. - * etc. - - Cards with a PSL9 won't work on a POWER8 system and cards with a - PSL8 won't work on a POWER9 system. - -AFU Modes -========= - - There are two programming modes supported by the AFU. Dedicated - and AFU directed. AFU may support one or both modes. - - When using dedicated mode only one MMU context is supported. In - this mode, only one userspace process can use the accelerator at - time. - - When using AFU directed mode, up to 16K simultaneous contexts can - be supported. This means up to 16K simultaneous userspace - applications may use the accelerator (although specific AFUs may - support fewer). In this mode, the AFU sends a 16 bit context ID - with each of its requests. This tells the PSL which context is - associated with each operation. If the PSL can't translate an - operation, the ID can also be accessed by the kernel so it can - determine the userspace context associated with an operation. - - -MMIO space -========== - - A portion of the accelerator MMIO space can be directly mapped - from the AFU to userspace. Either the whole space can be mapped or - just a per context portion. The hardware is self describing, hence - the kernel can determine the offset and size of the per context - portion. - - -Interrupts -========== - - AFUs may generate interrupts that are destined for userspace. These - are received by the kernel as hardware interrupts and passed onto - userspace by a read syscall documented below. - - Data storage faults and error interrupts are handled by the kernel - driver. - - -Work Element Descriptor (WED) -============================= - - The WED is a 64-bit parameter passed to the AFU when a context is - started. Its format is up to the AFU hence the kernel has no - knowledge of what it represents. Typically it will be the - effective address of a work queue or status block where the AFU - and userspace can share control and status information. - - - - -User API -======== - -1. AFU character devices -^^^^^^^^^^^^^^^^^^^^^^^^ - - For AFUs operating in AFU directed mode, two character device - files will be created. /dev/cxl/afu0.0m will correspond to a - master context and /dev/cxl/afu0.0s will correspond to a slave - context. Master contexts have access to the full MMIO space an - AFU provides. Slave contexts have access to only the per process - MMIO space an AFU provides. - - For AFUs operating in dedicated process mode, the driver will - only create a single character device per AFU called - /dev/cxl/afu0.0d. This will have access to the entire MMIO space - that the AFU provides (like master contexts in AFU directed). - - The types described below are defined in include/uapi/misc/cxl.h - - The following file operations are supported on both slave and - master devices. - - A userspace library libcxl is available here: - - https://github.com/ibm-capi/libcxl - - This provides a C interface to this kernel API. - -open ----- - - Opens the device and allocates a file descriptor to be used with - the rest of the API. - - A dedicated mode AFU only has one context and only allows the - device to be opened once. - - An AFU directed mode AFU can have many contexts, the device can be - opened once for each context that is available. - - When all available contexts are allocated the open call will fail - and return -ENOSPC. - - Note: - IRQs need to be allocated for each context, which may limit - the number of contexts that can be created, and therefore - how many times the device can be opened. The POWER8 CAPP - supports 2040 IRQs and 3 are used by the kernel, so 2037 are - left. If 1 IRQ is needed per context, then only 2037 - contexts can be allocated. If 4 IRQs are needed per context, - then only 2037/4 = 509 contexts can be allocated. - - -ioctl ------ - - CXL_IOCTL_START_WORK: - Starts the AFU context and associates it with the current - process. Once this ioctl is successfully executed, all memory - mapped into this process is accessible to this AFU context - using the same effective addresses. No additional calls are - required to map/unmap memory. The AFU memory context will be - updated as userspace allocates and frees memory. This ioctl - returns once the AFU context is started. - - Takes a pointer to a struct cxl_ioctl_start_work - - :: - - struct cxl_ioctl_start_work { - __u64 flags; - __u64 work_element_descriptor; - __u64 amr; - __s16 num_interrupts; - __s16 reserved1; - __s32 reserved2; - __u64 reserved3; - __u64 reserved4; - __u64 reserved5; - __u64 reserved6; - }; - - flags: - Indicates which optional fields in the structure are - valid. - - work_element_descriptor: - The Work Element Descriptor (WED) is a 64-bit argument - defined by the AFU. Typically this is an effective - address pointing to an AFU specific structure - describing what work to perform. - - amr: - Authority Mask Register (AMR), same as the powerpc - AMR. This field is only used by the kernel when the - corresponding CXL_START_WORK_AMR value is specified in - flags. If not specified the kernel will use a default - value of 0. - - num_interrupts: - Number of userspace interrupts to request. This field - is only used by the kernel when the corresponding - CXL_START_WORK_NUM_IRQS value is specified in flags. - If not specified the minimum number required by the - AFU will be allocated. The min and max number can be - obtained from sysfs. - - reserved fields: - For ABI padding and future extensions - - CXL_IOCTL_GET_PROCESS_ELEMENT: - Get the current context id, also known as the process element. - The value is returned from the kernel as a __u32. - - -mmap ----- - - An AFU may have an MMIO space to facilitate communication with the - AFU. If it does, the MMIO space can be accessed via mmap. The size - and contents of this area are specific to the particular AFU. The - size can be discovered via sysfs. - - In AFU directed mode, master contexts are allowed to map all of - the MMIO space and slave contexts are allowed to only map the per - process MMIO space associated with the context. In dedicated - process mode the entire MMIO space can always be mapped. - - This mmap call must be done after the START_WORK ioctl. - - Care should be taken when accessing MMIO space. Only 32 and 64-bit - accesses are supported by POWER8. Also, the AFU will be designed - with a specific endianness, so all MMIO accesses should consider - endianness (recommend endian(3) variants like: le64toh(), - be64toh() etc). These endian issues equally apply to shared memory - queues the WED may describe. - - -read ----- - - Reads events from the AFU. Blocks if no events are pending - (unless O_NONBLOCK is supplied). Returns -EIO in the case of an - unrecoverable error or if the card is removed. - - read() will always return an integral number of events. - - The buffer passed to read() must be at least 4K bytes. - - The result of the read will be a buffer of one or more events, - each event is of type struct cxl_event, of varying size:: - - struct cxl_event { - struct cxl_event_header header; - union { - struct cxl_event_afu_interrupt irq; - struct cxl_event_data_storage fault; - struct cxl_event_afu_error afu_error; - }; - }; - - The struct cxl_event_header is defined as - - :: - - struct cxl_event_header { - __u16 type; - __u16 size; - __u16 process_element; - __u16 reserved1; - }; - - type: - This defines the type of event. The type determines how - the rest of the event is structured. These types are - described below and defined by enum cxl_event_type. - - size: - This is the size of the event in bytes including the - struct cxl_event_header. The start of the next event can - be found at this offset from the start of the current - event. - - process_element: - Context ID of the event. - - reserved field: - For future extensions and padding. - - If the event type is CXL_EVENT_AFU_INTERRUPT then the event - structure is defined as - - :: - - struct cxl_event_afu_interrupt { - __u16 flags; - __u16 irq; /* Raised AFU interrupt number */ - __u32 reserved1; - }; - - flags: - These flags indicate which optional fields are present - in this struct. Currently all fields are mandatory. - - irq: - The IRQ number sent by the AFU. - - reserved field: - For future extensions and padding. - - If the event type is CXL_EVENT_DATA_STORAGE then the event - structure is defined as - - :: - - struct cxl_event_data_storage { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 addr; - __u64 dsisr; - __u64 reserved3; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are mandatory. - - address: - The address that the AFU unsuccessfully attempted to - access. Valid accesses will be handled transparently by the - kernel but invalid accesses will generate this event. - - dsisr: - This field gives information on the type of fault. It is a - copy of the DSISR from the PSL hardware when the address - fault occurred. The form of the DSISR is as defined in the - CAIA. - - reserved fields: - For future extensions - - If the event type is CXL_EVENT_AFU_ERROR then the event structure - is defined as - - :: - - struct cxl_event_afu_error { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 error; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are Mandatory. - - error: - Error status from the AFU. Defined by the AFU. - - reserved fields: - For future extensions and padding - - -2. Card character device (powerVM guest only) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - In a powerVM guest, an extra character device is created for the - card. The device is only used to write (flash) a new image on the - FPGA accelerator. Once the image is written and verified, the - device tree is updated and the card is reset to reload the updated - image. - -open ----- - - Opens the device and allocates a file descriptor to be used with - the rest of the API. The device can only be opened once. - -ioctl ------ - -CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE: - Starts and controls flashing a new FPGA image. Partial - reconfiguration is not supported (yet), so the image must contain - a copy of the PSL and AFU(s). Since an image can be quite large, - the caller may have to iterate, splitting the image in smaller - chunks. - - Takes a pointer to a struct cxl_adapter_image:: - - struct cxl_adapter_image { - __u64 flags; - __u64 data; - __u64 len_data; - __u64 len_image; - __u64 reserved1; - __u64 reserved2; - __u64 reserved3; - __u64 reserved4; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are mandatory. - - data: - Pointer to a buffer with part of the image to write to the - card. - - len_data: - Size of the buffer pointed to by data. - - len_image: - Full size of the image. - - -Sysfs Class -=========== - - A cxl sysfs class is added under /sys/class/cxl to facilitate - enumeration and tuning of the accelerators. Its layout is - described in Documentation/ABI/obsolete/sysfs-class-cxl - - -Udev rules -========== - - The following udev rules could be used to create a symlink to the - most logical chardev to use in any programming mode (afuX.Yd for - dedicated, afuX.Ys for afu directed), since the API is virtually - identical for each:: - - SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b" - SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \ - KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b" diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst index 995268530f21..0560cbae5fa1 100644 --- a/Documentation/arch/powerpc/index.rst +++ b/Documentation/arch/powerpc/index.rst @@ -12,7 +12,6 @@ powerpc bootwrapper cpu_families cpu_features - cxl dawr-power9 dexcr dscr diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index efe133c50615..0b46257904d6 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -371,7 +371,7 @@ Code Seq# Include File Comments 0xB7 all uapi/linux/nsfs.h > 0xB8 01-02 uapi/misc/mrvl_cn10k_dpi.h Marvell CN10K DPI driver 0xC0 00-0F linux/usb/iowarrior.h -0xCA 00-0F uapi/misc/cxl.h +0xCA 00-0F uapi/misc/cxl.h Dead since 6.14 0xCA 10-2F uapi/misc/ocxl.h 0xCA 80-BF uapi/scsi/cxlflash_ioctl.h Dead since 6.14 0xCB 00-1F CBM serial IEC bus in development: diff --git a/MAINTAINERS b/MAINTAINERS index ac8b15cec513..42dcc87e2112 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6306,18 +6306,6 @@ S: Maintained W: http://www.chelsio.com F: drivers/net/ethernet/chelsio/cxgb4vf/ -CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER -M: Frederic Barrat -M: Andrew Donnellan -L: linuxppc-dev@lists.ozlabs.org -S: Obsolete -F: Documentation/ABI/obsolete/sysfs-class-cxl -F: Documentation/arch/powerpc/cxl.rst -F: arch/powerpc/platforms/powernv/pci-cxl.c -F: drivers/misc/cxl/ -F: include/misc/cxl* -F: include/uapi/misc/cxl.h - CYBERPRO FB DRIVER M: Russell King L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 9d44e6630908..d70df2c3e393 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -78,7 +78,6 @@ CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_NVME=m CONFIG_NVME_MULTIPATH=y CONFIG_EEPROM_AT24=m -# CONFIG_CXL is not set # CONFIG_OCXL is not set CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h index fd2e166ea02a..81bd176203ab 100644 --- a/arch/powerpc/include/asm/copro.h +++ b/arch/powerpc/include/asm/copro.h @@ -18,10 +18,4 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); - -#ifdef CONFIG_PPC_COPRO_BASE -void copro_flush_all_slbs(struct mm_struct *mm); -#else -static inline void copro_flush_all_slbs(struct mm_struct *mm) {} -#endif #endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 47ed639f3b8f..a4dc27655b3e 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -38,9 +38,6 @@ struct dev_archdata { #ifdef CONFIG_FAIL_IOMMU int fail_iommu; #endif -#ifdef CONFIG_CXL_BASE - struct cxl_context *cxl_ctx; -#endif #ifdef CONFIG_PCI_IOV void *iov_data; #endif diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 8afc92860dbb..7e9a479951a3 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #define PCI_SLOT_ID_PREFIX (1UL << 63) @@ -25,25 +24,9 @@ extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); -extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, - int enable); -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq); -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); -int pnv_cxl_get_irq_count(struct pci_dev *dev); -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); int64_t pnv_opal_pci_msi_eoi(struct irq_data *d); bool is_pnv_opal_msi(struct irq_chip *chip); -#ifdef CONFIG_CXL_BASE -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num); -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev); -#endif - struct pnv_php_slot { struct hotplug_slot slot; uint64_t id; diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 430d1d935a7c..e9e2dd70c060 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -27,8 +27,6 @@ #include #include -#include - #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) #else @@ -217,11 +215,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local; + unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); - if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) @@ -789,10 +785,6 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long psize = batch->psize; int ssize = batch->ssize; int i; - unsigned int use_local; - - use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && - mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use(); local_irq_save(flags); @@ -827,7 +819,8 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (use_local) { + if (mmu_has_feature(MMU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { vpn = batch->vpn[i]; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c8b4fa71d4a7..790f94dbdaad 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include #include @@ -1612,7 +1612,9 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { copy_mm_to_paca(mm); @@ -1881,7 +1883,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif } } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index ce64abea9e3e..45b763b30708 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index bc9a39821d1c..28bec5bc7879 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -248,7 +248,9 @@ static void slice_convert(struct mm_struct *mm, spin_unlock_irqrestore(&slice_convert_lock, flags); - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif } /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f49fd873df8d..f5f8692e2c69 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include /* * This ought to be kept in sync with the powerpc specific do_page_fault @@ -135,13 +133,4 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) return 0; } EXPORT_SYMBOL_GPL(copro_calculate_slb); - -void copro_flush_all_slbs(struct mm_struct *mm) -{ -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif - cxl_slbia(mm); -} -EXPORT_SYMBOL_GPL(copro_flush_all_slbs); #endif diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 19f0fc5c6f1b..9e5d0c847ee2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o obj-$(CONFIG_PCI_IOV) += pci-sriov.o -obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c deleted file mode 100644 index 7e419de71db8..000000000000 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014-2016 IBM Corp. - */ - -#include -#include -#include -#include - -#include "pci.h" - -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - int rc; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - pe_info(pe, "Switching PHB to CXL\n"); - - rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); - if (rc == OPAL_UNSUPPORTED) - dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); - else if (rc) - dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); - - return rc; -} -EXPORT_SYMBOL(pnv_phb_to_cxl_mode); - -/* Find PHB for cxl dev and allocate MSI hwirqs? - * Returns the absolute hardware IRQ number - */ -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); - - if (hwirq < 0) { - dev_warn(&dev->dev, "Failed to find a free MSI\n"); - return -ENOSPC; - } - - return phb->msi_base + hwirq; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); - -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); -} -EXPORT_SYMBOL(pnv_cxl_release_hwirqs); - -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq; - - for (i = 1; i < CXL_IRQ_RANGES; i++) { - if (!irqs->range[i]) - continue; - pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", - i, irqs->offset[i], - irqs->range[i]); - hwirq = irqs->offset[i] - phb->msi_base; - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, - irqs->range[i]); - } -} -EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); - -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq, try; - - memset(irqs, 0, sizeof(struct cxl_irq_ranges)); - - /* 0 is reserved for the multiplexed PSL DSI interrupt */ - for (i = 1; i < CXL_IRQ_RANGES && num; i++) { - try = num; - while (try) { - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); - if (hwirq >= 0) - break; - try /= 2; - } - if (!try) - goto fail; - - irqs->offset[i] = phb->msi_base + hwirq; - irqs->range[i] = try; - pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", - i, irqs->offset[i], irqs->range[i]); - num -= try; - } - if (num) - goto fail; - - return 0; -fail: - pnv_cxl_release_hwirq_ranges(irqs, dev); - return -ENOSPC; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); - -int pnv_cxl_get_irq_count(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return phb->msi_bmp.irq_count; -} -EXPORT_SYMBOL(pnv_cxl_get_irq_count); - -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - unsigned int xive_num = hwirq - phb->msi_base; - struct pnv_ioda_pe *pe; - int rc; - - if (!(pe = pnv_ioda_get_pe(dev))) - return -ENODEV; - - /* Assign XIVE to PE */ - rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); - if (rc) { - pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " - "hwirq 0x%x XIVE 0x%x PE\n", - pci_name(dev), rc, phb->msi_base, hwirq, xive_num); - return -EIO; - } - pnv_set_msi_irq_chip(phb, virq); - - return 0; -} -EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b0a14e48175c..d2a8e0287811 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -39,8 +39,6 @@ #include #include -#include - #include "powernv.h" #include "pci.h" #include "../../../../drivers/pci/pci.h" @@ -1636,47 +1634,6 @@ int64_t pnv_opal_pci_msi_eoi(struct irq_data *d) return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq); } -/* - * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers - */ -static void pnv_ioda2_msi_eoi(struct irq_data *d) -{ - int64_t rc; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct pci_controller *hose = irq_data_get_irq_chip_data(d); - struct pnv_phb *phb = hose->private_data; - - rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); - WARN_ON_ONCE(rc); - - icp_native_eoi(d); -} - -/* P8/CXL only */ -void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) -{ - struct irq_data *idata; - struct irq_chip *ichip; - - /* The MSI EOI OPAL call is only needed on PHB3 */ - if (phb->model != PNV_PHB_MODEL_PHB3) - return; - - if (!phb->ioda.irq_chip_init) { - /* - * First time we setup an MSI IRQ, we need to setup the - * corresponding IRQ chip to route correctly. - */ - idata = irq_get_irq_data(virq); - ichip = irq_data_get_irq_chip(idata); - phb->ioda.irq_chip_init = 1; - phb->ioda.irq_chip = *ichip; - phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; - } - irq_set_chip(virq, &phb->ioda.irq_chip); - irq_set_chip_data(virq, phb->hose); -} - static struct irq_chip pnv_pci_msi_irq_chip; /* diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 35f566aa0424..b2c1da025410 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -33,8 +32,6 @@ #include "powernv.h" #include "pci.h" -static DEFINE_MUTEX(tunnel_mutex); - int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { struct device_node *node = np; @@ -744,64 +741,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid) return tbl; } -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - -int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); - u64 tunnel_bar; - __be64 val; - int rc; - - if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) - return -ENXIO; - if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) - return -ENXIO; - - mutex_lock(&tunnel_mutex); - rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - tunnel_bar = be64_to_cpu(val); - if (enable) { - /* - * Only one device per PHB can use atomics. - * Our policy is first-come, first-served. - */ - if (tunnel_bar) { - if (tunnel_bar != addr) - rc = -EBUSY; - else - rc = 0; /* Setting same address twice is ok */ - goto out; - } - } else { - /* - * The device that owns atomics and wants to release - * them must pass the same address with enable == 0. - */ - if (tunnel_bar != addr) { - rc = -EPERM; - goto out; - } - addr = 0x0ULL; - } - rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); - rc = opal_error_code(rc); -out: - mutex_unlock(&tunnel_mutex); - return rc; -} -EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); - void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 93fba1f8661f..42075501663b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -163,7 +163,6 @@ struct pnv_phb { unsigned int *io_segmap; /* IRQ chip */ - int irq_chip_init; struct irq_chip irq_chip; /* Sorted list of used PE's based @@ -281,7 +280,6 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); -extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 56bc72c7ce4a..6b37d61150ee 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -641,7 +641,6 @@ source "drivers/misc/mei/Kconfig" source "drivers/misc/vmw_vmci/Kconfig" source "drivers/misc/genwqe/Kconfig" source "drivers/misc/echo/Kconfig" -source "drivers/misc/cxl/Kconfig" source "drivers/misc/ocxl/Kconfig" source "drivers/misc/bcm-vk/Kconfig" source "drivers/misc/cardreader/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 545aad06d088..d6c917229c45 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -50,7 +50,6 @@ obj-$(CONFIG_SRAM) += sram.o obj-$(CONFIG_SRAM_EXEC) += sram-exec.o obj-$(CONFIG_GENWQE) += genwqe/ obj-$(CONFIG_ECHO) += echo/ -obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_DW_XDATA_PCIE) += dw-xdata-pcie.o obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o obj-$(CONFIG_OCXL) += ocxl/ diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig deleted file mode 100644 index 15307f5e4307..000000000000 --- a/drivers/misc/cxl/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# IBM Coherent Accelerator (CXL) compatible devices -# - -config CXL_BASE - bool - select PPC_COPRO_BASE - select PPC_64S_HASH_MMU - -config CXL - tristate "Support for IBM Coherent Accelerators (CXL) (DEPRECATED)" - depends on PPC_POWERNV && PCI_MSI && EEH - select CXL_BASE - help - The cxl driver is deprecated and will be removed in a future - kernel release. - - Select this option to enable driver support for IBM Coherent - Accelerators (CXL). CXL is otherwise known as Coherent Accelerator - Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be - coherently attached to a CPU via an MMU. This driver enables - userspace programs to access these accelerators via /dev/cxl/afuM.N - devices. - - CAPI adapters are found in POWER8 based systems. - - If unsure, say N. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile deleted file mode 100644 index 5eea61b9584f..000000000000 --- a/drivers/misc/cxl/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := $(call cc-disable-warning, unused-const-variable) -ccflags-$(CONFIG_PPC_WERROR) += -Werror - -cxl-y += main.o file.o irq.o fault.o native.o -cxl-y += context.o sysfs.o pci.o trace.o -cxl-y += vphb.o api.o cxllib.o -cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o -cxl-$(CONFIG_DEBUG_FS) += debugfs.o -obj-$(CONFIG_CXL) += cxl.o -obj-$(CONFIG_CXL_BASE) += base.o - -# For tracepoints to include our trace.h from tracepoint infrastructure: -CFLAGS_trace.o := -I$(src) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c deleted file mode 100644 index d85c56530863..000000000000 --- a/drivers/misc/cxl/api.c +++ /dev/null @@ -1,532 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" - -/* - * Since we want to track memory mappings to be able to force-unmap - * when the AFU is no longer reachable, we need an inode. For devices - * opened through the cxl user API, this is not a problem, but a - * userland process can also get a cxl fd through the cxl_get_fd() - * API, which is used by the cxlflash driver. - * - * Therefore we implement our own simple pseudo-filesystem and inode - * allocator. We don't use the anonymous inode, as we need the - * meta-data associated with it (address_space) and it is shared by - * other drivers/processes, so it could lead to cxl unmapping VMAs - * from random processes. - */ - -#define CXL_PSEUDO_FS_MAGIC 0x1697697f - -static int cxl_fs_cnt; -static struct vfsmount *cxl_vfs_mount; - -static int cxl_fs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, CXL_PSEUDO_FS_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type cxl_fs_type = { - .name = "cxl", - .owner = THIS_MODULE, - .init_fs_context = cxl_fs_init_fs_context, - .kill_sb = kill_anon_super, -}; - - -void cxl_release_mapping(struct cxl_context *ctx) -{ - if (ctx->kernelapi && ctx->mapping) - simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); -} - -static struct file *cxl_getfile(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - struct file *file; - struct inode *inode; - int rc; - - /* strongly inspired by anon_inode_getfile() */ - - if (fops->owner && !try_module_get(fops->owner)) - return ERR_PTR(-ENOENT); - - rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt); - if (rc < 0) { - pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc); - file = ERR_PTR(rc); - goto err_module; - } - - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); - if (IS_ERR(inode)) { - file = ERR_CAST(inode); - goto err_fs; - } - - file = alloc_file_pseudo(inode, cxl_vfs_mount, name, - flags & (O_ACCMODE | O_NONBLOCK), fops); - if (IS_ERR(file)) - goto err_inode; - - file->private_data = priv; - - return file; - -err_inode: - iput(inode); -err_fs: - simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); -err_module: - module_put(fops->owner); - return file; -} - -struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) -{ - struct cxl_afu *afu; - struct cxl_context *ctx; - int rc; - - afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return ERR_CAST(afu); - - ctx = cxl_context_alloc(); - if (!ctx) - return ERR_PTR(-ENOMEM); - - ctx->kernelapi = true; - - /* Make it a slave context. We can promote it later? */ - rc = cxl_context_init(ctx, afu, false); - if (rc) - goto err_ctx; - - return ctx; - -err_ctx: - kfree(ctx); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_GPL(cxl_dev_context_init); - -struct cxl_context *cxl_get_context(struct pci_dev *dev) -{ - return dev->dev.archdata.cxl_ctx; -} -EXPORT_SYMBOL_GPL(cxl_get_context); - -int cxl_release_context(struct cxl_context *ctx) -{ - if (ctx->status >= STARTED) - return -EBUSY; - - cxl_context_free(ctx); - - return 0; -} -EXPORT_SYMBOL_GPL(cxl_release_context); - -static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) -{ - __u16 range; - int r; - - for (r = 0; r < CXL_IRQ_RANGES; r++) { - range = ctx->irqs.range[r]; - if (num < range) { - return ctx->irqs.offset[r] + num; - } - num -= range; - } - return 0; -} - - -int cxl_set_priv(struct cxl_context *ctx, void *priv) -{ - if (!ctx) - return -EINVAL; - - ctx->priv = priv; - - return 0; -} -EXPORT_SYMBOL_GPL(cxl_set_priv); - -void *cxl_get_priv(struct cxl_context *ctx) -{ - if (!ctx) - return ERR_PTR(-EINVAL); - - return ctx->priv; -} -EXPORT_SYMBOL_GPL(cxl_get_priv); - -int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) -{ - int res; - irq_hw_number_t hwirq; - - if (num == 0) - num = ctx->afu->pp_irqs; - res = afu_allocate_irqs(ctx, num); - if (res) - return res; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) { - /* In a guest, the PSL interrupt is not multiplexed. It was - * allocated above, and we need to set its handler - */ - hwirq = cxl_find_afu_irq(ctx, 0); - if (hwirq) - cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); - } - - if (ctx->status == STARTED) { - if (cxl_ops->update_ivtes) - cxl_ops->update_ivtes(ctx); - else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); - } - - return res; -} -EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); - -void cxl_free_afu_irqs(struct cxl_context *ctx) -{ - irq_hw_number_t hwirq; - unsigned int virq; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) { - hwirq = cxl_find_afu_irq(ctx, 0); - if (hwirq) { - virq = irq_find_mapping(NULL, hwirq); - if (virq) - cxl_unmap_irq(virq, ctx); - } - } - afu_irq_name_free(ctx); - cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); -} -EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); - -int cxl_map_afu_irq(struct cxl_context *ctx, int num, - irq_handler_t handler, void *cookie, char *name) -{ - irq_hw_number_t hwirq; - - /* - * Find interrupt we are to register. - */ - hwirq = cxl_find_afu_irq(ctx, num); - if (!hwirq) - return -ENOENT; - - return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name); -} -EXPORT_SYMBOL_GPL(cxl_map_afu_irq); - -void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie) -{ - irq_hw_number_t hwirq; - unsigned int virq; - - hwirq = cxl_find_afu_irq(ctx, num); - if (!hwirq) - return; - - virq = irq_find_mapping(NULL, hwirq); - if (virq) - cxl_unmap_irq(virq, cookie); -} -EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq); - -/* - * Start a context - * Code here similar to afu_ioctl_start_work(). - */ -int cxl_start_context(struct cxl_context *ctx, u64 wed, - struct task_struct *task) -{ - int rc = 0; - bool kernel = true; - - pr_devel("%s: pe: %i\n", __func__, ctx->pe); - - mutex_lock(&ctx->status_mutex); - if (ctx->status == STARTED) - goto out; /* already started */ - - /* - * Increment the mapped context count for adapter. This also checks - * if adapter_context_lock is taken. - */ - rc = cxl_adapter_context_get(ctx->afu->adapter); - if (rc) - goto out; - - if (task) { - ctx->pid = get_task_pid(task, PIDTYPE_PID); - kernel = false; - - /* acquire a reference to the task's mm */ - ctx->mm = get_task_mm(current); - - /* ensure this mm_struct can't be freed */ - cxl_context_mm_count_get(ctx); - - if (ctx->mm) { - /* decrement the use count from above */ - mmput(ctx->mm); - /* make TLBIs for this context global */ - mm_context_add_copro(ctx->mm); - } - } - - /* - * Increment driver use count. Enables global TLBIs for hash - * and callbacks to handle the segment table - */ - cxl_ctx_get(); - - /* See the comment in afu_ioctl_start_work() */ - smp_mb(); - - if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { - put_pid(ctx->pid); - ctx->pid = NULL; - cxl_adapter_context_put(ctx->afu->adapter); - cxl_ctx_put(); - if (task) { - cxl_context_mm_count_put(ctx); - if (ctx->mm) - mm_context_remove_copro(ctx->mm); - } - goto out; - } - - ctx->status = STARTED; -out: - mutex_unlock(&ctx->status_mutex); - return rc; -} -EXPORT_SYMBOL_GPL(cxl_start_context); - -int cxl_process_element(struct cxl_context *ctx) -{ - return ctx->external_pe; -} -EXPORT_SYMBOL_GPL(cxl_process_element); - -/* Stop a context. Returns 0 on success, otherwise -Errno */ -int cxl_stop_context(struct cxl_context *ctx) -{ - return __detach_context(ctx); -} -EXPORT_SYMBOL_GPL(cxl_stop_context); - -void cxl_set_master(struct cxl_context *ctx) -{ - ctx->master = true; -} -EXPORT_SYMBOL_GPL(cxl_set_master); - -/* wrappers around afu_* file ops which are EXPORTED */ -int cxl_fd_open(struct inode *inode, struct file *file) -{ - return afu_open(inode, file); -} -EXPORT_SYMBOL_GPL(cxl_fd_open); -int cxl_fd_release(struct inode *inode, struct file *file) -{ - return afu_release(inode, file); -} -EXPORT_SYMBOL_GPL(cxl_fd_release); -long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return afu_ioctl(file, cmd, arg); -} -EXPORT_SYMBOL_GPL(cxl_fd_ioctl); -int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm) -{ - return afu_mmap(file, vm); -} -EXPORT_SYMBOL_GPL(cxl_fd_mmap); -__poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll) -{ - return afu_poll(file, poll); -} -EXPORT_SYMBOL_GPL(cxl_fd_poll); -ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, - loff_t *off) -{ - return afu_read(file, buf, count, off); -} -EXPORT_SYMBOL_GPL(cxl_fd_read); - -#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME - -/* Get a struct file and fd for a context and attach the ops */ -struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, - int *fd) -{ - struct file *file; - int rc, flags, fdtmp; - char *name = NULL; - - /* only allow one per context */ - if (ctx->mapping) - return ERR_PTR(-EEXIST); - - flags = O_RDWR | O_CLOEXEC; - - /* This code is similar to anon_inode_getfd() */ - rc = get_unused_fd_flags(flags); - if (rc < 0) - return ERR_PTR(rc); - fdtmp = rc; - - /* - * Patch the file ops. Needs to be careful that this is rentrant safe. - */ - if (fops) { - PATCH_FOPS(open); - PATCH_FOPS(poll); - PATCH_FOPS(read); - PATCH_FOPS(release); - PATCH_FOPS(unlocked_ioctl); - PATCH_FOPS(compat_ioctl); - PATCH_FOPS(mmap); - } else /* use default ops */ - fops = (struct file_operations *)&afu_fops; - - name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe); - file = cxl_getfile(name, fops, ctx, flags); - kfree(name); - if (IS_ERR(file)) - goto err_fd; - - cxl_context_set_mapping(ctx, file->f_mapping); - *fd = fdtmp; - return file; - -err_fd: - put_unused_fd(fdtmp); - return NULL; -} -EXPORT_SYMBOL_GPL(cxl_get_fd); - -struct cxl_context *cxl_fops_get_context(struct file *file) -{ - return file->private_data; -} -EXPORT_SYMBOL_GPL(cxl_fops_get_context); - -void cxl_set_driver_ops(struct cxl_context *ctx, - struct cxl_afu_driver_ops *ops) -{ - WARN_ON(!ops->fetch_event || !ops->event_delivered); - atomic_set(&ctx->afu_driver_events, 0); - ctx->afu_driver_ops = ops; -} -EXPORT_SYMBOL_GPL(cxl_set_driver_ops); - -void cxl_context_events_pending(struct cxl_context *ctx, - unsigned int new_events) -{ - atomic_add(new_events, &ctx->afu_driver_events); - wake_up_all(&ctx->wq); -} -EXPORT_SYMBOL_GPL(cxl_context_events_pending); - -int cxl_start_work(struct cxl_context *ctx, - struct cxl_ioctl_start_work *work) -{ - int rc; - - /* code taken from afu_ioctl_start_work */ - if (!(work->flags & CXL_START_WORK_NUM_IRQS)) - work->num_interrupts = ctx->afu->pp_irqs; - else if ((work->num_interrupts < ctx->afu->pp_irqs) || - (work->num_interrupts > ctx->afu->irqs_max)) { - return -EINVAL; - } - - rc = afu_register_irqs(ctx, work->num_interrupts); - if (rc) - return rc; - - rc = cxl_start_context(ctx, work->work_element_descriptor, current); - if (rc < 0) { - afu_release_irqs(ctx, ctx); - return rc; - } - - return 0; -} -EXPORT_SYMBOL_GPL(cxl_start_work); - -void __iomem *cxl_psa_map(struct cxl_context *ctx) -{ - if (ctx->status != STARTED) - return NULL; - - pr_devel("%s: psn_phys%llx size:%llx\n", - __func__, ctx->psn_phys, ctx->psn_size); - return ioremap(ctx->psn_phys, ctx->psn_size); -} -EXPORT_SYMBOL_GPL(cxl_psa_map); - -void cxl_psa_unmap(void __iomem *addr) -{ - iounmap(addr); -} -EXPORT_SYMBOL_GPL(cxl_psa_unmap); - -int cxl_afu_reset(struct cxl_context *ctx) -{ - struct cxl_afu *afu = ctx->afu; - int rc; - - rc = cxl_ops->afu_reset(afu); - if (rc) - return rc; - - return cxl_ops->afu_check_and_enable(afu); -} -EXPORT_SYMBOL_GPL(cxl_afu_reset); - -void cxl_perst_reloads_same_image(struct cxl_afu *afu, - bool perst_reloads_same_image) -{ - afu->adapter->perst_same_image = perst_reloads_same_image; -} -EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); - -ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) -{ - struct cxl_afu *afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return -ENODEV; - - return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); -} -EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c deleted file mode 100644 index b054562c046e..000000000000 --- a/drivers/misc/cxl/base.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include "cxl.h" - -/* protected by rcu */ -static struct cxl_calls *cxl_calls; - -atomic_t cxl_use_count = ATOMIC_INIT(0); -EXPORT_SYMBOL(cxl_use_count); - -#ifdef CONFIG_CXL_MODULE - -static inline struct cxl_calls *cxl_calls_get(void) -{ - struct cxl_calls *calls = NULL; - - rcu_read_lock(); - calls = rcu_dereference(cxl_calls); - if (calls && !try_module_get(calls->owner)) - calls = NULL; - rcu_read_unlock(); - - return calls; -} - -static inline void cxl_calls_put(struct cxl_calls *calls) -{ - BUG_ON(calls != cxl_calls); - - /* we don't need to rcu this, as we hold a reference to the module */ - module_put(cxl_calls->owner); -} - -#else /* !defined CONFIG_CXL_MODULE */ - -static inline struct cxl_calls *cxl_calls_get(void) -{ - return cxl_calls; -} - -static inline void cxl_calls_put(struct cxl_calls *calls) { } - -#endif /* CONFIG_CXL_MODULE */ - -/* AFU refcount management */ -struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) -{ - return (get_device(&afu->dev) == NULL) ? NULL : afu; -} -EXPORT_SYMBOL_GPL(cxl_afu_get); - -void cxl_afu_put(struct cxl_afu *afu) -{ - put_device(&afu->dev); -} -EXPORT_SYMBOL_GPL(cxl_afu_put); - -void cxl_slbia(struct mm_struct *mm) -{ - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return; - - if (cxl_ctx_in_use()) - calls->cxl_slbia(mm); - - cxl_calls_put(calls); -} - -int register_cxl_calls(struct cxl_calls *calls) -{ - if (cxl_calls) - return -EBUSY; - - rcu_assign_pointer(cxl_calls, calls); - return 0; -} -EXPORT_SYMBOL_GPL(register_cxl_calls); - -void unregister_cxl_calls(struct cxl_calls *calls) -{ - BUG_ON(cxl_calls->owner != calls->owner); - RCU_INIT_POINTER(cxl_calls, NULL); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(unregister_cxl_calls); - -int cxl_update_properties(struct device_node *dn, - struct property *new_prop) -{ - return of_update_property(dn, new_prop); -} -EXPORT_SYMBOL_GPL(cxl_update_properties); - -static int __init cxl_base_init(void) -{ - struct device_node *np; - struct platform_device *dev; - int count = 0; - - /* - * Scan for compatible devices in guest only - */ - if (cpu_has_feature(CPU_FTR_HVMODE)) - return 0; - - for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") { - dev = of_platform_device_create(np, NULL, NULL); - if (dev) - count++; - } - pr_devel("Found %d cxl device(s)\n", count); - return 0; -} -device_initcall(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c deleted file mode 100644 index 76b5ea66dfa1..000000000000 --- a/drivers/misc/cxl/context.c +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" - -/* - * Allocates space for a CXL context. - */ -struct cxl_context *cxl_context_alloc(void) -{ - return kzalloc(sizeof(struct cxl_context), GFP_KERNEL); -} - -/* - * Initialises a CXL context. - */ -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) -{ - int i; - - ctx->afu = afu; - ctx->master = master; - ctx->pid = NULL; /* Set in start work ioctl */ - mutex_init(&ctx->mapping_lock); - ctx->mapping = NULL; - ctx->tidr = 0; - ctx->assign_tidr = false; - - if (cxl_is_power8()) { - spin_lock_init(&ctx->sste_lock); - - /* - * Allocate the segment table before we put it in the IDR so that we - * can always access it when dereferenced from IDR. For the same - * reason, the segment table is only destroyed after the context is - * removed from the IDR. Access to this in the IOCTL is protected by - * Linux filesystem semantics (can't IOCTL until open is complete). - */ - i = cxl_alloc_sst(ctx); - if (i) - return i; - } - - INIT_WORK(&ctx->fault_work, cxl_handle_fault); - - init_waitqueue_head(&ctx->wq); - spin_lock_init(&ctx->lock); - - ctx->irq_bitmap = NULL; - ctx->pending_irq = false; - ctx->pending_fault = false; - ctx->pending_afu_err = false; - - INIT_LIST_HEAD(&ctx->irq_names); - - /* - * When we have to destroy all contexts in cxl_context_detach_all() we - * end up with afu_release_irqs() called from inside a - * idr_for_each_entry(). Hence we need to make sure that anything - * dereferenced from this IDR is ok before we allocate the IDR here. - * This clears out the IRQ ranges to ensure this. - */ - for (i = 0; i < CXL_IRQ_RANGES; i++) - ctx->irqs.range[i] = 0; - - mutex_init(&ctx->status_mutex); - - ctx->status = OPENED; - - /* - * Allocating IDR! We better make sure everything's setup that - * dereferences from it. - */ - mutex_lock(&afu->contexts_lock); - idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, - ctx->afu->num_procs, GFP_NOWAIT); - idr_preload_end(); - mutex_unlock(&afu->contexts_lock); - if (i < 0) - return i; - - ctx->pe = i; - if (cpu_has_feature(CPU_FTR_HVMODE)) { - ctx->elem = &ctx->afu->native->spa[i]; - ctx->external_pe = ctx->pe; - } else { - ctx->external_pe = -1; /* assigned when attaching */ - } - ctx->pe_inserted = false; - - /* - * take a ref on the afu so that it stays alive at-least till - * this context is reclaimed inside reclaim_ctx. - */ - cxl_afu_get(afu); - return 0; -} - -void cxl_context_set_mapping(struct cxl_context *ctx, - struct address_space *mapping) -{ - mutex_lock(&ctx->mapping_lock); - ctx->mapping = mapping; - mutex_unlock(&ctx->mapping_lock); -} - -static vm_fault_t cxl_mmap_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct cxl_context *ctx = vma->vm_file->private_data; - u64 area, offset; - vm_fault_t ret; - - offset = vmf->pgoff << PAGE_SHIFT; - - pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n", - __func__, ctx->pe, vmf->address, offset); - - if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { - area = ctx->afu->psn_phys; - if (offset >= ctx->afu->adapter->ps_size) - return VM_FAULT_SIGBUS; - } else { - area = ctx->psn_phys; - if (offset >= ctx->psn_size) - return VM_FAULT_SIGBUS; - } - - mutex_lock(&ctx->status_mutex); - - if (ctx->status != STARTED) { - mutex_unlock(&ctx->status_mutex); - pr_devel("%s: Context not started, failing problem state access\n", __func__); - if (ctx->mmio_err_ff) { - if (!ctx->ff_page) { - ctx->ff_page = alloc_page(GFP_USER); - if (!ctx->ff_page) - return VM_FAULT_OOM; - memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); - } - get_page(ctx->ff_page); - vmf->page = ctx->ff_page; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - return 0; - } - return VM_FAULT_SIGBUS; - } - - ret = vmf_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); - - mutex_unlock(&ctx->status_mutex); - - return ret; -} - -static const struct vm_operations_struct cxl_mmap_vmops = { - .fault = cxl_mmap_fault, -}; - -/* - * Map a per-context mmio space into the given vma. - */ -int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) -{ - u64 start = vma->vm_pgoff << PAGE_SHIFT; - u64 len = vma->vm_end - vma->vm_start; - - if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { - if (start + len > ctx->afu->adapter->ps_size) - return -EINVAL; - - if (cxl_is_power9()) { - /* - * Make sure there is a valid problem state - * area space for this AFU. - */ - if (ctx->master && !ctx->afu->psa) { - pr_devel("AFU doesn't support mmio space\n"); - return -EINVAL; - } - - /* Can't mmap until the AFU is enabled */ - if (!ctx->afu->enabled) - return -EBUSY; - } - } else { - if (start + len > ctx->psn_size) - return -EINVAL; - - /* Make sure there is a valid per process space for this AFU */ - if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) { - pr_devel("AFU doesn't support mmio space\n"); - return -EINVAL; - } - - /* Can't mmap until the AFU is enabled */ - if (!ctx->afu->enabled) - return -EBUSY; - } - - pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__, - ctx->psn_phys, ctx->pe , ctx->master); - - vm_flags_set(vma, VM_IO | VM_PFNMAP); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_ops = &cxl_mmap_vmops; - return 0; -} - -/* - * Detach a context from the hardware. This disables interrupts and doesn't - * return until all outstanding interrupts for this context have completed. The - * hardware should no longer access *ctx after this has returned. - */ -int __detach_context(struct cxl_context *ctx) -{ - enum cxl_context_status status; - - mutex_lock(&ctx->status_mutex); - status = ctx->status; - ctx->status = CLOSED; - mutex_unlock(&ctx->status_mutex); - if (status != STARTED) - return -EBUSY; - - /* Only warn if we detached while the link was OK. - * If detach fails when hw is down, we don't care. - */ - WARN_ON(cxl_ops->detach_process(ctx) && - cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)); - flush_work(&ctx->fault_work); /* Only needed for dedicated process */ - - /* - * Wait until no further interrupts are presented by the PSL - * for this context. - */ - if (cxl_ops->irq_wait) - cxl_ops->irq_wait(ctx); - - /* release the reference to the group leader and mm handling pid */ - put_pid(ctx->pid); - - cxl_ctx_put(); - - /* Decrease the attached context count on the adapter */ - cxl_adapter_context_put(ctx->afu->adapter); - - /* Decrease the mm count on the context */ - cxl_context_mm_count_put(ctx); - if (ctx->mm) - mm_context_remove_copro(ctx->mm); - ctx->mm = NULL; - - return 0; -} - -/* - * Detach the given context from the AFU. This doesn't actually - * free the context but it should stop the context running in hardware - * (ie. prevent this context from generating any further interrupts - * so that it can be freed). - */ -void cxl_context_detach(struct cxl_context *ctx) -{ - int rc; - - rc = __detach_context(ctx); - if (rc) - return; - - afu_release_irqs(ctx, ctx); - wake_up_all(&ctx->wq); -} - -/* - * Detach all contexts on the given AFU. - */ -void cxl_context_detach_all(struct cxl_afu *afu) -{ - struct cxl_context *ctx; - int tmp; - - mutex_lock(&afu->contexts_lock); - idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { - /* - * Anything done in here needs to be setup before the IDR is - * created and torn down after the IDR removed - */ - cxl_context_detach(ctx); - - /* - * We are force detaching - remove any active PSA mappings so - * userspace cannot interfere with the card if it comes back. - * Easiest way to exercise this is to unbind and rebind the - * driver via sysfs while it is in use. - */ - mutex_lock(&ctx->mapping_lock); - if (ctx->mapping) - unmap_mapping_range(ctx->mapping, 0, 0, 1); - mutex_unlock(&ctx->mapping_lock); - } - mutex_unlock(&afu->contexts_lock); -} - -static void reclaim_ctx(struct rcu_head *rcu) -{ - struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); - - if (cxl_is_power8()) - free_page((u64)ctx->sstp); - if (ctx->ff_page) - __free_page(ctx->ff_page); - ctx->sstp = NULL; - - bitmap_free(ctx->irq_bitmap); - - /* Drop ref to the afu device taken during cxl_context_init */ - cxl_afu_put(ctx->afu); - - kfree(ctx); -} - -void cxl_context_free(struct cxl_context *ctx) -{ - if (ctx->kernelapi && ctx->mapping) - cxl_release_mapping(ctx); - mutex_lock(&ctx->afu->contexts_lock); - idr_remove(&ctx->afu->contexts_idr, ctx->pe); - mutex_unlock(&ctx->afu->contexts_lock); - call_rcu(&ctx->rcu, reclaim_ctx); -} - -void cxl_context_mm_count_get(struct cxl_context *ctx) -{ - if (ctx->mm) - mmgrab(ctx->mm); -} - -void cxl_context_mm_count_put(struct cxl_context *ctx) -{ - if (ctx->mm) - mmdrop(ctx->mm); -} diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h deleted file mode 100644 index 6ad0ab892675..000000000000 --- a/drivers/misc/cxl/cxl.h +++ /dev/null @@ -1,1135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 IBM Corp. - */ - -#ifndef _CXL_H_ -#define _CXL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern uint cxl_verbose; - -struct property; - -#define CXL_TIMEOUT 5 - -/* - * Bump version each time a user API change is made, whether it is - * backwards compatible ot not. - */ -#define CXL_API_VERSION 3 -#define CXL_API_VERSION_COMPATIBLE 1 - -/* - * Opaque types to avoid accidentally passing registers for the wrong MMIO - * - * At the end of the day, I'm not married to using typedef here, but it might - * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and - * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write. - * - * I'm quite happy if these are changed back to #defines before upstreaming, it - * should be little more than a regexp search+replace operation in this file. - */ -typedef struct { - const int x; -} cxl_p1_reg_t; -typedef struct { - const int x; -} cxl_p1n_reg_t; -typedef struct { - const int x; -} cxl_p2n_reg_t; -#define cxl_reg_off(reg) \ - (reg.x) - -/* Memory maps. Ref CXL Appendix A */ - -/* PSL Privilege 1 Memory Map */ -/* Configuration and Control area - CAIA 1&2 */ -static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000}; -static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008}; -static const cxl_p1_reg_t CXL_PSL_KEY1 = {0x0010}; -static const cxl_p1_reg_t CXL_PSL_KEY2 = {0x0018}; -static const cxl_p1_reg_t CXL_PSL_Control = {0x0020}; -/* Downloading */ -static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; -static const cxl_p1_reg_t CXL_PSL_DLADDR = {0x0068}; - -/* PSL Lookaside Buffer Management Area - CAIA 1 */ -static const cxl_p1_reg_t CXL_PSL_LBISEL = {0x0080}; -static const cxl_p1_reg_t CXL_PSL_SLBIE = {0x0088}; -static const cxl_p1_reg_t CXL_PSL_SLBIA = {0x0090}; -static const cxl_p1_reg_t CXL_PSL_TLBIE = {0x00A0}; -static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; -static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; - -/* 0x00C0:7EFF Implementation dependent area */ -/* PSL registers - CAIA 1 */ -static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; -static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; -static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; -static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; -static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; -static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140}; -static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; -static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; -static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; -static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; -/* PSL registers - CAIA 2 */ -static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; -static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110}; -static const cxl_p1_reg_t CXL_XSL9_DBG = {0x0130}; -static const cxl_p1_reg_t CXL_XSL9_DEF = {0x0140}; -static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168}; -static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300}; -static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308}; -static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310}; -static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320}; -static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348}; -static const cxl_p1_reg_t CXL_PSL9_DSNDCTL = {0x0350}; -static const cxl_p1_reg_t CXL_PSL9_TB_CTLSTAT = {0x0340}; -static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368}; -static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378}; -static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380}; -static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388}; -static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390}; -static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398}; -static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588}; -static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590}; - -/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ -/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ - -/* PSL Slice Privilege 1 Memory Map */ -/* Configuration Area - CAIA 1&2 */ -static const cxl_p1n_reg_t CXL_PSL_SR_An = {0x00}; -static const cxl_p1n_reg_t CXL_PSL_LPID_An = {0x08}; -static const cxl_p1n_reg_t CXL_PSL_AMBAR_An = {0x10}; -static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18}; -static const cxl_p1n_reg_t CXL_PSL_ID_An = {0x20}; -static const cxl_p1n_reg_t CXL_PSL_SERR_An = {0x28}; -/* Memory Management and Lookaside Buffer Management - CAIA 1*/ -static const cxl_p1n_reg_t CXL_PSL_SDR_An = {0x30}; -/* Memory Management and Lookaside Buffer Management - CAIA 1&2 */ -static const cxl_p1n_reg_t CXL_PSL_AMOR_An = {0x38}; -/* Pointer Area - CAIA 1&2 */ -static const cxl_p1n_reg_t CXL_HAURP_An = {0x80}; -static const cxl_p1n_reg_t CXL_PSL_SPAP_An = {0x88}; -static const cxl_p1n_reg_t CXL_PSL_LLCMD_An = {0x90}; -/* Control Area - CAIA 1&2 */ -static const cxl_p1n_reg_t CXL_PSL_SCNTL_An = {0xA0}; -static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8}; -static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0}; -static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8}; -/* 0xC0:FF Implementation Dependent Area - CAIA 1&2 */ -static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0}; -static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8}; -/* 0xC0:FF Implementation Dependent Area - CAIA 1 */ -static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0}; -static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8}; -static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0}; -static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8}; - -/* PSL Slice Privilege 2 Memory Map */ -/* Configuration and Control Area - CAIA 1&2 */ -static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000}; -static const cxl_p2n_reg_t CXL_CSRP_An = {0x008}; -/* Configuration and Control Area - CAIA 1 */ -static const cxl_p2n_reg_t CXL_AURP0_An = {0x010}; -static const cxl_p2n_reg_t CXL_AURP1_An = {0x018}; -static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020}; -static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028}; -/* Configuration and Control Area - CAIA 1 */ -static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030}; -/* Segment Lookaside Buffer Management - CAIA 1 */ -static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040}; -static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048}; -static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050}; -/* Interrupt Registers - CAIA 1&2 */ -static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060}; -static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068}; -static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070}; -static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078}; -static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080}; -static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088}; -/* AFU Registers - CAIA 1&2 */ -static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090}; -static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098}; -/* Work Element Descriptor - CAIA 1&2 */ -static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; -/* 0x0C0:FFF Implementation Dependent Area */ - -#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL -#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL -#define CXL_PSL_SPAP_Size_Shift 4 -#define CXL_PSL_SPAP_V 0x0000000000000001ULL - -/****** CXL_PSL_Control ****************************************************/ -#define CXL_PSL_Control_tb (0x1ull << (63-63)) -#define CXL_PSL_Control_Fr (0x1ull << (63-31)) -#define CXL_PSL_Control_Fs_MASK (0x3ull << (63-29)) -#define CXL_PSL_Control_Fs_Complete (0x3ull << (63-29)) - -/****** CXL_PSL_DLCNTL *****************************************************/ -#define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) -#define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) -#define CXL_PSL_DLCNTL_E (0x1ull << (63-30)) -#define CXL_PSL_DLCNTL_S (0x1ull << (63-31)) -#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E) -#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S) - -/****** CXL_PSL_SR_An ******************************************************/ -#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */ -#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */ -#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */ -#define CXL_PSL_SR_An_XLAT_hpt (0ull << (63-6))/* Hashed page table (HPT) mode */ -#define CXL_PSL_SR_An_XLAT_roh (2ull << (63-6))/* Radix on HPT mode */ -#define CXL_PSL_SR_An_XLAT_ror (3ull << (63-6))/* Radix on Radix mode */ -#define CXL_PSL_SR_An_BOT (1ull << (63-10)) /* Use the in-memory segment table */ -#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */ -#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */ -#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */ -#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */ -#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */ -#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */ -#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */ -#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */ - -/****** CXL_PSL_ID_An ****************************************************/ -#define CXL_PSL_ID_An_F (1ull << (63-31)) -#define CXL_PSL_ID_An_L (1ull << (63-30)) - -/****** CXL_PSL_SERR_An ****************************************************/ -#define CXL_PSL_SERR_An_afuto (1ull << (63-0)) -#define CXL_PSL_SERR_An_afudis (1ull << (63-1)) -#define CXL_PSL_SERR_An_afuov (1ull << (63-2)) -#define CXL_PSL_SERR_An_badsrc (1ull << (63-3)) -#define CXL_PSL_SERR_An_badctx (1ull << (63-4)) -#define CXL_PSL_SERR_An_llcmdis (1ull << (63-5)) -#define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) -#define CXL_PSL_SERR_An_afupar (1ull << (63-7)) -#define CXL_PSL_SERR_An_afudup (1ull << (63-8)) -#define CXL_PSL_SERR_An_IRQS ( \ - CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ - CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ - CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) -#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) -#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) -#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) -#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) -#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) -#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) -#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) -#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) -#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) -#define CXL_PSL_SERR_An_IRQ_MASKS ( \ - CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ - CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ - CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) - -#define CXL_PSL_SERR_An_AE (1ull << (63-30)) - -/****** CXL_PSL_SCNTL_An ****************************************************/ -#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) -/* Programming Modes: */ -#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31)) -#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31)) -#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31)) -#define CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31)) -#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31)) -#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31)) -/* Purge Status (ro) */ -#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39)) -#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39)) -#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39)) -/* Purge */ -#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48)) -/* Suspend Status (ro) */ -#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55)) -#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55)) -#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55)) -/* Suspend Control */ -#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63)) - -/* AFU Slice Enable Status (ro) */ -#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2)) -#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2)) -#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2)) -/* AFU Slice Enable */ -#define CXL_AFU_Cntl_An_E (0x1ull << (63-3)) -/* AFU Slice Reset status (ro) */ -#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5)) -#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5)) -#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5)) -/* AFU Slice Reset */ -#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7)) - -/****** CXL_SSTP0/1_An ******************************************************/ -/* These top bits are for the segment that CONTAINS the segment table */ -#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT -#define CXL_SSTP0_An_KS (1ull << (63-2)) -#define CXL_SSTP0_An_KP (1ull << (63-3)) -#define CXL_SSTP0_An_N (1ull << (63-4)) -#define CXL_SSTP0_An_L (1ull << (63-5)) -#define CXL_SSTP0_An_C (1ull << (63-6)) -#define CXL_SSTP0_An_TA (1ull << (63-7)) -#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */ -/* And finally, the virtual address & size of the segment table: */ -#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */ -#define CXL_SSTP0_An_SegTableSize_MASK \ - (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT) -#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1) -#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1)) -#define CXL_SSTP1_An_V (1ull << (63-63)) - -/****** CXL_PSL_SLBIE_[An] - CAIA 1 **************************************************/ -/* write: */ -#define CXL_SLBIE_C PPC_BIT(36) /* Class */ -#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */ -#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38) -#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */ -/* read: */ -#define CXL_SLBIE_MAX PPC_BITMASK(24, 31) -#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63) - -/****** Common to all CXL_TLBIA/SLBIA_[An] - CAIA 1 **********************************/ -#define CXL_TLB_SLB_P (1ull) /* Pending (read) */ - -/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers - CAIA 1 **********************/ -#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */ -#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */ -#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */ - -/****** CXL_PSL_AFUSEL ******************************************************/ -#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */ - -/****** CXL_PSL_DSISR_An - CAIA 1 ****************************************************/ -#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */ -#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */ -#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */ -#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */ -#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR) -#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ -#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ -#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ -#define CXL_PSL_DSISR_PENDING (CXL_PSL_DSISR_TRANS | CXL_PSL_DSISR_An_PE | CXL_PSL_DSISR_An_AE | CXL_PSL_DSISR_An_OC) -/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */ -#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */ -#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */ -#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */ -#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */ -#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */ - -/****** CXL_PSL_DSISR_An - CAIA 2 ****************************************************/ -#define CXL_PSL9_DSISR_An_TF (1ull << (63-3)) /* Translation fault */ -#define CXL_PSL9_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ -#define CXL_PSL9_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ -#define CXL_PSL9_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ -#define CXL_PSL9_DSISR_An_S (1ull << (63-38)) /* TF for a write operation */ -#define CXL_PSL9_DSISR_PENDING (CXL_PSL9_DSISR_An_TF | CXL_PSL9_DSISR_An_PE | CXL_PSL9_DSISR_An_AE | CXL_PSL9_DSISR_An_OC) -/* - * NOTE: Bits 56:63 (Checkout Response Status) are valid when DSISR_An[TF] = 1 - * Status (0:7) Encoding - */ -#define CXL_PSL9_DSISR_An_CO_MASK 0x00000000000000ffULL -#define CXL_PSL9_DSISR_An_SF 0x0000000000000080ULL /* Segment Fault 0b10000000 */ -#define CXL_PSL9_DSISR_An_PF_SLR 0x0000000000000088ULL /* PTE not found (Single Level Radix) 0b10001000 */ -#define CXL_PSL9_DSISR_An_PF_RGC 0x000000000000008CULL /* PTE not found (Radix Guest (child)) 0b10001100 */ -#define CXL_PSL9_DSISR_An_PF_RGP 0x0000000000000090ULL /* PTE not found (Radix Guest (parent)) 0b10010000 */ -#define CXL_PSL9_DSISR_An_PF_HRH 0x0000000000000094ULL /* PTE not found (HPT/Radix Host) 0b10010100 */ -#define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL /* PTE not found (STEG VA) 0b10011100 */ -#define CXL_PSL9_DSISR_An_URTCH 0x00000000000000B4ULL /* Unsupported Radix Tree Configuration 0b10110100 */ - -/****** CXL_PSL_TFC_An ******************************************************/ -#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */ -#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */ -#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ -#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ - -/****** CXL_PSL_DEBUG *****************************************************/ -#define CXL_PSL_DEBUG_CDC (1ull << (63-27)) /* Coherent Data cache support */ - -/****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/ -#define CXL_XSL9_IERAT_MLPID (1ull << (63-0)) /* Match LPID */ -#define CXL_XSL9_IERAT_MPID (1ull << (63-1)) /* Match PID */ -#define CXL_XSL9_IERAT_PRS (1ull << (63-4)) /* PRS bit for Radix invalidations */ -#define CXL_XSL9_IERAT_INVR (1ull << (63-3)) /* Invalidate Radix */ -#define CXL_XSL9_IERAT_IALL (1ull << (63-8)) /* Invalidate All */ -#define CXL_XSL9_IERAT_IINPROG (1ull << (63-63)) /* Invalidate in progress */ - -/* cxl_process_element->software_status */ -#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 0)) /* Valid */ -#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */ -#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ -#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ - -/****** CXL_PSL_RXCTL_An (Implementation Specific) ************************** - * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to - * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x - * of the hang pulse frequency. - */ -#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL - -/* SPA->sw_command_status */ -#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL -#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL -#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL -#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL -#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL -#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL -#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL -#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL -#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL -#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL -#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL -#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL -#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL -#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL -#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL -#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL - -#define CXL_MAX_SLICES 4 -#define MAX_AFU_MMIO_REGS 3 - -#define CXL_MODE_TIME_SLICED 0x4 -#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) - -#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ -#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) -#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) - -#define CXL_PSL9_TRACEID_MAX 0xAU -#define CXL_PSL9_TRACESTATE_FIN 0x3U - -enum cxl_context_status { - CLOSED, - OPENED, - STARTED -}; - -enum prefault_modes { - CXL_PREFAULT_NONE, - CXL_PREFAULT_WED, - CXL_PREFAULT_ALL, -}; - -enum cxl_attrs { - CXL_ADAPTER_ATTRS, - CXL_AFU_MASTER_ATTRS, - CXL_AFU_ATTRS, -}; - -struct cxl_sste { - __be64 esid_data; - __be64 vsid_data; -}; - -#define to_cxl_adapter(d) container_of(d, struct cxl, dev) -#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) - -struct cxl_afu_native { - void __iomem *p1n_mmio; - void __iomem *afu_desc_mmio; - irq_hw_number_t psl_hwirq; - unsigned int psl_virq; - struct mutex spa_mutex; - /* - * Only the first part of the SPA is used for the process element - * linked list. The only other part that software needs to worry about - * is sw_command_status, which we store a separate pointer to. - * Everything else in the SPA is only used by hardware - */ - struct cxl_process_element *spa; - __be64 *sw_command_status; - unsigned int spa_size; - int spa_order; - int spa_max_procs; - u64 pp_offset; -}; - -struct cxl_afu_guest { - struct cxl_afu *parent; - u64 handle; - phys_addr_t p2n_phys; - u64 p2n_size; - int max_ints; - bool handle_err; - struct delayed_work work_err; - int previous_state; -}; - -struct cxl_afu { - struct cxl_afu_native *native; - struct cxl_afu_guest *guest; - irq_hw_number_t serr_hwirq; - unsigned int serr_virq; - char *psl_irq_name; - char *err_irq_name; - void __iomem *p2n_mmio; - phys_addr_t psn_phys; - u64 pp_size; - - struct cxl *adapter; - struct device dev; - struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; - struct device *chardev_s, *chardev_m, *chardev_d; - struct idr contexts_idr; - struct dentry *debugfs; - struct mutex contexts_lock; - spinlock_t afu_cntl_lock; - - /* -1: AFU deconfigured/locked, >= 0: number of readers */ - atomic_t configured_state; - - /* AFU error buffer fields and bin attribute for sysfs */ - u64 eb_len, eb_offset; - struct bin_attribute attr_eb; - - /* pointer to the vphb */ - struct pci_controller *phb; - - int pp_irqs; - int irqs_max; - int num_procs; - int max_procs_virtualised; - int slice; - int modes_supported; - int current_mode; - int crs_num; - u64 crs_len; - u64 crs_offset; - struct list_head crs; - enum prefault_modes prefault_mode; - bool psa; - bool pp_psa; - bool enabled; -}; - - -struct cxl_irq_name { - struct list_head list; - char *name; -}; - -struct irq_avail { - irq_hw_number_t offset; - irq_hw_number_t range; - unsigned long *bitmap; -}; - -/* - * This is a cxl context. If the PSL is in dedicated mode, there will be one - * of these per AFU. If in AFU directed there can be lots of these. - */ -struct cxl_context { - struct cxl_afu *afu; - - /* Problem state MMIO */ - phys_addr_t psn_phys; - u64 psn_size; - - /* Used to unmap any mmaps when force detaching */ - struct address_space *mapping; - struct mutex mapping_lock; - struct page *ff_page; - bool mmio_err_ff; - bool kernelapi; - - spinlock_t sste_lock; /* Protects segment table entries */ - struct cxl_sste *sstp; - u64 sstp0, sstp1; - unsigned int sst_size, sst_lru; - - wait_queue_head_t wq; - /* use mm context associated with this pid for ds faults */ - struct pid *pid; - spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */ - /* Only used in PR mode */ - u64 process_token; - - /* driver private data */ - void *priv; - - unsigned long *irq_bitmap; /* Accessed from IRQ context */ - struct cxl_irq_ranges irqs; - struct list_head irq_names; - u64 fault_addr; - u64 fault_dsisr; - u64 afu_err; - - /* - * This status and it's lock pretects start and detach context - * from racing. It also prevents detach from racing with - * itself - */ - enum cxl_context_status status; - struct mutex status_mutex; - - - /* XXX: Is it possible to need multiple work items at once? */ - struct work_struct fault_work; - u64 dsisr; - u64 dar; - - struct cxl_process_element *elem; - - /* - * pe is the process element handle, assigned by this driver when the - * context is initialized. - * - * external_pe is the PE shown outside of cxl. - * On bare-metal, pe=external_pe, because we decide what the handle is. - * In a guest, we only find out about the pe used by pHyp when the - * context is attached, and that's the value we want to report outside - * of cxl. - */ - int pe; - int external_pe; - - u32 irq_count; - bool pe_inserted; - bool master; - bool kernel; - bool pending_irq; - bool pending_fault; - bool pending_afu_err; - - /* Used by AFU drivers for driver specific event delivery */ - struct cxl_afu_driver_ops *afu_driver_ops; - atomic_t afu_driver_events; - - struct rcu_head rcu; - - struct mm_struct *mm; - - u16 tidr; - bool assign_tidr; -}; - -struct cxl_irq_info; - -struct cxl_service_layer_ops { - int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev); - int (*invalidate_all)(struct cxl *adapter); - int (*afu_regs_init)(struct cxl_afu *afu); - int (*sanitise_afu_regs)(struct cxl_afu *afu); - int (*register_serr_irq)(struct cxl_afu *afu); - void (*release_serr_irq)(struct cxl_afu *afu); - irqreturn_t (*handle_interrupt)(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); - irqreturn_t (*fail_irq)(struct cxl_afu *afu, struct cxl_irq_info *irq_info); - int (*activate_dedicated_process)(struct cxl_afu *afu); - int (*attach_afu_directed)(struct cxl_context *ctx, u64 wed, u64 amr); - int (*attach_dedicated_process)(struct cxl_context *ctx, u64 wed, u64 amr); - void (*update_dedicated_ivtes)(struct cxl_context *ctx); - void (*debugfs_add_adapter_regs)(struct cxl *adapter, struct dentry *dir); - void (*debugfs_add_afu_regs)(struct cxl_afu *afu, struct dentry *dir); - void (*psl_irq_dump_registers)(struct cxl_context *ctx); - void (*err_irq_dump_registers)(struct cxl *adapter); - void (*debugfs_stop_trace)(struct cxl *adapter); - void (*write_timebase_ctrl)(struct cxl *adapter); - u64 (*timebase_read)(struct cxl *adapter); - int capi_mode; - bool needs_reset_before_disable; -}; - -struct cxl_native { - u64 afu_desc_off; - u64 afu_desc_size; - void __iomem *p1_mmio; - void __iomem *p2_mmio; - irq_hw_number_t err_hwirq; - unsigned int err_virq; - u64 ps_off; - bool no_data_cache; /* set if no data cache on the card */ - const struct cxl_service_layer_ops *sl_ops; -}; - -struct cxl_guest { - struct platform_device *pdev; - int irq_nranges; - struct cdev cdev; - irq_hw_number_t irq_base_offset; - struct irq_avail *irq_avail; - spinlock_t irq_alloc_lock; - u64 handle; - char *status; - u16 vendor; - u16 device; - u16 subsystem_vendor; - u16 subsystem; -}; - -struct cxl { - struct cxl_native *native; - struct cxl_guest *guest; - spinlock_t afu_list_lock; - struct cxl_afu *afu[CXL_MAX_SLICES]; - struct device dev; - struct dentry *trace; - struct dentry *psl_err_chk; - struct dentry *debugfs; - char *irq_name; - struct bin_attribute cxl_attr; - int adapter_num; - int user_irqs; - u64 ps_size; - u16 psl_rev; - u16 base_image; - u8 vsec_status; - u8 caia_major; - u8 caia_minor; - u8 slices; - bool user_image_loaded; - bool perst_loads_image; - bool perst_select_user; - bool perst_same_image; - bool psl_timebase_synced; - bool tunneled_ops_supported; - - /* - * number of contexts mapped on to this card. Possible values are: - * >0: Number of contexts mapped and new one can be mapped. - * 0: No active contexts and new ones can be mapped. - * -1: No contexts mapped and new ones cannot be mapped. - */ - atomic_t contexts_num; -}; - -int cxl_pci_alloc_one_irq(struct cxl *adapter); -void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq); -int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); -void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); -int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); -int cxl_update_image_control(struct cxl *adapter); -int cxl_pci_reset(struct cxl *adapter); -void cxl_pci_release_afu(struct device *dev); -ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); - -/* common == phyp + powernv - CAIA 1&2 */ -struct cxl_process_element_common { - __be32 tid; - __be32 pid; - __be64 csrp; - union { - struct { - __be64 aurp0; - __be64 aurp1; - __be64 sstp0; - __be64 sstp1; - } psl8; /* CAIA 1 */ - struct { - u8 reserved2[8]; - u8 reserved3[8]; - u8 reserved4[8]; - u8 reserved5[8]; - } psl9; /* CAIA 2 */ - } u; - __be64 amr; - u8 reserved6[4]; - __be64 wed; -} __packed; - -/* just powernv - CAIA 1&2 */ -struct cxl_process_element { - __be64 sr; - __be64 SPOffset; - union { - __be64 sdr; /* CAIA 1 */ - u8 reserved1[8]; /* CAIA 2 */ - } u; - __be64 haurp; - __be32 ctxtime; - __be16 ivte_offsets[4]; - __be16 ivte_ranges[4]; - __be32 lpid; - struct cxl_process_element_common common; - __be32 software_state; -} __packed; - -static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu) -{ - struct pci_dev *pdev; - - if (cpu_has_feature(CPU_FTR_HVMODE)) { - pdev = to_pci_dev(cxl->dev.parent); - return !pci_channel_offline(pdev); - } - return true; -} - -static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) -{ - WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return cxl->native->p1_mmio + cxl_reg_off(reg); -} - -static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) -{ - if (likely(cxl_adapter_link_ok(cxl, NULL))) - out_be64(_cxl_p1_addr(cxl, reg), val); -} - -static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) -{ - if (likely(cxl_adapter_link_ok(cxl, NULL))) - return in_be64(_cxl_p1_addr(cxl, reg)); - else - return ~0ULL; -} - -static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) -{ - WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return afu->native->p1n_mmio + cxl_reg_off(reg); -} - -static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) -{ - if (likely(cxl_adapter_link_ok(afu->adapter, afu))) - out_be64(_cxl_p1n_addr(afu, reg), val); -} - -static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) -{ - if (likely(cxl_adapter_link_ok(afu->adapter, afu))) - return in_be64(_cxl_p1n_addr(afu, reg)); - else - return ~0ULL; -} - -static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) -{ - return afu->p2n_mmio + cxl_reg_off(reg); -} - -static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) -{ - if (likely(cxl_adapter_link_ok(afu->adapter, afu))) - out_be64(_cxl_p2n_addr(afu, reg), val); -} - -static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) -{ - if (likely(cxl_adapter_link_ok(afu->adapter, afu))) - return in_be64(_cxl_p2n_addr(afu, reg)); - else - return ~0ULL; -} - -static inline bool cxl_is_power8(void) -{ - if ((pvr_version_is(PVR_POWER8E)) || - (pvr_version_is(PVR_POWER8NVL)) || - (pvr_version_is(PVR_POWER8)) || - (pvr_version_is(PVR_HX_C2000))) - return true; - return false; -} - -static inline bool cxl_is_power9(void) -{ - if (pvr_version_is(PVR_POWER9)) - return true; - return false; -} - -ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, - loff_t off, size_t count); - - -struct cxl_calls { - void (*cxl_slbia)(struct mm_struct *mm); - struct module *owner; -}; -int register_cxl_calls(struct cxl_calls *calls); -void unregister_cxl_calls(struct cxl_calls *calls); -int cxl_update_properties(struct device_node *dn, struct property *new_prop); - -void cxl_remove_adapter_nr(struct cxl *adapter); - -void cxl_release_spa(struct cxl_afu *afu); - -dev_t cxl_get_dev(void); -int cxl_file_init(void); -void cxl_file_exit(void); -int cxl_register_adapter(struct cxl *adapter); -int cxl_register_afu(struct cxl_afu *afu); -int cxl_chardev_d_afu_add(struct cxl_afu *afu); -int cxl_chardev_m_afu_add(struct cxl_afu *afu); -int cxl_chardev_s_afu_add(struct cxl_afu *afu); -void cxl_chardev_afu_remove(struct cxl_afu *afu); - -void cxl_context_detach_all(struct cxl_afu *afu); -void cxl_context_free(struct cxl_context *ctx); -void cxl_context_detach(struct cxl_context *ctx); - -int cxl_sysfs_adapter_add(struct cxl *adapter); -void cxl_sysfs_adapter_remove(struct cxl *adapter); -int cxl_sysfs_afu_add(struct cxl_afu *afu); -void cxl_sysfs_afu_remove(struct cxl_afu *afu); -int cxl_sysfs_afu_m_add(struct cxl_afu *afu); -void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); - -struct cxl *cxl_alloc_adapter(void); -struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice); -int cxl_afu_select_best_mode(struct cxl_afu *afu); - -int cxl_native_register_psl_irq(struct cxl_afu *afu); -void cxl_native_release_psl_irq(struct cxl_afu *afu); -int cxl_native_register_psl_err_irq(struct cxl *adapter); -void cxl_native_release_psl_err_irq(struct cxl *adapter); -int cxl_native_register_serr_irq(struct cxl_afu *afu); -void cxl_native_release_serr_irq(struct cxl_afu *afu); -int afu_register_irqs(struct cxl_context *ctx, u32 count); -void afu_release_irqs(struct cxl_context *ctx, void *cookie); -void afu_irq_name_free(struct cxl_context *ctx); - -int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr); -int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr); -int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu); -int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu); -int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr); -int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr); -void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx); -void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx); - -#ifdef CONFIG_DEBUG_FS - -void cxl_debugfs_init(void); -void cxl_debugfs_exit(void); -void cxl_debugfs_adapter_add(struct cxl *adapter); -void cxl_debugfs_adapter_remove(struct cxl *adapter); -void cxl_debugfs_afu_add(struct cxl_afu *afu); -void cxl_debugfs_afu_remove(struct cxl_afu *afu); -void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); -void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); -void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir); -void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir); - -#else /* CONFIG_DEBUG_FS */ - -static inline void __init cxl_debugfs_init(void) -{ -} - -static inline void cxl_debugfs_exit(void) -{ -} - -static inline void cxl_debugfs_adapter_add(struct cxl *adapter) -{ -} - -static inline void cxl_debugfs_adapter_remove(struct cxl *adapter) -{ -} - -static inline void cxl_debugfs_afu_add(struct cxl_afu *afu) -{ -} - -static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu) -{ -} - -static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, - struct dentry *dir) -{ -} - -static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, - struct dentry *dir) -{ -} - -static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) -{ -} - -static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) -{ -} - -#endif /* CONFIG_DEBUG_FS */ - -void cxl_handle_fault(struct work_struct *work); -void cxl_prefault(struct cxl_context *ctx, u64 wed); -int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar); - -struct cxl *get_cxl_adapter(int num); -int cxl_alloc_sst(struct cxl_context *ctx); -void cxl_dump_debug_buffer(void *addr, size_t size); - -void init_cxl_native(void); - -struct cxl_context *cxl_context_alloc(void); -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master); -void cxl_context_set_mapping(struct cxl_context *ctx, - struct address_space *mapping); -void cxl_context_free(struct cxl_context *ctx); -int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); -unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie, const char *name); -void cxl_unmap_irq(unsigned int virq, void *cookie); -int __detach_context(struct cxl_context *ctx); - -/* - * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined - * in PAPR. - * Field pid_tid is now 'reserved' because it's no more used on bare-metal. - * On a guest environment, PSL_PID_An is located on the upper 32 bits and - * PSL_TID_An register in the lower 32 bits. - */ -struct cxl_irq_info { - u64 dsisr; - u64 dar; - u64 dsr; - u64 reserved; - u64 afu_err; - u64 errstat; - u64 proc_handle; - u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */ -}; - -void cxl_assign_psn_space(struct cxl_context *ctx); -int cxl_invalidate_all_psl9(struct cxl *adapter); -int cxl_invalidate_all_psl8(struct cxl *adapter); -irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); -irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); -irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info); -int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, - void *cookie, irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq, const char *name); - -int cxl_check_error(struct cxl_afu *afu); -int cxl_afu_slbia(struct cxl_afu *afu); -int cxl_data_cache_flush(struct cxl *adapter); -int cxl_afu_disable(struct cxl_afu *afu); -int cxl_psl_purge(struct cxl_afu *afu); -int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, - u32 *phb_index, u64 *capp_unit_id); -int cxl_slot_is_switched(struct pci_dev *dev); -int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg); -u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); - -void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); -void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx); -void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter); -void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter); -int cxl_pci_vphb_add(struct cxl_afu *afu); -void cxl_pci_vphb_remove(struct cxl_afu *afu); -void cxl_release_mapping(struct cxl_context *ctx); - -extern struct pci_driver cxl_pci_driver; -extern struct platform_driver cxl_of_driver; -int afu_allocate_irqs(struct cxl_context *ctx, u32 count); - -int afu_open(struct inode *inode, struct file *file); -int afu_release(struct inode *inode, struct file *file); -long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int afu_mmap(struct file *file, struct vm_area_struct *vm); -__poll_t afu_poll(struct file *file, struct poll_table_struct *poll); -ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); -extern const struct file_operations afu_fops; - -struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev); -void cxl_guest_remove_adapter(struct cxl *adapter); -int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np); -int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np); -ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); -ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len); -int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np); -void cxl_guest_remove_afu(struct cxl_afu *afu); -int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np); -int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np); -int cxl_guest_add_chardev(struct cxl *adapter); -void cxl_guest_remove_chardev(struct cxl *adapter); -void cxl_guest_reload_module(struct cxl *adapter); -int cxl_of_probe(struct platform_device *pdev); - -struct cxl_backend_ops { - struct module *module; - int (*adapter_reset)(struct cxl *adapter); - int (*alloc_one_irq)(struct cxl *adapter); - void (*release_one_irq)(struct cxl *adapter, int hwirq); - int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs, - struct cxl *adapter, unsigned int num); - void (*release_irq_ranges)(struct cxl_irq_ranges *irqs, - struct cxl *adapter); - int (*setup_irq)(struct cxl *adapter, unsigned int hwirq, - unsigned int virq); - irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx, - u64 dsisr, u64 errstat); - irqreturn_t (*psl_interrupt)(int irq, void *data); - int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); - void (*irq_wait)(struct cxl_context *ctx); - int (*attach_process)(struct cxl_context *ctx, bool kernel, - u64 wed, u64 amr); - int (*detach_process)(struct cxl_context *ctx); - void (*update_ivtes)(struct cxl_context *ctx); - bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); - bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); - void (*release_afu)(struct device *dev); - ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf, - loff_t off, size_t count); - int (*afu_check_and_enable)(struct cxl_afu *afu); - int (*afu_activate_mode)(struct cxl_afu *afu, int mode); - int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode); - int (*afu_reset)(struct cxl_afu *afu); - int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val); - int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val); - int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val); - int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val); - int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val); - int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val); - int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val); - ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count); -}; -extern const struct cxl_backend_ops cxl_native_ops; -extern const struct cxl_backend_ops cxl_guest_ops; -extern const struct cxl_backend_ops *cxl_ops; - -/* check if the given pci_dev is on the cxl vphb bus */ -bool cxl_pci_is_vphb_device(struct pci_dev *dev); - -/* decode AFU error bits in the PSL register PSL_SERR_An */ -void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); - -/* - * Increments the number of attached contexts on an adapter. - * In case an adapter_context_lock is taken the return -EBUSY. - */ -int cxl_adapter_context_get(struct cxl *adapter); - -/* Decrements the number of attached contexts on an adapter */ -void cxl_adapter_context_put(struct cxl *adapter); - -/* If no active contexts then prevents contexts from being attached */ -int cxl_adapter_context_lock(struct cxl *adapter); - -/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */ -void cxl_adapter_context_unlock(struct cxl *adapter); - -/* Increases the reference count to "struct mm_struct" */ -void cxl_context_mm_count_get(struct cxl_context *ctx); - -/* Decrements the reference count to "struct mm_struct" */ -void cxl_context_mm_count_put(struct cxl_context *ctx); - -#endif diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c deleted file mode 100644 index e5fe0a171472..000000000000 --- a/drivers/misc/cxl/cxllib.c +++ /dev/null @@ -1,271 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2017 IBM Corp. - */ - -#include -#include -#include -#include -#include - -#include "cxl.h" - -#define CXL_INVALID_DRA ~0ull -#define CXL_DUMMY_READ_SIZE 128 -#define CXL_DUMMY_READ_ALIGN 8 -#define CXL_CAPI_WINDOW_START 0x2000000000000ull -#define CXL_CAPI_WINDOW_LOG_SIZE 48 -#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 - - -bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) -{ - int rc; - u32 phb_index; - u64 chip_id, capp_unit_id; - - /* No flags currently supported */ - if (flags) - return false; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return false; - - if (!cxl_is_power9()) - return false; - - if (cxl_slot_is_switched(dev)) - return false; - - /* on p9, some pci slots are not connected to a CAPP unit */ - rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); - if (rc) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(cxllib_slot_is_supported); - -static DEFINE_MUTEX(dra_mutex); -static u64 dummy_read_addr = CXL_INVALID_DRA; - -static int allocate_dummy_read_buf(void) -{ - u64 buf, vaddr; - size_t buf_size; - - /* - * Dummy read buffer is 128-byte long, aligned on a - * 256-byte boundary and we need the physical address. - */ - buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); - buf = (u64) kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & - (~0ull << CXL_DUMMY_READ_ALIGN); - - WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), - "Dummy read buffer alignment issue"); - dummy_read_addr = virt_to_phys((void *) vaddr); - return 0; -} - -int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) -{ - int rc; - u32 phb_index; - u64 chip_id, capp_unit_id; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return -EINVAL; - - mutex_lock(&dra_mutex); - if (dummy_read_addr == CXL_INVALID_DRA) { - rc = allocate_dummy_read_buf(); - if (rc) { - mutex_unlock(&dra_mutex); - return rc; - } - } - mutex_unlock(&dra_mutex); - - rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); - if (rc) - return rc; - - rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); - if (rc) - return rc; - - cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; - cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; - cfg->bar_addr = CXL_CAPI_WINDOW_START; - cfg->dra = dummy_read_addr; - return 0; -} -EXPORT_SYMBOL_GPL(cxllib_get_xsl_config); - -int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, - unsigned long flags) -{ - int rc = 0; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return -EINVAL; - - switch (mode) { - case CXL_MODE_PCI: - /* - * We currently don't support going back to PCI mode - * However, we'll turn the invalidations off, so that - * the firmware doesn't have to ack them and can do - * things like reset, etc.. with no worries. - * So always return EPERM (can't go back to PCI) or - * EBUSY if we couldn't even turn off snooping - */ - rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); - if (rc) - rc = -EBUSY; - else - rc = -EPERM; - break; - case CXL_MODE_CXL: - /* DMA only supported on TVT1 for the time being */ - if (flags != CXL_MODE_DMA_TVT1) - return -EINVAL; - rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); - if (rc) - return rc; - rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); - break; - default: - rc = -EINVAL; - } - return rc; -} -EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); - -/* - * When switching the PHB to capi mode, the TVT#1 entry for - * the Partitionable Endpoint is set in bypass mode, like - * in PCI mode. - * Configure the device dma to use TVT#1, which is done - * by calling dma_set_mask() with a mask large enough. - */ -int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) -{ - int rc; - - if (flags) - return -EINVAL; - - rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); - return rc; -} -EXPORT_SYMBOL_GPL(cxllib_set_device_dma); - -int cxllib_get_PE_attributes(struct task_struct *task, - unsigned long translation_mode, - struct cxllib_pe_attributes *attr) -{ - if (translation_mode != CXL_TRANSLATED_MODE && - translation_mode != CXL_REAL_MODE) - return -EINVAL; - - attr->sr = cxl_calculate_sr(false, - task == NULL, - translation_mode == CXL_REAL_MODE, - true); - attr->lpid = mfspr(SPRN_LPID); - if (task) { - struct mm_struct *mm = get_task_mm(task); - if (mm == NULL) - return -EINVAL; - /* - * Caller is keeping a reference on mm_users for as long - * as XSL uses the memory context - */ - attr->pid = mm->context.id; - mmput(mm); - attr->tid = task->thread.tidr; - } else { - attr->pid = 0; - attr->tid = 0; - } - return 0; -} -EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); - -static int get_vma_info(struct mm_struct *mm, u64 addr, - u64 *vma_start, u64 *vma_end, - unsigned long *page_size) -{ - struct vm_area_struct *vma = NULL; - int rc = 0; - - mmap_read_lock(mm); - - vma = find_vma(mm, addr); - if (!vma) { - rc = -EFAULT; - goto out; - } - *page_size = vma_kernel_pagesize(vma); - *vma_start = vma->vm_start; - *vma_end = vma->vm_end; -out: - mmap_read_unlock(mm); - return rc; -} - -int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) -{ - int rc; - u64 dar, vma_start, vma_end; - unsigned long page_size; - - if (mm == NULL) - return -EFAULT; - - /* - * The buffer we have to process can extend over several pages - * and may also cover several VMAs. - * We iterate over all the pages. The page size could vary - * between VMAs. - */ - rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size); - if (rc) - return rc; - - for (dar = (addr & ~(page_size - 1)); dar < (addr + size); - dar += page_size) { - if (dar < vma_start || dar >= vma_end) { - /* - * We don't hold mm->mmap_lock while iterating, since - * the lock is required by one of the lower-level page - * fault processing functions and it could - * create a deadlock. - * - * It means the VMAs can be altered between 2 - * loop iterations and we could theoretically - * miss a page (however unlikely). But that's - * not really a problem, as the driver will - * retry access, get another page fault on the - * missing page and call us again. - */ - rc = get_vma_info(mm, dar, &vma_start, &vma_end, - &page_size); - if (rc) - return rc; - } - - rc = cxl_handle_mm_fault(mm, flags, dar); - if (rc) - return -EFAULT; - } - return 0; -} -EXPORT_SYMBOL_GPL(cxllib_handle_fault); diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c deleted file mode 100644 index 7b987bf498b5..000000000000 --- a/drivers/misc/cxl/debugfs.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include - -#include "cxl.h" - -static struct dentry *cxl_debugfs; - -/* Helpers to export CXL mmaped IO registers via debugfs */ -static int debugfs_io_u64_get(void *data, u64 *val) -{ - *val = in_be64((u64 __iomem *)data); - return 0; -} - -static int debugfs_io_u64_set(void *data, u64 val) -{ - out_be64((u64 __iomem *)data, val); - return 0; -} -DEFINE_DEBUGFS_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, - "0x%016llx\n"); - -static void debugfs_create_io_x64(const char *name, umode_t mode, - struct dentry *parent, u64 __iomem *value) -{ - debugfs_create_file_unsafe(name, mode, parent, (void __force *)value, - &fops_io_x64); -} - -void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir) -{ - debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1)); - debugfs_create_io_x64("fir_mask", 0400, dir, - _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK)); - debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG)); - debugfs_create_io_x64("debug", 0600, dir, - _cxl_p1_addr(adapter, CXL_PSL9_DEBUG)); - debugfs_create_io_x64("xsl-debug", 0600, dir, - _cxl_p1_addr(adapter, CXL_XSL9_DBG)); -} - -void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) -{ - debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); - debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); - debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); -} - -void cxl_debugfs_adapter_add(struct cxl *adapter) -{ - struct dentry *dir; - char buf[32]; - - if (!cxl_debugfs) - return; - - snprintf(buf, 32, "card%i", adapter->adapter_num); - dir = debugfs_create_dir(buf, cxl_debugfs); - adapter->debugfs = dir; - - debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); - - if (adapter->native->sl_ops->debugfs_add_adapter_regs) - adapter->native->sl_ops->debugfs_add_adapter_regs(adapter, dir); -} - -void cxl_debugfs_adapter_remove(struct cxl *adapter) -{ - debugfs_remove_recursive(adapter->debugfs); -} - -void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) -{ - debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); -} - -void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) -{ - debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); - debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); - - debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); - debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); - debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); -} - -void cxl_debugfs_afu_add(struct cxl_afu *afu) -{ - struct dentry *dir; - char buf[32]; - - if (!afu->adapter->debugfs) - return; - - snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice); - dir = debugfs_create_dir(buf, afu->adapter->debugfs); - afu->debugfs = dir; - - debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); - debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); - debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); - - debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); - - if (afu->adapter->native->sl_ops->debugfs_add_afu_regs) - afu->adapter->native->sl_ops->debugfs_add_afu_regs(afu, dir); -} - -void cxl_debugfs_afu_remove(struct cxl_afu *afu) -{ - debugfs_remove_recursive(afu->debugfs); -} - -void __init cxl_debugfs_init(void) -{ - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return; - - cxl_debugfs = debugfs_create_dir("cxl", NULL); -} - -void cxl_debugfs_exit(void) -{ - debugfs_remove_recursive(cxl_debugfs); -} diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c deleted file mode 100644 index 2c64f55cf01f..000000000000 --- a/drivers/misc/cxl/fault.c +++ /dev/null @@ -1,341 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include - -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "cxl" "." -#include -#include -#include - -#include "cxl.h" -#include "trace.h" - -static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) -{ - return ((sste->vsid_data == cpu_to_be64(slb->vsid)) && - (sste->esid_data == cpu_to_be64(slb->esid))); -} - -/* - * This finds a free SSTE for the given SLB, or returns NULL if it's already in - * the segment table. - */ -static struct cxl_sste *find_free_sste(struct cxl_context *ctx, - struct copro_slb *slb) -{ - struct cxl_sste *primary, *sste, *ret = NULL; - unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */ - unsigned int entry; - unsigned int hash; - - if (slb->vsid & SLB_VSID_B_1T) - hash = (slb->esid >> SID_SHIFT_1T) & mask; - else /* 256M */ - hash = (slb->esid >> SID_SHIFT) & mask; - - primary = ctx->sstp + (hash << 3); - - for (entry = 0, sste = primary; entry < 8; entry++, sste++) { - if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) - ret = sste; - if (sste_matches(sste, slb)) - return NULL; - } - if (ret) - return ret; - - /* Nothing free, select an entry to cast out */ - ret = primary + ctx->sst_lru; - ctx->sst_lru = (ctx->sst_lru + 1) & 0x7; - - return ret; -} - -static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) -{ - /* mask is the group index, we search primary and secondary here. */ - struct cxl_sste *sste; - unsigned long flags; - - spin_lock_irqsave(&ctx->sste_lock, flags); - sste = find_free_sste(ctx, slb); - if (!sste) - goto out_unlock; - - pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", - sste - ctx->sstp, slb->vsid, slb->esid); - trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); - - sste->vsid_data = cpu_to_be64(slb->vsid); - sste->esid_data = cpu_to_be64(slb->esid); -out_unlock: - spin_unlock_irqrestore(&ctx->sste_lock, flags); -} - -static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, - u64 ea) -{ - struct copro_slb slb = {0,0}; - int rc; - - if (!(rc = copro_calculate_slb(mm, ea, &slb))) { - cxl_load_segment(ctx, &slb); - } - - return rc; -} - -static void cxl_ack_ae(struct cxl_context *ctx) -{ - unsigned long flags; - - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); - - spin_lock_irqsave(&ctx->lock, flags); - ctx->pending_fault = true; - ctx->fault_addr = ctx->dar; - ctx->fault_dsisr = ctx->dsisr; - spin_unlock_irqrestore(&ctx->lock, flags); - - wake_up_all(&ctx->wq); -} - -static int cxl_handle_segment_miss(struct cxl_context *ctx, - struct mm_struct *mm, u64 ea) -{ - int rc; - - pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); - trace_cxl_ste_miss(ctx, ea); - - if ((rc = cxl_fault_segment(ctx, mm, ea))) - cxl_ack_ae(ctx); - else { - - mb(); /* Order seg table write to TFC MMIO write */ - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); - } - - return IRQ_HANDLED; -} - -int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar) -{ - vm_fault_t flt = 0; - int result; - unsigned long access, flags, inv_flags = 0; - - /* - * Add the fault handling cpu to task mm cpumask so that we - * can do a safe lockless page table walk when inserting the - * hash page table entry. This function get called with a - * valid mm for user space addresses. Hence using the if (mm) - * check is sufficient here. - */ - if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - /* - * We need to make sure we walk the table only after - * we update the cpumask. The other side of the barrier - * is explained in serialize_against_pte_lookup() - */ - smp_mb(); - } - if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { - pr_devel("copro_handle_mm_fault failed: %#x\n", result); - return result; - } - - if (!radix_enabled()) { - /* - * update_mmu_cache() will not have loaded the hash since current->trap - * is not a 0x400 or 0x300, so just call hash_page_mm() here. - */ - access = _PAGE_PRESENT | _PAGE_READ; - if (dsisr & CXL_PSL_DSISR_An_S) - access |= _PAGE_WRITE; - - if (!mm && (get_region_id(dar) != USER_REGION_ID)) - access |= _PAGE_PRIVILEGED; - - if (dsisr & DSISR_NOHPTE) - inv_flags |= HPTE_NOHPTE_UPDATE; - - local_irq_save(flags); - hash_page_mm(mm, dar, access, 0x300, inv_flags); - local_irq_restore(flags); - } - return 0; -} - -static void cxl_handle_page_fault(struct cxl_context *ctx, - struct mm_struct *mm, - u64 dsisr, u64 dar) -{ - trace_cxl_pte_miss(ctx, dsisr, dar); - - if (cxl_handle_mm_fault(mm, dsisr, dar)) { - cxl_ack_ae(ctx); - } else { - pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); - } -} - -/* - * Returns the mm_struct corresponding to the context ctx. - * mm_users == 0, the context may be in the process of being closed. - */ -static struct mm_struct *get_mem_context(struct cxl_context *ctx) -{ - if (ctx->mm == NULL) - return NULL; - - if (!mmget_not_zero(ctx->mm)) - return NULL; - - return ctx->mm; -} - -static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr) -{ - if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS))) - return true; - - return false; -} - -static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr) -{ - if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM)) - return true; - - if (cxl_is_power9()) - return true; - - return false; -} - -void cxl_handle_fault(struct work_struct *fault_work) -{ - struct cxl_context *ctx = - container_of(fault_work, struct cxl_context, fault_work); - u64 dsisr = ctx->dsisr; - u64 dar = ctx->dar; - struct mm_struct *mm = NULL; - - if (cpu_has_feature(CPU_FTR_HVMODE)) { - if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || - cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || - cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { - /* Most likely explanation is harmless - a dedicated - * process has detached and these were cleared by the - * PSL purge, but warn about it just in case - */ - dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); - return; - } - } - - /* Early return if the context is being / has been detached */ - if (ctx->status == CLOSED) { - cxl_ack_ae(ctx); - return; - } - - pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " - "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); - - if (!ctx->kernel) { - - mm = get_mem_context(ctx); - if (mm == NULL) { - pr_devel("%s: unable to get mm for pe=%d pid=%i\n", - __func__, ctx->pe, pid_nr(ctx->pid)); - cxl_ack_ae(ctx); - return; - } else { - pr_devel("Handling page fault for pe=%d pid=%i\n", - ctx->pe, pid_nr(ctx->pid)); - } - } - - if (cxl_is_segment_miss(ctx, dsisr)) - cxl_handle_segment_miss(ctx, mm, dar); - else if (cxl_is_page_fault(ctx, dsisr)) - cxl_handle_page_fault(ctx, mm, dsisr, dar); - else - WARN(1, "cxl_handle_fault has nothing to handle\n"); - - if (mm) - mmput(mm); -} - -static u64 next_segment(u64 ea, u64 vsid) -{ - if (vsid & SLB_VSID_B_1T) - ea |= (1ULL << 40) - 1; - else - ea |= (1ULL << 28) - 1; - - return ea + 1; -} - -static void cxl_prefault_vma(struct cxl_context *ctx, struct mm_struct *mm) -{ - u64 ea, last_esid = 0; - struct copro_slb slb; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - int rc; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - for (ea = vma->vm_start; ea < vma->vm_end; - ea = next_segment(ea, slb.vsid)) { - rc = copro_calculate_slb(mm, ea, &slb); - if (rc) - continue; - - if (last_esid == slb.esid) - continue; - - cxl_load_segment(ctx, &slb); - last_esid = slb.esid; - } - } - mmap_read_unlock(mm); -} - -void cxl_prefault(struct cxl_context *ctx, u64 wed) -{ - struct mm_struct *mm = get_mem_context(ctx); - - if (mm == NULL) { - pr_devel("cxl_prefault unable to get mm %i\n", - pid_nr(ctx->pid)); - return; - } - - switch (ctx->afu->prefault_mode) { - case CXL_PREFAULT_WED: - cxl_fault_segment(ctx, mm, wed); - break; - case CXL_PREFAULT_ALL: - cxl_prefault_vma(ctx, mm); - break; - default: - break; - } - - mmput(mm); -} diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c deleted file mode 100644 index 012e11b959bc..000000000000 --- a/drivers/misc/cxl/file.c +++ /dev/null @@ -1,699 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" -#include "trace.h" - -#define CXL_NUM_MINORS 256 /* Total to reserve */ - -#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) -#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) -#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) -#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu)) -#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) -#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) - -#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) - -#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) - -static dev_t cxl_dev; - -static int __afu_open(struct inode *inode, struct file *file, bool master) -{ - struct cxl *adapter; - struct cxl_afu *afu; - struct cxl_context *ctx; - int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); - int slice = CXL_DEVT_AFU(inode->i_rdev); - int rc = -ENODEV; - - pr_devel("afu_open afu%i.%i\n", slice, adapter_num); - - if (!(adapter = get_cxl_adapter(adapter_num))) - return -ENODEV; - - if (slice > adapter->slices) - goto err_put_adapter; - - spin_lock(&adapter->afu_list_lock); - if (!(afu = adapter->afu[slice])) { - spin_unlock(&adapter->afu_list_lock); - goto err_put_adapter; - } - - /* - * taking a ref to the afu so that it doesn't go away - * for rest of the function. This ref is released before - * we return. - */ - cxl_afu_get(afu); - spin_unlock(&adapter->afu_list_lock); - - if (!afu->current_mode) - goto err_put_afu; - - if (!cxl_ops->link_ok(adapter, afu)) { - rc = -EIO; - goto err_put_afu; - } - - if (!(ctx = cxl_context_alloc())) { - rc = -ENOMEM; - goto err_put_afu; - } - - rc = cxl_context_init(ctx, afu, master); - if (rc) - goto err_put_afu; - - cxl_context_set_mapping(ctx, inode->i_mapping); - - pr_devel("afu_open pe: %i\n", ctx->pe); - file->private_data = ctx; - - /* indicate success */ - rc = 0; - -err_put_afu: - /* release the ref taken earlier */ - cxl_afu_put(afu); -err_put_adapter: - put_device(&adapter->dev); - return rc; -} - -int afu_open(struct inode *inode, struct file *file) -{ - return __afu_open(inode, file, false); -} - -static int afu_master_open(struct inode *inode, struct file *file) -{ - return __afu_open(inode, file, true); -} - -int afu_release(struct inode *inode, struct file *file) -{ - struct cxl_context *ctx = file->private_data; - - pr_devel("%s: closing cxl file descriptor. pe: %i\n", - __func__, ctx->pe); - cxl_context_detach(ctx); - - - /* - * Delete the context's mapping pointer, unless it's created by the - * kernel API, in which case leave it so it can be freed by reclaim_ctx() - */ - if (!ctx->kernelapi) { - mutex_lock(&ctx->mapping_lock); - ctx->mapping = NULL; - mutex_unlock(&ctx->mapping_lock); - } - - /* - * At this this point all bottom halfs have finished and we should be - * getting no more IRQs from the hardware for this context. Once it's - * removed from the IDR (and RCU synchronised) it's safe to free the - * sstp and context. - */ - cxl_context_free(ctx); - - return 0; -} - -static long afu_ioctl_start_work(struct cxl_context *ctx, - struct cxl_ioctl_start_work __user *uwork) -{ - struct cxl_ioctl_start_work work; - u64 amr = 0; - int rc; - - pr_devel("%s: pe: %i\n", __func__, ctx->pe); - - /* Do this outside the status_mutex to avoid a circular dependency with - * the locking in cxl_mmap_fault() */ - if (copy_from_user(&work, uwork, sizeof(work))) - return -EFAULT; - - mutex_lock(&ctx->status_mutex); - if (ctx->status != OPENED) { - rc = -EIO; - goto out; - } - - /* - * if any of the reserved fields are set or any of the unused - * flags are set it's invalid - */ - if (work.reserved1 || work.reserved2 || work.reserved3 || - work.reserved4 || work.reserved5 || - (work.flags & ~CXL_START_WORK_ALL)) { - rc = -EINVAL; - goto out; - } - - if (!(work.flags & CXL_START_WORK_NUM_IRQS)) - work.num_interrupts = ctx->afu->pp_irqs; - else if ((work.num_interrupts < ctx->afu->pp_irqs) || - (work.num_interrupts > ctx->afu->irqs_max)) { - rc = -EINVAL; - goto out; - } - - if ((rc = afu_register_irqs(ctx, work.num_interrupts))) - goto out; - - if (work.flags & CXL_START_WORK_AMR) - amr = work.amr & mfspr(SPRN_UAMOR); - - if (work.flags & CXL_START_WORK_TID) - ctx->assign_tidr = true; - - ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); - - /* - * Increment the mapped context count for adapter. This also checks - * if adapter_context_lock is taken. - */ - rc = cxl_adapter_context_get(ctx->afu->adapter); - if (rc) { - afu_release_irqs(ctx, ctx); - goto out; - } - - /* - * We grab the PID here and not in the file open to allow for the case - * where a process (master, some daemon, etc) has opened the chardev on - * behalf of another process, so the AFU's mm gets bound to the process - * that performs this ioctl and not the process that opened the file. - * Also we grab the PID of the group leader so that if the task that - * has performed the attach operation exits the mm context of the - * process is still accessible. - */ - ctx->pid = get_task_pid(current, PIDTYPE_PID); - - /* acquire a reference to the task's mm */ - ctx->mm = get_task_mm(current); - - /* ensure this mm_struct can't be freed */ - cxl_context_mm_count_get(ctx); - - if (ctx->mm) { - /* decrement the use count from above */ - mmput(ctx->mm); - /* make TLBIs for this context global */ - mm_context_add_copro(ctx->mm); - } - - /* - * Increment driver use count. Enables global TLBIs for hash - * and callbacks to handle the segment table - */ - cxl_ctx_get(); - - /* - * A barrier is needed to make sure all TLBIs are global - * before we attach and the context starts being used by the - * adapter. - * - * Needed after mm_context_add_copro() for radix and - * cxl_ctx_get() for hash/p8. - * - * The barrier should really be mb(), since it involves a - * device. However, it's only useful when we have local - * vs. global TLBIs, i.e SMP=y. So keep smp_mb(). - */ - smp_mb(); - - trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); - - if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, - amr))) { - afu_release_irqs(ctx, ctx); - cxl_adapter_context_put(ctx->afu->adapter); - put_pid(ctx->pid); - ctx->pid = NULL; - cxl_ctx_put(); - cxl_context_mm_count_put(ctx); - if (ctx->mm) - mm_context_remove_copro(ctx->mm); - goto out; - } - - rc = 0; - if (work.flags & CXL_START_WORK_TID) { - work.tid = ctx->tidr; - if (copy_to_user(uwork, &work, sizeof(work))) - rc = -EFAULT; - } - - ctx->status = STARTED; - -out: - mutex_unlock(&ctx->status_mutex); - return rc; -} - -static long afu_ioctl_process_element(struct cxl_context *ctx, - int __user *upe) -{ - pr_devel("%s: pe: %i\n", __func__, ctx->pe); - - if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32))) - return -EFAULT; - - return 0; -} - -static long afu_ioctl_get_afu_id(struct cxl_context *ctx, - struct cxl_afu_id __user *upafuid) -{ - struct cxl_afu_id afuid = { 0 }; - - afuid.card_id = ctx->afu->adapter->adapter_num; - afuid.afu_offset = ctx->afu->slice; - afuid.afu_mode = ctx->afu->current_mode; - - /* set the flag bit in case the afu is a slave */ - if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master) - afuid.flags |= CXL_AFUID_FLAG_SLAVE; - - if (copy_to_user(upafuid, &afuid, sizeof(afuid))) - return -EFAULT; - - return 0; -} - -long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct cxl_context *ctx = file->private_data; - - if (ctx->status == CLOSED) - return -EIO; - - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - return -EIO; - - pr_devel("afu_ioctl\n"); - switch (cmd) { - case CXL_IOCTL_START_WORK: - return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); - case CXL_IOCTL_GET_PROCESS_ELEMENT: - return afu_ioctl_process_element(ctx, (__u32 __user *)arg); - case CXL_IOCTL_GET_AFU_ID: - return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *) - arg); - } - return -EINVAL; -} - -static long afu_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return afu_ioctl(file, cmd, arg); -} - -int afu_mmap(struct file *file, struct vm_area_struct *vm) -{ - struct cxl_context *ctx = file->private_data; - - /* AFU must be started before we can MMIO */ - if (ctx->status != STARTED) - return -EIO; - - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - return -EIO; - - return cxl_context_iomap(ctx, vm); -} - -static inline bool ctx_event_pending(struct cxl_context *ctx) -{ - if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) - return true; - - if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) - return true; - - return false; -} - -__poll_t afu_poll(struct file *file, struct poll_table_struct *poll) -{ - struct cxl_context *ctx = file->private_data; - __poll_t mask = 0; - unsigned long flags; - - - poll_wait(file, &ctx->wq, poll); - - pr_devel("afu_poll wait done pe: %i\n", ctx->pe); - - spin_lock_irqsave(&ctx->lock, flags); - if (ctx_event_pending(ctx)) - mask |= EPOLLIN | EPOLLRDNORM; - else if (ctx->status == CLOSED) - /* Only error on closed when there are no futher events pending - */ - mask |= EPOLLERR; - spin_unlock_irqrestore(&ctx->lock, flags); - - pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask); - - return mask; -} - -static ssize_t afu_driver_event_copy(struct cxl_context *ctx, - char __user *buf, - struct cxl_event *event, - struct cxl_event_afu_driver_reserved *pl) -{ - /* Check event */ - if (!pl) { - ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); - return -EFAULT; - } - - /* Check event size */ - event->header.size += pl->data_size; - if (event->header.size > CXL_READ_MIN_SIZE) { - ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); - return -EFAULT; - } - - /* Copy event header */ - if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { - ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); - return -EFAULT; - } - - /* Copy event data */ - buf += sizeof(struct cxl_event_header); - if (copy_to_user(buf, &pl->data, pl->data_size)) { - ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); - return -EFAULT; - } - - ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ - return event->header.size; -} - -ssize_t afu_read(struct file *file, char __user *buf, size_t count, - loff_t *off) -{ - struct cxl_context *ctx = file->private_data; - struct cxl_event_afu_driver_reserved *pl = NULL; - struct cxl_event event; - unsigned long flags; - int rc; - DEFINE_WAIT(wait); - - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - return -EIO; - - if (count < CXL_READ_MIN_SIZE) - return -EINVAL; - - spin_lock_irqsave(&ctx->lock, flags); - - for (;;) { - prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); - if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) - break; - - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { - rc = -EIO; - goto out; - } - - if (file->f_flags & O_NONBLOCK) { - rc = -EAGAIN; - goto out; - } - - if (signal_pending(current)) { - rc = -ERESTARTSYS; - goto out; - } - - spin_unlock_irqrestore(&ctx->lock, flags); - pr_devel("afu_read going to sleep...\n"); - schedule(); - pr_devel("afu_read woken up\n"); - spin_lock_irqsave(&ctx->lock, flags); - } - - finish_wait(&ctx->wq, &wait); - - memset(&event, 0, sizeof(event)); - event.header.process_element = ctx->pe; - event.header.size = sizeof(struct cxl_event_header); - if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { - pr_devel("afu_read delivering AFU driver specific event\n"); - pl = ctx->afu_driver_ops->fetch_event(ctx); - atomic_dec(&ctx->afu_driver_events); - event.header.type = CXL_EVENT_AFU_DRIVER; - } else if (ctx->pending_irq) { - pr_devel("afu_read delivering AFU interrupt\n"); - event.header.size += sizeof(struct cxl_event_afu_interrupt); - event.header.type = CXL_EVENT_AFU_INTERRUPT; - event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1; - clear_bit(event.irq.irq - 1, ctx->irq_bitmap); - if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count)) - ctx->pending_irq = false; - } else if (ctx->pending_fault) { - pr_devel("afu_read delivering data storage fault\n"); - event.header.size += sizeof(struct cxl_event_data_storage); - event.header.type = CXL_EVENT_DATA_STORAGE; - event.fault.addr = ctx->fault_addr; - event.fault.dsisr = ctx->fault_dsisr; - ctx->pending_fault = false; - } else if (ctx->pending_afu_err) { - pr_devel("afu_read delivering afu error\n"); - event.header.size += sizeof(struct cxl_event_afu_error); - event.header.type = CXL_EVENT_AFU_ERROR; - event.afu_error.error = ctx->afu_err; - ctx->pending_afu_err = false; - } else if (ctx->status == CLOSED) { - pr_devel("afu_read fatal error\n"); - spin_unlock_irqrestore(&ctx->lock, flags); - return -EIO; - } else - WARN(1, "afu_read must be buggy\n"); - - spin_unlock_irqrestore(&ctx->lock, flags); - - if (event.header.type == CXL_EVENT_AFU_DRIVER) - return afu_driver_event_copy(ctx, buf, &event, pl); - - if (copy_to_user(buf, &event, event.header.size)) - return -EFAULT; - return event.header.size; - -out: - finish_wait(&ctx->wq, &wait); - spin_unlock_irqrestore(&ctx->lock, flags); - return rc; -} - -/* - * Note: if this is updated, we need to update api.c to patch the new ones in - * too - */ -const struct file_operations afu_fops = { - .owner = THIS_MODULE, - .open = afu_open, - .poll = afu_poll, - .read = afu_read, - .release = afu_release, - .unlocked_ioctl = afu_ioctl, - .compat_ioctl = afu_compat_ioctl, - .mmap = afu_mmap, -}; - -static const struct file_operations afu_master_fops = { - .owner = THIS_MODULE, - .open = afu_master_open, - .poll = afu_poll, - .read = afu_read, - .release = afu_release, - .unlocked_ioctl = afu_ioctl, - .compat_ioctl = afu_compat_ioctl, - .mmap = afu_mmap, -}; - - -static char *cxl_devnode(const struct device *dev, umode_t *mode) -{ - if (cpu_has_feature(CPU_FTR_HVMODE) && - CXL_DEVT_IS_CARD(dev->devt)) { - /* - * These minor numbers will eventually be used to program the - * PSL and AFUs once we have dynamic reprogramming support - */ - return NULL; - } - return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); -} - -static const struct class cxl_class = { - .name = "cxl", - .devnode = cxl_devnode, -}; - -static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev, - struct device **chardev, char *postfix, char *desc, - const struct file_operations *fops) -{ - struct device *dev; - int rc; - - cdev_init(cdev, fops); - rc = cdev_add(cdev, devt, 1); - if (rc) { - dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc); - return rc; - } - - dev = device_create(&cxl_class, &afu->dev, devt, afu, - "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix); - if (IS_ERR(dev)) { - rc = PTR_ERR(dev); - dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc); - goto err; - } - - *chardev = dev; - - return 0; -err: - cdev_del(cdev); - return rc; -} - -int cxl_chardev_d_afu_add(struct cxl_afu *afu) -{ - return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d, - &afu->chardev_d, "d", "dedicated", - &afu_master_fops); /* Uses master fops */ -} - -int cxl_chardev_m_afu_add(struct cxl_afu *afu) -{ - return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m, - &afu->chardev_m, "m", "master", - &afu_master_fops); -} - -int cxl_chardev_s_afu_add(struct cxl_afu *afu) -{ - return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s, - &afu->chardev_s, "s", "shared", - &afu_fops); -} - -void cxl_chardev_afu_remove(struct cxl_afu *afu) -{ - if (afu->chardev_d) { - cdev_del(&afu->afu_cdev_d); - device_unregister(afu->chardev_d); - afu->chardev_d = NULL; - } - if (afu->chardev_m) { - cdev_del(&afu->afu_cdev_m); - device_unregister(afu->chardev_m); - afu->chardev_m = NULL; - } - if (afu->chardev_s) { - cdev_del(&afu->afu_cdev_s); - device_unregister(afu->chardev_s); - afu->chardev_s = NULL; - } -} - -int cxl_register_afu(struct cxl_afu *afu) -{ - afu->dev.class = &cxl_class; - - return device_register(&afu->dev); -} - -int cxl_register_adapter(struct cxl *adapter) -{ - adapter->dev.class = &cxl_class; - - /* - * Future: When we support dynamically reprogramming the PSL & AFU we - * will expose the interface to do that via a chardev: - * adapter->dev.devt = CXL_CARD_MKDEV(adapter); - */ - - return device_register(&adapter->dev); -} - -dev_t cxl_get_dev(void) -{ - return cxl_dev; -} - -int __init cxl_file_init(void) -{ - int rc; - - /* - * If these change we really need to update API. Either change some - * flags or update API version number CXL_API_VERSION. - */ - BUILD_BUG_ON(CXL_API_VERSION != 3); - BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); - BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); - BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); - BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32); - BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16); - - if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) { - pr_err("Unable to allocate CXL major number: %i\n", rc); - return rc; - } - - pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev)); - - rc = class_register(&cxl_class); - if (rc) { - pr_err("Unable to create CXL class\n"); - goto err; - } - - return 0; - -err: - unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); - return rc; -} - -void cxl_file_exit(void) -{ - unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); - class_unregister(&cxl_class); -} diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c deleted file mode 100644 index eee9decc121e..000000000000 --- a/drivers/misc/cxl/flash.c +++ /dev/null @@ -1,538 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" -#include "hcalls.h" - -#define DOWNLOAD_IMAGE 1 -#define VALIDATE_IMAGE 2 - -struct ai_header { - u16 version; - u8 reserved0[6]; - u16 vendor; - u16 device; - u16 subsystem_vendor; - u16 subsystem; - u64 image_offset; - u64 image_length; - u8 reserved1[96]; -}; - -static struct semaphore sem; -static unsigned long *buffer[CXL_AI_MAX_ENTRIES]; -static struct sg_list *le; -static u64 continue_token; -static unsigned int transfer; - -struct update_props_workarea { - __be32 phandle; - __be32 state; - __be64 reserved; - __be32 nprops; -} __packed; - -struct update_nodes_workarea { - __be32 state; - __be64 unit_address; - __be32 reserved; -} __packed; - -#define DEVICE_SCOPE 3 -#define NODE_ACTION_MASK 0xff000000 -#define NODE_COUNT_MASK 0x00ffffff -#define OPCODE_DELETE 0x01000000 -#define OPCODE_UPDATE 0x02000000 -#define OPCODE_ADD 0x03000000 - -static int rcall(int token, char *buf, s32 scope) -{ - int rc; - - spin_lock(&rtas_data_buf_lock); - - memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); - rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); - memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); - - spin_unlock(&rtas_data_buf_lock); - return rc; -} - -static int update_property(struct device_node *dn, const char *name, - u32 vd, char *value) -{ - struct property *new_prop; - u32 *val; - int rc; - - new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); - if (!new_prop) - return -ENOMEM; - - new_prop->name = kstrdup(name, GFP_KERNEL); - if (!new_prop->name) { - kfree(new_prop); - return -ENOMEM; - } - - new_prop->length = vd; - new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); - if (!new_prop->value) { - kfree(new_prop->name); - kfree(new_prop); - return -ENOMEM; - } - memcpy(new_prop->value, value, vd); - - val = (u32 *)new_prop->value; - rc = cxl_update_properties(dn, new_prop); - pr_devel("%pOFn: update property (%s, length: %i, value: %#x)\n", - dn, name, vd, be32_to_cpu(*val)); - - if (rc) { - kfree(new_prop->name); - kfree(new_prop->value); - kfree(new_prop); - } - return rc; -} - -static int update_node(__be32 phandle, s32 scope) -{ - struct update_props_workarea *upwa; - struct device_node *dn; - int i, rc, ret; - char *prop_data; - char *buf; - int token; - u32 nprops; - u32 vd; - - token = rtas_token("ibm,update-properties"); - if (token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; - - buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) { - kfree(buf); - return -ENOENT; - } - - upwa = (struct update_props_workarea *)&buf[0]; - upwa->phandle = phandle; - do { - rc = rcall(token, buf, scope); - if (rc < 0) - break; - - prop_data = buf + sizeof(*upwa); - nprops = be32_to_cpu(upwa->nprops); - - if (*prop_data == 0) { - prop_data++; - vd = be32_to_cpu(*(__be32 *)prop_data); - prop_data += vd + sizeof(vd); - nprops--; - } - - for (i = 0; i < nprops; i++) { - char *prop_name; - - prop_name = prop_data; - prop_data += strlen(prop_name) + 1; - vd = be32_to_cpu(*(__be32 *)prop_data); - prop_data += sizeof(vd); - - if ((vd != 0x00000000) && (vd != 0x80000000)) { - ret = update_property(dn, prop_name, vd, - prop_data); - if (ret) - pr_err("cxl: Could not update property %s - %i\n", - prop_name, ret); - - prop_data += vd; - } - } - } while (rc == 1); - - of_node_put(dn); - kfree(buf); - return rc; -} - -static int update_devicetree(struct cxl *adapter, s32 scope) -{ - struct update_nodes_workarea *unwa; - u32 action, node_count; - int token, rc, i; - __be32 *data, phandle; - char *buf; - - token = rtas_token("ibm,update-nodes"); - if (token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; - - buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - unwa = (struct update_nodes_workarea *)&buf[0]; - unwa->unit_address = cpu_to_be64(adapter->guest->handle); - do { - rc = rcall(token, buf, scope); - if (rc && rc != 1) - break; - - data = (__be32 *)buf + 4; - while (be32_to_cpu(*data) & NODE_ACTION_MASK) { - action = be32_to_cpu(*data) & NODE_ACTION_MASK; - node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; - pr_devel("device reconfiguration - action: %#x, nodes: %#x\n", - action, node_count); - data++; - - for (i = 0; i < node_count; i++) { - phandle = *data++; - - switch (action) { - case OPCODE_DELETE: - /* nothing to do */ - break; - case OPCODE_UPDATE: - update_node(phandle, scope); - break; - case OPCODE_ADD: - /* nothing to do, just move pointer */ - data++; - break; - } - } - } - } while (rc == 1); - - kfree(buf); - return 0; -} - -static int handle_image(struct cxl *adapter, int operation, - long (*fct)(u64, u64, u64, u64 *), - struct cxl_adapter_image *ai) -{ - size_t mod, s_copy, len_chunk = 0; - struct ai_header *header = NULL; - unsigned int entries = 0, i; - void *dest, *from; - int rc = 0, need_header; - - /* base adapter image header */ - need_header = (ai->flags & CXL_AI_NEED_HEADER); - if (need_header) { - header = kzalloc(sizeof(struct ai_header), GFP_KERNEL); - if (!header) - return -ENOMEM; - header->version = cpu_to_be16(1); - header->vendor = cpu_to_be16(adapter->guest->vendor); - header->device = cpu_to_be16(adapter->guest->device); - header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor); - header->subsystem = cpu_to_be16(adapter->guest->subsystem); - header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE); - header->image_length = cpu_to_be64(ai->len_image); - } - - /* number of entries in the list */ - len_chunk = ai->len_data; - if (need_header) - len_chunk += CXL_AI_HEADER_SIZE; - - entries = len_chunk / CXL_AI_BUFFER_SIZE; - mod = len_chunk % CXL_AI_BUFFER_SIZE; - if (mod) - entries++; - - if (entries > CXL_AI_MAX_ENTRIES) { - rc = -EINVAL; - goto err; - } - - /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes --> - * chunk 0 ---------------------------------------------------- - * | header | data | - * ---------------------------------------------------- - * chunk 1 ---------------------------------------------------- - * | data | - * ---------------------------------------------------- - * .... - * chunk n ---------------------------------------------------- - * | data | - * ---------------------------------------------------- - */ - from = (void *) ai->data; - for (i = 0; i < entries; i++) { - dest = buffer[i]; - s_copy = CXL_AI_BUFFER_SIZE; - - if ((need_header) && (i == 0)) { - /* add adapter image header */ - memcpy(buffer[i], header, sizeof(struct ai_header)); - s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE; - dest += CXL_AI_HEADER_SIZE; /* image offset */ - } - if ((i == (entries - 1)) && mod) - s_copy = mod; - - /* copy data */ - if (copy_from_user(dest, from, s_copy)) - goto err; - - /* fill in the list */ - le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i])); - le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE); - if ((i == (entries - 1)) && mod) - le[i].len = cpu_to_be64(mod); - from += s_copy; - } - pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n", - __func__, operation, need_header, entries, continue_token); - - /* - * download/validate the adapter image to the coherent - * platform facility - */ - rc = fct(adapter->guest->handle, virt_to_phys(le), entries, - &continue_token); - if (rc == 0) /* success of download/validation operation */ - continue_token = 0; - -err: - kfree(header); - - return rc; -} - -static int transfer_image(struct cxl *adapter, int operation, - struct cxl_adapter_image *ai) -{ - int rc = 0; - int afu; - - switch (operation) { - case DOWNLOAD_IMAGE: - rc = handle_image(adapter, operation, - &cxl_h_download_adapter_image, ai); - if (rc < 0) { - pr_devel("resetting adapter\n"); - cxl_h_reset_adapter(adapter->guest->handle); - } - return rc; - - case VALIDATE_IMAGE: - rc = handle_image(adapter, operation, - &cxl_h_validate_adapter_image, ai); - if (rc < 0) { - pr_devel("resetting adapter\n"); - cxl_h_reset_adapter(adapter->guest->handle); - return rc; - } - if (rc == 0) { - pr_devel("remove current afu\n"); - for (afu = 0; afu < adapter->slices; afu++) - cxl_guest_remove_afu(adapter->afu[afu]); - - pr_devel("resetting adapter\n"); - cxl_h_reset_adapter(adapter->guest->handle); - - /* The entire image has now been - * downloaded and the validation has - * been successfully performed. - * After that, the partition should call - * ibm,update-nodes and - * ibm,update-properties to receive the - * current configuration - */ - rc = update_devicetree(adapter, DEVICE_SCOPE); - transfer = 1; - } - return rc; - } - - return -EINVAL; -} - -static long ioctl_transfer_image(struct cxl *adapter, int operation, - struct cxl_adapter_image __user *uai) -{ - struct cxl_adapter_image ai; - - pr_devel("%s\n", __func__); - - if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image))) - return -EFAULT; - - /* - * Make sure reserved fields and bits are set to 0 - */ - if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 || - (ai.flags & ~CXL_AI_ALL)) - return -EINVAL; - - return transfer_image(adapter, operation, &ai); -} - -static int device_open(struct inode *inode, struct file *file) -{ - int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); - struct cxl *adapter; - int rc = 0, i; - - pr_devel("in %s\n", __func__); - - BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE); - - /* Allows one process to open the device by using a semaphore */ - if (down_interruptible(&sem) != 0) - return -EPERM; - - if (!(adapter = get_cxl_adapter(adapter_num))) { - rc = -ENODEV; - goto err_unlock; - } - - file->private_data = adapter; - continue_token = 0; - transfer = 0; - - for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) - buffer[i] = NULL; - - /* aligned buffer containing list entries which describes up to - * 1 megabyte of data (256 entries of 4096 bytes each) - * Logical real address of buffer 0 - Buffer 0 length in bytes - * Logical real address of buffer 1 - Buffer 1 length in bytes - * Logical real address of buffer 2 - Buffer 2 length in bytes - * .... - * .... - * Logical real address of buffer N - Buffer N length in bytes - */ - le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); - if (!le) { - rc = -ENOMEM; - goto err; - } - - for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { - buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); - if (!buffer[i]) { - rc = -ENOMEM; - goto err1; - } - } - - return 0; - -err1: - for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { - if (buffer[i]) - free_page((unsigned long) buffer[i]); - } - - if (le) - free_page((unsigned long) le); -err: - put_device(&adapter->dev); -err_unlock: - up(&sem); - - return rc; -} - -static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct cxl *adapter = file->private_data; - - pr_devel("in %s\n", __func__); - - if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE) - return ioctl_transfer_image(adapter, - DOWNLOAD_IMAGE, - (struct cxl_adapter_image __user *)arg); - else if (cmd == CXL_IOCTL_VALIDATE_IMAGE) - return ioctl_transfer_image(adapter, - VALIDATE_IMAGE, - (struct cxl_adapter_image __user *)arg); - else - return -EINVAL; -} - -static int device_close(struct inode *inode, struct file *file) -{ - struct cxl *adapter = file->private_data; - int i; - - pr_devel("in %s\n", __func__); - - for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { - if (buffer[i]) - free_page((unsigned long) buffer[i]); - } - - if (le) - free_page((unsigned long) le); - - up(&sem); - put_device(&adapter->dev); - continue_token = 0; - - /* reload the module */ - if (transfer) - cxl_guest_reload_module(adapter); - else { - pr_devel("resetting adapter\n"); - cxl_h_reset_adapter(adapter->guest->handle); - } - - transfer = 0; - return 0; -} - -static const struct file_operations fops = { - .owner = THIS_MODULE, - .open = device_open, - .unlocked_ioctl = device_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .release = device_close, -}; - -void cxl_guest_remove_chardev(struct cxl *adapter) -{ - cdev_del(&adapter->guest->cdev); -} - -int cxl_guest_add_chardev(struct cxl *adapter) -{ - dev_t devt; - int rc; - - devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter)); - cdev_init(&adapter->guest->cdev, &fops); - if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) { - dev_err(&adapter->dev, - "Unable to add chardev on adapter (card%i): %i\n", - adapter->adapter_num, rc); - goto err; - } - adapter->dev.devt = devt; - sema_init(&sem, 1); -err: - return rc; -} diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c deleted file mode 100644 index fb95a2d5cef4..000000000000 --- a/drivers/misc/cxl/guest.c +++ /dev/null @@ -1,1208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2015 IBM Corp. - */ - -#include -#include -#include -#include -#include - -#include "cxl.h" -#include "hcalls.h" -#include "trace.h" - -#define CXL_ERROR_DETECTED_EVENT 1 -#define CXL_SLOT_RESET_EVENT 2 -#define CXL_RESUME_EVENT 3 - -static void pci_error_handlers(struct cxl_afu *afu, - int bus_error_event, - pci_channel_state_t state) -{ - struct pci_dev *afu_dev; - struct pci_driver *afu_drv; - const struct pci_error_handlers *err_handler; - - if (afu->phb == NULL) - return; - - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { - afu_drv = to_pci_driver(afu_dev->dev.driver); - if (!afu_drv) - continue; - - err_handler = afu_drv->err_handler; - switch (bus_error_event) { - case CXL_ERROR_DETECTED_EVENT: - afu_dev->error_state = state; - - if (err_handler && - err_handler->error_detected) - err_handler->error_detected(afu_dev, state); - break; - case CXL_SLOT_RESET_EVENT: - afu_dev->error_state = state; - - if (err_handler && - err_handler->slot_reset) - err_handler->slot_reset(afu_dev); - break; - case CXL_RESUME_EVENT: - if (err_handler && - err_handler->resume) - err_handler->resume(afu_dev); - break; - } - } -} - -static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, - u64 errstat) -{ - pr_devel("in %s\n", __func__); - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); - - return cxl_ops->ack_irq(ctx, 0, errstat); -} - -static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu, - void *buf, size_t len) -{ - unsigned int entries, mod; - unsigned long **vpd_buf = NULL; - struct sg_list *le; - int rc = 0, i, tocopy; - u64 out = 0; - - if (buf == NULL) - return -EINVAL; - - /* number of entries in the list */ - entries = len / SG_BUFFER_SIZE; - mod = len % SG_BUFFER_SIZE; - if (mod) - entries++; - - if (entries > SG_MAX_ENTRIES) { - entries = SG_MAX_ENTRIES; - len = SG_MAX_ENTRIES * SG_BUFFER_SIZE; - mod = 0; - } - - vpd_buf = kcalloc(entries, sizeof(unsigned long *), GFP_KERNEL); - if (!vpd_buf) - return -ENOMEM; - - le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); - if (!le) { - rc = -ENOMEM; - goto err1; - } - - for (i = 0; i < entries; i++) { - vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); - if (!vpd_buf[i]) { - rc = -ENOMEM; - goto err2; - } - le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i])); - le[i].len = cpu_to_be64(SG_BUFFER_SIZE); - if ((i == (entries - 1)) && mod) - le[i].len = cpu_to_be64(mod); - } - - if (adapter) - rc = cxl_h_collect_vpd_adapter(adapter->guest->handle, - virt_to_phys(le), entries, &out); - else - rc = cxl_h_collect_vpd(afu->guest->handle, 0, - virt_to_phys(le), entries, &out); - pr_devel("length of available (entries: %i), vpd: %#llx\n", - entries, out); - - if (!rc) { - /* - * hcall returns in 'out' the size of available VPDs. - * It fills the buffer with as much data as possible. - */ - if (out < len) - len = out; - rc = len; - if (out) { - for (i = 0; i < entries; i++) { - if (len < SG_BUFFER_SIZE) - tocopy = len; - else - tocopy = SG_BUFFER_SIZE; - memcpy(buf, vpd_buf[i], tocopy); - buf += tocopy; - len -= tocopy; - } - } - } -err2: - for (i = 0; i < entries; i++) { - if (vpd_buf[i]) - free_page((unsigned long) vpd_buf[i]); - } - free_page((unsigned long) le); -err1: - kfree(vpd_buf); - return rc; -} - -static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info) -{ - return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info); -} - -static irqreturn_t guest_psl_irq(int irq, void *data) -{ - struct cxl_context *ctx = data; - struct cxl_irq_info irq_info; - int rc; - - pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq); - rc = guest_get_irq_info(ctx, &irq_info); - if (rc) { - WARN(1, "Unable to get IRQ info: %i\n", rc); - return IRQ_HANDLED; - } - - rc = cxl_irq_psl8(irq, ctx, &irq_info); - return rc; -} - -static int afu_read_error_state(struct cxl_afu *afu, int *state_out) -{ - u64 state; - int rc = 0; - - if (!afu) - return -EIO; - - rc = cxl_h_read_error_state(afu->guest->handle, &state); - if (!rc) { - WARN_ON(state != H_STATE_NORMAL && - state != H_STATE_DISABLE && - state != H_STATE_TEMP_UNAVAILABLE && - state != H_STATE_PERM_UNAVAILABLE); - *state_out = state & 0xffffffff; - } - return rc; -} - -static irqreturn_t guest_slice_irq_err(int irq, void *data) -{ - struct cxl_afu *afu = data; - int rc; - u64 serr, afu_error, dsisr; - - rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); - if (rc) { - dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); - return IRQ_HANDLED; - } - afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); - dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - cxl_afu_decode_psl_serr(afu, serr); - dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); - dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); - - rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); - if (rc) - dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n", - rc); - - return IRQ_HANDLED; -} - - -static int irq_alloc_range(struct cxl *adapter, int len, int *irq) -{ - int i, n; - struct irq_avail *cur; - - for (i = 0; i < adapter->guest->irq_nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - n = bitmap_find_next_zero_area(cur->bitmap, cur->range, - 0, len, 0); - if (n < cur->range) { - bitmap_set(cur->bitmap, n, len); - *irq = cur->offset + n; - pr_devel("guest: allocate IRQs %#x->%#x\n", - *irq, *irq + len - 1); - - return 0; - } - } - return -ENOSPC; -} - -static int irq_free_range(struct cxl *adapter, int irq, int len) -{ - int i, n; - struct irq_avail *cur; - - if (len == 0) - return -ENOENT; - - for (i = 0; i < adapter->guest->irq_nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - if (irq >= cur->offset && - (irq + len) <= (cur->offset + cur->range)) { - n = irq - cur->offset; - bitmap_clear(cur->bitmap, n, len); - pr_devel("guest: release IRQs %#x->%#x\n", - irq, irq + len - 1); - return 0; - } - } - return -ENOENT; -} - -static int guest_reset(struct cxl *adapter) -{ - struct cxl_afu *afu = NULL; - int i, rc; - - pr_devel("Adapter reset request\n"); - spin_lock(&adapter->afu_list_lock); - for (i = 0; i < adapter->slices; i++) { - if ((afu = adapter->afu[i])) { - pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, - pci_channel_io_frozen); - cxl_context_detach_all(afu); - } - } - - rc = cxl_h_reset_adapter(adapter->guest->handle); - for (i = 0; i < adapter->slices; i++) { - if (!rc && (afu = adapter->afu[i])) { - pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, - pci_channel_io_normal); - pci_error_handlers(afu, CXL_RESUME_EVENT, 0); - } - } - spin_unlock(&adapter->afu_list_lock); - return rc; -} - -static int guest_alloc_one_irq(struct cxl *adapter) -{ - int irq; - - spin_lock(&adapter->guest->irq_alloc_lock); - if (irq_alloc_range(adapter, 1, &irq)) - irq = -ENOSPC; - spin_unlock(&adapter->guest->irq_alloc_lock); - return irq; -} - -static void guest_release_one_irq(struct cxl *adapter, int irq) -{ - spin_lock(&adapter->guest->irq_alloc_lock); - irq_free_range(adapter, irq, 1); - spin_unlock(&adapter->guest->irq_alloc_lock); -} - -static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs, - struct cxl *adapter, unsigned int num) -{ - int i, try, irq; - - memset(irqs, 0, sizeof(struct cxl_irq_ranges)); - - spin_lock(&adapter->guest->irq_alloc_lock); - for (i = 0; i < CXL_IRQ_RANGES && num; i++) { - try = num; - while (try) { - if (irq_alloc_range(adapter, try, &irq) == 0) - break; - try /= 2; - } - if (!try) - goto error; - irqs->offset[i] = irq; - irqs->range[i] = try; - num -= try; - } - if (num) - goto error; - spin_unlock(&adapter->guest->irq_alloc_lock); - return 0; - -error: - for (i = 0; i < CXL_IRQ_RANGES; i++) - irq_free_range(adapter, irqs->offset[i], irqs->range[i]); - spin_unlock(&adapter->guest->irq_alloc_lock); - return -ENOSPC; -} - -static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs, - struct cxl *adapter) -{ - int i; - - spin_lock(&adapter->guest->irq_alloc_lock); - for (i = 0; i < CXL_IRQ_RANGES; i++) - irq_free_range(adapter, irqs->offset[i], irqs->range[i]); - spin_unlock(&adapter->guest->irq_alloc_lock); -} - -static int guest_register_serr_irq(struct cxl_afu *afu) -{ - afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&afu->dev)); - if (!afu->err_irq_name) - return -ENOMEM; - - if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq, - guest_slice_irq_err, afu, afu->err_irq_name))) { - kfree(afu->err_irq_name); - afu->err_irq_name = NULL; - return -ENOMEM; - } - - return 0; -} - -static void guest_release_serr_irq(struct cxl_afu *afu) -{ - cxl_unmap_irq(afu->serr_virq, afu); - cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); - kfree(afu->err_irq_name); -} - -static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) -{ - return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token, - tfc >> 32, (psl_reset_mask != 0)); -} - -static void disable_afu_irqs(struct cxl_context *ctx) -{ - irq_hw_number_t hwirq; - unsigned int virq; - int r, i; - - pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice); - for (r = 0; r < CXL_IRQ_RANGES; r++) { - hwirq = ctx->irqs.offset[r]; - for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - virq = irq_find_mapping(NULL, hwirq); - disable_irq(virq); - } - } -} - -static void enable_afu_irqs(struct cxl_context *ctx) -{ - irq_hw_number_t hwirq; - unsigned int virq; - int r, i; - - pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice); - for (r = 0; r < CXL_IRQ_RANGES; r++) { - hwirq = ctx->irqs.offset[r]; - for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - virq = irq_find_mapping(NULL, hwirq); - enable_irq(virq); - } - } -} - -static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx, - u64 offset, u64 *val) -{ - unsigned long cr; - char c; - int rc = 0; - - if (afu->crs_len < sz) - return -ENOENT; - - if (unlikely(offset >= afu->crs_len)) - return -ERANGE; - - cr = get_zeroed_page(GFP_KERNEL); - if (!cr) - return -ENOMEM; - - rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset, - virt_to_phys((void *)cr), sz); - if (rc) - goto err; - - switch (sz) { - case 1: - c = *((char *) cr); - *val = c; - break; - case 2: - *val = in_le16((u16 *)cr); - break; - case 4: - *val = in_le32((unsigned *)cr); - break; - case 8: - *val = in_le64((u64 *)cr); - break; - default: - WARN_ON(1); - } -err: - free_page(cr); - return rc; -} - -static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset, - u32 *out) -{ - int rc; - u64 val; - - rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val); - if (!rc) - *out = (u32) val; - return rc; -} - -static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset, - u16 *out) -{ - int rc; - u64 val; - - rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val); - if (!rc) - *out = (u16) val; - return rc; -} - -static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset, - u8 *out) -{ - int rc; - u64 val; - - rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val); - if (!rc) - *out = (u8) val; - return rc; -} - -static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset, - u64 *out) -{ - return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out); -} - -static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) -{ - /* config record is not writable from guest */ - return -EPERM; -} - -static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) -{ - /* config record is not writable from guest */ - return -EPERM; -} - -static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) -{ - /* config record is not writable from guest */ - return -EPERM; -} - -static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) -{ - struct cxl_process_element_hcall *elem; - struct cxl *adapter = ctx->afu->adapter; - const struct cred *cred; - u32 pid, idx; - int rc, r, i; - u64 mmio_addr, mmio_size; - __be64 flags = 0; - - /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */ - if (!(elem = (struct cxl_process_element_hcall *) - get_zeroed_page(GFP_KERNEL))) - return -ENOMEM; - - elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION); - if (ctx->kernel) { - pid = 0; - flags |= CXL_PE_TRANSLATION_ENABLED; - flags |= CXL_PE_PRIVILEGED_PROCESS; - if (mfmsr() & MSR_SF) - flags |= CXL_PE_64_BIT; - } else { - pid = current->pid; - flags |= CXL_PE_PROBLEM_STATE; - flags |= CXL_PE_TRANSLATION_ENABLED; - if (!test_tsk_thread_flag(current, TIF_32BIT)) - flags |= CXL_PE_64_BIT; - cred = get_current_cred(); - if (uid_eq(cred->euid, GLOBAL_ROOT_UID)) - flags |= CXL_PE_PRIVILEGED_PROCESS; - put_cred(cred); - } - elem->flags = cpu_to_be64(flags); - elem->common.tid = cpu_to_be32(0); /* Unused */ - elem->common.pid = cpu_to_be32(pid); - elem->common.csrp = cpu_to_be64(0); /* disable */ - elem->common.u.psl8.aurp0 = cpu_to_be64(0); /* disable */ - elem->common.u.psl8.aurp1 = cpu_to_be64(0); /* disable */ - - cxl_prefault(ctx, wed); - - elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); - elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); - - /* - * Ensure we have at least one interrupt allocated to take faults for - * kernel contexts that may not have allocated any AFU IRQs at all: - */ - if (ctx->irqs.range[0] == 0) { - rc = afu_register_irqs(ctx, 0); - if (rc) - goto out_free; - } - - for (r = 0; r < CXL_IRQ_RANGES; r++) { - for (i = 0; i < ctx->irqs.range[r]; i++) { - if (r == 0 && i == 0) { - elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]); - } else { - idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset; - elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8); - } - } - } - elem->common.amr = cpu_to_be64(amr); - elem->common.wed = cpu_to_be64(wed); - - disable_afu_irqs(ctx); - - rc = cxl_h_attach_process(ctx->afu->guest->handle, elem, - &ctx->process_token, &mmio_addr, &mmio_size); - if (rc == H_SUCCESS) { - if (ctx->master || !ctx->afu->pp_psa) { - ctx->psn_phys = ctx->afu->psn_phys; - ctx->psn_size = ctx->afu->adapter->ps_size; - } else { - ctx->psn_phys = mmio_addr; - ctx->psn_size = mmio_size; - } - if (ctx->afu->pp_psa && mmio_size && - ctx->afu->pp_size == 0) { - /* - * There's no property in the device tree to read the - * pp_size. We only find out at the 1st attach. - * Compared to bare-metal, it is too late and we - * should really lock here. However, on powerVM, - * pp_size is really only used to display in /sys. - * Being discussed with pHyp for their next release. - */ - ctx->afu->pp_size = mmio_size; - } - /* from PAPR: process element is bytes 4-7 of process token */ - ctx->external_pe = ctx->process_token & 0xFFFFFFFF; - pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx", - ctx->pe, ctx->external_pe, ctx->psn_size); - ctx->pe_inserted = true; - enable_afu_irqs(ctx); - } - -out_free: - free_page((u64)elem); - return rc; -} - -static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) -{ - pr_devel("in %s\n", __func__); - - ctx->kernel = kernel; - if (ctx->afu->current_mode == CXL_MODE_DIRECTED) - return attach_afu_directed(ctx, wed, amr); - - /* dedicated mode not supported on FW840 */ - - return -EINVAL; -} - -static int detach_afu_directed(struct cxl_context *ctx) -{ - if (!ctx->pe_inserted) - return 0; - if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token)) - return -1; - return 0; -} - -static int guest_detach_process(struct cxl_context *ctx) -{ - pr_devel("in %s\n", __func__); - trace_cxl_detach(ctx); - - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - return -EIO; - - if (ctx->afu->current_mode == CXL_MODE_DIRECTED) - return detach_afu_directed(ctx); - - return -EINVAL; -} - -static void guest_release_afu(struct device *dev) -{ - struct cxl_afu *afu = to_cxl_afu(dev); - - pr_devel("%s\n", __func__); - - idr_destroy(&afu->contexts_idr); - - kfree(afu->guest); - kfree(afu); -} - -ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len) -{ - return guest_collect_vpd(NULL, afu, buf, len); -} - -#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE -static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf, - loff_t off, size_t count) -{ - void *tbuf = NULL; - int rc = 0; - - tbuf = (void *) get_zeroed_page(GFP_KERNEL); - if (!tbuf) - return -ENOMEM; - - rc = cxl_h_get_afu_err(afu->guest->handle, - off & 0x7, - virt_to_phys(tbuf), - count); - if (rc) - goto err; - - if (count > ERR_BUFF_MAX_COPY_SIZE) - count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); - memcpy(buf, tbuf, count); -err: - free_page((u64)tbuf); - - return rc; -} - -static int guest_afu_check_and_enable(struct cxl_afu *afu) -{ - return 0; -} - -static bool guest_support_attributes(const char *attr_name, - enum cxl_attrs type) -{ - switch (type) { - case CXL_ADAPTER_ATTRS: - if ((strcmp(attr_name, "base_image") == 0) || - (strcmp(attr_name, "load_image_on_perst") == 0) || - (strcmp(attr_name, "perst_reloads_same_image") == 0) || - (strcmp(attr_name, "image_loaded") == 0)) - return false; - break; - case CXL_AFU_MASTER_ATTRS: - if ((strcmp(attr_name, "pp_mmio_off") == 0)) - return false; - break; - case CXL_AFU_ATTRS: - break; - default: - break; - } - - return true; -} - -static int activate_afu_directed(struct cxl_afu *afu) -{ - int rc; - - dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice); - - afu->current_mode = CXL_MODE_DIRECTED; - - afu->num_procs = afu->max_procs_virtualised; - - if ((rc = cxl_chardev_m_afu_add(afu))) - return rc; - - if ((rc = cxl_sysfs_afu_m_add(afu))) - goto err; - - if ((rc = cxl_chardev_s_afu_add(afu))) - goto err1; - - return 0; -err1: - cxl_sysfs_afu_m_remove(afu); -err: - cxl_chardev_afu_remove(afu); - return rc; -} - -static int guest_afu_activate_mode(struct cxl_afu *afu, int mode) -{ - if (!mode) - return 0; - if (!(mode & afu->modes_supported)) - return -EINVAL; - - if (mode == CXL_MODE_DIRECTED) - return activate_afu_directed(afu); - - if (mode == CXL_MODE_DEDICATED) - dev_err(&afu->dev, "Dedicated mode not supported\n"); - - return -EINVAL; -} - -static int deactivate_afu_directed(struct cxl_afu *afu) -{ - dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice); - - afu->current_mode = 0; - afu->num_procs = 0; - - cxl_sysfs_afu_m_remove(afu); - cxl_chardev_afu_remove(afu); - - cxl_ops->afu_reset(afu); - - return 0; -} - -static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode) -{ - if (!mode) - return 0; - if (!(mode & afu->modes_supported)) - return -EINVAL; - - if (mode == CXL_MODE_DIRECTED) - return deactivate_afu_directed(afu); - return 0; -} - -static int guest_afu_reset(struct cxl_afu *afu) -{ - pr_devel("AFU(%d) reset request\n", afu->slice); - return cxl_h_reset_afu(afu->guest->handle); -} - -static int guest_map_slice_regs(struct cxl_afu *afu) -{ - if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) { - dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n", - afu->slice); - return -ENOMEM; - } - return 0; -} - -static void guest_unmap_slice_regs(struct cxl_afu *afu) -{ - if (afu->p2n_mmio) - iounmap(afu->p2n_mmio); -} - -static int afu_update_state(struct cxl_afu *afu) -{ - int rc, cur_state; - - rc = afu_read_error_state(afu, &cur_state); - if (rc) - return rc; - - if (afu->guest->previous_state == cur_state) - return 0; - - pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state); - - switch (cur_state) { - case H_STATE_NORMAL: - afu->guest->previous_state = cur_state; - break; - - case H_STATE_DISABLE: - pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, - pci_channel_io_frozen); - - cxl_context_detach_all(afu); - if ((rc = cxl_ops->afu_reset(afu))) - pr_devel("reset hcall failed %d\n", rc); - - rc = afu_read_error_state(afu, &cur_state); - if (!rc && cur_state == H_STATE_NORMAL) { - pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, - pci_channel_io_normal); - pci_error_handlers(afu, CXL_RESUME_EVENT, 0); - } - afu->guest->previous_state = 0; - break; - - case H_STATE_TEMP_UNAVAILABLE: - afu->guest->previous_state = cur_state; - break; - - case H_STATE_PERM_UNAVAILABLE: - dev_err(&afu->dev, "AFU is in permanent error state\n"); - pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, - pci_channel_io_perm_failure); - afu->guest->previous_state = cur_state; - break; - - default: - pr_err("Unexpected AFU(%d) error state: %#x\n", - afu->slice, cur_state); - return -EINVAL; - } - - return rc; -} - -static void afu_handle_errstate(struct work_struct *work) -{ - struct cxl_afu_guest *afu_guest = - container_of(to_delayed_work(work), struct cxl_afu_guest, work_err); - - if (!afu_update_state(afu_guest->parent) && - afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE) - return; - - if (afu_guest->handle_err) - schedule_delayed_work(&afu_guest->work_err, - msecs_to_jiffies(3000)); -} - -static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu) -{ - int state; - - if (afu && (!afu_read_error_state(afu, &state))) { - if (state == H_STATE_NORMAL) - return true; - } - - return false; -} - -static int afu_properties_look_ok(struct cxl_afu *afu) -{ - if (afu->pp_irqs < 0) { - dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n"); - return -EINVAL; - } - - if (afu->max_procs_virtualised < 1) { - dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n"); - return -EINVAL; - } - - return 0; -} - -int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np) -{ - struct cxl_afu *afu; - bool free = true; - int rc; - - pr_devel("in %s - AFU(%d)\n", __func__, slice); - if (!(afu = cxl_alloc_afu(adapter, slice))) - return -ENOMEM; - - if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) { - kfree(afu); - return -ENOMEM; - } - - if ((rc = dev_set_name(&afu->dev, "afu%i.%i", - adapter->adapter_num, - slice))) - goto err1; - - adapter->slices++; - - if ((rc = cxl_of_read_afu_handle(afu, afu_np))) - goto err1; - - if ((rc = cxl_ops->afu_reset(afu))) - goto err1; - - if ((rc = cxl_of_read_afu_properties(afu, afu_np))) - goto err1; - - if ((rc = afu_properties_look_ok(afu))) - goto err1; - - if ((rc = guest_map_slice_regs(afu))) - goto err1; - - if ((rc = guest_register_serr_irq(afu))) - goto err2; - - /* - * After we call this function we must not free the afu directly, even - * if it returns an error! - */ - if ((rc = cxl_register_afu(afu))) - goto err_put_dev; - - if ((rc = cxl_sysfs_afu_add(afu))) - goto err_del_dev; - - /* - * pHyp doesn't expose the programming models supported by the - * AFU. pHyp currently only supports directed mode. If it adds - * dedicated mode later, this version of cxl has no way to - * detect it. So we'll initialize the driver, but the first - * attach will fail. - * Being discussed with pHyp to do better (likely new property) - */ - if (afu->max_procs_virtualised == 1) - afu->modes_supported = CXL_MODE_DEDICATED; - else - afu->modes_supported = CXL_MODE_DIRECTED; - - if ((rc = cxl_afu_select_best_mode(afu))) - goto err_remove_sysfs; - - adapter->afu[afu->slice] = afu; - - afu->enabled = true; - - /* - * wake up the cpu periodically to check the state - * of the AFU using "afu" stored in the guest structure. - */ - afu->guest->parent = afu; - afu->guest->handle_err = true; - INIT_DELAYED_WORK(&afu->guest->work_err, afu_handle_errstate); - schedule_delayed_work(&afu->guest->work_err, msecs_to_jiffies(1000)); - - if ((rc = cxl_pci_vphb_add(afu))) - dev_info(&afu->dev, "Can't register vPHB\n"); - - return 0; - -err_remove_sysfs: - cxl_sysfs_afu_remove(afu); -err_del_dev: - device_del(&afu->dev); -err_put_dev: - put_device(&afu->dev); - free = false; - guest_release_serr_irq(afu); -err2: - guest_unmap_slice_regs(afu); -err1: - if (free) { - kfree(afu->guest); - kfree(afu); - } - return rc; -} - -void cxl_guest_remove_afu(struct cxl_afu *afu) -{ - if (!afu) - return; - - /* flush and stop pending job */ - afu->guest->handle_err = false; - flush_delayed_work(&afu->guest->work_err); - - cxl_pci_vphb_remove(afu); - cxl_sysfs_afu_remove(afu); - - spin_lock(&afu->adapter->afu_list_lock); - afu->adapter->afu[afu->slice] = NULL; - spin_unlock(&afu->adapter->afu_list_lock); - - cxl_context_detach_all(afu); - cxl_ops->afu_deactivate_mode(afu, afu->current_mode); - guest_release_serr_irq(afu); - guest_unmap_slice_regs(afu); - - device_unregister(&afu->dev); -} - -static void free_adapter(struct cxl *adapter) -{ - struct irq_avail *cur; - int i; - - if (adapter->guest) { - if (adapter->guest->irq_avail) { - for (i = 0; i < adapter->guest->irq_nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - bitmap_free(cur->bitmap); - } - kfree(adapter->guest->irq_avail); - } - kfree(adapter->guest->status); - kfree(adapter->guest); - } - cxl_remove_adapter_nr(adapter); - kfree(adapter); -} - -static int properties_look_ok(struct cxl *adapter) -{ - /* The absence of this property means that the operational - * status is unknown or okay - */ - if (strlen(adapter->guest->status) && - strcmp(adapter->guest->status, "okay")) { - pr_err("ABORTING:Bad operational status of the device\n"); - return -EINVAL; - } - - return 0; -} - -ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) -{ - return guest_collect_vpd(adapter, NULL, buf, len); -} - -void cxl_guest_remove_adapter(struct cxl *adapter) -{ - pr_devel("in %s\n", __func__); - - cxl_sysfs_adapter_remove(adapter); - - cxl_guest_remove_chardev(adapter); - device_unregister(&adapter->dev); -} - -static void release_adapter(struct device *dev) -{ - free_adapter(to_cxl_adapter(dev)); -} - -struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev) -{ - struct cxl *adapter; - bool free = true; - int rc; - - if (!(adapter = cxl_alloc_adapter())) - return ERR_PTR(-ENOMEM); - - if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) { - free_adapter(adapter); - return ERR_PTR(-ENOMEM); - } - - adapter->slices = 0; - adapter->guest->pdev = pdev; - adapter->dev.parent = &pdev->dev; - adapter->dev.release = release_adapter; - dev_set_drvdata(&pdev->dev, adapter); - - /* - * Hypervisor controls PSL timebase initialization (p1 register). - * On FW840, PSL is initialized. - */ - adapter->psl_timebase_synced = true; - - if ((rc = cxl_of_read_adapter_handle(adapter, np))) - goto err1; - - if ((rc = cxl_of_read_adapter_properties(adapter, np))) - goto err1; - - if ((rc = properties_look_ok(adapter))) - goto err1; - - if ((rc = cxl_guest_add_chardev(adapter))) - goto err1; - - /* - * After we call this function we must not free the adapter directly, - * even if it returns an error! - */ - if ((rc = cxl_register_adapter(adapter))) - goto err_put_dev; - - if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_del_dev; - - /* release the context lock as the adapter is configured */ - cxl_adapter_context_unlock(adapter); - - return adapter; - -err_del_dev: - device_del(&adapter->dev); -err_put_dev: - put_device(&adapter->dev); - free = false; - cxl_guest_remove_chardev(adapter); -err1: - if (free) - free_adapter(adapter); - return ERR_PTR(rc); -} - -void cxl_guest_reload_module(struct cxl *adapter) -{ - struct platform_device *pdev; - - pdev = adapter->guest->pdev; - cxl_guest_remove_adapter(adapter); - - cxl_of_probe(pdev); -} - -const struct cxl_backend_ops cxl_guest_ops = { - .module = THIS_MODULE, - .adapter_reset = guest_reset, - .alloc_one_irq = guest_alloc_one_irq, - .release_one_irq = guest_release_one_irq, - .alloc_irq_ranges = guest_alloc_irq_ranges, - .release_irq_ranges = guest_release_irq_ranges, - .setup_irq = NULL, - .handle_psl_slice_error = guest_handle_psl_slice_error, - .psl_interrupt = guest_psl_irq, - .ack_irq = guest_ack_irq, - .attach_process = guest_attach_process, - .detach_process = guest_detach_process, - .update_ivtes = NULL, - .support_attributes = guest_support_attributes, - .link_ok = guest_link_ok, - .release_afu = guest_release_afu, - .afu_read_err_buffer = guest_afu_read_err_buffer, - .afu_check_and_enable = guest_afu_check_and_enable, - .afu_activate_mode = guest_afu_activate_mode, - .afu_deactivate_mode = guest_afu_deactivate_mode, - .afu_reset = guest_afu_reset, - .afu_cr_read8 = guest_afu_cr_read8, - .afu_cr_read16 = guest_afu_cr_read16, - .afu_cr_read32 = guest_afu_cr_read32, - .afu_cr_read64 = guest_afu_cr_read64, - .afu_cr_write8 = guest_afu_cr_write8, - .afu_cr_write16 = guest_afu_cr_write16, - .afu_cr_write32 = guest_afu_cr_write32, - .read_adapter_vpd = cxl_guest_read_adapter_vpd, -}; diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c deleted file mode 100644 index aba5e20eeb1f..000000000000 --- a/drivers/misc/cxl/hcalls.c +++ /dev/null @@ -1,643 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2015 IBM Corp. - */ - - -#include -#include -#include -#include -#include "hcalls.h" -#include "trace.h" - -#define CXL_HCALL_TIMEOUT 60000 -#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000 - -#define H_ATTACH_CA_PROCESS 0x344 -#define H_CONTROL_CA_FUNCTION 0x348 -#define H_DETACH_CA_PROCESS 0x34C -#define H_COLLECT_CA_INT_INFO 0x350 -#define H_CONTROL_CA_FAULTS 0x354 -#define H_DOWNLOAD_CA_FUNCTION 0x35C -#define H_DOWNLOAD_CA_FACILITY 0x364 -#define H_CONTROL_CA_FACILITY 0x368 - -#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */ -#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */ -#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */ -#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */ -#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */ -#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */ -#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */ -#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */ -#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */ -#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */ -#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */ -#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */ - -#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1 -#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2 - -#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */ -#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */ - -#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */ -#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */ - - -#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \ - { \ - unsigned int delay, total_delay = 0; \ - u64 token = 0; \ - \ - memset(retbuf, 0, sizeof(retbuf)); \ - while (1) { \ - rc = call(fn, retbuf, __VA_ARGS__, token); \ - token = retbuf[0]; \ - if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \ - break; \ - \ - if (rc == H_BUSY) \ - delay = 10; \ - else \ - delay = get_longbusy_msecs(rc); \ - \ - total_delay += delay; \ - if (total_delay > CXL_HCALL_TIMEOUT) { \ - WARN(1, "Warning: Giving up waiting for CXL hcall " \ - "%#x after %u msec\n", fn, total_delay); \ - rc = H_BUSY; \ - break; \ - } \ - msleep(delay); \ - } \ - } -#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__) -#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__) - -#define _PRINT_MSG(rc, format, ...) \ - { \ - if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \ - pr_err(format, __VA_ARGS__); \ - else \ - pr_devel(format, __VA_ARGS__); \ - } \ - - -static char *afu_op_names[] = { - "UNKNOWN_OP", /* 0 undefined */ - "RESET", /* 1 */ - "SUSPEND_PROCESS", /* 2 */ - "RESUME_PROCESS", /* 3 */ - "READ_ERR_STATE", /* 4 */ - "GET_AFU_ERR", /* 5 */ - "GET_CONFIG", /* 6 */ - "GET_DOWNLOAD_STATE", /* 7 */ - "TERMINATE_PROCESS", /* 8 */ - "COLLECT_VPD", /* 9 */ - "UNKNOWN_OP", /* 10 undefined */ - "GET_FUNCTION_ERR_INT", /* 11 */ - "ACK_FUNCTION_ERR_INT", /* 12 */ - "GET_ERROR_LOG", /* 13 */ -}; - -static char *control_adapter_op_names[] = { - "UNKNOWN_OP", /* 0 undefined */ - "RESET", /* 1 */ - "COLLECT_VPD", /* 2 */ -}; - -static char *download_op_names[] = { - "UNKNOWN_OP", /* 0 undefined */ - "DOWNLOAD", /* 1 */ - "VALIDATE", /* 2 */ -}; - -static char *op_str(unsigned int op, char *name_array[], int array_len) -{ - if (op >= array_len) - return "UNKNOWN_OP"; - return name_array[op]; -} - -#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array)) - -#define OP_STR_AFU(op) OP_STR(op, afu_op_names) -#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names) -#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names) - - -long cxl_h_attach_process(u64 unit_address, - struct cxl_process_element_hcall *element, - u64 *process_token, u64 *mmio_addr, u64 *mmio_size) -{ - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - long rc; - - CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element)); - _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n", - unit_address, virt_to_phys(element), rc); - trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc); - - pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n", - retbuf[0], retbuf[1], retbuf[2]); - cxl_dump_debug_buffer(element, sizeof(*element)); - - switch (rc) { - case H_SUCCESS: /* The process info is attached to the coherent platform function */ - *process_token = retbuf[0]; - if (mmio_addr) - *mmio_addr = retbuf[1]; - if (mmio_size) - *mmio_size = retbuf[2]; - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - case H_FUNCTION: /* The function is not supported. */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ - case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */ - case H_HARDWARE: /* A hardware event prevented the attach operation */ - case H_STATE: /* The coherent platform function is not in a valid state */ - case H_BUSY: - return -EBUSY; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_detach_process - Detach a process element from a coherent - * platform function. - */ -long cxl_h_detach_process(u64 unit_address, u64 process_token) -{ - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - long rc; - - CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token); - _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc); - trace_cxl_hcall_detach(unit_address, process_token, rc); - - switch (rc) { - case H_SUCCESS: /* The process was detached from the coherent platform function */ - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ - case H_RESOURCE: /* The function has page table mappings for MMIO */ - case H_HARDWARE: /* A hardware event prevented the detach operation */ - case H_STATE: /* The coherent platform function is not in a valid state */ - case H_BUSY: - return -EBUSY; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows - * the partition to manipulate or query - * certain coherent platform function behaviors. - */ -static long cxl_h_control_function(u64 unit_address, u64 op, - u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) -{ - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - long rc; - - CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4); - _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", - unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); - trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); - - switch (rc) { - case H_SUCCESS: /* The operation is completed for the coherent platform function */ - if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT || - op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE || - op == H_CONTROL_CA_FUNCTION_COLLECT_VPD)) - *out = retbuf[0]; - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - case H_FUNCTION: /* The function is not supported. */ - case H_NOT_FOUND: /* The operation supplied was not valid */ - case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ - case H_SG_LIST: /* An block list entry was invalid */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ - case H_RESOURCE: /* The function has page table mappings for MMIO */ - case H_HARDWARE: /* A hardware event prevented the attach operation */ - case H_STATE: /* The coherent platform function is not in a valid state */ - case H_BUSY: - return -EBUSY; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_reset_afu - Perform a reset to the coherent platform function. - */ -long cxl_h_reset_afu(u64 unit_address) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_RESET, - 0, 0, 0, 0, - NULL); -} - -/* - * cxl_h_suspend_process - Suspend a process from being executed - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_suspend_process(u64 unit_address, u64 process_token) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS, - process_token, 0, 0, 0, - NULL); -} - -/* - * cxl_h_resume_process - Resume a process to be executed - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_resume_process(u64 unit_address, u64 process_token) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_RESUME_PROCESS, - process_token, 0, 0, 0, - NULL); -} - -/* - * cxl_h_read_error_state - Checks the error state of the coherent - * platform function. - * R4 contains the error state - */ -long cxl_h_read_error_state(u64 unit_address, u64 *state) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_READ_ERR_STATE, - 0, 0, 0, 0, - state); -} - -/* - * cxl_h_get_afu_err - collect the AFU error buffer - * Parameter1 = byte offset into error buffer to retrieve, valid values - * are between 0 and (ibm,error-buffer-size - 1) - * Parameter2 = 4K aligned real address of error buffer, to be filled in - * Parameter3 = length of error buffer, valid values are 4K or less - */ -long cxl_h_get_afu_err(u64 unit_address, u64 offset, - u64 buf_address, u64 len) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_GET_AFU_ERR, - offset, buf_address, len, 0, - NULL); -} - -/* - * cxl_h_get_config - collect configuration record for the - * coherent platform function - * Parameter1 = # of configuration record to retrieve, valid values are - * between 0 and (ibm,#config-records - 1) - * Parameter2 = byte offset into configuration record to retrieve, - * valid values are between 0 and (ibm,config-record-size - 1) - * Parameter3 = 4K aligned real address of configuration record buffer, - * to be filled in - * Parameter4 = length of configuration buffer, valid values are 4K or less - */ -long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, - u64 buf_address, u64 len) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_GET_CONFIG, - cr_num, offset, buf_address, len, - NULL); -} - -/* - * cxl_h_terminate_process - Terminate the process before completion - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_terminate_process(u64 unit_address, u64 process_token) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS, - process_token, 0, 0, 0, - NULL); -} - -/* - * cxl_h_collect_vpd - Collect VPD for the coherent platform function. - * Parameter1 = # of VPD record to retrieve, valid values are between 0 - * and (ibm,#config-records - 1). - * Parameter2 = 4K naturally aligned real buffer containing block - * list entries - * Parameter3 = number of block list entries in the block list, valid - * values are between 0 and 256 - */ -long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, - u64 num, u64 *out) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_COLLECT_VPD, - record, list_address, num, 0, - out); -} - -/* - * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt - */ -long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT, - 0, 0, 0, 0, reg); -} - -/* - * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data - * based on an interrupt - * Parameter1 = value to write to the function-wide error interrupt register - */ -long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT, - value, 0, 0, 0, - NULL); -} - -/* - * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of - * an error log - */ -long cxl_h_get_error_log(u64 unit_address, u64 value) -{ - return cxl_h_control_function(unit_address, - H_CONTROL_CA_FUNCTION_GET_ERROR_LOG, - 0, 0, 0, 0, - NULL); -} - -/* - * cxl_h_collect_int_info - Collect interrupt info about a coherent - * platform function after an interrupt occurred. - */ -long cxl_h_collect_int_info(u64 unit_address, u64 process_token, - struct cxl_irq_info *info) -{ - long rc; - - BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE])); - - rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info, - unit_address, process_token); - _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n", - unit_address, process_token, rc); - trace_cxl_hcall_collect_int_info(unit_address, process_token, rc); - - switch (rc) { - case H_SUCCESS: /* The interrupt info is returned in return registers. */ - pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid_tid:%#llx, afu_err:%#llx, errstat:%#llx\n", - info->dsisr, info->dar, info->dsr, info->reserved, - info->afu_err, info->errstat); - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */ - case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info.*/ - case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */ - return -EBUSY; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_control_faults - Control the operation of a coherent platform - * function after a fault occurs. - * - * Parameters - * control-mask: value to control the faults - * looks like PSL_TFC_An shifted >> 32 - * reset-mask: mask to control reset of function faults - * Set reset_mask = 1 to reset PSL errors - */ -long cxl_h_control_faults(u64 unit_address, u64 process_token, - u64 control_mask, u64 reset_mask) -{ - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - long rc; - - memset(retbuf, 0, sizeof(retbuf)); - - rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address, - H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token, - control_mask, reset_mask); - _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n", - unit_address, process_token, control_mask, reset_mask, - rc, retbuf[0]); - trace_cxl_hcall_control_faults(unit_address, process_token, - control_mask, reset_mask, retbuf[0], rc); - - switch (rc) { - case H_SUCCESS: /* Faults were successfully controlled for the function. */ - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - return -EINVAL; - case H_HARDWARE: /* A hardware event prevented the control of faults. */ - case H_STATE: /* The function was in an invalid state. */ - case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */ - return -EBUSY; - case H_FUNCTION: /* The function is not supported */ - case H_NOT_FOUND: /* The operation supplied was not valid */ - return -EINVAL; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call - * allows the partition to manipulate or query - * certain coherent platform facility behaviors. - */ -static long cxl_h_control_facility(u64 unit_address, u64 op, - u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) -{ - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - long rc; - - CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4); - _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", - unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); - trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); - - switch (rc) { - case H_SUCCESS: /* The operation is completed for the coherent platform facility */ - if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD) - *out = retbuf[0]; - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied. */ - case H_FUNCTION: /* The function is not supported. */ - case H_NOT_FOUND: /* The operation supplied was not valid */ - case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ - case H_SG_LIST: /* An block list entry was invalid */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ - case H_RESOURCE: /* The function has page table mappings for MMIO */ - case H_HARDWARE: /* A hardware event prevented the attach operation */ - case H_STATE: /* The coherent platform facility is not in a valid state */ - case H_BUSY: - return -EBUSY; - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. - */ -long cxl_h_reset_adapter(u64 unit_address) -{ - return cxl_h_control_facility(unit_address, - H_CONTROL_CA_FACILITY_RESET, - 0, 0, 0, 0, - NULL); -} - -/* - * cxl_h_collect_vpd - Collect VPD for the coherent platform function. - * Parameter1 = 4K naturally aligned real buffer containing block - * list entries - * Parameter2 = number of block list entries in the block list, valid - * values are between 0 and 256 - */ -long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, - u64 num, u64 *out) -{ - return cxl_h_control_facility(unit_address, - H_CONTROL_CA_FACILITY_COLLECT_VPD, - list_address, num, 0, 0, - out); -} - -/* - * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY - * hypervisor call provide platform support for - * downloading a base adapter image to the coherent - * platform facility, and for validating the entire - * image after the download. - * Parameters - * op: operation to perform to the coherent platform function - * Download: operation = 1, the base image in the coherent platform - * facility is first erased, and then - * programmed using the image supplied - * in the scatter/gather list. - * Validate: operation = 2, the base image in the coherent platform - * facility is compared with the image - * supplied in the scatter/gather list. - * list_address: 4K naturally aligned real buffer containing - * scatter/gather list entries. - * num: number of block list entries in the scatter/gather list. - */ -static long cxl_h_download_facility(u64 unit_address, u64 op, - u64 list_address, u64 num, - u64 *out) -{ - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - unsigned int delay, total_delay = 0; - u64 token = 0; - long rc; - - if (*out != 0) - token = *out; - - memset(retbuf, 0, sizeof(retbuf)); - while (1) { - rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf, - unit_address, op, list_address, num, - token); - token = retbuf[0]; - if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) - break; - - if (rc != H_BUSY) { - delay = get_longbusy_msecs(rc); - total_delay += delay; - if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) { - WARN(1, "Warning: Giving up waiting for CXL hcall " - "%#x after %u msec\n", - H_DOWNLOAD_CA_FACILITY, total_delay); - rc = H_BUSY; - break; - } - msleep(delay); - } - } - _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n", - unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); - trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); - - switch (rc) { - case H_SUCCESS: /* The operation is completed for the coherent platform facility */ - return 0; - case H_PARAMETER: /* An incorrect parameter was supplied */ - case H_FUNCTION: /* The function is not supported. */ - case H_SG_LIST: /* An block list entry was invalid */ - case H_BAD_DATA: /* Image verification failed */ - return -EINVAL; - case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ - case H_RESOURCE: /* The function has page table mappings for MMIO */ - case H_HARDWARE: /* A hardware event prevented the attach operation */ - case H_STATE: /* The coherent platform facility is not in a valid state */ - case H_BUSY: - return -EBUSY; - case H_CONTINUE: - *out = retbuf[0]; - return 1; /* More data is needed for the complete image */ - default: - WARN(1, "Unexpected return code: %lx", rc); - return -EINVAL; - } -} - -/* - * cxl_h_download_adapter_image - Download the base image to the coherent - * platform facility. - */ -long cxl_h_download_adapter_image(u64 unit_address, - u64 list_address, u64 num, - u64 *out) -{ - return cxl_h_download_facility(unit_address, - H_DOWNLOAD_CA_FACILITY_DOWNLOAD, - list_address, num, out); -} - -/* - * cxl_h_validate_adapter_image - Validate the base image in the coherent - * platform facility. - */ -long cxl_h_validate_adapter_image(u64 unit_address, - u64 list_address, u64 num, - u64 *out) -{ - return cxl_h_download_facility(unit_address, - H_DOWNLOAD_CA_FACILITY_VALIDATE, - list_address, num, out); -} diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h deleted file mode 100644 index d200465dc6ac..000000000000 --- a/drivers/misc/cxl/hcalls.h +++ /dev/null @@ -1,200 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2015 IBM Corp. - */ - -#ifndef _HCALLS_H -#define _HCALLS_H - -#include -#include -#include -#include "cxl.h" - -#define SG_BUFFER_SIZE 4096 -#define SG_MAX_ENTRIES 256 - -struct sg_list { - u64 phys_addr; - u64 len; -}; - -/* - * This is straight out of PAPR, but replacing some of the compound fields with - * a single field, where they were identical to the register layout. - * - * The 'flags' parameter regroups the various bit-fields - */ -#define CXL_PE_CSRP_VALID (1ULL << 63) -#define CXL_PE_PROBLEM_STATE (1ULL << 62) -#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61) -#define CXL_PE_TAGS_ACTIVE (1ULL << 60) -#define CXL_PE_USER_STATE (1ULL << 59) -#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58) -#define CXL_PE_64_BIT (1ULL << 57) -#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56) - -#define CXL_PROCESS_ELEMENT_VERSION 1 -struct cxl_process_element_hcall { - __be64 version; - __be64 flags; - u8 reserved0[12]; - __be32 pslVirtualIsn; - u8 applicationVirtualIsnBitmap[256]; - u8 reserved1[144]; - struct cxl_process_element_common common; - u8 reserved4[12]; -} __packed; - -#define H_STATE_NORMAL 1 -#define H_STATE_DISABLE 2 -#define H_STATE_TEMP_UNAVAILABLE 3 -#define H_STATE_PERM_UNAVAILABLE 4 - -/* NOTE: element must be a logical real address, and must be pinned */ -long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element, - u64 *process_token, u64 *mmio_addr, u64 *mmio_size); - -/** - * cxl_h_detach_process - Detach a process element from a coherent - * platform function. - */ -long cxl_h_detach_process(u64 unit_address, u64 process_token); - -/** - * cxl_h_reset_afu - Perform a reset to the coherent platform function. - */ -long cxl_h_reset_afu(u64 unit_address); - -/** - * cxl_h_suspend_process - Suspend a process from being executed - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_suspend_process(u64 unit_address, u64 process_token); - -/** - * cxl_h_resume_process - Resume a process to be executed - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_resume_process(u64 unit_address, u64 process_token); - -/** - * cxl_h_read_error_state - Reads the error state of the coherent - * platform function. - * R4 contains the error state - */ -long cxl_h_read_error_state(u64 unit_address, u64 *state); - -/** - * cxl_h_get_afu_err - collect the AFU error buffer - * Parameter1 = byte offset into error buffer to retrieve, valid values - * are between 0 and (ibm,error-buffer-size - 1) - * Parameter2 = 4K aligned real address of error buffer, to be filled in - * Parameter3 = length of error buffer, valid values are 4K or less - */ -long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len); - -/** - * cxl_h_get_config - collect configuration record for the - * coherent platform function - * Parameter1 = # of configuration record to retrieve, valid values are - * between 0 and (ibm,#config-records - 1) - * Parameter2 = byte offset into configuration record to retrieve, - * valid values are between 0 and (ibm,config-record-size - 1) - * Parameter3 = 4K aligned real address of configuration record buffer, - * to be filled in - * Parameter4 = length of configuration buffer, valid values are 4K or less - */ -long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, - u64 buf_address, u64 len); - -/** - * cxl_h_terminate_process - Terminate the process before completion - * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when - * process was attached. - */ -long cxl_h_terminate_process(u64 unit_address, u64 process_token); - -/** - * cxl_h_collect_vpd - Collect VPD for the coherent platform function. - * Parameter1 = # of VPD record to retrieve, valid values are between 0 - * and (ibm,#config-records - 1). - * Parameter2 = 4K naturally aligned real buffer containing block - * list entries - * Parameter3 = number of block list entries in the block list, valid - * values are between 0 and 256 - */ -long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, - u64 num, u64 *out); - -/** - * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt - */ -long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg); - -/** - * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data - * based on an interrupt - * Parameter1 = value to write to the function-wide error interrupt register - */ -long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value); - -/** - * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of - * an error log - */ -long cxl_h_get_error_log(u64 unit_address, u64 value); - -/** - * cxl_h_collect_int_info - Collect interrupt info about a coherent - * platform function after an interrupt occurred. - */ -long cxl_h_collect_int_info(u64 unit_address, u64 process_token, - struct cxl_irq_info *info); - -/** - * cxl_h_control_faults - Control the operation of a coherent platform - * function after a fault occurs. - * - * Parameters - * control-mask: value to control the faults - * looks like PSL_TFC_An shifted >> 32 - * reset-mask: mask to control reset of function faults - * Set reset_mask = 1 to reset PSL errors - */ -long cxl_h_control_faults(u64 unit_address, u64 process_token, - u64 control_mask, u64 reset_mask); - -/** - * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. - */ -long cxl_h_reset_adapter(u64 unit_address); - -/** - * cxl_h_collect_vpd - Collect VPD for the coherent platform function. - * Parameter1 = 4K naturally aligned real buffer containing block - * list entries - * Parameter2 = number of block list entries in the block list, valid - * values are between 0 and 256 - */ -long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, - u64 num, u64 *out); - -/** - * cxl_h_download_adapter_image - Download the base image to the coherent - * platform facility. - */ -long cxl_h_download_adapter_image(u64 unit_address, - u64 list_address, u64 num, - u64 *out); - -/** - * cxl_h_validate_adapter_image - Validate the base image in the coherent - * platform facility. - */ -long cxl_h_validate_adapter_image(u64 unit_address, - u64 list_address, u64 num, - u64 *out); -#endif /* _HCALLS_H */ diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c deleted file mode 100644 index b730e022a48e..000000000000 --- a/drivers/misc/cxl/irq.c +++ /dev/null @@ -1,450 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" -#include "trace.h" - -static int afu_irq_range_start(void) -{ - if (cpu_has_feature(CPU_FTR_HVMODE)) - return 1; - return 0; -} - -static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) -{ - ctx->dsisr = dsisr; - ctx->dar = dar; - schedule_work(&ctx->fault_work); - return IRQ_HANDLED; -} - -irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) -{ - u64 dsisr, dar; - - dsisr = irq_info->dsisr; - dar = irq_info->dar; - - trace_cxl_psl9_irq(ctx, irq, dsisr, dar); - - pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); - - if (dsisr & CXL_PSL9_DSISR_An_TF) { - pr_devel("CXL interrupt: Scheduling translation fault handling for later (pe: %i)\n", ctx->pe); - return schedule_cxl_fault(ctx, dsisr, dar); - } - - if (dsisr & CXL_PSL9_DSISR_An_PE) - return cxl_ops->handle_psl_slice_error(ctx, dsisr, - irq_info->errstat); - if (dsisr & CXL_PSL9_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); - - if (ctx->pending_afu_err) { - /* - * This shouldn't happen - the PSL treats these errors - * as fatal and will have reset the AFU, so there's not - * much point buffering multiple AFU errors. - * OTOH if we DO ever see a storm of these come in it's - * probably best that we log them somewhere: - */ - dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error undelivered to pe %i: 0x%016llx\n", - ctx->pe, irq_info->afu_err); - } else { - spin_lock(&ctx->lock); - ctx->afu_err = irq_info->afu_err; - ctx->pending_afu_err = 1; - spin_unlock(&ctx->lock); - - wake_up_all(&ctx->wq); - } - - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); - return IRQ_HANDLED; - } - if (dsisr & CXL_PSL9_DSISR_An_OC) - pr_devel("CXL interrupt: OS Context Warning\n"); - - WARN(1, "Unhandled CXL PSL IRQ\n"); - return IRQ_HANDLED; -} - -irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) -{ - u64 dsisr, dar; - - dsisr = irq_info->dsisr; - dar = irq_info->dar; - - trace_cxl_psl_irq(ctx, irq, dsisr, dar); - - pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); - - if (dsisr & CXL_PSL_DSISR_An_DS) { - /* - * We don't inherently need to sleep to handle this, but we do - * need to get a ref to the task's mm, which we can't do from - * irq context without the potential for a deadlock since it - * takes the task_lock. An alternate option would be to keep a - * reference to the task's mm the entire time it has cxl open, - * but to do that we need to solve the issue where we hold a - * ref to the mm, but the mm can hold a ref to the fd after an - * mmap preventing anything from being cleaned up. - */ - pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe); - return schedule_cxl_fault(ctx, dsisr, dar); - } - - if (dsisr & CXL_PSL_DSISR_An_M) - pr_devel("CXL interrupt: PTE not found\n"); - if (dsisr & CXL_PSL_DSISR_An_P) - pr_devel("CXL interrupt: Storage protection violation\n"); - if (dsisr & CXL_PSL_DSISR_An_A) - pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n"); - if (dsisr & CXL_PSL_DSISR_An_S) - pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n"); - if (dsisr & CXL_PSL_DSISR_An_K) - pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n"); - - if (dsisr & CXL_PSL_DSISR_An_DM) { - /* - * In some cases we might be able to handle the fault - * immediately if hash_page would succeed, but we still need - * the task's mm, which as above we can't get without a lock - */ - pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe); - return schedule_cxl_fault(ctx, dsisr, dar); - } - if (dsisr & CXL_PSL_DSISR_An_ST) - WARN(1, "CXL interrupt: Segment Table PTE not found\n"); - if (dsisr & CXL_PSL_DSISR_An_UR) - pr_devel("CXL interrupt: AURP PTE not found\n"); - if (dsisr & CXL_PSL_DSISR_An_PE) - return cxl_ops->handle_psl_slice_error(ctx, dsisr, - irq_info->errstat); - if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); - - if (ctx->pending_afu_err) { - /* - * This shouldn't happen - the PSL treats these errors - * as fatal and will have reset the AFU, so there's not - * much point buffering multiple AFU errors. - * OTOH if we DO ever see a storm of these come in it's - * probably best that we log them somewhere: - */ - dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " - "undelivered to pe %i: 0x%016llx\n", - ctx->pe, irq_info->afu_err); - } else { - spin_lock(&ctx->lock); - ctx->afu_err = irq_info->afu_err; - ctx->pending_afu_err = true; - spin_unlock(&ctx->lock); - - wake_up_all(&ctx->wq); - } - - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); - return IRQ_HANDLED; - } - if (dsisr & CXL_PSL_DSISR_An_OC) - pr_devel("CXL interrupt: OS Context Warning\n"); - - WARN(1, "Unhandled CXL PSL IRQ\n"); - return IRQ_HANDLED; -} - -static irqreturn_t cxl_irq_afu(int irq, void *data) -{ - struct cxl_context *ctx = data; - irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); - int irq_off, afu_irq = 0; - __u16 range; - int r; - - /* - * Look for the interrupt number. - * On bare-metal, we know range 0 only contains the PSL - * interrupt so we could start counting at range 1 and initialize - * afu_irq at 1. - * In a guest, range 0 also contains AFU interrupts, so it must - * be counted for. Therefore we initialize afu_irq at 0 to take into - * account the PSL interrupt. - * - * For code-readability, it just seems easier to go over all - * the ranges on bare-metal and guest. The end result is the same. - */ - for (r = 0; r < CXL_IRQ_RANGES; r++) { - irq_off = hwirq - ctx->irqs.offset[r]; - range = ctx->irqs.range[r]; - if (irq_off >= 0 && irq_off < range) { - afu_irq += irq_off; - break; - } - afu_irq += range; - } - if (unlikely(r >= CXL_IRQ_RANGES)) { - WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", - ctx->pe, irq, hwirq); - return IRQ_HANDLED; - } - - trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq); - pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n", - afu_irq, ctx->pe, irq, hwirq); - - if (unlikely(!ctx->irq_bitmap)) { - WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n"); - return IRQ_HANDLED; - } - spin_lock(&ctx->lock); - set_bit(afu_irq - 1, ctx->irq_bitmap); - ctx->pending_irq = true; - spin_unlock(&ctx->lock); - - wake_up_all(&ctx->wq); - - return IRQ_HANDLED; -} - -unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie, const char *name) -{ - unsigned int virq; - int result; - - /* IRQ Domain? */ - virq = irq_create_mapping(NULL, hwirq); - if (!virq) { - dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n"); - return 0; - } - - if (cxl_ops->setup_irq) - cxl_ops->setup_irq(adapter, hwirq, virq); - - pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); - - result = request_irq(virq, handler, 0, name, cookie); - if (result) { - dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); - return 0; - } - - return virq; -} - -void cxl_unmap_irq(unsigned int virq, void *cookie) -{ - free_irq(virq, cookie); -} - -int cxl_register_one_irq(struct cxl *adapter, - irq_handler_t handler, - void *cookie, - irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq, - const char *name) -{ - int hwirq, virq; - - if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0) - return hwirq; - - if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) - goto err; - - *dest_hwirq = hwirq; - *dest_virq = virq; - - return 0; - -err: - cxl_ops->release_one_irq(adapter, hwirq); - return -ENOMEM; -} - -void afu_irq_name_free(struct cxl_context *ctx) -{ - struct cxl_irq_name *irq_name, *tmp; - - list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { - kfree(irq_name->name); - list_del(&irq_name->list); - kfree(irq_name); - } -} - -int afu_allocate_irqs(struct cxl_context *ctx, u32 count) -{ - int rc, r, i, j = 1; - struct cxl_irq_name *irq_name; - int alloc_count; - - /* - * In native mode, range 0 is reserved for the multiplexed - * PSL interrupt. It has been allocated when the AFU was initialized. - * - * In a guest, the PSL interrupt is not mutliplexed, but per-context, - * and is the first interrupt from range 0. It still needs to be - * allocated, so bump the count by one. - */ - if (cpu_has_feature(CPU_FTR_HVMODE)) - alloc_count = count; - else - alloc_count = count + 1; - - if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, - alloc_count))) - return rc; - - if (cpu_has_feature(CPU_FTR_HVMODE)) { - /* Multiplexed PSL Interrupt */ - ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; - ctx->irqs.range[0] = 1; - } - - ctx->irq_count = count; - ctx->irq_bitmap = bitmap_zalloc(count, GFP_KERNEL); - if (!ctx->irq_bitmap) - goto out; - - /* - * Allocate names first. If any fail, bail out before allocating - * actual hardware IRQs. - */ - for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { - for (i = 0; i < ctx->irqs.range[r]; i++) { - irq_name = kmalloc(sizeof(struct cxl_irq_name), - GFP_KERNEL); - if (!irq_name) - goto out; - irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", - dev_name(&ctx->afu->dev), - ctx->pe, j); - if (!irq_name->name) { - kfree(irq_name); - goto out; - } - /* Add to tail so next look get the correct order */ - list_add_tail(&irq_name->list, &ctx->irq_names); - j++; - } - } - return 0; - -out: - cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); - bitmap_free(ctx->irq_bitmap); - afu_irq_name_free(ctx); - return -ENOMEM; -} - -static void afu_register_hwirqs(struct cxl_context *ctx) -{ - irq_hw_number_t hwirq; - struct cxl_irq_name *irq_name; - int r, i; - irqreturn_t (*handler)(int irq, void *data); - - /* We've allocated all memory now, so let's do the irq allocations */ - irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); - for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { - hwirq = ctx->irqs.offset[r]; - for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - if (r == 0 && i == 0) - /* - * The very first interrupt of range 0 is - * always the PSL interrupt, but we only - * need to connect a handler for guests, - * because there's one PSL interrupt per - * context. - * On bare-metal, the PSL interrupt is - * multiplexed and was setup when the AFU - * was configured. - */ - handler = cxl_ops->psl_interrupt; - else - handler = cxl_irq_afu; - cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx, - irq_name->name); - irq_name = list_next_entry(irq_name, list); - } - } -} - -int afu_register_irqs(struct cxl_context *ctx, u32 count) -{ - int rc; - - rc = afu_allocate_irqs(ctx, count); - if (rc) - return rc; - - afu_register_hwirqs(ctx); - return 0; -} - -void afu_release_irqs(struct cxl_context *ctx, void *cookie) -{ - irq_hw_number_t hwirq; - unsigned int virq; - int r, i; - - for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { - hwirq = ctx->irqs.offset[r]; - for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - virq = irq_find_mapping(NULL, hwirq); - if (virq) - cxl_unmap_irq(virq, cookie); - } - } - - afu_irq_name_free(ctx); - cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); - - ctx->irq_count = 0; -} - -void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr) -{ - dev_crit(&afu->dev, - "PSL Slice error received. Check AFU for root cause.\n"); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); - if (serr & CXL_PSL_SERR_An_afuto) - dev_crit(&afu->dev, "AFU MMIO Timeout\n"); - if (serr & CXL_PSL_SERR_An_afudis) - dev_crit(&afu->dev, - "MMIO targeted Accelerator that was not enabled\n"); - if (serr & CXL_PSL_SERR_An_afuov) - dev_crit(&afu->dev, "AFU CTAG Overflow\n"); - if (serr & CXL_PSL_SERR_An_badsrc) - dev_crit(&afu->dev, "Bad Interrupt Source\n"); - if (serr & CXL_PSL_SERR_An_badctx) - dev_crit(&afu->dev, "Bad Context Handle\n"); - if (serr & CXL_PSL_SERR_An_llcmdis) - dev_crit(&afu->dev, "LLCMD to Disabled AFU\n"); - if (serr & CXL_PSL_SERR_An_llcmdto) - dev_crit(&afu->dev, "LLCMD Timeout to AFU\n"); - if (serr & CXL_PSL_SERR_An_afupar) - dev_crit(&afu->dev, "AFU MMIO Parity Error\n"); - if (serr & CXL_PSL_SERR_An_afudup) - dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n"); - if (serr & CXL_PSL_SERR_An_AE) - dev_crit(&afu->dev, - "AFU asserted JDONE with JERROR in AFU Directed Mode\n"); -} diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c deleted file mode 100644 index c1fbf6f588f7..000000000000 --- a/drivers/misc/cxl/main.c +++ /dev/null @@ -1,383 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "cxl.h" -#include "trace.h" - -static DEFINE_SPINLOCK(adapter_idr_lock); -static DEFINE_IDR(cxl_adapter_idr); - -uint cxl_verbose; -module_param_named(verbose, cxl_verbose, uint, 0600); -MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); - -const struct cxl_backend_ops *cxl_ops; - -int cxl_afu_slbia(struct cxl_afu *afu) -{ - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - - pr_devel("cxl_afu_slbia issuing SLBIA command\n"); - cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); - while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); - return -EBUSY; - } - /* If the adapter has gone down, we can assume that we - * will PERST it and that will invalidate everything. - */ - if (!cxl_ops->link_ok(afu->adapter, afu)) - return -EIO; - cpu_relax(); - } - return 0; -} - -static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) -{ - unsigned long flags; - - if (ctx->mm != mm) - return; - - pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, - ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); - - spin_lock_irqsave(&ctx->sste_lock, flags); - trace_cxl_slbia(ctx); - memset(ctx->sstp, 0, ctx->sst_size); - spin_unlock_irqrestore(&ctx->sste_lock, flags); - mb(); - cxl_afu_slbia(ctx->afu); -} - -static inline void cxl_slbia_core(struct mm_struct *mm) -{ - struct cxl *adapter; - struct cxl_afu *afu; - struct cxl_context *ctx; - int card, slice, id; - - pr_devel("%s called\n", __func__); - - spin_lock(&adapter_idr_lock); - idr_for_each_entry(&cxl_adapter_idr, adapter, card) { - /* XXX: Make this lookup faster with link from mm to ctx */ - spin_lock(&adapter->afu_list_lock); - for (slice = 0; slice < adapter->slices; slice++) { - afu = adapter->afu[slice]; - if (!afu || !afu->enabled) - continue; - rcu_read_lock(); - idr_for_each_entry(&afu->contexts_idr, ctx, id) - _cxl_slbia(ctx, mm); - rcu_read_unlock(); - } - spin_unlock(&adapter->afu_list_lock); - } - spin_unlock(&adapter_idr_lock); -} - -static struct cxl_calls cxl_calls = { - .cxl_slbia = cxl_slbia_core, - .owner = THIS_MODULE, -}; - -int cxl_alloc_sst(struct cxl_context *ctx) -{ - unsigned long vsid; - u64 ea_mask, size, sstp0, sstp1; - - sstp0 = 0; - sstp1 = 0; - - ctx->sst_size = PAGE_SIZE; - ctx->sst_lru = 0; - ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); - if (!ctx->sstp) { - pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); - return -ENOMEM; - } - pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); - - vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; - - sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; - sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; - - size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; - if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { - WARN(1, "Impossible segment table size\n"); - return -EINVAL; - } - sstp0 |= size; - - if (mmu_kernel_ssize == MMU_SEGSIZE_256M) - ea_mask = 0xfffff00ULL; - else - ea_mask = 0xffffffff00ULL; - - sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ - sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; - sstp1 |= (u64)ctx->sstp & ea_mask; - sstp1 |= CXL_SSTP1_An_V; - - pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n", - (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1); - - /* Store calculated sstp hardware points for use later */ - ctx->sstp0 = sstp0; - ctx->sstp1 = sstp1; - - return 0; -} - -/* print buffer content as integers when debugging */ -void cxl_dump_debug_buffer(void *buf, size_t buf_len) -{ -#ifdef DEBUG - int i, *ptr; - - /* - * We want to regroup up to 4 integers per line, which means they - * need to be in the same pr_devel() statement - */ - ptr = (int *) buf; - for (i = 0; i * 4 < buf_len; i += 4) { - if ((i + 3) * 4 < buf_len) - pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], - ptr[i + 2], ptr[i + 3]); - else if ((i + 2) * 4 < buf_len) - pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], - ptr[i + 2]); - else if ((i + 1) * 4 < buf_len) - pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); - else - pr_devel("%.8x\n", ptr[i]); - } -#endif /* DEBUG */ -} - -/* Find a CXL adapter by it's number and increase it's refcount */ -struct cxl *get_cxl_adapter(int num) -{ - struct cxl *adapter; - - spin_lock(&adapter_idr_lock); - if ((adapter = idr_find(&cxl_adapter_idr, num))) - get_device(&adapter->dev); - spin_unlock(&adapter_idr_lock); - - return adapter; -} - -static int cxl_alloc_adapter_nr(struct cxl *adapter) -{ - int i; - - idr_preload(GFP_KERNEL); - spin_lock(&adapter_idr_lock); - i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT); - spin_unlock(&adapter_idr_lock); - idr_preload_end(); - if (i < 0) - return i; - - adapter->adapter_num = i; - - return 0; -} - -void cxl_remove_adapter_nr(struct cxl *adapter) -{ - idr_remove(&cxl_adapter_idr, adapter->adapter_num); -} - -struct cxl *cxl_alloc_adapter(void) -{ - struct cxl *adapter; - - if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) - return NULL; - - spin_lock_init(&adapter->afu_list_lock); - - if (cxl_alloc_adapter_nr(adapter)) - goto err1; - - if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) - goto err2; - - /* start with context lock taken */ - atomic_set(&adapter->contexts_num, -1); - - return adapter; -err2: - cxl_remove_adapter_nr(adapter); -err1: - kfree(adapter); - return NULL; -} - -struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) -{ - struct cxl_afu *afu; - - if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) - return NULL; - - afu->adapter = adapter; - afu->dev.parent = &adapter->dev; - afu->dev.release = cxl_ops->release_afu; - afu->slice = slice; - idr_init(&afu->contexts_idr); - mutex_init(&afu->contexts_lock); - spin_lock_init(&afu->afu_cntl_lock); - atomic_set(&afu->configured_state, -1); - afu->prefault_mode = CXL_PREFAULT_NONE; - afu->irqs_max = afu->adapter->user_irqs; - - return afu; -} - -int cxl_afu_select_best_mode(struct cxl_afu *afu) -{ - if (afu->modes_supported & CXL_MODE_DIRECTED) - return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); - - if (afu->modes_supported & CXL_MODE_DEDICATED) - return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); - - dev_warn(&afu->dev, "No supported programming modes available\n"); - /* We don't fail this so the user can inspect sysfs */ - return 0; -} - -int cxl_adapter_context_get(struct cxl *adapter) -{ - int rc; - - rc = atomic_inc_unless_negative(&adapter->contexts_num); - return rc ? 0 : -EBUSY; -} - -void cxl_adapter_context_put(struct cxl *adapter) -{ - atomic_dec_if_positive(&adapter->contexts_num); -} - -int cxl_adapter_context_lock(struct cxl *adapter) -{ - int rc; - /* no active contexts -> contexts_num == 0 */ - rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); - return rc ? -EBUSY : 0; -} - -void cxl_adapter_context_unlock(struct cxl *adapter) -{ - int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); - - /* - * contexts lock taken -> contexts_num == -1 - * If not true then show a warning and force reset the lock. - * This will happen when context_unlock was requested without - * doing a context_lock. - */ - if (val != -1) { - atomic_set(&adapter->contexts_num, 0); - WARN(1, "Adapter context unlocked with %d active contexts", - val); - } -} - -static int __init init_cxl(void) -{ - int rc = 0; - - if (!tlbie_capable) - return -EINVAL; - - if ((rc = cxl_file_init())) - return rc; - - cxl_debugfs_init(); - - /* - * we don't register the callback on P9. slb callack is only - * used for the PSL8 MMU and CX4. - */ - if (cxl_is_power8()) { - rc = register_cxl_calls(&cxl_calls); - if (rc) - goto err; - } - - if (cpu_has_feature(CPU_FTR_HVMODE)) { - cxl_ops = &cxl_native_ops; - rc = pci_register_driver(&cxl_pci_driver); - } -#ifdef CONFIG_PPC_PSERIES - else { - cxl_ops = &cxl_guest_ops; - rc = platform_driver_register(&cxl_of_driver); - } -#endif - if (rc) - goto err1; - - return 0; -err1: - if (cxl_is_power8()) - unregister_cxl_calls(&cxl_calls); -err: - cxl_debugfs_exit(); - cxl_file_exit(); - - return rc; -} - -static void exit_cxl(void) -{ - if (cpu_has_feature(CPU_FTR_HVMODE)) - pci_unregister_driver(&cxl_pci_driver); -#ifdef CONFIG_PPC_PSERIES - else - platform_driver_unregister(&cxl_of_driver); -#endif - - cxl_debugfs_exit(); - cxl_file_exit(); - if (cxl_is_power8()) - unregister_cxl_calls(&cxl_calls); - idr_destroy(&cxl_adapter_idr); -} - -module_init(init_cxl); -module_exit(exit_cxl); - -MODULE_DESCRIPTION("IBM Coherent Accelerator"); -MODULE_AUTHOR("Ian Munsie "); -MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c deleted file mode 100644 index fbe16a6ab7ad..000000000000 --- a/drivers/misc/cxl/native.c +++ /dev/null @@ -1,1592 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" -#include "trace.h" - -static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, - u64 result, u64 mask, bool enabled) -{ - u64 AFU_Cntl; - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - int rc = 0; - - spin_lock(&afu->afu_cntl_lock); - pr_devel("AFU command starting: %llx\n", command); - - trace_cxl_afu_ctrl(afu, command); - - AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command); - - AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - while ((AFU_Cntl & mask) != result) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&afu->dev, "WARNING: AFU control timed out!\n"); - rc = -EBUSY; - goto out; - } - - if (!cxl_ops->link_ok(afu->adapter, afu)) { - afu->enabled = enabled; - rc = -EIO; - goto out; - } - - pr_devel_ratelimited("AFU control... (0x%016llx)\n", - AFU_Cntl | command); - cpu_relax(); - AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - } - - if (AFU_Cntl & CXL_AFU_Cntl_An_RA) { - /* - * Workaround for a bug in the XSL used in the Mellanox CX4 - * that fails to clear the RA bit after an AFU reset, - * preventing subsequent AFU resets from working. - */ - cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA); - } - - pr_devel("AFU command complete: %llx\n", command); - afu->enabled = enabled; -out: - trace_cxl_afu_ctrl_done(afu, command, rc); - spin_unlock(&afu->afu_cntl_lock); - - return rc; -} - -static int afu_enable(struct cxl_afu *afu) -{ - pr_devel("AFU enable request\n"); - - return afu_control(afu, CXL_AFU_Cntl_An_E, 0, - CXL_AFU_Cntl_An_ES_Enabled, - CXL_AFU_Cntl_An_ES_MASK, true); -} - -int cxl_afu_disable(struct cxl_afu *afu) -{ - pr_devel("AFU disable request\n"); - - return afu_control(afu, 0, CXL_AFU_Cntl_An_E, - CXL_AFU_Cntl_An_ES_Disabled, - CXL_AFU_Cntl_An_ES_MASK, false); -} - -/* This will disable as well as reset */ -static int native_afu_reset(struct cxl_afu *afu) -{ - int rc; - u64 serr; - - pr_devel("AFU reset request\n"); - - rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0, - CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, - CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, - false); - - /* - * Re-enable any masked interrupts when the AFU is not - * activated to avoid side effects after attaching a process - * in dedicated mode. - */ - if (afu->current_mode == 0) { - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - } - - return rc; -} - -static int native_afu_check_and_enable(struct cxl_afu *afu) -{ - if (!cxl_ops->link_ok(afu->adapter, afu)) { - WARN(1, "Refusing to enable afu while link down!\n"); - return -EIO; - } - if (afu->enabled) - return 0; - return afu_enable(afu); -} - -int cxl_psl_purge(struct cxl_afu *afu) -{ - u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); - u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - u64 dsisr, dar; - u64 start, end; - u64 trans_fault = 0x0ULL; - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - int rc = 0; - - trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc); - - pr_devel("PSL purge request\n"); - - if (cxl_is_power8()) - trans_fault = CXL_PSL_DSISR_TRANS; - if (cxl_is_power9()) - trans_fault = CXL_PSL9_DSISR_An_TF; - - if (!cxl_ops->link_ok(afu->adapter, afu)) { - dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); - rc = -EIO; - goto out; - } - - if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { - WARN(1, "psl_purge request while AFU not disabled!\n"); - cxl_afu_disable(afu); - } - - cxl_p1n_write(afu, CXL_PSL_SCNTL_An, - PSL_CNTL | CXL_PSL_SCNTL_An_Pc); - start = local_clock(); - PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); - while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK) - == CXL_PSL_SCNTL_An_Ps_Pending) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n"); - rc = -EBUSY; - goto out; - } - if (!cxl_ops->link_ok(afu->adapter, afu)) { - rc = -EIO; - goto out; - } - - dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", - PSL_CNTL, dsisr); - - if (dsisr & trans_fault) { - dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); - dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", - dsisr, dar); - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - } else if (dsisr) { - dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", - dsisr); - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - } else { - cpu_relax(); - } - PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); - } - end = local_clock(); - pr_devel("PSL purged in %lld ns\n", end - start); - - cxl_p1n_write(afu, CXL_PSL_SCNTL_An, - PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc); -out: - trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc); - return rc; -} - -static int spa_max_procs(int spa_size) -{ - /* - * From the CAIA: - * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255 - * Most of that junk is really just an overly-complicated way of saying - * the last 256 bytes are __aligned(128), so it's really: - * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255 - * and - * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1 - * so - * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256 - * Ignore the alignment (which is safe in this case as long as we are - * careful with our rounding) and solve for n: - */ - return ((spa_size / 8) - 96) / 17; -} - -static int cxl_alloc_spa(struct cxl_afu *afu, int mode) -{ - unsigned spa_size; - - /* Work out how many pages to allocate */ - afu->native->spa_order = -1; - do { - afu->native->spa_order++; - spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; - - if (spa_size > 0x100000) { - dev_warn(&afu->dev, "num_of_processes too large for the SPA, limiting to %i (0x%x)\n", - afu->native->spa_max_procs, afu->native->spa_size); - if (mode != CXL_MODE_DEDICATED) - afu->num_procs = afu->native->spa_max_procs; - break; - } - - afu->native->spa_size = spa_size; - afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size); - } while (afu->native->spa_max_procs < afu->num_procs); - - if (!(afu->native->spa = (struct cxl_process_element *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) { - pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); - return -ENOMEM; - } - pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", - 1<native->spa_order, afu->native->spa_max_procs, afu->num_procs); - - return 0; -} - -static void attach_spa(struct cxl_afu *afu) -{ - u64 spap; - - afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa + - ((afu->native->spa_max_procs + 3) * 128)); - - spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr; - spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; - spap |= CXL_PSL_SPAP_V; - pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", - afu->native->spa, afu->native->spa_max_procs, - afu->native->sw_command_status, spap); - cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); -} - -void cxl_release_spa(struct cxl_afu *afu) -{ - if (afu->native->spa) { - free_pages((unsigned long) afu->native->spa, - afu->native->spa_order); - afu->native->spa = NULL; - } -} - -/* - * Invalidation of all ERAT entries is no longer required by CAIA2. Use - * only for debug. - */ -int cxl_invalidate_all_psl9(struct cxl *adapter) -{ - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - u64 ierat; - - pr_devel("CXL adapter - invalidation of all ERAT entries\n"); - - /* Invalidates all ERAT entries for Radix or HPT */ - ierat = CXL_XSL9_IERAT_IALL; - if (radix_enabled()) - ierat |= CXL_XSL9_IERAT_INVR; - cxl_p1_write(adapter, CXL_XSL9_IERAT, ierat); - - while (cxl_p1_read(adapter, CXL_XSL9_IERAT) & CXL_XSL9_IERAT_IINPROG) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&adapter->dev, - "WARNING: CXL adapter invalidation of all ERAT entries timed out!\n"); - return -EBUSY; - } - if (!cxl_ops->link_ok(adapter, NULL)) - return -EIO; - cpu_relax(); - } - return 0; -} - -int cxl_invalidate_all_psl8(struct cxl *adapter) -{ - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - - pr_devel("CXL adapter wide TLBIA & SLBIA\n"); - - cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A); - - cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL); - while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); - return -EBUSY; - } - if (!cxl_ops->link_ok(adapter, NULL)) - return -EIO; - cpu_relax(); - } - - cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL); - while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); - return -EBUSY; - } - if (!cxl_ops->link_ok(adapter, NULL)) - return -EIO; - cpu_relax(); - } - return 0; -} - -int cxl_data_cache_flush(struct cxl *adapter) -{ - u64 reg; - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - - /* - * Do a datacache flush only if datacache is available. - * In case of PSL9D datacache absent hence flush operation. - * would timeout. - */ - if (adapter->native->no_data_cache) { - pr_devel("No PSL data cache. Ignoring cache flush req.\n"); - return 0; - } - - pr_devel("Flushing data cache\n"); - reg = cxl_p1_read(adapter, CXL_PSL_Control); - reg |= CXL_PSL_Control_Fr; - cxl_p1_write(adapter, CXL_PSL_Control, reg); - - reg = cxl_p1_read(adapter, CXL_PSL_Control); - while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n"); - return -EBUSY; - } - - if (!cxl_ops->link_ok(adapter, NULL)) { - dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n"); - return -EIO; - } - cpu_relax(); - reg = cxl_p1_read(adapter, CXL_PSL_Control); - } - - reg &= ~CXL_PSL_Control_Fr; - cxl_p1_write(adapter, CXL_PSL_Control, reg); - return 0; -} - -static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1) -{ - int rc; - - /* 1. Disable SSTP by writing 0 to SSTP1[V] */ - cxl_p2n_write(afu, CXL_SSTP1_An, 0); - - /* 2. Invalidate all SLB entries */ - if ((rc = cxl_afu_slbia(afu))) - return rc; - - /* 3. Set SSTP0_An */ - cxl_p2n_write(afu, CXL_SSTP0_An, sstp0); - - /* 4. Set SSTP1_An */ - cxl_p2n_write(afu, CXL_SSTP1_An, sstp1); - - return 0; -} - -/* Using per slice version may improve performance here. (ie. SLBIA_An) */ -static void slb_invalid(struct cxl_context *ctx) -{ - struct cxl *adapter = ctx->afu->adapter; - u64 slbia; - - WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex)); - - cxl_p1_write(adapter, CXL_PSL_LBISEL, - ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | - be32_to_cpu(ctx->elem->lpid)); - cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); - - while (1) { - if (!cxl_ops->link_ok(adapter, NULL)) - break; - slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); - if (!(slbia & CXL_TLB_SLB_P)) - break; - cpu_relax(); - } -} - -static int do_process_element_cmd(struct cxl_context *ctx, - u64 cmd, u64 pe_state) -{ - u64 state; - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - int rc = 0; - - trace_cxl_llcmd(ctx, cmd); - - WARN_ON(!ctx->afu->enabled); - - ctx->elem->software_state = cpu_to_be32(pe_state); - smp_wmb(); - *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); - smp_mb(); - cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); - while (1) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); - rc = -EBUSY; - goto out; - } - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { - dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); - rc = -EIO; - goto out; - } - state = be64_to_cpup(ctx->afu->native->sw_command_status); - if (state == ~0ULL) { - pr_err("cxl: Error adding process element to AFU\n"); - rc = -1; - goto out; - } - if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == - (cmd | (cmd >> 16) | ctx->pe)) - break; - /* - * The command won't finish in the PSL if there are - * outstanding DSIs. Hence we need to yield here in - * case there are outstanding DSIs that we need to - * service. Tuning possiblity: we could wait for a - * while before sched - */ - schedule(); - - } -out: - trace_cxl_llcmd_done(ctx, cmd, rc); - return rc; -} - -static int add_process_element(struct cxl_context *ctx) -{ - int rc = 0; - - mutex_lock(&ctx->afu->native->spa_mutex); - pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); - if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) - ctx->pe_inserted = true; - pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->native->spa_mutex); - return rc; -} - -static int terminate_process_element(struct cxl_context *ctx) -{ - int rc = 0; - - /* fast path terminate if it's already invalid */ - if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) - return rc; - - mutex_lock(&ctx->afu->native->spa_mutex); - pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); - /* We could be asked to terminate when the hw is down. That - * should always succeed: it's not running if the hw has gone - * away and is being reset. - */ - if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, - CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); - ctx->elem->software_state = 0; /* Remove Valid bit */ - pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->native->spa_mutex); - return rc; -} - -static int remove_process_element(struct cxl_context *ctx) -{ - int rc = 0; - - mutex_lock(&ctx->afu->native->spa_mutex); - pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); - - /* We could be asked to remove when the hw is down. Again, if - * the hw is down, the PE is gone, so we succeed. - */ - if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); - - if (!rc) - ctx->pe_inserted = false; - if (cxl_is_power8()) - slb_invalid(ctx); - pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->native->spa_mutex); - - return rc; -} - -void cxl_assign_psn_space(struct cxl_context *ctx) -{ - if (!ctx->afu->pp_size || ctx->master) { - ctx->psn_phys = ctx->afu->psn_phys; - ctx->psn_size = ctx->afu->adapter->ps_size; - } else { - ctx->psn_phys = ctx->afu->psn_phys + - (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe); - ctx->psn_size = ctx->afu->pp_size; - } -} - -static int activate_afu_directed(struct cxl_afu *afu) -{ - int rc; - - dev_info(&afu->dev, "Activating AFU directed mode\n"); - - afu->num_procs = afu->max_procs_virtualised; - if (afu->native->spa == NULL) { - if (cxl_alloc_spa(afu, CXL_MODE_DIRECTED)) - return -ENOMEM; - } - attach_spa(afu); - - cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); - if (cxl_is_power8()) - cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); - cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); - - afu->current_mode = CXL_MODE_DIRECTED; - - if ((rc = cxl_chardev_m_afu_add(afu))) - return rc; - - if ((rc = cxl_sysfs_afu_m_add(afu))) - goto err; - - if ((rc = cxl_chardev_s_afu_add(afu))) - goto err1; - - return 0; -err1: - cxl_sysfs_afu_m_remove(afu); -err: - cxl_chardev_afu_remove(afu); - return rc; -} - -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE) -#else -#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) -#endif - -u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) -{ - u64 sr = 0; - - set_endian(sr); - if (master) - sr |= CXL_PSL_SR_An_MP; - if (mfspr(SPRN_LPCR) & LPCR_TC) - sr |= CXL_PSL_SR_An_TC; - - if (kernel) { - if (!real_mode) - sr |= CXL_PSL_SR_An_R; - sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV; - } else { - sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; - if (radix_enabled()) - sr |= CXL_PSL_SR_An_HV; - else - sr &= ~(CXL_PSL_SR_An_HV); - if (!test_tsk_thread_flag(current, TIF_32BIT)) - sr |= CXL_PSL_SR_An_SF; - } - if (p9) { - if (radix_enabled()) - sr |= CXL_PSL_SR_An_XLAT_ror; - else - sr |= CXL_PSL_SR_An_XLAT_hpt; - } - return sr; -} - -static u64 calculate_sr(struct cxl_context *ctx) -{ - return cxl_calculate_sr(ctx->master, ctx->kernel, false, - cxl_is_power9()); -} - -static void update_ivtes_directed(struct cxl_context *ctx) -{ - bool need_update = (ctx->status == STARTED); - int r; - - if (need_update) { - WARN_ON(terminate_process_element(ctx)); - WARN_ON(remove_process_element(ctx)); - } - - for (r = 0; r < CXL_IRQ_RANGES; r++) { - ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); - ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); - } - - /* - * Theoretically we could use the update llcmd, instead of a - * terminate/remove/add (or if an atomic update was required we could - * do a suspend/update/resume), however it seems there might be issues - * with the update llcmd on some cards (including those using an XSL on - * an ASIC) so for now it's safest to go with the commands that are - * known to work. In the future if we come across a situation where the - * card may be performing transactions using the same PE while we are - * doing this update we might need to revisit this. - */ - if (need_update) - WARN_ON(add_process_element(ctx)); -} - -static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) -{ - u32 pid; - int rc; - - cxl_assign_psn_space(ctx); - - ctx->elem->ctxtime = 0; /* disable */ - ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); - ctx->elem->haurp = 0; /* disable */ - - if (ctx->kernel) - pid = 0; - else { - if (ctx->mm == NULL) { - pr_devel("%s: unable to get mm for pe=%d pid=%i\n", - __func__, ctx->pe, pid_nr(ctx->pid)); - return -EINVAL; - } - pid = ctx->mm->context.id; - } - - /* Assign a unique TIDR (thread id) for the current thread */ - if (!(ctx->tidr) && (ctx->assign_tidr)) { - rc = set_thread_tidr(current); - if (rc) - return -ENODEV; - ctx->tidr = current->thread.tidr; - pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); - } - - ctx->elem->common.tid = cpu_to_be32(ctx->tidr); - ctx->elem->common.pid = cpu_to_be32(pid); - - ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); - - ctx->elem->common.csrp = 0; /* disable */ - - cxl_prefault(ctx, wed); - - /* - * Ensure we have the multiplexed PSL interrupt set up to take faults - * for kernel contexts that may not have allocated any AFU IRQs at all: - */ - if (ctx->irqs.range[0] == 0) { - ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; - ctx->irqs.range[0] = 1; - } - - ctx->elem->common.amr = cpu_to_be64(amr); - ctx->elem->common.wed = cpu_to_be64(wed); - - return 0; -} - -int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr) -{ - int result; - - /* fill the process element entry */ - result = process_element_entry_psl9(ctx, wed, amr); - if (result) - return result; - - update_ivtes_directed(ctx); - - /* first guy needs to enable */ - result = cxl_ops->afu_check_and_enable(ctx->afu); - if (result) - return result; - - return add_process_element(ctx); -} - -int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr) -{ - u32 pid; - int result; - - cxl_assign_psn_space(ctx); - - ctx->elem->ctxtime = 0; /* disable */ - ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); - ctx->elem->haurp = 0; /* disable */ - ctx->elem->u.sdr = cpu_to_be64(mfspr(SPRN_SDR1)); - - pid = current->pid; - if (ctx->kernel) - pid = 0; - ctx->elem->common.tid = 0; - ctx->elem->common.pid = cpu_to_be32(pid); - - ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); - - ctx->elem->common.csrp = 0; /* disable */ - ctx->elem->common.u.psl8.aurp0 = 0; /* disable */ - ctx->elem->common.u.psl8.aurp1 = 0; /* disable */ - - cxl_prefault(ctx, wed); - - ctx->elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); - ctx->elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); - - /* - * Ensure we have the multiplexed PSL interrupt set up to take faults - * for kernel contexts that may not have allocated any AFU IRQs at all: - */ - if (ctx->irqs.range[0] == 0) { - ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; - ctx->irqs.range[0] = 1; - } - - update_ivtes_directed(ctx); - - ctx->elem->common.amr = cpu_to_be64(amr); - ctx->elem->common.wed = cpu_to_be64(wed); - - /* first guy needs to enable */ - if ((result = cxl_ops->afu_check_and_enable(ctx->afu))) - return result; - - return add_process_element(ctx); -} - -static int deactivate_afu_directed(struct cxl_afu *afu) -{ - dev_info(&afu->dev, "Deactivating AFU directed mode\n"); - - afu->current_mode = 0; - afu->num_procs = 0; - - cxl_sysfs_afu_m_remove(afu); - cxl_chardev_afu_remove(afu); - - /* - * The CAIA section 2.2.1 indicates that the procedure for starting and - * stopping an AFU in AFU directed mode is AFU specific, which is not - * ideal since this code is generic and with one exception has no - * knowledge of the AFU. This is in contrast to the procedure for - * disabling a dedicated process AFU, which is documented to just - * require a reset. The architecture does indicate that both an AFU - * reset and an AFU disable should result in the AFU being disabled and - * we do both followed by a PSL purge for safety. - * - * Notably we used to have some issues with the disable sequence on PSL - * cards, which is why we ended up using this heavy weight procedure in - * the first place, however a bug was discovered that had rendered the - * disable operation ineffective, so it is conceivable that was the - * sole explanation for those difficulties. Careful regression testing - * is recommended if anyone attempts to remove or reorder these - * operations. - * - * The XSL on the Mellanox CX4 behaves a little differently from the - * PSL based cards and will time out an AFU reset if the AFU is still - * enabled. That card is special in that we do have a means to identify - * it from this code, so in that case we skip the reset and just use a - * disable/purge to avoid the timeout and corresponding noise in the - * kernel log. - */ - if (afu->adapter->native->sl_ops->needs_reset_before_disable) - cxl_ops->afu_reset(afu); - cxl_afu_disable(afu); - cxl_psl_purge(afu); - - return 0; -} - -int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu) -{ - dev_info(&afu->dev, "Activating dedicated process mode\n"); - - /* - * If XSL is set to dedicated mode (Set in PSL_SCNTL reg), the - * XSL and AFU are programmed to work with a single context. - * The context information should be configured in the SPA area - * index 0 (so PSL_SPAP must be configured before enabling the - * AFU). - */ - afu->num_procs = 1; - if (afu->native->spa == NULL) { - if (cxl_alloc_spa(afu, CXL_MODE_DEDICATED)) - return -ENOMEM; - } - attach_spa(afu); - - cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); - cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); - - afu->current_mode = CXL_MODE_DEDICATED; - - return cxl_chardev_d_afu_add(afu); -} - -int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu) -{ - dev_info(&afu->dev, "Activating dedicated process mode\n"); - - cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); - - cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */ - cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */ - cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); - cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID)); - cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */ - cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1)); - - cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */ - cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */ - cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */ - - afu->current_mode = CXL_MODE_DEDICATED; - afu->num_procs = 1; - - return cxl_chardev_d_afu_add(afu); -} - -void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx) -{ - int r; - - for (r = 0; r < CXL_IRQ_RANGES; r++) { - ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); - ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); - } -} - -void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx) -{ - struct cxl_afu *afu = ctx->afu; - - cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, - (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | - (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | - (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | - ((u64)ctx->irqs.offset[3] & 0xffff)); - cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) - (((u64)ctx->irqs.range[0] & 0xffff) << 48) | - (((u64)ctx->irqs.range[1] & 0xffff) << 32) | - (((u64)ctx->irqs.range[2] & 0xffff) << 16) | - ((u64)ctx->irqs.range[3] & 0xffff)); -} - -int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr) -{ - struct cxl_afu *afu = ctx->afu; - int result; - - /* fill the process element entry */ - result = process_element_entry_psl9(ctx, wed, amr); - if (result) - return result; - - if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) - afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); - - ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V); - /* - * Ideally we should do a wmb() here to make sure the changes to the - * PE are visible to the card before we call afu_enable. - * On ppc64 though all mmios are preceded by a 'sync' instruction hence - * we dont dont need one here. - */ - - result = cxl_ops->afu_reset(afu); - if (result) - return result; - - return afu_enable(afu); -} - -int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr) -{ - struct cxl_afu *afu = ctx->afu; - u64 pid; - int rc; - - pid = (u64)current->pid << 32; - if (ctx->kernel) - pid = 0; - cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid); - - cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx)); - - if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) - return rc; - - cxl_prefault(ctx, wed); - - if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) - afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); - - cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); - - /* master only context for dedicated */ - cxl_assign_psn_space(ctx); - - if ((rc = cxl_ops->afu_reset(afu))) - return rc; - - cxl_p2n_write(afu, CXL_PSL_WED_An, wed); - - return afu_enable(afu); -} - -static int deactivate_dedicated_process(struct cxl_afu *afu) -{ - dev_info(&afu->dev, "Deactivating dedicated process mode\n"); - - afu->current_mode = 0; - afu->num_procs = 0; - - cxl_chardev_afu_remove(afu); - - return 0; -} - -static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode) -{ - if (mode == CXL_MODE_DIRECTED) - return deactivate_afu_directed(afu); - if (mode == CXL_MODE_DEDICATED) - return deactivate_dedicated_process(afu); - return 0; -} - -static int native_afu_activate_mode(struct cxl_afu *afu, int mode) -{ - if (!mode) - return 0; - if (!(mode & afu->modes_supported)) - return -EINVAL; - - if (!cxl_ops->link_ok(afu->adapter, afu)) { - WARN(1, "Device link is down, refusing to activate!\n"); - return -EIO; - } - - if (mode == CXL_MODE_DIRECTED) - return activate_afu_directed(afu); - if ((mode == CXL_MODE_DEDICATED) && - (afu->adapter->native->sl_ops->activate_dedicated_process)) - return afu->adapter->native->sl_ops->activate_dedicated_process(afu); - - return -EINVAL; -} - -static int native_attach_process(struct cxl_context *ctx, bool kernel, - u64 wed, u64 amr) -{ - if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { - WARN(1, "Device link is down, refusing to attach process!\n"); - return -EIO; - } - - ctx->kernel = kernel; - if ((ctx->afu->current_mode == CXL_MODE_DIRECTED) && - (ctx->afu->adapter->native->sl_ops->attach_afu_directed)) - return ctx->afu->adapter->native->sl_ops->attach_afu_directed(ctx, wed, amr); - - if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && - (ctx->afu->adapter->native->sl_ops->attach_dedicated_process)) - return ctx->afu->adapter->native->sl_ops->attach_dedicated_process(ctx, wed, amr); - - return -EINVAL; -} - -static inline int detach_process_native_dedicated(struct cxl_context *ctx) -{ - /* - * The CAIA section 2.1.1 indicates that we need to do an AFU reset to - * stop the AFU in dedicated mode (we therefore do not make that - * optional like we do in the afu directed path). It does not indicate - * that we need to do an explicit disable (which should occur - * implicitly as part of the reset) or purge, but we do these as well - * to be on the safe side. - * - * Notably we used to have some issues with the disable sequence - * (before the sequence was spelled out in the architecture) which is - * why we were so heavy weight in the first place, however a bug was - * discovered that had rendered the disable operation ineffective, so - * it is conceivable that was the sole explanation for those - * difficulties. Point is, we should be careful and do some regression - * testing if we ever attempt to remove any part of this procedure. - */ - cxl_ops->afu_reset(ctx->afu); - cxl_afu_disable(ctx->afu); - cxl_psl_purge(ctx->afu); - return 0; -} - -static void native_update_ivtes(struct cxl_context *ctx) -{ - if (ctx->afu->current_mode == CXL_MODE_DIRECTED) - return update_ivtes_directed(ctx); - if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && - (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)) - return ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); - WARN(1, "native_update_ivtes: Bad mode\n"); -} - -static inline int detach_process_native_afu_directed(struct cxl_context *ctx) -{ - if (!ctx->pe_inserted) - return 0; - if (terminate_process_element(ctx)) - return -1; - if (remove_process_element(ctx)) - return -1; - - return 0; -} - -static int native_detach_process(struct cxl_context *ctx) -{ - trace_cxl_detach(ctx); - - if (ctx->afu->current_mode == CXL_MODE_DEDICATED) - return detach_process_native_dedicated(ctx); - - return detach_process_native_afu_directed(ctx); -} - -static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) -{ - /* If the adapter has gone away, we can't get any meaningful - * information. - */ - if (!cxl_ops->link_ok(afu->adapter, afu)) - return -EIO; - - info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); - if (cxl_is_power8()) - info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); - info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); - info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - info->proc_handle = 0; - - return 0; -} - -void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx) -{ - u64 fir1, serr; - - fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1); - - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); - if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); - cxl_afu_decode_psl_serr(ctx->afu, serr); - } -} - -void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx) -{ - u64 fir1, fir2, fir_slice, serr, afu_debug; - - fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); - fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); - afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); - dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); - if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); - cxl_afu_decode_psl_serr(ctx->afu, serr); - } - dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); -} - -static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, - u64 dsisr, u64 errstat) -{ - - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); - - if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers) - ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx); - - if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) { - dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); - ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter); - } - - return cxl_ops->ack_irq(ctx, 0, errstat); -} - -static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr) -{ - if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS)) - return true; - - if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF)) - return true; - - return false; -} - -irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info) -{ - if (cxl_is_translation_fault(afu, irq_info->dsisr)) - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - else - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - - return IRQ_HANDLED; -} - -static irqreturn_t native_irq_multiplexed(int irq, void *data) -{ - struct cxl_afu *afu = data; - struct cxl_context *ctx; - struct cxl_irq_info irq_info; - u64 phreg = cxl_p2n_read(afu, CXL_PSL_PEHandle_An); - int ph, ret = IRQ_HANDLED, res; - - /* check if eeh kicked in while the interrupt was in flight */ - if (unlikely(phreg == ~0ULL)) { - dev_warn(&afu->dev, - "Ignoring slice interrupt(%d) due to fenced card", - irq); - return IRQ_HANDLED; - } - /* Mask the pe-handle from register value */ - ph = phreg & 0xffff; - if ((res = native_get_irq_info(afu, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", res); - if (afu->adapter->native->sl_ops->fail_irq) - return afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); - return ret; - } - - rcu_read_lock(); - ctx = idr_find(&afu->contexts_idr, ph); - if (ctx) { - if (afu->adapter->native->sl_ops->handle_interrupt) - ret = afu->adapter->native->sl_ops->handle_interrupt(irq, ctx, &irq_info); - rcu_read_unlock(); - return ret; - } - rcu_read_unlock(); - - WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" - " %016llx\n(Possible AFU HW issue - was a term/remove acked" - " with outstanding transactions?)\n", ph, irq_info.dsisr, - irq_info.dar); - if (afu->adapter->native->sl_ops->fail_irq) - ret = afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); - return ret; -} - -static void native_irq_wait(struct cxl_context *ctx) -{ - u64 dsisr; - int timeout = 1000; - int ph; - - /* - * Wait until no further interrupts are presented by the PSL - * for this context. - */ - while (timeout--) { - ph = cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) & 0xffff; - if (ph != ctx->pe) - return; - dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); - if (cxl_is_power8() && - ((dsisr & CXL_PSL_DSISR_PENDING) == 0)) - return; - if (cxl_is_power9() && - ((dsisr & CXL_PSL9_DSISR_PENDING) == 0)) - return; - /* - * We are waiting for the workqueue to process our - * irq, so need to let that run here. - */ - msleep(1); - } - - dev_warn(&ctx->afu->dev, "WARNING: waiting on DSI for PE %i" - " DSISR %016llx!\n", ph, dsisr); - return; -} - -static irqreturn_t native_slice_irq_err(int irq, void *data) -{ - struct cxl_afu *afu = data; - u64 errstat, serr, afu_error, dsisr; - u64 fir_slice, afu_debug, irq_mask; - - /* - * slice err interrupt is only used with full PSL (no XSL) - */ - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); - dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - cxl_afu_decode_psl_serr(afu, serr); - - if (cxl_is_power8()) { - fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); - afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); - } - dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); - dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); - dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); - - /* mask off the IRQ so it won't retrigger until the AFU is reset */ - irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; - serr |= irq_mask; - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); - - return IRQ_HANDLED; -} - -void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter) -{ - u64 fir1; - - fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1); - dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1); -} - -void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter) -{ - u64 fir1, fir2; - - fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); - dev_crit(&adapter->dev, - "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", - fir1, fir2); -} - -static irqreturn_t native_irq_err(int irq, void *data) -{ - struct cxl *adapter = data; - u64 err_ivte; - - WARN(1, "CXL ERROR interrupt %i\n", irq); - - err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); - dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); - - if (adapter->native->sl_ops->debugfs_stop_trace) { - dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); - adapter->native->sl_ops->debugfs_stop_trace(adapter); - } - - if (adapter->native->sl_ops->err_irq_dump_registers) - adapter->native->sl_ops->err_irq_dump_registers(adapter); - - return IRQ_HANDLED; -} - -int cxl_native_register_psl_err_irq(struct cxl *adapter) -{ - int rc; - - adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&adapter->dev)); - if (!adapter->irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter, - &adapter->native->err_hwirq, - &adapter->native->err_virq, - adapter->irq_name))) { - kfree(adapter->irq_name); - adapter->irq_name = NULL; - return rc; - } - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff); - - return 0; -} - -void cxl_native_release_psl_err_irq(struct cxl *adapter) -{ - if (adapter->native->err_virq == 0 || - adapter->native->err_virq != - irq_find_mapping(NULL, adapter->native->err_hwirq)) - return; - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); - cxl_unmap_irq(adapter->native->err_virq, adapter); - cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq); - kfree(adapter->irq_name); - adapter->native->err_virq = 0; -} - -int cxl_native_register_serr_irq(struct cxl_afu *afu) -{ - u64 serr; - int rc; - - afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&afu->dev)); - if (!afu->err_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu, - &afu->serr_hwirq, - &afu->serr_virq, afu->err_irq_name))) { - kfree(afu->err_irq_name); - afu->err_irq_name = NULL; - return rc; - } - - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - if (cxl_is_power8()) - serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); - if (cxl_is_power9()) { - /* - * By default, all errors are masked. So don't set all masks. - * Slice errors will be transfered. - */ - serr = (serr & ~0xff0000007fffffffULL) | (afu->serr_hwirq & 0xffff); - } - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - - return 0; -} - -void cxl_native_release_serr_irq(struct cxl_afu *afu) -{ - if (afu->serr_virq == 0 || - afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) - return; - - cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); - cxl_unmap_irq(afu->serr_virq, afu); - cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); - kfree(afu->err_irq_name); - afu->serr_virq = 0; -} - -int cxl_native_register_psl_irq(struct cxl_afu *afu) -{ - int rc; - - afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", - dev_name(&afu->dev)); - if (!afu->psl_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed, - afu, &afu->native->psl_hwirq, &afu->native->psl_virq, - afu->psl_irq_name))) { - kfree(afu->psl_irq_name); - afu->psl_irq_name = NULL; - } - return rc; -} - -void cxl_native_release_psl_irq(struct cxl_afu *afu) -{ - if (afu->native->psl_virq == 0 || - afu->native->psl_virq != - irq_find_mapping(NULL, afu->native->psl_hwirq)) - return; - - cxl_unmap_irq(afu->native->psl_virq, afu); - cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq); - kfree(afu->psl_irq_name); - afu->native->psl_virq = 0; -} - -static void recover_psl_err(struct cxl_afu *afu, u64 errstat) -{ - u64 dsisr; - - pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat); - - /* Clear PSL_DSISR[PE] */ - dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE); - - /* Write 1s to clear error status bits */ - cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); -} - -static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) -{ - trace_cxl_psl_irq_ack(ctx, tfc); - if (tfc) - cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); - if (psl_reset_mask) - recover_psl_err(ctx->afu, psl_reset_mask); - - return 0; -} - -int cxl_check_error(struct cxl_afu *afu) -{ - return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); -} - -static bool native_support_attributes(const char *attr_name, - enum cxl_attrs type) -{ - return true; -} - -static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) -{ - if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) - return -EIO; - if (unlikely(off >= afu->crs_len)) - return -ERANGE; - *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset + - (cr * afu->crs_len) + off); - return 0; -} - -static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) -{ - if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) - return -EIO; - if (unlikely(off >= afu->crs_len)) - return -ERANGE; - *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset + - (cr * afu->crs_len) + off); - return 0; -} - -static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - int rc; - - rc = native_afu_cr_read32(afu, cr, aligned_off, &val); - if (!rc) - *out = (val >> ((off & 0x3) * 8)) & 0xffff; - return rc; -} - -static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - int rc; - - rc = native_afu_cr_read32(afu, cr, aligned_off, &val); - if (!rc) - *out = (val >> ((off & 0x3) * 8)) & 0xff; - return rc; -} - -static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) -{ - if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) - return -EIO; - if (unlikely(off >= afu->crs_len)) - return -ERANGE; - out_le32(afu->native->afu_desc_mmio + afu->crs_offset + - (cr * afu->crs_len) + off, in); - return 0; -} - -static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) -{ - u64 aligned_off = off & ~0x3L; - u32 val32, mask, shift; - int rc; - - rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); - if (rc) - return rc; - shift = (off & 0x3) * 8; - WARN_ON(shift == 24); - mask = 0xffff << shift; - val32 = (val32 & ~mask) | (in << shift); - - rc = native_afu_cr_write32(afu, cr, aligned_off, val32); - return rc; -} - -static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) -{ - u64 aligned_off = off & ~0x3L; - u32 val32, mask, shift; - int rc; - - rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); - if (rc) - return rc; - shift = (off & 0x3) * 8; - mask = 0xff << shift; - val32 = (val32 & ~mask) | (in << shift); - - rc = native_afu_cr_write32(afu, cr, aligned_off, val32); - return rc; -} - -const struct cxl_backend_ops cxl_native_ops = { - .module = THIS_MODULE, - .adapter_reset = cxl_pci_reset, - .alloc_one_irq = cxl_pci_alloc_one_irq, - .release_one_irq = cxl_pci_release_one_irq, - .alloc_irq_ranges = cxl_pci_alloc_irq_ranges, - .release_irq_ranges = cxl_pci_release_irq_ranges, - .setup_irq = cxl_pci_setup_irq, - .handle_psl_slice_error = native_handle_psl_slice_error, - .psl_interrupt = NULL, - .ack_irq = native_ack_irq, - .irq_wait = native_irq_wait, - .attach_process = native_attach_process, - .detach_process = native_detach_process, - .update_ivtes = native_update_ivtes, - .support_attributes = native_support_attributes, - .link_ok = cxl_adapter_link_ok, - .release_afu = cxl_pci_release_afu, - .afu_read_err_buffer = cxl_pci_afu_read_err_buffer, - .afu_check_and_enable = native_afu_check_and_enable, - .afu_activate_mode = native_afu_activate_mode, - .afu_deactivate_mode = native_afu_deactivate_mode, - .afu_reset = native_afu_reset, - .afu_cr_read8 = native_afu_cr_read8, - .afu_cr_read16 = native_afu_cr_read16, - .afu_cr_read32 = native_afu_cr_read32, - .afu_cr_read64 = native_afu_cr_read64, - .afu_cr_write8 = native_afu_cr_write8, - .afu_cr_write16 = native_afu_cr_write16, - .afu_cr_write32 = native_afu_cr_write32, - .read_adapter_vpd = cxl_pci_read_adapter_vpd, -}; diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c deleted file mode 100644 index e26ee85279fa..000000000000 --- a/drivers/misc/cxl/of.c +++ /dev/null @@ -1,346 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2015 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" - -static int read_phys_addr(struct device_node *np, char *prop_name, - struct cxl_afu *afu) -{ - int i, len, entry_size, naddr, nsize, type; - u64 addr, size; - const __be32 *prop; - - naddr = of_n_addr_cells(np); - nsize = of_n_size_cells(np); - - prop = of_get_property(np, prop_name, &len); - if (prop) { - entry_size = naddr + nsize; - for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) { - type = be32_to_cpu(prop[0]); - addr = of_read_number(prop, naddr); - size = of_read_number(&prop[naddr], nsize); - switch (type) { - case 0: /* unit address */ - afu->guest->handle = addr; - break; - case 1: /* p2 area */ - afu->guest->p2n_phys += addr; - afu->guest->p2n_size = size; - break; - case 2: /* problem state area */ - afu->psn_phys += addr; - afu->adapter->ps_size = size; - break; - default: - pr_err("Invalid address type %d found in %s property of AFU\n", - type, prop_name); - return -EINVAL; - } - } - } - return 0; -} - -static int read_vpd(struct cxl *adapter, struct cxl_afu *afu) -{ - char vpd[256]; - int rc; - size_t len = sizeof(vpd); - - memset(vpd, 0, len); - - if (adapter) - rc = cxl_guest_read_adapter_vpd(adapter, vpd, len); - else - rc = cxl_guest_read_afu_vpd(afu, vpd, len); - - if (rc > 0) { - cxl_dump_debug_buffer(vpd, rc); - rc = 0; - } - return rc; -} - -int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np) -{ - return of_property_read_reg(afu_np, 0, &afu->guest->handle, NULL); -} - -int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np) -{ - int i, rc; - u16 device_id, vendor_id; - u32 val = 0, class_code; - - /* Properties are read in the same order as listed in PAPR */ - - rc = read_phys_addr(np, "reg", afu); - if (rc) - return rc; - - rc = read_phys_addr(np, "assigned-addresses", afu); - if (rc) - return rc; - - if (afu->psn_phys == 0) - afu->psa = false; - else - afu->psa = true; - - of_property_read_u32(np, "ibm,#processes", &afu->max_procs_virtualised); - - if (cxl_verbose) - read_vpd(NULL, afu); - - of_property_read_u32(np, "ibm,max-ints-per-process", &afu->guest->max_ints); - afu->irqs_max = afu->guest->max_ints; - - if (!of_property_read_u32(np, "ibm,min-ints-per-process", &afu->pp_irqs)) { - /* One extra interrupt for the PSL interrupt is already - * included. Remove it now to keep only AFU interrupts and - * match the native case. - */ - afu->pp_irqs--; - } - - of_property_read_u64(np, "ibm,error-buffer-size", &afu->eb_len); - afu->eb_offset = 0; - - of_property_read_u64(np, "ibm,config-record-size", &afu->crs_len); - afu->crs_offset = 0; - - of_property_read_u32(np, "ibm,#config-records", &afu->crs_num); - - if (cxl_verbose) { - for (i = 0; i < afu->crs_num; i++) { - rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID, - &device_id); - if (!rc) - pr_info("record %d - device-id: %#x\n", - i, device_id); - rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID, - &vendor_id); - if (!rc) - pr_info("record %d - vendor-id: %#x\n", - i, vendor_id); - rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION, - &class_code); - if (!rc) { - class_code >>= 8; - pr_info("record %d - class-code: %#x\n", - i, class_code); - } - } - } - /* - * if "ibm,process-mmio" doesn't exist then per-process mmio is - * not supported - */ - val = 0; - if (!of_property_read_u32(np, "ibm,process-mmio", &val) && val == 1) - afu->pp_psa = true; - else - afu->pp_psa = false; - - if (!of_property_read_u32(np, "ibm,function-error-interrupt", &val)) - afu->serr_hwirq = val; - - pr_devel("AFU handle: %#llx\n", afu->guest->handle); - pr_devel("p2n_phys: %#llx (size %#llx)\n", - afu->guest->p2n_phys, afu->guest->p2n_size); - pr_devel("psn_phys: %#llx (size %#llx)\n", - afu->psn_phys, afu->adapter->ps_size); - pr_devel("Max number of processes virtualised=%i\n", - afu->max_procs_virtualised); - pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs, - afu->irqs_max); - pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq); - - return 0; -} - -static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np) -{ - const __be32 *ranges; - int len, nranges, i; - struct irq_avail *cur; - - ranges = of_get_property(np, "interrupt-ranges", &len); - if (ranges == NULL || len < (2 * sizeof(int))) - return -EINVAL; - - /* - * encoded array of two cells per entry, each cell encoded as - * with encode-int - */ - nranges = len / (2 * sizeof(int)); - if (nranges == 0 || (nranges * 2 * sizeof(int)) != len) - return -EINVAL; - - adapter->guest->irq_avail = kcalloc(nranges, sizeof(struct irq_avail), - GFP_KERNEL); - if (adapter->guest->irq_avail == NULL) - return -ENOMEM; - - adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]); - for (i = 0; i < nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - cur->offset = be32_to_cpu(ranges[i * 2]); - cur->range = be32_to_cpu(ranges[i * 2 + 1]); - cur->bitmap = bitmap_zalloc(cur->range, GFP_KERNEL); - if (cur->bitmap == NULL) - goto err; - if (cur->offset < adapter->guest->irq_base_offset) - adapter->guest->irq_base_offset = cur->offset; - if (cxl_verbose) - pr_info("available IRQ range: %#lx-%#lx (%lu)\n", - cur->offset, cur->offset + cur->range - 1, - cur->range); - } - adapter->guest->irq_nranges = nranges; - spin_lock_init(&adapter->guest->irq_alloc_lock); - - return 0; -err: - for (i--; i >= 0; i--) { - cur = &adapter->guest->irq_avail[i]; - bitmap_free(cur->bitmap); - } - kfree(adapter->guest->irq_avail); - adapter->guest->irq_avail = NULL; - return -ENOMEM; -} - -int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np) -{ - return of_property_read_reg(np, 0, &adapter->guest->handle, NULL); -} - -int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np) -{ - int rc; - const char *p; - u32 val = 0; - - /* Properties are read in the same order as listed in PAPR */ - - if ((rc = read_adapter_irq_config(adapter, np))) - return rc; - - if (!of_property_read_u32(np, "ibm,caia-version", &val)) { - adapter->caia_major = (val & 0xFF00) >> 8; - adapter->caia_minor = val & 0xFF; - } - - if (!of_property_read_u32(np, "ibm,psl-revision", &val)) - adapter->psl_rev = val; - - if (!of_property_read_string(np, "status", &p)) { - adapter->guest->status = kasprintf(GFP_KERNEL, "%s", p); - if (adapter->guest->status == NULL) - return -ENOMEM; - } - - if (!of_property_read_u32(np, "vendor-id", &val)) - adapter->guest->vendor = val; - - if (!of_property_read_u32(np, "device-id", &val)) - adapter->guest->device = val; - - if (!of_property_read_u32(np, "subsystem-vendor-id", &val)) - adapter->guest->subsystem_vendor = val; - - if (!of_property_read_u32(np, "subsystem-id", &val)) - adapter->guest->subsystem = val; - - if (cxl_verbose) - read_vpd(adapter, NULL); - - return 0; -} - -static void cxl_of_remove(struct platform_device *pdev) -{ - struct cxl *adapter; - int afu; - - adapter = dev_get_drvdata(&pdev->dev); - for (afu = 0; afu < adapter->slices; afu++) - cxl_guest_remove_afu(adapter->afu[afu]); - - cxl_guest_remove_adapter(adapter); -} - -static void cxl_of_shutdown(struct platform_device *pdev) -{ - cxl_of_remove(pdev); -} - -int cxl_of_probe(struct platform_device *pdev) -{ - struct device_node *np = NULL; - struct device_node *afu_np = NULL; - struct cxl *adapter = NULL; - int ret; - int slice = 0, slice_ok = 0; - - dev_err_once(&pdev->dev, "DEPRECATION: cxl is deprecated and will be removed in a future kernel release\n"); - - pr_devel("in %s\n", __func__); - - np = pdev->dev.of_node; - if (np == NULL) - return -ENODEV; - - /* init adapter */ - adapter = cxl_guest_init_adapter(np, pdev); - if (IS_ERR(adapter)) { - dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter)); - return PTR_ERR(adapter); - } - - /* init afu */ - for_each_child_of_node(np, afu_np) { - if ((ret = cxl_guest_init_afu(adapter, slice, afu_np))) - dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n", - slice, ret); - else - slice_ok++; - slice++; - } - - if (slice_ok == 0) { - dev_info(&pdev->dev, "No active AFU"); - adapter->slices = 0; - } - - return 0; -} - -static const struct of_device_id cxl_of_match[] = { - { .compatible = "ibm,coherent-platform-facility",}, - {}, -}; -MODULE_DEVICE_TABLE(of, cxl_of_match); - -struct platform_driver cxl_of_driver = { - .driver = { - .name = "cxl_of", - .of_match_table = cxl_of_match, - .owner = THIS_MODULE - }, - .probe = cxl_of_probe, - .remove = cxl_of_remove, - .shutdown = cxl_of_shutdown, -}; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c deleted file mode 100644 index 92bf7c5c7b35..000000000000 --- a/drivers/misc/cxl/pci.c +++ /dev/null @@ -1,2103 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxl.h" -#include - - -#define CXL_PCI_VSEC_ID 0x1280 -#define CXL_VSEC_MIN_SIZE 0x80 - -#define CXL_READ_VSEC_LENGTH(dev, vsec, dest) \ - { \ - pci_read_config_word(dev, vsec + 0x6, dest); \ - *dest >>= 4; \ - } -#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0x8, dest) - -#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0x9, dest) -#define CXL_STATUS_SECOND_PORT 0x80 -#define CXL_STATUS_MSI_X_FULL 0x40 -#define CXL_STATUS_MSI_X_SINGLE 0x20 -#define CXL_STATUS_FLASH_RW 0x08 -#define CXL_STATUS_FLASH_RO 0x04 -#define CXL_STATUS_LOADABLE_AFU 0x02 -#define CXL_STATUS_LOADABLE_PSL 0x01 -/* If we see these features we won't try to use the card */ -#define CXL_UNSUPPORTED_FEATURES \ - (CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE) - -#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0xa, dest) -#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ - pci_write_config_byte(dev, vsec + 0xa, val) -#define CXL_VSEC_PROTOCOL_MASK 0xe0 -#define CXL_VSEC_PROTOCOL_1024TB 0x80 -#define CXL_VSEC_PROTOCOL_512TB 0x40 -#define CXL_VSEC_PROTOCOL_256TB 0x20 /* Power 8/9 uses this */ -#define CXL_VSEC_PROTOCOL_ENABLE 0x01 - -#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \ - pci_read_config_word(dev, vsec + 0xc, dest) -#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0xe, dest) -#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0xf, dest) -#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \ - pci_read_config_word(dev, vsec + 0x10, dest) - -#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \ - pci_read_config_byte(dev, vsec + 0x13, dest) -#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \ - pci_write_config_byte(dev, vsec + 0x13, val) -#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */ -#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */ -#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */ - -#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \ - pci_read_config_dword(dev, vsec + 0x20, dest) -#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \ - pci_read_config_dword(dev, vsec + 0x24, dest) -#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \ - pci_read_config_dword(dev, vsec + 0x28, dest) -#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \ - pci_read_config_dword(dev, vsec + 0x2c, dest) - - -/* This works a little different than the p1/p2 register accesses to make it - * easier to pull out individual fields */ -#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off) -#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off) -#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) -#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) - -#define AFUD_READ_INFO(afu) AFUD_READ(afu, 0x0) -#define AFUD_NUM_INTS_PER_PROC(val) EXTRACT_PPC_BITS(val, 0, 15) -#define AFUD_NUM_PROCS(val) EXTRACT_PPC_BITS(val, 16, 31) -#define AFUD_NUM_CRS(val) EXTRACT_PPC_BITS(val, 32, 47) -#define AFUD_MULTIMODE(val) EXTRACT_PPC_BIT(val, 48) -#define AFUD_PUSH_BLOCK_TRANSFER(val) EXTRACT_PPC_BIT(val, 55) -#define AFUD_DEDICATED_PROCESS(val) EXTRACT_PPC_BIT(val, 59) -#define AFUD_AFU_DIRECTED(val) EXTRACT_PPC_BIT(val, 61) -#define AFUD_TIME_SLICED(val) EXTRACT_PPC_BIT(val, 63) -#define AFUD_READ_CR(afu) AFUD_READ(afu, 0x20) -#define AFUD_CR_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) -#define AFUD_READ_CR_OFF(afu) AFUD_READ(afu, 0x28) -#define AFUD_READ_PPPSA(afu) AFUD_READ(afu, 0x30) -#define AFUD_PPPSA_PP(val) EXTRACT_PPC_BIT(val, 6) -#define AFUD_PPPSA_PSA(val) EXTRACT_PPC_BIT(val, 7) -#define AFUD_PPPSA_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) -#define AFUD_READ_PPPSA_OFF(afu) AFUD_READ(afu, 0x38) -#define AFUD_READ_EB(afu) AFUD_READ(afu, 0x40) -#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) -#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) - -static const struct pci_device_id cxl_pci_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), }, - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), }, - { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), }, - { } -}; -MODULE_DEVICE_TABLE(pci, cxl_pci_tbl); - - -/* - * Mostly using these wrappers to avoid confusion: - * priv 1 is BAR2, while priv 2 is BAR0 - */ -static inline resource_size_t p1_base(struct pci_dev *dev) -{ - return pci_resource_start(dev, 2); -} - -static inline resource_size_t p1_size(struct pci_dev *dev) -{ - return pci_resource_len(dev, 2); -} - -static inline resource_size_t p2_base(struct pci_dev *dev) -{ - return pci_resource_start(dev, 0); -} - -static inline resource_size_t p2_size(struct pci_dev *dev) -{ - return pci_resource_len(dev, 0); -} - -static int find_cxl_vsec(struct pci_dev *dev) -{ - return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID); -} - -static void dump_cxl_config_space(struct pci_dev *dev) -{ - int vsec; - u32 val; - - dev_info(&dev->dev, "dump_cxl_config_space\n"); - - pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val); - dev_info(&dev->dev, "BAR0: %#.8x\n", val); - pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val); - dev_info(&dev->dev, "BAR1: %#.8x\n", val); - pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val); - dev_info(&dev->dev, "BAR2: %#.8x\n", val); - pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val); - dev_info(&dev->dev, "BAR3: %#.8x\n", val); - pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val); - dev_info(&dev->dev, "BAR4: %#.8x\n", val); - pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val); - dev_info(&dev->dev, "BAR5: %#.8x\n", val); - - dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", - p1_base(dev), p1_size(dev)); - dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", - p2_base(dev), p2_size(dev)); - dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", - pci_resource_start(dev, 4), pci_resource_len(dev, 4)); - - if (!(vsec = find_cxl_vsec(dev))) - return; - -#define show_reg(name, what) \ - dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what) - - pci_read_config_dword(dev, vsec + 0x0, &val); - show_reg("Cap ID", (val >> 0) & 0xffff); - show_reg("Cap Ver", (val >> 16) & 0xf); - show_reg("Next Cap Ptr", (val >> 20) & 0xfff); - pci_read_config_dword(dev, vsec + 0x4, &val); - show_reg("VSEC ID", (val >> 0) & 0xffff); - show_reg("VSEC Rev", (val >> 16) & 0xf); - show_reg("VSEC Length", (val >> 20) & 0xfff); - pci_read_config_dword(dev, vsec + 0x8, &val); - show_reg("Num AFUs", (val >> 0) & 0xff); - show_reg("Status", (val >> 8) & 0xff); - show_reg("Mode Control", (val >> 16) & 0xff); - show_reg("Reserved", (val >> 24) & 0xff); - pci_read_config_dword(dev, vsec + 0xc, &val); - show_reg("PSL Rev", (val >> 0) & 0xffff); - show_reg("CAIA Ver", (val >> 16) & 0xffff); - pci_read_config_dword(dev, vsec + 0x10, &val); - show_reg("Base Image Rev", (val >> 0) & 0xffff); - show_reg("Reserved", (val >> 16) & 0x0fff); - show_reg("Image Control", (val >> 28) & 0x3); - show_reg("Reserved", (val >> 30) & 0x1); - show_reg("Image Loaded", (val >> 31) & 0x1); - - pci_read_config_dword(dev, vsec + 0x14, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x18, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x1c, &val); - show_reg("Reserved", val); - - pci_read_config_dword(dev, vsec + 0x20, &val); - show_reg("AFU Descriptor Offset", val); - pci_read_config_dword(dev, vsec + 0x24, &val); - show_reg("AFU Descriptor Size", val); - pci_read_config_dword(dev, vsec + 0x28, &val); - show_reg("Problem State Offset", val); - pci_read_config_dword(dev, vsec + 0x2c, &val); - show_reg("Problem State Size", val); - - pci_read_config_dword(dev, vsec + 0x30, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x34, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x38, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x3c, &val); - show_reg("Reserved", val); - - pci_read_config_dword(dev, vsec + 0x40, &val); - show_reg("PSL Programming Port", val); - pci_read_config_dword(dev, vsec + 0x44, &val); - show_reg("PSL Programming Control", val); - - pci_read_config_dword(dev, vsec + 0x48, &val); - show_reg("Reserved", val); - pci_read_config_dword(dev, vsec + 0x4c, &val); - show_reg("Reserved", val); - - pci_read_config_dword(dev, vsec + 0x50, &val); - show_reg("Flash Address Register", val); - pci_read_config_dword(dev, vsec + 0x54, &val); - show_reg("Flash Size Register", val); - pci_read_config_dword(dev, vsec + 0x58, &val); - show_reg("Flash Status/Control Register", val); - pci_read_config_dword(dev, vsec + 0x58, &val); - show_reg("Flash Data Port", val); - -#undef show_reg -} - -static void dump_afu_descriptor(struct cxl_afu *afu) -{ - u64 val, afu_cr_num, afu_cr_off, afu_cr_len; - int i; - -#define show_reg(name, what) \ - dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) - - val = AFUD_READ_INFO(afu); - show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val)); - show_reg("num_of_processes", AFUD_NUM_PROCS(val)); - show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); - show_reg("req_prog_mode", val & 0xffffULL); - afu_cr_num = AFUD_NUM_CRS(val); - - val = AFUD_READ(afu, 0x8); - show_reg("Reserved", val); - val = AFUD_READ(afu, 0x10); - show_reg("Reserved", val); - val = AFUD_READ(afu, 0x18); - show_reg("Reserved", val); - - val = AFUD_READ_CR(afu); - show_reg("Reserved", (val >> (63-7)) & 0xff); - show_reg("AFU_CR_len", AFUD_CR_LEN(val)); - afu_cr_len = AFUD_CR_LEN(val) * 256; - - val = AFUD_READ_CR_OFF(afu); - afu_cr_off = val; - show_reg("AFU_CR_offset", val); - - val = AFUD_READ_PPPSA(afu); - show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff); - show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val)); - - val = AFUD_READ_PPPSA_OFF(afu); - show_reg("PerProcessPSA_offset", val); - - val = AFUD_READ_EB(afu); - show_reg("Reserved", (val >> (63-7)) & 0xff); - show_reg("AFU_EB_len", AFUD_EB_LEN(val)); - - val = AFUD_READ_EB_OFF(afu); - show_reg("AFU_EB_offset", val); - - for (i = 0; i < afu_cr_num; i++) { - val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len); - show_reg("CR Vendor", val & 0xffff); - show_reg("CR Device", (val >> 16) & 0xffff); - } -#undef show_reg -} - -#define P8_CAPP_UNIT0_ID 0xBA -#define P8_CAPP_UNIT1_ID 0XBE -#define P9_CAPP_UNIT0_ID 0xC0 -#define P9_CAPP_UNIT1_ID 0xE0 - -static int get_phb_index(struct device_node *np, u32 *phb_index) -{ - if (of_property_read_u32(np, "ibm,phb-index", phb_index)) - return -ENODEV; - return 0; -} - -static u64 get_capp_unit_id(struct device_node *np, u32 phb_index) -{ - /* - * POWER 8: - * - For chips other than POWER8NVL, we only have CAPP 0, - * irrespective of which PHB is used. - * - For POWER8NVL, assume CAPP 0 is attached to PHB0 and - * CAPP 1 is attached to PHB1. - */ - if (cxl_is_power8()) { - if (!pvr_version_is(PVR_POWER8NVL)) - return P8_CAPP_UNIT0_ID; - - if (phb_index == 0) - return P8_CAPP_UNIT0_ID; - - if (phb_index == 1) - return P8_CAPP_UNIT1_ID; - } - - /* - * POWER 9: - * PEC0 (PHB0). Capp ID = CAPP0 (0b1100_0000) - * PEC1 (PHB1 - PHB2). No capi mode - * PEC2 (PHB3 - PHB4 - PHB5): Capi mode on PHB3 only. Capp ID = CAPP1 (0b1110_0000) - */ - if (cxl_is_power9()) { - if (phb_index == 0) - return P9_CAPP_UNIT0_ID; - - if (phb_index == 3) - return P9_CAPP_UNIT1_ID; - } - - return 0; -} - -int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, - u32 *phb_index, u64 *capp_unit_id) -{ - int rc; - struct device_node *np; - u32 id; - - if (!(np = pnv_pci_get_phb_node(dev))) - return -ENODEV; - - while (np && of_property_read_u32(np, "ibm,chip-id", &id)) - np = of_get_next_parent(np); - if (!np) - return -ENODEV; - - *chipid = id; - - rc = get_phb_index(np, phb_index); - if (rc) { - pr_err("cxl: invalid phb index\n"); - of_node_put(np); - return rc; - } - - *capp_unit_id = get_capp_unit_id(np, *phb_index); - of_node_put(np); - if (!*capp_unit_id) { - pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", - *chipid, *phb_index); - return -ENODEV; - } - - return 0; -} - -static DEFINE_MUTEX(indications_mutex); - -static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind, - u64 *nbwind) -{ - static u32 val[3]; - struct device_node *np; - - mutex_lock(&indications_mutex); - if (!val[0]) { - if (!(np = pnv_pci_get_phb_node(dev))) { - mutex_unlock(&indications_mutex); - return -ENODEV; - } - - if (of_property_read_u32_array(np, "ibm,phb-indications", val, 3)) { - val[2] = 0x0300UL; /* legacy values */ - val[1] = 0x0400UL; - val[0] = 0x0200UL; - } - of_node_put(np); - } - *capiind = val[0]; - *asnind = val[1]; - *nbwind = val[2]; - mutex_unlock(&indications_mutex); - return 0; -} - -int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) -{ - u64 xsl_dsnctl; - u64 capiind, asnind, nbwind; - - /* - * CAPI Identifier bits [0:7] - * bit 61:60 MSI bits --> 0 - * bit 59 TVT selector --> 0 - */ - if (get_phb_indications(dev, &capiind, &asnind, &nbwind)) - return -ENODEV; - - /* - * Tell XSL where to route data to. - * The field chipid should match the PHB CAPI_CMPM register - */ - xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */ - xsl_dsnctl |= (capp_unit_id << (63-15)); - - /* nMMU_ID Defaults to: b’000001001’*/ - xsl_dsnctl |= ((u64)0x09 << (63-28)); - - /* - * Used to identify CAPI packets which should be sorted into - * the Non-Blocking queues by the PHB. This field should match - * the PHB PBL_NBW_CMPM register - * nbwind=0x03, bits [57:58], must include capi indicator. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= (nbwind << (63-55)); - - /* - * Upper 16b address bits of ASB_Notify messages sent to the - * system. Need to match the PHB’s ASN Compare/Mask Register. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= asnind; - - *reg = xsl_dsnctl; - return 0; -} - -static int init_implementation_adapter_regs_psl9(struct cxl *adapter, - struct pci_dev *dev) -{ - u64 xsl_dsnctl, psl_fircntl; - u64 chipid; - u32 phb_index; - u64 capp_unit_id; - u64 psl_debug; - int rc; - - rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); - if (rc) - return rc; - - rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl); - if (rc) - return rc; - - cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl); - - /* Set fir_cntl to recommended value for production env */ - psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ - psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ - psl_fircntl |= 0x1ULL; /* ce_thresh */ - cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl); - - /* Setup the PSL to transmit packets on the PCIe before the - * CAPP is enabled. Make sure that CAPP virtual machines are disabled - */ - cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000012A10ULL); - - /* - * A response to an ASB_Notify request is returned by the - * system as an MMIO write to the address defined in - * the PSL_TNR_ADDR register. - * keep the Reset Value: 0x00020000E0000000 - */ - - /* Enable XSL rty limit */ - cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL); - - /* Change XSL_INV dummy read threshold */ - cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL); - - if (phb_index == 3) { - /* disable machines 31-47 and 20-27 for DMA */ - cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL); - } - - /* Snoop machines */ - cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); - - /* Enable NORST and DD2 features */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); - - /* - * Check if PSL has data-cache. We need to flush adapter datacache - * when as its about to be removed. - */ - psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG); - if (psl_debug & CXL_PSL_DEBUG_CDC) { - dev_dbg(&dev->dev, "No data-cache present\n"); - adapter->native->no_data_cache = true; - } - - return 0; -} - -static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci_dev *dev) -{ - u64 psl_dsnctl, psl_fircntl; - u64 chipid; - u32 phb_index; - u64 capp_unit_id; - int rc; - - rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); - if (rc) - return rc; - - psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */ - psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */ - /* Tell PSL where to route data to */ - psl_dsnctl |= (chipid << (63-5)); - psl_dsnctl |= (capp_unit_id << (63-13)); - - cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl); - cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); - /* snoop write mask */ - cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); - /* set fir_cntl to recommended value for production env */ - psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ - psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ - psl_fircntl |= 0x1ULL; /* ce_thresh */ - cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); - /* for debugging with trace arrays */ - cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); - - return 0; -} - -/* PSL */ -#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) -#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) -/* For the PSL this is a multiple for 0 < n <= 7: */ -#define PSL_2048_250MHZ_CYCLES 1 - -static void write_timebase_ctrl_psl8(struct cxl *adapter) -{ - cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, - TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); -} - -static u64 timebase_read_psl9(struct cxl *adapter) -{ - return cxl_p1_read(adapter, CXL_PSL9_Timebase); -} - -static u64 timebase_read_psl8(struct cxl *adapter) -{ - return cxl_p1_read(adapter, CXL_PSL_Timebase); -} - -static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) -{ - struct device_node *np; - - adapter->psl_timebase_synced = false; - - if (!(np = pnv_pci_get_phb_node(dev))) - return; - - /* Do not fail when CAPP timebase sync is not supported by OPAL */ - of_node_get(np); - if (!of_property_present(np, "ibm,capp-timebase-sync")) { - of_node_put(np); - dev_info(&dev->dev, "PSL timebase inactive: OPAL support missing\n"); - return; - } - of_node_put(np); - - /* - * Setup PSL Timebase Control and Status register - * with the recommended Timebase Sync Count value - */ - if (adapter->native->sl_ops->write_timebase_ctrl) - adapter->native->sl_ops->write_timebase_ctrl(adapter); - - /* Enable PSL Timebase */ - cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); - cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); - - return; -} - -static int init_implementation_afu_regs_psl9(struct cxl_afu *afu) -{ - return 0; -} - -static int init_implementation_afu_regs_psl8(struct cxl_afu *afu) -{ - /* read/write masks for this slice */ - cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); - /* APC read/write masks for this slice */ - cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); - /* for debugging with trace arrays */ - cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); - cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S); - - return 0; -} - -int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, - unsigned int virq) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); -} - -int cxl_update_image_control(struct cxl *adapter) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - int rc; - int vsec; - u8 image_state; - - if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); - return -ENODEV; - } - - if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) { - dev_err(&dev->dev, "failed to read image state: %i\n", rc); - return rc; - } - - if (adapter->perst_loads_image) - image_state |= CXL_VSEC_PERST_LOADS_IMAGE; - else - image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE; - - if (adapter->perst_select_user) - image_state |= CXL_VSEC_PERST_SELECT_USER; - else - image_state &= ~CXL_VSEC_PERST_SELECT_USER; - - if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) { - dev_err(&dev->dev, "failed to update image control: %i\n", rc); - return rc; - } - - return 0; -} - -int cxl_pci_alloc_one_irq(struct cxl *adapter) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - return pnv_cxl_alloc_hwirqs(dev, 1); -} - -void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - return pnv_cxl_release_hwirqs(dev, hwirq, 1); -} - -int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, - struct cxl *adapter, unsigned int num) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); -} - -void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, - struct cxl *adapter) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - pnv_cxl_release_hwirq_ranges(irqs, dev); -} - -static int setup_cxl_bars(struct pci_dev *dev) -{ - /* Safety check in case we get backported to < 3.17 without M64 */ - if ((p1_base(dev) < 0x100000000ULL) || - (p2_base(dev) < 0x100000000ULL)) { - dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n"); - return -ENODEV; - } - - /* - * BAR 4/5 has a special meaning for CXL and must be programmed with a - * special value corresponding to the CXL protocol address range. - * For POWER 8/9 that means bits 48:49 must be set to 10 - */ - pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000); - pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000); - - return 0; -} - -/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ -static int switch_card_to_cxl(struct pci_dev *dev) -{ - int vsec; - u8 val; - int rc; - - dev_info(&dev->dev, "switch card to CXL\n"); - - if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); - return -ENODEV; - } - - if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { - dev_err(&dev->dev, "failed to read current mode control: %i", rc); - return rc; - } - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; - if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { - dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); - return rc; - } - /* - * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states - * we must wait 100ms after this mode switch before touching - * PCIe config space. - */ - msleep(100); - - return 0; -} - -static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) -{ - u64 p1n_base, p2n_base, afu_desc; - const u64 p1n_size = 0x100; - const u64 p2n_size = 0x1000; - - p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); - p2n_base = p2_base(dev) + (afu->slice * p2n_size); - afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size)); - afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size); - - if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size))) - goto err; - if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) - goto err1; - if (afu_desc) { - if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size))) - goto err2; - } - - return 0; -err2: - iounmap(afu->p2n_mmio); -err1: - iounmap(afu->native->p1n_mmio); -err: - dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); - return -ENOMEM; -} - -static void pci_unmap_slice_regs(struct cxl_afu *afu) -{ - if (afu->p2n_mmio) { - iounmap(afu->p2n_mmio); - afu->p2n_mmio = NULL; - } - if (afu->native->p1n_mmio) { - iounmap(afu->native->p1n_mmio); - afu->native->p1n_mmio = NULL; - } - if (afu->native->afu_desc_mmio) { - iounmap(afu->native->afu_desc_mmio); - afu->native->afu_desc_mmio = NULL; - } -} - -void cxl_pci_release_afu(struct device *dev) -{ - struct cxl_afu *afu = to_cxl_afu(dev); - - pr_devel("%s\n", __func__); - - idr_destroy(&afu->contexts_idr); - cxl_release_spa(afu); - - kfree(afu->native); - kfree(afu); -} - -/* Expects AFU struct to have recently been zeroed out */ -static int cxl_read_afu_descriptor(struct cxl_afu *afu) -{ - u64 val; - - val = AFUD_READ_INFO(afu); - afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val); - afu->max_procs_virtualised = AFUD_NUM_PROCS(val); - afu->crs_num = AFUD_NUM_CRS(val); - - if (AFUD_AFU_DIRECTED(val)) - afu->modes_supported |= CXL_MODE_DIRECTED; - if (AFUD_DEDICATED_PROCESS(val)) - afu->modes_supported |= CXL_MODE_DEDICATED; - if (AFUD_TIME_SLICED(val)) - afu->modes_supported |= CXL_MODE_TIME_SLICED; - - val = AFUD_READ_PPPSA(afu); - afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; - afu->psa = AFUD_PPPSA_PSA(val); - if ((afu->pp_psa = AFUD_PPPSA_PP(val))) - afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu); - - val = AFUD_READ_CR(afu); - afu->crs_len = AFUD_CR_LEN(val) * 256; - afu->crs_offset = AFUD_READ_CR_OFF(afu); - - - /* eb_len is in multiple of 4K */ - afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096; - afu->eb_offset = AFUD_READ_EB_OFF(afu); - - /* eb_off is 4K aligned so lower 12 bits are always zero */ - if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) { - dev_warn(&afu->dev, - "Invalid AFU error buffer offset %Lx\n", - afu->eb_offset); - dev_info(&afu->dev, - "Ignoring AFU error buffer in the descriptor\n"); - /* indicate that no afu buffer exists */ - afu->eb_len = 0; - } - - return 0; -} - -static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) -{ - int i, rc; - u32 val; - - if (afu->psa && afu->adapter->ps_size < - (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { - dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); - return -ENODEV; - } - - if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) - dev_warn(&afu->dev, "AFU uses pp_size(%#016llx) < PAGE_SIZE per-process PSA!\n", afu->pp_size); - - for (i = 0; i < afu->crs_num; i++) { - rc = cxl_ops->afu_cr_read32(afu, i, 0, &val); - if (rc || val == 0) { - dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); - return -EINVAL; - } - } - - if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) { - /* - * We could also check this for the dedicated process model - * since the architecture indicates it should be set to 1, but - * in that case we ignore the value and I'd rather not risk - * breaking any existing dedicated process AFUs that left it as - * 0 (not that I'm aware of any). It is clearly an error for an - * AFU directed AFU to set this to 0, and would have previously - * triggered a bug resulting in the maximum not being enforced - * at all since idr_alloc treats 0 as no maximum. - */ - dev_err(&afu->dev, "AFU does not support any processes\n"); - return -EINVAL; - } - - return 0; -} - -static int sanitise_afu_regs_psl9(struct cxl_afu *afu) -{ - u64 reg; - - /* - * Clear out any regs that contain either an IVTE or address or may be - * waiting on an acknowledgment to try to be a bit safer as we bring - * it online - */ - reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { - dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); - if (cxl_ops->afu_reset(afu)) - return -EIO; - if (cxl_afu_disable(afu)) - return -EIO; - if (cxl_psl_purge(afu)) - return -EIO; - } - cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); - reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - if (reg) { - dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); - if (reg & CXL_PSL9_DSISR_An_TF) - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - else - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - } - if (afu->adapter->native->sl_ops->register_serr_irq) { - reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); - if (reg) { - if (reg & ~0x000000007fffffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); - cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); - } - } - reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - if (reg) { - dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); - cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); - } - - return 0; -} - -static int sanitise_afu_regs_psl8(struct cxl_afu *afu) -{ - u64 reg; - - /* - * Clear out any regs that contain either an IVTE or address or may be - * waiting on an acknowledgement to try to be a bit safer as we bring - * it online - */ - reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); - if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { - dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); - if (cxl_ops->afu_reset(afu)) - return -EIO; - if (cxl_afu_disable(afu)) - return -EIO; - if (cxl_psl_purge(afu)) - return -EIO; - } - cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000); - cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000); - cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000); - cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000); - cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000); - cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000); - cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); - reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - if (reg) { - dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); - if (reg & CXL_PSL_DSISR_TRANS) - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - else - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - } - if (afu->adapter->native->sl_ops->register_serr_irq) { - reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); - if (reg) { - if (reg & ~0xffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); - cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); - } - } - reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - if (reg) { - dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); - cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); - } - - return 0; -} - -#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE -/* - * afu_eb_read: - * Called from sysfs and reads the afu error info buffer. The h/w only supports - * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte - * aligned the function uses a bounce buffer which can be max PAGE_SIZE. - */ -ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, - loff_t off, size_t count) -{ - loff_t aligned_start, aligned_end; - size_t aligned_length; - void *tbuf; - const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset; - - if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) - return 0; - - /* calculate aligned read window */ - count = min((size_t)(afu->eb_len - off), count); - aligned_start = round_down(off, 8); - aligned_end = round_up(off + count, 8); - aligned_length = aligned_end - aligned_start; - - /* max we can copy in one read is PAGE_SIZE */ - if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) { - aligned_length = ERR_BUFF_MAX_COPY_SIZE; - count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); - } - - /* use bounce buffer for copy */ - tbuf = (void *)__get_free_page(GFP_KERNEL); - if (!tbuf) - return -ENOMEM; - - /* perform aligned read from the mmio region */ - memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length); - memcpy(buf, tbuf + (off & 0x7), count); - - free_page((unsigned long)tbuf); - - return count; -} - -static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) -{ - int rc; - - if ((rc = pci_map_slice_regs(afu, adapter, dev))) - return rc; - - if (adapter->native->sl_ops->sanitise_afu_regs) { - rc = adapter->native->sl_ops->sanitise_afu_regs(afu); - if (rc) - goto err1; - } - - /* We need to reset the AFU before we can read the AFU descriptor */ - if ((rc = cxl_ops->afu_reset(afu))) - goto err1; - - if (cxl_verbose) - dump_afu_descriptor(afu); - - if ((rc = cxl_read_afu_descriptor(afu))) - goto err1; - - if ((rc = cxl_afu_descriptor_looks_ok(afu))) - goto err1; - - if (adapter->native->sl_ops->afu_regs_init) - if ((rc = adapter->native->sl_ops->afu_regs_init(afu))) - goto err1; - - if (adapter->native->sl_ops->register_serr_irq) - if ((rc = adapter->native->sl_ops->register_serr_irq(afu))) - goto err1; - - if ((rc = cxl_native_register_psl_irq(afu))) - goto err2; - - atomic_set(&afu->configured_state, 0); - return 0; - -err2: - if (adapter->native->sl_ops->release_serr_irq) - adapter->native->sl_ops->release_serr_irq(afu); -err1: - pci_unmap_slice_regs(afu); - return rc; -} - -static void pci_deconfigure_afu(struct cxl_afu *afu) -{ - /* - * It's okay to deconfigure when AFU is already locked, otherwise wait - * until there are no readers - */ - if (atomic_read(&afu->configured_state) != -1) { - while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1) - schedule(); - } - cxl_native_release_psl_irq(afu); - if (afu->adapter->native->sl_ops->release_serr_irq) - afu->adapter->native->sl_ops->release_serr_irq(afu); - pci_unmap_slice_regs(afu); -} - -static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) -{ - struct cxl_afu *afu; - int rc = -ENOMEM; - - afu = cxl_alloc_afu(adapter, slice); - if (!afu) - return -ENOMEM; - - afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL); - if (!afu->native) - goto err_free_afu; - - mutex_init(&afu->native->spa_mutex); - - rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); - if (rc) - goto err_free_native; - - rc = pci_configure_afu(afu, adapter, dev); - if (rc) - goto err_free_native; - - /* Don't care if this fails */ - cxl_debugfs_afu_add(afu); - - /* - * After we call this function we must not free the afu directly, even - * if it returns an error! - */ - if ((rc = cxl_register_afu(afu))) - goto err_put_dev; - - if ((rc = cxl_sysfs_afu_add(afu))) - goto err_del_dev; - - adapter->afu[afu->slice] = afu; - - if ((rc = cxl_pci_vphb_add(afu))) - dev_info(&afu->dev, "Can't register vPHB\n"); - - return 0; - -err_del_dev: - device_del(&afu->dev); -err_put_dev: - pci_deconfigure_afu(afu); - cxl_debugfs_afu_remove(afu); - put_device(&afu->dev); - return rc; - -err_free_native: - kfree(afu->native); -err_free_afu: - kfree(afu); - return rc; - -} - -static void cxl_pci_remove_afu(struct cxl_afu *afu) -{ - pr_devel("%s\n", __func__); - - if (!afu) - return; - - cxl_pci_vphb_remove(afu); - cxl_sysfs_afu_remove(afu); - cxl_debugfs_afu_remove(afu); - - spin_lock(&afu->adapter->afu_list_lock); - afu->adapter->afu[afu->slice] = NULL; - spin_unlock(&afu->adapter->afu_list_lock); - - cxl_context_detach_all(afu); - cxl_ops->afu_deactivate_mode(afu, afu->current_mode); - - pci_deconfigure_afu(afu); - device_unregister(&afu->dev); -} - -int cxl_pci_reset(struct cxl *adapter) -{ - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - int rc; - - if (adapter->perst_same_image) { - dev_warn(&dev->dev, - "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); - return -EINVAL; - } - - dev_info(&dev->dev, "CXL reset\n"); - - /* - * The adapter is about to be reset, so ignore errors. - */ - cxl_data_cache_flush(adapter); - - /* pcie_warm_reset requests a fundamental pci reset which includes a - * PERST assert/deassert. PERST triggers a loading of the image - * if "user" or "factory" is selected in sysfs */ - if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) { - dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n"); - return rc; - } - - return rc; -} - -static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) -{ - if (pci_request_region(dev, 2, "priv 2 regs")) - goto err1; - if (pci_request_region(dev, 0, "priv 1 regs")) - goto err2; - - pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", - p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); - - if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) - goto err3; - - if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) - goto err4; - - return 0; - -err4: - iounmap(adapter->native->p1_mmio); - adapter->native->p1_mmio = NULL; -err3: - pci_release_region(dev, 0); -err2: - pci_release_region(dev, 2); -err1: - return -ENOMEM; -} - -static void cxl_unmap_adapter_regs(struct cxl *adapter) -{ - if (adapter->native->p1_mmio) { - iounmap(adapter->native->p1_mmio); - adapter->native->p1_mmio = NULL; - pci_release_region(to_pci_dev(adapter->dev.parent), 2); - } - if (adapter->native->p2_mmio) { - iounmap(adapter->native->p2_mmio); - adapter->native->p2_mmio = NULL; - pci_release_region(to_pci_dev(adapter->dev.parent), 0); - } -} - -static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) -{ - int vsec; - u32 afu_desc_off, afu_desc_size; - u32 ps_off, ps_size; - u16 vseclen; - u8 image_state; - - if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); - return -ENODEV; - } - - CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); - if (vseclen < CXL_VSEC_MIN_SIZE) { - dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n"); - return -EINVAL; - } - - CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status); - CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev); - CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major); - CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor); - CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); - CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); - adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE); - - CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); - CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); - CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size); - CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off); - CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size); - - /* Convert everything to bytes, because there is NO WAY I'd look at the - * code a month later and forget what units these are in ;-) */ - adapter->native->ps_off = ps_off * 64 * 1024; - adapter->ps_size = ps_size * 64 * 1024; - adapter->native->afu_desc_off = afu_desc_off * 64 * 1024; - adapter->native->afu_desc_size = afu_desc_size * 64 * 1024; - - /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ - adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; - - return 0; -} - -/* - * Workaround a PCIe Host Bridge defect on some cards, that can cause - * malformed Transaction Layer Packet (TLP) errors to be erroneously - * reported. Mask this error in the Uncorrectable Error Mask Register. - * - * The upper nibble of the PSL revision is used to distinguish between - * different cards. The affected ones have it set to 0. - */ -static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev) -{ - int aer; - u32 data; - - if (adapter->psl_rev & 0xf000) - return; - if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))) - return; - pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data); - if (data & PCI_ERR_UNC_MALF_TLP) - if (data & PCI_ERR_UNC_INTN) - return; - data |= PCI_ERR_UNC_MALF_TLP; - data |= PCI_ERR_UNC_INTN; - pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data); -} - -static bool cxl_compatible_caia_version(struct cxl *adapter) -{ - if (cxl_is_power8() && (adapter->caia_major == 1)) - return true; - - if (cxl_is_power9() && (adapter->caia_major == 2)) - return true; - - return false; -} - -static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) -{ - if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) - return -EBUSY; - - if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { - dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n"); - return -EINVAL; - } - - if (!cxl_compatible_caia_version(adapter)) { - dev_info(&dev->dev, "Ignoring card. PSL type is not supported (caia version: %d)\n", - adapter->caia_major); - return -ENODEV; - } - - if (!adapter->slices) { - /* Once we support dynamic reprogramming we can use the card if - * it supports loadable AFUs */ - dev_err(&dev->dev, "ABORTING: Device has no AFUs\n"); - return -EINVAL; - } - - if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) { - dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); - return -EINVAL; - } - - if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) { - dev_err(&dev->dev, "ABORTING: Problem state size larger than " - "available in BAR2: 0x%llx > 0x%llx\n", - adapter->ps_size, p2_size(dev) - adapter->native->ps_off); - return -EINVAL; - } - - return 0; -} - -ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) -{ - return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf); -} - -static void cxl_release_adapter(struct device *dev) -{ - struct cxl *adapter = to_cxl_adapter(dev); - - pr_devel("cxl_release_adapter\n"); - - cxl_remove_adapter_nr(adapter); - - kfree(adapter->native); - kfree(adapter); -} - -#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) - -static int sanitise_adapter_regs(struct cxl *adapter) -{ - int rc = 0; - - /* Clear PSL tberror bit by writing 1 to it */ - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror); - - if (adapter->native->sl_ops->invalidate_all) { - /* do not invalidate ERAT entries when not reloading on PERST */ - if (cxl_is_power9() && (adapter->perst_loads_image)) - return 0; - rc = adapter->native->sl_ops->invalidate_all(adapter); - } - - return rc; -} - -/* This should contain *only* operations that can safely be done in - * both creation and recovery. - */ -static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) -{ - int rc; - - adapter->dev.parent = &dev->dev; - adapter->dev.release = cxl_release_adapter; - pci_set_drvdata(dev, adapter); - - rc = pci_enable_device(dev); - if (rc) { - dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); - return rc; - } - - if ((rc = cxl_read_vsec(adapter, dev))) - return rc; - - if ((rc = cxl_vsec_looks_ok(adapter, dev))) - return rc; - - cxl_fixup_malformed_tlp(adapter, dev); - - if ((rc = setup_cxl_bars(dev))) - return rc; - - if ((rc = switch_card_to_cxl(dev))) - return rc; - - if ((rc = cxl_update_image_control(adapter))) - return rc; - - if ((rc = cxl_map_adapter_regs(adapter, dev))) - return rc; - - if ((rc = sanitise_adapter_regs(adapter))) - goto err; - - if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) - goto err; - - /* Required for devices using CAPP DMA mode, harmless for others */ - pci_set_master(dev); - - adapter->tunneled_ops_supported = false; - - if (cxl_is_power9()) { - if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) - dev_info(&dev->dev, "Tunneled operations unsupported\n"); - else - adapter->tunneled_ops_supported = true; - } - - if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) - goto err; - - /* If recovery happened, the last step is to turn on snooping. - * In the non-recovery case this has no effect */ - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) - goto err; - - /* Ignore error, adapter init is not dependant on timebase sync */ - cxl_setup_psl_timebase(adapter, dev); - - if ((rc = cxl_native_register_psl_err_irq(adapter))) - goto err; - - return 0; - -err: - cxl_unmap_adapter_regs(adapter); - return rc; - -} - -static void cxl_deconfigure_adapter(struct cxl *adapter) -{ - struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - - if (cxl_is_power9()) - pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0); - - cxl_native_release_psl_err_irq(adapter); - cxl_unmap_adapter_regs(adapter); - - pci_disable_device(pdev); -} - -static void cxl_stop_trace_psl9(struct cxl *adapter) -{ - int traceid; - u64 trace_state, trace_mask; - struct pci_dev *dev = to_pci_dev(adapter->dev.parent); - - /* read each tracearray state and issue mmio to stop them is needed */ - for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) { - trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG); - trace_mask = (0x3ULL << (62 - traceid * 2)); - trace_state = (trace_state & trace_mask) >> (62 - traceid * 2); - dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n", - traceid, trace_state); - - /* issue mmio if the trace array isn't in FIN state */ - if (trace_state != CXL_PSL9_TRACESTATE_FIN) - cxl_p1_write(adapter, CXL_PSL9_TRACECFG, - 0x8400000000000000ULL | traceid); - } -} - -static void cxl_stop_trace_psl8(struct cxl *adapter) -{ - int slice; - - /* Stop the trace */ - cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL); - - /* Stop the slice traces */ - spin_lock(&adapter->afu_list_lock); - for (slice = 0; slice < adapter->slices; slice++) { - if (adapter->afu[slice]) - cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, - 0x8000000000000000LL); - } - spin_unlock(&adapter->afu_list_lock); -} - -static const struct cxl_service_layer_ops psl9_ops = { - .adapter_regs_init = init_implementation_adapter_regs_psl9, - .invalidate_all = cxl_invalidate_all_psl9, - .afu_regs_init = init_implementation_afu_regs_psl9, - .sanitise_afu_regs = sanitise_afu_regs_psl9, - .register_serr_irq = cxl_native_register_serr_irq, - .release_serr_irq = cxl_native_release_serr_irq, - .handle_interrupt = cxl_irq_psl9, - .fail_irq = cxl_fail_irq_psl, - .activate_dedicated_process = cxl_activate_dedicated_process_psl9, - .attach_afu_directed = cxl_attach_afu_directed_psl9, - .attach_dedicated_process = cxl_attach_dedicated_process_psl9, - .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl9, - .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9, - .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9, - .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9, - .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9, - .debugfs_stop_trace = cxl_stop_trace_psl9, - .timebase_read = timebase_read_psl9, - .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, - .needs_reset_before_disable = true, -}; - -static const struct cxl_service_layer_ops psl8_ops = { - .adapter_regs_init = init_implementation_adapter_regs_psl8, - .invalidate_all = cxl_invalidate_all_psl8, - .afu_regs_init = init_implementation_afu_regs_psl8, - .sanitise_afu_regs = sanitise_afu_regs_psl8, - .register_serr_irq = cxl_native_register_serr_irq, - .release_serr_irq = cxl_native_release_serr_irq, - .handle_interrupt = cxl_irq_psl8, - .fail_irq = cxl_fail_irq_psl, - .activate_dedicated_process = cxl_activate_dedicated_process_psl8, - .attach_afu_directed = cxl_attach_afu_directed_psl8, - .attach_dedicated_process = cxl_attach_dedicated_process_psl8, - .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8, - .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8, - .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8, - .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8, - .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8, - .debugfs_stop_trace = cxl_stop_trace_psl8, - .write_timebase_ctrl = write_timebase_ctrl_psl8, - .timebase_read = timebase_read_psl8, - .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, - .needs_reset_before_disable = true, -}; - -static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) -{ - if (cxl_is_power8()) { - dev_info(&dev->dev, "Device uses a PSL8\n"); - adapter->native->sl_ops = &psl8_ops; - } else { - dev_info(&dev->dev, "Device uses a PSL9\n"); - adapter->native->sl_ops = &psl9_ops; - } -} - - -static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) -{ - struct cxl *adapter; - int rc; - - adapter = cxl_alloc_adapter(); - if (!adapter) - return ERR_PTR(-ENOMEM); - - adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL); - if (!adapter->native) { - rc = -ENOMEM; - goto err_release; - } - - set_sl_ops(adapter, dev); - - /* Set defaults for parameters which need to persist over - * configure/reconfigure - */ - adapter->perst_loads_image = true; - adapter->perst_same_image = false; - - rc = cxl_configure_adapter(adapter, dev); - if (rc) { - pci_disable_device(dev); - goto err_release; - } - - /* Don't care if this one fails: */ - cxl_debugfs_adapter_add(adapter); - - /* - * After we call this function we must not free the adapter directly, - * even if it returns an error! - */ - if ((rc = cxl_register_adapter(adapter))) - goto err_put_dev; - - if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_del_dev; - - /* Release the context lock as adapter is configured */ - cxl_adapter_context_unlock(adapter); - - return adapter; - -err_del_dev: - device_del(&adapter->dev); -err_put_dev: - /* This should mirror cxl_remove_adapter, except without the - * sysfs parts - */ - cxl_debugfs_adapter_remove(adapter); - cxl_deconfigure_adapter(adapter); - put_device(&adapter->dev); - return ERR_PTR(rc); - -err_release: - cxl_release_adapter(&adapter->dev); - return ERR_PTR(rc); -} - -static void cxl_pci_remove_adapter(struct cxl *adapter) -{ - pr_devel("cxl_remove_adapter\n"); - - cxl_sysfs_adapter_remove(adapter); - cxl_debugfs_adapter_remove(adapter); - - /* - * Flush adapter datacache as its about to be removed. - */ - cxl_data_cache_flush(adapter); - - cxl_deconfigure_adapter(adapter); - - device_unregister(&adapter->dev); -} - -#define CXL_MAX_PCIEX_PARENT 2 - -int cxl_slot_is_switched(struct pci_dev *dev) -{ - struct device_node *np; - int depth = 0; - - if (!(np = pci_device_to_OF_node(dev))) { - pr_err("cxl: np = NULL\n"); - return -ENODEV; - } - of_node_get(np); - while (np) { - np = of_get_next_parent(np); - if (!of_node_is_type(np, "pciex")) - break; - depth++; - } - of_node_put(np); - return (depth > CXL_MAX_PCIEX_PARENT); -} - -static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct cxl *adapter; - int slice; - int rc; - - dev_err_once(&dev->dev, "DEPRECATED: cxl is deprecated and will be removed in a future kernel release\n"); - - if (cxl_pci_is_vphb_device(dev)) { - dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); - return -ENODEV; - } - - if (cxl_slot_is_switched(dev)) { - dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n"); - return -ENODEV; - } - - if (cxl_is_power9() && !radix_enabled()) { - dev_info(&dev->dev, "Only Radix mode supported\n"); - return -ENODEV; - } - - if (cxl_verbose) - dump_cxl_config_space(dev); - - adapter = cxl_pci_init_adapter(dev); - if (IS_ERR(adapter)) { - dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); - return PTR_ERR(adapter); - } - - for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = pci_init_afu(adapter, slice, dev))) { - dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); - continue; - } - - rc = cxl_afu_select_best_mode(adapter->afu[slice]); - if (rc) - dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); - } - - return 0; -} - -static void cxl_remove(struct pci_dev *dev) -{ - struct cxl *adapter = pci_get_drvdata(dev); - struct cxl_afu *afu; - int i; - - /* - * Lock to prevent someone grabbing a ref through the adapter list as - * we are removing it - */ - for (i = 0; i < adapter->slices; i++) { - afu = adapter->afu[i]; - cxl_pci_remove_afu(afu); - } - cxl_pci_remove_adapter(adapter); -} - -static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, - pci_channel_state_t state) -{ - struct pci_dev *afu_dev; - struct pci_driver *afu_drv; - const struct pci_error_handlers *err_handler; - pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; - pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; - - /* There should only be one entry, but go through the list - * anyway - */ - if (afu == NULL || afu->phb == NULL) - return result; - - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { - afu_drv = to_pci_driver(afu_dev->dev.driver); - if (!afu_drv) - continue; - - afu_dev->error_state = state; - - err_handler = afu_drv->err_handler; - if (err_handler) - afu_result = err_handler->error_detected(afu_dev, - state); - /* Disconnect trumps all, NONE trumps NEED_RESET */ - if (afu_result == PCI_ERS_RESULT_DISCONNECT) - result = PCI_ERS_RESULT_DISCONNECT; - else if ((afu_result == PCI_ERS_RESULT_NONE) && - (result == PCI_ERS_RESULT_NEED_RESET)) - result = PCI_ERS_RESULT_NONE; - } - return result; -} - -static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl *adapter = pci_get_drvdata(pdev); - struct cxl_afu *afu; - pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; - pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; - int i; - - /* At this point, we could still have an interrupt pending. - * Let's try to get them out of the way before they do - * anything we don't like. - */ - schedule(); - - /* If we're permanently dead, give up. */ - if (state == pci_channel_io_perm_failure) { - spin_lock(&adapter->afu_list_lock); - for (i = 0; i < adapter->slices; i++) { - afu = adapter->afu[i]; - /* - * Tell the AFU drivers; but we don't care what they - * say, we're going away. - */ - cxl_vphb_error_detected(afu, state); - } - spin_unlock(&adapter->afu_list_lock); - return PCI_ERS_RESULT_DISCONNECT; - } - - /* Are we reflashing? - * - * If we reflash, we could come back as something entirely - * different, including a non-CAPI card. As such, by default - * we don't participate in the process. We'll be unbound and - * the slot re-probed. (TODO: check EEH doesn't blindly rebind - * us!) - * - * However, this isn't the entire story: for reliablity - * reasons, we usually want to reflash the FPGA on PERST in - * order to get back to a more reliable known-good state. - * - * This causes us a bit of a problem: if we reflash we can't - * trust that we'll come back the same - we could have a new - * image and been PERSTed in order to load that - * image. However, most of the time we actually *will* come - * back the same - for example a regular EEH event. - * - * Therefore, we allow the user to assert that the image is - * indeed the same and that we should continue on into EEH - * anyway. - */ - if (adapter->perst_loads_image && !adapter->perst_same_image) { - /* TODO take the PHB out of CXL mode */ - dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); - return PCI_ERS_RESULT_NONE; - } - - /* - * At this point, we want to try to recover. We'll always - * need a complete slot reset: we don't trust any other reset. - * - * Now, we go through each AFU: - * - We send the driver, if bound, an error_detected callback. - * We expect it to clean up, but it can also tell us to give - * up and permanently detach the card. To simplify things, if - * any bound AFU driver doesn't support EEH, we give up on EEH. - * - * - We detach all contexts associated with the AFU. This - * does not free them, but puts them into a CLOSED state - * which causes any the associated files to return useful - * errors to userland. It also unmaps, but does not free, - * any IRQs. - * - * - We clean up our side: releasing and unmapping resources we hold - * so we can wire them up again when the hardware comes back up. - * - * Driver authors should note: - * - * - Any contexts you create in your kernel driver (except - * those associated with anonymous file descriptors) are - * your responsibility to free and recreate. Likewise with - * any attached resources. - * - * - We will take responsibility for re-initialising the - * device context (the one set up for you in - * cxl_pci_enable_device_hook and accessed through - * cxl_get_context). If you've attached IRQs or other - * resources to it, they remains yours to free. - * - * You can call the same functions to release resources as you - * normally would: we make sure that these functions continue - * to work when the hardware is down. - * - * Two examples: - * - * 1) If you normally free all your resources at the end of - * each request, or if you use anonymous FDs, your - * error_detected callback can simply set a flag to tell - * your driver not to start any new calls. You can then - * clear the flag in the resume callback. - * - * 2) If you normally allocate your resources on startup: - * * Set a flag in error_detected as above. - * * Let CXL detach your contexts. - * * In slot_reset, free the old resources and allocate new ones. - * * In resume, clear the flag to allow things to start. - */ - - /* Make sure no one else changes the afu list */ - spin_lock(&adapter->afu_list_lock); - - for (i = 0; i < adapter->slices; i++) { - afu = adapter->afu[i]; - - if (afu == NULL) - continue; - - afu_result = cxl_vphb_error_detected(afu, state); - cxl_context_detach_all(afu); - cxl_ops->afu_deactivate_mode(afu, afu->current_mode); - pci_deconfigure_afu(afu); - - /* Disconnect trumps all, NONE trumps NEED_RESET */ - if (afu_result == PCI_ERS_RESULT_DISCONNECT) - result = PCI_ERS_RESULT_DISCONNECT; - else if ((afu_result == PCI_ERS_RESULT_NONE) && - (result == PCI_ERS_RESULT_NEED_RESET)) - result = PCI_ERS_RESULT_NONE; - } - spin_unlock(&adapter->afu_list_lock); - - /* should take the context lock here */ - if (cxl_adapter_context_lock(adapter) != 0) - dev_warn(&adapter->dev, - "Couldn't take context lock with %d active-contexts\n", - atomic_read(&adapter->contexts_num)); - - cxl_deconfigure_adapter(adapter); - - return result; -} - -static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) -{ - struct cxl *adapter = pci_get_drvdata(pdev); - struct cxl_afu *afu; - struct cxl_context *ctx; - struct pci_dev *afu_dev; - struct pci_driver *afu_drv; - const struct pci_error_handlers *err_handler; - pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; - pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; - int i; - - if (cxl_configure_adapter(adapter, pdev)) - goto err; - - /* - * Unlock context activation for the adapter. Ideally this should be - * done in cxl_pci_resume but cxlflash module tries to activate the - * master context as part of slot_reset callback. - */ - cxl_adapter_context_unlock(adapter); - - spin_lock(&adapter->afu_list_lock); - for (i = 0; i < adapter->slices; i++) { - afu = adapter->afu[i]; - - if (afu == NULL) - continue; - - if (pci_configure_afu(afu, adapter, pdev)) - goto err_unlock; - - if (cxl_afu_select_best_mode(afu)) - goto err_unlock; - - if (afu->phb == NULL) - continue; - - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { - /* Reset the device context. - * TODO: make this less disruptive - */ - ctx = cxl_get_context(afu_dev); - - if (ctx && cxl_release_context(ctx)) - goto err_unlock; - - ctx = cxl_dev_context_init(afu_dev); - if (IS_ERR(ctx)) - goto err_unlock; - - afu_dev->dev.archdata.cxl_ctx = ctx; - - if (cxl_ops->afu_check_and_enable(afu)) - goto err_unlock; - - afu_dev->error_state = pci_channel_io_normal; - - /* If there's a driver attached, allow it to - * chime in on recovery. Drivers should check - * if everything has come back OK, but - * shouldn't start new work until we call - * their resume function. - */ - afu_drv = to_pci_driver(afu_dev->dev.driver); - if (!afu_drv) - continue; - - err_handler = afu_drv->err_handler; - if (err_handler && err_handler->slot_reset) - afu_result = err_handler->slot_reset(afu_dev); - - if (afu_result == PCI_ERS_RESULT_DISCONNECT) - result = PCI_ERS_RESULT_DISCONNECT; - } - } - - spin_unlock(&adapter->afu_list_lock); - return result; - -err_unlock: - spin_unlock(&adapter->afu_list_lock); - -err: - /* All the bits that happen in both error_detected and cxl_remove - * should be idempotent, so we don't need to worry about leaving a mix - * of unconfigured and reconfigured resources. - */ - dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n"); - return PCI_ERS_RESULT_DISCONNECT; -} - -static void cxl_pci_resume(struct pci_dev *pdev) -{ - struct cxl *adapter = pci_get_drvdata(pdev); - struct cxl_afu *afu; - struct pci_dev *afu_dev; - struct pci_driver *afu_drv; - const struct pci_error_handlers *err_handler; - int i; - - /* Everything is back now. Drivers should restart work now. - * This is not the place to be checking if everything came back up - * properly, because there's no return value: do that in slot_reset. - */ - spin_lock(&adapter->afu_list_lock); - for (i = 0; i < adapter->slices; i++) { - afu = adapter->afu[i]; - - if (afu == NULL || afu->phb == NULL) - continue; - - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { - afu_drv = to_pci_driver(afu_dev->dev.driver); - if (!afu_drv) - continue; - - err_handler = afu_drv->err_handler; - if (err_handler && err_handler->resume) - err_handler->resume(afu_dev); - } - } - spin_unlock(&adapter->afu_list_lock); -} - -static const struct pci_error_handlers cxl_err_handler = { - .error_detected = cxl_pci_error_detected, - .slot_reset = cxl_pci_slot_reset, - .resume = cxl_pci_resume, -}; - -struct pci_driver cxl_pci_driver = { - .name = "cxl-pci", - .id_table = cxl_pci_tbl, - .probe = cxl_probe, - .remove = cxl_remove, - .shutdown = cxl_remove, - .err_handler = &cxl_err_handler, -}; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c deleted file mode 100644 index b1fc6446bd4b..000000000000 --- a/drivers/misc/cxl/sysfs.c +++ /dev/null @@ -1,771 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include -#include - -#include "cxl.h" - -#define to_afu_chardev_m(d) dev_get_drvdata(d) - -/********* Adapter attributes **********************************************/ - -static ssize_t caia_version_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major, - adapter->caia_minor); -} - -static ssize_t psl_revision_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev); -} - -static ssize_t base_image_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image); -} - -static ssize_t image_loaded_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - if (adapter->user_image_loaded) - return scnprintf(buf, PAGE_SIZE, "user\n"); - return scnprintf(buf, PAGE_SIZE, "factory\n"); -} - -static ssize_t psl_timebase_synced_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - u64 psl_tb, delta; - - /* Recompute the status only in native mode */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - psl_tb = adapter->native->sl_ops->timebase_read(adapter); - delta = abs(mftb() - psl_tb); - - /* CORE TB and PSL TB difference <= 16usecs ? */ - adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false; - pr_devel("PSL timebase %s - delta: 0x%016llx\n", - (tb_to_ns(delta) < 16000) ? "synchronized" : - "not synchronized", tb_to_ns(delta)); - } - return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); -} - -static ssize_t tunneled_ops_supported_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported); -} - -static ssize_t reset_adapter_store(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl *adapter = to_cxl_adapter(device); - int rc; - int val; - - rc = sscanf(buf, "%i", &val); - if ((rc != 1) || (val != 1 && val != -1)) - return -EINVAL; - - /* - * See if we can lock the context mapping that's only allowed - * when there are no contexts attached to the adapter. Once - * taken this will also prevent any context from getting activated. - */ - if (val == 1) { - rc = cxl_adapter_context_lock(adapter); - if (rc) - goto out; - - rc = cxl_ops->adapter_reset(adapter); - /* In case reset failed release context lock */ - if (rc) - cxl_adapter_context_unlock(adapter); - - } else if (val == -1) { - /* Perform a forced adapter reset */ - rc = cxl_ops->adapter_reset(adapter); - } - -out: - return rc ? rc : count; -} - -static ssize_t load_image_on_perst_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - if (!adapter->perst_loads_image) - return scnprintf(buf, PAGE_SIZE, "none\n"); - - if (adapter->perst_select_user) - return scnprintf(buf, PAGE_SIZE, "user\n"); - return scnprintf(buf, PAGE_SIZE, "factory\n"); -} - -static ssize_t load_image_on_perst_store(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl *adapter = to_cxl_adapter(device); - int rc; - - if (!strncmp(buf, "none", 4)) - adapter->perst_loads_image = false; - else if (!strncmp(buf, "user", 4)) { - adapter->perst_select_user = true; - adapter->perst_loads_image = true; - } else if (!strncmp(buf, "factory", 7)) { - adapter->perst_select_user = false; - adapter->perst_loads_image = true; - } else - return -EINVAL; - - if ((rc = cxl_update_image_control(adapter))) - return rc; - - return count; -} - -static ssize_t perst_reloads_same_image_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl *adapter = to_cxl_adapter(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); -} - -static ssize_t perst_reloads_same_image_store(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl *adapter = to_cxl_adapter(device); - int rc; - int val; - - rc = sscanf(buf, "%i", &val); - if ((rc != 1) || !(val == 1 || val == 0)) - return -EINVAL; - - adapter->perst_same_image = (val == 1); - return count; -} - -static struct device_attribute adapter_attrs[] = { - __ATTR_RO(caia_version), - __ATTR_RO(psl_revision), - __ATTR_RO(base_image), - __ATTR_RO(image_loaded), - __ATTR_RO(psl_timebase_synced), - __ATTR_RO(tunneled_ops_supported), - __ATTR_RW(load_image_on_perst), - __ATTR_RW(perst_reloads_same_image), - __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), -}; - - -/********* AFU master specific attributes **********************************/ - -static ssize_t mmio_size_show_master(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_afu_chardev_m(device); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); -} - -static ssize_t pp_mmio_off_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_afu_chardev_m(device); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset); -} - -static ssize_t pp_mmio_len_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_afu_chardev_m(device); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); -} - -static struct device_attribute afu_master_attrs[] = { - __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL), - __ATTR_RO(pp_mmio_off), - __ATTR_RO(pp_mmio_len), -}; - - -/********* AFU attributes **************************************************/ - -static ssize_t mmio_size_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - - if (afu->pp_size) - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); -} - -static ssize_t reset_store_afu(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl_afu *afu = to_cxl_afu(device); - int rc; - - /* Not safe to reset if it is currently in use */ - mutex_lock(&afu->contexts_lock); - if (!idr_is_empty(&afu->contexts_idr)) { - rc = -EBUSY; - goto err; - } - - if ((rc = cxl_ops->afu_reset(afu))) - goto err; - - rc = count; -err: - mutex_unlock(&afu->contexts_lock); - return rc; -} - -static ssize_t irqs_min_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs); -} - -static ssize_t irqs_max_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - - return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max); -} - -static ssize_t irqs_max_store(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl_afu *afu = to_cxl_afu(device); - ssize_t ret; - int irqs_max; - - ret = sscanf(buf, "%i", &irqs_max); - if (ret != 1) - return -EINVAL; - - if (irqs_max < afu->pp_irqs) - return -EINVAL; - - if (cpu_has_feature(CPU_FTR_HVMODE)) { - if (irqs_max > afu->adapter->user_irqs) - return -EINVAL; - } else { - /* pHyp sets a per-AFU limit */ - if (irqs_max > afu->guest->max_ints) - return -EINVAL; - } - - afu->irqs_max = irqs_max; - return count; -} - -static ssize_t modes_supported_show(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - char *p = buf, *end = buf + PAGE_SIZE; - - if (afu->modes_supported & CXL_MODE_DEDICATED) - p += scnprintf(p, end - p, "dedicated_process\n"); - if (afu->modes_supported & CXL_MODE_DIRECTED) - p += scnprintf(p, end - p, "afu_directed\n"); - return (p - buf); -} - -static ssize_t prefault_mode_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - - switch (afu->prefault_mode) { - case CXL_PREFAULT_WED: - return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n"); - case CXL_PREFAULT_ALL: - return scnprintf(buf, PAGE_SIZE, "all\n"); - default: - return scnprintf(buf, PAGE_SIZE, "none\n"); - } -} - -static ssize_t prefault_mode_store(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl_afu *afu = to_cxl_afu(device); - enum prefault_modes mode = -1; - - if (!strncmp(buf, "none", 4)) - mode = CXL_PREFAULT_NONE; - else { - if (!radix_enabled()) { - - /* only allowed when not in radix mode */ - if (!strncmp(buf, "work_element_descriptor", 23)) - mode = CXL_PREFAULT_WED; - if (!strncmp(buf, "all", 3)) - mode = CXL_PREFAULT_ALL; - } else { - dev_err(device, "Cannot prefault with radix enabled\n"); - } - } - - if (mode == -1) - return -EINVAL; - - afu->prefault_mode = mode; - return count; -} - -static ssize_t mode_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct cxl_afu *afu = to_cxl_afu(device); - - if (afu->current_mode == CXL_MODE_DEDICATED) - return scnprintf(buf, PAGE_SIZE, "dedicated_process\n"); - if (afu->current_mode == CXL_MODE_DIRECTED) - return scnprintf(buf, PAGE_SIZE, "afu_directed\n"); - return scnprintf(buf, PAGE_SIZE, "none\n"); -} - -static ssize_t mode_store(struct device *device, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxl_afu *afu = to_cxl_afu(device); - int old_mode, mode = -1; - int rc = -EBUSY; - - /* can't change this if we have a user */ - mutex_lock(&afu->contexts_lock); - if (!idr_is_empty(&afu->contexts_idr)) - goto err; - - if (!strncmp(buf, "dedicated_process", 17)) - mode = CXL_MODE_DEDICATED; - if (!strncmp(buf, "afu_directed", 12)) - mode = CXL_MODE_DIRECTED; - if (!strncmp(buf, "none", 4)) - mode = 0; - - if (mode == -1) { - rc = -EINVAL; - goto err; - } - - /* - * afu_deactivate_mode needs to be done outside the lock, prevent - * other contexts coming in before we are ready: - */ - old_mode = afu->current_mode; - afu->current_mode = 0; - afu->num_procs = 0; - - mutex_unlock(&afu->contexts_lock); - - if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode))) - return rc; - if ((rc = cxl_ops->afu_activate_mode(afu, mode))) - return rc; - - return count; -err: - mutex_unlock(&afu->contexts_lock); - return rc; -} - -static ssize_t api_version_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION); -} - -static ssize_t api_version_compatible_show(struct device *device, - struct device_attribute *attr, - char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); -} - -static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, - const struct bin_attribute *bin_attr, char *buf, - loff_t off, size_t count) -{ - struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj)); - - return cxl_ops->afu_read_err_buffer(afu, buf, off, count); -} - -static struct device_attribute afu_attrs[] = { - __ATTR_RO(mmio_size), - __ATTR_RO(irqs_min), - __ATTR_RW(irqs_max), - __ATTR_RO(modes_supported), - __ATTR_RW(mode), - __ATTR_RW(prefault_mode), - __ATTR_RO(api_version), - __ATTR_RO(api_version_compatible), - __ATTR(reset, S_IWUSR, NULL, reset_store_afu), -}; - -int cxl_sysfs_adapter_add(struct cxl *adapter) -{ - struct device_attribute *dev_attr; - int i, rc; - - for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { - dev_attr = &adapter_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_ADAPTER_ATTRS)) { - if ((rc = device_create_file(&adapter->dev, dev_attr))) - goto err; - } - } - return 0; -err: - for (i--; i >= 0; i--) { - dev_attr = &adapter_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_ADAPTER_ATTRS)) - device_remove_file(&adapter->dev, dev_attr); - } - return rc; -} - -void cxl_sysfs_adapter_remove(struct cxl *adapter) -{ - struct device_attribute *dev_attr; - int i; - - for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { - dev_attr = &adapter_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_ADAPTER_ATTRS)) - device_remove_file(&adapter->dev, dev_attr); - } -} - -struct afu_config_record { - struct kobject kobj; - struct bin_attribute config_attr; - struct list_head list; - int cr; - u16 device; - u16 vendor; - u32 class; -}; - -#define to_cr(obj) container_of(obj, struct afu_config_record, kobj) - -static ssize_t vendor_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct afu_config_record *cr = to_cr(kobj); - - return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor); -} - -static ssize_t device_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct afu_config_record *cr = to_cr(kobj); - - return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device); -} - -static ssize_t class_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct afu_config_record *cr = to_cr(kobj); - - return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class); -} - -static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, - const struct bin_attribute *bin_attr, char *buf, - loff_t off, size_t count) -{ - struct afu_config_record *cr = to_cr(kobj); - struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj->parent)); - - u64 i, j, val, rc; - - for (i = 0; i < count;) { - rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val); - if (rc) - val = ~0ULL; - for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) - buf[i] = (val >> (j * 8)) & 0xff; - } - - return count; -} - -static struct kobj_attribute vendor_attribute = - __ATTR_RO(vendor); -static struct kobj_attribute device_attribute = - __ATTR_RO(device); -static struct kobj_attribute class_attribute = - __ATTR_RO(class); - -static struct attribute *afu_cr_attrs[] = { - &vendor_attribute.attr, - &device_attribute.attr, - &class_attribute.attr, - NULL, -}; -ATTRIBUTE_GROUPS(afu_cr); - -static void release_afu_config_record(struct kobject *kobj) -{ - struct afu_config_record *cr = to_cr(kobj); - - kfree(cr); -} - -static const struct kobj_type afu_config_record_type = { - .sysfs_ops = &kobj_sysfs_ops, - .release = release_afu_config_record, - .default_groups = afu_cr_groups, -}; - -static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx) -{ - struct afu_config_record *cr; - int rc; - - cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL); - if (!cr) - return ERR_PTR(-ENOMEM); - - cr->cr = cr_idx; - - rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device); - if (rc) - goto err; - rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor); - if (rc) - goto err; - rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class); - if (rc) - goto err; - cr->class >>= 8; - - /* - * Export raw AFU PCIe like config record. For now this is read only by - * root - we can expand that later to be readable by non-root and maybe - * even writable provided we have a good use-case. Once we support - * exposing AFUs through a virtual PHB they will get that for free from - * Linux' PCI infrastructure, but until then it's not clear that we - * need it for anything since the main use case is just identifying - * AFUs, which can be done via the vendor, device and class attributes. - */ - sysfs_bin_attr_init(&cr->config_attr); - cr->config_attr.attr.name = "config"; - cr->config_attr.attr.mode = S_IRUSR; - cr->config_attr.size = afu->crs_len; - cr->config_attr.read_new = afu_read_config; - - rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type, - &afu->dev.kobj, "cr%i", cr->cr); - if (rc) - goto err1; - - rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr); - if (rc) - goto err1; - - rc = kobject_uevent(&cr->kobj, KOBJ_ADD); - if (rc) - goto err2; - - return cr; -err2: - sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); -err1: - kobject_put(&cr->kobj); - return ERR_PTR(rc); -err: - kfree(cr); - return ERR_PTR(rc); -} - -void cxl_sysfs_afu_remove(struct cxl_afu *afu) -{ - struct device_attribute *dev_attr; - struct afu_config_record *cr, *tmp; - int i; - - /* remove the err buffer bin attribute */ - if (afu->eb_len) - device_remove_bin_file(&afu->dev, &afu->attr_eb); - - for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { - dev_attr = &afu_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_ATTRS)) - device_remove_file(&afu->dev, &afu_attrs[i]); - } - - list_for_each_entry_safe(cr, tmp, &afu->crs, list) { - sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); - kobject_put(&cr->kobj); - } -} - -int cxl_sysfs_afu_add(struct cxl_afu *afu) -{ - struct device_attribute *dev_attr; - struct afu_config_record *cr; - int i, rc; - - INIT_LIST_HEAD(&afu->crs); - - for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { - dev_attr = &afu_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_ATTRS)) { - if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) - goto err; - } - } - - /* conditionally create the add the binary file for error info buffer */ - if (afu->eb_len) { - sysfs_attr_init(&afu->attr_eb.attr); - - afu->attr_eb.attr.name = "afu_err_buff"; - afu->attr_eb.attr.mode = S_IRUGO; - afu->attr_eb.size = afu->eb_len; - afu->attr_eb.read_new = afu_eb_read; - - rc = device_create_bin_file(&afu->dev, &afu->attr_eb); - if (rc) { - dev_err(&afu->dev, - "Unable to create eb attr for the afu. Err(%d)\n", - rc); - goto err; - } - } - - for (i = 0; i < afu->crs_num; i++) { - cr = cxl_sysfs_afu_new_cr(afu, i); - if (IS_ERR(cr)) { - rc = PTR_ERR(cr); - goto err1; - } - list_add(&cr->list, &afu->crs); - } - - return 0; - -err1: - cxl_sysfs_afu_remove(afu); - return rc; -err: - /* reset the eb_len as we havent created the bin attr */ - afu->eb_len = 0; - - for (i--; i >= 0; i--) { - dev_attr = &afu_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_ATTRS)) - device_remove_file(&afu->dev, &afu_attrs[i]); - } - return rc; -} - -int cxl_sysfs_afu_m_add(struct cxl_afu *afu) -{ - struct device_attribute *dev_attr; - int i, rc; - - for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { - dev_attr = &afu_master_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_MASTER_ATTRS)) { - if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) - goto err; - } - } - - return 0; - -err: - for (i--; i >= 0; i--) { - dev_attr = &afu_master_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_MASTER_ATTRS)) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); - } - return rc; -} - -void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) -{ - struct device_attribute *dev_attr; - int i; - - for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { - dev_attr = &afu_master_attrs[i]; - if (cxl_ops->support_attributes(dev_attr->attr.name, - CXL_AFU_MASTER_ATTRS)) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); - } -} diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c deleted file mode 100644 index 86f654b99efb..000000000000 --- a/drivers/misc/cxl/trace.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2015 IBM Corp. - */ - -#ifndef __CHECKER__ -#define CREATE_TRACE_POINTS -#include "trace.h" -#endif diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h deleted file mode 100644 index c474157c6857..000000000000 --- a/drivers/misc/cxl/trace.h +++ /dev/null @@ -1,691 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2015 IBM Corp. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM cxl - -#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _CXL_TRACE_H - -#include - -#include "cxl.h" - -#define dsisr_psl9_flags(flags) \ - __print_flags(flags, "|", \ - { CXL_PSL9_DSISR_An_CO_MASK, "FR" }, \ - { CXL_PSL9_DSISR_An_TF, "TF" }, \ - { CXL_PSL9_DSISR_An_PE, "PE" }, \ - { CXL_PSL9_DSISR_An_AE, "AE" }, \ - { CXL_PSL9_DSISR_An_OC, "OC" }, \ - { CXL_PSL9_DSISR_An_S, "S" }) - -#define DSISR_FLAGS \ - { CXL_PSL_DSISR_An_DS, "DS" }, \ - { CXL_PSL_DSISR_An_DM, "DM" }, \ - { CXL_PSL_DSISR_An_ST, "ST" }, \ - { CXL_PSL_DSISR_An_UR, "UR" }, \ - { CXL_PSL_DSISR_An_PE, "PE" }, \ - { CXL_PSL_DSISR_An_AE, "AE" }, \ - { CXL_PSL_DSISR_An_OC, "OC" }, \ - { CXL_PSL_DSISR_An_M, "M" }, \ - { CXL_PSL_DSISR_An_P, "P" }, \ - { CXL_PSL_DSISR_An_A, "A" }, \ - { CXL_PSL_DSISR_An_S, "S" }, \ - { CXL_PSL_DSISR_An_K, "K" } - -#define TFC_FLAGS \ - { CXL_PSL_TFC_An_A, "A" }, \ - { CXL_PSL_TFC_An_C, "C" }, \ - { CXL_PSL_TFC_An_AE, "AE" }, \ - { CXL_PSL_TFC_An_R, "R" } - -#define LLCMD_NAMES \ - { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \ - { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \ - { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \ - { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \ - { CXL_SPA_SW_CMD_ADD, "ADD" }, \ - { CXL_SPA_SW_CMD_UPDATE, "UPDATE" } - -#define AFU_COMMANDS \ - { 0, "DISABLE" }, \ - { CXL_AFU_Cntl_An_E, "ENABLE" }, \ - { CXL_AFU_Cntl_An_RA, "RESET" } - -#define PSL_COMMANDS \ - { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \ - { CXL_PSL_SCNTL_An_Sc, "SUSPEND" } - - -DECLARE_EVENT_CLASS(cxl_pe_class, - TP_PROTO(struct cxl_context *ctx), - - TP_ARGS(ctx), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - ), - - TP_printk("afu%i.%i pe=%i", - __entry->card, - __entry->afu, - __entry->pe - ) -); - - -TRACE_EVENT(cxl_attach, - TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr), - - TP_ARGS(ctx, wed, num_interrupts, amr), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(pid_t, pid) - __field(u64, wed) - __field(u64, amr) - __field(s16, num_interrupts) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->pid = pid_nr(ctx->pid); - __entry->wed = wed; - __entry->amr = amr; - __entry->num_interrupts = num_interrupts; - ), - - TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx", - __entry->card, - __entry->afu, - __entry->pid, - __entry->pe, - __entry->wed, - __entry->num_interrupts, - __entry->amr - ) -); - -DEFINE_EVENT(cxl_pe_class, cxl_detach, - TP_PROTO(struct cxl_context *ctx), - TP_ARGS(ctx) -); - -TRACE_EVENT(cxl_afu_irq, - TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq), - - TP_ARGS(ctx, afu_irq, virq, hwirq), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u16, afu_irq) - __field(int, virq) - __field(irq_hw_number_t, hwirq) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->afu_irq = afu_irq; - __entry->virq = virq; - __entry->hwirq = hwirq; - ), - - TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx", - __entry->card, - __entry->afu, - __entry->pe, - __entry->afu_irq, - __entry->virq, - __entry->hwirq - ) -); - -TRACE_EVENT(cxl_psl9_irq, - TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), - - TP_ARGS(ctx, irq, dsisr, dar), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(int, irq) - __field(u64, dsisr) - __field(u64, dar) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->irq = irq; - __entry->dsisr = dsisr; - __entry->dar = dar; - ), - - TP_printk("afu%i.%i pe=%i irq=%i dsisr=0x%016llx dsisr=%s dar=0x%016llx", - __entry->card, - __entry->afu, - __entry->pe, - __entry->irq, - __entry->dsisr, - dsisr_psl9_flags(__entry->dsisr), - __entry->dar - ) -); - -TRACE_EVENT(cxl_psl_irq, - TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), - - TP_ARGS(ctx, irq, dsisr, dar), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(int, irq) - __field(u64, dsisr) - __field(u64, dar) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->irq = irq; - __entry->dsisr = dsisr; - __entry->dar = dar; - ), - - TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx", - __entry->card, - __entry->afu, - __entry->pe, - __entry->irq, - __print_flags(__entry->dsisr, "|", DSISR_FLAGS), - __entry->dar - ) -); - -TRACE_EVENT(cxl_psl_irq_ack, - TP_PROTO(struct cxl_context *ctx, u64 tfc), - - TP_ARGS(ctx, tfc), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u64, tfc) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->tfc = tfc; - ), - - TP_printk("afu%i.%i pe=%i tfc=%s", - __entry->card, - __entry->afu, - __entry->pe, - __print_flags(__entry->tfc, "|", TFC_FLAGS) - ) -); - -TRACE_EVENT(cxl_ste_miss, - TP_PROTO(struct cxl_context *ctx, u64 dar), - - TP_ARGS(ctx, dar), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u64, dar) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->dar = dar; - ), - - TP_printk("afu%i.%i pe=%i dar=0x%016llx", - __entry->card, - __entry->afu, - __entry->pe, - __entry->dar - ) -); - -TRACE_EVENT(cxl_ste_write, - TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v), - - TP_ARGS(ctx, idx, e, v), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(unsigned int, idx) - __field(u64, e) - __field(u64, v) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->idx = idx; - __entry->e = e; - __entry->v = v; - ), - - TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx", - __entry->card, - __entry->afu, - __entry->pe, - __entry->idx, - __entry->e, - __entry->v - ) -); - -TRACE_EVENT(cxl_pte_miss, - TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar), - - TP_ARGS(ctx, dsisr, dar), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u64, dsisr) - __field(u64, dar) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->dsisr = dsisr; - __entry->dar = dar; - ), - - TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx", - __entry->card, - __entry->afu, - __entry->pe, - __print_flags(__entry->dsisr, "|", DSISR_FLAGS), - __entry->dar - ) -); - -TRACE_EVENT(cxl_llcmd, - TP_PROTO(struct cxl_context *ctx, u64 cmd), - - TP_ARGS(ctx, cmd), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u64, cmd) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->cmd = cmd; - ), - - TP_printk("afu%i.%i pe=%i cmd=%s", - __entry->card, - __entry->afu, - __entry->pe, - __print_symbolic_u64(__entry->cmd, LLCMD_NAMES) - ) -); - -TRACE_EVENT(cxl_llcmd_done, - TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc), - - TP_ARGS(ctx, cmd, rc), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u16, pe) - __field(u64, cmd) - __field(int, rc) - ), - - TP_fast_assign( - __entry->card = ctx->afu->adapter->adapter_num; - __entry->afu = ctx->afu->slice; - __entry->pe = ctx->pe; - __entry->rc = rc; - __entry->cmd = cmd; - ), - - TP_printk("afu%i.%i pe=%i cmd=%s rc=%i", - __entry->card, - __entry->afu, - __entry->pe, - __print_symbolic_u64(__entry->cmd, LLCMD_NAMES), - __entry->rc - ) -); - -DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl, - TP_PROTO(struct cxl_afu *afu, u64 cmd), - - TP_ARGS(afu, cmd), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u64, cmd) - ), - - TP_fast_assign( - __entry->card = afu->adapter->adapter_num; - __entry->afu = afu->slice; - __entry->cmd = cmd; - ), - - TP_printk("afu%i.%i cmd=%s", - __entry->card, - __entry->afu, - __print_symbolic_u64(__entry->cmd, AFU_COMMANDS) - ) -); - -DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done, - TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), - - TP_ARGS(afu, cmd, rc), - - TP_STRUCT__entry( - __field(u8, card) - __field(u8, afu) - __field(u64, cmd) - __field(int, rc) - ), - - TP_fast_assign( - __entry->card = afu->adapter->adapter_num; - __entry->afu = afu->slice; - __entry->rc = rc; - __entry->cmd = cmd; - ), - - TP_printk("afu%i.%i cmd=%s rc=%i", - __entry->card, - __entry->afu, - __print_symbolic_u64(__entry->cmd, AFU_COMMANDS), - __entry->rc - ) -); - -DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl, - TP_PROTO(struct cxl_afu *afu, u64 cmd), - TP_ARGS(afu, cmd) -); - -DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done, - TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), - TP_ARGS(afu, cmd, rc) -); - -DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl, - TP_PROTO(struct cxl_afu *afu, u64 cmd), - TP_ARGS(afu, cmd), - - TP_printk("psl%i.%i cmd=%s", - __entry->card, - __entry->afu, - __print_symbolic_u64(__entry->cmd, PSL_COMMANDS) - ) -); - -DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done, - TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), - TP_ARGS(afu, cmd, rc), - - TP_printk("psl%i.%i cmd=%s rc=%i", - __entry->card, - __entry->afu, - __print_symbolic_u64(__entry->cmd, PSL_COMMANDS), - __entry->rc - ) -); - -DEFINE_EVENT(cxl_pe_class, cxl_slbia, - TP_PROTO(struct cxl_context *ctx), - TP_ARGS(ctx) -); - -TRACE_EVENT(cxl_hcall, - TP_PROTO(u64 unit_address, u64 process_token, long rc), - - TP_ARGS(unit_address, process_token, rc), - - TP_STRUCT__entry( - __field(u64, unit_address) - __field(u64, process_token) - __field(long, rc) - ), - - TP_fast_assign( - __entry->unit_address = unit_address; - __entry->process_token = process_token; - __entry->rc = rc; - ), - - TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li", - __entry->unit_address, - __entry->process_token, - __entry->rc - ) -); - -TRACE_EVENT(cxl_hcall_control, - TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, - u64 p4, unsigned long r4, long rc), - - TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc), - - TP_STRUCT__entry( - __field(u64, unit_address) - __field(char *, fct) - __field(u64, p1) - __field(u64, p2) - __field(u64, p3) - __field(u64, p4) - __field(unsigned long, r4) - __field(long, rc) - ), - - TP_fast_assign( - __entry->unit_address = unit_address; - __entry->fct = fct; - __entry->p1 = p1; - __entry->p2 = p2; - __entry->p3 = p3; - __entry->p4 = p4; - __entry->r4 = r4; - __entry->rc = rc; - ), - - TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li", - __entry->unit_address, - __entry->fct, - __entry->p1, - __entry->p2, - __entry->p3, - __entry->p4, - __entry->r4, - __entry->rc - ) -); - -TRACE_EVENT(cxl_hcall_attach, - TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token, - unsigned long mmio_addr, unsigned long mmio_size, long rc), - - TP_ARGS(unit_address, phys_addr, process_token, - mmio_addr, mmio_size, rc), - - TP_STRUCT__entry( - __field(u64, unit_address) - __field(u64, phys_addr) - __field(unsigned long, process_token) - __field(unsigned long, mmio_addr) - __field(unsigned long, mmio_size) - __field(long, rc) - ), - - TP_fast_assign( - __entry->unit_address = unit_address; - __entry->phys_addr = phys_addr; - __entry->process_token = process_token; - __entry->mmio_addr = mmio_addr; - __entry->mmio_size = mmio_size; - __entry->rc = rc; - ), - - TP_printk("unit_address=0x%016llx phys_addr=0x%016llx " - "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li", - __entry->unit_address, - __entry->phys_addr, - __entry->process_token, - __entry->mmio_addr, - __entry->mmio_size, - __entry->rc - ) -); - -DEFINE_EVENT(cxl_hcall, cxl_hcall_detach, - TP_PROTO(u64 unit_address, u64 process_token, long rc), - TP_ARGS(unit_address, process_token, rc) -); - -DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function, - TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, - u64 p4, unsigned long r4, long rc), - TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) -); - -DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info, - TP_PROTO(u64 unit_address, u64 process_token, long rc), - TP_ARGS(unit_address, process_token, rc) -); - -TRACE_EVENT(cxl_hcall_control_faults, - TP_PROTO(u64 unit_address, u64 process_token, - u64 control_mask, u64 reset_mask, unsigned long r4, - long rc), - - TP_ARGS(unit_address, process_token, - control_mask, reset_mask, r4, rc), - - TP_STRUCT__entry( - __field(u64, unit_address) - __field(u64, process_token) - __field(u64, control_mask) - __field(u64, reset_mask) - __field(unsigned long, r4) - __field(long, rc) - ), - - TP_fast_assign( - __entry->unit_address = unit_address; - __entry->process_token = process_token; - __entry->control_mask = control_mask; - __entry->reset_mask = reset_mask; - __entry->r4 = r4; - __entry->rc = rc; - ), - - TP_printk("unit_address=0x%016llx process_token=0x%llx " - "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li", - __entry->unit_address, - __entry->process_token, - __entry->control_mask, - __entry->reset_mask, - __entry->r4, - __entry->rc - ) -); - -DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility, - TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, - u64 p4, unsigned long r4, long rc), - TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) -); - -TRACE_EVENT(cxl_hcall_download_facility, - TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num, - unsigned long r4, long rc), - - TP_ARGS(unit_address, fct, list_address, num, r4, rc), - - TP_STRUCT__entry( - __field(u64, unit_address) - __field(char *, fct) - __field(u64, list_address) - __field(u64, num) - __field(unsigned long, r4) - __field(long, rc) - ), - - TP_fast_assign( - __entry->unit_address = unit_address; - __entry->fct = fct; - __entry->list_address = list_address; - __entry->num = num; - __entry->r4 = r4; - __entry->rc = rc; - ), - - TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li", - __entry->unit_address, - __entry->fct, - __entry->list_address, - __entry->num, - __entry->r4, - __entry->rc - ) -); - -#endif /* _CXL_TRACE_H */ - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c deleted file mode 100644 index 6332db8044bd..000000000000 --- a/drivers/misc/cxl/vphb.c +++ /dev/null @@ -1,309 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014 IBM Corp. - */ - -#include -#include -#include "cxl.h" - -static int cxl_pci_probe_mode(struct pci_bus *bus) -{ - return PCI_PROBE_NORMAL; -} - -static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - return -ENODEV; -} - -static void cxl_teardown_msi_irqs(struct pci_dev *pdev) -{ - /* - * MSI should never be set but need still need to provide this call - * back. - */ -} - -static bool cxl_pci_enable_device_hook(struct pci_dev *dev) -{ - struct pci_controller *phb; - struct cxl_afu *afu; - struct cxl_context *ctx; - - phb = pci_bus_to_host(dev->bus); - afu = (struct cxl_afu *)phb->private_data; - - if (!cxl_ops->link_ok(afu->adapter, afu)) { - dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); - return false; - } - - dev->dev.archdata.dma_offset = PAGE_OFFSET; - - /* - * Allocate a context to do cxl things too. If we eventually do real - * DMA ops, we'll need a default context to attach them to - */ - ctx = cxl_dev_context_init(dev); - if (IS_ERR(ctx)) - return false; - dev->dev.archdata.cxl_ctx = ctx; - - return (cxl_ops->afu_check_and_enable(afu) == 0); -} - -static void cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_context *ctx = cxl_get_context(dev); - - if (ctx) { - if (ctx->status == STARTED) { - dev_err(&dev->dev, "Default context started\n"); - return; - } - dev->dev.archdata.cxl_ctx = NULL; - cxl_release_context(ctx); - } -} - -static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) -{ - /* Should we do an AFU reset here ? */ -} - -static int cxl_pcie_cfg_record(u8 bus, u8 devfn) -{ - return (bus << 8) + devfn; -} - -static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) -{ - struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL; - - return phb ? phb->private_data : NULL; -} - -static void cxl_afu_configured_put(struct cxl_afu *afu) -{ - atomic_dec_if_positive(&afu->configured_state); -} - -static bool cxl_afu_configured_get(struct cxl_afu *afu) -{ - return atomic_inc_unless_negative(&afu->configured_state); -} - -static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, - struct cxl_afu *afu, int *_record) -{ - int record; - - record = cxl_pcie_cfg_record(bus->number, devfn); - if (record > afu->crs_num) - return PCIBIOS_DEVICE_NOT_FOUND; - - *_record = record; - return 0; -} - -static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - int rc, record; - struct cxl_afu *afu; - u8 val8; - u16 val16; - u32 val32; - - afu = pci_bus_to_afu(bus); - /* Grab a reader lock on afu. */ - if (afu == NULL || !cxl_afu_configured_get(afu)) - return PCIBIOS_DEVICE_NOT_FOUND; - - rc = cxl_pcie_config_info(bus, devfn, afu, &record); - if (rc) - goto out; - - switch (len) { - case 1: - rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8); - *val = val8; - break; - case 2: - rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16); - *val = val16; - break; - case 4: - rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32); - *val = val32; - break; - default: - WARN_ON(1); - } - -out: - cxl_afu_configured_put(afu); - return rc ? PCIBIOS_DEVICE_NOT_FOUND : 0; -} - -static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - int rc, record; - struct cxl_afu *afu; - - afu = pci_bus_to_afu(bus); - /* Grab a reader lock on afu. */ - if (afu == NULL || !cxl_afu_configured_get(afu)) - return PCIBIOS_DEVICE_NOT_FOUND; - - rc = cxl_pcie_config_info(bus, devfn, afu, &record); - if (rc) - goto out; - - switch (len) { - case 1: - rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff); - break; - case 2: - rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff); - break; - case 4: - rc = cxl_ops->afu_cr_write32(afu, record, offset, val); - break; - default: - WARN_ON(1); - } - -out: - cxl_afu_configured_put(afu); - return rc ? PCIBIOS_SET_FAILED : 0; -} - -static struct pci_ops cxl_pcie_pci_ops = -{ - .read = cxl_pcie_read_config, - .write = cxl_pcie_write_config, -}; - - -static struct pci_controller_ops cxl_pci_controller_ops = -{ - .probe_mode = cxl_pci_probe_mode, - .enable_device_hook = cxl_pci_enable_device_hook, - .disable_device = cxl_pci_disable_device, - .release_device = cxl_pci_disable_device, - .reset_secondary_bus = cxl_pci_reset_secondary_bus, - .setup_msi_irqs = cxl_setup_msi_irqs, - .teardown_msi_irqs = cxl_teardown_msi_irqs, -}; - -int cxl_pci_vphb_add(struct cxl_afu *afu) -{ - struct pci_controller *phb; - struct device_node *vphb_dn; - struct device *parent; - - /* - * If there are no AFU configuration records we won't have anything to - * expose under the vPHB, so skip creating one, returning success since - * this is still a valid case. This will also opt us out of EEH - * handling since we won't have anything special to do if there are no - * kernel drivers attached to the vPHB, and EEH handling is not yet - * supported in the peer model. - */ - if (!afu->crs_num) - return 0; - - /* The parent device is the adapter. Reuse the device node of - * the adapter. - * We don't seem to care what device node is used for the vPHB, - * but tools such as lsvpd walk up the device parents looking - * for a valid location code, so we might as well show devices - * attached to the adapter as being located on that adapter. - */ - parent = afu->adapter->dev.parent; - vphb_dn = parent->of_node; - - /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(vphb_dn); - if (!phb) - return -ENODEV; - - /* Setup parent in sysfs */ - phb->parent = parent; - - /* Setup the PHB using arch provided callback */ - phb->ops = &cxl_pcie_pci_ops; - phb->cfg_addr = NULL; - phb->cfg_data = NULL; - phb->private_data = afu; - phb->controller_ops = cxl_pci_controller_ops; - - /* Scan the bus */ - pcibios_scan_phb(phb); - if (phb->bus == NULL) - return -ENXIO; - - /* Set release hook on root bus */ - pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge), - pcibios_free_controller_deferred, - (void *) phb); - - /* Claim resources. This might need some rework as well depending - * whether we are doing probe-only or not, like assigning unassigned - * resources etc... - */ - pcibios_claim_one_bus(phb->bus); - - /* Add probed PCI devices to the device model */ - pci_bus_add_devices(phb->bus); - - afu->phb = phb; - - return 0; -} - -void cxl_pci_vphb_remove(struct cxl_afu *afu) -{ - struct pci_controller *phb; - - /* If there is no configuration record we won't have one of these */ - if (!afu || !afu->phb) - return; - - phb = afu->phb; - afu->phb = NULL; - - pci_remove_root_bus(phb->bus); - /* - * We don't free phb here - that's handled by - * pcibios_free_controller_deferred() - */ -} - -bool cxl_pci_is_vphb_device(struct pci_dev *dev) -{ - struct pci_controller *phb; - - phb = pci_bus_to_host(dev->bus); - - return (phb->ops == &cxl_pcie_pci_ops); -} - -struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) -{ - struct pci_controller *phb; - - phb = pci_bus_to_host(dev->bus); - - return (struct cxl_afu *)phb->private_data; -} -EXPORT_SYMBOL_GPL(cxl_pci_to_afu); - -unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev) -{ - return cxl_pcie_cfg_record(dev->bus->number, dev->devfn); -} -EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record); diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h deleted file mode 100644 index 2251da7f32d9..000000000000 --- a/include/misc/cxl-base.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 IBM Corp. - */ - -#ifndef _MISC_CXL_BASE_H -#define _MISC_CXL_BASE_H - -#ifdef CONFIG_CXL_BASE - -#define CXL_IRQ_RANGES 4 - -struct cxl_irq_ranges { - irq_hw_number_t offset[CXL_IRQ_RANGES]; - irq_hw_number_t range[CXL_IRQ_RANGES]; -}; - -extern atomic_t cxl_use_count; - -static inline bool cxl_ctx_in_use(void) -{ - return (atomic_read(&cxl_use_count) != 0); -} - -static inline void cxl_ctx_get(void) -{ - atomic_inc(&cxl_use_count); -} - -static inline void cxl_ctx_put(void) -{ - atomic_dec(&cxl_use_count); -} - -struct cxl_afu *cxl_afu_get(struct cxl_afu *afu); -void cxl_afu_put(struct cxl_afu *afu); -void cxl_slbia(struct mm_struct *mm); - -#else /* CONFIG_CXL_BASE */ - -static inline bool cxl_ctx_in_use(void) { return false; } -static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) { return NULL; } -static inline void cxl_afu_put(struct cxl_afu *afu) {} -static inline void cxl_slbia(struct mm_struct *mm) {} - -#endif /* CONFIG_CXL_BASE */ - -#endif diff --git a/include/misc/cxl.h b/include/misc/cxl.h deleted file mode 100644 index d8044299d654..000000000000 --- a/include/misc/cxl.h +++ /dev/null @@ -1,265 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2015 IBM Corp. - */ - -#ifndef _MISC_CXL_H -#define _MISC_CXL_H - -#include -#include -#include -#include - -/* - * This documents the in kernel API for driver to use CXL. It allows kernel - * drivers to bind to AFUs using an AFU configuration record exposed as a PCI - * configuration record. - * - * This API enables control over AFU and contexts which can't be part of the - * generic PCI API. This API is agnostic to the actual AFU. - */ - -/* Get the AFU associated with a pci_dev */ -struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); - -/* Get the AFU conf record number associated with a pci_dev */ -unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); - - -/* - * Context lifetime overview: - * - * An AFU context may be inited and then started and stopped multiple times - * before it's released. ie. - * - cxl_dev_context_init() - * - cxl_start_context() - * - cxl_stop_context() - * - cxl_start_context() - * - cxl_stop_context() - * ...repeat... - * - cxl_release_context() - * Once released, a context can't be started again. - * - * One context is inited by the cxl driver for every pci_dev. This is to be - * used as a default kernel context. cxl_get_context() will get this - * context. This context will be released by PCI hot unplug, so doesn't need to - * be released explicitly by drivers. - * - * Additional kernel contexts may be inited using cxl_dev_context_init(). - * These must be released using cxl_context_detach(). - * - * Once a context has been inited, IRQs may be configured. Firstly these IRQs - * must be allocated (cxl_allocate_afu_irqs()), then individually mapped to - * specific handlers (cxl_map_afu_irq()). - * - * These IRQs can be unmapped (cxl_unmap_afu_irq()) and finally released - * (cxl_free_afu_irqs()). - * - * The AFU can be reset (cxl_afu_reset()). This will cause the PSL/AFU - * hardware to lose track of all contexts. It's upto the caller of - * cxl_afu_reset() to restart these contexts. - */ - -/* - * On pci_enabled_device(), the cxl driver will init a single cxl context for - * use by the driver. It doesn't start this context (as that will likely - * generate DMA traffic for most AFUs). - * - * This gets the default context associated with this pci_dev. This context - * doesn't need to be released as this will be done by the PCI subsystem on hot - * unplug. - */ -struct cxl_context *cxl_get_context(struct pci_dev *dev); -/* - * Allocate and initalise a context associated with a AFU PCI device. This - * doesn't start the context in the AFU. - */ -struct cxl_context *cxl_dev_context_init(struct pci_dev *dev); -/* - * Release and free a context. Context should be stopped before calling. - */ -int cxl_release_context(struct cxl_context *ctx); - -/* - * Set and get private data associated with a context. Allows drivers to have a - * back pointer to some useful structure. - */ -int cxl_set_priv(struct cxl_context *ctx, void *priv); -void *cxl_get_priv(struct cxl_context *ctx); - -/* - * Allocate AFU interrupts for this context. num=0 will allocate the default - * for this AFU as given in the AFU descriptor. This number doesn't include the - * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be - * used must map a handler with cxl_map_afu_irq. - */ -int cxl_allocate_afu_irqs(struct cxl_context *cxl, int num); -/* Free allocated interrupts */ -void cxl_free_afu_irqs(struct cxl_context *cxl); - -/* - * Map a handler for an AFU interrupt associated with a particular context. AFU - * IRQS numbers start from 1 (CAIA defines AFU IRQ 0 for page faults). cookie - * is private data is that will be provided to the interrupt handler. - */ -int cxl_map_afu_irq(struct cxl_context *cxl, int num, - irq_handler_t handler, void *cookie, char *name); -/* unmap mapped IRQ handlers */ -void cxl_unmap_afu_irq(struct cxl_context *cxl, int num, void *cookie); - -/* - * Start work on the AFU. This starts an cxl context and associates it with a - * task. task == NULL will make it a kernel context. - */ -int cxl_start_context(struct cxl_context *ctx, u64 wed, - struct task_struct *task); -/* - * Stop a context and remove it from the PSL - */ -int cxl_stop_context(struct cxl_context *ctx); - -/* Reset the AFU */ -int cxl_afu_reset(struct cxl_context *ctx); - -/* - * Set a context as a master context. - * This sets the default problem space area mapped as the full space, rather - * than just the per context area (for slaves). - */ -void cxl_set_master(struct cxl_context *ctx); - -/* - * Map and unmap the AFU Problem Space area. The amount and location mapped - * depends on if this context is a master or slave. - */ -void __iomem *cxl_psa_map(struct cxl_context *ctx); -void cxl_psa_unmap(void __iomem *addr); - -/* Get the process element for this context */ -int cxl_process_element(struct cxl_context *ctx); - -/* - * These calls allow drivers to create their own file descriptors and make them - * identical to the cxl file descriptor user API. An example use case: - * - * struct file_operations cxl_my_fops = {}; - * ...... - * // Init the context - * ctx = cxl_dev_context_init(dev); - * if (IS_ERR(ctx)) - * return PTR_ERR(ctx); - * // Create and attach a new file descriptor to my file ops - * file = cxl_get_fd(ctx, &cxl_my_fops, &fd); - * // Start context - * rc = cxl_start_work(ctx, &work.work); - * if (rc) { - * fput(file); - * put_unused_fd(fd); - * return -ENODEV; - * } - * // No error paths after installing the fd - * fd_install(fd, file); - * return fd; - * - * This inits a context, and gets a file descriptor and associates some file - * ops to that file descriptor. If the file ops are blank, the cxl driver will - * fill them in with the default ones that mimic the standard user API. Once - * completed, the file descriptor can be installed. Once the file descriptor is - * installed, it's visible to the user so no errors must occur past this point. - * - * If cxl_fd_release() file op call is installed, the context will be stopped - * and released when the fd is released. Hence the driver won't need to manage - * this itself. - */ - -/* - * Take a context and associate it with my file ops. Returns the associated - * file and file descriptor. Any file ops which are blank are filled in by the - * cxl driver with the default ops to mimic the standard API. - */ -struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, - int *fd); -/* Get the context associated with this file */ -struct cxl_context *cxl_fops_get_context(struct file *file); -/* - * Start a context associated a struct cxl_ioctl_start_work used by the - * standard cxl user API. - */ -int cxl_start_work(struct cxl_context *ctx, - struct cxl_ioctl_start_work *work); -/* - * Export all the existing fops so drivers can use them - */ -int cxl_fd_open(struct inode *inode, struct file *file); -int cxl_fd_release(struct inode *inode, struct file *file); -long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm); -__poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll); -ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, - loff_t *off); - -/* - * For EEH, a driver may want to assert a PERST will reload the same image - * from flash into the FPGA. - * - * This is a property of the entire adapter, not a single AFU, so drivers - * should set this property with care! - */ -void cxl_perst_reloads_same_image(struct cxl_afu *afu, - bool perst_reloads_same_image); - -/* - * Read the VPD for the card where the AFU resides - */ -ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count); - -/* - * AFU driver ops allow an AFU driver to create their own events to pass to - * userspace through the file descriptor as a simpler alternative to overriding - * the read() and poll() calls that works with the generic cxl events. These - * events are given priority over the generic cxl events, so they will be - * delivered first if multiple types of events are pending. - * - * The AFU driver must call cxl_context_events_pending() to notify the cxl - * driver that new events are ready to be delivered for a specific context. - * cxl_context_events_pending() will adjust the current count of AFU driver - * events for this context, and wake up anyone waiting on the context wait - * queue. - * - * The cxl driver will then call fetch_event() to get a structure defining - * the size and address of the driver specific event data. The cxl driver - * will build a cxl header with type and process_element fields filled in, - * and header.size set to sizeof(struct cxl_event_header) + data_size. - * The total size of the event is limited to CXL_READ_MIN_SIZE (4K). - * - * fetch_event() is called with a spin lock held, so it must not sleep. - * - * The cxl driver will then deliver the event to userspace, and finally - * call event_delivered() to return the status of the operation, identified - * by cxl context and AFU driver event data pointers. - * 0 Success - * -EFAULT copy_to_user() has failed - * -EINVAL Event data pointer is NULL, or event size is greater than - * CXL_READ_MIN_SIZE. - */ -struct cxl_afu_driver_ops { - struct cxl_event_afu_driver_reserved *(*fetch_event) ( - struct cxl_context *ctx); - void (*event_delivered) (struct cxl_context *ctx, - struct cxl_event_afu_driver_reserved *event, - int rc); -}; - -/* - * Associate the above driver ops with a specific context. - * Reset the current count of AFU driver events. - */ -void cxl_set_driver_ops(struct cxl_context *ctx, - struct cxl_afu_driver_ops *ops); - -/* Notify cxl driver that new events are ready to be delivered for context */ -void cxl_context_events_pending(struct cxl_context *ctx, - unsigned int new_events); - -#endif /* _MISC_CXL_H */ diff --git a/include/misc/cxllib.h b/include/misc/cxllib.h deleted file mode 100644 index eacc417288fc..000000000000 --- a/include/misc/cxllib.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2017 IBM Corp. - */ - -#ifndef _MISC_CXLLIB_H -#define _MISC_CXLLIB_H - -#include -#include - -/* - * cxl driver exports a in-kernel 'library' API which can be called by - * other drivers to help interacting with an IBM XSL. - */ - -/* - * tells whether capi is supported on the PCIe slot where the - * device is seated - * - * Input: - * dev: device whose slot needs to be checked - * flags: 0 for the time being - */ -bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags); - - -/* - * Returns the configuration parameters to be used by the XSL or device - * - * Input: - * dev: device, used to find PHB - * Output: - * struct cxllib_xsl_config: - * version - * capi BAR address, i.e. 0x2000000000000-0x2FFFFFFFFFFFF - * capi BAR size - * data send control (XSL_DSNCTL) - * dummy read address (XSL_DRA) - */ -#define CXL_XSL_CONFIG_VERSION1 1 -struct cxllib_xsl_config { - u32 version; /* format version for register encoding */ - u32 log_bar_size;/* log size of the capi_window */ - u64 bar_addr; /* address of the start of capi window */ - u64 dsnctl; /* matches definition of XSL_DSNCTL */ - u64 dra; /* real address that can be used for dummy read */ -}; - -int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg); - - -/* - * Activate capi for the pci host bridge associated with the device. - * Can be extended to deactivate once we know how to do it. - * Device must be ready to accept messages from the CAPP unit and - * respond accordingly (TLB invalidates, ...) - * - * PHB is switched to capi mode through calls to skiboot. - * CAPP snooping is activated - * - * Input: - * dev: device whose PHB should switch mode - * mode: mode to switch to i.e. CAPI or PCI - * flags: options related to the mode - */ -enum cxllib_mode { - CXL_MODE_CXL, - CXL_MODE_PCI, -}; - -#define CXL_MODE_NO_DMA 0 -#define CXL_MODE_DMA_TVT0 1 -#define CXL_MODE_DMA_TVT1 2 - -int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, - unsigned long flags); - - -/* - * Set the device for capi DMA. - * Define its dma_ops and dma offset so that allocations will be using TVT#1 - * - * Input: - * dev: device to set - * flags: options. CXL_MODE_DMA_TVT1 should be used - */ -int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags); - - -/* - * Get the Process Element structure for the given thread - * - * Input: - * task: task_struct for the context of the translation - * translation_mode: whether addresses should be translated - * Output: - * attr: attributes to fill up the Process Element structure from CAIA - */ -struct cxllib_pe_attributes { - u64 sr; - u32 lpid; - u32 tid; - u32 pid; -}; -#define CXL_TRANSLATED_MODE 0 -#define CXL_REAL_MODE 1 - -int cxllib_get_PE_attributes(struct task_struct *task, - unsigned long translation_mode, struct cxllib_pe_attributes *attr); - - -/* - * Handle memory fault. - * Fault in all the pages of the specified buffer for the permissions - * provided in ‘flags’ - * - * Shouldn't be called from interrupt context - * - * Input: - * mm: struct mm for the thread faulting the pages - * addr: base address of the buffer to page in - * size: size of the buffer to page in - * flags: permission requested (DSISR_ISSTORE...) - */ -int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags); - - -#endif /* _MISC_CXLLIB_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h deleted file mode 100644 index 56376d3907d8..000000000000 --- a/include/uapi/misc/cxl.h +++ /dev/null @@ -1,156 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * Copyright 2014 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _UAPI_MISC_CXL_H -#define _UAPI_MISC_CXL_H - -#include -#include - - -struct cxl_ioctl_start_work { - __u64 flags; - __u64 work_element_descriptor; - __u64 amr; - __s16 num_interrupts; - __u16 tid; - __s32 reserved1; - __u64 reserved2; - __u64 reserved3; - __u64 reserved4; - __u64 reserved5; -}; - -#define CXL_START_WORK_AMR 0x0000000000000001ULL -#define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL -#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL -#define CXL_START_WORK_TID 0x0000000000000008ULL -#define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ - CXL_START_WORK_NUM_IRQS |\ - CXL_START_WORK_ERR_FF |\ - CXL_START_WORK_TID) - - -/* Possible modes that an afu can be in */ -#define CXL_MODE_DEDICATED 0x1 -#define CXL_MODE_DIRECTED 0x2 - -/* possible flags for the cxl_afu_id flags field */ -#define CXL_AFUID_FLAG_SLAVE 0x1 /* In directed-mode afu is in slave mode */ - -struct cxl_afu_id { - __u64 flags; /* One of CXL_AFUID_FLAG_X */ - __u32 card_id; - __u32 afu_offset; - __u32 afu_mode; /* one of the CXL_MODE_X */ - __u32 reserved1; - __u64 reserved2; - __u64 reserved3; - __u64 reserved4; - __u64 reserved5; - __u64 reserved6; -}; - -/* base adapter image header is included in the image */ -#define CXL_AI_NEED_HEADER 0x0000000000000001ULL -#define CXL_AI_ALL CXL_AI_NEED_HEADER - -#define CXL_AI_HEADER_SIZE 128 -#define CXL_AI_BUFFER_SIZE 4096 -#define CXL_AI_MAX_ENTRIES 256 -#define CXL_AI_MAX_CHUNK_SIZE (CXL_AI_BUFFER_SIZE * CXL_AI_MAX_ENTRIES) - -struct cxl_adapter_image { - __u64 flags; - __u64 data; - __u64 len_data; - __u64 len_image; - __u64 reserved1; - __u64 reserved2; - __u64 reserved3; - __u64 reserved4; -}; - -/* ioctl numbers */ -#define CXL_MAGIC 0xCA -/* AFU devices */ -#define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) -#define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) -#define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id) -/* adapter devices */ -#define CXL_IOCTL_DOWNLOAD_IMAGE _IOW(CXL_MAGIC, 0x0A, struct cxl_adapter_image) -#define CXL_IOCTL_VALIDATE_IMAGE _IOW(CXL_MAGIC, 0x0B, struct cxl_adapter_image) - -#define CXL_READ_MIN_SIZE 0x1000 /* 4K */ - -/* Events from read() */ -enum cxl_event_type { - CXL_EVENT_RESERVED = 0, - CXL_EVENT_AFU_INTERRUPT = 1, - CXL_EVENT_DATA_STORAGE = 2, - CXL_EVENT_AFU_ERROR = 3, - CXL_EVENT_AFU_DRIVER = 4, -}; - -struct cxl_event_header { - __u16 type; - __u16 size; - __u16 process_element; - __u16 reserved1; -}; - -struct cxl_event_afu_interrupt { - __u16 flags; - __u16 irq; /* Raised AFU interrupt number */ - __u32 reserved1; -}; - -struct cxl_event_data_storage { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 addr; - __u64 dsisr; - __u64 reserved3; -}; - -struct cxl_event_afu_error { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 error; -}; - -struct cxl_event_afu_driver_reserved { - /* - * Defines the buffer passed to the cxl driver by the AFU driver. - * - * This is not ABI since the event header.size passed to the user for - * existing events is set in the read call to sizeof(cxl_event_header) - * + sizeof(whatever event is being dispatched) and the user is already - * required to use a 4K buffer on the read call. - * - * Of course the contents will be ABI, but that's up the AFU driver. - */ - __u32 data_size; - __u8 data[]; -}; - -struct cxl_event { - struct cxl_event_header header; - union { - struct cxl_event_afu_interrupt irq; - struct cxl_event_data_storage fault; - struct cxl_event_afu_error afu_error; - struct cxl_event_afu_driver_reserved afu_driver_event; - }; -}; - -#endif /* _UAPI_MISC_CXL_H */ From e01968420a99ab34f607ad35111b9158caabc3db Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Mar 2025 07:07:35 +0100 Subject: [PATCH 0805/2157] MAINTAINERS: driver core: mark Rafael and Danilo as co-maintainers In talking it over with Rafael and Danilo, it makes more sense for everyone to be a maintainer here, to share the load where possible. Acked-by: Rafael J. Wysocki Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/2025031634-playing-lark-95f9@gregkh Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0427b32b3688..2f6e379547bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7101,8 +7101,8 @@ F: include/linux/component.h DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman -R: "Rafael J. Wysocki" -R: Danilo Krummrich +M: "Rafael J. Wysocki" +M: Danilo Krummrich S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git F: Documentation/core-api/kobject.rst From e22bbb8e97846bfb5a6942a2322f0237ff13df0f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Mar 2025 12:06:18 +0100 Subject: [PATCH 0806/2157] kbuild: link-vmlinux.sh: Make output file name configurable In order to introduce an intermediate, non-stripped vmlinux build that can be used by other build steps as an input, pass the output file name to link-vmlinux.sh via its command line. Signed-off-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- scripts/Makefile.vmlinux | 2 +- scripts/link-vmlinux.sh | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index fb79fd6b2465..487f0bf716ad 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -69,7 +69,7 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link cmd_link_vmlinux = \ - $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)"; \ + $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)" "$@"; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) targets += vmlinux diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 96ae0bb65308..b3d928925598 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -31,6 +31,7 @@ set -e LD="$1" KBUILD_LDFLAGS="$2" LDFLAGS_vmlinux="$3" +VMLINUX="$4" is_enabled() { grep -q "^$1=y" include/config/auto.conf @@ -283,23 +284,23 @@ if is_enabled CONFIG_VMLINUX_MAP; then generate_map=1 fi -vmlinux_link vmlinux +vmlinux_link "${VMLINUX}" # fill in BTF IDs if is_enabled CONFIG_DEBUG_INFO_BTF; then - info BTFIDS vmlinux + info BTFIDS "${VMLINUX}" RESOLVE_BTFIDS_ARGS="" if is_enabled CONFIG_WERROR; then RESOLVE_BTFIDS_ARGS=" --fatal_warnings " fi - ${RESOLVE_BTFIDS} ${RESOLVE_BTFIDS_ARGS} vmlinux + ${RESOLVE_BTFIDS} ${RESOLVE_BTFIDS_ARGS} "${VMLINUX}" fi -mksysmap vmlinux System.map +mksysmap "${VMLINUX}" System.map if is_enabled CONFIG_BUILDTIME_TABLE_SORT; then - info SORTTAB vmlinux - if ! sorttable vmlinux; then + info SORTTAB "${VMLINUX}" + if ! sorttable "${VMLINUX}"; then echo >&2 Failed to sort kernel tables exit 1 fi @@ -315,4 +316,4 @@ if is_enabled CONFIG_KALLSYMS; then fi # For fixdep -echo "vmlinux: $0" > .vmlinux.d +echo "${VMLINUX}: $0" > ".${VMLINUX}.d" From 9b400d17259b70d1d68585028e96b30152d0796a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Mar 2025 12:06:19 +0100 Subject: [PATCH 0807/2157] kbuild: Introduce Kconfig symbol for linking vmlinux with relocations Some architectures build vmlinux with static relocations preserved, but strip them again from the final vmlinux image. Arch specific tools consume these static relocations in order to construct relocation tables for KASLR. The fact that vmlinux is created, consumed and subsequently updated goes against the typical, declarative paradigm used by Make, which is based on rules and dependencies. So as a first step towards cleaning this up, introduce a Kconfig symbol to declare that the arch wants to consume the static relocations emitted into vmlinux. This will be wired up further in subsequent patches. Signed-off-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- Makefile | 4 ++++ arch/Kconfig | 7 +++++++ arch/mips/Kconfig | 1 + arch/mips/Makefile | 4 ---- arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 2 +- arch/s390/Kconfig | 1 + arch/s390/Makefile | 2 +- arch/x86/Kconfig | 1 + arch/x86/Makefile | 6 ------ 10 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 30242e731a0d..0904b73872cb 100644 --- a/Makefile +++ b/Makefile @@ -1120,6 +1120,10 @@ ifdef CONFIG_LD_ORPHAN_WARN LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif +ifneq ($(CONFIG_ARCH_VMLINUX_NEEDS_RELOCS),) +LDFLAGS_vmlinux += --emit-relocs --discard-none +endif + # Align the bit size of userspace programs with the kernel KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) diff --git a/arch/Kconfig b/arch/Kconfig index b8a4ff365582..101a13fcde8e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1695,6 +1695,13 @@ config ARCH_HAS_KERNEL_FPU_SUPPORT Architectures that select this option can run floating-point code in the kernel, as described in Documentation/core-api/floating-point.rst. +config ARCH_VMLINUX_NEEDS_RELOCS + bool + help + Whether the architecture needs vmlinux to be built with static + relocations preserved. This is used by some architectures to + construct bespoke relocation tables for KASLR. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1924f2d83932..5aedbd7afadb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2617,6 +2617,7 @@ config RELOCATABLE CPU_MIPS32_R6 || CPU_MIPS64_R6 || \ CPU_P5600 || CAVIUM_OCTEON_SOC || \ CPU_LOONGSON64 + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. diff --git a/arch/mips/Makefile b/arch/mips/Makefile index be8cb44a89fd..d9057e29bc62 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -100,10 +100,6 @@ LDFLAGS_vmlinux += -G 0 -static -n -nostdlib KBUILD_AFLAGS_MODULE += -mlong-calls KBUILD_CFLAGS_MODULE += -mlong-calls -ifeq ($(CONFIG_RELOCATABLE),y) -LDFLAGS_vmlinux += --emit-relocs -endif - cflags-y += -ffreestanding cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..6f5800114416 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1077,6 +1077,7 @@ config RELOCATABLE bool "Build a relocatable kernel" depends on MMU && 64BIT && !XIP_KERNEL select MODULE_SECTIONS if MODULES + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 13fbc0f94238..6ef0d10e0c50 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -8,7 +8,7 @@ LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) - LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs + LDFLAGS_vmlinux += -shared -Bsymbolic -z notext KBUILD_CFLAGS += -fPIE endif ifeq ($(CONFIG_DYNAMIC_FTRACE),y) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..ea67b7317138 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -630,6 +630,7 @@ endchoice config RELOCATABLE def_bool y + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5fae311203c2..d5f4be440879 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -15,7 +15,7 @@ KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 KBUILD_CFLAGS += -fPIC -LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none +LDFLAGS_vmlinux := -no-pie extra_tools := relocs aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0e27ebd7e36a..57fc0e7635c9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2200,6 +2200,7 @@ config RANDOMIZE_BASE config X86_NEED_RELOCS def_bool y depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) + select ARCH_VMLINUX_NEEDS_RELOCS config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5b773b34768d..f65ed6dcd6fb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -251,12 +251,6 @@ endif KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) -ifdef CONFIG_X86_NEED_RELOCS -LDFLAGS_vmlinux := --emit-relocs --discard-none -else -LDFLAGS_vmlinux := -endif - # # The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to # the linker to force 2MB page size regardless of the default page size used From ac4f06789b4f9c17357e81e918879c6e2ffdd075 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Mar 2025 12:06:20 +0100 Subject: [PATCH 0808/2157] kbuild: Create intermediate vmlinux build with relocations preserved The imperative paradigm used to build vmlinux, extract some info from it or perform some checks on it, and subsequently modify it again goes against the declarative paradigm that is usually employed for defining make rules. In particular, the Makefile.postlink files that consume their input via an output rule result in some dodgy logic in the decompressor makefiles for RISC-V and x86, given that the vmlinux.relocs input file needed to generate the arch-specific relocation tables may not exist or be out of date, but cannot be constructed using the ordinary Make dependency based rules, because the info needs to be extracted while vmlinux is in its ephemeral, non-stripped form. So instead, for architectures that require the static relocations that are emitted into vmlinux when passing --emit-relocs to the linker, and are subsequently stripped out again, introduce an intermediate vmlinux target called vmlinux.unstripped, and organize the reset of the build logic accordingly: - vmlinux.unstripped is created only once, and not updated again - build rules under arch/*/boot can depend on vmlinux.unstripped without running the risk of the data disappearing or being out of date - the final vmlinux generated by the build is not bloated with static relocations that are never needed again after the build completes. Signed-off-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- .gitignore | 1 + Makefile | 2 +- arch/mips/Makefile.postlink | 2 +- arch/riscv/Makefile.postlink | 11 +---------- arch/riscv/boot/Makefile | 5 +---- arch/s390/Makefile.postlink | 4 +--- arch/x86/Makefile.postlink | 8 +++----- scripts/Makefile.lib | 3 --- scripts/Makefile.vmlinux | 28 +++++++++++++++++++++------- 9 files changed, 30 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 5937c74d3dc1..f2f63e47fb88 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ modules.order /vmlinux.32 /vmlinux.map /vmlinux.symvers +/vmlinux.unstripped /vmlinux-gdb.py /vmlinuz /System.map diff --git a/Makefile b/Makefile index 0904b73872cb..22593a774cf6 100644 --- a/Makefile +++ b/Makefile @@ -1569,7 +1569,7 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ - modules.builtin.ranges vmlinux.o.map \ + modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ .builtin-dtbs-list .builtin-dtb.S diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 6cfdc149d3bc..ea0add7d56b2 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -22,7 +22,7 @@ quiet_cmd_relocs = RELOCS $@ # `@true` prevents complaint when there is nothing to be done -vmlinux: FORCE +vmlinux vmlinux.unstripped: FORCE @true ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y) $(call if_changed,ls3_llsc) diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index 6b0580949b6a..0e4cf8ad2f14 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -10,26 +10,17 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(srctree)/scripts/Makefile.lib quiet_cmd_relocs_check = CHKREL $@ cmd_relocs_check = \ $(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" -ifdef CONFIG_RELOCATABLE -quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs -cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs - -endif - # `@true` prevents complaint when there is nothing to be done -vmlinux: FORCE +vmlinux vmlinux.unstripped: FORCE @true ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) - $(call if_changed,cp_vmlinux_relocs) - $(call if_changed,strip_relocs) endif clean: diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index b25d524ce5eb..bfc3d0b75b9b 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -32,10 +32,7 @@ $(obj)/xipImage: vmlinux FORCE endif ifdef CONFIG_RELOCATABLE -vmlinux.relocs: vmlinux - @ (! [ -f vmlinux.relocs ] && echo "vmlinux.relocs can't be found, please remove vmlinux and try again") || true - -$(obj)/Image: vmlinux.relocs FORCE +$(obj)/Image: vmlinux.unstripped FORCE else $(obj)/Image: vmlinux FORCE endif diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index 1ae5478cd6ac..c2b737500a91 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -11,7 +11,6 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(srctree)/scripts/Makefile.lib CMD_RELOCS=arch/s390/tools/relocs OUT_RELOCS = arch/s390/boot @@ -20,9 +19,8 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -vmlinux: FORCE +vmlinux.unstripped: FORCE $(call cmd,relocs) - $(call cmd,strip_relocs) clean: @rm -f $(OUT_RELOCS)/relocs.S diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink index 8b8a68162c94..445fce66630f 100644 --- a/arch/x86/Makefile.postlink +++ b/arch/x86/Makefile.postlink @@ -11,23 +11,21 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(srctree)/scripts/Makefile.lib CMD_RELOCS = arch/x86/tools/relocs OUT_RELOCS = arch/x86/boot/compressed -quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs +quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/vmlinux.relocs cmd_relocs = \ mkdir -p $(OUT_RELOCS); \ - $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ + $(CMD_RELOCS) $@ > $(OUT_RELOCS)/vmlinux.relocs; \ $(CMD_RELOCS) --abs-relocs $@ # `@true` prevents complaint when there is nothing to be done -vmlinux: FORCE +vmlinux vmlinux.unstripped: FORCE @true ifeq ($(CONFIG_X86_NEED_RELOCS),y) $(call cmd,relocs) - $(call cmd,strip_relocs) endif clean: diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 47f56ef71937..d1856a70c919 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -371,9 +371,6 @@ quiet_cmd_ar = AR $@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ -quiet_cmd_strip_relocs = RSTRIP $@ -cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $@ - # Gzip # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 487f0bf716ad..b0a6cd5b818c 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -9,6 +9,20 @@ include $(srctree)/scripts/Makefile.lib targets := +ifdef CONFIG_ARCH_VMLINUX_NEEDS_RELOCS +vmlinux-final := vmlinux.unstripped + +quiet_cmd_strip_relocs = RSTRIP $@ + cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $< $@ + +vmlinux: $(vmlinux-final) FORCE + $(call if_changed,strip_relocs) + +targets += vmlinux +else +vmlinux-final := vmlinux +endif + %.o: %.c FORCE $(call if_changed_rule,cc_o_c) @@ -47,7 +61,7 @@ targets += .builtin-dtbs-list ifdef CONFIG_GENERIC_BUILTIN_DTB targets += .builtin-dtbs.S .builtin-dtbs.o -vmlinux: .builtin-dtbs.o +$(vmlinux-final): .builtin-dtbs.o endif # vmlinux @@ -55,11 +69,11 @@ endif ifdef CONFIG_MODULES targets += .vmlinux.export.o -vmlinux: .vmlinux.export.o +$(vmlinux-final): .vmlinux.export.o endif ifdef CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX -vmlinux: arch/$(SRCARCH)/tools/vmlinux.arch.o +$(vmlinux-final): arch/$(SRCARCH)/tools/vmlinux.arch.o arch/$(SRCARCH)/tools/vmlinux.arch.o: vmlinux.o FORCE $(Q)$(MAKE) $(build)=arch/$(SRCARCH)/tools $@ @@ -72,11 +86,11 @@ cmd_link_vmlinux = \ $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)" "$@"; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) -targets += vmlinux -vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +targets += $(vmlinux-final) +$(vmlinux-final): scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) ifdef CONFIG_DEBUG_INFO_BTF -vmlinux: $(RESOLVE_BTFIDS) +$(vmlinux-final): $(RESOLVE_BTFIDS) endif ifdef CONFIG_BUILDTIME_TABLE_SORT @@ -96,7 +110,7 @@ modules.builtin.ranges: $(srctree)/scripts/generate_builtin_ranges.awk \ modules.builtin vmlinux.map vmlinux.o.map FORCE $(call if_changed,modules_builtin_ranges) -vmlinux.map: vmlinux +vmlinux.map: $(vmlinux-final) @: endif From e6a03a666995a6b64cd8a28cb5e5b9c6a162444a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Mar 2025 12:06:21 +0100 Subject: [PATCH 0809/2157] x86: Get rid of Makefile.postlink Instead of generating the vmlinux.relocs file (needed by the decompressor build to construct the KASLR relocation tables) as a vmlinux postlink step, which is dubious because it depends on data that is stripped from vmlinux before the build completes, generate it from vmlinux.unstripped, which has been introduced specifically for this purpose. This ensures that each artifact is rebuilt as needed, rather than as a side effect of another build rule. This effectively reverts commit 9d9173e9ceb6 ("x86/build: Avoid relocation information in final vmlinux") Signed-off-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- arch/x86/Makefile.postlink | 38 ------------------------------- arch/x86/boot/compressed/Makefile | 9 +++++--- 2 files changed, 6 insertions(+), 41 deletions(-) delete mode 100644 arch/x86/Makefile.postlink diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink deleted file mode 100644 index 445fce66630f..000000000000 --- a/arch/x86/Makefile.postlink +++ /dev/null @@ -1,38 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# =========================================================================== -# Post-link x86 pass -# =========================================================================== -# -# 1. Separate relocations from vmlinux into vmlinux.relocs. -# 2. Strip relocations from vmlinux. - -PHONY := __archpost -__archpost: - --include include/config/auto.conf -include $(srctree)/scripts/Kbuild.include - -CMD_RELOCS = arch/x86/tools/relocs -OUT_RELOCS = arch/x86/boot/compressed -quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/vmlinux.relocs - cmd_relocs = \ - mkdir -p $(OUT_RELOCS); \ - $(CMD_RELOCS) $@ > $(OUT_RELOCS)/vmlinux.relocs; \ - $(CMD_RELOCS) --abs-relocs $@ - -# `@true` prevents complaint when there is nothing to be done - -vmlinux vmlinux.unstripped: FORCE - @true -ifeq ($(CONFIG_X86_NEED_RELOCS),y) - $(call cmd,relocs) -endif - -clean: - @rm -f $(OUT_RELOCS)/vmlinux.relocs - -PHONY += FORCE clean - -FORCE: - -.PHONY: $(PHONY) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 606c74f27459..5edee7a9786c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -117,9 +117,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs -# vmlinux.relocs is created by the vmlinux postlink step. -$(obj)/vmlinux.relocs: vmlinux - @true +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< + +$(obj)/vmlinux.relocs: vmlinux.unstripped FORCE + $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs From 82e7a5997170f105dc5452f83f349e6e625e61f5 Mon Sep 17 00:00:00 2001 From: Kefan Liu Date: Tue, 11 Mar 2025 23:45:35 +0800 Subject: [PATCH 0810/2157] Documentation/kbuild: Fix indentation in modules.rst example Correct the indentation in an example within the `modules.rst` file to improve readability. Signed-off-by: Kefan Liu Signed-off-by: Masahiro Yamada --- Documentation/kbuild/modules.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index a42f00d8cb90..d0703605bfa4 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -318,7 +318,7 @@ Several Subdirectories | |__ include | |__ hardwareif.h |__ include - |__ complex.h + |__ complex.h To build the module complex.ko, we then need the following kbuild file:: From be8f23cebdb9546beb30ad15ff59130b66c8f2ac Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:49:10 +0100 Subject: [PATCH 0811/2157] phy: qcom: uniphy-28lp: add COMMON_CLK dependency In configurations without CONFIG_COMMON_CLK, the driver fails to build: aarch64-linux-ld: drivers/phy/qualcomm/phy-qcom-uniphy-pcie-28lp.o: in function `qcom_uniphy_pcie_probe': phy-qcom-uniphy-pcie-28lp.c:(.text+0x200): undefined reference to `__clk_hw_register_fixed_rate' aarch64-linux-ld: phy-qcom-uniphy-pcie-28lp.c:(.text+0x238): undefined reference to `of_clk_hw_simple_get' phy-qcom-uniphy-pcie-28lp.c:(.text+0x238): dangerous relocation: unsupported relocation aarch64-linux-ld: phy-qcom-uniphy-pcie-28lp.c:(.text+0x240): undefined reference to `of_clk_hw_simple_get' aarch64-linux-ld: phy-qcom-uniphy-pcie-28lp.c:(.text+0x248): undefined reference to `devm_of_clk_add_hw_provider' Add that as a Kconfig dependencies. Fixes: 74badb8b0b14 ("phy: qcom: Introduce PCIe UNIPHY 28LP driver") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250314154915.4074980-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index a6b71fda1b9c..c1e0a11ddd76 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -157,6 +157,7 @@ config PHY_QCOM_M31_USB config PHY_QCOM_UNIPHY_PCIE_28LP bool "PCIE UNIPHY 28LP PHY driver" depends on ARCH_QCOM + depends on COMMON_CLK depends on HAS_IOMEM depends on OF select GENERIC_PHY From 301587cf4e771aca8c5ee05a6ba8d7d8f548e478 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 13 Mar 2025 14:40:31 +0100 Subject: [PATCH 0812/2157] dt-bindings: phy: Add Rockchip MIPI C-/D-PHY schema Add dt-binding schema for the MIPI C-/D-PHY found on Rockchip RK3588 SoCs. Tested-by: Daniel Semkowicz Tested-by: Sebastian Reichel Tested-by: Quentin Schulz Reviewed-by: Sebastian Reichel Acked-by: Conor Dooley Reviewed-by: Krzysztof Kozlowski Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250313134035.278133-2-heiko@sntech.de Signed-off-by: Vinod Koul --- .../phy/rockchip,rk3588-mipi-dcphy.yaml | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml new file mode 100644 index 000000000000..c8ff5ba22a86 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/rockchip,rk3588-mipi-dcphy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip MIPI D-/C-PHY with Samsung IP block + +maintainers: + - Guochun Huang + - Heiko Stuebner + +properties: + compatible: + enum: + - rockchip,rk3576-mipi-dcphy + - rockchip,rk3588-mipi-dcphy + + reg: + maxItems: 1 + + "#phy-cells": + const: 1 + description: | + Argument is mode to operate in. Supported modes are: + - PHY_TYPE_DPHY + - PHY_TYPE_CPHY + See include/dt-bindings/phy/phy.h for constants. + + clocks: + maxItems: 2 + + clock-names: + items: + - const: pclk + - const: ref + + resets: + maxItems: 4 + + reset-names: + items: + - const: m_phy + - const: apb + - const: grf + - const: s_phy + + rockchip,grf: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon managing the 'mipi dcphy general register files'. + +required: + - compatible + - reg + - clocks + - clock-names + - resets + - reset-names + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + phy@feda0000 { + compatible = "rockchip,rk3588-mipi-dcphy"; + reg = <0x0 0xfeda0000 0x0 0x10000>; + clocks = <&cru PCLK_MIPI_DCPHY0>, + <&cru CLK_USBDPPHY_MIPIDCPPHY_REF>; + clock-names = "pclk", "ref"; + resets = <&cru SRST_M_MIPI_DCPHY0>, + <&cru SRST_P_MIPI_DCPHY0>, + <&cru SRST_P_MIPI_DCPHY0_GRF>, + <&cru SRST_S_MIPI_DCPHY0>; + reset-names = "m_phy", "apb", "grf", "s_phy"; + rockchip,grf = <&mipidcphy0_grf>; + #phy-cells = <1>; + }; + }; From b2a1a2ae7818c9d8da12bf7b1983c8b9f5fb712b Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 13 Mar 2025 14:40:32 +0100 Subject: [PATCH 0813/2157] phy: rockchip: Add Samsung MIPI D-/C-PHY driver Add driver for the MIPI D-/C-PHY block based around a Samsung IP-block that is for example needed to drive a MIPI DSI output on rk3588. Right now only the D-PHY portion is implemented, with the C-PHY part needing separate work. Tested-by: Daniel Semkowicz Tested-by: Sebastian Reichel Tested-by: Quentin Schulz Reviewed-by: Sebastian Reichel Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250313134035.278133-3-heiko@sntech.de Signed-off-by: Vinod Koul --- drivers/phy/rockchip/Kconfig | 12 + drivers/phy/rockchip/Makefile | 1 + .../phy/rockchip/phy-rockchip-samsung-dcphy.c | 1719 +++++++++++++++++ 3 files changed, 1732 insertions(+) create mode 100644 drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig index 2f7a05f21dc5..2bfb42996503 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig @@ -83,6 +83,18 @@ config PHY_ROCKCHIP_PCIE help Enable this to support the Rockchip PCIe PHY. +config PHY_ROCKCHIP_SAMSUNG_DCPHY + tristate "Rockchip Samsung MIPI DCPHY driver" + depends on (ARCH_ROCKCHIP || COMPILE_TEST) + select GENERIC_PHY + select GENERIC_PHY_MIPI_DPHY + help + Enable this to support the Rockchip MIPI DCPHY with + Samsung IP block. + + To compile this driver as a module, choose M here: the module + will be called phy-rockchip-samsung-dcphy + config PHY_ROCKCHIP_SAMSUNG_HDPTX tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile index 010a824e32ce..117aaffd037d 100644 --- a/drivers/phy/rockchip/Makefile +++ b/drivers/phy/rockchip/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI) += phy-rockchip-inno-hdmi.o obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2) += phy-rockchip-inno-usb2.o obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY) += phy-rockchip-naneng-combphy.o obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o +obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_DCPHY) += phy-rockchip-samsung-dcphy.o obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c new file mode 100644 index 000000000000..08c78c1bafc9 --- /dev/null +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c @@ -0,0 +1,1719 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2025 Rockchip Electronics Co.Ltd + * Author: + * Guochun Huang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FIELD_PREP_HIWORD(_mask, _val) \ + ( \ + FIELD_PREP((_mask), (_val)) | \ + ((_mask) << 16) \ + ) + +#define BIAS_CON0 0x0000 +#define I_RES_CNTL_MASK GENMASK(6, 4) +#define I_RES_CNTL(x) FIELD_PREP(I_RES_CNTL_MASK, x) +#define I_RES_059_2UA I_RES_CNTL(0) +#define I_RES_100_2UA I_RES_CNTL(1) +#define I_RES_094_2UA I_RES_CNTL(2) +#define I_RES_113_8UA I_RES_CNTL(3) +#define I_RES_089_7UA I_RES_CNTL(4) +#define I_RES_111_8UA I_RES_CNTL(5) +#define I_RES_108_2UA I_RES_CNTL(6) +#define I_RES_120_8UA I_RES_CNTL(7) +#define I_DEV_SEL_MASK GENMASK(1, 0) +#define I_DEV_SEL(x) FIELD_PREP(I_DEV_SEL_MASK, x) +#define I_DEV_DIV_6 I_DEV_SEL(0) +#define I_DEV_DIV_12 I_DEV_SEL(1) +#define I_DEV_DIV_20 I_DEV_SEL(2) +#define I_DEV_DIV_40 I_DEV_SEL(3) + +#define BIAS_CON1 0x0004 +#define I_VBG_SEL_MASK GENMASK(9, 8) +#define I_VBG_SEL(x) FIELD_PREP(I_VBG_SEL_MASK, x) +#define I_VBG_SEL_780MV I_VBG_SEL(0) +#define I_VBG_SEL_820MV I_VBG_SEL(1) +#define I_VBG_SEL_860MV I_VBG_SEL(2) +#define I_VBG_SEL_900MV I_VBG_SEL(3) +#define I_BGR_VREF_SEL_MASK GENMASK(5, 4) +#define I_BGR_VREF_SEL(x) FIELD_PREP(I_BGR_VREF_SEL_MASK, x) +#define I_BGR_VREF_810MV I_BGR_VREF_SEL(0) +#define I_BGR_VREF_820MV I_BGR_VREF_SEL(1) +#define I_BGR_VREF_830MV I_BGR_VREF_SEL(2) +#define I_BGR_VREF_840MV I_BGR_VREF_SEL(3) +#define I_LADDER_SEL_MASK GENMASK(2, 0) +#define I_LADDER_SEL(x) FIELD_PREP(I_LADDER_SEL_MASK, x) +#define I_LADDER_1_00V I_LADDER_SEL(0) +#define I_LADDER_0_96V I_LADDER_SEL(1) +#define I_LADDER_0_92V I_LADDER_SEL(2) +#define I_LADDER_0_88V I_LADDER_SEL(3) +#define I_LADDER_0_84V I_LADDER_SEL(4) +#define I_LADDER_0_80V I_LADDER_SEL(5) +#define I_LADDER_0_76V I_LADDER_SEL(6) +#define I_LADDER_0_72V I_LADDER_SEL(7) + +/* + * Voltage corrections around reference voltages + * The selection between the 400-based or 200-based values for REG_400M + * is done by the hw depending on I_MUX below being 400MV or 200MV. + */ +#define BIAS_CON2 0x0008 +#define REG_325M_MASK GENMASK(14, 12) +#define REG_325M(x) FIELD_PREP(REG_325M_MASK, x) +#define REG_325M_295MV REG_325M(0) +#define REG_325M_305MV REG_325M(1) +#define REG_325M_315MV REG_325M(2) +#define REG_325M_325MV REG_325M(3) +#define REG_325M_335MV REG_325M(4) +#define REG_325M_345MV REG_325M(5) +#define REG_325M_355MV REG_325M(6) +#define REG_325M_365MV REG_325M(7) +#define REG_LP_400M_MASK GENMASK(10, 8) +#define REG_LP_400M(x) FIELD_PREP(REG_LP_400M_MASK, x) +#define REG_LP_400M_380MV REG_LP_400M(0) +#define REG_LP_400M_390MV REG_LP_400M(1) +#define REG_LP_400M_400MV REG_LP_400M(2) +#define REG_LP_400M_410MV REG_LP_400M(3) +#define REG_LP_400M_420MV REG_LP_400M(4) +#define REG_LP_400M_430MV REG_LP_400M(5) +#define REG_LP_400M_440MV REG_LP_400M(6) +#define REG_LP_400M_450MV REG_LP_400M(7) +#define REG_400M_MASK GENMASK(6, 4) +#define REG_400M(x) FIELD_PREP(REG_400M_MASK, x) +#define REG_400M_380MV REG_400M(0) +#define REG_400M_390MV REG_400M(1) +#define REG_400M_400MV REG_400M(2) +#define REG_400M_410MV REG_400M(3) +#define REG_400M_420MV REG_400M(4) +#define REG_400M_430MV REG_400M(5) +#define REG_400M_440MV REG_400M(6) +#define REG_400M_450MV REG_400M(7) +#define REG_400M_230MV REG_400M(0) +#define REG_400M_220MV REG_400M(1) +#define REG_400M_210MV REG_400M(2) +#define REG_400M_200MV REG_400M(3) +#define REG_400M_190MV REG_400M(4) +#define REG_400M_180MV REG_400M(5) +#define REG_400M_170MV REG_400M(6) +#define REG_400M_160MV REG_400M(7) +#define REG_645M_MASK GENMASK(2, 0) +#define REG_645M(x) FIELD_PREP(REG_645M_MASK, x) +#define REG_645M_605MV REG_645M(0) +#define REG_645M_625MV REG_645M(1) +#define REG_645M_635MV REG_645M(2) +#define REG_645M_645MV REG_645M(3) +#define REG_645M_655MV REG_645M(4) +#define REG_645M_665MV REG_645M(5) +#define REG_645M_685MV REG_645M(6) +#define REG_645M_725MV REG_645M(7) + +#define BIAS_CON4 0x0010 +#define I_MUX_SEL_MASK GENMASK(6, 5) +#define I_MUX_SEL(x) FIELD_PREP(I_MUX_SEL_MASK, x) +#define I_MUX_400MV I_MUX_SEL(0) +#define I_MUX_200MV I_MUX_SEL(1) +#define I_MUX_530MV I_MUX_SEL(2) + +#define PLL_CON0 0x0100 +#define PLL_EN BIT(12) +#define S_MASK GENMASK(10, 8) +#define S(x) FIELD_PREP(S_MASK, x) +#define P_MASK GENMASK(5, 0) +#define P(x) FIELD_PREP(P_MASK, x) +#define PLL_CON1 0x0104 +#define PLL_CON2 0x0108 +#define M_MASK GENMASK(9, 0) +#define M(x) FIELD_PREP(M_MASK, x) +#define PLL_CON3 0x010c +#define MRR_MASK GENMASK(13, 8) +#define MRR(x) FIELD_PREP(MRR_MASK, x) +#define MFR_MASK GENMASK(7, 0) +#define MFR(x) FIELD_PREP(MFR_MASK, x) +#define PLL_CON4 0x0110 +#define SSCG_EN BIT(11) +#define PLL_CON5 0x0114 +#define RESET_N_SEL BIT(10) +#define PLL_ENABLE_SEL BIT(8) +#define PLL_CON6 0x0118 +#define PLL_CON7 0x011c +#define PLL_LOCK_CNT(x) FIELD_PREP(GENMASK(15, 0), x) +#define PLL_CON8 0x0120 +#define PLL_STB_CNT(x) FIELD_PREP(GENMASK(15, 0), x) +#define PLL_STAT0 0x0140 +#define PLL_LOCK BIT(0) + +#define DPHY_MC_GNR_CON0 0x0300 +#define PHY_READY BIT(1) +#define PHY_ENABLE BIT(0) +#define DPHY_MC_GNR_CON1 0x0304 +#define T_PHY_READY(x) FIELD_PREP(GENMASK(15, 0), x) +#define DPHY_MC_ANA_CON0 0x0308 +#define EDGE_CON(x) FIELD_PREP(GENMASK(14, 12), x) +#define EDGE_CON_DIR(x) FIELD_PREP(BIT(9), x) +#define EDGE_CON_EN BIT(8) +#define RES_UP(x) FIELD_PREP(GENMASK(7, 4), x) +#define RES_DN(x) FIELD_PREP(GENMASK(3, 0), x) +#define DPHY_MC_ANA_CON1 0x030c +#define DPHY_MC_ANA_CON2 0x0310 +#define HS_VREG_AMP_ICON(x) FIELD_PREP(GENMASK(1, 0), x) +#define DPHY_MC_TIME_CON0 0x0330 +#define HSTX_CLK_SEL BIT(12) +#define T_LPX(x) FIELD_PREP(GENMASK(11, 4), x) +#define DPHY_MC_TIME_CON1 0x0334 +#define T_CLK_ZERO(x) FIELD_PREP(GENMASK(15, 8), x) +#define T_CLK_PREPARE(x) FIELD_PREP(GENMASK(7, 0), x) +#define DPHY_MC_TIME_CON2 0x0338 +#define T_HS_EXIT(x) FIELD_PREP(GENMASK(15, 8), x) +#define T_CLK_TRAIL(x) FIELD_PREP(GENMASK(7, 0), x) +#define DPHY_MC_TIME_CON3 0x033c +#define T_CLK_POST(x) FIELD_PREP(GENMASK(7, 0), x) +#define DPHY_MC_TIME_CON4 0x0340 +#define T_ULPS_EXIT(x) FIELD_PREP(GENMASK(9, 0), x) +#define DPHY_MC_DESKEW_CON0 0x0350 +#define SKEW_CAL_RUN_TIME(x) FIELD_PREP(GENMASK(15, 12), x) + +#define SKEW_CAL_INIT_RUN_TIME(x) FIELD_PREP(GENMASK(11, 8), x) +#define SKEW_CAL_INIT_WAIT_TIME(x) FIELD_PREP(GENMASK(7, 4), x) +#define SKEW_CAL_EN BIT(0) + +#define COMBO_MD0_GNR_CON0 0x0400 +#define COMBO_MD0_GNR_CON1 0x0404 +#define COMBO_MD0_ANA_CON0 0x0408 +#define COMBO_MD0_ANA_CON1 0x040c +#define COMBO_MD0_ANA_CON2 0x0410 + +#define COMBO_MD0_TIME_CON0 0x0430 +#define COMBO_MD0_TIME_CON1 0x0434 +#define COMBO_MD0_TIME_CON2 0x0438 +#define COMBO_MD0_TIME_CON3 0x043c +#define COMBO_MD0_TIME_CON4 0x0440 +#define COMBO_MD0_DATA_CON0 0x0444 + +#define COMBO_MD1_GNR_CON0 0x0500 +#define COMBO_MD1_GNR_CON1 0x0504 +#define COMBO_MD1_ANA_CON0 0x0508 +#define COMBO_MD1_ANA_CON1 0x050c +#define COMBO_MD1_ANA_CON2 0x0510 +#define COMBO_MD1_TIME_CON0 0x0530 +#define COMBO_MD1_TIME_CON1 0x0534 +#define COMBO_MD1_TIME_CON2 0x0538 +#define COMBO_MD1_TIME_CON3 0x053c +#define COMBO_MD1_TIME_CON4 0x0540 +#define COMBO_MD1_DATA_CON0 0x0544 + +#define COMBO_MD2_GNR_CON0 0x0600 +#define COMBO_MD2_GNR_CON1 0x0604 +#define COMBO_MD2_ANA_CON0 0X0608 +#define COMBO_MD2_ANA_CON1 0X060c +#define COMBO_MD2_ANA_CON2 0X0610 +#define COMBO_MD2_TIME_CON0 0x0630 +#define COMBO_MD2_TIME_CON1 0x0634 +#define COMBO_MD2_TIME_CON2 0x0638 +#define COMBO_MD2_TIME_CON3 0x063c +#define COMBO_MD2_TIME_CON4 0x0640 +#define COMBO_MD2_DATA_CON0 0x0644 + +#define DPHY_MD3_GNR_CON0 0x0700 +#define DPHY_MD3_GNR_CON1 0x0704 +#define DPHY_MD3_ANA_CON0 0X0708 +#define DPHY_MD3_ANA_CON1 0X070c +#define DPHY_MD3_ANA_CON2 0X0710 +#define DPHY_MD3_TIME_CON0 0x0730 +#define DPHY_MD3_TIME_CON1 0x0734 +#define DPHY_MD3_TIME_CON2 0x0738 +#define DPHY_MD3_TIME_CON3 0x073c +#define DPHY_MD3_TIME_CON4 0x0740 +#define DPHY_MD3_DATA_CON0 0x0744 + +#define T_LP_EXIT_SKEW(x) FIELD_PREP(GENMASK(3, 2), x) +#define T_LP_ENTRY_SKEW(x) FIELD_PREP(GENMASK(1, 0), x) +#define T_HS_ZERO(x) FIELD_PREP(GENMASK(15, 8), x) +#define T_HS_PREPARE(x) FIELD_PREP(GENMASK(7, 0), x) +#define T_HS_EXIT(x) FIELD_PREP(GENMASK(15, 8), x) +#define T_HS_TRAIL(x) FIELD_PREP(GENMASK(7, 0), x) +#define T_TA_GET(x) FIELD_PREP(GENMASK(7, 4), x) +#define T_TA_GO(x) FIELD_PREP(GENMASK(3, 0), x) + +/* MIPI_CDPHY_GRF registers */ +#define MIPI_DCPHY_GRF_CON0 0x0000 +#define S_CPHY_MODE FIELD_PREP_HIWORD(BIT(3), 1) +#define M_CPHY_MODE FIELD_PREP_HIWORD(BIT(0), 1) + +enum hs_drv_res_ohm { + STRENGTH_30_OHM = 0x8, + STRENGTH_31_2_OHM, + STRENGTH_32_5_OHM, + STRENGTH_34_OHM, + STRENGTH_35_5_OHM, + STRENGTH_37_OHM, + STRENGTH_39_OHM, + STRENGTH_41_OHM, + STRENGTH_43_OHM = 0x0, + STRENGTH_46_OHM, + STRENGTH_49_OHM, + STRENGTH_52_OHM, + STRENGTH_56_OHM, + STRENGTH_60_OHM, + STRENGTH_66_OHM, + STRENGTH_73_OHM, +}; + +struct hs_drv_res_cfg { + enum hs_drv_res_ohm clk_hs_drv_up_ohm; + enum hs_drv_res_ohm clk_hs_drv_down_ohm; + enum hs_drv_res_ohm data_hs_drv_up_ohm; + enum hs_drv_res_ohm data_hs_drv_down_ohm; +}; + +struct samsung_mipi_dcphy_plat_data { + const struct hs_drv_res_cfg *dphy_hs_drv_res_cfg; + u32 dphy_tx_max_lane_kbps; +}; + +struct samsung_mipi_dcphy { + struct device *dev; + struct clk *ref_clk; + struct clk *pclk; + struct regmap *regmap; + struct regmap *grf_regmap; + struct reset_control *m_phy_rst; + struct reset_control *s_phy_rst; + struct reset_control *apb_rst; + struct reset_control *grf_apb_rst; + unsigned int lanes; + struct phy *phy; + u8 type; + + const struct samsung_mipi_dcphy_plat_data *pdata; + struct { + unsigned long long rate; + u8 prediv; + u16 fbdiv; + long dsm; + u8 scaler; + + bool ssc_en; + u8 mfr; + u8 mrr; + } pll; +}; + +struct samsung_mipi_dphy_timing { + unsigned int max_lane_mbps; + u8 clk_prepare; + u8 clk_zero; + u8 clk_post; + u8 clk_trail_eot; + u8 hs_prepare; + u8 hs_zero; + u8 hs_trail_eot; + u8 lpx; + u8 hs_exit; + u8 hs_settle; +}; + +/* + * Timing values taken from rk3588 vendor kernel. + * Not documented in hw documentation. + */ +static const +struct samsung_mipi_dphy_timing samsung_mipi_dphy_timing_table[] = { + {6500, 32, 117, 31, 28, 30, 56, 27, 24, 44, 37}, + {6490, 32, 116, 31, 28, 30, 56, 27, 24, 44, 37}, + {6480, 32, 116, 31, 28, 30, 56, 27, 24, 44, 37}, + {6470, 32, 116, 31, 28, 30, 56, 27, 24, 44, 37}, + {6460, 32, 116, 31, 28, 30, 56, 27, 24, 44, 37}, + {6450, 32, 115, 31, 28, 30, 56, 27, 24, 44, 37}, + {6440, 32, 115, 31, 28, 30, 56, 27, 24, 44, 37}, + {6430, 31, 116, 31, 28, 30, 55, 27, 24, 44, 37}, + {6420, 31, 116, 31, 28, 30, 55, 27, 24, 44, 37}, + {6410, 31, 116, 31, 27, 30, 55, 27, 24, 44, 37}, + {6400, 31, 115, 30, 27, 30, 55, 27, 23, 43, 36}, + {6390, 31, 115, 30, 27, 30, 55, 27, 23, 43, 36}, + {6380, 31, 115, 30, 27, 30, 55, 27, 23, 43, 36}, + {6370, 31, 115, 30, 27, 30, 55, 26, 23, 43, 36}, + {6360, 31, 114, 30, 27, 30, 54, 26, 23, 43, 36}, + {6350, 31, 114, 30, 27, 30, 54, 26, 23, 43, 36}, + {6340, 31, 114, 30, 27, 30, 54, 26, 23, 43, 36}, + {6330, 31, 114, 30, 27, 30, 54, 26, 23, 43, 36}, + {6320, 31, 113, 30, 27, 30, 54, 26, 23, 43, 36}, + {6310, 31, 113, 30, 27, 30, 54, 26, 23, 43, 36}, + {6300, 31, 113, 30, 27, 30, 54, 26, 23, 43, 36}, + {6290, 31, 113, 30, 27, 29, 54, 26, 23, 43, 36}, + {6280, 31, 112, 30, 27, 29, 54, 26, 23, 43, 36}, + {6270, 31, 112, 30, 27, 29, 54, 26, 23, 43, 36}, + {6260, 31, 112, 30, 27, 29, 54, 26, 23, 43, 36}, + {6250, 31, 112, 30, 27, 29, 54, 26, 23, 42, 36}, + {6240, 30, 113, 30, 27, 29, 54, 26, 23, 42, 36}, + {6230, 30, 112, 30, 27, 29, 54, 26, 23, 42, 35}, + {6220, 30, 112, 30, 27, 29, 53, 26, 23, 42, 35}, + {6210, 30, 112, 30, 27, 29, 53, 26, 23, 42, 35}, + {6200, 30, 112, 29, 27, 29, 53, 26, 23, 42, 35}, + {6190, 30, 111, 29, 27, 29, 53, 26, 23, 42, 35}, + {6180, 30, 111, 29, 27, 29, 53, 26, 23, 42, 35}, + {6170, 30, 111, 29, 26, 29, 53, 26, 23, 42, 35}, + {6160, 30, 111, 29, 26, 29, 53, 26, 23, 42, 35}, + {6150, 30, 110, 29, 26, 29, 53, 26, 23, 42, 35}, + {6140, 30, 110, 29, 26, 29, 52, 26, 23, 42, 35}, + {6130, 30, 110, 29, 26, 29, 52, 25, 22, 42, 35}, + {6120, 30, 110, 29, 26, 29, 52, 25, 22, 42, 35}, + {6110, 30, 110, 29, 26, 29, 52, 25, 22, 42, 35}, + {6100, 30, 109, 29, 26, 29, 52, 25, 22, 41, 35}, + {6090, 30, 109, 29, 26, 29, 52, 25, 22, 41, 35}, + {6080, 30, 109, 29, 26, 28, 53, 25, 22, 41, 35}, + {6070, 30, 109, 29, 26, 28, 52, 25, 22, 41, 34}, + {6060, 30, 108, 29, 26, 28, 52, 25, 22, 41, 34}, + {6050, 30, 108, 29, 26, 28, 52, 25, 22, 41, 34}, + {6040, 29, 109, 29, 26, 28, 52, 25, 22, 41, 34}, + {6030, 29, 109, 29, 26, 28, 52, 25, 22, 41, 34}, + {6020, 29, 108, 29, 26, 28, 52, 25, 22, 41, 34}, + {6010, 29, 108, 29, 26, 28, 52, 25, 22, 41, 34}, + {6000, 29, 108, 28, 26, 28, 51, 25, 22, 41, 34}, + {5990, 29, 108, 28, 26, 28, 51, 25, 22, 41, 34}, + {5980, 29, 107, 28, 26, 28, 51, 25, 22, 41, 34}, + {5970, 29, 107, 28, 26, 28, 51, 25, 22, 41, 34}, + {5960, 29, 107, 28, 26, 28, 51, 25, 22, 40, 34}, + {5950, 29, 107, 28, 26, 28, 51, 25, 22, 40, 34}, + {5940, 29, 107, 28, 25, 28, 51, 25, 22, 40, 34}, + {5930, 29, 106, 28, 25, 28, 50, 25, 22, 40, 34}, + {5920, 29, 106, 28, 25, 28, 50, 25, 22, 40, 34}, + {5910, 29, 106, 28, 25, 28, 50, 25, 22, 40, 34}, + {5900, 29, 106, 28, 25, 28, 50, 24, 22, 40, 33}, + {5890, 29, 105, 28, 25, 28, 50, 24, 22, 40, 33}, + {5880, 29, 105, 28, 25, 28, 50, 24, 22, 40, 33}, + {5870, 29, 105, 28, 25, 27, 51, 24, 22, 40, 33}, + {5860, 29, 105, 28, 25, 27, 51, 24, 21, 40, 33}, + {5850, 29, 104, 28, 25, 27, 50, 24, 21, 40, 33}, + {5840, 28, 105, 28, 25, 27, 50, 24, 21, 40, 33}, + {5830, 28, 105, 28, 25, 27, 50, 24, 21, 40, 33}, + {5820, 28, 105, 28, 25, 27, 50, 24, 21, 40, 33}, + {5810, 28, 104, 28, 25, 27, 50, 24, 21, 39, 33}, + {5800, 28, 104, 27, 25, 27, 50, 24, 21, 39, 33}, + {5790, 28, 104, 27, 25, 27, 50, 24, 21, 39, 33}, + {5780, 28, 104, 27, 25, 27, 49, 24, 21, 39, 33}, + {5770, 28, 104, 27, 25, 27, 49, 24, 21, 39, 33}, + {5760, 28, 103, 27, 25, 27, 49, 24, 21, 39, 33}, + {5750, 28, 103, 27, 25, 27, 49, 24, 21, 39, 33}, + {5740, 28, 103, 27, 25, 27, 49, 24, 21, 39, 33}, + {5730, 28, 103, 27, 25, 27, 49, 24, 21, 39, 32}, + {5720, 28, 102, 27, 25, 27, 49, 24, 21, 39, 32}, + {5710, 28, 102, 27, 25, 27, 48, 24, 21, 39, 32}, + {5700, 28, 102, 27, 24, 27, 48, 24, 21, 39, 32}, + {5690, 28, 102, 27, 24, 27, 48, 24, 21, 39, 32}, + {5680, 28, 101, 27, 24, 27, 48, 24, 21, 39, 32}, + {5670, 28, 101, 27, 24, 27, 48, 23, 21, 38, 32}, + {5660, 28, 101, 27, 24, 26, 49, 23, 21, 38, 32}, + {5650, 28, 101, 27, 24, 26, 49, 23, 21, 38, 32}, + {5640, 27, 101, 27, 24, 26, 48, 23, 21, 38, 32}, + {5630, 27, 101, 27, 24, 26, 48, 23, 21, 38, 32}, + {5620, 27, 101, 27, 24, 26, 48, 23, 21, 38, 32}, + {5610, 27, 101, 27, 24, 26, 48, 23, 21, 38, 32}, + {5600, 27, 101, 26, 24, 26, 48, 23, 20, 38, 32}, + {5590, 27, 100, 26, 24, 26, 48, 23, 20, 38, 32}, + {5580, 27, 100, 26, 24, 26, 48, 23, 20, 38, 32}, + {5570, 27, 100, 26, 24, 26, 48, 23, 20, 38, 31}, + {5560, 27, 100, 26, 24, 26, 47, 23, 20, 38, 31}, + {5550, 27, 99, 26, 24, 26, 47, 23, 20, 38, 31}, + {5540, 27, 99, 26, 24, 26, 47, 23, 20, 38, 31}, + {5530, 27, 99, 26, 24, 26, 47, 23, 20, 38, 31}, + {5520, 27, 99, 26, 24, 26, 47, 23, 20, 37, 31}, + {5510, 27, 98, 26, 24, 26, 47, 23, 20, 37, 31}, + {5500, 27, 98, 26, 24, 26, 47, 23, 20, 37, 31}, + {5490, 27, 98, 26, 24, 26, 46, 23, 20, 37, 31}, + {5480, 27, 98, 26, 24, 26, 46, 23, 20, 37, 31}, + {5470, 27, 97, 26, 23, 26, 46, 23, 20, 37, 31}, + {5460, 27, 97, 26, 23, 26, 46, 23, 20, 37, 31}, + {5450, 27, 97, 26, 23, 25, 47, 23, 20, 37, 31}, + {5440, 26, 98, 26, 23, 25, 47, 23, 20, 37, 31}, + {5430, 26, 98, 26, 23, 25, 47, 22, 20, 37, 31}, + {5420, 26, 97, 26, 23, 25, 46, 22, 20, 37, 31}, + {5410, 26, 97, 26, 23, 25, 46, 22, 20, 37, 31}, + {5400, 26, 97, 25, 23, 25, 46, 22, 20, 37, 30}, + {5390, 26, 97, 25, 23, 25, 46, 22, 20, 37, 30}, + {5380, 26, 96, 25, 23, 25, 46, 22, 20, 36, 30}, + {5370, 26, 96, 25, 23, 25, 46, 22, 20, 36, 30}, + {5360, 26, 96, 25, 23, 25, 46, 22, 20, 36, 30}, + {5350, 26, 96, 25, 23, 25, 46, 22, 20, 36, 30}, + {5340, 26, 95, 25, 23, 25, 45, 22, 20, 36, 30}, + {5330, 26, 95, 25, 23, 25, 45, 22, 19, 36, 30}, + {5320, 26, 95, 25, 23, 25, 45, 22, 19, 36, 30}, + {5310, 26, 95, 25, 23, 25, 45, 22, 19, 36, 30}, + {5300, 26, 95, 25, 23, 25, 45, 22, 19, 36, 30}, + {5290, 26, 94, 25, 23, 25, 45, 22, 19, 36, 30}, + {5280, 26, 94, 25, 23, 25, 45, 22, 19, 36, 30}, + {5270, 26, 94, 25, 23, 25, 44, 22, 19, 36, 30}, + {5260, 26, 94, 25, 23, 25, 44, 22, 19, 36, 30}, + {5250, 25, 94, 25, 23, 24, 45, 22, 19, 36, 30}, + {5240, 25, 94, 25, 23, 24, 45, 22, 19, 36, 29}, + {5230, 25, 94, 25, 22, 24, 45, 22, 19, 35, 29}, + {5220, 25, 94, 25, 22, 24, 45, 22, 19, 35, 29}, + {5210, 25, 93, 25, 22, 24, 45, 22, 19, 35, 29}, + {5200, 25, 93, 24, 22, 24, 44, 21, 19, 35, 29}, + {5190, 25, 93, 24, 22, 24, 44, 21, 19, 35, 29}, + {5180, 25, 93, 24, 22, 24, 44, 21, 19, 35, 29}, + {5170, 25, 92, 24, 22, 24, 44, 21, 19, 35, 29}, + {5160, 25, 92, 24, 22, 24, 44, 21, 19, 35, 29}, + {5150, 25, 92, 24, 22, 24, 44, 21, 19, 35, 29}, + {5140, 25, 92, 24, 22, 24, 44, 21, 19, 35, 29}, + {5130, 25, 92, 24, 22, 24, 43, 21, 19, 35, 29}, + {5120, 25, 91, 24, 22, 24, 43, 21, 19, 35, 29}, + {5110, 25, 91, 24, 22, 24, 43, 21, 19, 35, 29}, + {5100, 25, 91, 24, 22, 24, 43, 21, 19, 35, 29}, + {5090, 25, 91, 24, 22, 24, 43, 21, 19, 34, 29}, + {5080, 25, 90, 24, 22, 24, 43, 21, 19, 34, 29}, + {5070, 25, 90, 24, 22, 24, 43, 21, 19, 34, 28}, + {5060, 25, 90, 24, 22, 24, 43, 21, 18, 34, 28}, + {5050, 24, 91, 24, 22, 24, 42, 21, 18, 34, 28}, + {5040, 24, 90, 24, 22, 23, 43, 21, 18, 34, 28}, + {5030, 24, 90, 24, 22, 23, 43, 21, 18, 34, 28}, + {5020, 24, 90, 24, 22, 23, 43, 21, 18, 34, 28}, + {5010, 24, 90, 24, 22, 23, 43, 21, 18, 34, 28}, + {5000, 24, 89, 23, 21, 23, 43, 21, 18, 34, 28}, + {4990, 24, 89, 23, 21, 23, 43, 21, 18, 34, 28}, + {4980, 24, 89, 23, 21, 23, 42, 21, 18, 34, 28}, + {4970, 24, 89, 23, 21, 23, 42, 21, 18, 34, 28}, + {4960, 24, 89, 23, 21, 23, 42, 20, 18, 34, 28}, + {4950, 24, 88, 23, 21, 23, 42, 20, 18, 34, 28}, + {4940, 24, 88, 23, 21, 23, 42, 20, 18, 33, 28}, + {4930, 24, 88, 23, 21, 23, 42, 20, 18, 33, 28}, + {4920, 24, 88, 23, 21, 23, 42, 20, 18, 33, 28}, + {4910, 24, 87, 23, 21, 23, 41, 20, 18, 33, 28}, + {4900, 24, 87, 23, 21, 23, 41, 20, 18, 33, 27}, + {4890, 24, 87, 23, 21, 23, 41, 20, 18, 33, 27}, + {4880, 24, 87, 23, 21, 23, 41, 20, 18, 33, 27}, + {4870, 24, 86, 23, 21, 23, 41, 20, 18, 33, 27}, + {4860, 24, 86, 23, 21, 23, 41, 20, 18, 33, 27}, + {4850, 23, 87, 23, 21, 23, 41, 20, 18, 33, 27}, + {4840, 23, 87, 23, 21, 23, 40, 20, 18, 33, 27}, + {4830, 23, 86, 23, 21, 22, 41, 20, 18, 33, 27}, + {4820, 23, 86, 23, 21, 22, 41, 20, 18, 33, 27}, + {4810, 23, 86, 23, 21, 22, 41, 20, 18, 33, 27}, + {4800, 23, 86, 22, 21, 22, 41, 20, 17, 32, 27}, + {4790, 23, 86, 22, 21, 22, 41, 20, 17, 32, 27}, + {4780, 23, 85, 22, 21, 22, 41, 20, 17, 32, 27}, + {4770, 23, 85, 22, 21, 22, 41, 20, 17, 32, 27}, + {4760, 23, 85, 22, 20, 22, 40, 20, 17, 32, 27}, + {4750, 23, 85, 22, 20, 22, 40, 20, 17, 32, 27}, + {4740, 23, 84, 22, 20, 22, 40, 20, 17, 32, 26}, + {4730, 23, 84, 22, 20, 22, 40, 19, 17, 32, 26}, + {4720, 23, 84, 22, 20, 22, 40, 19, 17, 32, 26}, + {4710, 23, 84, 22, 20, 22, 40, 19, 17, 32, 26}, + {4700, 23, 83, 22, 20, 22, 40, 19, 17, 32, 26}, + {4690, 23, 83, 22, 20, 22, 39, 19, 17, 32, 26}, + {4680, 23, 83, 22, 20, 22, 39, 19, 17, 32, 26}, + {4670, 23, 83, 22, 20, 22, 39, 19, 17, 32, 26}, + {4660, 23, 82, 22, 20, 22, 39, 19, 17, 32, 26}, + {4650, 22, 83, 22, 20, 22, 39, 19, 17, 31, 26}, + {4640, 22, 83, 22, 20, 22, 39, 19, 17, 31, 26}, + {4630, 22, 83, 22, 20, 22, 39, 19, 17, 31, 26}, + {4620, 22, 83, 22, 20, 21, 39, 19, 17, 31, 26}, + {4610, 22, 82, 22, 20, 21, 39, 19, 17, 31, 26}, + {4600, 22, 82, 21, 20, 21, 39, 19, 17, 31, 26}, + {4590, 22, 82, 21, 20, 21, 39, 19, 17, 31, 26}, + {4580, 22, 82, 21, 20, 21, 39, 19, 17, 31, 26}, + {4570, 22, 81, 21, 20, 21, 39, 19, 17, 31, 25}, + {4560, 22, 81, 21, 20, 21, 39, 19, 17, 31, 25}, + {4550, 22, 81, 21, 20, 21, 38, 19, 17, 31, 25}, + {4540, 22, 81, 21, 20, 21, 38, 19, 17, 31, 25}, + {4530, 22, 80, 21, 19, 21, 38, 19, 16, 31, 25}, + {4520, 22, 80, 21, 19, 21, 38, 19, 16, 31, 25}, + {4510, 22, 80, 21, 19, 21, 38, 19, 16, 31, 25}, + {4500, 22, 80, 21, 19, 21, 38, 19, 16, 30, 25}, + {4490, 22, 80, 21, 19, 21, 38, 18, 16, 30, 25}, + {4480, 22, 79, 21, 19, 21, 38, 18, 16, 30, 25}, + {4470, 22, 79, 21, 19, 21, 37, 18, 16, 30, 25}, + {4460, 22, 79, 21, 19, 21, 37, 18, 16, 30, 25}, + {4450, 21, 80, 21, 19, 21, 37, 18, 16, 30, 25}, + {4440, 21, 79, 21, 19, 21, 37, 18, 16, 30, 25}, + {4430, 21, 79, 21, 19, 21, 37, 18, 16, 30, 25}, + {4420, 21, 79, 21, 19, 21, 37, 18, 16, 30, 25}, + {4410, 21, 79, 21, 19, 20, 38, 18, 16, 30, 25}, + {4400, 21, 78, 20, 19, 20, 37, 18, 16, 30, 24}, + {4390, 21, 78, 20, 19, 20, 37, 18, 16, 30, 24}, + {4380, 21, 78, 20, 19, 20, 37, 18, 16, 30, 24}, + {4370, 21, 78, 20, 19, 20, 37, 18, 16, 30, 24}, + {4360, 21, 77, 20, 19, 20, 37, 18, 16, 29, 24}, + {4350, 21, 77, 20, 19, 20, 37, 18, 16, 29, 24}, + {4340, 21, 77, 20, 19, 20, 37, 18, 16, 29, 24}, + {4330, 21, 77, 20, 19, 20, 36, 18, 16, 29, 24}, + {4320, 21, 77, 20, 19, 20, 36, 18, 16, 29, 24}, + {4310, 21, 76, 20, 19, 20, 36, 18, 16, 29, 24}, + {4300, 21, 76, 20, 18, 20, 36, 18, 16, 29, 24}, + {4290, 21, 76, 20, 18, 20, 36, 18, 16, 29, 24}, + {4280, 21, 76, 20, 18, 20, 36, 18, 16, 29, 24}, + {4270, 21, 75, 20, 18, 20, 36, 18, 16, 29, 24}, + {4260, 21, 75, 20, 18, 20, 35, 17, 15, 29, 24}, + {4250, 20, 76, 20, 18, 20, 35, 17, 15, 29, 24}, + {4240, 20, 76, 20, 18, 20, 35, 17, 15, 29, 23}, + {4230, 20, 75, 20, 18, 20, 35, 17, 15, 29, 23}, + {4220, 20, 75, 20, 18, 20, 35, 17, 15, 29, 23}, + {4210, 20, 75, 20, 18, 20, 35, 17, 15, 28, 23}, + {4200, 20, 75, 19, 18, 19, 36, 17, 15, 28, 23}, + {4190, 20, 74, 19, 18, 19, 36, 17, 15, 28, 23}, + {4180, 20, 74, 19, 18, 19, 35, 17, 15, 28, 23}, + {4170, 20, 74, 19, 18, 19, 35, 17, 15, 28, 23}, + {4160, 20, 74, 19, 18, 19, 35, 17, 15, 28, 23}, + {4150, 20, 74, 19, 18, 19, 35, 17, 15, 28, 23}, + {4140, 20, 73, 19, 18, 19, 35, 17, 15, 28, 23}, + {4130, 20, 73, 19, 18, 19, 35, 17, 15, 28, 23}, + {4120, 20, 73, 19, 18, 19, 35, 17, 15, 28, 23}, + {4110, 20, 73, 19, 18, 19, 34, 17, 15, 28, 23}, + {4100, 20, 72, 19, 18, 19, 34, 17, 15, 28, 23}, + {4090, 20, 72, 19, 18, 19, 34, 17, 15, 28, 23}, + {4080, 20, 72, 19, 18, 19, 34, 17, 15, 28, 23}, + {4070, 20, 72, 19, 18, 19, 34, 17, 15, 27, 22}, + {4060, 19, 72, 19, 17, 19, 34, 17, 15, 27, 22}, + {4050, 19, 72, 19, 17, 19, 34, 17, 15, 27, 22}, + {4040, 19, 72, 19, 17, 19, 33, 17, 15, 27, 22}, + {4030, 19, 72, 19, 17, 19, 33, 17, 15, 27, 22}, + {4020, 19, 71, 19, 17, 19, 33, 16, 15, 27, 22}, + {4010, 19, 71, 19, 17, 19, 33, 16, 15, 27, 22}, + {4000, 19, 71, 18, 17, 19, 33, 16, 14, 27, 22}, + {3990, 19, 71, 18, 17, 18, 34, 16, 14, 27, 22}, + {3980, 19, 71, 18, 17, 18, 34, 16, 14, 27, 22}, + {3970, 19, 70, 18, 17, 18, 33, 16, 14, 27, 22}, + {3960, 19, 70, 18, 17, 18, 33, 16, 14, 27, 22}, + {3950, 19, 70, 18, 17, 18, 33, 16, 14, 27, 22}, + {3940, 19, 70, 18, 17, 18, 33, 16, 14, 27, 22}, + {3930, 19, 69, 18, 17, 18, 33, 16, 14, 27, 22}, + {3920, 19, 69, 18, 17, 18, 33, 16, 14, 26, 22}, + {3910, 19, 69, 18, 17, 18, 33, 16, 14, 26, 22}, + {3900, 19, 69, 18, 17, 18, 33, 16, 14, 26, 21}, + {3890, 19, 68, 18, 17, 18, 32, 16, 14, 26, 21}, + {3880, 19, 68, 18, 17, 18, 32, 16, 14, 26, 21}, + {3870, 19, 68, 18, 17, 18, 32, 16, 14, 26, 21}, + {3860, 18, 69, 18, 17, 18, 32, 16, 14, 26, 21}, + {3850, 18, 68, 18, 17, 18, 32, 16, 14, 26, 21}, + {3840, 18, 68, 18, 17, 18, 32, 16, 14, 26, 21}, + {3830, 18, 68, 18, 16, 18, 32, 16, 14, 26, 21}, + {3820, 18, 68, 18, 16, 18, 31, 16, 14, 26, 21}, + {3810, 18, 68, 18, 16, 18, 31, 16, 14, 26, 21}, + {3800, 18, 67, 17, 16, 18, 31, 16, 14, 26, 21}, + {3790, 18, 67, 17, 16, 17, 32, 15, 14, 26, 21}, + {3780, 18, 67, 17, 16, 17, 32, 15, 14, 25, 21}, + {3770, 18, 67, 17, 16, 17, 32, 15, 14, 25, 21}, + {3760, 18, 66, 17, 16, 17, 32, 15, 14, 25, 21}, + {3750, 18, 66, 17, 16, 17, 31, 15, 14, 25, 21}, + {3740, 18, 66, 17, 16, 17, 31, 15, 14, 25, 20}, + {3730, 18, 66, 17, 16, 17, 31, 15, 13, 25, 20}, + {3720, 18, 65, 17, 16, 17, 31, 15, 13, 25, 20}, + {3710, 18, 65, 17, 16, 17, 31, 15, 13, 25, 20}, + {3700, 18, 65, 17, 16, 17, 31, 15, 13, 25, 20}, + {3690, 18, 65, 17, 16, 17, 31, 15, 13, 25, 20}, + {3680, 18, 64, 17, 16, 17, 31, 15, 13, 25, 20}, + {3670, 18, 64, 17, 16, 17, 30, 15, 13, 25, 20}, + {3660, 17, 65, 17, 16, 17, 30, 15, 13, 25, 20}, + {3650, 17, 65, 17, 16, 17, 30, 15, 13, 25, 20}, + {3640, 17, 65, 17, 16, 17, 30, 15, 13, 25, 20}, + {3630, 17, 64, 17, 16, 17, 30, 15, 13, 24, 20}, + {3620, 17, 64, 17, 16, 17, 30, 15, 13, 24, 20}, + {3610, 17, 64, 17, 16, 17, 30, 15, 13, 24, 20}, + {3600, 17, 64, 16, 16, 17, 29, 15, 13, 24, 20}, + {3590, 17, 63, 16, 15, 17, 29, 15, 13, 24, 20}, + {3580, 17, 63, 16, 15, 16, 30, 15, 13, 24, 20}, + {3570, 17, 63, 16, 15, 16, 30, 15, 13, 24, 19}, + {3560, 17, 63, 16, 15, 16, 30, 14, 13, 24, 19}, + {3550, 17, 62, 16, 15, 16, 30, 14, 13, 24, 19}, + {3540, 17, 62, 16, 15, 16, 30, 14, 13, 24, 19}, + {3530, 17, 62, 16, 15, 16, 29, 14, 13, 24, 19}, + {3520, 17, 62, 16, 15, 16, 29, 14, 13, 24, 19}, + {3510, 17, 62, 16, 15, 16, 29, 14, 13, 24, 19}, + {3500, 17, 61, 16, 15, 16, 29, 14, 13, 24, 19}, + {3490, 17, 61, 16, 15, 16, 29, 14, 13, 23, 19}, + {3480, 17, 61, 16, 15, 16, 29, 14, 13, 23, 19}, + {3470, 17, 61, 16, 15, 16, 29, 14, 13, 23, 19}, + {3460, 16, 61, 16, 15, 16, 28, 14, 12, 23, 19}, + {3450, 16, 61, 16, 15, 16, 28, 14, 12, 23, 19}, + {3440, 16, 61, 16, 15, 16, 28, 14, 12, 23, 19}, + {3430, 16, 61, 16, 15, 16, 28, 14, 12, 23, 19}, + {3420, 16, 60, 16, 15, 16, 28, 14, 12, 23, 19}, + {3410, 16, 60, 16, 15, 16, 28, 14, 12, 23, 18}, + {3400, 16, 60, 15, 15, 16, 28, 14, 12, 23, 18}, + {3390, 16, 60, 15, 15, 16, 28, 14, 12, 23, 18}, + {3380, 16, 59, 15, 15, 16, 27, 14, 12, 23, 18}, + {3370, 16, 59, 15, 15, 15, 28, 14, 12, 23, 18}, + {3360, 16, 59, 15, 14, 15, 28, 14, 12, 23, 18}, + {3350, 16, 59, 15, 14, 15, 28, 14, 12, 23, 18}, + {3340, 16, 59, 15, 14, 15, 28, 14, 12, 22, 18}, + {3330, 16, 58, 15, 14, 15, 28, 14, 12, 22, 18}, + {3320, 16, 58, 15, 14, 15, 28, 13, 12, 22, 18}, + {3310, 16, 58, 15, 14, 15, 27, 13, 12, 22, 18}, + {3300, 16, 58, 15, 14, 15, 27, 13, 12, 22, 18}, + {3290, 16, 57, 15, 14, 15, 27, 13, 12, 22, 18}, + {3280, 16, 57, 15, 14, 15, 27, 13, 12, 22, 18}, + {3270, 16, 57, 15, 14, 15, 27, 13, 12, 22, 18}, + {3260, 15, 58, 15, 14, 15, 27, 13, 12, 22, 18}, + {3250, 15, 57, 15, 14, 15, 27, 13, 12, 22, 18}, + {3240, 15, 57, 15, 14, 15, 26, 13, 12, 22, 17}, + {3230, 15, 57, 15, 14, 15, 26, 13, 12, 22, 17}, + {3220, 15, 57, 15, 14, 15, 26, 13, 12, 22, 17}, + {3210, 15, 56, 15, 14, 15, 26, 13, 12, 22, 17}, + {3200, 15, 56, 14, 14, 15, 26, 13, 11, 21, 17}, + {3190, 15, 56, 14, 14, 15, 26, 13, 11, 21, 17}, + {3180, 15, 56, 14, 14, 15, 26, 13, 11, 21, 17}, + {3170, 15, 56, 14, 14, 15, 25, 13, 11, 21, 17}, + {3160, 15, 55, 14, 14, 14, 26, 13, 11, 21, 17}, + {3150, 15, 55, 14, 14, 14, 26, 13, 11, 21, 17}, + {3140, 15, 55, 14, 14, 14, 26, 13, 11, 21, 17}, + {3130, 15, 55, 14, 14, 14, 26, 13, 11, 21, 17}, + {3120, 15, 54, 14, 13, 14, 26, 13, 11, 21, 17}, + {3110, 15, 54, 14, 13, 14, 26, 13, 11, 21, 17}, + {3100, 15, 54, 14, 13, 14, 26, 13, 11, 21, 17}, + {3090, 15, 54, 14, 13, 14, 25, 12, 11, 21, 17}, + {3080, 15, 53, 14, 13, 14, 25, 12, 11, 21, 17}, + {3070, 14, 54, 14, 13, 14, 25, 12, 11, 21, 16}, + {3060, 14, 54, 14, 13, 14, 25, 12, 11, 21, 16}, + {3050, 14, 54, 14, 13, 14, 25, 12, 11, 20, 16}, + {3040, 14, 53, 14, 13, 14, 25, 12, 11, 20, 16}, + {3030, 14, 53, 14, 13, 14, 25, 12, 11, 20, 16}, + {3020, 14, 53, 14, 13, 14, 24, 12, 11, 20, 16}, + {3010, 14, 53, 14, 13, 14, 24, 12, 11, 20, 16}, + {3000, 14, 53, 13, 13, 14, 24, 12, 11, 20, 16}, + {2990, 14, 52, 13, 13, 14, 24, 12, 11, 20, 16}, + {2980, 14, 52, 13, 13, 14, 24, 12, 11, 20, 16}, + {2970, 14, 52, 13, 13, 14, 24, 12, 11, 20, 16}, + {2960, 14, 52, 13, 13, 14, 24, 12, 11, 20, 16}, + {2950, 14, 51, 13, 13, 13, 24, 12, 11, 20, 16}, + {2940, 14, 51, 13, 13, 13, 24, 12, 11, 20, 16}, + {2930, 14, 51, 13, 13, 13, 24, 12, 10, 20, 16}, + {2920, 14, 51, 13, 13, 13, 24, 12, 10, 20, 16}, + {2910, 14, 50, 13, 13, 13, 24, 12, 10, 20, 15}, + {2900, 14, 50, 13, 13, 13, 24, 12, 10, 19, 15}, + {2890, 14, 50, 13, 12, 13, 24, 12, 10, 19, 15}, + {2880, 14, 50, 13, 12, 13, 23, 12, 10, 19, 15}, + {2870, 13, 50, 13, 12, 13, 23, 12, 10, 19, 15}, + {2860, 13, 50, 13, 12, 13, 23, 12, 10, 19, 15}, + {2850, 13, 50, 13, 12, 13, 23, 11, 10, 19, 15}, + {2840, 13, 50, 13, 12, 13, 23, 11, 10, 19, 15}, + {2830, 13, 50, 13, 12, 13, 23, 11, 10, 19, 15}, + {2820, 13, 49, 13, 12, 13, 23, 11, 10, 19, 15}, + {2810, 13, 49, 13, 12, 13, 23, 11, 10, 19, 15}, + {2800, 13, 49, 12, 12, 13, 22, 11, 10, 19, 15}, + {2790, 13, 49, 12, 12, 13, 22, 11, 10, 19, 15}, + {2780, 13, 48, 12, 12, 13, 22, 11, 10, 19, 15}, + {2770, 13, 48, 12, 12, 13, 22, 11, 10, 19, 15}, + {2760, 13, 48, 12, 12, 13, 22, 11, 10, 18, 15}, + {2750, 13, 48, 12, 12, 13, 22, 11, 10, 18, 15}, + {2740, 13, 47, 12, 12, 12, 23, 11, 10, 18, 14}, + {2730, 13, 47, 12, 12, 12, 22, 11, 10, 18, 14}, + {2720, 13, 47, 12, 12, 12, 22, 11, 10, 18, 14}, + {2710, 13, 47, 12, 12, 12, 22, 11, 10, 18, 14}, + {2700, 13, 47, 12, 12, 12, 22, 11, 10, 18, 14}, + {2690, 13, 46, 12, 12, 12, 22, 11, 10, 18, 14}, + {2680, 13, 46, 12, 12, 12, 22, 11, 10, 18, 14}, + {2670, 12, 47, 12, 12, 12, 22, 11, 10, 18, 14}, + {2660, 12, 47, 12, 12, 12, 21, 11, 9, 18, 14}, + {2650, 12, 46, 12, 11, 12, 21, 11, 9, 18, 14}, + {2640, 12, 46, 12, 11, 12, 21, 11, 9, 18, 14}, + {2630, 12, 46, 12, 11, 12, 21, 11, 9, 18, 14}, + {2620, 12, 46, 12, 11, 12, 21, 10, 9, 18, 14}, + {2610, 12, 45, 12, 11, 12, 21, 10, 9, 17, 14}, + {2600, 12, 45, 11, 11, 12, 21, 10, 9, 17, 14}, + {2590, 12, 45, 11, 11, 12, 20, 10, 9, 17, 14}, + {2580, 12, 45, 11, 11, 12, 20, 10, 9, 17, 14}, + {2570, 12, 44, 11, 11, 12, 20, 10, 9, 17, 13}, + {2560, 12, 44, 11, 11, 12, 20, 10, 9, 17, 13}, + {2550, 12, 44, 11, 11, 12, 20, 10, 9, 17, 13}, + {2540, 12, 44, 11, 11, 11, 21, 10, 9, 17, 13}, + {2530, 12, 44, 11, 11, 11, 21, 10, 9, 17, 13}, + {2520, 12, 43, 11, 11, 11, 21, 10, 9, 17, 13}, + {2510, 12, 43, 11, 11, 11, 20, 10, 9, 17, 13}, + {2500, 12, 43, 11, 11, 11, 20, 10, 9, 17, 13}, + {2490, 12, 43, 11, 11, 11, 20, 10, 9, 17, 13}, + {2480, 12, 42, 11, 11, 11, 20, 10, 9, 17, 13}, + {2470, 11, 43, 11, 11, 11, 20, 10, 9, 16, 13}, + {2460, 11, 43, 11, 11, 11, 20, 10, 9, 16, 13}, + {2450, 11, 43, 11, 11, 11, 20, 10, 9, 16, 13}, + {2440, 11, 42, 11, 11, 11, 19, 10, 9, 16, 13}, + {2430, 11, 42, 11, 11, 11, 19, 10, 9, 16, 13}, + {2420, 11, 42, 11, 10, 11, 19, 10, 9, 16, 13}, + {2410, 11, 42, 11, 10, 11, 19, 10, 9, 16, 12}, + {2400, 11, 41, 10, 10, 11, 19, 10, 8, 16, 12}, + {2390, 11, 41, 10, 10, 11, 19, 10, 8, 16, 12}, + {2380, 11, 41, 10, 10, 11, 19, 9, 8, 16, 12}, + {2370, 11, 41, 10, 10, 11, 18, 9, 8, 16, 12}, + {2360, 11, 41, 10, 10, 11, 18, 9, 8, 16, 12}, + {2350, 11, 40, 10, 10, 11, 18, 9, 8, 16, 12}, + {2340, 11, 40, 10, 10, 11, 18, 9, 8, 16, 12}, + {2330, 11, 40, 10, 10, 10, 19, 9, 8, 16, 12}, + {2320, 11, 40, 10, 10, 10, 19, 9, 8, 15, 12}, + {2310, 11, 39, 10, 10, 10, 19, 9, 8, 15, 12}, + {2300, 11, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2290, 11, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2280, 11, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2270, 10, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2260, 10, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2250, 10, 39, 10, 10, 10, 18, 9, 8, 15, 12}, + {2240, 10, 39, 10, 10, 10, 18, 9, 8, 15, 11}, + {2230, 10, 38, 10, 10, 10, 18, 9, 8, 15, 11}, + {2220, 10, 38, 10, 10, 10, 17, 9, 8, 15, 11}, + {2210, 10, 38, 10, 10, 10, 17, 9, 8, 15, 11}, + {2200, 10, 38, 9, 10, 10, 17, 9, 8, 15, 11}, + {2190, 10, 38, 9, 9, 10, 17, 9, 8, 15, 11}, + {2180, 10, 37, 9, 9, 10, 17, 9, 8, 14, 11}, + {2170, 10, 37, 9, 9, 10, 17, 9, 8, 14, 11}, + {2160, 10, 37, 9, 9, 10, 17, 9, 8, 14, 11}, + {2150, 10, 37, 9, 9, 10, 16, 8, 8, 14, 11}, + {2140, 10, 36, 9, 9, 10, 16, 8, 8, 14, 11}, + {2130, 10, 36, 9, 9, 10, 16, 8, 7, 14, 11}, + {2120, 10, 36, 9, 9, 9, 17, 8, 7, 14, 11}, + {2110, 10, 36, 9, 9, 9, 17, 8, 7, 14, 11}, + {2100, 10, 35, 9, 9, 9, 17, 8, 7, 14, 11}, + {2090, 10, 35, 9, 9, 9, 17, 8, 7, 14, 11}, + {2080, 9, 36, 9, 9, 9, 16, 8, 7, 14, 11}, + {2070, 9, 36, 9, 9, 9, 16, 8, 7, 14, 10}, + {2060, 9, 35, 9, 9, 9, 16, 8, 7, 14, 10}, + {2050, 9, 35, 9, 9, 9, 16, 8, 7, 14, 10}, + {2040, 9, 35, 9, 9, 9, 16, 8, 7, 14, 10}, + {2030, 9, 35, 9, 9, 9, 16, 8, 7, 13, 10}, + {2020, 9, 35, 9, 9, 9, 16, 8, 7, 13, 10}, + {2010, 9, 34, 9, 9, 9, 15, 8, 7, 13, 10}, + {2000, 9, 34, 8, 9, 9, 15, 8, 7, 13, 10}, + {1990, 9, 34, 8, 9, 9, 15, 8, 7, 13, 10}, + {1980, 9, 34, 8, 9, 9, 15, 8, 7, 13, 10}, + {1970, 9, 33, 8, 9, 9, 15, 8, 7, 13, 10}, + {1960, 9, 33, 8, 9, 9, 15, 8, 7, 13, 10}, + {1950, 9, 33, 8, 8, 9, 15, 8, 7, 13, 10}, + {1940, 9, 33, 8, 8, 9, 15, 8, 7, 13, 10}, + {1930, 9, 32, 8, 8, 9, 14, 8, 7, 13, 10}, + {1920, 9, 32, 8, 8, 9, 14, 8, 7, 13, 10}, + {1910, 9, 32, 8, 8, 8, 15, 7, 7, 13, 9}, + {1900, 9, 32, 8, 8, 8, 15, 7, 7, 13, 9}, + {1890, 9, 31, 8, 8, 8, 15, 7, 7, 12, 9}, + {1880, 8, 32, 8, 8, 8, 15, 7, 7, 12, 9}, + {1870, 8, 32, 8, 8, 8, 15, 7, 7, 12, 9}, + {1860, 8, 32, 8, 8, 8, 14, 7, 6, 12, 9}, + {1850, 8, 32, 8, 8, 8, 14, 7, 6, 12, 9}, + {1840, 8, 31, 8, 8, 8, 14, 7, 6, 12, 9}, + {1830, 8, 31, 8, 8, 8, 14, 7, 6, 12, 9}, + {1820, 8, 31, 8, 8, 8, 14, 7, 6, 12, 9}, + {1810, 8, 31, 8, 8, 8, 14, 7, 6, 12, 9}, + {1800, 8, 30, 7, 8, 8, 14, 7, 6, 12, 9}, + {1790, 8, 30, 7, 8, 8, 13, 7, 6, 12, 9}, + {1780, 8, 30, 7, 8, 8, 13, 7, 6, 12, 9}, + {1770, 8, 30, 7, 8, 8, 13, 7, 6, 12, 9}, + {1760, 8, 29, 7, 8, 8, 13, 7, 6, 12, 9}, + {1750, 8, 29, 7, 8, 8, 13, 7, 6, 12, 9}, + {1740, 8, 29, 7, 8, 8, 13, 7, 6, 11, 8}, + {1730, 8, 29, 7, 8, 8, 13, 7, 6, 11, 8}, + {1720, 8, 29, 7, 7, 8, 13, 7, 6, 11, 8}, + {1710, 8, 28, 7, 7, 8, 12, 7, 6, 11, 8}, + {1700, 8, 28, 7, 7, 7, 13, 7, 6, 11, 8}, + {1690, 8, 28, 7, 7, 7, 13, 7, 6, 11, 8}, + {1680, 7, 29, 7, 7, 7, 13, 6, 6, 11, 8}, + {1670, 7, 28, 7, 7, 7, 13, 6, 6, 11, 8}, + {1660, 7, 28, 7, 7, 7, 13, 6, 6, 11, 8}, + {1650, 7, 28, 7, 7, 7, 13, 6, 6, 11, 8}, + {1640, 7, 28, 7, 7, 7, 12, 6, 6, 11, 8}, + {1630, 7, 27, 7, 7, 7, 12, 6, 6, 11, 8}, + {1620, 7, 27, 7, 7, 7, 12, 6, 6, 11, 8}, + {1610, 7, 27, 7, 7, 7, 12, 6, 6, 11, 8}, + {1600, 7, 27, 6, 7, 7, 12, 6, 5, 10, 8}, + {1590, 7, 26, 6, 7, 7, 12, 6, 5, 10, 8}, + {1580, 7, 26, 6, 7, 7, 12, 6, 5, 10, 7}, + {1570, 7, 26, 6, 7, 7, 11, 6, 5, 10, 7}, + {1560, 7, 26, 6, 7, 7, 11, 6, 5, 10, 7}, + {1550, 7, 26, 6, 7, 7, 11, 6, 5, 10, 7}, + {1540, 7, 25, 6, 7, 7, 11, 6, 5, 10, 7}, + {1530, 7, 25, 6, 7, 7, 11, 6, 5, 10, 7}, + {1520, 7, 25, 6, 7, 7, 11, 6, 5, 10, 7}, + {1510, 7, 25, 6, 7, 7, 11, 6, 5, 10, 7}, + {1500, 7, 24, 6, 7, 7, 10, 6, 5, 10, 7}, + {1490, 59, 25, 6, 77, 59, 10, 70, 44, 9, 73}, + {1480, 59, 24, 6, 76, 58, 10, 70, 44, 9, 73}, + {1470, 58, 24, 6, 76, 58, 10, 69, 44, 9, 72}, + {1460, 58, 24, 6, 76, 58, 10, 69, 43, 9, 72}, + {1450, 58, 24, 6, 75, 57, 10, 68, 43, 9, 71}, + {1440, 57, 24, 6, 75, 57, 10, 68, 43, 9, 71}, + {1430, 57, 23, 6, 75, 57, 10, 68, 43, 8, 70}, + {1420, 56, 23, 6, 74, 57, 9, 67, 43, 8, 70}, + {1410, 56, 23, 6, 74, 57, 9, 67, 43, 8, 69}, + {1400, 56, 23, 5, 74, 55, 9, 67, 41, 8, 69}, + {1390, 55, 23, 5, 73, 55, 9, 66, 41, 8, 68}, + {1380, 55, 23, 5, 73, 54, 9, 66, 41, 8, 68}, + {1370, 54, 22, 5, 72, 54, 9, 66, 41, 8, 67}, + {1360, 54, 22, 5, 72, 54, 9, 65, 40, 8, 67}, + {1350, 54, 22, 5, 72, 53, 9, 65, 40, 8, 66}, + {1340, 53, 22, 5, 71, 53, 9, 65, 40, 8, 66}, + {1330, 53, 22, 5, 71, 53, 9, 64, 39, 8, 65}, + {1320, 52, 22, 5, 71, 53, 8, 64, 40, 8, 65}, + {1310, 52, 21, 5, 70, 53, 8, 64, 40, 8, 64}, + {1300, 51, 21, 5, 70, 51, 8, 63, 38, 8, 64}, + {1290, 51, 21, 5, 70, 51, 8, 63, 38, 7, 64}, + {1280, 51, 21, 5, 69, 51, 8, 63, 38, 7, 63}, + {1270, 50, 21, 5, 69, 50, 8, 62, 38, 7, 63}, + {1260, 50, 20, 5, 69, 50, 8, 62, 37, 7, 62}, + {1250, 49, 20, 5, 68, 49, 8, 62, 37, 7, 62}, + {1240, 49, 20, 5, 68, 49, 8, 61, 37, 7, 61}, + {1230, 49, 20, 5, 68, 49, 8, 61, 36, 7, 61}, + {1220, 48, 20, 5, 67, 48, 8, 61, 36, 7, 60}, + {1210, 48, 19, 5, 67, 48, 7, 60, 36, 7, 60}, + {1200, 49, 19, 4, 67, 49, 7, 60, 36, 7, 59}, + {1190, 48, 19, 4, 66, 48, 7, 60, 36, 7, 59}, + {1180, 48, 19, 4, 66, 48, 7, 59, 36, 7, 58}, + {1170, 46, 19, 4, 66, 46, 7, 59, 35, 7, 58}, + {1160, 46, 18, 4, 65, 46, 7, 59, 34, 7, 57}, + {1150, 45, 18, 4, 65, 46, 7, 58, 34, 7, 57}, + {1140, 45, 18, 4, 65, 45, 7, 58, 34, 6, 56}, + {1130, 45, 18, 4, 64, 45, 7, 58, 33, 6, 56}, + {1120, 44, 18, 4, 64, 44, 7, 57, 33, 6, 55}, + {1110, 44, 18, 4, 64, 44, 7, 57, 33, 6, 55}, + {1100, 43, 17, 4, 63, 44, 6, 57, 32, 6, 54}, + {1090, 43, 17, 4, 63, 44, 6, 56, 33, 6, 54}, + {1080, 43, 17, 4, 63, 44, 6, 56, 33, 6, 53}, + {1070, 42, 17, 4, 62, 44, 6, 56, 33, 6, 53}, + {1060, 42, 17, 4, 62, 42, 6, 55, 31, 6, 52}, + {1050, 41, 17, 4, 62, 42, 6, 55, 31, 6, 52}, + {1040, 41, 16, 4, 61, 41, 6, 54, 31, 6, 52}, + {1030, 41, 16, 4, 61, 41, 6, 54, 30, 6, 51}, + {1020, 40, 16, 4, 61, 41, 6, 54, 30, 6, 51}, + {1010, 40, 16, 4, 60, 40, 6, 53, 30, 6, 50}, + {1000, 39, 16, 3, 60, 40, 6, 53, 29, 5, 50}, + { 990, 39, 15, 3, 60, 39, 6, 53, 29, 5, 49}, + { 980, 39, 15, 3, 59, 39, 5, 52, 29, 5, 49}, + { 970, 38, 15, 3, 59, 39, 5, 52, 29, 5, 48}, + { 960, 38, 15, 3, 59, 39, 5, 52, 29, 5, 48}, + { 950, 37, 15, 3, 58, 39, 5, 51, 29, 5, 47}, + { 940, 37, 14, 3, 58, 39, 5, 51, 29, 5, 47}, + { 930, 37, 14, 3, 57, 37, 5, 51, 27, 5, 46}, + { 920, 36, 14, 3, 57, 37, 5, 50, 27, 5, 46}, + { 910, 36, 14, 3, 57, 36, 5, 50, 27, 5, 45}, + { 900, 35, 14, 3, 56, 36, 5, 50, 26, 5, 45}, + { 890, 35, 14, 3, 56, 36, 5, 49, 26, 5, 44}, + { 880, 35, 13, 3, 56, 35, 5, 49, 26, 5, 44}, + { 870, 34, 13, 3, 55, 35, 4, 49, 26, 5, 43}, + { 860, 34, 13, 3, 55, 35, 4, 48, 25, 5, 43}, + { 850, 33, 13, 3, 55, 35, 4, 48, 26, 4, 42}, + { 840, 33, 13, 3, 54, 35, 4, 48, 26, 4, 42}, + { 830, 33, 12, 3, 54, 33, 4, 47, 24, 4, 41}, + { 820, 32, 12, 3, 54, 33, 4, 47, 24, 4, 41}, + { 810, 32, 12, 3, 53, 33, 4, 47, 24, 4, 40}, + { 800, 31, 12, 2, 53, 32, 4, 46, 23, 4, 40}, + { 790, 31, 12, 2, 53, 32, 4, 46, 23, 4, 39}, + { 780, 30, 12, 2, 52, 31, 4, 46, 23, 4, 39}, + { 770, 30, 11, 2, 52, 31, 4, 45, 23, 4, 39}, + { 760, 30, 11, 2, 52, 31, 3, 45, 22, 4, 38}, + { 750, 29, 11, 2, 51, 30, 3, 45, 22, 4, 38}, + { 740, 29, 11, 2, 51, 30, 3, 44, 22, 4, 37}, + { 730, 28, 11, 2, 51, 31, 3, 44, 22, 4, 37}, + { 720, 28, 10, 2, 50, 30, 3, 44, 22, 4, 36}, + { 710, 28, 10, 2, 50, 30, 3, 43, 22, 4, 36}, + { 700, 27, 10, 2, 50, 28, 3, 43, 20, 3, 35}, + { 690, 27, 10, 2, 49, 28, 3, 43, 20, 3, 35}, + { 680, 26, 10, 2, 49, 28, 3, 42, 20, 3, 34}, + { 670, 26, 10, 2, 49, 27, 3, 42, 20, 3, 34}, + { 660, 26, 9, 2, 48, 27, 3, 42, 19, 3, 33}, + { 650, 25, 9, 2, 48, 26, 3, 41, 19, 3, 33}, + { 640, 25, 9, 2, 48, 26, 2, 41, 19, 3, 32}, + { 630, 24, 9, 2, 47, 26, 2, 40, 18, 3, 32}, + { 620, 24, 9, 2, 47, 26, 2, 40, 19, 3, 31}, + { 610, 24, 8, 2, 47, 26, 2, 40, 19, 3, 31}, + { 600, 23, 8, 1, 46, 26, 2, 39, 18, 3, 30}, + { 590, 23, 8, 1, 46, 24, 2, 39, 17, 3, 30}, + { 580, 22, 8, 1, 46, 24, 2, 39, 17, 3, 29}, + { 570, 22, 8, 1, 45, 23, 2, 38, 17, 3, 29}, + { 560, 22, 7, 1, 45, 23, 2, 38, 16, 2, 28}, + { 550, 21, 7, 1, 45, 23, 2, 38, 16, 2, 28}, + { 540, 21, 7, 1, 44, 22, 2, 37, 16, 2, 27}, + { 530, 20, 7, 1, 44, 22, 1, 37, 15, 2, 27}, + { 520, 20, 7, 1, 43, 21, 1, 37, 15, 2, 27}, + { 510, 20, 6, 1, 43, 21, 1, 36, 15, 2, 26}, + { 500, 19, 6, 1, 43, 22, 1, 36, 15, 2, 26}, + { 490, 19, 6, 1, 42, 21, 1, 36, 15, 2, 25}, + { 480, 18, 6, 1, 42, 21, 1, 35, 15, 2, 25}, + { 470, 18, 6, 1, 42, 21, 1, 35, 15, 2, 24}, + { 460, 18, 6, 1, 41, 19, 1, 35, 13, 2, 24}, + { 450, 17, 5, 1, 41, 19, 1, 34, 13, 2, 23}, + { 440, 17, 5, 1, 41, 18, 1, 34, 13, 2, 23}, + { 430, 16, 5, 1, 40, 18, 0, 34, 12, 2, 22}, + { 420, 16, 5, 1, 40, 18, 0, 33, 12, 2, 22}, + { 410, 16, 5, 1, 40, 17, 0, 33, 12, 1, 21}, + { 400, 15, 5, 0, 39, 17, 0, 33, 11, 1, 21}, + { 390, 15, 4, 0, 39, 17, 0, 32, 12, 1, 20}, + { 380, 14, 4, 0, 39, 17, 0, 32, 12, 1, 20}, + { 370, 14, 4, 0, 38, 17, 0, 32, 12, 1, 19}, + { 360, 14, 4, 0, 38, 15, 0, 31, 10, 1, 19}, + { 350, 13, 4, 0, 38, 15, 0, 31, 10, 1, 18}, + { 340, 13, 3, 0, 37, 15, 0, 31, 10, 1, 18}, + { 330, 12, 3, 0, 37, 14, 0, 30, 9, 1, 17}, + { 320, 12, 3, 0, 37, 14, 0, 30, 9, 1, 17}, + { 310, 12, 3, 0, 36, 13, 0, 30, 9, 1, 16}, + { 300, 11, 3, 0, 36, 13, 0, 29, 8, 1, 16}, + { 290, 11, 2, 0, 36, 13, 0, 29, 8, 1, 15}, + { 280, 10, 2, 0, 35, 12, 0, 29, 8, 1, 15}, + { 270, 10, 2, 0, 35, 12, 0, 28, 8, 0, 14}, + { 260, 9, 2, 0, 35, 12, 0, 28, 8, 0, 14}, + { 250, 9, 2, 0, 34, 12, 0, 28, 8, 0, 14}, + { 240, 9, 2, 0, 34, 12, 0, 27, 8, 0, 13}, + { 230, 8, 1, 0, 34, 10, 0, 27, 6, 0, 13}, + { 220, 8, 1, 0, 33, 10, 0, 27, 6, 0, 12}, + { 210, 7, 1, 0, 33, 10, 0, 26, 6, 0, 12}, + { 200, 7, 1, 0, 33, 9, 0, 26, 5, 0, 11}, + { 190, 7, 1, 0, 32, 9, 0, 25, 5, 0, 11}, + { 180, 6, 1, 0, 32, 8, 0, 25, 5, 0, 10}, + { 170, 6, 0, 0, 32, 8, 0, 25, 5, 0, 10}, + { 160, 5, 0, 0, 31, 8, 0, 24, 4, 0, 9}, + { 150, 5, 0, 0, 31, 8, 0, 24, 5, 0, 9}, + { 140, 5, 0, 0, 31, 8, 0, 24, 5, 0, 8}, + { 130, 4, 0, 0, 30, 6, 0, 23, 3, 0, 8}, + { 120, 4, 0, 0, 30, 6, 0, 23, 3, 0, 7}, + { 110, 3, 0, 0, 30, 6, 0, 23, 3, 0, 7}, + { 100, 3, 0, 0, 29, 5, 0, 22, 2, 0, 6}, + { 90, 3, 0, 0, 29, 5, 0, 22, 2, 0, 6}, + { 80, 2, 0, 0, 28, 5, 0, 22, 2, 0, 5}, +}; + +static void samsung_mipi_dcphy_bias_block_enable(struct samsung_mipi_dcphy *samsung) +{ + regmap_write(samsung->regmap, BIAS_CON0, I_DEV_DIV_6 | I_RES_100_2UA); + regmap_write(samsung->regmap, BIAS_CON1, I_VBG_SEL_820MV | I_BGR_VREF_820MV | + I_LADDER_1_00V); + regmap_write(samsung->regmap, BIAS_CON2, REG_325M_325MV | REG_LP_400M_400MV | + REG_400M_400MV | REG_645M_645MV); + + /* default output voltage select: + * dphy: 400mv + * cphy: 530mv + */ + regmap_update_bits(samsung->regmap, BIAS_CON4, + I_MUX_SEL_MASK, I_MUX_400MV); +} + +static void samsung_mipi_dphy_lane_enable(struct samsung_mipi_dcphy *samsung) +{ + regmap_write(samsung->regmap, DPHY_MC_GNR_CON1, T_PHY_READY(0x2000)); + regmap_update_bits(samsung->regmap, DPHY_MC_GNR_CON0, + PHY_ENABLE, PHY_ENABLE); + + switch (samsung->lanes) { + case 4: + regmap_write(samsung->regmap, DPHY_MD3_GNR_CON1, + T_PHY_READY(0x2000)); + regmap_update_bits(samsung->regmap, DPHY_MD3_GNR_CON0, + PHY_ENABLE, PHY_ENABLE); + fallthrough; + case 3: + regmap_write(samsung->regmap, COMBO_MD2_GNR_CON1, + T_PHY_READY(0x2000)); + regmap_update_bits(samsung->regmap, COMBO_MD2_GNR_CON0, + PHY_ENABLE, PHY_ENABLE); + fallthrough; + case 2: + regmap_write(samsung->regmap, COMBO_MD1_GNR_CON1, + T_PHY_READY(0x2000)); + regmap_update_bits(samsung->regmap, COMBO_MD1_GNR_CON0, + PHY_ENABLE, PHY_ENABLE); + fallthrough; + case 1: + default: + regmap_write(samsung->regmap, COMBO_MD0_GNR_CON1, + T_PHY_READY(0x2000)); + regmap_update_bits(samsung->regmap, COMBO_MD0_GNR_CON0, + PHY_ENABLE, PHY_ENABLE); + break; + } +} + +static void samsung_mipi_dphy_lane_disable(struct samsung_mipi_dcphy *samsung) +{ + switch (samsung->lanes) { + case 4: + regmap_update_bits(samsung->regmap, DPHY_MD3_GNR_CON0, + PHY_ENABLE, 0); + fallthrough; + case 3: + regmap_update_bits(samsung->regmap, COMBO_MD2_GNR_CON0, + PHY_ENABLE, 0); + fallthrough; + case 2: + regmap_update_bits(samsung->regmap, COMBO_MD1_GNR_CON0, + PHY_ENABLE, 0); + fallthrough; + case 1: + default: + regmap_update_bits(samsung->regmap, COMBO_MD0_GNR_CON0, + PHY_ENABLE, 0); + break; + } + + regmap_update_bits(samsung->regmap, DPHY_MC_GNR_CON0, PHY_ENABLE, 0); +} + +static void samsung_mipi_dcphy_pll_configure(struct samsung_mipi_dcphy *samsung) +{ + regmap_update_bits(samsung->regmap, PLL_CON0, S_MASK | P_MASK, + S(samsung->pll.scaler) | P(samsung->pll.prediv)); + + if (samsung->pll.dsm < 0) { + u16 dsm_tmp; + + /* Using opposite number subtraction to find complement */ + dsm_tmp = abs(samsung->pll.dsm); + dsm_tmp = dsm_tmp - 1; + dsm_tmp ^= 0xffff; + regmap_write(samsung->regmap, PLL_CON1, dsm_tmp); + } else { + regmap_write(samsung->regmap, PLL_CON1, samsung->pll.dsm); + } + + regmap_update_bits(samsung->regmap, PLL_CON2, + M_MASK, M(samsung->pll.fbdiv)); + + if (samsung->pll.ssc_en) { + regmap_write(samsung->regmap, PLL_CON3, + MRR(samsung->pll.mrr) | MFR(samsung->pll.mfr)); + regmap_update_bits(samsung->regmap, PLL_CON4, SSCG_EN, SSCG_EN); + } + + regmap_write(samsung->regmap, PLL_CON5, RESET_N_SEL | PLL_ENABLE_SEL); + regmap_write(samsung->regmap, PLL_CON7, PLL_LOCK_CNT(0xf000)); + regmap_write(samsung->regmap, PLL_CON8, PLL_STB_CNT(0xf000)); +} + +static int samsung_mipi_dcphy_pll_enable(struct samsung_mipi_dcphy *samsung) +{ + u32 sts; + int ret; + + regmap_update_bits(samsung->regmap, PLL_CON0, PLL_EN, PLL_EN); + + ret = regmap_read_poll_timeout(samsung->regmap, PLL_STAT0, + sts, (sts & PLL_LOCK), 1000, 20000); + if (ret < 0) + dev_err(samsung->dev, "DC-PHY pll failed to lock\n"); + + return ret; +} + +static void samsung_mipi_dcphy_pll_disable(struct samsung_mipi_dcphy *samsung) +{ + regmap_update_bits(samsung->regmap, PLL_CON0, PLL_EN, 0); +} + +static const struct samsung_mipi_dphy_timing * +samsung_mipi_dphy_get_timing(struct samsung_mipi_dcphy *samsung) +{ + const struct samsung_mipi_dphy_timing *timings; + unsigned int num_timings; + unsigned int lane_mbps = div64_ul(samsung->pll.rate, USEC_PER_SEC); + unsigned int i; + + timings = samsung_mipi_dphy_timing_table; + num_timings = ARRAY_SIZE(samsung_mipi_dphy_timing_table); + + for (i = num_timings; i > 1; i--) + if (lane_mbps <= timings[i - 1].max_lane_mbps) + break; + + return &timings[i - 1]; +} + +static unsigned long +samsung_mipi_dcphy_pll_round_rate(struct samsung_mipi_dcphy *samsung, + unsigned long prate, unsigned long rate, + u8 *prediv, u16 *fbdiv, int *dsm, u8 *scaler) +{ + u32 max_fout = samsung->pdata->dphy_tx_max_lane_kbps; + u64 best_freq = 0; + u64 fin, fvco, fout; + u8 min_prediv, max_prediv; + u8 _prediv, best_prediv = 1; + u16 _fbdiv, best_fbdiv = 1; + u8 _scaler, best_scaler = 0; + u32 min_delta = UINT_MAX; + long _dsm, best_dsm = 0; + + if (!prate) { + dev_err(samsung->dev, "parent rate of PLL can not be zero\n"); + return 0; + } + + /* + * The PLL output frequency can be calculated using a simple formula: + * Fvco = ((m+k/65536) x 2 x Fin) / p + * Fout = ((m+k/65536) x 2 x Fin) / (p x 2^s) + */ + fin = div64_ul(prate, MSEC_PER_SEC); + + while (!best_freq) { + fout = div64_ul(rate, MSEC_PER_SEC); + if (fout > max_fout) + fout = max_fout; + + /* 0 ≤ S[2:0] ≤ 6 */ + for (_scaler = 0; _scaler < 7; _scaler++) { + fvco = fout << _scaler; + + /* + * 2600MHz ≤ FVCO ≤ 6600MHz + */ + if (fvco < 2600 * MSEC_PER_SEC || fvco > 6600 * MSEC_PER_SEC) + continue; + + /* 6MHz ≤ Fref(Fin / p) ≤ 30MHz */ + min_prediv = DIV_ROUND_UP_ULL(fin, 30 * MSEC_PER_SEC); + max_prediv = DIV_ROUND_CLOSEST_ULL(fin, 6 * MSEC_PER_SEC); + + for (_prediv = min_prediv; _prediv <= max_prediv; _prediv++) { + u64 delta, tmp; + + _fbdiv = DIV_ROUND_CLOSEST_ULL(fvco * _prediv, 2 * fin); + + /* 64 ≤ M[9:0] ≤ 1023 */ + if (_fbdiv < 64 || _fbdiv > 1023) + continue; + + /* -32767 ≤ K[15:0] ≤ 32767 */ + _dsm = ((_prediv * fvco) - (2 * _fbdiv * fin)); + _dsm = DIV_ROUND_UP_ULL(_dsm << 15, fin); + if (abs(_dsm) > 32767) + continue; + + tmp = DIV_ROUND_CLOSEST_ULL((_fbdiv * fin * 2 * 1000), _prediv); + tmp += DIV_ROUND_CLOSEST_ULL((_dsm * fin * 1000), _prediv << 15); + + delta = abs(fvco * MSEC_PER_SEC - tmp); + if (delta < min_delta) { + best_prediv = _prediv; + best_fbdiv = _fbdiv; + best_dsm = _dsm; + best_scaler = _scaler; + min_delta = delta; + best_freq = DIV_ROUND_CLOSEST_ULL(tmp, 1000) * MSEC_PER_SEC; + } + } + } + + rate += 100 * MSEC_PER_SEC; + } + + *prediv = best_prediv; + *fbdiv = best_fbdiv; + *dsm = (int)best_dsm & 0xffff; + *scaler = best_scaler; + dev_dbg(samsung->dev, "p: %d, m: %d, dsm:%ld, scaler: %d\n", + best_prediv, best_fbdiv, best_dsm, best_scaler); + + return best_freq >> best_scaler; +} + +static void +samsung_mipi_dphy_clk_lane_timing_init(struct samsung_mipi_dcphy *samsung) +{ + const struct samsung_mipi_dphy_timing *timing; + unsigned int lane_hs_rate = div64_ul(samsung->pll.rate, USEC_PER_SEC); + u32 val, res_up, res_down; + + timing = samsung_mipi_dphy_get_timing(samsung); + regmap_write(samsung->regmap, DPHY_MC_GNR_CON0, 0xf000); + + /* + * The Drive-Strength / Voltage-Amplitude is adjusted by setting + * the Driver-Up Resistor and Driver-Down Resistor. + */ + res_up = samsung->pdata->dphy_hs_drv_res_cfg->clk_hs_drv_up_ohm; + res_down = samsung->pdata->dphy_hs_drv_res_cfg->clk_hs_drv_down_ohm; + val = EDGE_CON(7) | EDGE_CON_DIR(0) | EDGE_CON_EN | + RES_UP(res_up) | RES_DN(res_down); + regmap_write(samsung->regmap, DPHY_MC_ANA_CON0, val); + + if (lane_hs_rate >= 4500) + regmap_write(samsung->regmap, DPHY_MC_ANA_CON1, 0x0001); + + val = 0; + /* + * Divide-by-2 Clock from Serial Clock. Use this when data rate is under + * 1500Mbps, otherwise divide-by-16 Clock from Serial Clock + */ + if (lane_hs_rate < 1500) + val = HSTX_CLK_SEL; + + val |= T_LPX(timing->lpx); + /* T_LP_EXIT_SKEW/T_LP_ENTRY_SKEW unconfig */ + regmap_write(samsung->regmap, DPHY_MC_TIME_CON0, val); + + val = T_CLK_ZERO(timing->clk_zero) | T_CLK_PREPARE(timing->clk_prepare); + regmap_write(samsung->regmap, DPHY_MC_TIME_CON1, val); + + val = T_HS_EXIT(timing->hs_exit) | T_CLK_TRAIL(timing->clk_trail_eot); + regmap_write(samsung->regmap, DPHY_MC_TIME_CON2, val); + + val = T_CLK_POST(timing->clk_post); + regmap_write(samsung->regmap, DPHY_MC_TIME_CON3, val); + + /* Escape Clock is 20.00MHz */ + regmap_write(samsung->regmap, DPHY_MC_TIME_CON4, 0x1f4); + + /* + * skew calibration should be off, if the operation data rate is + * under 1.5Gbps or equal to 1.5Gbps. + */ + if (lane_hs_rate > 1500) + regmap_write(samsung->regmap, DPHY_MC_DESKEW_CON0, 0x9cb1); +} + +static void +samsung_mipi_dphy_data_lane_timing_init(struct samsung_mipi_dcphy *samsung) +{ + const struct samsung_mipi_dphy_timing *timing; + unsigned int lane_hs_rate = div64_ul(samsung->pll.rate, USEC_PER_SEC); + u32 val, res_up, res_down; + + timing = samsung_mipi_dphy_get_timing(samsung); + + /* + * The Drive-Strength / Voltage-Amplitude is adjusted by adjusting the + * Driver-Up Resistor and Driver-Down Resistor. + */ + res_up = samsung->pdata->dphy_hs_drv_res_cfg->data_hs_drv_up_ohm; + res_down = samsung->pdata->dphy_hs_drv_res_cfg->data_hs_drv_down_ohm; + val = EDGE_CON(7) | EDGE_CON_DIR(0) | EDGE_CON_EN | + RES_UP(res_up) | RES_DN(res_down); + regmap_write(samsung->regmap, COMBO_MD0_ANA_CON0, val); + regmap_write(samsung->regmap, COMBO_MD1_ANA_CON0, val); + regmap_write(samsung->regmap, COMBO_MD2_ANA_CON0, val); + regmap_write(samsung->regmap, DPHY_MD3_ANA_CON0, val); + + if (lane_hs_rate >= 4500) { + regmap_write(samsung->regmap, COMBO_MD0_ANA_CON1, 0x0001); + regmap_write(samsung->regmap, COMBO_MD1_ANA_CON1, 0x0001); + regmap_write(samsung->regmap, COMBO_MD2_ANA_CON1, 0x0001); + regmap_write(samsung->regmap, DPHY_MD3_ANA_CON1, 0x0001); + } + + val = 0; + /* + * Divide-by-2 Clock from Serial Clock. Use this when data rate is under + * 1500Mbps, otherwise divide-by-16 Clock from Serial Clock + */ + if (lane_hs_rate < 1500) + val = HSTX_CLK_SEL; + + val |= T_LPX(timing->lpx); + /* T_LP_EXIT_SKEW/T_LP_ENTRY_SKEW unconfig */ + regmap_write(samsung->regmap, COMBO_MD0_TIME_CON0, val); + regmap_write(samsung->regmap, COMBO_MD1_TIME_CON0, val); + regmap_write(samsung->regmap, COMBO_MD2_TIME_CON0, val); + regmap_write(samsung->regmap, DPHY_MD3_TIME_CON0, val); + + val = T_HS_ZERO(timing->hs_zero) | T_HS_PREPARE(timing->hs_prepare); + regmap_write(samsung->regmap, COMBO_MD0_TIME_CON1, val); + regmap_write(samsung->regmap, COMBO_MD1_TIME_CON1, val); + regmap_write(samsung->regmap, COMBO_MD2_TIME_CON1, val); + regmap_write(samsung->regmap, DPHY_MD3_TIME_CON1, val); + + val = T_HS_EXIT(timing->hs_exit) | T_HS_TRAIL(timing->hs_trail_eot); + regmap_write(samsung->regmap, COMBO_MD0_TIME_CON2, val); + regmap_write(samsung->regmap, COMBO_MD1_TIME_CON2, val); + regmap_write(samsung->regmap, COMBO_MD2_TIME_CON2, val); + regmap_write(samsung->regmap, DPHY_MD3_TIME_CON2, val); + + /* TTA-GET/TTA-GO Timing Counter register use default value */ + val = T_TA_GET(0x3) | T_TA_GO(0x0); + regmap_write(samsung->regmap, COMBO_MD0_TIME_CON3, val); + regmap_write(samsung->regmap, COMBO_MD1_TIME_CON3, val); + regmap_write(samsung->regmap, COMBO_MD2_TIME_CON3, val); + regmap_write(samsung->regmap, DPHY_MD3_TIME_CON3, val); + + /* Escape Clock is 20.00MHz */ + regmap_write(samsung->regmap, COMBO_MD0_TIME_CON4, 0x1f4); + regmap_write(samsung->regmap, COMBO_MD1_TIME_CON4, 0x1f4); + regmap_write(samsung->regmap, COMBO_MD2_TIME_CON4, 0x1f4); + regmap_write(samsung->regmap, DPHY_MD3_TIME_CON4, 0x1f4); +} + +static int samsung_mipi_dphy_power_on(struct samsung_mipi_dcphy *samsung) +{ + int ret; + + reset_control_assert(samsung->m_phy_rst); + + samsung_mipi_dcphy_bias_block_enable(samsung); + samsung_mipi_dcphy_pll_configure(samsung); + samsung_mipi_dphy_clk_lane_timing_init(samsung); + samsung_mipi_dphy_data_lane_timing_init(samsung); + ret = samsung_mipi_dcphy_pll_enable(samsung); + if (ret < 0) + return ret; + + samsung_mipi_dphy_lane_enable(samsung); + + reset_control_deassert(samsung->m_phy_rst); + + /* The TSKEWCAL maximum is 100 µsec + * at initial calibration. + */ + usleep_range(100, 110); + + return 0; +} + +static int samsung_mipi_dcphy_power_on(struct phy *phy) +{ + struct samsung_mipi_dcphy *samsung = phy_get_drvdata(phy); + + reset_control_assert(samsung->apb_rst); + udelay(1); + reset_control_deassert(samsung->apb_rst); + + switch (samsung->type) { + case PHY_TYPE_DPHY: + return samsung_mipi_dphy_power_on(samsung); + default: + /* CPHY part to be implemented later */ + return -EOPNOTSUPP; + } + + return 0; +} + +static int samsung_mipi_dcphy_power_off(struct phy *phy) +{ + struct samsung_mipi_dcphy *samsung = phy_get_drvdata(phy); + + switch (samsung->type) { + case PHY_TYPE_DPHY: + samsung_mipi_dphy_lane_disable(samsung); + break; + default: + /* CPHY part to be implemented later */ + return -EOPNOTSUPP; + } + + samsung_mipi_dcphy_pll_disable(samsung); + + return 0; +} + +static int +samsung_mipi_dcphy_pll_ssc_modulation_calc(struct samsung_mipi_dcphy *samsung, + u8 *mfr, u8 *mrr) +{ + unsigned long fin = div64_ul(clk_get_rate(samsung->ref_clk), MSEC_PER_SEC); + u16 prediv = samsung->pll.prediv; + u16 fbdiv = samsung->pll.fbdiv; + u16 min_mfr, max_mfr; + u16 _mfr, best_mfr = 0; + u16 mr, _mrr, best_mrr = 0; + + /* 20KHz ≤ MF ≤ 150KHz */ + max_mfr = DIV_ROUND_UP(fin, (20 * prediv) << 5); + min_mfr = div64_ul(fin, ((150 * prediv) << 5)); + /*0 ≤ mfr ≤ 255 */ + if (max_mfr > 256) + max_mfr = 256; + + for (_mfr = min_mfr; _mfr < max_mfr; _mfr++) { + /* 1 ≤ mrr ≤ 31 */ + for (_mrr = 1; _mrr < 32; _mrr++) { + mr = DIV_ROUND_UP(_mfr * _mrr * 100, fbdiv << 6); + /* 0 ≤ MR ≤ 5% */ + if (mr > 5) + continue; + + if (_mfr * _mrr < 513) { + best_mfr = _mfr; + best_mrr = _mrr; + break; + } + } + } + + if (best_mrr) { + *mfr = best_mfr & 0xff; + *mrr = best_mrr & 0x3f; + } else { + dev_err(samsung->dev, "failed to calc ssc parameter mfr and mrr\n"); + return -EINVAL; + } + + return 0; +} + +static void +samsung_mipi_dcphy_pll_calc_rate(struct samsung_mipi_dcphy *samsung, + unsigned long long rate) +{ + unsigned long prate = clk_get_rate(samsung->ref_clk); + unsigned long fout; + u8 scaler = 0, mfr = 0, mrr = 0; + u16 fbdiv = 0; + u8 prediv = 1; + int dsm = 0; + int ret; + + fout = samsung_mipi_dcphy_pll_round_rate(samsung, prate, rate, + &prediv, &fbdiv, &dsm, + &scaler); + + dev_dbg(samsung->dev, "%s: fin=%lu, req_rate=%llu\n", + __func__, prate, rate); + dev_dbg(samsung->dev, "%s: fout=%lu, prediv=%u, fbdiv=%u\n", + __func__, fout, prediv, fbdiv); + + samsung->pll.prediv = prediv; + samsung->pll.fbdiv = fbdiv; + samsung->pll.dsm = dsm; + samsung->pll.scaler = scaler; + samsung->pll.rate = fout; + + /* + * All DPHY 2.0 compliant Transmitters shall support SSC operating above + * 2.5 Gbps + */ + if (fout > 2500000000LL) { + ret = samsung_mipi_dcphy_pll_ssc_modulation_calc(samsung, + &mfr, &mrr); + if (!ret) { + samsung->pll.ssc_en = true; + samsung->pll.mfr = mfr; + samsung->pll.mrr = mrr; + } + } +} + +static int samsung_mipi_dcphy_configure(struct phy *phy, + union phy_configure_opts *opts) +{ + struct samsung_mipi_dcphy *samsung = phy_get_drvdata(phy); + unsigned long long target_rate = opts->mipi_dphy.hs_clk_rate; + + samsung->lanes = opts->mipi_dphy.lanes > 4 ? 4 : opts->mipi_dphy.lanes; + + samsung_mipi_dcphy_pll_calc_rate(samsung, target_rate); + opts->mipi_dphy.hs_clk_rate = samsung->pll.rate; + + return 0; +} + +static int samsung_mipi_dcphy_init(struct phy *phy) +{ + struct samsung_mipi_dcphy *samsung = phy_get_drvdata(phy); + + return pm_runtime_resume_and_get(samsung->dev); +} + +static int samsung_mipi_dcphy_exit(struct phy *phy) +{ + struct samsung_mipi_dcphy *samsung = phy_get_drvdata(phy); + + return pm_runtime_put(samsung->dev); +} + +static const struct phy_ops samsung_mipi_dcphy_ops = { + .configure = samsung_mipi_dcphy_configure, + .power_on = samsung_mipi_dcphy_power_on, + .power_off = samsung_mipi_dcphy_power_off, + .init = samsung_mipi_dcphy_init, + .exit = samsung_mipi_dcphy_exit, + .owner = THIS_MODULE, +}; + +static const struct regmap_config samsung_mipi_dcphy_regmap_config = { + .name = "dcphy", + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = 0x10000, +}; + +static struct phy *samsung_mipi_dcphy_xlate(struct device *dev, + const struct of_phandle_args *args) +{ + struct samsung_mipi_dcphy *samsung = dev_get_drvdata(dev); + + if (args->args_count != 1) { + dev_err(dev, "invalid number of arguments\n"); + return ERR_PTR(-EINVAL); + } + + if (samsung->type != PHY_NONE && samsung->type != args->args[0]) + dev_warn(dev, "phy type select %d overwriting type %d\n", + args->args[0], samsung->type); + + samsung->type = args->args[0]; + + return samsung->phy; +} + +static int samsung_mipi_dcphy_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct samsung_mipi_dcphy *samsung; + struct phy_provider *phy_provider; + struct resource *res; + void __iomem *regs; + int ret; + + samsung = devm_kzalloc(dev, sizeof(*samsung), GFP_KERNEL); + if (!samsung) + return -ENOMEM; + + samsung->dev = dev; + samsung->pdata = device_get_match_data(dev); + platform_set_drvdata(pdev, samsung); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + regs = devm_ioremap_resource(dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + samsung->regmap = devm_regmap_init_mmio(dev, regs, + &samsung_mipi_dcphy_regmap_config); + if (IS_ERR(samsung->regmap)) + return dev_err_probe(dev, PTR_ERR(samsung->regmap), "Failed to init regmap\n"); + + samsung->grf_regmap = syscon_regmap_lookup_by_phandle(np, "rockchip,grf"); + if (IS_ERR(samsung->grf_regmap)) + return dev_err_probe(dev, PTR_ERR(samsung->grf_regmap), + "Unable to get rockchip,grf\n"); + + samsung->ref_clk = devm_clk_get(dev, "ref"); + if (IS_ERR(samsung->ref_clk)) + return dev_err_probe(dev, PTR_ERR(samsung->ref_clk), + "Failed to get reference clock\n"); + + samsung->pclk = devm_clk_get(dev, "pclk"); + if (IS_ERR(samsung->pclk)) + return dev_err_probe(dev, PTR_ERR(samsung->pclk), "Failed to get pclk\n"); + + samsung->m_phy_rst = devm_reset_control_get(dev, "m_phy"); + if (IS_ERR(samsung->m_phy_rst)) + return dev_err_probe(dev, PTR_ERR(samsung->m_phy_rst), + "Failed to get system m_phy_rst control\n"); + + samsung->s_phy_rst = devm_reset_control_get(dev, "s_phy"); + if (IS_ERR(samsung->s_phy_rst)) + return dev_err_probe(dev, PTR_ERR(samsung->s_phy_rst), + "Failed to get system s_phy_rst control\n"); + + samsung->apb_rst = devm_reset_control_get(dev, "apb"); + if (IS_ERR(samsung->apb_rst)) + return dev_err_probe(dev, PTR_ERR(samsung->apb_rst), + "Failed to get system apb_rst control\n"); + + samsung->grf_apb_rst = devm_reset_control_get(dev, "grf"); + if (IS_ERR(samsung->grf_apb_rst)) + return dev_err_probe(dev, PTR_ERR(samsung->grf_apb_rst), + "Failed to get system grf_apb_rst control\n"); + + samsung->phy = devm_phy_create(dev, NULL, &samsung_mipi_dcphy_ops); + if (IS_ERR(samsung->phy)) + return dev_err_probe(dev, PTR_ERR(samsung->phy), "Failed to create MIPI DC-PHY\n"); + + phy_set_drvdata(samsung->phy, samsung); + + ret = devm_pm_runtime_enable(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable runtime PM\n"); + + phy_provider = devm_of_phy_provider_register(dev, samsung_mipi_dcphy_xlate); + if (IS_ERR(phy_provider)) + return dev_err_probe(dev, PTR_ERR(phy_provider), + "Failed to register phy provider\n"); + + return 0; +} + +static __maybe_unused int samsung_mipi_dcphy_runtime_suspend(struct device *dev) +{ + struct samsung_mipi_dcphy *samsung = dev_get_drvdata(dev); + + clk_disable_unprepare(samsung->ref_clk); + clk_disable_unprepare(samsung->pclk); + + return 0; +} + +static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev) +{ + struct samsung_mipi_dcphy *samsung = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(samsung->pclk); + if (ret) { + dev_err(samsung->dev, "Failed to enable pclk, %d\n", ret); + return ret; + } + + clk_prepare_enable(samsung->ref_clk); + if (ret) { + dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret); + clk_disable_unprepare(samsung->pclk); + return ret; + } + + return 0; +} + +static const struct dev_pm_ops samsung_mipi_dcphy_pm_ops = { + SET_RUNTIME_PM_OPS(samsung_mipi_dcphy_runtime_suspend, + samsung_mipi_dcphy_runtime_resume, NULL) +}; + +static const struct hs_drv_res_cfg rk3576_dphy_hs_drv_res_cfg = { + .clk_hs_drv_up_ohm = STRENGTH_52_OHM, + .clk_hs_drv_down_ohm = STRENGTH_52_OHM, + .data_hs_drv_up_ohm = STRENGTH_39_OHM, + .data_hs_drv_down_ohm = STRENGTH_39_OHM, +}; + +static const struct hs_drv_res_cfg rk3588_dphy_hs_drv_res_cfg = { + .clk_hs_drv_up_ohm = STRENGTH_34_OHM, + .clk_hs_drv_down_ohm = STRENGTH_34_OHM, + .data_hs_drv_up_ohm = STRENGTH_43_OHM, + .data_hs_drv_down_ohm = STRENGTH_43_OHM, +}; + +static const struct samsung_mipi_dcphy_plat_data rk3576_samsung_mipi_dcphy_plat_data = { + .dphy_hs_drv_res_cfg = &rk3576_dphy_hs_drv_res_cfg, + .dphy_tx_max_lane_kbps = 2500000L, +}; + +static const struct samsung_mipi_dcphy_plat_data rk3588_samsung_mipi_dcphy_plat_data = { + .dphy_hs_drv_res_cfg = &rk3588_dphy_hs_drv_res_cfg, + .dphy_tx_max_lane_kbps = 4500000L, +}; + +static const struct of_device_id samsung_mipi_dcphy_of_match[] = { + { + .compatible = "rockchip,rk3576-mipi-dcphy", + .data = &rk3576_samsung_mipi_dcphy_plat_data, + }, { + .compatible = "rockchip,rk3588-mipi-dcphy", + .data = &rk3588_samsung_mipi_dcphy_plat_data, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, samsung_mipi_dcphy_of_match); + +static struct platform_driver samsung_mipi_dcphy_driver = { + .driver = { + .name = "samsung-mipi-dcphy", + .of_match_table = samsung_mipi_dcphy_of_match, + .pm = &samsung_mipi_dcphy_pm_ops, + }, + .probe = samsung_mipi_dcphy_probe, +}; +module_platform_driver(samsung_mipi_dcphy_driver); + +MODULE_AUTHOR("Guochun Huang "); +MODULE_DESCRIPTION("Samsung MIPI DCPHY Driver"); +MODULE_LICENSE("GPL"); From 86ae168934098be744cd5a8470544165f0054d0b Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 27 Feb 2025 19:08:35 +0800 Subject: [PATCH 0814/2157] dt-bindings: phy: rockchip: Add rk3562 naneng-combophy compatible rk3562 use the same Naneng Combo Phy driver as rk3568. Signed-off-by: Kever Yang Reviewed-by: Heiko Stuebner Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250227110836.2343158-1-kever.yang@rock-chips.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml index 1b3de6678c08..888e6b2aac5a 100644 --- a/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml +++ b/Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml @@ -12,6 +12,7 @@ maintainers: properties: compatible: enum: + - rockchip,rk3562-naneng-combphy - rockchip,rk3568-naneng-combphy - rockchip,rk3576-naneng-combphy - rockchip,rk3588-naneng-combphy From f13bff25161b8a0a9d716764ebe57334d496c6d9 Mon Sep 17 00:00:00 2001 From: Jon Lin Date: Thu, 27 Feb 2025 19:08:36 +0800 Subject: [PATCH 0815/2157] phy: rockchip-naneng-combo: Support rk3562 rk3562 has 1 naneng comboPHY used for PCIe and USB3. Signed-off-by: Jon Lin Signed-off-by: Kever Yang Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250227110836.2343158-2-kever.yang@rock-chips.com Signed-off-by: Vinod Koul --- .../rockchip/phy-rockchip-naneng-combphy.c | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index a1532ef8bbe9..e4c5e0531b16 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -393,6 +393,154 @@ static int rockchip_combphy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } +static int rk3562_combphy_cfg(struct rockchip_combphy_priv *priv) +{ + const struct rockchip_combphy_grfcfg *cfg = priv->cfg->grfcfg; + unsigned long rate; + u32 val; + + switch (priv->type) { + case PHY_TYPE_PCIE: + /* Set SSC downward spread spectrum */ + rockchip_combphy_updatel(priv, PHYREG32_SSC_MASK, + PHYREG32_SSC_DOWNWARD << PHYREG32_SSC_DIR_SHIFT, + PHYREG32); + + rockchip_combphy_param_write(priv->phy_grf, &cfg->con0_for_pcie, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true); + break; + case PHY_TYPE_USB3: + /* Set SSC downward spread spectrum */ + rockchip_combphy_updatel(priv, PHYREG32_SSC_MASK, + PHYREG32_SSC_DOWNWARD << PHYREG32_SSC_DIR_SHIFT, + PHYREG32); + + /* Enable adaptive CTLE for USB3.0 Rx */ + rockchip_combphy_updatel(priv, PHYREG15_CTLE_EN, + PHYREG15_CTLE_EN, PHYREG15); + + /* Set PLL KVCO fine tuning signals */ + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, BIT(3), PHYREG33); + + /* Set PLL LPF R1 to su_trim[10:7]=1001 */ + writel(PHYREG12_PLL_LPF_ADJ_VALUE, priv->mmio + PHYREG12); + + /* Set PLL input clock divider 1/2 */ + val = FIELD_PREP(PHYREG6_PLL_DIV_MASK, PHYREG6_PLL_DIV_2); + rockchip_combphy_updatel(priv, PHYREG6_PLL_DIV_MASK, val, PHYREG6); + + /* Set PLL loop divider */ + writel(PHYREG18_PLL_LOOP, priv->mmio + PHYREG18); + + /* Set PLL KVCO to min and set PLL charge pump current to max */ + writel(PHYREG11_SU_TRIM_0_7, priv->mmio + PHYREG11); + + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_sel_usb, true); + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_txcomp_sel, false); + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_txelec_sel, false); + rockchip_combphy_param_write(priv->phy_grf, &cfg->usb_mode_set, true); + break; + default: + dev_err(priv->dev, "incompatible PHY type\n"); + return -EINVAL; + } + + rate = clk_get_rate(priv->refclk); + + switch (rate) { + case REF_CLOCK_24MHz: + if (priv->type == PHY_TYPE_USB3) { + /* Set ssc_cnt[9:0]=0101111101 & 31.5KHz */ + val = FIELD_PREP(PHYREG15_SSC_CNT_MASK, PHYREG15_SSC_CNT_VALUE); + rockchip_combphy_updatel(priv, PHYREG15_SSC_CNT_MASK, + val, PHYREG15); + + writel(PHYREG16_SSC_CNT_VALUE, priv->mmio + PHYREG16); + } + break; + case REF_CLOCK_25MHz: + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_25m, true); + break; + case REF_CLOCK_100MHz: + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_100m, true); + if (priv->type == PHY_TYPE_PCIE) { + /* PLL KVCO tuning fine */ + val = FIELD_PREP(PHYREG33_PLL_KVCO_MASK, PHYREG33_PLL_KVCO_VALUE); + rockchip_combphy_updatel(priv, PHYREG33_PLL_KVCO_MASK, + val, PHYREG33); + + /* Enable controlling random jitter, aka RMJ */ + writel(0x4, priv->mmio + PHYREG12); + + val = PHYREG6_PLL_DIV_2 << PHYREG6_PLL_DIV_SHIFT; + rockchip_combphy_updatel(priv, PHYREG6_PLL_DIV_MASK, + val, PHYREG6); + + writel(0x32, priv->mmio + PHYREG18); + writel(0xf0, priv->mmio + PHYREG11); + } + break; + default: + dev_err(priv->dev, "Unsupported rate: %lu\n", rate); + return -EINVAL; + } + + if (priv->ext_refclk) { + rockchip_combphy_param_write(priv->phy_grf, &cfg->pipe_clk_ext, true); + if (priv->type == PHY_TYPE_PCIE && rate == REF_CLOCK_100MHz) { + val = PHYREG13_RESISTER_HIGH_Z << PHYREG13_RESISTER_SHIFT; + val |= PHYREG13_CKRCV_AMP0; + rockchip_combphy_updatel(priv, PHYREG13_RESISTER_MASK, val, PHYREG13); + + val = readl(priv->mmio + PHYREG14); + val |= PHYREG14_CKRCV_AMP1; + writel(val, priv->mmio + PHYREG14); + } + } + + if (priv->enable_ssc) { + val = readl(priv->mmio + PHYREG8); + val |= PHYREG8_SSC_EN; + writel(val, priv->mmio + PHYREG8); + } + + return 0; +} + +static const struct rockchip_combphy_grfcfg rk3562_combphy_grfcfgs = { + /* pipe-phy-grf */ + .pcie_mode_set = { 0x0000, 5, 0, 0x00, 0x11 }, + .usb_mode_set = { 0x0000, 5, 0, 0x00, 0x04 }, + .pipe_rxterm_set = { 0x0000, 12, 12, 0x00, 0x01 }, + .pipe_txelec_set = { 0x0004, 1, 1, 0x00, 0x01 }, + .pipe_txcomp_set = { 0x0004, 4, 4, 0x00, 0x01 }, + .pipe_clk_25m = { 0x0004, 14, 13, 0x00, 0x01 }, + .pipe_clk_100m = { 0x0004, 14, 13, 0x00, 0x02 }, + .pipe_phymode_sel = { 0x0008, 1, 1, 0x00, 0x01 }, + .pipe_rate_sel = { 0x0008, 2, 2, 0x00, 0x01 }, + .pipe_rxterm_sel = { 0x0008, 8, 8, 0x00, 0x01 }, + .pipe_txelec_sel = { 0x0008, 12, 12, 0x00, 0x01 }, + .pipe_txcomp_sel = { 0x0008, 15, 15, 0x00, 0x01 }, + .pipe_clk_ext = { 0x000c, 9, 8, 0x02, 0x01 }, + .pipe_sel_usb = { 0x000c, 14, 13, 0x00, 0x01 }, + .pipe_phy_status = { 0x0034, 6, 6, 0x01, 0x00 }, + .con0_for_pcie = { 0x0000, 15, 0, 0x00, 0x1000 }, + .con1_for_pcie = { 0x0004, 15, 0, 0x00, 0x0000 }, + .con2_for_pcie = { 0x0008, 15, 0, 0x00, 0x0101 }, + .con3_for_pcie = { 0x000c, 15, 0, 0x00, 0x0200 }, +}; + +static const struct rockchip_combphy_cfg rk3562_combphy_cfgs = { + .num_phys = 1, + .phy_ids = { + 0xff750000 + }, + .grfcfg = &rk3562_combphy_grfcfgs, + .combphy_cfg = rk3562_combphy_cfg, +}; + static int rk3568_combphy_cfg(struct rockchip_combphy_priv *priv) { const struct rockchip_combphy_grfcfg *cfg = priv->cfg->grfcfg; @@ -1046,6 +1194,10 @@ static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = { }; static const struct of_device_id rockchip_combphy_of_match[] = { + { + .compatible = "rockchip,rk3562-naneng-combphy", + .data = &rk3562_combphy_cfgs, + }, { .compatible = "rockchip,rk3568-naneng-combphy", .data = &rk3568_combphy_cfgs, From 3f61ac7c65bdb26accb52f9db66313597e759821 Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Thu, 13 Mar 2025 13:59:32 +0100 Subject: [PATCH 0816/2157] fs/9p: fix NULL pointer dereference on mkdir When a 9p tree was mounted with option 'posixacl', parent directory had a default ACL set for its subdirectories, e.g.: setfacl -m default:group:simpsons:rwx parentdir then creating a subdirectory crashed 9p client, as v9fs_fid_add() call in function v9fs_vfs_mkdir_dotl() sets the passed 'fid' pointer to NULL (since dafbe689736) even though the subsequent v9fs_set_create_acl() call expects a valid non-NULL 'fid' pointer: [ 37.273191] BUG: kernel NULL pointer dereference, address: 0000000000000000 ... [ 37.322338] Call Trace: [ 37.323043] [ 37.323621] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 37.324448] ? page_fault_oops (arch/x86/mm/fault.c:714) [ 37.325532] ? search_module_extables (kernel/module/main.c:3733) [ 37.326742] ? p9_client_walk (net/9p/client.c:1165) 9pnet [ 37.328006] ? search_bpf_extables (kernel/bpf/core.c:804) [ 37.329142] ? exc_page_fault (./arch/x86/include/asm/paravirt.h:686 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538) [ 37.330196] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:574) [ 37.331330] ? p9_client_walk (net/9p/client.c:1165) 9pnet [ 37.332562] ? v9fs_fid_xattr_get (fs/9p/xattr.c:30) 9p [ 37.333824] v9fs_fid_xattr_set (fs/9p/fid.h:23 fs/9p/xattr.c:121) 9p [ 37.335077] v9fs_set_acl (fs/9p/acl.c:276) 9p [ 37.336112] v9fs_set_create_acl (fs/9p/acl.c:307) 9p [ 37.337326] v9fs_vfs_mkdir_dotl (fs/9p/vfs_inode_dotl.c:411) 9p [ 37.338590] vfs_mkdir (fs/namei.c:4313) [ 37.339535] do_mkdirat (fs/namei.c:4336) [ 37.340465] __x64_sys_mkdir (fs/namei.c:4354) [ 37.341455] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) [ 37.342447] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fix this by simply swapping the sequence of these two calls in v9fs_vfs_mkdir_dotl(), i.e. calling v9fs_set_create_acl() before v9fs_fid_add(). Fixes: dafbe689736f ("9p fid refcount: cleanup p9_fid_put calls") Reported-by: syzbot+5b667f9a1fee4ba3775a@syzkaller.appspotmail.com Signed-off-by: Christian Schoenebeck Message-ID: Signed-off-by: Dominique Martinet --- fs/9p/vfs_inode_dotl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 143ac03b7425..3397939fd2d5 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -407,8 +407,8 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, err); goto error; } - v9fs_fid_add(dentry, &fid); v9fs_set_create_acl(inode, fid, dacl, pacl); + v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; inc_nlink(dir); From 6b7ce3134f68107438c99f8ea5424e08d418ca1b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:05 +0100 Subject: [PATCH 0817/2157] x86/kgdb: use IS_ERR_PCPU() macro Patch series "Enable strict percpu address space checks", v4. Enable strict percpu address space checks via x86 named address space qualifiers. Percpu variables are declared in __seg_gs/__seg_fs named AS and kept named AS qualified until they are dereferenced via percpu accessor. This approach enables various compiler checks for cross-namespace variable assignments. Please note that current version of sparse doesn't know anything about __typeof_unqual__() operator. Avoid the usage of __typeof_unqual__() when sparse checking is active to prevent sparse errors with unknowing keyword. The proposed patch by Dan Carpenter to implement __typeof_unqual__() handling in sparse is located at: https://lore.kernel.org/lkml/5b8d0dee-8fb6-45af-ba6c-7f74aff9a4b8@stanley.mountain/ This patch (of 6): Use IS_ERR_PCPU() when checking the error pointer in the percpu address space. This macro adds intermediate cast to unsigned long when switching named address spaces. The patch will avoid future build errors due to pointer address space mismatch with enabled strict percpu address space checks. Link: https://lkml.kernel.org/r/20250127160709.80604-1-ubizjak@gmail.com Link: https://lkml.kernel.org/r/20250127160709.80604-2-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Nadav Amit Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Brian Gerst Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Boqun Feng Cc: "David S. Miller" Cc: Denys Vlasenko Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Kent Overstreet Cc: Paolo Abeni Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 9c9faa1634fb..102641fd2172 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -655,7 +655,7 @@ void kgdb_arch_late(void) if (breakinfo[i].pev) continue; breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); - if (IS_ERR((void * __force)breakinfo[i].pev)) { + if (IS_ERR_PCPU(breakinfo[i].pev)) { printk(KERN_ERR "kgdb: Could not allocate hw" "breakpoints\nDisabling the kernel debugger\n"); breakinfo[i].pev = NULL; From ac053946f5c40ce90ca7ccb75ed687612d9eccf9 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:06 +0100 Subject: [PATCH 0818/2157] compiler.h: introduce TYPEOF_UNQUAL() macro Define TYPEOF_UNQUAL() to use __typeof_unqual__() as typeof operator when available, to return unqualified type of the expression. Current version of sparse doesn't know anything about __typeof_unqual__() operator. Avoid the usage of __typeof_unqual__() when sparse checking is active to prevent sparse errors with unknowing keyword. Link: https://lkml.kernel.org/r/20250127160709.80604-3-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Thomas Gleixner Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Brian Gerst Cc: Denys Vlasenko Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Boqun Feng Cc: Borislav Petkov Cc: Dave Hansen Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Kent Overstreet Cc: Nadav Amit Cc: Paolo Abeni Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 8 ++++++++ include/linux/compiler-gcc.h | 8 ++++++++ include/linux/compiler.h | 20 ++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 2e7c2c282f3a..4fc8e26914ad 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -128,3 +128,11 @@ */ #define ASM_INPUT_G "ir" #define ASM_INPUT_RM "r" + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..32048052c64a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -137,3 +137,11 @@ #if GCC_VERSION < 90100 #undef __alloc_size__ #endif + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__GNUC__ >= 14) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 155385754824..3a7a537e13a3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -210,6 +210,26 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __must_be_cstr(p) \ __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") +/* + * Use __typeof_unqual__() when available. + * + * XXX: Remove test for __CHECKER__ once + * sparse learns about __typeof_unqual__(). + */ +#if CC_HAS_TYPEOF_UNQUAL && !defined(__CHECKER__) +# define USE_TYPEOF_UNQUAL 1 +#endif + +/* + * Define TYPEOF_UNQUAL() to use __typeof_unqual__() as typeof + * operator when available, to return an unqualified type of the exp. + */ +#if defined(USE_TYPEOF_UNQUAL) +# define TYPEOF_UNQUAL(exp) __typeof_unqual__(exp) +#else +# define TYPEOF_UNQUAL(exp) __typeof__(exp) +#endif + #endif /* __KERNEL__ */ /** From 8a3c392388c6a6e0c8937a24712b630ec9ac7016 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:07 +0100 Subject: [PATCH 0819/2157] percpu: use TYPEOF_UNQUAL() in variable declarations Use TYPEOF_UNQUAL() to declare variables as a corresponding type without named address space qualifier to avoid "`__seg_gs' specified for auto variable `var'" errors. Link: https://lkml.kernel.org/r/20250127160709.80604-4-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Nadav Amit Acked-by: Christoph Lameter Cc: Dennis Zhou Cc: Tejun Heo Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Kent Overstreet Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Peter Zijlstra Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Linus Torvalds Cc: Brian Gerst Cc: Denys Vlasenko Signed-off-by: Andrew Morton --- arch/x86/include/asm/percpu.h | 10 +++++----- fs/bcachefs/util.h | 2 +- include/asm-generic/percpu.h | 26 +++++++++++++------------- include/linux/part_stat.h | 2 +- include/linux/percpu-defs.h | 4 ++-- include/net/snmp.h | 5 ++--- kernel/locking/percpu-rwsem.c | 2 +- net/mpls/internal.h | 4 ++-- 8 files changed, 27 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e525cd85f999..666e4137b09f 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -180,7 +180,7 @@ do { \ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ \ if (0) { \ - typeof(_var) pto_tmp__; \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ @@ -219,7 +219,7 @@ do { \ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ \ if (0) { \ - typeof(_var) pto_tmp__; \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ @@ -240,7 +240,7 @@ do { \ (val) == (typeof(val))-1)) ? (int)(val) : 0; \ \ if (0) { \ - typeof(var) pao_tmp__; \ + TYPEOF_UNQUAL(var) pao_tmp__; \ pao_tmp__ = (val); \ (void)pao_tmp__; \ } \ @@ -273,7 +273,7 @@ do { \ */ #define raw_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(_var) pxo_old__ = raw_cpu_read(_var); \ + TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \ \ raw_cpu_write(_var, _nval); \ \ @@ -287,7 +287,7 @@ do { \ */ #define this_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(_var) pxo_old__ = this_cpu_read(_var); \ + TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \ \ do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ \ diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index e7c3541b38f3..5760f7dbcac2 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -609,7 +609,7 @@ do { \ #define per_cpu_sum(_p) \ ({ \ - typeof(*_p) _ret = 0; \ + TYPEOF_UNQUAL(*_p) _ret = 0; \ \ int cpu; \ for_each_possible_cpu(cpu) \ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 94cbd50cc870..50597b975a49 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -74,7 +74,7 @@ do { \ #define raw_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ \ *__p += val; \ *__p; \ @@ -82,8 +82,8 @@ do { \ #define raw_cpu_generic_xchg(pcp, nval) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) __ret; \ __ret = *__p; \ *__p = nval; \ __ret; \ @@ -91,7 +91,7 @@ do { \ #define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg) \ ({ \ - typeof(pcp) __val, __old = *(ovalp); \ + TYPEOF_UNQUAL(pcp) __val, __old = *(ovalp); \ __val = _cmpxchg(pcp, __old, nval); \ if (__val != __old) \ *(ovalp) = __val; \ @@ -100,8 +100,8 @@ do { \ #define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \ ({ \ - typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __val = *__p, ___old = *(ovalp); \ + TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \ + TYPEOF_UNQUAL(pcp) __val = *__p, ___old = *(ovalp); \ bool __ret; \ if (__val == ___old) { \ *__p = nval; \ @@ -115,14 +115,14 @@ do { \ #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) __old = (oval); \ + TYPEOF_UNQUAL(pcp) __old = (oval); \ raw_cpu_generic_try_cmpxchg(pcp, &__old, nval); \ __old; \ }) #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ - typeof(pcp) ___ret; \ + TYPEOF_UNQUAL(pcp) ___ret; \ preempt_disable_notrace(); \ ___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ @@ -131,7 +131,7 @@ do { \ #define __this_cpu_generic_read_noirq(pcp) \ ({ \ - typeof(pcp) ___ret; \ + TYPEOF_UNQUAL(pcp) ___ret; \ unsigned long ___flags; \ raw_local_irq_save(___flags); \ ___ret = raw_cpu_generic_read(pcp); \ @@ -141,7 +141,7 @@ do { \ #define this_cpu_generic_read(pcp) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ if (__native_word(pcp)) \ __ret = __this_cpu_generic_read_nopreempt(pcp); \ else \ @@ -160,7 +160,7 @@ do { \ #define this_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_add_return(pcp, val); \ @@ -170,7 +170,7 @@ do { \ #define this_cpu_generic_xchg(pcp, nval) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_xchg(pcp, nval); \ @@ -190,7 +190,7 @@ do { \ #define this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) __ret; \ + TYPEOF_UNQUAL(pcp) __ret; \ unsigned long __flags; \ raw_local_irq_save(__flags); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index ac8c44dd8237..c5e9cac0575e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -33,7 +33,7 @@ struct disk_stats { #define part_stat_read(part, field) \ ({ \ - typeof((part)->bd_stats->field) res = 0; \ + TYPEOF_UNQUAL((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5b520fe86b60..79b9402404f1 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -317,7 +317,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return(stem, variable) \ ({ \ - typeof(variable) pscr_ret__; \ + TYPEOF_UNQUAL(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable); break; \ @@ -332,7 +332,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return2(stem, variable, ...) \ ({ \ - typeof(variable) pscr2_ret__; \ + TYPEOF_UNQUAL(variable) pscr2_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ diff --git a/include/net/snmp.h b/include/net/snmp.h index 468a67836e2f..4cb4326dfebe 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -159,7 +159,7 @@ struct linux_tls_mib { #define __SNMP_ADD_STATS64(mib, field, addend) \ do { \ - __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \ + TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[field] += addend; \ u64_stats_update_end(&ptr->syncp); \ @@ -176,8 +176,7 @@ struct linux_tls_mib { #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) #define __SNMP_UPD_PO_STATS64(mib, basefield, addend) \ do { \ - __typeof__(*mib) *ptr; \ - ptr = raw_cpu_ptr((mib)); \ + TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[basefield##PKTS]++; \ ptr->mibs[basefield##OCTETS] += addend; \ diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 6083883c4fe0..d6964fc29f51 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -184,7 +184,7 @@ EXPORT_SYMBOL_GPL(__percpu_down_read); #define per_cpu_sum(var) \ ({ \ - typeof(var) __sum = 0; \ + TYPEOF_UNQUAL(var) __sum = 0; \ int cpu; \ compiletime_assert_atomic_type(__sum); \ for_each_possible_cpu(cpu) \ diff --git a/net/mpls/internal.h b/net/mpls/internal.h index b9f492ddf93b..83c629529b57 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -33,7 +33,7 @@ struct mpls_dev { #define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ do { \ - __typeof__(*(mdev)->stats) *ptr = \ + TYPEOF_UNQUAL(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ @@ -45,7 +45,7 @@ struct mpls_dev { #define MPLS_INC_STATS(mdev, field) \ do { \ - __typeof__(*(mdev)->stats) *ptr = \ + TYPEOF_UNQUAL(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ From 6a39fe05ecaa3946ed0af9efd3e56689d519e420 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:08 +0100 Subject: [PATCH 0820/2157] percpu: use TYPEOF_UNQUAL() in *_cpu_ptr() accessors Use TYPEOF_UNQUAL() macro to declare the return type of *_cpu_ptr() accessors in the generic named address space to avoid access to data from pointer to non-enclosed address space type of errors. Link: https://lkml.kernel.org/r/20250127160709.80604-5-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Nadav Amit Acked-by: Christoph Lameter Cc: Dennis Zhou Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Brian Gerst Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Boqun Feng Cc: "David S. Miller" Cc: Denys Vlasenko Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Kent Overstreet Cc: Paolo Abeni Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/include/asm/percpu.h | 8 ++++++-- include/linux/percpu-defs.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 666e4137b09f..27f668660abe 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -73,10 +73,14 @@ unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \ \ tcp_ptr__ += (__force unsigned long)(_ptr); \ - (typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \ }) #else -#define arch_raw_cpu_ptr(_ptr) ({ BUILD_BUG(); (typeof(_ptr))0; }) +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + BUILD_BUG(); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \ +}) #endif #define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 79b9402404f1..a7cf954ea99d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -221,7 +221,7 @@ do { \ } while (0) #define PERCPU_PTR(__p) \ - (typeof(*(__p)) __force __kernel *)((__force unsigned long)(__p)) + (TYPEOF_UNQUAL(*(__p)) __force __kernel *)((__force unsigned long)(__p)) #ifdef CONFIG_SMP From 6cea5ae714ba47ea4807d15903baca9857a450e6 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:09 +0100 Subject: [PATCH 0821/2157] percpu: repurpose __percpu tag as a named address space qualifier The patch introduces __percpu_qual define and repurposes __percpu tag as a named address space qualifier using the new define. Arches can now conditionally define __percpu_qual as their named address space qualifier for percpu variables. Link: https://lkml.kernel.org/r/20250127160709.80604-6-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Nadav Amit Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Brian Gerst Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Boqun Feng Cc: Borislav Petkov Cc: Dave Hansen Cc: "David S. Miller" Cc: Denys Vlasenko Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Kent Overstreet Cc: Paolo Abeni Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/percpu.h | 13 +++++++++++++ include/linux/compiler_types.h | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 50597b975a49..02aeca21479a 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -6,6 +6,19 @@ #include #include +/* + * __percpu_qual is the qualifier for the percpu named address space. + * + * Most arches use generic named address space for percpu variables but + * some arches define percpu variables in different named address space + * (on the x86 arch, percpu variable may be declared as being relative + * to the %fs or %gs segments using __seg_fs or __seg_gs named address + * space qualifier). + */ +#ifndef __percpu_qual +# define __percpu_qual +#endif + #ifdef CONFIG_SMP /* diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 981cc3d7e3aa..5d6544545658 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -57,7 +57,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __user BTF_TYPE_TAG(user) # endif # define __iomem -# define __percpu BTF_TYPE_TAG(percpu) +# define __percpu __percpu_qual BTF_TYPE_TAG(percpu) # define __rcu BTF_TYPE_TAG(rcu) # define __chk_user_ptr(x) (void)0 From 6a367577153acd9b432a5340fb10891eeb7e10f1 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 27 Jan 2025 17:05:10 +0100 Subject: [PATCH 0822/2157] percpu/x86: enable strict percpu checks via named AS qualifiers This patch declares percpu variables in __seg_gs/__seg_fs named AS and keeps them named AS qualified until they are dereferenced with percpu accessor. This approach enables various compiler check for cross-namespace variable assignments. Link: https://lkml.kernel.org/r/20250127160709.80604-7-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Nadav Amit Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Brian Gerst Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Boqun Feng Cc: "David S. Miller" Cc: Denys Vlasenko Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Kent Overstreet Cc: Paolo Abeni Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/include/asm/percpu.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 27f668660abe..474d648bca9a 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -95,9 +95,18 @@ #endif /* CONFIG_SMP */ -#define __my_cpu_type(var) typeof(var) __percpu_seg_override -#define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) -#define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL) +# define __my_cpu_type(var) typeof(var) +# define __my_cpu_ptr(ptr) (ptr) +# define __my_cpu_var(var) (var) + +# define __percpu_qual __percpu_seg_override +#else +# define __my_cpu_type(var) typeof(var) __percpu_seg_override +# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) +# define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#endif + #define __percpu_arg(x) __percpu_prefix "%" #x #define __force_percpu_arg(x) __force_percpu_prefix "%" #x From 1c81f1a699263aeae9aa1ac777058846c546e3c0 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 24 Jan 2025 07:35:11 +0000 Subject: [PATCH 0823/2157] memcg: use OFP_PEAK_UNSET instead of -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Some cleanup for memcg", v4. This patch (of 4): The 'OFP_PEAK_UNSET' has been defined, use it instead of '-1'. Link: https://lkml.kernel.org/r/20250124073514.2375622-1-chenridong@huaweicloud.com Link: https://lkml.kernel.org/r/20250124073514.2375622-2-chenridong@huaweicloud.com Signed-off-by: Chen Ridong Reviewed-by: Michal Koutný Acked-by: David Finkel Acked-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Michal Hocko Cc: Muchun Song Cc: Michal Hocko Cc: Muchun Song Cc: Vlastimil Babka Cc: Wang Weiyang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a037ec92881d..12f45dd0f64a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4013,7 +4013,7 @@ static ssize_t peak_write(struct kernfs_open_file *of, char *buf, size_t nbytes, WRITE_ONCE(peer_ctx->value, usage); /* initial write, register watcher */ - if (ofp->value == -1) + if (ofp->value == OFP_PEAK_UNSET) list_add(&ofp->list, watchers); WRITE_ONCE(ofp->value, usage); From 2059c8e320e2e538f70aa9b1e6ee9e793d4db6f7 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 24 Jan 2025 07:35:12 +0000 Subject: [PATCH 0824/2157] memcg: call the free function when allocation of pn fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'free_mem_cgroup_per_node_info' function is used to free the 'mem_cgroup_per_node' struct. Using 'pn' as the input for the free_mem_cgroup_per_node_info function will be much clearer. Call 'free_mem_cgroup_per_node_info' when 'alloc_mem_cgroup_per_node_info' fails, to free 'pn' as a whole, which makes the code more cohesive. Link: https://lkml.kernel.org/r/20250124073514.2375622-3-chenridong@huaweicloud.com Signed-off-by: Chen Ridong Reviewed-by: Michal Koutný Acked-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: David Finkel Cc: Michal Hocko Cc: Michal Hocko Cc: Muchun Song Cc: Muchun Song Cc: Vlastimil Babka Cc: Wang Weiyang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/memcontrol.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 12f45dd0f64a..7fd03425e2c4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3433,6 +3433,16 @@ struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) } #endif +static void free_mem_cgroup_per_node_info(struct mem_cgroup_per_node *pn) +{ + if (!pn) + return; + + free_percpu(pn->lruvec_stats_percpu); + kfree(pn->lruvec_stats); + kfree(pn); +} + static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; @@ -3457,23 +3467,10 @@ static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) memcg->nodeinfo[node] = pn; return true; fail: - kfree(pn->lruvec_stats); - kfree(pn); + free_mem_cgroup_per_node_info(pn); return false; } -static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) -{ - struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; - - if (!pn) - return; - - free_percpu(pn->lruvec_stats_percpu); - kfree(pn->lruvec_stats); - kfree(pn); -} - static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; @@ -3481,7 +3478,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) obj_cgroup_put(memcg->orig_objcg); for_each_node(node) - free_mem_cgroup_per_node_info(memcg, node); + free_mem_cgroup_per_node_info(memcg->nodeinfo[node]); memcg1_free_events(memcg); kfree(memcg->vmstats); free_percpu(memcg->vmstats_percpu); From bc812d1905df2e7dbc8a80253ec9c31366526ed3 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 24 Jan 2025 07:35:13 +0000 Subject: [PATCH 0825/2157] memcg: factor out the replace_stock_objcg function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out the 'replace_stock_objcg' function to make the code more cohesive. Link: https://lkml.kernel.org/r/20250124073514.2375622-4-chenridong@huaweicloud.com Signed-off-by: Chen Ridong Reviewed-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: David Finkel Cc: Michal Hocko Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Muchun Song Cc: Vlastimil Babka Cc: Wang Weiyang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/memcontrol.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7fd03425e2c4..65056395a1b4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2700,6 +2700,20 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) obj_cgroup_put(objcg); } +/* Replace the stock objcg with objcg, return the old objcg */ +static struct obj_cgroup *replace_stock_objcg(struct memcg_stock_pcp *stock, + struct obj_cgroup *objcg) +{ + struct obj_cgroup *old = NULL; + + old = drain_obj_stock(stock); + obj_cgroup_get(objcg); + stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) + ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; + WRITE_ONCE(stock->cached_objcg, objcg); + return old; +} + static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr) { @@ -2717,11 +2731,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, * changes. */ if (READ_ONCE(stock->cached_objcg) != objcg) { - old = drain_obj_stock(stock); - obj_cgroup_get(objcg); - stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) - ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; - WRITE_ONCE(stock->cached_objcg, objcg); + old = replace_stock_objcg(stock, objcg); stock->cached_pgdat = pgdat; } else if (stock->cached_pgdat != pgdat) { /* Flush the existing cached vmstat data */ @@ -2875,11 +2885,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock = this_cpu_ptr(&memcg_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ - old = drain_obj_stock(stock); - obj_cgroup_get(objcg); - WRITE_ONCE(stock->cached_objcg, objcg); - stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) - ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; + old = replace_stock_objcg(stock, objcg); allow_uncharge = true; /* Allow uncharge when objcg changes */ } stock->nr_bytes += nr_bytes; From 610dc18c502df113e95d4f23374b30538d0b633e Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 24 Jan 2025 07:35:14 +0000 Subject: [PATCH 0826/2157] memcg: add CONFIG_MEMCG_V1 for 'local' functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CONFIG_MEMCG_V1 for the 'local' functions, which are only used in memcg v1, so that they won't be built for v2. Link: https://lkml.kernel.org/r/20250124073514.2375622-5-chenridong@huaweicloud.com Signed-off-by: Chen Ridong Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: David Finkel Cc: Michal Hocko Cc: Muchun Song Cc: Vlastimil Babka Cc: Wang Weiyang Cc: Yosry Ahmed Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol-v1.h | 6 +++--- mm/memcontrol.c | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index 144d71b65907..ecff454373e2 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -60,15 +60,15 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); void drain_all_stock(struct mem_cgroup *root_memcg); unsigned long memcg_events(struct mem_cgroup *memcg, int event); -unsigned long memcg_events_local(struct mem_cgroup *memcg, int event); -unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx); unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); -unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item); int memory_stat_show(struct seq_file *m, void *v); /* Cgroup v1-specific declarations */ #ifdef CONFIG_MEMCG_V1 +unsigned long memcg_events_local(struct mem_cgroup *memcg, int event); +unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx); +unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item); bool memcg1_alloc_events(struct mem_cgroup *memcg); void memcg1_free_events(struct mem_cgroup *memcg); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 65056395a1b4..729b3e5f98f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -706,6 +706,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, trace_mod_memcg_state(memcg, idx, val); } +#ifdef CONFIG_MEMCG_V1 /* idx can be of type enum memcg_stat_item or node_stat_item. */ unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) { @@ -722,6 +723,7 @@ unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) #endif return x; } +#endif static void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, @@ -869,6 +871,7 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event) return READ_ONCE(memcg->vmstats->events[i]); } +#ifdef CONFIG_MEMCG_V1 unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) { int i = memcg_events_index(event); @@ -878,6 +881,7 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) return READ_ONCE(memcg->vmstats->events_local[i]); } +#endif struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) { @@ -1447,11 +1451,13 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item) memcg_page_state_output_unit(item); } +#ifdef CONFIG_MEMCG_V1 unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item) { return memcg_page_state_local(memcg, item) * memcg_page_state_output_unit(item); } +#endif #ifdef CONFIG_HUGETLB_PAGE static bool memcg_accounts_hugetlb(void) From 75fe8ec2380233f7d80c2574d52119072f9eb63e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 23 Jan 2025 23:38:58 -0500 Subject: [PATCH 0827/2157] mm: memcontrol: unshare v2-only charge API bits again 6b611388b626 ("memcg-v1: remove charge move code") removed the remaining v1 callers. Link: https://lkml.kernel.org/r/20250124043859.18808-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol-v1.h | 15 --------------- mm/memcontrol.c | 17 +++++++++++++---- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index ecff454373e2..ffd2ac839185 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -7,21 +7,6 @@ /* Cgroup v1 and v2 common declarations */ -int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, - unsigned int nr_pages); - -static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, - unsigned int nr_pages) -{ - if (mem_cgroup_is_root(memcg)) - return 0; - - return try_charge_memcg(memcg, gfp_mask, nr_pages); -} - -void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n); -void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n); - /* * Iteration constructs for visiting all cgroups (under a tree). If * loops are exited prematurely (break), mem_cgroup_iter_break() must diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 729b3e5f98f4..7f1ca1065316 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2213,8 +2213,8 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) css_put(&memcg->css); } -int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, - unsigned int nr_pages) +static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) { unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages); int nr_retries = MAX_RECLAIM_RETRIES; @@ -2403,6 +2403,15 @@ int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, return 0; } +static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) +{ + if (mem_cgroup_is_root(memcg)) + return 0; + + return try_charge_memcg(memcg, gfp_mask, nr_pages); +} + static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) { VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio); @@ -3389,13 +3398,13 @@ static void mem_cgroup_id_remove(struct mem_cgroup *memcg) } } -void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg, +static void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { refcount_add(n, &memcg->id.ref); } -void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) +static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { if (refcount_sub_and_test(n, &memcg->id.ref)) { mem_cgroup_id_remove(memcg); From 0d892bbbfa1c3b75569ef5075503bb785d1d52fd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 23 Jan 2025 23:38:59 -0500 Subject: [PATCH 0828/2157] mm: memcontrol: move stray ratelimit bits to v1 41213dd0f816 ("memcg: move mem_cgroup_event_ratelimit to v1 code") left this one behind. There are no v2 references. Link: https://lkml.kernel.org/r/20250124043859.18808-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol-v1.c | 13 +++++++++++++ mm/memcontrol-v1.h | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2be6b9112808..6d184fae0ad1 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -490,6 +490,19 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg) } /* Cgroup1: threshold notifications & softlimit tree updates */ + +/* + * Per memcg event counter is incremented at every pagein/pageout. With THP, + * it will be incremented by the number of pages. This counter is used + * to trigger some periodic events. This is straightforward and better + * than using jiffies etc. to handle periodic memcg event. + */ +enum mem_cgroup_events_target { + MEM_CGROUP_TARGET_THRESH, + MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_NTARGETS, +}; + struct memcg1_events_percpu { unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index ffd2ac839185..bfe44d2337a5 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -28,18 +28,6 @@ static inline bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys); } -/* - * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremented by the number of pages. This counter is used - * to trigger some periodic events. This is straightforward and better - * than using jiffies etc. to handle periodic memcg event. - */ -enum mem_cgroup_events_target { - MEM_CGROUP_TARGET_THRESH, - MEM_CGROUP_TARGET_SOFTLIMIT, - MEM_CGROUP_NTARGETS, -}; - unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); void drain_all_stock(struct mem_cgroup *root_memcg); From 89ce924f0bd447eb52a5f224d879dbf8f09451db Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 24 Jan 2025 00:41:32 -0500 Subject: [PATCH 0829/2157] mm: memcontrol: move memsw charge callbacks to v1 The interweaving of two entirely different swap accounting strategies has been one of the more confusing parts of the memcg code. Split out the v1 code to clarify the implementation and a handful of callsites, and to avoid building the v1 bits when !CONFIG_MEMCG_V1. text data bss dec hex filename 39253 6446 4160 49859 c2c3 mm/memcontrol.o.old 38877 6382 4160 49419 c10b mm/memcontrol.o Link: https://lkml.kernel.org/r/20250124054132.45643-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Balbir Singh Acked-by: Shakeel Butt Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 17 +++-- include/linux/swap.h | 5 -- mm/huge_memory.c | 2 +- mm/memcontrol-v1.c | 89 ++++++++++++++++++++++++- mm/memcontrol-v1.h | 6 +- mm/memcontrol.c | 129 ++++++------------------------------- mm/memory.c | 2 +- mm/shmem.c | 2 +- mm/swap_state.c | 2 +- mm/vmscan.c | 2 +- 10 files changed, 126 insertions(+), 130 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6e74b8254d9b..57664e2a8fb7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -649,8 +649,6 @@ int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); - void __mem_cgroup_uncharge(struct folio *folio); /** @@ -1165,10 +1163,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) -{ -} - static inline void mem_cgroup_uncharge(struct folio *folio) { } @@ -1848,6 +1842,9 @@ static inline void mem_cgroup_exit_user_fault(void) current->in_user_fault = 0; } +void memcg1_swapout(struct folio *folio, swp_entry_t entry); +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages); + #else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -1875,6 +1872,14 @@ static inline void mem_cgroup_exit_user_fault(void) { } +static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry) +{ +} + +static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +{ +} + #endif /* CONFIG_MEMCG_V1 */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index b13b72645db3..91b30701274e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -659,7 +659,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) @@ -680,10 +679,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else -static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) -{ -} - static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 373781b21e5c..118f2127c785 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3740,7 +3740,7 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped) /* * Exclude swapcache: originally to avoid a corrupt deferred split - * queue. Nowadays that is fully prevented by mem_cgroup_swapout(); + * queue. Nowadays that is fully prevented by memcg1_swapout(); * but if page reclaim is already handling the same folio, it is * unnecessary to handle it again in the shrinker, so excluding * swapcache here may still be a useful optimization. diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 6d184fae0ad1..c1feb3945350 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -581,8 +581,59 @@ void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg) local_irq_restore(flags); } -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) +/** + * memcg1_swapout - transfer a memsw charge to swap + * @folio: folio whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @folio to @entry. + */ +void memcg1_swapout(struct folio *folio, swp_entry_t entry) { + struct mem_cgroup *memcg, *swap_memcg; + unsigned int nr_entries; + + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); + + if (mem_cgroup_disabled()) + return; + + if (!do_memsw_account()) + return; + + memcg = folio_memcg(folio); + + VM_WARN_ON_ONCE_FOLIO(!memcg, folio); + if (!memcg) + return; + + /* + * In case the memcg owning these pages has been offlined and doesn't + * have an ID allocated to it anymore, charge the closest online + * ancestor for the swap instead and transfer the memory+swap charge. + */ + swap_memcg = mem_cgroup_id_get_online(memcg); + nr_entries = folio_nr_pages(folio); + /* Get references for the tail pages, too */ + if (nr_entries > 1) + mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); + mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + + swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); + + folio_unqueue_deferred_split(folio); + folio->memcg_data = 0; + + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memory, nr_entries); + + if (memcg != swap_memcg) { + if (!mem_cgroup_is_root(swap_memcg)) + page_counter_charge(&swap_memcg->memsw, nr_entries); + page_counter_uncharge(&memcg->memsw, nr_entries); + } + /* * Interrupts should be disabled here because the caller holds the * i_pages lock which is taken with interrupts-off. It is @@ -594,6 +645,42 @@ void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) memcg1_charge_statistics(memcg, -folio_nr_pages(folio)); preempt_enable_nested(); memcg1_check_events(memcg, folio_nid(folio)); + + css_put(&memcg->css); +} + +/* + * memcg1_swapin - uncharge swap slot + * @entry: the first swap entry for which the pages are charged + * @nr_pages: number of pages which will be uncharged + * + * Call this function after successfully adding the charged page to swapcache. + * + * Note: This function assumes the page for which swap slot is being uncharged + * is order 0 page. + */ +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +{ + /* + * Cgroup1's unified memory+swap counter has been charged with the + * new swapcache page, finish the transfer by uncharging the swap + * slot. The swap slot would also get uncharged when it dies, but + * it can stick around indefinitely and we'd count the page twice + * the entire time. + * + * Cgroup2 has separate resource counters for memory and swap, + * so this is a non-issue here. Memory and swap charge lifetimes + * correspond 1:1 to page and swap slot lifetimes: we charge the + * page to memory here, and uncharge swap when the slot is freed. + */ + if (do_memsw_account()) { + /* + * The swap entry might not get freed for a long time, + * let's not wait for it. The page already received a + * memory+swap charge, drop the swap entry duplicate. + */ + mem_cgroup_uncharge_swap(entry, nr_pages); + } } void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index bfe44d2337a5..653ff1bad244 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -36,6 +36,9 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event); unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); int memory_stat_show(struct seq_file *m, void *v); +void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n); +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg); + /* Cgroup v1-specific declarations */ #ifdef CONFIG_MEMCG_V1 @@ -69,7 +72,6 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked); void memcg1_oom_recover(struct mem_cgroup *memcg); void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg); -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg); void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, unsigned long nr_memory, int nid); @@ -107,8 +109,6 @@ static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {} static inline void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg) {} -static inline void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) {} - static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, unsigned long nr_memory, int nid) {} diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7f1ca1065316..04973c084c63 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3398,7 +3398,7 @@ static void mem_cgroup_id_remove(struct mem_cgroup *memcg) } } -static void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg, +void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { refcount_add(n, &memcg->id.ref); @@ -3419,6 +3419,24 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg) mem_cgroup_id_put_many(memcg, 1); } +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) +{ + while (!refcount_inc_not_zero(&memcg->id.ref)) { + /* + * The root cgroup cannot be destroyed, so it's refcount must + * always be >= 1. + */ + if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) { + VM_BUG_ON(1); + break; + } + memcg = parent_mem_cgroup(memcg); + if (!memcg) + memcg = root_mem_cgroup; + } + return memcg; +} + /** * mem_cgroup_from_id - look up a memcg from a memcg id * @id: the memcg id to look up @@ -4604,40 +4622,6 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, return ret; } -/* - * mem_cgroup_swapin_uncharge_swap - uncharge swap slot - * @entry: the first swap entry for which the pages are charged - * @nr_pages: number of pages which will be uncharged - * - * Call this function after successfully adding the charged page to swapcache. - * - * Note: This function assumes the page for which swap slot is being uncharged - * is order 0 page. - */ -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) -{ - /* - * Cgroup1's unified memory+swap counter has been charged with the - * new swapcache page, finish the transfer by uncharging the swap - * slot. The swap slot would also get uncharged when it dies, but - * it can stick around indefinitely and we'd count the page twice - * the entire time. - * - * Cgroup2 has separate resource counters for memory and swap, - * so this is a non-issue here. Memory and swap charge lifetimes - * correspond 1:1 to page and swap slot lifetimes: we charge the - * page to memory here, and uncharge swap when the slot is freed. - */ - if (do_memsw_account()) { - /* - * The swap entry might not get freed for a long time, - * let's not wait for it. The page already received a - * memory+swap charge, drop the swap entry duplicate. - */ - mem_cgroup_uncharge_swap(entry, nr_pages); - } -} - struct uncharge_gather { struct mem_cgroup *memcg; unsigned long nr_memory; @@ -4963,81 +4947,6 @@ static int __init mem_cgroup_init(void) subsys_initcall(mem_cgroup_init); #ifdef CONFIG_SWAP -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) -{ - while (!refcount_inc_not_zero(&memcg->id.ref)) { - /* - * The root cgroup cannot be destroyed, so it's refcount must - * always be >= 1. - */ - if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) { - VM_BUG_ON(1); - break; - } - memcg = parent_mem_cgroup(memcg); - if (!memcg) - memcg = root_mem_cgroup; - } - return memcg; -} - -/** - * mem_cgroup_swapout - transfer a memsw charge to swap - * @folio: folio whose memsw charge to transfer - * @entry: swap entry to move the charge to - * - * Transfer the memsw charge of @folio to @entry. - */ -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) -{ - struct mem_cgroup *memcg, *swap_memcg; - unsigned int nr_entries; - - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); - - if (mem_cgroup_disabled()) - return; - - if (!do_memsw_account()) - return; - - memcg = folio_memcg(folio); - - VM_WARN_ON_ONCE_FOLIO(!memcg, folio); - if (!memcg) - return; - - /* - * In case the memcg owning these pages has been offlined and doesn't - * have an ID allocated to it anymore, charge the closest online - * ancestor for the swap instead and transfer the memory+swap charge. - */ - swap_memcg = mem_cgroup_id_get_online(memcg); - nr_entries = folio_nr_pages(folio); - /* Get references for the tail pages, too */ - if (nr_entries > 1) - mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); - mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); - - swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); - - folio_unqueue_deferred_split(folio); - folio->memcg_data = 0; - - if (!mem_cgroup_is_root(memcg)) - page_counter_uncharge(&memcg->memory, nr_entries); - - if (memcg != swap_memcg) { - if (!mem_cgroup_is_root(swap_memcg)) - page_counter_charge(&swap_memcg->memsw, nr_entries); - page_counter_uncharge(&memcg->memsw, nr_entries); - } - - memcg1_swapout(folio, memcg); - css_put(&memcg->css); -} - /** * __mem_cgroup_try_charge_swap - try charging swap space for a folio * @folio: folio being added to swap diff --git a/mm/memory.c b/mm/memory.c index b9661ccfa64f..f77c10fd26b9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4407,7 +4407,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } need_clear_cache = true; - mem_cgroup_swapin_uncharge_swap(entry, nr_pages); + memcg1_swapin(entry, nr_pages); shadow = get_shadow_from_swap_cache(entry); if (shadow) diff --git a/mm/shmem.c b/mm/shmem.c index 1ede0800e846..15fa7fa9c8e8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2017,7 +2017,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode, __folio_set_swapbacked(new); new->swap = entry; - mem_cgroup_swapin_uncharge_swap(entry, nr_pages); + memcg1_swapin(entry, nr_pages); shadow = get_shadow_from_swap_cache(entry); if (shadow) workingset_refault(new, shadow); diff --git a/mm/swap_state.c b/mm/swap_state.c index ca42b2be64d9..2e1acb210e57 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -521,7 +521,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (add_to_swap_cache(new_folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) goto fail_unlock; - mem_cgroup_swapin_uncharge_swap(entry, 1); + memcg1_swapin(entry, 1); if (shadow) workingset_refault(new_folio, shadow); diff --git a/mm/vmscan.c b/mm/vmscan.c index c767d71c43d7..fc4951d23b97 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -769,7 +769,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(folio, target_memcg); __delete_from_swap_cache(folio, swap, shadow); - mem_cgroup_swapout(folio, swap); + memcg1_swapout(folio, swap); xa_unlock_irq(&mapping->i_pages); put_swap_folio(folio, swap); } else { From 5f6084f95bc1f0a0b95e58e49be1ee74b01f5144 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 23 Jan 2025 19:35:22 +0000 Subject: [PATCH 0830/2157] mm/oom_kill: fix trivial typo in comment Update 'give' -> 'given' in the description of oom_reap_task_mm(). Link: https://lkml.kernel.org/r/20250123193523.1496909-1-cmllamas@google.com Signed-off-by: Carlos Llamas Signed-off-by: Andrew Morton --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 1cf121ad7085..25923cfec9c6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -563,7 +563,7 @@ static bool __oom_reap_task_mm(struct mm_struct *mm) } /* - * Reaps the address space of the give task. + * Reaps the address space of the given task. * * Returns true on success and false if none or part of the address space * has been reclaimed and the caller should retry later. From 035a112e5fd5b93a1d34c5d736bb515b2f9fa52f Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 22 Jan 2025 11:19:26 -0500 Subject: [PATCH 0831/2157] selftests/mm: make file-backed THP split work by writing PMD size data Commit acd7ccb284b8 ("mm: shmem: add large folio support for tmpfs") changes huge=always to allocate THP/mTHP based on write size and split_huge_page_test does not write PMD size data, so file-back THP is not created during the test. Fix it by writing PMD size data. Link: https://lkml.kernel.org/r/20250122161928.1240637-1-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Miaohe Lin Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- .../selftests/mm/split_huge_page_test.c | 52 ++++++++++++++++--- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 3f353f3d070f..ba498aaaf857 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -265,14 +265,28 @@ void split_file_backed_thp(void) { int status; int fd; - ssize_t num_written; char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; const char *tmpfs_loc = mkdtemp(tmpfs_template); char testfile[INPUT_MAX]; + ssize_t num_written, num_read; + char *file_buf1, *file_buf2; uint64_t pgoff_start = 0, pgoff_end = 1024; + int i; ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); + file_buf1 = (char *)malloc(pmd_pagesize); + file_buf2 = (char *)malloc(pmd_pagesize); + + if (!file_buf1 || !file_buf2) { + ksft_print_msg("cannot allocate file buffers\n"); + goto out; + } + + for (i = 0; i < pmd_pagesize; i++) + file_buf1[i] = (char)i; + memset(file_buf2, 0, pmd_pagesize); + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); if (status) @@ -281,26 +295,45 @@ void split_file_backed_thp(void) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + goto cleanup; } - fd = open(testfile, O_CREAT|O_WRONLY, 0664); + fd = open(testfile, O_CREAT|O_RDWR, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; } - /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); - close(fd); + /* write pmd size data to the file, so a file-backed THP can be allocated */ + num_written = write(fd, file_buf1, pmd_pagesize); - if (num_written < 1) { - ksft_perror("Fail to write data to testing file"); - goto cleanup; + if (num_written == -1 || num_written != pmd_pagesize) { + ksft_perror("Failed to write data to testing file"); + goto close_file; } /* split the file-backed THP */ write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); + /* check file content after split */ + status = lseek(fd, 0, SEEK_SET); + if (status == -1) { + ksft_perror("Cannot lseek file"); + goto close_file; + } + + num_read = read(fd, file_buf2, num_written); + if (num_read == -1 || num_read != num_written) { + ksft_perror("Cannot read file content back"); + goto close_file; + } + + if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { + ksft_print_msg("File content changed\n"); + goto close_file; + } + + close(fd); status = unlink(testfile); if (status) { ksft_perror("Cannot remove testing file"); @@ -321,9 +354,12 @@ void split_file_backed_thp(void) ksft_test_result_pass("File-backed THP split test done\n"); return; +close_file: + close(fd); cleanup: umount(tmpfs_loc); rmdir(tmpfs_loc); +out: ksft_exit_fail_msg("Error occurred\n"); } From 9b2f764933eb5e3ac9ebba26e3341529219c4401 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 22 Jan 2025 11:19:27 -0500 Subject: [PATCH 0832/2157] mm/huge_memory: allow split shmem large folio to any lower order Commit 4d684b5f92ba ("mm: shmem: add large folio support for tmpfs") has added large folio support to shmem. Remove the restriction in split_huge_page*(). Link: https://lkml.kernel.org/r/20250122161928.1240637-2-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Baolin Wang Reviewed-by: Yang Shi Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Miaohe Lin Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/huge_memory.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 118f2127c785..e33da765c428 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3299,7 +3299,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* Some pages can be beyond EOF: drop them from page cache */ if (tail->index >= end) { if (shmem_mapping(folio->mapping)) - nr_dropped++; + nr_dropped += new_nr; else if (folio_test_clear_dirty(tail)) folio_account_cleaned(tail, inode_to_wb(folio->mapping->host)); @@ -3465,12 +3465,6 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, return -EINVAL; } } else if (new_order) { - /* Split shmem folio to non-zero order not supported */ - if (shmem_mapping(folio->mapping)) { - VM_WARN_ONCE(1, - "Cannot split shmem folio to non-0 order"); - return -EINVAL; - } /* * No split if the file system does not support large folio. * Note that we might still have THPs in such mappings due to From ad4c9bb5415232b452157002a48d58a1dc92d3c0 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 22 Jan 2025 11:19:28 -0500 Subject: [PATCH 0833/2157] selftests/mm: test splitting file-backed THP to any lower order Now split_huge_page*() supports shmem THP split to any lower order. Test it. The test now reads file content out after split to check if the split corrupts the file data. Link: https://lkml.kernel.org/r/20250122161928.1240637-3-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Miaohe Lin Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/split_huge_page_test.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index ba498aaaf857..e0304046b1a0 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -261,7 +261,7 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); } -void split_file_backed_thp(void) +void split_file_backed_thp(int order) { int status; int fd; @@ -313,7 +313,7 @@ void split_file_backed_thp(void) } /* split the file-backed THP */ - write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); /* check file content after split */ status = lseek(fd, 0, SEEK_SET); @@ -351,7 +351,7 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); ksft_print_msg("Please check dmesg for more information\n"); - ksft_test_result_pass("File-backed THP split test done\n"); + ksft_test_result_pass("File-backed THP split to order %d test done\n", order); return; close_file: @@ -517,7 +517,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+2+9); + ksft_set_plan(1+8+1+9+9); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -534,7 +534,8 @@ int main(int argc, char **argv) split_pmd_thp_to_order(i); split_pte_mapped_thp(); - split_file_backed_thp(); + for (i = 0; i < 9; i++) + split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); From 100bc3b877fca43e46485883eaf179aec7a11c86 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Wed, 8 Jan 2025 09:52:23 +0800 Subject: [PATCH 0834/2157] drivers/base/memory: simplify outputting of valid_zones_show() No need to specify position at the first writing to the buf because the @len is always 0 at this time. Use sysfs_emit() instead to simplify it. Also avoid setting/checking default_zone with a conditional operator. Link: https://lkml.kernel.org/r/20250108015223.1522887-1-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Acked-by: David Hildenbrand Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- drivers/base/memory.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 348c5dbbfa68..4765f2928725 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -455,7 +455,7 @@ static ssize_t valid_zones_show(struct device *dev, struct memory_group *group = mem->group; struct zone *default_zone; int nid = mem->nid; - int len = 0; + int len; /* * Check the existing zone. Make sure that we do that only on the @@ -466,22 +466,18 @@ static ssize_t valid_zones_show(struct device *dev, * If !mem->zone, the memory block spans multiple zones and * cannot get offlined. */ - default_zone = mem->zone; - if (!default_zone) - return sysfs_emit(buf, "%s\n", "none"); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); - goto out; + return sysfs_emit(buf, "%s\n", + mem->zone ? mem->zone->name : "none"); } default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group, start_pfn, nr_pages); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); + len = sysfs_emit(buf, "%s", default_zone->name); len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, default_zone); len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, default_zone); -out: len += sysfs_emit_at(buf, len, "\n"); return len; } From 8977752c8056a6a094a279004a49722da15bace3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:43 +0100 Subject: [PATCH 0835/2157] mm/gup: reject FOLL_SPLIT_PMD with hugetlb VMAs Patch series "mm: fixes for device-exclusive entries (hmm)", v2. Discussing the PageTail() call in make_device_exclusive_range() with Willy, I recently discovered [1] that device-exclusive handling does not properly work with THP, making the hmm-tests selftests fail if THPs are enabled on the system. Looking into more details, I found that hugetlb is not properly fenced, and I realized that something that was bugging me for longer -- how device-exclusive entries interact with mapcounts -- completely breaks migration/swapout/split/hwpoison handling of these folios while they have device-exclusive PTEs. The program below can be used to allocate 1 GiB worth of pages and making them device-exclusive on a kernel with CONFIG_TEST_HMM. Once they are device-exclusive, these folios cannot get swapped out (proc$pid/smaps_rollup will always indicate 1 GiB RSS no matter how much one forces memory reclaim), and when having a memory block onlined to ZONE_MOVABLE, trying to offline it will loop forever and complain about failed migration of a page that should be movable. # echo offline > /sys/devices/system/memory/memory136/state # echo online_movable > /sys/devices/system/memory/memory136/state # ./hmm-swap & ... wait until everything is device-exclusive # echo offline > /sys/devices/system/memory/memory136/state [ 285.193431][T14882] page: refcount:2 mapcount:0 mapping:0000000000000000 index:0x7f20671f7 pfn:0x442b6a [ 285.196618][T14882] memcg:ffff888179298000 [ 285.198085][T14882] anon flags: 0x5fff0000002091c(referenced|uptodate| dirty|active|owner_2|swapbacked|node=1|zone=3|lastcpupid=0x7ff) [ 285.201734][T14882] raw: ... [ 285.204464][T14882] raw: ... [ 285.207196][T14882] page dumped because: migration failure [ 285.209072][T14882] page_owner tracks the page as allocated [ 285.210915][T14882] page last allocated via order 0, migratetype Movable, gfp_mask 0x140dca(GFP_HIGHUSER_MOVABLE|__GFP_COMP|__GFP_ZERO), id 14926, tgid 14926 (hmm-swap), ts 254506295376, free_ts 227402023774 [ 285.216765][T14882] post_alloc_hook+0x197/0x1b0 [ 285.218874][T14882] get_page_from_freelist+0x76e/0x3280 [ 285.220864][T14882] __alloc_frozen_pages_noprof+0x38e/0x2740 [ 285.223302][T14882] alloc_pages_mpol+0x1fc/0x540 [ 285.225130][T14882] folio_alloc_mpol_noprof+0x36/0x340 [ 285.227222][T14882] vma_alloc_folio_noprof+0xee/0x1a0 [ 285.229074][T14882] __handle_mm_fault+0x2b38/0x56a0 [ 285.230822][T14882] handle_mm_fault+0x368/0x9f0 ... This series fixes all issues I found so far. There is no easy way to fix without a bigger rework/cleanup. I have a bunch of cleanups on top (some previous sent, some the result of the discussion in v1) that I will send out separately once this landed and I get to it. I wish we could just use some special present PROT_NONE PTEs instead of these (non-present, non-none) fake-swap entries; but that just results in the same problem we keep having (lack of spare PTE bits), and staring at other similar fake-swap entries, that ship has sailed. With this series, make_device_exclusive() doesn't actually belong into mm/rmap.c anymore, but I'll leave moving that for another day. I only tested this series with the hmm-tests selftests due to lack of HW, so I'd appreciate some testing, especially if the interaction between two GPUs wanting a device-exclusive entry works as expected. #include #include #include #include #include #include #include #include #include #include #define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x05, struct hmm_dmirror_cmd) struct hmm_dmirror_cmd { __u64 addr; __u64 ptr; __u64 npages; __u64 cpages; __u64 faults; }; const size_t size = 1 * 1024 * 1024 * 1024ul; const size_t chunk_size = 2 * 1024 * 1024ul; int main(void) { struct hmm_dmirror_cmd cmd; size_t cur_size; int fd, ret; char *addr, *mirror; fd = open("/dev/hmm_dmirror1", O_RDWR, 0); if (fd < 0) { perror("open failed\n"); exit(1); } addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { perror("mmap failed\n"); exit(1); } madvise(addr, size, MADV_NOHUGEPAGE); memset(addr, 1, size); mirror = malloc(chunk_size); for (cur_size = 0; cur_size < size; cur_size += chunk_size) { cmd.addr = (uintptr_t)addr + cur_size; cmd.ptr = (uintptr_t)mirror; cmd.npages = chunk_size / getpagesize(); ret = ioctl(fd, HMM_DMIRROR_EXCLUSIVE, &cmd); if (ret) { perror("ioctl failed\n"); exit(1); } } pause(); return 0; } [1] https://lkml.kernel.org/r/25e02685-4f1d-47fa-be5b-01ff85bb0ce2@redhat.com This patch (of 17): We only have two FOLL_SPLIT_PMD users. While uprobe refuses hugetlb early, make_device_exclusive_range() can end up getting called on hugetlb VMAs. Right now, this means that with a PMD-sized hugetlb page, we can end up calling split_huge_pmd(), because pmd_trans_huge() also succeeds with hugetlb PMDs. For example, using a modified hmm-test selftest one can trigger: [ 207.017134][T14945] ------------[ cut here ]------------ [ 207.018614][T14945] kernel BUG at mm/page_table_check.c:87! [ 207.019716][T14945] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 207.021072][T14945] CPU: 3 UID: 0 PID: ... [ 207.023036][T14945] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 [ 207.024834][T14945] RIP: 0010:page_table_check_clear.part.0+0x488/0x510 [ 207.026128][T14945] Code: ... [ 207.029965][T14945] RSP: 0018:ffffc9000cb8f348 EFLAGS: 00010293 [ 207.031139][T14945] RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff8249a0cd [ 207.032649][T14945] RDX: ffff88811e883c80 RSI: ffffffff8249a357 RDI: ffff88811e883c80 [ 207.034183][T14945] RBP: ffff888105c0a050 R08: 0000000000000005 R09: 0000000000000000 [ 207.035688][T14945] R10: 00000000ffffffff R11: 0000000000000003 R12: 0000000000000001 [ 207.037203][T14945] R13: 0000000000000200 R14: 0000000000000001 R15: dffffc0000000000 [ 207.038711][T14945] FS: 00007f2783275740(0000) GS:ffff8881f4980000(0000) knlGS:0000000000000000 [ 207.040407][T14945] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 207.041660][T14945] CR2: 00007f2782c00000 CR3: 0000000132356000 CR4: 0000000000750ef0 [ 207.043196][T14945] PKRU: 55555554 [ 207.043880][T14945] Call Trace: [ 207.044506][T14945] [ 207.045086][T14945] ? __die+0x51/0x92 [ 207.045864][T14945] ? die+0x29/0x50 [ 207.046596][T14945] ? do_trap+0x250/0x320 [ 207.047430][T14945] ? do_error_trap+0xe7/0x220 [ 207.048346][T14945] ? page_table_check_clear.part.0+0x488/0x510 [ 207.049535][T14945] ? handle_invalid_op+0x34/0x40 [ 207.050494][T14945] ? page_table_check_clear.part.0+0x488/0x510 [ 207.051681][T14945] ? exc_invalid_op+0x2e/0x50 [ 207.052589][T14945] ? asm_exc_invalid_op+0x1a/0x20 [ 207.053596][T14945] ? page_table_check_clear.part.0+0x1fd/0x510 [ 207.054790][T14945] ? page_table_check_clear.part.0+0x487/0x510 [ 207.055993][T14945] ? page_table_check_clear.part.0+0x488/0x510 [ 207.057195][T14945] ? page_table_check_clear.part.0+0x487/0x510 [ 207.058384][T14945] __page_table_check_pmd_clear+0x34b/0x5a0 [ 207.059524][T14945] ? __pfx___page_table_check_pmd_clear+0x10/0x10 [ 207.060775][T14945] ? __pfx___mutex_unlock_slowpath+0x10/0x10 [ 207.061940][T14945] ? __pfx___lock_acquire+0x10/0x10 [ 207.062967][T14945] pmdp_huge_clear_flush+0x279/0x360 [ 207.064024][T14945] split_huge_pmd_locked+0x82b/0x3750 ... Before commit 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code"), we would have ignored the flag; instead, let's simply refuse the combination completely in check_vma_flags(): the caller is likely not prepared to handle any hugetlb folios. We'll teach make_device_exclusive_range() separately to ignore any hugetlb folios as a future-proof safety net. Link: https://lkml.kernel.org/r/20250210193801.781278-1-david@redhat.com Link: https://lkml.kernel.org/r/20250210193801.781278-2-david@redhat.com Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") Signed-off-by: David Hildenbrand Reviewed-by: John Hubbard Reviewed-by: Alistair Popple Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Vlastimil Babka Cc: Yanteng Si Cc: Simona Vetter Cc: Barry Song Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/gup.c b/mm/gup.c index 3883b307780e..61e751baf862 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1283,6 +1283,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) return -EOPNOTSUPP; + if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma)) + return -EOPNOTSUPP; + if (vma_is_secretmem(vma)) return -EFAULT; From bc3fe6805cf09a25a086573a17d40e525208c5d8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:44 +0100 Subject: [PATCH 0836/2157] mm/rmap: reject hugetlb folios in folio_make_device_exclusive() Even though FOLL_SPLIT_PMD on hugetlb now always fails with -EOPNOTSUPP, let's add a safety net in case FOLL_SPLIT_PMD usage would ever be reworked. In particular, before commit 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code"), GUP(FOLL_SPLIT_PMD) would just have returned a page. In particular, hugetlb folios that are not PMD-sized would never have been prone to FOLL_SPLIT_PMD. hugetlb folios can be anonymous, and page_make_device_exclusive_one() is not really prepared for handling them at all. So let's spell that out. Link: https://lkml.kernel.org/r/20250210193801.781278-3-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Reviewed-by: Alistair Popple Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Cc: Signed-off-by: Andrew Morton --- mm/rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index c6c4d4ea29a7..17fbfa61f7ef 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2499,7 +2499,7 @@ static bool folio_make_device_exclusive(struct folio *folio, * Restrict to anonymous folios for now to avoid potential writeback * issues. */ - if (!folio_test_anon(folio)) + if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) return false; rmap_walk(folio, &rwc); From 599b684a7854e51a51358fe59bbdfea281f0b461 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:45 +0100 Subject: [PATCH 0837/2157] mm/rmap: convert make_device_exclusive_range() to make_device_exclusive() The single "real" user in the tree of make_device_exclusive_range() always requests making only a single address exclusive. The current implementation is hard to fix for properly supporting anonymous THP / large folios and for avoiding messing with rmap walks in weird ways. So let's always process a single address/page and return folio + page to minimize page -> folio lookups. This is a preparation for further changes. Reject any non-anonymous or hugetlb folios early, directly after GUP. While at it, extend the documentation of make_device_exclusive() to clarify some things. Link: https://lkml.kernel.org/r/20250210193801.781278-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Simona Vetter Reviewed-by: Alistair Popple Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- Documentation/mm/hmm.rst | 2 +- Documentation/translations/zh_CN/mm/hmm.rst | 2 +- drivers/gpu/drm/nouveau/nouveau_svm.c | 5 +- include/linux/mmu_notifier.h | 2 +- include/linux/rmap.h | 5 +- lib/test_hmm.c | 41 +++----- mm/rmap.c | 103 ++++++++++++-------- 7 files changed, 83 insertions(+), 77 deletions(-) diff --git a/Documentation/mm/hmm.rst b/Documentation/mm/hmm.rst index f6d53c37a2ca..7d61b7a8b65b 100644 --- a/Documentation/mm/hmm.rst +++ b/Documentation/mm/hmm.rst @@ -400,7 +400,7 @@ Exclusive access memory Some devices have features such as atomic PTE bits that can be used to implement atomic access to system memory. To support atomic operations to a shared virtual memory page such a device needs access to that page which is exclusive of any -userspace access from the CPU. The ``make_device_exclusive_range()`` function +userspace access from the CPU. The ``make_device_exclusive()`` function can be used to make a memory range inaccessible from userspace. This replaces all mappings for pages in the given range with special swap diff --git a/Documentation/translations/zh_CN/mm/hmm.rst b/Documentation/translations/zh_CN/mm/hmm.rst index 0669f947d0bc..22c210f4e94f 100644 --- a/Documentation/translations/zh_CN/mm/hmm.rst +++ b/Documentation/translations/zh_CN/mm/hmm.rst @@ -326,7 +326,7 @@ devm_memunmap_pages() 和 devm_release_mem_region() 当资源可以绑定到 ``s 一些设备具有诸如原子PTE位的功能,可以用来实现对系统内存的原子访问。为了支持对一 个共享的虚拟内存页的原子操作,这样的设备需要对该页的访问是排他的,而不是来自CPU -的任何用户空间访问。 ``make_device_exclusive_range()`` 函数可以用来使一 +的任何用户空间访问。 ``make_device_exclusive()`` 函数可以用来使一 个内存范围不能从用户空间访问。 这将用特殊的交换条目替换给定范围内的所有页的映射。任何试图访问交换条目的行为都会 diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 8ea98f06d39a..aa22d8458d22 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -610,10 +610,9 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, notifier_seq = mmu_interval_read_begin(¬ifier->notifier); mmap_read_lock(mm); - ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE, - &page, drm->dev); + page = make_device_exclusive(mm, start, drm->dev, &folio); mmap_read_unlock(mm); - if (ret <= 0 || !page) { + if (IS_ERR(page)) { ret = -EINVAL; goto out; } diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index e2dd57ca368b..d4e714661826 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -46,7 +46,7 @@ struct mmu_interval_notifier; * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no * longer have exclusive access to the page. When sent during creation of an * exclusive range the owner will be initialised to the value provided by the - * caller of make_device_exclusive_range(), otherwise the owner will be NULL. + * caller of make_device_exclusive(), otherwise the owner will be NULL. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 683a04088f3f..86425d42c1a9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -663,9 +663,8 @@ int folio_referenced(struct folio *, int is_locked, void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); -int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, - unsigned long end, struct page **pages, - void *arg); +struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, + void *owner, struct folio **foliop); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 056f2e411d7b..e4afca8d1880 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -780,10 +780,8 @@ static int dmirror_exclusive(struct dmirror *dmirror, unsigned long start, end, addr; unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; - struct page *pages[64]; struct dmirror_bounce bounce; - unsigned long next; - int ret; + int ret = 0; start = cmd->addr; end = start + size; @@ -795,36 +793,27 @@ static int dmirror_exclusive(struct dmirror *dmirror, return -EINVAL; mmap_read_lock(mm); - for (addr = start; addr < end; addr = next) { - unsigned long mapped = 0; - int i; + for (addr = start; !ret && addr < end; addr += PAGE_SIZE) { + struct folio *folio; + struct page *page; - next = min(end, addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)); - - ret = make_device_exclusive_range(mm, addr, next, pages, NULL); - /* - * Do dmirror_atomic_map() iff all pages are marked for - * exclusive access to avoid accessing uninitialized - * fields of pages. - */ - if (ret == (next - addr) >> PAGE_SHIFT) - mapped = dmirror_atomic_map(addr, next, pages, dmirror); - for (i = 0; i < ret; i++) { - if (pages[i]) { - unlock_page(pages[i]); - put_page(pages[i]); - } + page = make_device_exclusive(mm, addr, NULL, &folio); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + break; } - if (addr + (mapped << PAGE_SHIFT) < next) { - mmap_read_unlock(mm); - mmput(mm); - return -EBUSY; - } + ret = dmirror_atomic_map(addr, addr + PAGE_SIZE, &page, dmirror); + ret = ret == 1 ? 0 : -EBUSY; + folio_unlock(folio); + folio_put(folio); } mmap_read_unlock(mm); mmput(mm); + if (ret) + return ret; + /* Return the migrated data for verification. */ ret = dmirror_bounce_init(&bounce, start, size); if (ret) diff --git a/mm/rmap.c b/mm/rmap.c index 17fbfa61f7ef..7ccf850565d3 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2495,70 +2495,89 @@ static bool folio_make_device_exclusive(struct folio *folio, .arg = &args, }; - /* - * Restrict to anonymous folios for now to avoid potential writeback - * issues. - */ - if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) - return false; - rmap_walk(folio, &rwc); return args.valid && !folio_mapcount(folio); } /** - * make_device_exclusive_range() - Mark a range for exclusive use by a device + * make_device_exclusive() - Mark a page for exclusive use by a device * @mm: mm_struct of associated target process - * @start: start of the region to mark for exclusive device access - * @end: end address of region - * @pages: returns the pages which were successfully marked for exclusive access + * @addr: the virtual address to mark for exclusive device access * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering + * @foliop: folio pointer will be stored here on success. * - * Returns: number of pages found in the range by GUP. A page is marked for - * exclusive access only if the page pointer is non-NULL. + * This function looks up the page mapped at the given address, grabs a + * folio reference, locks the folio and replaces the PTE with special + * device-exclusive PFN swap entry, preventing access through the process + * page tables. The function will return with the folio locked and referenced. * - * This function finds ptes mapping page(s) to the given address range, locks - * them and replaces mappings with special swap entries preventing userspace CPU - * access. On fault these entries are replaced with the original mapping after - * calling MMU notifiers. + * On fault, the device-exclusive entries are replaced with the original PTE + * under folio lock, after calling MMU notifiers. + * + * Only anonymous non-hugetlb folios are supported and the VMA must have + * write permissions such that we can fault in the anonymous page writable + * in order to mark it exclusive. The caller must hold the mmap_lock in read + * mode. * * A driver using this to program access from a device must use a mmu notifier * critical section to hold a device specific lock during programming. Once - * programming is complete it should drop the page lock and reference after + * programming is complete it should drop the folio lock and reference after * which point CPU access to the page will revoke the exclusive access. + * + * Notes: + * #. This function always operates on individual PTEs mapping individual + * pages. PMD-sized THPs are first remapped to be mapped by PTEs before + * the conversion happens on a single PTE corresponding to @addr. + * #. While concurrent access through the process page tables is prevented, + * concurrent access through other page references (e.g., earlier GUP + * invocation) is not handled and not supported. + * #. device-exclusive entries are considered "clean" and "old" by core-mm. + * Device drivers must update the folio state when informed by MMU + * notifiers. + * + * Returns: pointer to mapped page on success, otherwise a negative error. */ -int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, - unsigned long end, struct page **pages, - void *owner) +struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, + void *owner, struct folio **foliop) { - long npages = (end - start) >> PAGE_SHIFT; - long i; + struct folio *folio; + struct page *page; + long npages; - npages = get_user_pages_remote(mm, start, npages, + mmap_assert_locked(mm); + + /* + * Fault in the page writable and try to lock it; note that if the + * address would already be marked for exclusive use by a device, + * the GUP call would undo that first by triggering a fault. + */ + npages = get_user_pages_remote(mm, addr, 1, FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, - pages, NULL); - if (npages < 0) - return npages; + &page, NULL); + if (npages != 1) + return ERR_PTR(npages); + folio = page_folio(page); - for (i = 0; i < npages; i++, start += PAGE_SIZE) { - struct folio *folio = page_folio(pages[i]); - if (PageTail(pages[i]) || !folio_trylock(folio)) { - folio_put(folio); - pages[i] = NULL; - continue; - } - - if (!folio_make_device_exclusive(folio, mm, start, owner)) { - folio_unlock(folio); - folio_put(folio); - pages[i] = NULL; - } + if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) { + folio_put(folio); + return ERR_PTR(-EOPNOTSUPP); } - return npages; + if (!folio_trylock(folio)) { + folio_put(folio); + return ERR_PTR(-EBUSY); + } + + if (!folio_make_device_exclusive(folio, mm, addr, owner)) { + folio_unlock(folio); + folio_put(folio); + return ERR_PTR(-EBUSY); + } + *foliop = folio; + return page; } -EXPORT_SYMBOL_GPL(make_device_exclusive_range); +EXPORT_SYMBOL_GPL(make_device_exclusive); #endif void __put_anon_vma(struct anon_vma *anon_vma) From 438354724f69b5a717b6cd366db35b7034a8d2f9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:46 +0100 Subject: [PATCH 0838/2157] mm/rmap: implement make_device_exclusive() using folio_walk instead of rmap walk We require a writable PTE and only support anonymous folio: we can only have exactly one PTE pointing at that page, which we can just lookup using a folio walk, avoiding the rmap walk and the anon VMA lock. So let's stop doing an rmap walk and perform a folio walk instead, so we can easily just modify a single PTE and avoid relying on rmap/mapcounts. We now effectively work on a single PTE instead of multiple PTEs of a large folio, allowing for conversion of individual PTEs from non-exclusive to device-exclusive -- note that the opposite direction always works on single PTEs: restore_exclusive_pte(). With this change, device-exclusive handling is fully compatible with THPs / large folios. We still require PMD-sized THPs to get PTE-mapped, and supporting PMD-mapped THP (without the PTE-remapping) is a different endeavour that might not be worth it at this point: it might even have negative side-effects [1]. This gets rid of the "folio_mapcount()" usage and let's us fix ordinary rmap walks (migration/swapout) next. Spell out that messing with the mapcount is wrong and must be fixed. [1] https://lkml.kernel.org/r/Z5tI-cOSyzdLjoe_@phenom.ffwll.local Link: https://lkml.kernel.org/r/20250210193801.781278-5-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/rmap.c | 200 ++++++++++++++++++------------------------------------ 1 file changed, 67 insertions(+), 133 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 7ccf850565d3..0cd2a2d3de00 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2375,131 +2375,6 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags) } #ifdef CONFIG_DEVICE_PRIVATE -struct make_exclusive_args { - struct mm_struct *mm; - unsigned long address; - void *owner; - bool valid; -}; - -static bool page_make_device_exclusive_one(struct folio *folio, - struct vm_area_struct *vma, unsigned long address, void *priv) -{ - struct mm_struct *mm = vma->vm_mm; - DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); - struct make_exclusive_args *args = priv; - pte_t pteval; - struct page *subpage; - bool ret = true; - struct mmu_notifier_range range; - swp_entry_t entry; - pte_t swp_pte; - pte_t ptent; - - mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, - vma->vm_mm, address, min(vma->vm_end, - address + folio_size(folio)), - args->owner); - mmu_notifier_invalidate_range_start(&range); - - while (page_vma_mapped_walk(&pvmw)) { - /* Unexpected PMD-mapped THP? */ - VM_BUG_ON_FOLIO(!pvmw.pte, folio); - - ptent = ptep_get(pvmw.pte); - if (!pte_present(ptent)) { - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; - } - - subpage = folio_page(folio, - pte_pfn(ptent) - folio_pfn(folio)); - address = pvmw.address; - - /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(ptent)); - pteval = ptep_clear_flush(vma, address, pvmw.pte); - - /* Set the dirty flag on the folio now the pte is gone. */ - if (pte_dirty(pteval)) - folio_mark_dirty(folio); - - /* - * Check that our target page is still mapped at the expected - * address. - */ - if (args->mm == mm && args->address == address && - pte_write(pteval)) - args->valid = true; - - /* - * Store the pfn of the page in a special migration - * pte. do_swap_page() will wait until the migration - * pte is removed and then restart fault handling. - */ - if (pte_write(pteval)) - entry = make_writable_device_exclusive_entry( - page_to_pfn(subpage)); - else - entry = make_readable_device_exclusive_entry( - page_to_pfn(subpage)); - swp_pte = swp_entry_to_pte(entry); - if (pte_soft_dirty(pteval)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_uffd_wp(pteval)) - swp_pte = pte_swp_mkuffd_wp(swp_pte); - - set_pte_at(mm, address, pvmw.pte, swp_pte); - - /* - * There is a reference on the page for the swap entry which has - * been removed, so shouldn't take another. - */ - folio_remove_rmap_pte(folio, subpage, vma); - } - - mmu_notifier_invalidate_range_end(&range); - - return ret; -} - -/** - * folio_make_device_exclusive - Mark the folio exclusively owned by a device. - * @folio: The folio to replace page table entries for. - * @mm: The mm_struct where the folio is expected to be mapped. - * @address: Address where the folio is expected to be mapped. - * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks - * - * Tries to remove all the page table entries which are mapping this - * folio and replace them with special device exclusive swap entries to - * grant a device exclusive access to the folio. - * - * Context: Caller must hold the folio lock. - * Return: false if the page is still mapped, or if it could not be unmapped - * from the expected address. Otherwise returns true (success). - */ -static bool folio_make_device_exclusive(struct folio *folio, - struct mm_struct *mm, unsigned long address, void *owner) -{ - struct make_exclusive_args args = { - .mm = mm, - .address = address, - .owner = owner, - .valid = false, - }; - struct rmap_walk_control rwc = { - .rmap_one = page_make_device_exclusive_one, - .done = folio_not_mapped, - .anon_lock = folio_lock_anon_vma_read, - .arg = &args, - }; - - rmap_walk(folio, &rwc); - - return args.valid && !folio_mapcount(folio); -} - /** * make_device_exclusive() - Mark a page for exclusive use by a device * @mm: mm_struct of associated target process @@ -2541,22 +2416,31 @@ static bool folio_make_device_exclusive(struct folio *folio, struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, void *owner, struct folio **foliop) { - struct folio *folio; + struct mmu_notifier_range range; + struct folio *folio, *fw_folio; + struct vm_area_struct *vma; + struct folio_walk fw; struct page *page; - long npages; + swp_entry_t entry; + pte_t swp_pte; mmap_assert_locked(mm); + addr = PAGE_ALIGN_DOWN(addr); /* * Fault in the page writable and try to lock it; note that if the * address would already be marked for exclusive use by a device, * the GUP call would undo that first by triggering a fault. + * + * If any other device would already map this page exclusively, the + * fault will trigger a conversion to an ordinary + * (non-device-exclusive) PTE and issue a MMU_NOTIFY_EXCLUSIVE. */ - npages = get_user_pages_remote(mm, addr, 1, - FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, - &page, NULL); - if (npages != 1) - return ERR_PTR(npages); + page = get_user_page_vma_remote(mm, addr, + FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, + &vma); + if (IS_ERR(page)) + return page; folio = page_folio(page); if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) { @@ -2569,11 +2453,61 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, return ERR_PTR(-EBUSY); } - if (!folio_make_device_exclusive(folio, mm, addr, owner)) { + /* + * Inform secondary MMUs that we are going to convert this PTE to + * device-exclusive, such that they unmap it now. Note that the + * caller must filter this event out to prevent livelocks. + */ + mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, + mm, addr, addr + PAGE_SIZE, owner); + mmu_notifier_invalidate_range_start(&range); + + /* + * Let's do a second walk and make sure we still find the same page + * mapped writable. Note that any page of an anonymous folio can + * only be mapped writable using exactly one PTE ("exclusive"), so + * there cannot be other mappings. + */ + fw_folio = folio_walk_start(&fw, vma, addr, 0); + if (fw_folio != folio || fw.page != page || + fw.level != FW_LEVEL_PTE || !pte_write(fw.pte)) { + if (fw_folio) + folio_walk_end(&fw, vma); + mmu_notifier_invalidate_range_end(&range); folio_unlock(folio); folio_put(folio); return ERR_PTR(-EBUSY); } + + /* Nuke the page table entry so we get the uptodate dirty bit. */ + flush_cache_page(vma, addr, page_to_pfn(page)); + fw.pte = ptep_clear_flush(vma, addr, fw.ptep); + + /* Set the dirty flag on the folio now the PTE is gone. */ + if (pte_dirty(fw.pte)) + folio_mark_dirty(folio); + + /* + * Store the pfn of the page in a special device-exclusive PFN swap PTE. + * do_swap_page() will trigger the conversion back while holding the + * folio lock. + */ + entry = make_writable_device_exclusive_entry(page_to_pfn(page)); + swp_pte = swp_entry_to_pte(entry); + if (pte_soft_dirty(fw.pte)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + /* The pte is writable, uffd-wp does not apply. */ + set_pte_at(mm, addr, fw.ptep, swp_pte); + + /* + * TODO: The device-exclusive PFN swap PTE holds a folio reference but + * does not count as a mapping (mapcount), which is wrong and must be + * fixed, otherwise RMAP walks don't behave as expected. + */ + folio_remove_rmap_pte(folio, page, vma); + + folio_walk_end(&fw, vma); + mmu_notifier_invalidate_range_end(&range); *foliop = folio; return page; } From 9a914592140ec548ef72bfef7c8a4e036a1ac492 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:47 +0100 Subject: [PATCH 0839/2157] mm/memory: detect writability in restore_exclusive_pte() through can_change_pte_writable() Let's do it just like mprotect write-upgrade or during NUMA-hinting faults on PROT_NONE PTEs: detect if the PTE can be writable by using can_change_pte_writable(). Set the PTE only dirty if the folio is dirty: we might not necessarily have a write access, and setting the PTE writable doesn't require setting the PTE dirty. From a CPU perspective, these entries are clean. So only set the PTE dirty if the folios is dirty. With this change in place, there is no need to have separate readable and writable device-exclusive entry types, and we'll merge them next separately. Note that, during fork(), we first convert the device-exclusive entries back to ordinary PTEs, and we only ever allow conversion of writable PTEs to device-exclusive -- only mprotect can currently change them to readable-device-exclusive. Consequently, we always expect PageAnonExclusive(page)==true and can_change_pte_writable()==true, unless we are dealing with soft-dirty tracking or uffd-wp. But reusing can_change_pte_writable() for now is cleaner. Link: https://lkml.kernel.org/r/20250210193801.781278-6-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/memory.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index f77c10fd26b9..568b3afde8d2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -723,18 +723,21 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, struct folio *folio = page_folio(page); pte_t orig_pte; pte_t pte; - swp_entry_t entry; orig_pte = ptep_get(ptep); pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); - entry = pte_to_swp_entry(orig_pte); if (pte_swp_uffd_wp(orig_pte)) pte = pte_mkuffd_wp(pte); - else if (is_writable_device_exclusive_entry(entry)) - pte = maybe_mkwrite(pte_mkdirty(pte), vma); + + if ((vma->vm_flags & VM_WRITE) && + can_change_pte_writable(vma, address, pte)) { + if (folio_test_dirty(folio)) + pte = pte_mkdirty(pte); + pte = pte_mkwrite(pte, vma); + } VM_BUG_ON_FOLIO(pte_write(pte) && (!folio_test_anon(folio) && PageAnonExclusive(page)), folio); From c25465eb7630ffcadaab29c1010071512f8c9621 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:48 +0100 Subject: [PATCH 0840/2157] mm: use single SWP_DEVICE_EXCLUSIVE entry type There is no need for the distinction anymore; let's merge the readable and writable device-exclusive entries into a single device-exclusive entry type. Link: https://lkml.kernel.org/r/20250210193801.781278-7-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Simona Vetter Reviewed-by: Alistair Popple Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- include/linux/swap.h | 7 +++---- include/linux/swapops.h | 27 ++++----------------------- mm/mprotect.c | 8 -------- mm/page_table_check.c | 5 ++--- mm/rmap.c | 2 +- 5 files changed, 10 insertions(+), 39 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 91b30701274e..9a48e79a0a52 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -74,14 +74,13 @@ static inline int current_is_kswapd(void) * to a special SWP_DEVICE_{READ|WRITE} entry. * * When a page is mapped by the device for exclusive access we set the CPU page - * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. + * table entries to a special SWP_DEVICE_EXCLUSIVE entry. */ #ifdef CONFIG_DEVICE_PRIVATE -#define SWP_DEVICE_NUM 4 +#define SWP_DEVICE_NUM 3 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) -#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) -#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) +#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) #else #define SWP_DEVICE_NUM 0 #endif diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 96f26e29fefe..64ea151a7ae3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -186,26 +186,16 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { - return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset); + return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } static inline bool is_device_exclusive_entry(swp_entry_t entry) { - return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ || - swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE; + return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE); -} #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -227,12 +217,7 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return false; } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(0, 0); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } @@ -242,10 +227,6 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry) return false; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION diff --git a/mm/mprotect.c b/mm/mprotect.c index 516b1d847e2c..9cb6ab7c4048 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -225,14 +225,6 @@ static long change_pte_range(struct mmu_gather *tlb, newpte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(oldpte)) newpte = pte_swp_mkuffd_wp(newpte); - } else if (is_writable_device_exclusive_entry(entry)) { - entry = make_readable_device_exclusive_entry( - swp_offset(entry)); - newpte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(oldpte)) - newpte = pte_swp_mksoft_dirty(newpte); - if (pte_swp_uffd_wp(oldpte)) - newpte = pte_swp_mkuffd_wp(newpte); } else if (is_pte_marker_entry(entry)) { /* * Ignore error swap entries unconditionally, diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 509c6ef8de40..c2b3600429a0 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -196,9 +196,8 @@ EXPORT_SYMBOL(__page_table_check_pud_clear); /* Whether the swap entry cached writable information */ static inline bool swap_cached_writable(swp_entry_t entry) { - return is_writable_device_exclusive_entry(entry) || - is_writable_device_private_entry(entry) || - is_writable_migration_entry(entry); + return is_writable_device_private_entry(entry) || + is_writable_migration_entry(entry); } static inline void page_table_check_pte_flags(pte_t pte) diff --git a/mm/rmap.c b/mm/rmap.c index 0cd2a2d3de00..1129ed132af9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2492,7 +2492,7 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, * do_swap_page() will trigger the conversion back while holding the * folio lock. */ - entry = make_writable_device_exclusive_entry(page_to_pfn(page)); + entry = make_device_exclusive_entry(page_to_pfn(page)); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(fw.pte)) swp_pte = pte_swp_mksoft_dirty(swp_pte); From 7b2e497a42cdfc52da0df2510ba7941142987922 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:49 +0100 Subject: [PATCH 0841/2157] mm/page_vma_mapped: device-exclusive entries are not migration entries It's unclear why they would be considered migration entries; they are not. Likely we'll never really trigger that case in practice, because migration (including folio split) of a folio that has device-exclusive entries is never started, as we would detect "additional references": device-exclusive entries adjust the mapcount, but not the refcount. Link: https://lkml.kernel.org/r/20250210193801.781278-8-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Reviewed-by: Alistair Popple Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/page_vma_mapped.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 81839a9e74f1..32679be22d30 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -111,8 +111,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) return false; entry = pte_to_swp_entry(ptent); - if (!is_migration_entry(entry) && - !is_device_exclusive_entry(entry)) + if (!is_migration_entry(entry)) return false; pfn = swp_offset_pfn(entry); From 096cbb80ab3fd85a9035ec17a1312c2a7db8bc8c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:50 +0100 Subject: [PATCH 0842/2157] kernel/events/uprobes: handle device-exclusive entries correctly in __replace_page() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). __replace_page() is not prepared for that, so teach it about these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, because GUP would never have returned such folios (conversion to device-private happens by page migration, not in-place conversion of the PTE). There is a race between GUP and us locking the folio to look it up using page_vma_mapped_walk(), so this is likely a fix (unless something else could prevent that race, but it doesn't look like). pte_pfn() on something that is not a present pte could give use garbage, and we'd wrongly mess up the mapcount because it was already adjusted by calling folio_remove_rmap_pte() when making the entry device-exclusive. Link: https://lkml.kernel.org/r/20250210193801.781278-9-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b4ca8898fe17..eac24f39c2c2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -173,6 +173,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0); int err; struct mmu_notifier_range range; + pte_t pte; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr, addr + PAGE_SIZE); @@ -192,6 +193,16 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, if (!page_vma_mapped_walk(&pvmw)) goto unlock; VM_BUG_ON_PAGE(addr != pvmw.address, old_page); + pte = ptep_get(pvmw.pte); + + /* + * Handle PFN swap PTES, such as device-exclusive ones, that actually + * map pages: simply trigger GUP again to fix it up. + */ + if (unlikely(!pte_present(pte))) { + page_vma_mapped_walk_done(&pvmw); + goto unlock; + } if (new_page) { folio_get(new_folio); @@ -206,7 +217,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, inc_mm_counter(mm, MM_ANONPAGES); } - flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte))); + flush_cache_page(vma, addr, pte_pfn(pte)); ptep_clear_flush(vma, addr, pvmw.pte); if (new_page) set_pte_at(mm, addr, pvmw.pte, From 789cfc66992ed90e498a0979edcbf458c799c938 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:51 +0100 Subject: [PATCH 0843/2157] mm/ksm: handle device-exclusive entries correctly in write_protect_page() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). write_protect_page() is not prepared for that, so teach it about these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, because GUP would never have returned such folios (conversion to device-private happens by page migration, not in-place conversion of the PTE). There is a race between performing the folio_walk (which fails on non-present PTEs) and locking the folio to look it up using page_vma_mapped_walk() again, so this is likely a fix (unless something else could prevent that race, but it doesn't look like). In the future it could be handled if ever required, for now just give up and ignore them like folio_walk would. Link: https://lkml.kernel.org/r/20250210193801.781278-10-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/ksm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 8be2b144fefd..8583fb91ef13 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1270,8 +1270,15 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio, if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?")) goto out_unlock; - anon_exclusive = PageAnonExclusive(&folio->page); entry = ptep_get(pvmw.pte); + /* + * Handle PFN swap PTEs, such as device-exclusive ones, that actually + * map pages: give up just like the next folio_walk would. + */ + if (unlikely(!pte_present(entry))) + goto out_unlock; + + anon_exclusive = PageAnonExclusive(&folio->page); if (pte_write(entry) || pte_dirty(entry) || anon_exclusive || mm_tlb_flush_pending(mm)) { swapped = folio_test_swapcache(folio); From 65529295607f1b48cdcf97e7dd569a6ca52cd8ac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:52 +0100 Subject: [PATCH 0844/2157] mm/rmap: handle device-exclusive entries correctly in try_to_unmap_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). try_to_unmap_one() is not prepared for that, so teach it about these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, as we expect ZONE_DEVICE pages so far only in migration code when it comes to the RMAP. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Further note that try_to_unmap() calls MMU notifiers and holds the folio lock, so any device-exclusive users should be properly prepared for a device-exclusive PTE to "vanish". Link: https://lkml.kernel.org/r/20250210193801.781278-11-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/rmap.c | 52 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 1129ed132af9..47142a656ae5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1648,9 +1648,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); + bool anon_exclusive, ret = true; pte_t pteval; struct page *subpage; - bool anon_exclusive, ret = true; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; @@ -1722,7 +1722,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); - pfn = pte_pfn(ptep_get(pvmw.pte)); + /* + * Handle PFN swap PTEs, such as device-exclusive ones, that + * actually map pages. + */ + pteval = ptep_get(pvmw.pte); + if (likely(pte_present(pteval))) { + pfn = pte_pfn(pteval); + } else { + pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + } + subpage = folio_page(folio, pfn - folio_pfn(folio)); address = pvmw.address; anon_exclusive = folio_test_anon(folio) && @@ -1778,7 +1789,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, hugetlb_vma_unlock_write(vma); } pteval = huge_ptep_clear_flush(vma, address, pvmw.pte); - } else { + if (pte_dirty(pteval)) + folio_mark_dirty(folio); + } else if (likely(pte_present(pteval))) { flush_cache_page(vma, address, pfn); /* Nuke the page table entry. */ if (should_defer_flush(mm, flags)) { @@ -1796,6 +1809,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } + if (pte_dirty(pteval)) + folio_mark_dirty(folio); + } else { + pte_clear(mm, address, pvmw.pte); } /* @@ -1805,10 +1822,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval); - /* Set the dirty flag on the folio now the pte is gone. */ - if (pte_dirty(pteval)) - folio_mark_dirty(folio); - /* Update high watermark before we lower rss */ update_hiwater_rss(mm); @@ -1822,8 +1835,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } - - } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { + } else if (likely(pte_present(pteval)) && pte_unused(pteval) && + !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan @@ -1902,6 +1915,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); goto walk_abort; } + + /* + * arch_unmap_one() is expected to be a NOP on + * architectures where we could have PFN swap PTEs, + * so we'll not check/care. + */ if (arch_unmap_one(mm, vma, address, pteval) < 0) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); @@ -1926,10 +1945,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (anon_exclusive) swp_pte = pte_swp_mkexclusive(swp_pte); - if (pte_soft_dirty(pteval)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_uffd_wp(pteval)) - swp_pte = pte_swp_mkuffd_wp(swp_pte); + if (likely(pte_present(pteval))) { + if (pte_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_uffd_wp(pteval)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } else { + if (pte_swp_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_swp_uffd_wp(pteval)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } set_pte_at(mm, address, pvmw.pte, swp_pte); } else { /* From bf983108be0ef6cc8e96ccc0458f926dc96e6ba2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:53 +0100 Subject: [PATCH 0845/2157] mm/rmap: handle device-exclusive entries correctly in try_to_migrate_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). try_to_migrate_one() is not prepared for that, so teach it about these PFN swap PTEs. We already handle device-private entries by specializing on the folio, so we can reshuffle that code to make it work on the PFN swap PTEs instead. Get rid of the folio_is_device_private() handling. Note that we never currently expect device-private folios with HWPoison flag set at that point, so add a warning in case that ever changes and we can figure out what the right thing to do is. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Further note that try_to_migrate() calls MMU notifiers and holds the folio lock, so any device-exclusive users should be properly prepared for a device-exclusive PTE to "vanish". Link: https://lkml.kernel.org/r/20250210193801.781278-12-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Alistair Popple Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/rmap.c | 124 ++++++++++++++++++++++-------------------------------- 1 file changed, 51 insertions(+), 73 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 47142a656ae5..7c471c3ea64c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2039,9 +2039,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); + bool anon_exclusive, writable, ret = true; pte_t pteval; struct page *subpage; - bool anon_exclusive, ret = true; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; @@ -2108,24 +2108,19 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); - pfn = pte_pfn(ptep_get(pvmw.pte)); - - if (folio_is_zone_device(folio)) { - /* - * Our PTE is a non-present device exclusive entry and - * calculating the subpage as for the common case would - * result in an invalid pointer. - * - * Since only PAGE_SIZE pages can currently be - * migrated, just set it to page. This will need to be - * changed when hugepage migrations to device private - * memory are supported. - */ - VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio); - subpage = &folio->page; + /* + * Handle PFN swap PTEs, such as device-exclusive ones, that + * actually map pages. + */ + pteval = ptep_get(pvmw.pte); + if (likely(pte_present(pteval))) { + pfn = pte_pfn(pteval); } else { - subpage = folio_page(folio, pfn - folio_pfn(folio)); + pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); } + + subpage = folio_page(folio, pfn - folio_pfn(folio)); address = pvmw.address; anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(subpage); @@ -2181,7 +2176,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, } /* Nuke the hugetlb page table entry */ pteval = huge_ptep_clear_flush(vma, address, pvmw.pte); - } else { + if (pte_dirty(pteval)) + folio_mark_dirty(folio); + writable = pte_write(pteval); + } else if (likely(pte_present(pteval))) { flush_cache_page(vma, address, pfn); /* Nuke the page table entry. */ if (should_defer_flush(mm, flags)) { @@ -2199,54 +2197,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } + if (pte_dirty(pteval)) + folio_mark_dirty(folio); + writable = pte_write(pteval); + } else { + pte_clear(mm, address, pvmw.pte); + writable = is_writable_device_private_entry(pte_to_swp_entry(pteval)); } - /* Set the dirty flag on the folio now the pte is gone. */ - if (pte_dirty(pteval)) - folio_mark_dirty(folio); + VM_WARN_ON_FOLIO(writable && folio_test_anon(folio) && + !anon_exclusive, folio); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); - if (folio_is_device_private(folio)) { - unsigned long pfn = folio_pfn(folio); - swp_entry_t entry; - pte_t swp_pte; + if (PageHWPoison(subpage)) { + VM_WARN_ON_FOLIO(folio_is_device_private(folio), folio); - if (anon_exclusive) - WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio, - subpage)); - - /* - * Store the pfn of the page in a special migration - * pte. do_swap_page() will wait until the migration - * pte is removed and then restart fault handling. - */ - entry = pte_to_swp_entry(pteval); - if (is_writable_device_private_entry(entry)) - entry = make_writable_migration_entry(pfn); - else if (anon_exclusive) - entry = make_readable_exclusive_migration_entry(pfn); - else - entry = make_readable_migration_entry(pfn); - swp_pte = swp_entry_to_pte(entry); - - /* - * pteval maps a zone device page and is therefore - * a swap pte. - */ - if (pte_swp_soft_dirty(pteval)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_swp_uffd_wp(pteval)) - swp_pte = pte_swp_mkuffd_wp(swp_pte); - set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); - trace_set_migration_pte(pvmw.address, pte_val(swp_pte), - folio_order(folio)); - /* - * No need to invalidate here it will synchronize on - * against the special swap migration pte. - */ - } else if (PageHWPoison(subpage)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); @@ -2256,8 +2223,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } - - } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { + } else if (likely(pte_present(pteval)) && pte_unused(pteval) && + !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan @@ -2273,6 +2240,11 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, swp_entry_t entry; pte_t swp_pte; + /* + * arch_unmap_one() is expected to be a NOP on + * architectures where we could have PFN swap PTEs, + * so we'll not check/care. + */ if (arch_unmap_one(mm, vma, address, pteval) < 0) { if (folio_test_hugetlb(folio)) set_huge_pte_at(mm, address, pvmw.pte, @@ -2283,8 +2255,6 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, page_vma_mapped_walk_done(&pvmw); break; } - VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) && - !anon_exclusive, subpage); /* See folio_try_share_anon_rmap_pte(): clear PTE first. */ if (folio_test_hugetlb(folio)) { @@ -2309,7 +2279,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ - if (pte_write(pteval)) + if (writable) entry = make_writable_migration_entry( page_to_pfn(subpage)); else if (anon_exclusive) @@ -2318,15 +2288,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, else entry = make_readable_migration_entry( page_to_pfn(subpage)); - if (pte_young(pteval)) - entry = make_migration_entry_young(entry); - if (pte_dirty(pteval)) - entry = make_migration_entry_dirty(entry); - swp_pte = swp_entry_to_pte(entry); - if (pte_soft_dirty(pteval)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_uffd_wp(pteval)) - swp_pte = pte_swp_mkuffd_wp(swp_pte); + if (likely(pte_present(pteval))) { + if (pte_young(pteval)) + entry = make_migration_entry_young(entry); + if (pte_dirty(pteval)) + entry = make_migration_entry_dirty(entry); + swp_pte = swp_entry_to_pte(entry); + if (pte_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_uffd_wp(pteval)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } else { + swp_pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_swp_uffd_wp(pteval)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } if (folio_test_hugetlb(folio)) set_huge_pte_at(mm, address, pvmw.pte, swp_pte, hsz); From bd1f2b2ace84bac030b70d51710581b48ffdb690 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:54 +0100 Subject: [PATCH 0846/2157] mm/rmap: handle device-exclusive entries correctly in page_vma_mkclean_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). page_vma_mkclean_one() is not prepared for that, so teach it about these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, as we expect ZONE_DEVICE pages so far only in migration code when it comes to the RMAP. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Link: https://lkml.kernel.org/r/20250210193801.781278-13-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/rmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/rmap.c b/mm/rmap.c index 7c471c3ea64c..7b737f0f68fb 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1044,6 +1044,14 @@ static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw) pte_t *pte = pvmw->pte; pte_t entry = ptep_get(pte); + /* + * PFN swap PTEs, such as device-exclusive ones, that + * actually map pages are clean and not writable from a + * CPU perspective. The MMU notifier takes care of any + * device aspects. + */ + if (!pte_present(entry)) + continue; if (!pte_dirty(entry) && !pte_write(entry)) continue; From 3faa3ebc89c1d95605cb43174240f97510ed0e52 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:55 +0100 Subject: [PATCH 0847/2157] mm/page_idle: handle device-exclusive entries correctly in page_idle_clear_pte_refs_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). page_idle_clear_pte_refs_one() is not prepared for that, so let's teach it what to do with these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, as page_idle_get_folio() filters out non-lru folios. Should we just skip PFN swap PTEs completely? Possible, but it seems straight forward to just handle them correctly. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Link: https://lkml.kernel.org/r/20250210193801.781278-14-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/page_idle.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/page_idle.c b/mm/page_idle.c index 947c7c7a3728..408aaf29a3ea 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -62,9 +62,14 @@ static bool page_idle_clear_pte_refs_one(struct folio *folio, /* * For PTE-mapped THP, one sub page is referenced, * the whole THP is referenced. + * + * PFN swap PTEs, such as device-exclusive ones, that + * actually map pages are "old" from a CPU perspective. + * The MMU notifier takes care of any device aspects. */ - if (ptep_clear_young_notify(vma, addr, pvmw.pte)) - referenced = true; + if (likely(pte_present(ptep_get(pvmw.pte)))) + referenced |= ptep_test_and_clear_young(vma, addr, pvmw.pte); + referenced |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (pmdp_clear_young_notify(vma, addr, pvmw.pmd)) referenced = true; From 39628c39ba3b9efbbb4ba9df4a20254f0c137cd7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:56 +0100 Subject: [PATCH 0848/2157] mm/damon: handle device-exclusive entries correctly in damon_folio_young_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). damon_folio_young_one() is not prepared for that, so teach it about these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, as we expect ZONE_DEVICE pages so far only in migration code when it comes to the RMAP. The impact is rather small: we'd be calling pte_young() on a non-present PTE, which is not really defined to have semantic. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Link: https://lkml.kernel.org/r/20250210193801.781278-15-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Reviewed-by: SeongJae Park Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index c834aa217835..1a89920efce9 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -92,12 +92,20 @@ static bool damon_folio_young_one(struct folio *folio, { bool *accessed = arg; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); + pte_t pte; *accessed = false; while (page_vma_mapped_walk(&pvmw)) { addr = pvmw.address; if (pvmw.pte) { - *accessed = pte_young(ptep_get(pvmw.pte)) || + pte = ptep_get(pvmw.pte); + + /* + * PFN swap PTEs, such as device-exclusive ones, that + * actually map pages are "old" from a CPU perspective. + * The MMU notifier takes care of any device aspects. + */ + *accessed = (pte_present(pte) && pte_young(pte)) || !folio_test_idle(folio) || mmu_notifier_test_young(vma->vm_mm, addr); } else { From 1f3ac4c577bb993539561b47bf2fe9d1beaaca2e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:57 +0100 Subject: [PATCH 0849/2157] mm/damon: handle device-exclusive entries correctly in damon_folio_mkold_one() Ever since commit b756a3b5e7ea ("mm: device exclusive memory access") we can return with a device-exclusive entry from page_vma_mapped_walk(). damon_folio_mkold_one() is not prepared for that and calls damon_ptep_mkold() with PFN swap PTEs. Teach damon_ptep_mkold() to deal with these PFN swap PTEs. Note that device-private entries are so far not applicable on that path, as damon_get_folio() filters out non-lru folios. Should we just skip PFN swap PTEs completely? Possible, but it seems straight forward to just handle it correctly. Note that we could currently only run into this case with device-exclusive entries on THPs. We still adjust the mapcount on conversion to device-exclusive; this makes the rmap walk abort early for small folios, because we'll always have !folio_mapped() with a single device-exclusive entry. We'll adjust the mapcount logic once all page_vma_mapped_walk() users can properly handle device-exclusive entries. Link: https://lkml.kernel.org/r/20250210193801.781278-16-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: SeongJae Park Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/damon/ops-common.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index d25d99cb5f2b..86a50e8fbc80 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "ops-common.h" @@ -39,12 +41,29 @@ struct folio *damon_get_folio(unsigned long pfn) void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) { - struct folio *folio = damon_get_folio(pte_pfn(ptep_get(pte))); + pte_t pteval = ptep_get(pte); + struct folio *folio; + bool young = false; + unsigned long pfn; + if (likely(pte_present(pteval))) + pfn = pte_pfn(pteval); + else + pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + + folio = damon_get_folio(pfn); if (!folio) return; - if (ptep_clear_young_notify(vma, addr, pte)) + /* + * PFN swap PTEs, such as device-exclusive ones, that actually map pages + * are "old" from a CPU perspective. The MMU notifier takes care of any + * device aspects. + */ + if (likely(pte_present(pteval))) + young |= ptep_test_and_clear_young(vma, addr, pte); + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); + if (young) folio_set_young(folio); folio_set_idle(folio); From f495bd7e0d9bd00c9a76c49f792b532bbb0efd0a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:58 +0100 Subject: [PATCH 0850/2157] mm/rmap: keep mapcount untouched for device-exclusive entries Now that conversion to device-exclusive does no longer perform an rmap walk and all page_vma_mapped_walk() users were taught to properly handle device-exclusive entries, let's treat device-exclusive entries just as if they would be present, similar to how we handle device-private entries already. This fixes swapout/migration/split/hwpoison of folios with device-exclusive entries. We only had to take care of page_vma_mapped_walk() users, because these traditionally assume pte_present(). Other page table walkers already have to handle !pte_present(), and some of them might simply skip them (e.g., MADV_PAGEOUT) if they are not specialized on them. This change doesn't modify the latter. Note that while folios with device-exclusive PTEs can now get migrated, khugepaged will not collapse a THP if there is device-exclusive PTE. Doing so might also not be desired if the device frequently performs atomics to the same page. Similarly, KSM will never merge order-0 folios that are device-exclusive. Link: https://lkml.kernel.org/r/20250210193801.781278-17-david@redhat.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/memory.c | 17 +---------------- mm/rmap.c | 7 ------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 568b3afde8d2..94feb51a7983 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -741,20 +741,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, VM_BUG_ON_FOLIO(pte_write(pte) && (!folio_test_anon(folio) && PageAnonExclusive(page)), folio); - - /* - * No need to take a page reference as one was already - * created when the swap entry was made. - */ - if (folio_test_anon(folio)) - folio_add_anon_rmap_pte(folio, page, vma, address, RMAP_NONE); - else - /* - * Currently device exclusive access only supports anonymous - * memory so the entry shouldn't point to a filebacked page. - */ - WARN_ON_ONCE(1); - set_pte_at(vma->vm_mm, address, ptep, pte); /* @@ -1626,8 +1612,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, */ WARN_ON_ONCE(!vma_is_anonymous(vma)); rss[mm_counter(folio)]--; - if (is_device_private_entry(entry)) - folio_remove_rmap_pte(folio, page, vma); + folio_remove_rmap_pte(folio, page, vma); folio_put(folio); } else if (!non_swap_entry(entry)) { /* Genuine swap entries, hence a private anon pages */ diff --git a/mm/rmap.c b/mm/rmap.c index 7b737f0f68fb..e2a543f639ce 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2511,13 +2511,6 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, /* The pte is writable, uffd-wp does not apply. */ set_pte_at(mm, addr, fw.ptep, swp_pte); - /* - * TODO: The device-exclusive PFN swap PTE holds a folio reference but - * does not count as a mapping (mapcount), which is wrong and must be - * fixed, otherwise RMAP walks don't behave as expected. - */ - folio_remove_rmap_pte(folio, page, vma); - folio_walk_end(&fw, vma); mmu_notifier_invalidate_range_end(&range); *foliop = folio; From b8932ca8b9244839b263786c69d57ed05c0d96ec Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 10 Feb 2025 20:37:59 +0100 Subject: [PATCH 0851/2157] mm/rmap: avoid -EBUSY from make_device_exclusive() Failing to obtain the folio lock, for example because the folio is concurrently getting migrated or swapped out, can easily make the callers fail: for example, the hmm selftest can sometimes be observed to fail because of this. Instead of forcing the caller to retry, let's simply retry in this to-be-expected case. Similarly, avoid spurious failures simply because we raced with someone (e.g., swapout) modifying the page table such that our folio_walk fails. Simply unconditionally lock the folio, and retry GUP if our folio_walk fails. Note that the folio_walk repeatedly failing is not something we expect. Note that we might want to avoid grabbing the folio lock at some point; for now, keep that as is and only unconditionally lock the folio. With this change, the hmm selftests don't fail simply because the folio is already locked. While this fixes the selftests in some cases, it's likely not something that deserves a "Fixes:". Link: https://lkml.kernel.org/r/20250210193801.781278-18-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Alistair Popple Cc: Alex Shi Cc: Danilo Krummrich Cc: Dave Airlie Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Jonathan Corbet Cc: Karol Herbst Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Lyude Cc: "Masami Hiramatsu (Google)" Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: SeongJae Park Cc: Simona Vetter Cc: Vlastimil Babka Cc: Yanteng Si Cc: Barry Song Signed-off-by: Andrew Morton --- mm/rmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index e2a543f639ce..0f760b93fc0a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2435,6 +2435,7 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, struct page *page; swp_entry_t entry; pte_t swp_pte; + int ret; mmap_assert_locked(mm); addr = PAGE_ALIGN_DOWN(addr); @@ -2448,6 +2449,7 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, * fault will trigger a conversion to an ordinary * (non-device-exclusive) PTE and issue a MMU_NOTIFY_EXCLUSIVE. */ +retry: page = get_user_page_vma_remote(mm, addr, FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, &vma); @@ -2460,9 +2462,10 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, return ERR_PTR(-EOPNOTSUPP); } - if (!folio_trylock(folio)) { + ret = folio_lock_killable(folio); + if (ret) { folio_put(folio); - return ERR_PTR(-EBUSY); + return ERR_PTR(ret); } /* @@ -2488,7 +2491,7 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, mmu_notifier_invalidate_range_end(&range); folio_unlock(folio); folio_put(folio); - return ERR_PTR(-EBUSY); + goto retry; } /* Nuke the page table entry so we get the uptodate dirty bit. */ From 350dce38eb6e221cca16940f3bd8d9947364f1ca Mon Sep 17 00:00:00 2001 From: Hao Zhang Date: Wed, 15 Jan 2025 09:58:29 +0800 Subject: [PATCH 0852/2157] mm/vmscan: extract calculated pressure balance as a function Extract pressure balance calculation into a function.This doesn't change current behaviour. [akpm@linux-foundation.org: 80-col wrapping] Link: https://lkml.kernel.org/r/tencent_735DB36A2306C08B8568049E4C0B99716C07@qq.com Signed-off-by: Hao Zhang Signed-off-by: Andrew Morton --- mm/vmscan.c | 68 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index fc4951d23b97..bc1826020159 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2400,6 +2400,43 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc) } } +static inline void calculate_pressure_balance(struct scan_control *sc, + int swappiness, u64 *fraction, u64 *denominator) +{ + unsigned long anon_cost, file_cost, total_cost; + unsigned long ap, fp; + + /* + * Calculate the pressure balance between anon and file pages. + * + * The amount of pressure we put on each LRU is inversely + * proportional to the cost of reclaiming each list, as + * determined by the share of pages that are refaulting, times + * the relative IO cost of bringing back a swapped out + * anonymous page vs reloading a filesystem page (swappiness). + * + * Although we limit that influence to ensure no list gets + * left behind completely: at least a third of the pressure is + * applied, before swappiness. + * + * With swappiness at 100, anon and file have equal IO cost. + */ + total_cost = sc->anon_cost + sc->file_cost; + anon_cost = total_cost + sc->anon_cost; + file_cost = total_cost + sc->file_cost; + total_cost = anon_cost + file_cost; + + ap = swappiness * (total_cost + 1); + ap /= anon_cost + 1; + + fp = (MAX_SWAPPINESS - swappiness) * (total_cost + 1); + fp /= file_cost + 1; + + fraction[WORKINGSET_ANON] = ap; + fraction[WORKINGSET_FILE] = fp; + *denominator = ap + fp; +} + /* * Determine how aggressively the anon and file LRU lists should be * scanned. @@ -2412,12 +2449,10 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, { struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); - unsigned long anon_cost, file_cost, total_cost; int swappiness = sc_swappiness(sc, memcg); u64 fraction[ANON_AND_FILE]; u64 denominator = 0; /* gcc */ enum scan_balance scan_balance; - unsigned long ap, fp; enum lru_list lru; /* If we have no swap space, do not bother scanning anon folios. */ @@ -2466,35 +2501,8 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, } scan_balance = SCAN_FRACT; - /* - * Calculate the pressure balance between anon and file pages. - * - * The amount of pressure we put on each LRU is inversely - * proportional to the cost of reclaiming each list, as - * determined by the share of pages that are refaulting, times - * the relative IO cost of bringing back a swapped out - * anonymous page vs reloading a filesystem page (swappiness). - * - * Although we limit that influence to ensure no list gets - * left behind completely: at least a third of the pressure is - * applied, before swappiness. - * - * With swappiness at 100, anon and file have equal IO cost. - */ - total_cost = sc->anon_cost + sc->file_cost; - anon_cost = total_cost + sc->anon_cost; - file_cost = total_cost + sc->file_cost; - total_cost = anon_cost + file_cost; + calculate_pressure_balance(sc, swappiness, fraction, &denominator); - ap = swappiness * (total_cost + 1); - ap /= anon_cost + 1; - - fp = (MAX_SWAPPINESS - swappiness) * (total_cost + 1); - fp /= file_cost + 1; - - fraction[0] = ap; - fraction[1] = fp; - denominator = ap + fp; out: for_each_evictable_lru(lru) { bool file = is_file_lru(lru); From 58ba73e521b3d3a7a7612fc200beba1544a5100c Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 29 Jan 2025 18:06:31 +0000 Subject: [PATCH 0853/2157] mm: z3fold: remove z3fold Patch series "mm: zswap: remove z3fold and zbud", v2. After 2 cycles of deprecating z3fold, remove it as well as zbud (rationale in specific patches). This patch (of 2): Z3fold has been marked as deprecated for 2 cycles and no one complained, as expected. As there are no known users, remove the code now. Link: https://lkml.kernel.org/r/20250129180633.3501650-1-yosry.ahmed@linux.dev Link: https://lkml.kernel.org/r/20250129180633.3501650-2-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Nhat Pham Cc: Alexander Gordeev Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Dan Streetman Cc: Heiko Carstens Cc: Huacai Chen Cc: Miaohe Lin Cc: Seth Jennings Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vitaly Wool Cc: Vlastimil Babka Cc: WANG Xuerui Signed-off-by: Andrew Morton --- CREDITS | 1 + Documentation/mm/index.rst | 1 - Documentation/mm/z3fold.rst | 28 - Documentation/translations/zh_CN/mm/index.rst | 1 - .../translations/zh_CN/mm/z3fold.rst | 31 - MAINTAINERS | 7 - mm/Kconfig | 29 - mm/Makefile | 1 - mm/z3fold.c | 1447 ----------------- 9 files changed, 1 insertion(+), 1545 deletions(-) delete mode 100644 Documentation/mm/z3fold.rst delete mode 100644 Documentation/translations/zh_CN/mm/z3fold.rst delete mode 100644 mm/z3fold.c diff --git a/CREDITS b/CREDITS index 53d11a46fd69..aeccd79f9778 100644 --- a/CREDITS +++ b/CREDITS @@ -4311,6 +4311,7 @@ S: England N: Vitaly Wool E: vitaly.wool@konsulko.com D: Maintenance and development of zswap +D: Maintenance and development of z3fold N: Chris Wright E: chrisw@sous-sol.org diff --git a/Documentation/mm/index.rst b/Documentation/mm/index.rst index 0be1c7503a01..d3ada3e45e10 100644 --- a/Documentation/mm/index.rst +++ b/Documentation/mm/index.rst @@ -62,5 +62,4 @@ documentation, or deleted if it has served its purpose. unevictable-lru vmalloced-kernel-stacks vmemmap_dedup - z3fold zsmalloc diff --git a/Documentation/mm/z3fold.rst b/Documentation/mm/z3fold.rst deleted file mode 100644 index 25b5935d06c7..000000000000 --- a/Documentation/mm/z3fold.rst +++ /dev/null @@ -1,28 +0,0 @@ -====== -z3fold -====== - -z3fold is a special purpose allocator for storing compressed pages. -It is designed to store up to three compressed pages per physical page. -It is a zbud derivative which allows for higher compression -ratio keeping the simplicity and determinism of its predecessor. - -The main differences between z3fold and zbud are: - -* unlike zbud, z3fold allows for up to PAGE_SIZE allocations -* z3fold can hold up to 3 compressed pages in its page -* z3fold doesn't export any API itself and is thus intended to be used - via the zpool API. - -To keep the determinism and simplicity, z3fold, just like zbud, always -stores an integral number of compressed pages per page, but it can store -up to 3 pages unlike zbud which can store at most 2. Therefore the -compression ratio goes to around 2.7x while zbud's one is around 1.7x. - -Unlike zbud (but like zsmalloc for that matter) z3fold_alloc() does not -return a dereferenceable pointer. Instead, it returns an unsigned long -handle which encodes actual location of the allocated object. - -Keeping effective compression ratio close to zsmalloc's, z3fold doesn't -depend on MMU enabled and provides more predictable reclaim behavior -which makes it a better fit for small and response-critical systems. diff --git a/Documentation/translations/zh_CN/mm/index.rst b/Documentation/translations/zh_CN/mm/index.rst index c8726bce8f74..a71116be058f 100644 --- a/Documentation/translations/zh_CN/mm/index.rst +++ b/Documentation/translations/zh_CN/mm/index.rst @@ -58,7 +58,6 @@ Linux内存管理文档 remap_file_pages split_page_table_lock vmalloced-kernel-stacks - z3fold zsmalloc TODOLIST: diff --git a/Documentation/translations/zh_CN/mm/z3fold.rst b/Documentation/translations/zh_CN/mm/z3fold.rst deleted file mode 100644 index 9569a6d88270..000000000000 --- a/Documentation/translations/zh_CN/mm/z3fold.rst +++ /dev/null @@ -1,31 +0,0 @@ -:Original: Documentation/mm/z3fold.rst - -:翻译: - - 司延腾 Yanteng Si - -:校译: - - -====== -z3fold -====== - -z3fold是一个专门用于存储压缩页的分配器。它被设计为每个物理页最多可以存储三个压缩页。 -它是zbud的衍生物,允许更高的压缩率,保持其前辈的简单性和确定性。 - -z3fold和zbud的主要区别是: - -* 与zbud不同的是,z3fold允许最大的PAGE_SIZE分配。 -* z3fold在其页面中最多可以容纳3个压缩页面 -* z3fold本身没有输出任何API,因此打算通过zpool的API来使用 - -为了保持确定性和简单性,z3fold,就像zbud一样,总是在每页存储一个整数的压缩页,但是 -它最多可以存储3页,不像zbud最多可以存储2页。因此压缩率达到2.7倍左右,而zbud的压缩 -率是1.7倍左右。 - -不像zbud(但也像zsmalloc),z3fold_alloc()那样不返回一个可重复引用的指针。相反,它 -返回一个无符号长句柄,它编码了被分配对象的实际位置。 - -保持有效的压缩率接近于zsmalloc,z3fold不依赖于MMU的启用,并提供更可预测的回收行 -为,这使得它更适合于小型和反应迅速的系统。 diff --git a/MAINTAINERS b/MAINTAINERS index ed7aa6867674..a65a73227e3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26181,13 +26181,6 @@ S: Maintained F: Documentation/input/devices/yealink.rst F: drivers/input/misc/yealink.* -Z3FOLD COMPRESSED PAGE ALLOCATOR -M: Vitaly Wool -R: Miaohe Lin -L: linux-mm@kvack.org -S: Maintained -F: mm/z3fold.c - Z8530 DRIVER FOR AX.25 M: Joerg Reuter L: linux-hams@vger.kernel.org diff --git a/mm/Kconfig b/mm/Kconfig index 1b501db06417..6fa19022c09b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -146,15 +146,6 @@ config ZSWAP_ZPOOL_DEFAULT_ZBUD help Use the zbud allocator as the default allocator. -config ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED - bool "z3foldi (DEPRECATED)" - select Z3FOLD_DEPRECATED - help - Use the z3fold allocator as the default allocator. - - Deprecated and scheduled for removal in a few cycles, - see CONFIG_Z3FOLD_DEPRECATED. - config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC bool "zsmalloc" select ZSMALLOC @@ -166,7 +157,6 @@ config ZSWAP_ZPOOL_DEFAULT string depends on ZSWAP default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD - default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC default "" @@ -180,25 +170,6 @@ config ZBUD deterministic reclaim properties that make it preferable to a higher density approach when reclaim will be used. -config Z3FOLD_DEPRECATED - tristate "3:1 compression allocator (z3fold) (DEPRECATED)" - depends on ZSWAP - help - Deprecated and scheduled for removal in a few cycles. If you have - a good reason for using Z3FOLD over ZSMALLOC, please contact - linux-mm@kvack.org and the zswap maintainers. - - A special purpose allocator for storing compressed pages. - It is designed to store up to three compressed pages per physical - page. It is a ZBUD derivative so the simplicity and determinism are - still there. - -config Z3FOLD - tristate - default y if Z3FOLD_DEPRECATED=y - default m if Z3FOLD_DEPRECATED=m - depends on Z3FOLD_DEPRECATED - config ZSMALLOC tristate prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM) diff --git a/mm/Makefile b/mm/Makefile index 850386a67b3e..e4c03da3c084 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -115,7 +115,6 @@ obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o -obj-$(CONFIG_Z3FOLD) += z3fold.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o obj-$(CONFIG_CMA) += cma.o obj-$(CONFIG_NUMA) += numa.o diff --git a/mm/z3fold.c b/mm/z3fold.c deleted file mode 100644 index 379d24b4fef9..000000000000 --- a/mm/z3fold.c +++ /dev/null @@ -1,1447 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * z3fold.c - * - * Author: Vitaly Wool - * Copyright (C) 2016, Sony Mobile Communications Inc. - * - * This implementation is based on zbud written by Seth Jennings. - * - * z3fold is an special purpose allocator for storing compressed pages. It - * can store up to three compressed pages per page which improves the - * compression ratio of zbud while retaining its main concepts (e. g. always - * storing an integral number of objects per page) and simplicity. - * It still has simple and deterministic reclaim properties that make it - * preferable to a higher density approach (with no requirement on integral - * number of object per page) when reclaim is used. - * - * As in zbud, pages are divided into "chunks". The size of the chunks is - * fixed at compile time and is determined by NCHUNKS_ORDER below. - * - * z3fold doesn't export any API and is meant to be used via zpool API. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * NCHUNKS_ORDER determines the internal allocation granularity, effectively - * adjusting internal fragmentation. It also determines the number of - * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the - * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks - * in the beginning of an allocated page are occupied by z3fold header, so - * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), - * which shows the max number of free chunks in z3fold page, also there will - * be 63, or 62, respectively, freelists per pool. - */ -#define NCHUNKS_ORDER 6 - -#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) -#define CHUNK_SIZE (1 << CHUNK_SHIFT) -#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) -#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) -#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) -#define NCHUNKS (TOTAL_CHUNKS - ZHDR_CHUNKS) - -#define BUDDY_MASK (0x3) -#define BUDDY_SHIFT 2 -#define SLOTS_ALIGN (0x40) - -/***************** - * Structures -*****************/ -struct z3fold_pool; - -enum buddy { - HEADLESS = 0, - FIRST, - MIDDLE, - LAST, - BUDDIES_MAX = LAST -}; - -struct z3fold_buddy_slots { - /* - * we are using BUDDY_MASK in handle_to_buddy etc. so there should - * be enough slots to hold all possible variants - */ - unsigned long slot[BUDDY_MASK + 1]; - unsigned long pool; /* back link */ - rwlock_t lock; -}; -#define HANDLE_FLAG_MASK (0x03) - -/* - * struct z3fold_header - z3fold page metadata occupying first chunks of each - * z3fold page, except for HEADLESS pages - * @buddy: links the z3fold page into the relevant list in the - * pool - * @page_lock: per-page lock - * @refcount: reference count for the z3fold page - * @work: work_struct for page layout optimization - * @slots: pointer to the structure holding buddy slots - * @pool: pointer to the containing pool - * @cpu: CPU which this page "belongs" to - * @first_chunks: the size of the first buddy in chunks, 0 if free - * @middle_chunks: the size of the middle buddy in chunks, 0 if free - * @last_chunks: the size of the last buddy in chunks, 0 if free - * @first_num: the starting number (for the first handle) - * @mapped_count: the number of objects currently mapped - */ -struct z3fold_header { - struct list_head buddy; - spinlock_t page_lock; - struct kref refcount; - struct work_struct work; - struct z3fold_buddy_slots *slots; - struct z3fold_pool *pool; - short cpu; - unsigned short first_chunks; - unsigned short middle_chunks; - unsigned short last_chunks; - unsigned short start_middle; - unsigned short first_num:2; - unsigned short mapped_count:2; - unsigned short foreign_handles:2; -}; - -/** - * struct z3fold_pool - stores metadata for each z3fold pool - * @name: pool name - * @lock: protects pool unbuddied lists - * @stale_lock: protects pool stale page list - * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- - * buddies; the list each z3fold page is added to depends on - * the size of its free region. - * @stale: list of pages marked for freeing - * @pages_nr: number of z3fold pages in the pool. - * @c_handle: cache for z3fold_buddy_slots allocation - * @compact_wq: workqueue for page layout background optimization - * @release_wq: workqueue for safe page release - * @work: work_struct for safe page release - * - * This structure is allocated at pool creation time and maintains metadata - * pertaining to a particular z3fold pool. - */ -struct z3fold_pool { - const char *name; - spinlock_t lock; - spinlock_t stale_lock; - struct list_head __percpu *unbuddied; - struct list_head stale; - atomic64_t pages_nr; - struct kmem_cache *c_handle; - struct workqueue_struct *compact_wq; - struct workqueue_struct *release_wq; - struct work_struct work; -}; - -/* - * Internal z3fold page flags - */ -enum z3fold_page_flags { - PAGE_HEADLESS = 0, - MIDDLE_CHUNK_MAPPED, - NEEDS_COMPACTING, - PAGE_STALE, - PAGE_CLAIMED, /* by either reclaim or free */ - PAGE_MIGRATED, /* page is migrated and soon to be released */ -}; - -/* - * handle flags, go under HANDLE_FLAG_MASK - */ -enum z3fold_handle_flags { - HANDLES_NOFREE = 0, -}; - -/* - * Forward declarations - */ -static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool); -static void compact_page_work(struct work_struct *w); - -/***************** - * Helpers -*****************/ - -/* Converts an allocation size in bytes to size in z3fold chunks */ -static int size_to_chunks(size_t size) -{ - return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; -} - -#define for_each_unbuddied_list(_iter, _begin) \ - for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) - -static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, - gfp_t gfp) -{ - struct z3fold_buddy_slots *slots = kmem_cache_zalloc(pool->c_handle, - gfp); - - if (slots) { - /* It will be freed separately in free_handle(). */ - kmemleak_not_leak(slots); - slots->pool = (unsigned long)pool; - rwlock_init(&slots->lock); - } - - return slots; -} - -static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s) -{ - return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK); -} - -static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle) -{ - return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1)); -} - -/* Lock a z3fold page */ -static inline void z3fold_page_lock(struct z3fold_header *zhdr) -{ - spin_lock(&zhdr->page_lock); -} - -/* Try to lock a z3fold page */ -static inline int z3fold_page_trylock(struct z3fold_header *zhdr) -{ - return spin_trylock(&zhdr->page_lock); -} - -/* Unlock a z3fold page */ -static inline void z3fold_page_unlock(struct z3fold_header *zhdr) -{ - spin_unlock(&zhdr->page_lock); -} - -/* return locked z3fold page if it's not headless */ -static inline struct z3fold_header *get_z3fold_header(unsigned long handle) -{ - struct z3fold_buddy_slots *slots; - struct z3fold_header *zhdr; - int locked = 0; - - if (!(handle & (1 << PAGE_HEADLESS))) { - slots = handle_to_slots(handle); - do { - unsigned long addr; - - read_lock(&slots->lock); - addr = *(unsigned long *)handle; - zhdr = (struct z3fold_header *)(addr & PAGE_MASK); - locked = z3fold_page_trylock(zhdr); - read_unlock(&slots->lock); - if (locked) { - struct page *page = virt_to_page(zhdr); - - if (!test_bit(PAGE_MIGRATED, &page->private)) - break; - z3fold_page_unlock(zhdr); - } - cpu_relax(); - } while (true); - } else { - zhdr = (struct z3fold_header *)(handle & PAGE_MASK); - } - - return zhdr; -} - -static inline void put_z3fold_header(struct z3fold_header *zhdr) -{ - struct page *page = virt_to_page(zhdr); - - if (!test_bit(PAGE_HEADLESS, &page->private)) - z3fold_page_unlock(zhdr); -} - -static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) -{ - struct z3fold_buddy_slots *slots; - int i; - bool is_free; - - if (WARN_ON(*(unsigned long *)handle == 0)) - return; - - slots = handle_to_slots(handle); - write_lock(&slots->lock); - *(unsigned long *)handle = 0; - - if (test_bit(HANDLES_NOFREE, &slots->pool)) { - write_unlock(&slots->lock); - return; /* simple case, nothing else to do */ - } - - if (zhdr->slots != slots) - zhdr->foreign_handles--; - - is_free = true; - for (i = 0; i <= BUDDY_MASK; i++) { - if (slots->slot[i]) { - is_free = false; - break; - } - } - write_unlock(&slots->lock); - - if (is_free) { - struct z3fold_pool *pool = slots_to_pool(slots); - - if (zhdr->slots == slots) - zhdr->slots = NULL; - kmem_cache_free(pool->c_handle, slots); - } -} - -/* Initializes the z3fold header of a newly allocated z3fold page */ -static struct z3fold_header *init_z3fold_page(struct page *page, bool headless, - struct z3fold_pool *pool, gfp_t gfp) -{ - struct z3fold_header *zhdr = page_address(page); - struct z3fold_buddy_slots *slots; - - clear_bit(PAGE_HEADLESS, &page->private); - clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); - clear_bit(NEEDS_COMPACTING, &page->private); - clear_bit(PAGE_STALE, &page->private); - clear_bit(PAGE_CLAIMED, &page->private); - clear_bit(PAGE_MIGRATED, &page->private); - if (headless) - return zhdr; - - slots = alloc_slots(pool, gfp); - if (!slots) - return NULL; - - memset(zhdr, 0, sizeof(*zhdr)); - spin_lock_init(&zhdr->page_lock); - kref_init(&zhdr->refcount); - zhdr->cpu = -1; - zhdr->slots = slots; - zhdr->pool = pool; - INIT_LIST_HEAD(&zhdr->buddy); - INIT_WORK(&zhdr->work, compact_page_work); - return zhdr; -} - -/* Resets the struct page fields and frees the page */ -static void free_z3fold_page(struct page *page, bool headless) -{ - if (!headless) { - lock_page(page); - __ClearPageMovable(page); - unlock_page(page); - } - __free_page(page); -} - -/* Helper function to build the index */ -static inline int __idx(struct z3fold_header *zhdr, enum buddy bud) -{ - return (bud + zhdr->first_num) & BUDDY_MASK; -} - -/* - * Encodes the handle of a particular buddy within a z3fold page. - * Zhdr->page_lock should be held as this function accesses first_num - * if bud != HEADLESS. - */ -static unsigned long __encode_handle(struct z3fold_header *zhdr, - struct z3fold_buddy_slots *slots, - enum buddy bud) -{ - unsigned long h = (unsigned long)zhdr; - int idx = 0; - - /* - * For a headless page, its handle is its pointer with the extra - * PAGE_HEADLESS bit set - */ - if (bud == HEADLESS) - return h | (1 << PAGE_HEADLESS); - - /* otherwise, return pointer to encoded handle */ - idx = __idx(zhdr, bud); - h += idx; - if (bud == LAST) - h |= (zhdr->last_chunks << BUDDY_SHIFT); - - write_lock(&slots->lock); - slots->slot[idx] = h; - write_unlock(&slots->lock); - return (unsigned long)&slots->slot[idx]; -} - -static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) -{ - return __encode_handle(zhdr, zhdr->slots, bud); -} - -/* only for LAST bud, returns zero otherwise */ -static unsigned short handle_to_chunks(unsigned long handle) -{ - struct z3fold_buddy_slots *slots = handle_to_slots(handle); - unsigned long addr; - - read_lock(&slots->lock); - addr = *(unsigned long *)handle; - read_unlock(&slots->lock); - return (addr & ~PAGE_MASK) >> BUDDY_SHIFT; -} - -/* - * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle - * but that doesn't matter. because the masking will result in the - * correct buddy number. - */ -static enum buddy handle_to_buddy(unsigned long handle) -{ - struct z3fold_header *zhdr; - struct z3fold_buddy_slots *slots = handle_to_slots(handle); - unsigned long addr; - - read_lock(&slots->lock); - WARN_ON(handle & (1 << PAGE_HEADLESS)); - addr = *(unsigned long *)handle; - read_unlock(&slots->lock); - zhdr = (struct z3fold_header *)(addr & PAGE_MASK); - return (addr - zhdr->first_num) & BUDDY_MASK; -} - -static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) -{ - return zhdr->pool; -} - -static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) -{ - struct page *page = virt_to_page(zhdr); - struct z3fold_pool *pool = zhdr_to_pool(zhdr); - - WARN_ON(!list_empty(&zhdr->buddy)); - set_bit(PAGE_STALE, &page->private); - clear_bit(NEEDS_COMPACTING, &page->private); - spin_lock(&pool->lock); - spin_unlock(&pool->lock); - - if (locked) - z3fold_page_unlock(zhdr); - - spin_lock(&pool->stale_lock); - list_add(&zhdr->buddy, &pool->stale); - queue_work(pool->release_wq, &pool->work); - spin_unlock(&pool->stale_lock); - - atomic64_dec(&pool->pages_nr); -} - -static void release_z3fold_page_locked(struct kref *ref) -{ - struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, - refcount); - WARN_ON(z3fold_page_trylock(zhdr)); - __release_z3fold_page(zhdr, true); -} - -static void release_z3fold_page_locked_list(struct kref *ref) -{ - struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, - refcount); - struct z3fold_pool *pool = zhdr_to_pool(zhdr); - - spin_lock(&pool->lock); - list_del_init(&zhdr->buddy); - spin_unlock(&pool->lock); - - WARN_ON(z3fold_page_trylock(zhdr)); - __release_z3fold_page(zhdr, true); -} - -static inline int put_z3fold_locked(struct z3fold_header *zhdr) -{ - return kref_put(&zhdr->refcount, release_z3fold_page_locked); -} - -static inline int put_z3fold_locked_list(struct z3fold_header *zhdr) -{ - return kref_put(&zhdr->refcount, release_z3fold_page_locked_list); -} - -static void free_pages_work(struct work_struct *w) -{ - struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); - - spin_lock(&pool->stale_lock); - while (!list_empty(&pool->stale)) { - struct z3fold_header *zhdr = list_first_entry(&pool->stale, - struct z3fold_header, buddy); - struct page *page = virt_to_page(zhdr); - - list_del(&zhdr->buddy); - if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) - continue; - spin_unlock(&pool->stale_lock); - cancel_work_sync(&zhdr->work); - free_z3fold_page(page, false); - cond_resched(); - spin_lock(&pool->stale_lock); - } - spin_unlock(&pool->stale_lock); -} - -/* - * Returns the number of free chunks in a z3fold page. - * NB: can't be used with HEADLESS pages. - */ -static int num_free_chunks(struct z3fold_header *zhdr) -{ - int nfree; - /* - * If there is a middle object, pick up the bigger free space - * either before or after it. Otherwise just subtract the number - * of chunks occupied by the first and the last objects. - */ - if (zhdr->middle_chunks != 0) { - int nfree_before = zhdr->first_chunks ? - 0 : zhdr->start_middle - ZHDR_CHUNKS; - int nfree_after = zhdr->last_chunks ? - 0 : TOTAL_CHUNKS - - (zhdr->start_middle + zhdr->middle_chunks); - nfree = max(nfree_before, nfree_after); - } else - nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; - return nfree; -} - -/* Add to the appropriate unbuddied list */ -static inline void add_to_unbuddied(struct z3fold_pool *pool, - struct z3fold_header *zhdr) -{ - if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || - zhdr->middle_chunks == 0) { - struct list_head *unbuddied; - int freechunks = num_free_chunks(zhdr); - - migrate_disable(); - unbuddied = this_cpu_ptr(pool->unbuddied); - spin_lock(&pool->lock); - list_add(&zhdr->buddy, &unbuddied[freechunks]); - spin_unlock(&pool->lock); - zhdr->cpu = smp_processor_id(); - migrate_enable(); - } -} - -static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks) -{ - enum buddy bud = HEADLESS; - - if (zhdr->middle_chunks) { - if (!zhdr->first_chunks && - chunks <= zhdr->start_middle - ZHDR_CHUNKS) - bud = FIRST; - else if (!zhdr->last_chunks) - bud = LAST; - } else { - if (!zhdr->first_chunks) - bud = FIRST; - else if (!zhdr->last_chunks) - bud = LAST; - else - bud = MIDDLE; - } - - return bud; -} - -static inline void *mchunk_memmove(struct z3fold_header *zhdr, - unsigned short dst_chunk) -{ - void *beg = zhdr; - return memmove(beg + (dst_chunk << CHUNK_SHIFT), - beg + (zhdr->start_middle << CHUNK_SHIFT), - zhdr->middle_chunks << CHUNK_SHIFT); -} - -static inline bool buddy_single(struct z3fold_header *zhdr) -{ - return !((zhdr->first_chunks && zhdr->middle_chunks) || - (zhdr->first_chunks && zhdr->last_chunks) || - (zhdr->middle_chunks && zhdr->last_chunks)); -} - -static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr) -{ - struct z3fold_pool *pool = zhdr_to_pool(zhdr); - void *p = zhdr; - unsigned long old_handle = 0; - size_t sz = 0; - struct z3fold_header *new_zhdr = NULL; - int first_idx = __idx(zhdr, FIRST); - int middle_idx = __idx(zhdr, MIDDLE); - int last_idx = __idx(zhdr, LAST); - unsigned short *moved_chunks = NULL; - - /* - * No need to protect slots here -- all the slots are "local" and - * the page lock is already taken - */ - if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) { - p += ZHDR_SIZE_ALIGNED; - sz = zhdr->first_chunks << CHUNK_SHIFT; - old_handle = (unsigned long)&zhdr->slots->slot[first_idx]; - moved_chunks = &zhdr->first_chunks; - } else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) { - p += zhdr->start_middle << CHUNK_SHIFT; - sz = zhdr->middle_chunks << CHUNK_SHIFT; - old_handle = (unsigned long)&zhdr->slots->slot[middle_idx]; - moved_chunks = &zhdr->middle_chunks; - } else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) { - p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); - sz = zhdr->last_chunks << CHUNK_SHIFT; - old_handle = (unsigned long)&zhdr->slots->slot[last_idx]; - moved_chunks = &zhdr->last_chunks; - } - - if (sz > 0) { - enum buddy new_bud = HEADLESS; - short chunks = size_to_chunks(sz); - void *q; - - new_zhdr = __z3fold_alloc(pool, sz, false); - if (!new_zhdr) - return NULL; - - if (WARN_ON(new_zhdr == zhdr)) - goto out_fail; - - new_bud = get_free_buddy(new_zhdr, chunks); - q = new_zhdr; - switch (new_bud) { - case FIRST: - new_zhdr->first_chunks = chunks; - q += ZHDR_SIZE_ALIGNED; - break; - case MIDDLE: - new_zhdr->middle_chunks = chunks; - new_zhdr->start_middle = - new_zhdr->first_chunks + ZHDR_CHUNKS; - q += new_zhdr->start_middle << CHUNK_SHIFT; - break; - case LAST: - new_zhdr->last_chunks = chunks; - q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT); - break; - default: - goto out_fail; - } - new_zhdr->foreign_handles++; - memcpy(q, p, sz); - write_lock(&zhdr->slots->lock); - *(unsigned long *)old_handle = (unsigned long)new_zhdr + - __idx(new_zhdr, new_bud); - if (new_bud == LAST) - *(unsigned long *)old_handle |= - (new_zhdr->last_chunks << BUDDY_SHIFT); - write_unlock(&zhdr->slots->lock); - add_to_unbuddied(pool, new_zhdr); - z3fold_page_unlock(new_zhdr); - - *moved_chunks = 0; - } - - return new_zhdr; - -out_fail: - if (new_zhdr && !put_z3fold_locked(new_zhdr)) { - add_to_unbuddied(pool, new_zhdr); - z3fold_page_unlock(new_zhdr); - } - return NULL; - -} - -#define BIG_CHUNK_GAP 3 -/* Has to be called with lock held */ -static int z3fold_compact_page(struct z3fold_header *zhdr) -{ - struct page *page = virt_to_page(zhdr); - - if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) - return 0; /* can't move middle chunk, it's used */ - - if (unlikely(PageIsolated(page))) - return 0; - - if (zhdr->middle_chunks == 0) - return 0; /* nothing to compact */ - - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { - /* move to the beginning */ - mchunk_memmove(zhdr, ZHDR_CHUNKS); - zhdr->first_chunks = zhdr->middle_chunks; - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - zhdr->first_num++; - return 1; - } - - /* - * moving data is expensive, so let's only do that if - * there's substantial gain (at least BIG_CHUNK_GAP chunks) - */ - if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && - zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= - BIG_CHUNK_GAP) { - mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); - zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; - return 1; - } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && - TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle - + zhdr->middle_chunks) >= - BIG_CHUNK_GAP) { - unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - - zhdr->middle_chunks; - mchunk_memmove(zhdr, new_start); - zhdr->start_middle = new_start; - return 1; - } - - return 0; -} - -static void do_compact_page(struct z3fold_header *zhdr, bool locked) -{ - struct z3fold_pool *pool = zhdr_to_pool(zhdr); - struct page *page; - - page = virt_to_page(zhdr); - if (locked) - WARN_ON(z3fold_page_trylock(zhdr)); - else - z3fold_page_lock(zhdr); - if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { - z3fold_page_unlock(zhdr); - return; - } - spin_lock(&pool->lock); - list_del_init(&zhdr->buddy); - spin_unlock(&pool->lock); - - if (put_z3fold_locked(zhdr)) - return; - - if (test_bit(PAGE_STALE, &page->private) || - test_and_set_bit(PAGE_CLAIMED, &page->private)) { - z3fold_page_unlock(zhdr); - return; - } - - if (!zhdr->foreign_handles && buddy_single(zhdr) && - zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) { - if (!put_z3fold_locked(zhdr)) { - clear_bit(PAGE_CLAIMED, &page->private); - z3fold_page_unlock(zhdr); - } - return; - } - - z3fold_compact_page(zhdr); - add_to_unbuddied(pool, zhdr); - clear_bit(PAGE_CLAIMED, &page->private); - z3fold_page_unlock(zhdr); -} - -static void compact_page_work(struct work_struct *w) -{ - struct z3fold_header *zhdr = container_of(w, struct z3fold_header, - work); - - do_compact_page(zhdr, false); -} - -/* returns _locked_ z3fold page header or NULL */ -static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool, - size_t size, bool can_sleep) -{ - struct z3fold_header *zhdr = NULL; - struct page *page; - struct list_head *unbuddied; - int chunks = size_to_chunks(size), i; - -lookup: - migrate_disable(); - /* First, try to find an unbuddied z3fold page. */ - unbuddied = this_cpu_ptr(pool->unbuddied); - for_each_unbuddied_list(i, chunks) { - struct list_head *l = &unbuddied[i]; - - zhdr = list_first_entry_or_null(READ_ONCE(l), - struct z3fold_header, buddy); - - if (!zhdr) - continue; - - /* Re-check under lock. */ - spin_lock(&pool->lock); - if (unlikely(zhdr != list_first_entry(READ_ONCE(l), - struct z3fold_header, buddy)) || - !z3fold_page_trylock(zhdr)) { - spin_unlock(&pool->lock); - zhdr = NULL; - migrate_enable(); - if (can_sleep) - cond_resched(); - goto lookup; - } - list_del_init(&zhdr->buddy); - zhdr->cpu = -1; - spin_unlock(&pool->lock); - - page = virt_to_page(zhdr); - if (test_bit(NEEDS_COMPACTING, &page->private) || - test_bit(PAGE_CLAIMED, &page->private)) { - z3fold_page_unlock(zhdr); - zhdr = NULL; - migrate_enable(); - if (can_sleep) - cond_resched(); - goto lookup; - } - - /* - * this page could not be removed from its unbuddied - * list while pool lock was held, and then we've taken - * page lock so kref_put could not be called before - * we got here, so it's safe to just call kref_get() - */ - kref_get(&zhdr->refcount); - break; - } - migrate_enable(); - - if (!zhdr) { - int cpu; - - /* look for _exact_ match on other cpus' lists */ - for_each_online_cpu(cpu) { - struct list_head *l; - - unbuddied = per_cpu_ptr(pool->unbuddied, cpu); - spin_lock(&pool->lock); - l = &unbuddied[chunks]; - - zhdr = list_first_entry_or_null(READ_ONCE(l), - struct z3fold_header, buddy); - - if (!zhdr || !z3fold_page_trylock(zhdr)) { - spin_unlock(&pool->lock); - zhdr = NULL; - continue; - } - list_del_init(&zhdr->buddy); - zhdr->cpu = -1; - spin_unlock(&pool->lock); - - page = virt_to_page(zhdr); - if (test_bit(NEEDS_COMPACTING, &page->private) || - test_bit(PAGE_CLAIMED, &page->private)) { - z3fold_page_unlock(zhdr); - zhdr = NULL; - if (can_sleep) - cond_resched(); - continue; - } - kref_get(&zhdr->refcount); - break; - } - } - - if (zhdr && !zhdr->slots) { - zhdr->slots = alloc_slots(pool, GFP_ATOMIC); - if (!zhdr->slots) - goto out_fail; - } - return zhdr; - -out_fail: - if (!put_z3fold_locked(zhdr)) { - add_to_unbuddied(pool, zhdr); - z3fold_page_unlock(zhdr); - } - return NULL; -} - -/* - * API Functions - */ - -/** - * z3fold_create_pool() - create a new z3fold pool - * @name: pool name - * @gfp: gfp flags when allocating the z3fold pool structure - * - * Return: pointer to the new z3fold pool or NULL if the metadata allocation - * failed. - */ -static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp) -{ - struct z3fold_pool *pool = NULL; - int i, cpu; - - pool = kzalloc(sizeof(struct z3fold_pool), gfp); - if (!pool) - goto out; - pool->c_handle = kmem_cache_create("z3fold_handle", - sizeof(struct z3fold_buddy_slots), - SLOTS_ALIGN, 0, NULL); - if (!pool->c_handle) - goto out_c; - spin_lock_init(&pool->lock); - spin_lock_init(&pool->stale_lock); - pool->unbuddied = __alloc_percpu(sizeof(struct list_head) * NCHUNKS, - __alignof__(struct list_head)); - if (!pool->unbuddied) - goto out_pool; - for_each_possible_cpu(cpu) { - struct list_head *unbuddied = - per_cpu_ptr(pool->unbuddied, cpu); - for_each_unbuddied_list(i, 0) - INIT_LIST_HEAD(&unbuddied[i]); - } - INIT_LIST_HEAD(&pool->stale); - atomic64_set(&pool->pages_nr, 0); - pool->name = name; - pool->compact_wq = create_singlethread_workqueue(pool->name); - if (!pool->compact_wq) - goto out_unbuddied; - pool->release_wq = create_singlethread_workqueue(pool->name); - if (!pool->release_wq) - goto out_wq; - INIT_WORK(&pool->work, free_pages_work); - return pool; - -out_wq: - destroy_workqueue(pool->compact_wq); -out_unbuddied: - free_percpu(pool->unbuddied); -out_pool: - kmem_cache_destroy(pool->c_handle); -out_c: - kfree(pool); -out: - return NULL; -} - -/** - * z3fold_destroy_pool() - destroys an existing z3fold pool - * @pool: the z3fold pool to be destroyed - * - * The pool should be emptied before this function is called. - */ -static void z3fold_destroy_pool(struct z3fold_pool *pool) -{ - kmem_cache_destroy(pool->c_handle); - - /* - * We need to destroy pool->compact_wq before pool->release_wq, - * as any pending work on pool->compact_wq will call - * queue_work(pool->release_wq, &pool->work). - * - * There are still outstanding pages until both workqueues are drained, - * so we cannot unregister migration until then. - */ - - destroy_workqueue(pool->compact_wq); - destroy_workqueue(pool->release_wq); - free_percpu(pool->unbuddied); - kfree(pool); -} - -static const struct movable_operations z3fold_mops; - -/** - * z3fold_alloc() - allocates a region of a given size - * @pool: z3fold pool from which to allocate - * @size: size in bytes of the desired allocation - * @gfp: gfp flags used if the pool needs to grow - * @handle: handle of the new allocation - * - * This function will attempt to find a free region in the pool large enough to - * satisfy the allocation request. A search of the unbuddied lists is - * performed first. If no suitable free region is found, then a new page is - * allocated and added to the pool to satisfy the request. - * - * Return: 0 if success and handle is set, otherwise -EINVAL if the size or - * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate - * a new page. - */ -static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, - unsigned long *handle) -{ - int chunks = size_to_chunks(size); - struct z3fold_header *zhdr = NULL; - struct page *page = NULL; - enum buddy bud; - bool can_sleep = gfpflags_allow_blocking(gfp); - - if (!size || (gfp & __GFP_HIGHMEM)) - return -EINVAL; - - if (size > PAGE_SIZE) - return -ENOSPC; - - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) - bud = HEADLESS; - else { -retry: - zhdr = __z3fold_alloc(pool, size, can_sleep); - if (zhdr) { - bud = get_free_buddy(zhdr, chunks); - if (bud == HEADLESS) { - if (!put_z3fold_locked(zhdr)) - z3fold_page_unlock(zhdr); - pr_err("No free chunks in unbuddied\n"); - WARN_ON(1); - goto retry; - } - page = virt_to_page(zhdr); - goto found; - } - bud = FIRST; - } - - page = alloc_page(gfp); - if (!page) - return -ENOMEM; - - zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp); - if (!zhdr) { - __free_page(page); - return -ENOMEM; - } - atomic64_inc(&pool->pages_nr); - - if (bud == HEADLESS) { - set_bit(PAGE_HEADLESS, &page->private); - goto headless; - } - if (can_sleep) { - lock_page(page); - __SetPageMovable(page, &z3fold_mops); - unlock_page(page); - } else { - WARN_ON(!trylock_page(page)); - __SetPageMovable(page, &z3fold_mops); - unlock_page(page); - } - z3fold_page_lock(zhdr); - -found: - if (bud == FIRST) - zhdr->first_chunks = chunks; - else if (bud == LAST) - zhdr->last_chunks = chunks; - else { - zhdr->middle_chunks = chunks; - zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; - } - add_to_unbuddied(pool, zhdr); - -headless: - spin_lock(&pool->lock); - *handle = encode_handle(zhdr, bud); - spin_unlock(&pool->lock); - if (bud != HEADLESS) - z3fold_page_unlock(zhdr); - - return 0; -} - -/** - * z3fold_free() - frees the allocation associated with the given handle - * @pool: pool in which the allocation resided - * @handle: handle associated with the allocation returned by z3fold_alloc() - * - * In the case that the z3fold page in which the allocation resides is under - * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function - * only sets the first|middle|last_chunks to 0. The page is actually freed - * once all buddies are evicted (see z3fold_reclaim_page() below). - */ -static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) -{ - struct z3fold_header *zhdr; - struct page *page; - enum buddy bud; - bool page_claimed; - - zhdr = get_z3fold_header(handle); - page = virt_to_page(zhdr); - page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private); - - if (test_bit(PAGE_HEADLESS, &page->private)) { - /* if a headless page is under reclaim, just leave. - * NB: we use test_and_set_bit for a reason: if the bit - * has not been set before, we release this page - * immediately so we don't care about its value any more. - */ - if (!page_claimed) { - put_z3fold_header(zhdr); - free_z3fold_page(page, true); - atomic64_dec(&pool->pages_nr); - } - return; - } - - /* Non-headless case */ - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - put_z3fold_header(zhdr); - return; - } - - if (!page_claimed) - free_handle(handle, zhdr); - if (put_z3fold_locked_list(zhdr)) - return; - if (page_claimed) { - /* the page has not been claimed by us */ - put_z3fold_header(zhdr); - return; - } - if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { - clear_bit(PAGE_CLAIMED, &page->private); - put_z3fold_header(zhdr); - return; - } - if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { - zhdr->cpu = -1; - kref_get(&zhdr->refcount); - clear_bit(PAGE_CLAIMED, &page->private); - do_compact_page(zhdr, true); - return; - } - kref_get(&zhdr->refcount); - clear_bit(PAGE_CLAIMED, &page->private); - queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); - put_z3fold_header(zhdr); -} - -/** - * z3fold_map() - maps the allocation associated with the given handle - * @pool: pool in which the allocation resides - * @handle: handle associated with the allocation to be mapped - * - * Extracts the buddy number from handle and constructs the pointer to the - * correct starting chunk within the page. - * - * Returns: a pointer to the mapped allocation - */ -static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) -{ - struct z3fold_header *zhdr; - struct page *page; - void *addr; - enum buddy buddy; - - zhdr = get_z3fold_header(handle); - addr = zhdr; - page = virt_to_page(zhdr); - - if (test_bit(PAGE_HEADLESS, &page->private)) - goto out; - - buddy = handle_to_buddy(handle); - switch (buddy) { - case FIRST: - addr += ZHDR_SIZE_ALIGNED; - break; - case MIDDLE: - addr += zhdr->start_middle << CHUNK_SHIFT; - set_bit(MIDDLE_CHUNK_MAPPED, &page->private); - break; - case LAST: - addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); - break; - default: - pr_err("unknown buddy id %d\n", buddy); - WARN_ON(1); - addr = NULL; - break; - } - - if (addr) - zhdr->mapped_count++; -out: - put_z3fold_header(zhdr); - return addr; -} - -/** - * z3fold_unmap() - unmaps the allocation associated with the given handle - * @pool: pool in which the allocation resides - * @handle: handle associated with the allocation to be unmapped - */ -static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) -{ - struct z3fold_header *zhdr; - struct page *page; - enum buddy buddy; - - zhdr = get_z3fold_header(handle); - page = virt_to_page(zhdr); - - if (test_bit(PAGE_HEADLESS, &page->private)) - return; - - buddy = handle_to_buddy(handle); - if (buddy == MIDDLE) - clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); - zhdr->mapped_count--; - put_z3fold_header(zhdr); -} - -/** - * z3fold_get_pool_pages() - gets the z3fold pool size in pages - * @pool: pool whose size is being queried - * - * Returns: size in pages of the given pool. - */ -static u64 z3fold_get_pool_pages(struct z3fold_pool *pool) -{ - return atomic64_read(&pool->pages_nr); -} - -static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) -{ - struct z3fold_header *zhdr; - struct z3fold_pool *pool; - - VM_BUG_ON_PAGE(PageIsolated(page), page); - - if (test_bit(PAGE_HEADLESS, &page->private)) - return false; - - zhdr = page_address(page); - z3fold_page_lock(zhdr); - if (test_bit(NEEDS_COMPACTING, &page->private) || - test_bit(PAGE_STALE, &page->private)) - goto out; - - if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) - goto out; - - if (test_and_set_bit(PAGE_CLAIMED, &page->private)) - goto out; - pool = zhdr_to_pool(zhdr); - spin_lock(&pool->lock); - if (!list_empty(&zhdr->buddy)) - list_del_init(&zhdr->buddy); - spin_unlock(&pool->lock); - - kref_get(&zhdr->refcount); - z3fold_page_unlock(zhdr); - return true; - -out: - z3fold_page_unlock(zhdr); - return false; -} - -static int z3fold_page_migrate(struct page *newpage, struct page *page, - enum migrate_mode mode) -{ - struct z3fold_header *zhdr, *new_zhdr; - struct z3fold_pool *pool; - - VM_BUG_ON_PAGE(!PageIsolated(page), page); - VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - - zhdr = page_address(page); - pool = zhdr_to_pool(zhdr); - - if (!z3fold_page_trylock(zhdr)) - return -EAGAIN; - if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) { - clear_bit(PAGE_CLAIMED, &page->private); - z3fold_page_unlock(zhdr); - return -EBUSY; - } - if (work_pending(&zhdr->work)) { - z3fold_page_unlock(zhdr); - return -EAGAIN; - } - new_zhdr = page_address(newpage); - memcpy(new_zhdr, zhdr, PAGE_SIZE); - newpage->private = page->private; - set_bit(PAGE_MIGRATED, &page->private); - z3fold_page_unlock(zhdr); - spin_lock_init(&new_zhdr->page_lock); - INIT_WORK(&new_zhdr->work, compact_page_work); - /* - * z3fold_page_isolate() ensures that new_zhdr->buddy is empty, - * so we only have to reinitialize it. - */ - INIT_LIST_HEAD(&new_zhdr->buddy); - __ClearPageMovable(page); - - get_page(newpage); - z3fold_page_lock(new_zhdr); - if (new_zhdr->first_chunks) - encode_handle(new_zhdr, FIRST); - if (new_zhdr->last_chunks) - encode_handle(new_zhdr, LAST); - if (new_zhdr->middle_chunks) - encode_handle(new_zhdr, MIDDLE); - set_bit(NEEDS_COMPACTING, &newpage->private); - new_zhdr->cpu = smp_processor_id(); - __SetPageMovable(newpage, &z3fold_mops); - z3fold_page_unlock(new_zhdr); - - queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); - - /* PAGE_CLAIMED and PAGE_MIGRATED are cleared now. */ - page->private = 0; - put_page(page); - return 0; -} - -static void z3fold_page_putback(struct page *page) -{ - struct z3fold_header *zhdr; - struct z3fold_pool *pool; - - zhdr = page_address(page); - pool = zhdr_to_pool(zhdr); - - z3fold_page_lock(zhdr); - if (!list_empty(&zhdr->buddy)) - list_del_init(&zhdr->buddy); - INIT_LIST_HEAD(&page->lru); - if (put_z3fold_locked(zhdr)) - return; - if (list_empty(&zhdr->buddy)) - add_to_unbuddied(pool, zhdr); - clear_bit(PAGE_CLAIMED, &page->private); - z3fold_page_unlock(zhdr); -} - -static const struct movable_operations z3fold_mops = { - .isolate_page = z3fold_page_isolate, - .migrate_page = z3fold_page_migrate, - .putback_page = z3fold_page_putback, -}; - -/***************** - * zpool - ****************/ - -static void *z3fold_zpool_create(const char *name, gfp_t gfp) -{ - return z3fold_create_pool(name, gfp); -} - -static void z3fold_zpool_destroy(void *pool) -{ - z3fold_destroy_pool(pool); -} - -static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, - unsigned long *handle) -{ - return z3fold_alloc(pool, size, gfp, handle); -} -static void z3fold_zpool_free(void *pool, unsigned long handle) -{ - z3fold_free(pool, handle); -} - -static void *z3fold_zpool_map(void *pool, unsigned long handle, - enum zpool_mapmode mm) -{ - return z3fold_map(pool, handle); -} -static void z3fold_zpool_unmap(void *pool, unsigned long handle) -{ - z3fold_unmap(pool, handle); -} - -static u64 z3fold_zpool_total_pages(void *pool) -{ - return z3fold_get_pool_pages(pool); -} - -static struct zpool_driver z3fold_zpool_driver = { - .type = "z3fold", - .sleep_mapped = true, - .owner = THIS_MODULE, - .create = z3fold_zpool_create, - .destroy = z3fold_zpool_destroy, - .malloc = z3fold_zpool_malloc, - .free = z3fold_zpool_free, - .map = z3fold_zpool_map, - .unmap = z3fold_zpool_unmap, - .total_pages = z3fold_zpool_total_pages, -}; - -MODULE_ALIAS("zpool-z3fold"); - -static int __init init_z3fold(void) -{ - /* - * Make sure the z3fold header is not larger than the page size and - * there has remaining spaces for its buddy. - */ - BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); - zpool_register_driver(&z3fold_zpool_driver); - - return 0; -} - -static void __exit exit_z3fold(void) -{ - zpool_unregister_driver(&z3fold_zpool_driver); -} - -module_init(init_z3fold); -module_exit(exit_z3fold); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Vitaly Wool "); -MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages"); From 6df8bae8e851eacf2acf2237860213e002aba74f Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 29 Jan 2025 18:06:32 +0000 Subject: [PATCH 0854/2157] mm: zbud: remove zbud The zbud compressed pages allocator is rarely used, most users use zsmalloc. zbud consumes much more memory (only stores 1 or 2 compressed pages per physical page). The only advantage of zbud is a marginal performance improvement that by no means justify the memory overhead. Historically, zsmalloc had significantly worse latency than zbud and z3fold but offered better memory savings. This is no longer the case as shown by a simple recent analysis [1]. In a kernel build test on tmpfs in a limited cgroup, zbud 2-3% less time than zsmalloc, but at the cost of using ~32% more memory (1.5G vs 1.13G). The tradeoff does not make sense for zbud in any practical scenario. The only alleged advantage of zbud is not having the dependency on CONFIG_MMU, but CONFIG_SWAP already depends on CONFIG_MMU anyway, and zbud is only used by zswap. Remove zbud after z3fold's removal, leaving zsmalloc as the one and only zpool allocator. Leave the removal of the zpool API (and its associated config options) to a followup cleanup after no more allocators show up. Deprecating zbud for a few cycles before removing it was initially proposed [2], like z3fold was marked as deprecated for 2 cycles [3]. However, Johannes rightfully pointed out that the 2 cycles is too short for most downstream consumers, and z3fold was deprecated first only as a courtesy anyway. [1]https://lore.kernel.org/lkml/CAJD7tkbRF6od-2x_L8-A1QL3=2Ww13sCj4S3i4bNndqF+3+_Vg@mail.gmail.com/ [2]https://lore.kernel.org/lkml/Z5gdnSX5Lv-nfjQL@google.com/ [3]https://lore.kernel.org/lkml/20240904233343.933462-1-yosryahmed@google.com/ Link: https://lkml.kernel.org/r/20250129180633.3501650-3-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Alexander Gordeev Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Dan Streetman Cc: Heiko Carstens Cc: Huacai Chen Cc: Miaohe Lin Cc: Seth Jennings Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vitaly Wool Cc: Vlastimil Babka Cc: WANG Xuerui Signed-off-by: Andrew Morton --- CREDITS | 2 + Documentation/admin-guide/mm/zswap.rst | 10 +- MAINTAINERS | 7 - arch/loongarch/configs/loongson3_defconfig | 3 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- include/linux/zpool.h | 5 +- mm/Kconfig | 18 - mm/Makefile | 1 - mm/zbud.c | 455 --------------------- mm/zpool.c | 4 +- 11 files changed, 13 insertions(+), 496 deletions(-) delete mode 100644 mm/zbud.c diff --git a/CREDITS b/CREDITS index aeccd79f9778..5e65bf8553ba 100644 --- a/CREDITS +++ b/CREDITS @@ -1895,6 +1895,7 @@ S: Czech Republic N: Seth Jennings E: sjenning@redhat.com D: Creation and maintenance of zswap +D: Creation and maintenace of the zbud allocator N: Jeremy Kerr D: Maintainer of SPU File System @@ -3788,6 +3789,7 @@ N: Dan Streetman E: ddstreet@ieee.org D: Maintenance and development of zswap D: Creation and maintenance of the zpool API +D: Maintenace of the zbud allocator N: Drew Sullivan E: drew@ss.org diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst index 3598dcd7dbe7..fd3370aa43fe 100644 --- a/Documentation/admin-guide/mm/zswap.rst +++ b/Documentation/admin-guide/mm/zswap.rst @@ -60,15 +60,13 @@ accessed. The compressed memory pool grows on demand and shrinks as compressed pages are freed. The pool is not preallocated. By default, a zpool of type selected in ``CONFIG_ZSWAP_ZPOOL_DEFAULT`` Kconfig option is created, but it can be overridden at boot time by setting the ``zpool`` attribute, -e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs +e.g. ``zswap.zpool=zsmalloc``. It can also be changed at runtime using the sysfs ``zpool`` attribute, e.g.:: - echo zbud > /sys/module/zswap/parameters/zpool + echo zsmalloc > /sys/module/zswap/parameters/zpool -The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which -means the compression ratio will always be 2:1 or worse (because of half-full -zbud pages). The zsmalloc type zpool has a more complex compressed page -storage method, and it can achieve greater storage densities. +The zsmalloc type zpool has a complex compressed page storage method, and it +can achieve great storage densities. When a swap page is passed from swapout to zswap, zswap maintains a mapping of the swap entry, a combination of the swap type and swap offset, to the zpool diff --git a/MAINTAINERS b/MAINTAINERS index a65a73227e3d..60421d3e48a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26191,13 +26191,6 @@ F: Documentation/networking/device_drivers/hamradio/z8530drv.rst F: drivers/net/hamradio/*scc.c F: drivers/net/hamradio/z8530.h -ZBUD COMPRESSED PAGE ALLOCATOR -M: Seth Jennings -M: Dan Streetman -L: linux-mm@kvack.org -S: Maintained -F: mm/zbud.c - ZD1211RW WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 73c77500ac46..7ce5beb3cbf3 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -109,8 +109,7 @@ CONFIG_BINFMT_MISC=m CONFIG_ZPOOL=y CONFIG_ZSWAP=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y -CONFIG_ZBUD=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y # CONFIG_MHP_DEFAULT_ONLINE_TYPE_OFFLINE is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 80bdfbae6e5b..074df6328376 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -92,7 +92,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 449a0e996b96..ac68e663dd4d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -86,7 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set diff --git a/include/linux/zpool.h b/include/linux/zpool.h index a67d62b79698..5e6dc46b8cc4 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -4,9 +4,8 @@ * * Copyright (C) 2014 Dan Streetman * - * This is a common frontend for the zbud and zsmalloc memory - * storage pool implementations. Typically, this is used to - * store compressed memory. + * This is a common frontend for the zswap compressed memory storage + * implementations. */ #ifndef _ZPOOL_H_ diff --git a/mm/Kconfig b/mm/Kconfig index 6fa19022c09b..fba9757e5814 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -129,7 +129,6 @@ choice prompt "Default allocator" depends on ZSWAP default ZSWAP_ZPOOL_DEFAULT_ZSMALLOC if MMU - default ZSWAP_ZPOOL_DEFAULT_ZBUD help Selects the default allocator for the compressed cache for swap pages. @@ -140,12 +139,6 @@ choice The selection made here can be overridden by using the kernel command line 'zswap.zpool=' option. -config ZSWAP_ZPOOL_DEFAULT_ZBUD - bool "zbud" - select ZBUD - help - Use the zbud allocator as the default allocator. - config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC bool "zsmalloc" select ZSMALLOC @@ -156,20 +149,9 @@ endchoice config ZSWAP_ZPOOL_DEFAULT string depends on ZSWAP - default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC default "" -config ZBUD - tristate "2:1 compression allocator (zbud)" - depends on ZSWAP - help - A special purpose allocator for storing compressed pages. - It is designed to store up to two compressed pages per physical - page. While this design limits storage density, it has simple and - deterministic reclaim properties that make it preferable to a higher - density approach when reclaim will be used. - config ZSMALLOC tristate prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM) diff --git a/mm/Makefile b/mm/Makefile index e4c03da3c084..53392d2af3a5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -113,7 +113,6 @@ obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o -obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o obj-$(CONFIG_CMA) += cma.o diff --git a/mm/zbud.c b/mm/zbud.c deleted file mode 100644 index e9836fff9438..000000000000 --- a/mm/zbud.c +++ /dev/null @@ -1,455 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * zbud.c - * - * Copyright (C) 2013, Seth Jennings, IBM - * - * Concepts based on zcache internal zbud allocator by Dan Magenheimer. - * - * zbud is an special purpose allocator for storing compressed pages. Contrary - * to what its name may suggest, zbud is not a buddy allocator, but rather an - * allocator that "buddies" two compressed pages together in a single memory - * page. - * - * While this design limits storage density, it has simple and deterministic - * reclaim properties that make it preferable to a higher density approach when - * reclaim will be used. - * - * zbud works by storing compressed pages, or "zpages", together in pairs in a - * single memory page called a "zbud page". The first buddy is "left - * justified" at the beginning of the zbud page, and the last buddy is "right - * justified" at the end of the zbud page. The benefit is that if either - * buddy is freed, the freed buddy space, coalesced with whatever slack space - * that existed between the buddies, results in the largest possible free region - * within the zbud page. - * - * zbud also provides an attractive lower bound on density. The ratio of zpages - * to zbud pages can not be less than 1. This ensures that zbud can never "do - * harm" by using more pages to store zpages than the uncompressed zpages would - * have used on their own. - * - * zbud pages are divided into "chunks". The size of the chunks is fixed at - * compile time and determined by NCHUNKS_ORDER below. Dividing zbud pages - * into chunks allows organizing unbuddied zbud pages into a manageable number - * of unbuddied lists according to the number of free chunks available in the - * zbud page. - * - * The zbud API differs from that of conventional allocators in that the - * allocation function, zbud_alloc(), returns an opaque handle to the user, - * not a dereferenceable pointer. The user must map the handle using - * zbud_map() in order to get a usable pointer by which to access the - * allocation data and unmap the handle with zbud_unmap() when operations - * on the allocation data are complete. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include - -/***************** - * Structures -*****************/ -/* - * NCHUNKS_ORDER determines the internal allocation granularity, effectively - * adjusting internal fragmentation. It also determines the number of - * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the - * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk - * in allocated page is occupied by zbud header, NCHUNKS will be calculated to - * 63 which shows the max number of free chunks in zbud page, also there will be - * 63 freelists per pool. - */ -#define NCHUNKS_ORDER 6 - -#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) -#define CHUNK_SIZE (1 << CHUNK_SHIFT) -#define ZHDR_SIZE_ALIGNED CHUNK_SIZE -#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) - -struct zbud_pool; - -/** - * struct zbud_pool - stores metadata for each zbud pool - * @lock: protects all pool fields and first|last_chunk fields of any - * zbud page in the pool - * @unbuddied: array of lists tracking zbud pages that only contain one buddy; - * the lists each zbud page is added to depends on the size of - * its free region. - * @buddied: list tracking the zbud pages that contain two buddies; - * these zbud pages are full - * @pages_nr: number of zbud pages in the pool. - * - * This structure is allocated at pool creation time and maintains metadata - * pertaining to a particular zbud pool. - */ -struct zbud_pool { - spinlock_t lock; - union { - /* - * Reuse unbuddied[0] as buddied on the ground that - * unbuddied[0] is unused. - */ - struct list_head buddied; - struct list_head unbuddied[NCHUNKS]; - }; - u64 pages_nr; -}; - -/* - * struct zbud_header - zbud page metadata occupying the first chunk of each - * zbud page. - * @buddy: links the zbud page into the unbuddied/buddied lists in the pool - * @first_chunks: the size of the first buddy in chunks, 0 if free - * @last_chunks: the size of the last buddy in chunks, 0 if free - */ -struct zbud_header { - struct list_head buddy; - unsigned int first_chunks; - unsigned int last_chunks; -}; - -/***************** - * Helpers -*****************/ -/* Just to make the code easier to read */ -enum buddy { - FIRST, - LAST -}; - -/* Converts an allocation size in bytes to size in zbud chunks */ -static int size_to_chunks(size_t size) -{ - return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; -} - -#define for_each_unbuddied_list(_iter, _begin) \ - for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) - -/* Initializes the zbud header of a newly allocated zbud page */ -static struct zbud_header *init_zbud_page(struct page *page) -{ - struct zbud_header *zhdr = page_address(page); - zhdr->first_chunks = 0; - zhdr->last_chunks = 0; - INIT_LIST_HEAD(&zhdr->buddy); - return zhdr; -} - -/* Resets the struct page fields and frees the page */ -static void free_zbud_page(struct zbud_header *zhdr) -{ - __free_page(virt_to_page(zhdr)); -} - -/* - * Encodes the handle of a particular buddy within a zbud page - * Pool lock should be held as this function accesses first|last_chunks - */ -static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) -{ - unsigned long handle; - - /* - * For now, the encoded handle is actually just the pointer to the data - * but this might not always be the case. A little information hiding. - * Add CHUNK_SIZE to the handle if it is the first allocation to jump - * over the zbud header in the first chunk. - */ - handle = (unsigned long)zhdr; - if (bud == FIRST) - /* skip over zbud header */ - handle += ZHDR_SIZE_ALIGNED; - else /* bud == LAST */ - handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); - return handle; -} - -/* Returns the zbud page where a given handle is stored */ -static struct zbud_header *handle_to_zbud_header(unsigned long handle) -{ - return (struct zbud_header *)(handle & PAGE_MASK); -} - -/* Returns the number of free chunks in a zbud page */ -static int num_free_chunks(struct zbud_header *zhdr) -{ - /* - * Rather than branch for different situations, just use the fact that - * free buddies have a length of zero to simplify everything. - */ - return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; -} - -/***************** - * API Functions -*****************/ -/** - * zbud_create_pool() - create a new zbud pool - * @gfp: gfp flags when allocating the zbud pool structure - * - * Return: pointer to the new zbud pool or NULL if the metadata allocation - * failed. - */ -static struct zbud_pool *zbud_create_pool(gfp_t gfp) -{ - struct zbud_pool *pool; - int i; - - pool = kzalloc(sizeof(struct zbud_pool), gfp); - if (!pool) - return NULL; - spin_lock_init(&pool->lock); - for_each_unbuddied_list(i, 0) - INIT_LIST_HEAD(&pool->unbuddied[i]); - INIT_LIST_HEAD(&pool->buddied); - pool->pages_nr = 0; - return pool; -} - -/** - * zbud_destroy_pool() - destroys an existing zbud pool - * @pool: the zbud pool to be destroyed - * - * The pool should be emptied before this function is called. - */ -static void zbud_destroy_pool(struct zbud_pool *pool) -{ - kfree(pool); -} - -/** - * zbud_alloc() - allocates a region of a given size - * @pool: zbud pool from which to allocate - * @size: size in bytes of the desired allocation - * @gfp: gfp flags used if the pool needs to grow - * @handle: handle of the new allocation - * - * This function will attempt to find a free region in the pool large enough to - * satisfy the allocation request. A search of the unbuddied lists is - * performed first. If no suitable free region is found, then a new page is - * allocated and added to the pool to satisfy the request. - * - * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used - * as zbud pool pages. - * - * Return: 0 if success and handle is set, otherwise -EINVAL if the size or - * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate - * a new page. - */ -static int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, - unsigned long *handle) -{ - int chunks, i, freechunks; - struct zbud_header *zhdr = NULL; - enum buddy bud; - struct page *page; - - if (!size || (gfp & __GFP_HIGHMEM)) - return -EINVAL; - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) - return -ENOSPC; - chunks = size_to_chunks(size); - spin_lock(&pool->lock); - - /* First, try to find an unbuddied zbud page. */ - for_each_unbuddied_list(i, chunks) { - if (!list_empty(&pool->unbuddied[i])) { - zhdr = list_first_entry(&pool->unbuddied[i], - struct zbud_header, buddy); - list_del(&zhdr->buddy); - if (zhdr->first_chunks == 0) - bud = FIRST; - else - bud = LAST; - goto found; - } - } - - /* Couldn't find unbuddied zbud page, create new one */ - spin_unlock(&pool->lock); - page = alloc_page(gfp); - if (!page) - return -ENOMEM; - spin_lock(&pool->lock); - pool->pages_nr++; - zhdr = init_zbud_page(page); - bud = FIRST; - -found: - if (bud == FIRST) - zhdr->first_chunks = chunks; - else - zhdr->last_chunks = chunks; - - if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { - /* Add to unbuddied list */ - freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); - } else { - /* Add to buddied list */ - list_add(&zhdr->buddy, &pool->buddied); - } - - *handle = encode_handle(zhdr, bud); - spin_unlock(&pool->lock); - - return 0; -} - -/** - * zbud_free() - frees the allocation associated with the given handle - * @pool: pool in which the allocation resided - * @handle: handle associated with the allocation returned by zbud_alloc() - */ -static void zbud_free(struct zbud_pool *pool, unsigned long handle) -{ - struct zbud_header *zhdr; - int freechunks; - - spin_lock(&pool->lock); - zhdr = handle_to_zbud_header(handle); - - /* If first buddy, handle will be page aligned */ - if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) - zhdr->last_chunks = 0; - else - zhdr->first_chunks = 0; - - /* Remove from existing buddy list */ - list_del(&zhdr->buddy); - - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { - /* zbud page is empty, free */ - free_zbud_page(zhdr); - pool->pages_nr--; - } else { - /* Add to unbuddied list */ - freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); - } - - spin_unlock(&pool->lock); -} - -/** - * zbud_map() - maps the allocation associated with the given handle - * @pool: pool in which the allocation resides - * @handle: handle associated with the allocation to be mapped - * - * While trivial for zbud, the mapping functions for others allocators - * implementing this allocation API could have more complex information encoded - * in the handle and could create temporary mappings to make the data - * accessible to the user. - * - * Returns: a pointer to the mapped allocation - */ -static void *zbud_map(struct zbud_pool *pool, unsigned long handle) -{ - return (void *)(handle); -} - -/** - * zbud_unmap() - maps the allocation associated with the given handle - * @pool: pool in which the allocation resides - * @handle: handle associated with the allocation to be unmapped - */ -static void zbud_unmap(struct zbud_pool *pool, unsigned long handle) -{ -} - -/** - * zbud_get_pool_pages() - gets the zbud pool size in pages - * @pool: pool whose size is being queried - * - * Returns: size in pages of the given pool. The pool lock need not be - * taken to access pages_nr. - */ -static u64 zbud_get_pool_pages(struct zbud_pool *pool) -{ - return pool->pages_nr; -} - -/***************** - * zpool - ****************/ - -static void *zbud_zpool_create(const char *name, gfp_t gfp) -{ - return zbud_create_pool(gfp); -} - -static void zbud_zpool_destroy(void *pool) -{ - zbud_destroy_pool(pool); -} - -static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp, - unsigned long *handle) -{ - return zbud_alloc(pool, size, gfp, handle); -} -static void zbud_zpool_free(void *pool, unsigned long handle) -{ - zbud_free(pool, handle); -} - -static void *zbud_zpool_map(void *pool, unsigned long handle, - enum zpool_mapmode mm) -{ - return zbud_map(pool, handle); -} -static void zbud_zpool_unmap(void *pool, unsigned long handle) -{ - zbud_unmap(pool, handle); -} - -static u64 zbud_zpool_total_pages(void *pool) -{ - return zbud_get_pool_pages(pool); -} - -static struct zpool_driver zbud_zpool_driver = { - .type = "zbud", - .sleep_mapped = true, - .owner = THIS_MODULE, - .create = zbud_zpool_create, - .destroy = zbud_zpool_destroy, - .malloc = zbud_zpool_malloc, - .free = zbud_zpool_free, - .map = zbud_zpool_map, - .unmap = zbud_zpool_unmap, - .total_pages = zbud_zpool_total_pages, -}; - -MODULE_ALIAS("zpool-zbud"); - -static int __init init_zbud(void) -{ - /* Make sure the zbud header will fit in one chunk */ - BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED); - pr_info("loaded\n"); - - zpool_register_driver(&zbud_zpool_driver); - - return 0; -} - -static void __exit exit_zbud(void) -{ - zpool_unregister_driver(&zbud_zpool_driver); - pr_info("unloaded\n"); -} - -module_init(init_zbud); -module_exit(exit_zbud); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Seth Jennings "); -MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages"); diff --git a/mm/zpool.c b/mm/zpool.c index b9fda1fa857d..4bbd12d4b659 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -95,7 +95,7 @@ static void zpool_put_driver(struct zpool_driver *driver) /** * zpool_has_pool() - Check if the pool driver is available - * @type: The type of the zpool to check (e.g. zbud, zsmalloc) + * @type: The type of the zpool to check (e.g. zsmalloc) * * This checks if the @type pool driver is available. This will try to load * the requested module, if needed, but there is no guarantee the module will @@ -130,7 +130,7 @@ EXPORT_SYMBOL(zpool_has_pool); /** * zpool_create_pool() - Create a new zpool - * @type: The type of the zpool to create (e.g. zbud, zsmalloc) + * @type: The type of the zpool to create (e.g. zsmalloc) * @name: The name of the zpool (e.g. zram0, zswap) * @gfp: The GFP flags to use when allocating the pool. * From 3a75ccba047b11a4e8c437e8347b2d1746f1d808 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 31 Jan 2025 12:31:49 +0000 Subject: [PATCH 0855/2157] mm: simplify vma merge structure and expand comments Patch series "mm: further simplify VMA merge operation", v3. While significant efforts have been made to improve the VMA merge operation, there remains remnants of the bad (or rather confusing) old days, which make the code difficult to understand, more bug prone and thus harder to modify. This series attempts to significantly improve matters in a number of respects - with a focus on simplifying the commit_merge() function which actually actions the merge operation - and importantly, adjusting the two most confusing merge cases - those in which we 'adjust' the VMA immediately adjacent to the one being merged. One source of confusion are the VMAs being threaded through the operation themselves - vmg->prev, vmg->vma and vmg->next. At the start of the operation, vmg->vma is either NULL if a new VMA is propose to be added, or if not then a pointer to an existing VMA being modified, and prev/next are (perhaps not present) VMAs sat immediately before and after the range specified in vmg->start, end, respectively. However, during the VMA merge operation, we change vmg->start, end and pgoff to span the newly merged range and vmg->vma to either be: a. The ultimately returned VMA (in most cases) or b. A VMA which we will manipulate, but ultimately instead return vmg->next. Case b. especially here is confusing for somebody reading this code, but the fact we update this state, along with vmg->start, end, pgoff only makes matters worse. We simplify things by replacing vmg->vma with vmg->middle and never changing it - this is always either NULL (for a new VMA) or the VMA being modified between vmg->prev and vmg->next. We further simplify by placing the merged VMA in a new vmg->target field - whether case b. above is the case or not. The reader of the code can now simply rely on vmg->middle being the middle VMA and vmg->target being the ultimately merged VMA. We additionally tackle the confusing cases where we 'adjust' VMAs other than the one we ultimately return as the merged VMA (this includes case b. above). These are: (1) merge <-----------> |------||--------| |------------|---| | prev || middle | -> | target | m | |------||--------| |------------|---| In which case middle must be adjusted so middle->vm_start is increased as well as performing the merge. (2) (equivalent to case b. above) <-------------> |---------||------| |---|-------------| | middle || next | -> | m | target | |---------||------| |---|-------------| In which case next must be adjusted so next->vm_start is decreased as well as performing the merge. This cases have previously been performed by calculating and passing around a dubious and confusing 'adj_start' parameter along side a pointer to an 'adjust' VMA indicating which VMA requires additional adjustment (middle in case 1 and next in case 2). With the VMG structure in place we are able to avoid this by simply setting a merge flag to describe each case: (1) Sets the vmg->__adjust_middle_start flag (2) Sets the vmg->__adjust_next_start flag By doing so it turns out we can vastly simplify the logic and calculate what is required to perform the operation. Taken together the refactorings make it far easier to understand what is being done even in these more confusing cases, make the code far more maintainable, debuggable, and testable, providing more internal state indicating what is happening in the merge operation. The changes have no functional net impact on the merge operation and everything should still behave as it did before. This patch (of 5): The merge code, while much improved, still has a number of points of confusion. As part of a broader series cleaning this up to make this more maintainable, we start by addressing some confusion around vma_merge_struct fields. So far, the caller either provides no vmg->vma (a new VMA) or supplies the existing VMA which is being altered, setting vmg->start,end,pgoff to the proposed VMA dimensions. vmg->vma is then updated, as are vmg->start,end,pgoff as the merge process proceeds and the appropriate merge strategy is determined. This is rather confusing, as vmg->vma starts off as the 'middle' VMA between vmg->prev,next, but becomes the 'target' VMA, except in one specific edge case (merge next, shrink middle). Int his patch we introduce vmg->middle to describe the VMA that is between vmg->prev and vmg->next, and does NOT change during the merge operation. We replace vmg->vma with vmg->target, and use this only during the merge operation itself. Aside from the merge right, shrink middle case, this becomes the VMA that forms the basis of the VMA that is returned. This edge case can be addressed in a future commit. We also add a number of comments to explain what is going on. Finally, we adjust the ASCII diagrams showing each merge case in vma_merge_existing_range() to be clearer - the arrow range previously showed the vmg->start, end spanned area, but it is clearer to change this to show the final merged VMA. This patch has no change in functional behaviour. Link: https://lkml.kernel.org/r/cover.1738326519.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/4dfe60f1419d55e5d0516f56349695d73a57184c.1738326519.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/debug.c | 18 ++--- mm/mmap.c | 2 +- mm/vma.c | 166 +++++++++++++++++++++------------------- mm/vma.h | 42 ++++++++-- tools/testing/vma/vma.c | 52 ++++++------- 5 files changed, 159 insertions(+), 121 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 8d2acf432385..c9e07651677b 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -261,7 +261,7 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) pr_warn("vmg %px state: mm %px pgoff %lx\n" "vmi %px [%lx,%lx)\n" - "prev %px next %px vma %px\n" + "prev %px middle %px next %px target %px\n" "start %lx end %lx flags %lx\n" "file %px anon_vma %px policy %px\n" "uffd_ctx %px\n" @@ -270,7 +270,7 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) vmg, vmg->mm, vmg->pgoff, vmg->vmi, vmg->vmi ? vma_iter_addr(vmg->vmi) : 0, vmg->vmi ? vma_iter_end(vmg->vmi) : 0, - vmg->prev, vmg->next, vmg->vma, + vmg->prev, vmg->middle, vmg->next, vmg->target, vmg->start, vmg->end, vmg->flags, vmg->file, vmg->anon_vma, vmg->policy, #ifdef CONFIG_USERFAULTFD @@ -288,13 +288,6 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) pr_warn("vmg %px mm: (NULL)\n", vmg); } - if (vmg->vma) { - pr_warn("vmg %px vma:\n", vmg); - dump_vma(vmg->vma); - } else { - pr_warn("vmg %px vma: (NULL)\n", vmg); - } - if (vmg->prev) { pr_warn("vmg %px prev:\n", vmg); dump_vma(vmg->prev); @@ -302,6 +295,13 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) pr_warn("vmg %px prev: (NULL)\n", vmg); } + if (vmg->middle) { + pr_warn("vmg %px middle:\n", vmg); + dump_vma(vmg->middle); + } else { + pr_warn("vmg %px middle: (NULL)\n", vmg); + } + if (vmg->next) { pr_warn("vmg %px next:\n", vmg); dump_vma(vmg->next); diff --git a/mm/mmap.c b/mm/mmap.c index cda01071c7b1..6401a1d73f4a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1707,7 +1707,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) /* * cover the whole range: [new_start, old_end) */ - vmg.vma = vma; + vmg.middle = vma; if (vma_expand(&vmg)) return -ENOMEM; diff --git a/mm/vma.c b/mm/vma.c index 71ca012c616c..7fca21dee7b3 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -52,7 +52,7 @@ struct mmap_state { .pgoff = (map_)->pgoff, \ .file = (map_)->file, \ .prev = (map_)->prev, \ - .vma = vma_, \ + .middle = vma_, \ .next = (vma_) ? NULL : (map_)->next, \ .state = VMA_MERGE_START, \ .merge_flags = VMG_FLAG_DEFAULT, \ @@ -639,7 +639,7 @@ static int commit_merge(struct vma_merge_struct *vmg, { struct vma_prepare vp; - init_multi_vma_prep(&vp, vmg->vma, adjust, remove, remove2); + init_multi_vma_prep(&vp, vmg->target, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && vp.anon_vma != adjust->anon_vma); @@ -652,15 +652,15 @@ static int commit_merge(struct vma_merge_struct *vmg, adjust->vm_end); } - if (vma_iter_prealloc(vmg->vmi, vmg->vma)) + if (vma_iter_prealloc(vmg->vmi, vmg->target)) return -ENOMEM; vma_prepare(&vp); - vma_adjust_trans_huge(vmg->vma, vmg->start, vmg->end, adj_start); - vma_set_range(vmg->vma, vmg->start, vmg->end, vmg->pgoff); + vma_adjust_trans_huge(vmg->target, vmg->start, vmg->end, adj_start); + vma_set_range(vmg->target, vmg->start, vmg->end, vmg->pgoff); if (expanded) - vma_iter_store(vmg->vmi, vmg->vma); + vma_iter_store(vmg->vmi, vmg->target); if (adj_start) { adjust->vm_start += adj_start; @@ -671,7 +671,7 @@ static int commit_merge(struct vma_merge_struct *vmg, } } - vma_complete(&vp, vmg->vmi, vmg->vma->vm_mm); + vma_complete(&vp, vmg->vmi, vmg->target->vm_mm); return 0; } @@ -694,8 +694,9 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma) * identical properties. * * This function checks for the existence of any such mergeable VMAs and updates - * the maple tree describing the @vmg->vma->vm_mm address space to account for - * this, as well as any VMAs shrunk/expanded/deleted as a result of this merge. + * the maple tree describing the @vmg->middle->vm_mm address space to account + * for this, as well as any VMAs shrunk/expanded/deleted as a result of this + * merge. * * As part of this operation, if a merge occurs, the @vmg object will have its * vma, start, end, and pgoff fields modified to execute the merge. Subsequent @@ -704,45 +705,47 @@ static bool can_merge_remove_vma(struct vm_area_struct *vma) * Returns: The merged VMA if merge succeeds, or NULL otherwise. * * ASSUMPTIONS: - * - The caller must assign the VMA to be modifed to @vmg->vma. + * - The caller must assign the VMA to be modifed to @vmg->middle. * - The caller must have set @vmg->prev to the previous VMA, if there is one. * - The caller must not set @vmg->next, as we determine this. * - The caller must hold a WRITE lock on the mm_struct->mmap_lock. - * - vmi must be positioned within [@vmg->vma->vm_start, @vmg->vma->vm_end). + * - vmi must be positioned within [@vmg->middle->vm_start, @vmg->middle->vm_end). */ static __must_check struct vm_area_struct *vma_merge_existing_range( struct vma_merge_struct *vmg) { - struct vm_area_struct *vma = vmg->vma; + struct vm_area_struct *middle = vmg->middle; struct vm_area_struct *prev = vmg->prev; struct vm_area_struct *next, *res; struct vm_area_struct *anon_dup = NULL; struct vm_area_struct *adjust = NULL; unsigned long start = vmg->start; unsigned long end = vmg->end; - bool left_side = vma && start == vma->vm_start; - bool right_side = vma && end == vma->vm_end; + bool left_side = middle && start == middle->vm_start; + bool right_side = middle && end == middle->vm_end; int err = 0; long adj_start = 0; - bool merge_will_delete_vma, merge_will_delete_next; + bool merge_will_delete_middle, merge_will_delete_next; bool merge_left, merge_right, merge_both; bool expanded; mmap_assert_write_locked(vmg->mm); - VM_WARN_ON_VMG(!vma, vmg); /* We are modifying a VMA, so caller must specify. */ + VM_WARN_ON_VMG(!middle, vmg); /* We are modifying a VMA, so caller must specify. */ VM_WARN_ON_VMG(vmg->next, vmg); /* We set this. */ VM_WARN_ON_VMG(prev && start <= prev->vm_start, vmg); VM_WARN_ON_VMG(start >= end, vmg); /* - * If vma == prev, then we are offset into a VMA. Otherwise, if we are + * If middle == prev, then we are offset into a VMA. Otherwise, if we are * not, we must span a portion of the VMA. */ - VM_WARN_ON_VMG(vma && ((vma != prev && vmg->start != vma->vm_start) || - vmg->end > vma->vm_end), vmg); - /* The vmi must be positioned within vmg->vma. */ - VM_WARN_ON_VMG(vma && !(vma_iter_addr(vmg->vmi) >= vma->vm_start && - vma_iter_addr(vmg->vmi) < vma->vm_end), vmg); + VM_WARN_ON_VMG(middle && + ((middle != prev && vmg->start != middle->vm_start) || + vmg->end > middle->vm_end), vmg); + /* The vmi must be positioned within vmg->middle. */ + VM_WARN_ON_VMG(middle && + !(vma_iter_addr(vmg->vmi) >= middle->vm_start && + vma_iter_addr(vmg->vmi) < middle->vm_end), vmg); vmg->state = VMA_MERGE_NOMERGE; @@ -776,13 +779,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( merge_both = merge_left && merge_right; /* If we span the entire VMA, a merge implies it will be deleted. */ - merge_will_delete_vma = left_side && right_side; + merge_will_delete_middle = left_side && right_side; /* - * If we need to remove vma in its entirety but are unable to do so, + * If we need to remove middle in its entirety but are unable to do so, * we have no sensible recourse but to abort the merge. */ - if (merge_will_delete_vma && !can_merge_remove_vma(vma)) + if (merge_will_delete_middle && !can_merge_remove_vma(middle)) return NULL; /* @@ -793,7 +796,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( /* * If we cannot delete next, then we can reduce the operation to merging - * prev and vma (thereby deleting vma). + * prev and middle (thereby deleting middle). */ if (merge_will_delete_next && !can_merge_remove_vma(next)) { merge_will_delete_next = false; @@ -801,8 +804,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( merge_both = false; } - /* No matter what happens, we will be adjusting vma. */ - vma_start_write(vma); + /* No matter what happens, we will be adjusting middle. */ + vma_start_write(middle); if (merge_left) vma_start_write(prev); @@ -812,13 +815,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (merge_both) { /* - * |<----->| - * |-------*********-------| - * prev vma next - * extend delete delete + * |<-------------------->| + * |-------********-------| + * prev middle next + * extend delete delete */ - vmg->vma = prev; + vmg->target = prev; vmg->start = prev->vm_start; vmg->end = next->vm_end; vmg->pgoff = prev->vm_pgoff; @@ -826,78 +829,79 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( /* * We already ensured anon_vma compatibility above, so now it's * simply a case of, if prev has no anon_vma object, which of - * next or vma contains the anon_vma we must duplicate. + * next or middle contains the anon_vma we must duplicate. */ - err = dup_anon_vma(prev, next->anon_vma ? next : vma, &anon_dup); + err = dup_anon_vma(prev, next->anon_vma ? next : middle, + &anon_dup); } else if (merge_left) { /* - * |<----->| OR - * |<--------->| + * |<------------>| OR + * |<----------------->| * |-------************* - * prev vma + * prev middle * extend shrink/delete */ - vmg->vma = prev; + vmg->target = prev; vmg->start = prev->vm_start; vmg->pgoff = prev->vm_pgoff; - if (!merge_will_delete_vma) { - adjust = vma; - adj_start = vmg->end - vma->vm_start; + if (!merge_will_delete_middle) { + adjust = middle; + adj_start = vmg->end - middle->vm_start; } - err = dup_anon_vma(prev, vma, &anon_dup); + err = dup_anon_vma(prev, middle, &anon_dup); } else { /* merge_right */ /* - * |<----->| OR - * |<--------->| + * |<------------->| OR + * |<----------------->| * *************-------| - * vma next + * middle next * shrink/delete extend */ pgoff_t pglen = PHYS_PFN(vmg->end - vmg->start); VM_WARN_ON_VMG(!merge_right, vmg); - /* If we are offset into a VMA, then prev must be vma. */ - VM_WARN_ON_VMG(vmg->start > vma->vm_start && prev && vma != prev, vmg); + /* If we are offset into a VMA, then prev must be middle. */ + VM_WARN_ON_VMG(vmg->start > middle->vm_start && prev && middle != prev, vmg); - if (merge_will_delete_vma) { - vmg->vma = next; + if (merge_will_delete_middle) { + vmg->target = next; vmg->end = next->vm_end; vmg->pgoff = next->vm_pgoff - pglen; } else { /* - * We shrink vma and expand next. + * We shrink middle and expand next. * * IMPORTANT: This is the ONLY case where the final - * merged VMA is NOT vmg->vma, but rather vmg->next. + * merged VMA is NOT vmg->target, but rather vmg->next. */ - - vmg->start = vma->vm_start; + vmg->target = middle; + vmg->start = middle->vm_start; vmg->end = start; - vmg->pgoff = vma->vm_pgoff; + vmg->pgoff = middle->vm_pgoff; adjust = next; - adj_start = -(vma->vm_end - start); + adj_start = -(middle->vm_end - start); } - err = dup_anon_vma(next, vma, &anon_dup); + err = dup_anon_vma(next, middle, &anon_dup); } if (err) goto abort; /* - * In nearly all cases, we expand vmg->vma. There is one exception - + * In nearly all cases, we expand vmg->middle. There is one exception - * merge_right where we partially span the VMA. In this case we shrink - * the end of vmg->vma and adjust the start of vmg->next accordingly. + * the end of vmg->middle and adjust the start of vmg->next accordingly. */ - expanded = !merge_right || merge_will_delete_vma; + expanded = !merge_right || merge_will_delete_middle; if (commit_merge(vmg, adjust, - merge_will_delete_vma ? vma : NULL, + merge_will_delete_middle ? middle : NULL, merge_will_delete_next ? next : NULL, adj_start, expanded)) { if (anon_dup) @@ -973,7 +977,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) bool just_expand = vmg->merge_flags & VMG_FLAG_JUST_EXPAND; mmap_assert_write_locked(vmg->mm); - VM_WARN_ON_VMG(vmg->vma, vmg); + VM_WARN_ON_VMG(vmg->middle, vmg); /* vmi must point at or before the gap. */ VM_WARN_ON_VMG(vma_iter_addr(vmg->vmi) > end, vmg); @@ -989,13 +993,13 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) /* If we can merge with the next VMA, adjust vmg accordingly. */ if (can_merge_right) { vmg->end = next->vm_end; - vmg->vma = next; + vmg->middle = next; } /* If we can merge with the previous VMA, adjust vmg accordingly. */ if (can_merge_left) { vmg->start = prev->vm_start; - vmg->vma = prev; + vmg->middle = prev; vmg->pgoff = prev->vm_pgoff; /* @@ -1017,10 +1021,10 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) * Now try to expand adjacent VMA(s). This takes care of removing the * following VMA if we have VMAs on both sides. */ - if (vmg->vma && !vma_expand(vmg)) { - khugepaged_enter_vma(vmg->vma, vmg->flags); + if (vmg->middle && !vma_expand(vmg)) { + khugepaged_enter_vma(vmg->middle, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; - return vmg->vma; + return vmg->middle; } return NULL; @@ -1032,44 +1036,46 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) * @vmg: Describes a VMA expansion operation. * * Expand @vma to vmg->start and vmg->end. Can expand off the start and end. - * Will expand over vmg->next if it's different from vmg->vma and vmg->end == - * vmg->next->vm_end. Checking if the vmg->vma can expand and merge with + * Will expand over vmg->next if it's different from vmg->middle and vmg->end == + * vmg->next->vm_end. Checking if the vmg->middle can expand and merge with * vmg->next needs to be handled by the caller. * * Returns: 0 on success. * * ASSUMPTIONS: - * - The caller must hold a WRITE lock on vmg->vma->mm->mmap_lock. - * - The caller must have set @vmg->vma and @vmg->next. + * - The caller must hold a WRITE lock on vmg->middle->mm->mmap_lock. + * - The caller must have set @vmg->middle and @vmg->next. */ int vma_expand(struct vma_merge_struct *vmg) { struct vm_area_struct *anon_dup = NULL; bool remove_next = false; - struct vm_area_struct *vma = vmg->vma; + struct vm_area_struct *middle = vmg->middle; struct vm_area_struct *next = vmg->next; mmap_assert_write_locked(vmg->mm); - vma_start_write(vma); - if (next && (vma != next) && (vmg->end == next->vm_end)) { + vma_start_write(middle); + if (next && (middle != next) && (vmg->end == next->vm_end)) { int ret; remove_next = true; /* This should already have been checked by this point. */ VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); vma_start_write(next); - ret = dup_anon_vma(vma, next, &anon_dup); + ret = dup_anon_vma(middle, next, &anon_dup); if (ret) return ret; } /* Not merging but overwriting any part of next is not handled. */ VM_WARN_ON_VMG(next && !remove_next && - next != vma && vmg->end > next->vm_start, vmg); + next != middle && vmg->end > next->vm_start, vmg); /* Only handles expanding */ - VM_WARN_ON_VMG(vma->vm_start < vmg->start || vma->vm_end > vmg->end, vmg); + VM_WARN_ON_VMG(middle->vm_start < vmg->start || + middle->vm_end > vmg->end, vmg); + vmg->target = middle; if (commit_merge(vmg, NULL, remove_next ? next : NULL, NULL, 0, true)) goto nomem; @@ -1508,7 +1514,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, */ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) { - struct vm_area_struct *vma = vmg->vma; + struct vm_area_struct *vma = vmg->middle; unsigned long start = vmg->start; unsigned long end = vmg->end; struct vm_area_struct *merged; @@ -1609,7 +1615,7 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta); vmg.next = vma_iter_next_rewind(vmi, NULL); - vmg.vma = NULL; /* We use the VMA to populate VMG fields only. */ + vmg.middle = NULL; /* We use the VMA to populate VMG fields only. */ return vma_merge_new_range(&vmg); } @@ -1730,7 +1736,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */ - vmg.vma = NULL; /* New VMA range. */ + vmg.middle = NULL; /* New VMA range. */ vmg.pgoff = pgoff; vmg.next = vma_iter_next_rewind(&vmi, NULL); new_vma = vma_merge_new_range(&vmg); diff --git a/mm/vma.h b/mm/vma.h index a2e8710b8c47..5b5dd07e478c 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -69,16 +69,48 @@ enum vma_merge_flags { VMG_FLAG_JUST_EXPAND = 1 << 0, }; -/* Represents a VMA merge operation. */ +/* + * Describes a VMA merge operation and is threaded throughout it. + * + * Any of the fields may be mutated by the merge operation, so no guarantees are + * made to the contents of this structure after a merge operation has completed. + */ struct vma_merge_struct { struct mm_struct *mm; struct vma_iterator *vmi; - pgoff_t pgoff; + /* + * Adjacent VMAs, any of which may be NULL if not present: + * + * |------|--------|------| + * | prev | middle | next | + * |------|--------|------| + * + * middle may not yet exist in the case of a proposed new VMA being + * merged, or it may be an existing VMA. + * + * next may be assigned by the caller. + */ struct vm_area_struct *prev; - struct vm_area_struct *next; /* Modified by vma_merge(). */ - struct vm_area_struct *vma; /* Either a new VMA or the one being modified. */ + struct vm_area_struct *middle; + struct vm_area_struct *next; + /* + * This is the VMA we ultimately target to become the merged VMA, except + * for the one exception of merge right, shrink next (for details of + * this scenario see vma_merge_existing_range()). + */ + struct vm_area_struct *target; + /* + * Initially, the start, end, pgoff fields are provided by the caller + * and describe the proposed new VMA range, whether modifying an + * existing VMA (which will be 'middle'), or adding a new one. + * + * During the merge process these fields are updated to describe the new + * range _including those VMAs which will be merged_. + */ unsigned long start; unsigned long end; + pgoff_t pgoff; + unsigned long flags; struct file *file; struct anon_vma *anon_vma; @@ -118,8 +150,8 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, .mm = vma_->vm_mm, \ .vmi = vmi_, \ .prev = prev_, \ + .middle = vma_, \ .next = NULL, \ - .vma = vma_, \ .start = start_, \ .end = end_, \ .flags = vma_->vm_flags, \ diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 04ab45e27fb8..3c0572120e94 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -147,8 +147,8 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vma_iter_set(vmg->vmi, start); vmg->prev = NULL; + vmg->middle = NULL; vmg->next = NULL; - vmg->vma = NULL; vmg->start = start; vmg->end = end; @@ -338,7 +338,7 @@ static bool test_simple_expand(void) VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { .vmi = &vmi, - .vma = vma, + .middle = vma, .start = 0, .end = 0x3000, .pgoff = 0, @@ -631,7 +631,7 @@ static bool test_vma_merge_special_flags(void) */ vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); ASSERT_NE(vma, NULL); - vmg.vma = vma; + vmg.middle = vma; for (i = 0; i < ARRAY_SIZE(special_flags); i++) { vm_flags_t special_flag = special_flags[i]; @@ -760,7 +760,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; /* * The VMA being modified in a way that would otherwise merge should @@ -787,7 +787,7 @@ static bool test_vma_merge_with_close(void) vma->vm_ops = &vm_ops; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); /* * Initially this is misapprehended as an out of memory report, as the @@ -817,7 +817,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -843,7 +843,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -940,7 +940,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); - vmg.vma = vma; + vmg.middle = vma; vmg.prev = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_next); @@ -973,7 +973,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags); - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1003,7 +1003,7 @@ static bool test_merge_existing(void) vma->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); @@ -1037,7 +1037,7 @@ static bool test_merge_existing(void) vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1067,7 +1067,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1102,37 +1102,37 @@ static bool test_merge_existing(void) vmg_set_range(&vmg, 0x4000, 0x5000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -1197,7 +1197,7 @@ static bool test_anon_vma_non_mergeable(void) vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1277,7 +1277,7 @@ static bool test_dup_anon_vma(void) vma_next->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0, 0x5000, 0, flags); - vmg.vma = vma_prev; + vmg.middle = vma_prev; vmg.next = vma_next; ASSERT_EQ(expand_existing(&vmg), 0); @@ -1309,7 +1309,7 @@ static bool test_dup_anon_vma(void) vma_next->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1338,7 +1338,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1366,7 +1366,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1394,7 +1394,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1432,7 +1432,7 @@ static bool test_vmi_prealloc_fail(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; fail_prealloc = true; @@ -1458,7 +1458,7 @@ static bool test_vmi_prealloc_fail(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0, 0x5000, 3, flags); - vmg.vma = vma_prev; + vmg.middle = vma_prev; vmg.next = vma; fail_prealloc = true; From 6ab2d9c7c680c0a041477126722cebe7dc57f713 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 31 Jan 2025 12:31:50 +0000 Subject: [PATCH 0856/2157] mm: further refactor commit_merge() The current VMA merge mechanism contains a number of confusing mechanisms around removal of VMAs on merge and the shrinking of the VMA adjacent to vma->target in the case of merges which result in a partial merge with that adjacent VMA. Since we now have a STABLE set of VMAs - prev, middle, next - we are now able to have the caller of commit_merge() explicitly tell us which VMAs need deleting, using newly introduced internal VMA merge flags. Doing so allows us to embed this state within the VMG and remove the confusing remove, remove2 parameters from commit_merge(). We additionally are able to eliminate the highly confusing and misleading 'expanded' parameter - a parameter that in reality refers to whether or not the return VMA is the target one or the one immediately adjacent. We can infer which is the case from whether or not the adj_start parameter is negative. This also allows us to simplify further logic around iterator configuration and VMA iterator stores. Doing so means we can also eliminate the adjust parameter, as we are able to infer which VMA ought to be adjusted from adj_start - a positive value implies we adjust the start of 'middle', a negative one implies we adjust the start of 'next'. We are then able to have commit_merge() explicitly return the target VMA, or NULL on inability to pre-allocate memory. Errors were previously filtered so behaviour does not change. We additionally move from the slightly odd use of a bitwise-flag enum vmg->merge_flags field to vmg bitfields. This patch has no change in functional behaviour. Link: https://lkml.kernel.org/r/7bf2ed24af68aac18672b7acebbd9102f48c5b03.1738326519.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/debug.c | 6 ++- mm/vma.c | 101 +++++++++++++++++++++------------------- mm/vma.h | 38 +++++++++------ tools/testing/vma/vma.c | 9 +++- 4 files changed, 87 insertions(+), 67 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index c9e07651677b..60c6f1134383 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -266,7 +266,8 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) "file %px anon_vma %px policy %px\n" "uffd_ctx %px\n" "anon_name %px\n" - "merge_flags %x state %x\n", + "state %x\n" + "just_expand %d __remove_middle %d __remove_next %d\n", vmg, vmg->mm, vmg->pgoff, vmg->vmi, vmg->vmi ? vma_iter_addr(vmg->vmi) : 0, vmg->vmi ? vma_iter_end(vmg->vmi) : 0, @@ -279,7 +280,8 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) (void *)0, #endif vmg->anon_name, - (int)vmg->merge_flags, (int)vmg->state); + (int)vmg->state, + vmg->just_expand, vmg->__remove_middle, vmg->__remove_next); if (vmg->mm) { pr_warn("vmg %px mm:\n", vmg); diff --git a/mm/vma.c b/mm/vma.c index 7fca21dee7b3..dd9fdf5c9429 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -55,7 +55,6 @@ struct mmap_state { .middle = vma_, \ .next = (vma_) ? NULL : (map_)->next, \ .state = VMA_MERGE_START, \ - .merge_flags = VMG_FLAG_DEFAULT, \ } static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next) @@ -120,8 +119,8 @@ static void init_multi_vma_prep(struct vma_prepare *vp, memset(vp, 0, sizeof(struct vma_prepare)); vp->vma = vma; vp->anon_vma = vma->anon_vma; - vp->remove = remove; - vp->remove2 = remove2; + vp->remove = remove ? remove : remove2; + vp->remove2 = remove ? remove2 : NULL; vp->adj_next = next; if (!vp->anon_vma && next) vp->anon_vma = next->anon_vma; @@ -129,7 +128,6 @@ static void init_multi_vma_prep(struct vma_prepare *vp, vp->file = vma->vm_file; if (vp->file) vp->mapping = vma->vm_file->f_mapping; - } /* @@ -629,22 +627,40 @@ void validate_mm(struct mm_struct *mm) } #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */ -/* Actually perform the VMA merge operation. */ -static int commit_merge(struct vma_merge_struct *vmg, - struct vm_area_struct *adjust, - struct vm_area_struct *remove, - struct vm_area_struct *remove2, - long adj_start, - bool expanded) +/* + * Actually perform the VMA merge operation. + * + * On success, returns the merged VMA. Otherwise returns NULL. + */ +static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg, + long adj_start) { struct vma_prepare vp; + struct vm_area_struct *remove = NULL; + struct vm_area_struct *remove2 = NULL; + struct vm_area_struct *adjust = NULL; + /* + * In all cases but that of merge right, shrink next, we write + * vmg->target to the maple tree and return this as the merged VMA. + */ + bool merge_target = adj_start >= 0; + + if (vmg->__remove_middle) + remove = vmg->middle; + if (vmg->__remove_next) + remove2 = vmg->next; + + if (adj_start > 0) + adjust = vmg->middle; + else if (adj_start < 0) + adjust = vmg->next; init_multi_vma_prep(&vp, vmg->target, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && vp.anon_vma != adjust->anon_vma); - if (expanded) { + if (merge_target) { /* Note: vma iterator must be pointing to 'start'. */ vma_iter_config(vmg->vmi, vmg->start, vmg->end); } else { @@ -653,27 +669,26 @@ static int commit_merge(struct vma_merge_struct *vmg, } if (vma_iter_prealloc(vmg->vmi, vmg->target)) - return -ENOMEM; + return NULL; vma_prepare(&vp); vma_adjust_trans_huge(vmg->target, vmg->start, vmg->end, adj_start); vma_set_range(vmg->target, vmg->start, vmg->end, vmg->pgoff); - if (expanded) + if (merge_target) vma_iter_store(vmg->vmi, vmg->target); if (adj_start) { adjust->vm_start += adj_start; adjust->vm_pgoff += PHYS_PFN(adj_start); - if (adj_start < 0) { - WARN_ON(expanded); + + if (!merge_target) vma_iter_store(vmg->vmi, adjust); - } } vma_complete(&vp, vmg->vmi, vmg->target->vm_mm); - return 0; + return merge_target ? vmg->target : vmg->next; } /* We can only remove VMAs when merging if they do not have a close hook. */ @@ -718,16 +733,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( struct vm_area_struct *prev = vmg->prev; struct vm_area_struct *next, *res; struct vm_area_struct *anon_dup = NULL; - struct vm_area_struct *adjust = NULL; unsigned long start = vmg->start; unsigned long end = vmg->end; bool left_side = middle && start == middle->vm_start; bool right_side = middle && end == middle->vm_end; int err = 0; long adj_start = 0; - bool merge_will_delete_middle, merge_will_delete_next; bool merge_left, merge_right, merge_both; - bool expanded; mmap_assert_write_locked(vmg->mm); VM_WARN_ON_VMG(!middle, vmg); /* We are modifying a VMA, so caller must specify. */ @@ -779,27 +791,27 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( merge_both = merge_left && merge_right; /* If we span the entire VMA, a merge implies it will be deleted. */ - merge_will_delete_middle = left_side && right_side; + vmg->__remove_middle = left_side && right_side; /* * If we need to remove middle in its entirety but are unable to do so, * we have no sensible recourse but to abort the merge. */ - if (merge_will_delete_middle && !can_merge_remove_vma(middle)) + if (vmg->__remove_middle && !can_merge_remove_vma(middle)) return NULL; /* * If we merge both VMAs, then next is also deleted. This implies * merge_will_delete_vma also. */ - merge_will_delete_next = merge_both; + vmg->__remove_next = merge_both; /* * If we cannot delete next, then we can reduce the operation to merging * prev and middle (thereby deleting middle). */ - if (merge_will_delete_next && !can_merge_remove_vma(next)) { - merge_will_delete_next = false; + if (vmg->__remove_next && !can_merge_remove_vma(next)) { + vmg->__remove_next = false; merge_right = false; merge_both = false; } @@ -846,10 +858,11 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vmg->start = prev->vm_start; vmg->pgoff = prev->vm_pgoff; - if (!merge_will_delete_middle) { - adjust = middle; + /* + * We both expand prev and shrink middle. + */ + if (!vmg->__remove_middle) adj_start = vmg->end - middle->vm_start; - } err = dup_anon_vma(prev, middle, &anon_dup); } else { /* merge_right */ @@ -867,7 +880,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( /* If we are offset into a VMA, then prev must be middle. */ VM_WARN_ON_VMG(vmg->start > middle->vm_start && prev && middle != prev, vmg); - if (merge_will_delete_middle) { + if (vmg->__remove_middle) { vmg->target = next; vmg->end = next->vm_end; vmg->pgoff = next->vm_pgoff - pglen; @@ -883,7 +896,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vmg->end = start; vmg->pgoff = middle->vm_pgoff; - adjust = next; adj_start = -(middle->vm_end - start); } @@ -893,17 +905,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (err) goto abort; - /* - * In nearly all cases, we expand vmg->middle. There is one exception - - * merge_right where we partially span the VMA. In this case we shrink - * the end of vmg->middle and adjust the start of vmg->next accordingly. - */ - expanded = !merge_right || merge_will_delete_middle; - - if (commit_merge(vmg, adjust, - merge_will_delete_middle ? middle : NULL, - merge_will_delete_next ? next : NULL, - adj_start, expanded)) { + res = commit_merge(vmg, adj_start); + if (!res) { if (anon_dup) unlink_anon_vmas(anon_dup); @@ -911,9 +914,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( return NULL; } - res = merge_left ? prev : next; khugepaged_enter_vma(res, vmg->flags); - vmg->state = VMA_MERGE_SUCCESS; return res; @@ -974,7 +975,6 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) struct vm_area_struct *next = vmg->next; unsigned long end = vmg->end; bool can_merge_left, can_merge_right; - bool just_expand = vmg->merge_flags & VMG_FLAG_JUST_EXPAND; mmap_assert_write_locked(vmg->mm); VM_WARN_ON_VMG(vmg->middle, vmg); @@ -988,7 +988,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) return NULL; can_merge_left = can_vma_merge_left(vmg); - can_merge_right = !just_expand && can_vma_merge_right(vmg, can_merge_left); + can_merge_right = !vmg->just_expand && can_vma_merge_right(vmg, can_merge_left); /* If we can merge with the next VMA, adjust vmg accordingly. */ if (can_merge_right) { @@ -1011,7 +1011,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) vmg->end = end; /* In expand-only case we are already positioned at prev. */ - if (!just_expand) { + if (!vmg->just_expand) { /* Equivalent to going to the previous range. */ vma_prev(vmg->vmi); } @@ -1076,7 +1076,10 @@ int vma_expand(struct vma_merge_struct *vmg) middle->vm_end > vmg->end, vmg); vmg->target = middle; - if (commit_merge(vmg, NULL, remove_next ? next : NULL, NULL, 0, true)) + if (remove_next) + vmg->__remove_next = true; + + if (!commit_merge(vmg, 0)) goto nomem; return 0; @@ -2593,7 +2596,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, vmg.prev = vma; /* vmi is positioned at prev, which this mode expects. */ - vmg.merge_flags = VMG_FLAG_JUST_EXPAND; + vmg.just_expand = true; if (vma_merge_new_range(&vmg)) goto out; diff --git a/mm/vma.h b/mm/vma.h index 5b5dd07e478c..7935681a2db8 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -58,17 +58,6 @@ enum vma_merge_state { VMA_MERGE_SUCCESS, }; -enum vma_merge_flags { - VMG_FLAG_DEFAULT = 0, - /* - * If we can expand, simply do so. We know there is nothing to merge to - * the right. Does not reset state upon failure to merge. The VMA - * iterator is assumed to be positioned at the previous VMA, rather than - * at the gap. - */ - VMG_FLAG_JUST_EXPAND = 1 << 0, -}; - /* * Describes a VMA merge operation and is threaded throughout it. * @@ -117,8 +106,31 @@ struct vma_merge_struct { struct mempolicy *policy; struct vm_userfaultfd_ctx uffd_ctx; struct anon_vma_name *anon_name; - enum vma_merge_flags merge_flags; enum vma_merge_state state; + + /* Flags which callers can use to modify merge behaviour: */ + + /* + * If we can expand, simply do so. We know there is nothing to merge to + * the right. Does not reset state upon failure to merge. The VMA + * iterator is assumed to be positioned at the previous VMA, rather than + * at the gap. + */ + bool just_expand :1; + + /* Internal flags set during merge process: */ + + /* + * Internal flag used during the merge operation to indicate we will + * remove vmg->middle. + */ + bool __remove_middle :1; + /* + * Internal flag used during the merge operationr to indicate we will + * remove vmg->next. + */ + bool __remove_next :1; + }; static inline bool vmg_nomem(struct vma_merge_struct *vmg) @@ -142,7 +154,6 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, .flags = flags_, \ .pgoff = pgoff_, \ .state = VMA_MERGE_START, \ - .merge_flags = VMG_FLAG_DEFAULT, \ } #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \ @@ -162,7 +173,6 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, .uffd_ctx = vma_->vm_userfaultfd_ctx, \ .anon_name = anon_vma_name(vma_), \ .state = VMA_MERGE_START, \ - .merge_flags = VMG_FLAG_DEFAULT, \ } #ifdef CONFIG_DEBUG_VM_MAPLE_TREE diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 3c0572120e94..7728498b2f7e 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -149,11 +149,16 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vmg->prev = NULL; vmg->middle = NULL; vmg->next = NULL; + vmg->target = NULL; vmg->start = start; vmg->end = end; vmg->pgoff = pgoff; vmg->flags = flags; + + vmg->just_expand = false; + vmg->__remove_middle = false; + vmg->__remove_next = false; } /* @@ -1546,7 +1551,7 @@ static bool test_expand_only_mode(void) /* * Place a VMA prior to the one we're expanding so we assert that we do * not erroneously try to traverse to the previous VMA even though we - * have, through the use of VMG_FLAG_JUST_EXPAND, indicated we do not + * have, through the use of the just_expand flag, indicated we do not * need to do so. */ alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); @@ -1558,7 +1563,7 @@ static bool test_expand_only_mode(void) vma_iter_set(&vmi, 0x3000); vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.merge_flags = VMG_FLAG_JUST_EXPAND; + vmg.just_expand = true; vma = vma_merge_new_range(&vmg); ASSERT_NE(vma, NULL); From fe3e9cf0d7a28d333523189d1405770d980b07d6 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 31 Jan 2025 12:31:51 +0000 Subject: [PATCH 0857/2157] mm: eliminate adj_start parameter from commit_merge() Introduce internal vmg->__adjust_middle_start and vmg->__adjust_next_start merge flags, enabling us to indicate to commit_merge() that we are performing a merge which either spans only part of vmg->middle, or part of vmg->next respectively. In the former instance, we change the start of vmg->middle to match the attributes of vmg->prev, without spanning all of vmg->middle. This implies that vmg->prev->vm_end and vmg->middle->vm_start are both increased to form the new merged VMA (vmg->prev) and the new subsequent VMA (vmg->middle). In the latter case, we change the end of vmg->middle to match the attributes of vmg->next, without spanning all of vmg->next. This implies that vmg->middle->vm_end and vmg->next->vm_start are both decreased to form the new merged VMA (vmg->next) and the new prior VMA (vmg->middle). Since we now have a stable set of prev, middle, next VMAs threaded through vmg and with these flags set know what is happening, we can perform the calculation in commit_merge() instead. This allows us to drop the confusing adj_start parameter and instead pass semantic information to commit_merge(). In the latter case the -(middle->vm_end - start) calculation becomes -(middle->vm-end - vmg->end), however this is correct as vmg->end is set to the start parameter. This is because in this case (rather confusingly), we manipulate vmg->middle, but ultimately return vmg->next, whose range will be correctly specified. At this point vmg->start, end is the new range for the prior VMA rather than the merged one. This patch has no change in functional behaviour. Link: https://lkml.kernel.org/r/bcec0cd980b373a5eb02236cb033034ce1effe42.1738326519.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/debug.c | 8 +++++-- mm/vma.c | 50 ++++++++++++++++++++++++----------------- mm/vma.h | 10 +++++++++ tools/testing/vma/vma.c | 2 ++ 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 60c6f1134383..e1282b85a877 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -267,7 +267,9 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) "uffd_ctx %px\n" "anon_name %px\n" "state %x\n" - "just_expand %d __remove_middle %d __remove_next %d\n", + "just_expand %d\n" + "__adjust_middle_start %d __adjust_next_start %d\n" + "__remove_middle %d __remove_next %d\n", vmg, vmg->mm, vmg->pgoff, vmg->vmi, vmg->vmi ? vma_iter_addr(vmg->vmi) : 0, vmg->vmi ? vma_iter_end(vmg->vmi) : 0, @@ -281,7 +283,9 @@ void dump_vmg(const struct vma_merge_struct *vmg, const char *reason) #endif vmg->anon_name, (int)vmg->state, - vmg->just_expand, vmg->__remove_middle, vmg->__remove_next); + vmg->just_expand, + vmg->__adjust_middle_start, vmg->__adjust_next_start, + vmg->__remove_middle, vmg->__remove_next); if (vmg->mm) { pr_warn("vmg %px mm:\n", vmg); diff --git a/mm/vma.c b/mm/vma.c index dd9fdf5c9429..bac0e72ccb62 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -632,29 +632,44 @@ void validate_mm(struct mm_struct *mm) * * On success, returns the merged VMA. Otherwise returns NULL. */ -static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg, - long adj_start) +static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) { - struct vma_prepare vp; struct vm_area_struct *remove = NULL; struct vm_area_struct *remove2 = NULL; + struct vma_prepare vp; struct vm_area_struct *adjust = NULL; + long adj_start; + bool merge_target; + /* - * In all cases but that of merge right, shrink next, we write - * vmg->target to the maple tree and return this as the merged VMA. + * If modifying an existing VMA and we don't remove vmg->middle, then we + * shrink the adjacent VMA. */ - bool merge_target = adj_start >= 0; + if (vmg->__adjust_middle_start) { + adjust = vmg->middle; + /* The POSITIVE value by which we offset vmg->middle->vm_start. */ + adj_start = vmg->end - vmg->middle->vm_start; + merge_target = true; + } else if (vmg->__adjust_next_start) { + adjust = vmg->next; + /* The NEGATIVE value by which we offset vmg->next->vm_start. */ + adj_start = -(vmg->middle->vm_end - vmg->end); + /* + * In all cases but this - merge right, shrink next - we write + * vmg->target to the maple tree and return this as the merged VMA. + */ + merge_target = false; + } else { + adjust = NULL; + adj_start = 0; + merge_target = true; + } if (vmg->__remove_middle) remove = vmg->middle; if (vmg->__remove_next) remove2 = vmg->next; - if (adj_start > 0) - adjust = vmg->middle; - else if (adj_start < 0) - adjust = vmg->next; - init_multi_vma_prep(&vp, vmg->target, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && @@ -738,7 +753,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( bool left_side = middle && start == middle->vm_start; bool right_side = middle && end == middle->vm_end; int err = 0; - long adj_start = 0; bool merge_left, merge_right, merge_both; mmap_assert_write_locked(vmg->mm); @@ -858,11 +872,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vmg->start = prev->vm_start; vmg->pgoff = prev->vm_pgoff; - /* - * We both expand prev and shrink middle. - */ if (!vmg->__remove_middle) - adj_start = vmg->end - middle->vm_start; + vmg->__adjust_middle_start = true; err = dup_anon_vma(prev, middle, &anon_dup); } else { /* merge_right */ @@ -891,12 +902,11 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( * IMPORTANT: This is the ONLY case where the final * merged VMA is NOT vmg->target, but rather vmg->next. */ + vmg->__adjust_next_start = true; vmg->target = middle; vmg->start = middle->vm_start; vmg->end = start; vmg->pgoff = middle->vm_pgoff; - - adj_start = -(middle->vm_end - start); } err = dup_anon_vma(next, middle, &anon_dup); @@ -905,7 +915,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (err) goto abort; - res = commit_merge(vmg, adj_start); + res = commit_merge(vmg); if (!res) { if (anon_dup) unlink_anon_vmas(anon_dup); @@ -1079,7 +1089,7 @@ int vma_expand(struct vma_merge_struct *vmg) if (remove_next) vmg->__remove_next = true; - if (!commit_merge(vmg, 0)) + if (!commit_merge(vmg)) goto nomem; return 0; diff --git a/mm/vma.h b/mm/vma.h index 7935681a2db8..e18487797fa4 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -120,6 +120,16 @@ struct vma_merge_struct { /* Internal flags set during merge process: */ + /* + * Internal flag indicating the merge increases vmg->middle->vm_start + * (and thereby, vmg->prev->vm_end). + */ + bool __adjust_middle_start :1; + /* + * Internal flag indicating the merge decreases vmg->next->vm_start + * (and thereby, vmg->middle->vm_end). + */ + bool __adjust_next_start :1; /* * Internal flag used during the merge operation to indicate we will * remove vmg->middle. diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 7728498b2f7e..c7ffa71841ca 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -159,6 +159,8 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vmg->just_expand = false; vmg->__remove_middle = false; vmg->__remove_next = false; + vmg->__adjust_middle_start = false; + vmg->__adjust_next_start = false; } /* From 0e5ffe9b2bd6d9ab7bf45f512c016e4710bf6d5d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 31 Jan 2025 12:31:52 +0000 Subject: [PATCH 0858/2157] mm: make vmg->target consistent and further simplify commit_merge() It is confusing for vmg->target to sometimes be the target merged VMA and in one case not. Fix this by having commit_merge() use its awareness of the vmg->_adjust_next_start case to know that it is manipulating a separate vma, abstracted in the 'vma' local variable. Place removal and adjust VMA determination logic into init_multi_vma_prep(), as the flags give us enough information to do so, and since this is the function that sets up the vma_prepare struct it makes sense to do so here. Doing this significantly simplifies commit_merge(), allowing us to eliminate the 'merge_target' handling, initialise the VMA iterator in a more sensible place and simply return vmg->target consistently. This also allows us to simplify setting vmg->target in vma_merge_existing_range() since we are then left only with two cases - merge left (or both) where the target is vmg->prev or merge right in which the target is vmg->next. This makes it easy for somebody reading the code to know what VMA will actually be the one returned and merged into and removes a great deal of the confusing 'adjust' nonsense. This patch has no change in functional behaviour. Link: https://lkml.kernel.org/r/50f96e31ab1980eaaf1006e34a4f6e6dad9320b8.1738326519.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/vma.c | 119 ++++++++++++++++++++++++++++--------------------------- mm/vma.h | 6 +-- 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index bac0e72ccb62..b10625e8fc99 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -106,24 +106,40 @@ static inline bool are_anon_vmas_compatible(struct vm_area_struct *vma1, * init_multi_vma_prep() - Initializer for struct vma_prepare * @vp: The vma_prepare struct * @vma: The vma that will be altered once locked - * @next: The next vma if it is to be adjusted - * @remove: The first vma to be removed - * @remove2: The second vma to be removed + * @vmg: The merge state that will be used to determine adjustment and VMA + * removal. */ static void init_multi_vma_prep(struct vma_prepare *vp, struct vm_area_struct *vma, - struct vm_area_struct *next, - struct vm_area_struct *remove, - struct vm_area_struct *remove2) + struct vma_merge_struct *vmg) { + struct vm_area_struct *adjust; + struct vm_area_struct **remove = &vp->remove; + memset(vp, 0, sizeof(struct vma_prepare)); vp->vma = vma; vp->anon_vma = vma->anon_vma; - vp->remove = remove ? remove : remove2; - vp->remove2 = remove ? remove2 : NULL; - vp->adj_next = next; - if (!vp->anon_vma && next) - vp->anon_vma = next->anon_vma; + + if (vmg && vmg->__remove_middle) { + *remove = vmg->middle; + remove = &vp->remove2; + } + if (vmg && vmg->__remove_next) + *remove = vmg->next; + + if (vmg && vmg->__adjust_middle_start) + adjust = vmg->middle; + else if (vmg && vmg->__adjust_next_start) + adjust = vmg->next; + else + adjust = NULL; + + vp->adj_next = adjust; + if (!vp->anon_vma && adjust) + vp->anon_vma = adjust->anon_vma; + + VM_WARN_ON(vp->anon_vma && adjust && adjust->anon_vma && + vp->anon_vma != adjust->anon_vma); vp->file = vma->vm_file; if (vp->file) @@ -360,7 +376,7 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, */ static void init_vma_prep(struct vma_prepare *vp, struct vm_area_struct *vma) { - init_multi_vma_prep(vp, vma, NULL, NULL, NULL); + init_multi_vma_prep(vp, vma, NULL); } /* @@ -634,76 +650,63 @@ void validate_mm(struct mm_struct *mm) */ static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) { - struct vm_area_struct *remove = NULL; - struct vm_area_struct *remove2 = NULL; + struct vm_area_struct *vma; struct vma_prepare vp; - struct vm_area_struct *adjust = NULL; + struct vm_area_struct *adjust; long adj_start; - bool merge_target; /* * If modifying an existing VMA and we don't remove vmg->middle, then we * shrink the adjacent VMA. */ if (vmg->__adjust_middle_start) { + vma = vmg->target; adjust = vmg->middle; /* The POSITIVE value by which we offset vmg->middle->vm_start. */ adj_start = vmg->end - vmg->middle->vm_start; - merge_target = true; + + /* Note: vma iterator must be pointing to 'start'. */ + vma_iter_config(vmg->vmi, vmg->start, vmg->end); } else if (vmg->__adjust_next_start) { + /* + * In this case alone, the VMA we manipulate is vmg->middle, but + * we ultimately return vmg->next. + */ + vma = vmg->middle; adjust = vmg->next; /* The NEGATIVE value by which we offset vmg->next->vm_start. */ adj_start = -(vmg->middle->vm_end - vmg->end); - /* - * In all cases but this - merge right, shrink next - we write - * vmg->target to the maple tree and return this as the merged VMA. - */ - merge_target = false; + + vma_iter_config(vmg->vmi, vmg->next->vm_start + adj_start, + vmg->next->vm_end); } else { + vma = vmg->target; adjust = NULL; adj_start = 0; - merge_target = true; - } - if (vmg->__remove_middle) - remove = vmg->middle; - if (vmg->__remove_next) - remove2 = vmg->next; - - init_multi_vma_prep(&vp, vmg->target, adjust, remove, remove2); - - VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && - vp.anon_vma != adjust->anon_vma); - - if (merge_target) { - /* Note: vma iterator must be pointing to 'start'. */ + /* Note: vma iterator must be pointing to 'start'. */ vma_iter_config(vmg->vmi, vmg->start, vmg->end); - } else { - vma_iter_config(vmg->vmi, adjust->vm_start + adj_start, - adjust->vm_end); } - if (vma_iter_prealloc(vmg->vmi, vmg->target)) + init_multi_vma_prep(&vp, vma, vmg); + + if (vma_iter_prealloc(vmg->vmi, vma)) return NULL; vma_prepare(&vp); - vma_adjust_trans_huge(vmg->target, vmg->start, vmg->end, adj_start); - vma_set_range(vmg->target, vmg->start, vmg->end, vmg->pgoff); - - if (merge_target) - vma_iter_store(vmg->vmi, vmg->target); + vma_adjust_trans_huge(vma, vmg->start, vmg->end, adj_start); + vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); if (adj_start) { adjust->vm_start += adj_start; adjust->vm_pgoff += PHYS_PFN(adj_start); - - if (!merge_target) - vma_iter_store(vmg->vmi, adjust); } - vma_complete(&vp, vmg->vmi, vmg->target->vm_mm); + vma_iter_store(vmg->vmi, vmg->target); - return merge_target ? vmg->target : vmg->next; + vma_complete(&vp, vmg->vmi, vma->vm_mm); + + return vmg->target; } /* We can only remove VMAs when merging if they do not have a close hook. */ @@ -833,11 +836,15 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( /* No matter what happens, we will be adjusting middle. */ vma_start_write(middle); - if (merge_left) - vma_start_write(prev); - - if (merge_right) + if (merge_right) { vma_start_write(next); + vmg->target = next; + } + + if (merge_left) { + vma_start_write(prev); + vmg->target = prev; + } if (merge_both) { /* @@ -847,7 +854,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( * extend delete delete */ - vmg->target = prev; vmg->start = prev->vm_start; vmg->end = next->vm_end; vmg->pgoff = prev->vm_pgoff; @@ -868,7 +874,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( * extend shrink/delete */ - vmg->target = prev; vmg->start = prev->vm_start; vmg->pgoff = prev->vm_pgoff; @@ -892,7 +897,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( VM_WARN_ON_VMG(vmg->start > middle->vm_start && prev && middle != prev, vmg); if (vmg->__remove_middle) { - vmg->target = next; vmg->end = next->vm_end; vmg->pgoff = next->vm_pgoff - pglen; } else { @@ -903,7 +907,6 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( * merged VMA is NOT vmg->target, but rather vmg->next. */ vmg->__adjust_next_start = true; - vmg->target = middle; vmg->start = middle->vm_start; vmg->end = start; vmg->pgoff = middle->vm_pgoff; diff --git a/mm/vma.h b/mm/vma.h index e18487797fa4..e55e68abfbe3 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -82,11 +82,7 @@ struct vma_merge_struct { struct vm_area_struct *prev; struct vm_area_struct *middle; struct vm_area_struct *next; - /* - * This is the VMA we ultimately target to become the merged VMA, except - * for the one exception of merge right, shrink next (for details of - * this scenario see vma_merge_existing_range()). - */ + /* This is the VMA we ultimately target to become the merged VMA. */ struct vm_area_struct *target; /* * Initially, the start, end, pgoff fields are provided by the caller From c372473a545edff2fdbc002fc67c181e17c7557b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 31 Jan 2025 12:31:53 +0000 Subject: [PATCH 0859/2157] mm: completely abstract unnecessary adj_start calculation The adj_start calculation has been a constant source of confusion in the VMA merge code. There are two cases to consider, one where we adjust the start of the vmg->middle VMA (i.e. the vmg->__adjust_middle_start merge flag is set), in which case adj_start is calculated as: (1) adj_start = vmg->end - vmg->middle->vm_start And the case where we adjust the start of the vmg->next VMA (i.e. the vmg->__adjust_next_start merge flag is set), in which case adj_start is calculated as: (2) adj_start = -(vmg->middle->vm_end - vmg->end) We apply (1) thusly: vmg->middle->vm_start = vmg->middle->vm_start + vmg->end - vmg->middle->vm_start Which simplifies to: vmg->middle->vm_start = vmg->end Similarly, we apply (2) as: vmg->next->vm_start = vmg->next->vm_start + -(vmg->middle->vm_end - vmg->end) Noting that for these VMAs to be mergeable vmg->middle->vm_end == vmg->next->vm_start and so this simplifies to: vmg->next->vm_start = vmg->next->vm_start + -(vmg->next->vm_start - vmg->end) Which simplifies to: vmg->next->vm_start = vmg->end Therefore in each case, we simply need to adjust the start of the VMA to vmg->end (!) and can do away with this adj_start calculation. The only caveat is that we must ensure we update the vm_pgoff field correctly. We therefore abstract this entire calculation to a new function vmg_adjust_set_range() which performs this calculation and sets the adjusted VMA's new range using the general vma_set_range() function. We also must update vma_adjust_trans_huge() which expects the now-abstracted adj_start parameter. It turns out this is wholly unnecessary. In vma_adjust_trans_huge() the relevant code is: if (adjust_next > 0) { struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); unsigned long nstart = next->vm_start; nstart += adjust_next; split_huge_pmd_if_needed(next, nstart); } The only case where this is relevant is when vmg->__adjust_middle_start is specified (in which case adj_next would have been positive), i.e. the one in which the vma specified is vmg->prev and this the sought 'next' VMA would be vmg->middle. We can therefore eliminate the find_vma() invocation altogether and simply provide the vmg->middle VMA in this instance, or NULL otherwise. Again we have an adj_next offset calculation: next->vm_start + vmg->end - vmg->middle->vm_start Where next == vmg->middle this simplifies to vmg->end as previously demonstrated. Therefore nstart is equal to vmg->end, which is already passed to vma_adjust_trans_huge() via the 'end' parameter and so this code (rather delightfully) simplifies to: if (next) split_huge_pmd_if_needed(next, end); With these changes in place, it becomes silly for commit_merge() to return vmg->target, as it is always the same and threaded through vmg, so we finally change commit_merge() to return an error value once again. This patch has no change in functional behaviour. Link: https://lkml.kernel.org/r/7bce2cd4b5afb56211822835d145471280c3dccc.1738326519.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 +- mm/huge_memory.c | 19 ++---- mm/vma.c | 101 +++++++++++++++---------------- tools/testing/vma/vma_internal.h | 4 +- 4 files changed, 58 insertions(+), 70 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 93e509b6c00e..e1bea54820ff 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -404,7 +404,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, - unsigned long end, long adjust_next); + unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); @@ -571,7 +571,7 @@ static inline int madvise_collapse(struct vm_area_struct *vma, static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { } static inline int is_swap_pmd(pmd_t pmd) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e33da765c428..e7ac4f0dc21d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3017,9 +3017,9 @@ static inline void split_huge_pmd_if_needed(struct vm_area_struct *vma, unsigned } void vma_adjust_trans_huge(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - long adjust_next) + unsigned long start, + unsigned long end, + struct vm_area_struct *next) { /* Check if we need to split start first. */ split_huge_pmd_if_needed(vma, start); @@ -3027,16 +3027,9 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, /* Check if we need to split end next. */ split_huge_pmd_if_needed(vma, end); - /* - * If we're also updating the next vma vm_start, - * check if we need to split it. - */ - if (adjust_next > 0) { - struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); - unsigned long nstart = next->vm_start; - nstart += adjust_next; - split_huge_pmd_if_needed(next, nstart); - } + /* If we're incrementing next->vm_start, we might need to split it. */ + if (next) + split_huge_pmd_if_needed(next, end); } static void unmap_folio(struct folio *folio) diff --git a/mm/vma.c b/mm/vma.c index b10625e8fc99..603e538a093f 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -513,7 +513,7 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp); - vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); + vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); if (new_below) { vma->vm_start = addr; @@ -643,47 +643,45 @@ void validate_mm(struct mm_struct *mm) } #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */ +/* + * Based on the vmg flag indicating whether we need to adjust the vm_start field + * for the middle or next VMA, we calculate what the range of the newly adjusted + * VMA ought to be, and set the VMA's range accordingly. + */ +static void vmg_adjust_set_range(struct vma_merge_struct *vmg) +{ + struct vm_area_struct *adjust; + pgoff_t pgoff; + + if (vmg->__adjust_middle_start) { + adjust = vmg->middle; + pgoff = adjust->vm_pgoff + PHYS_PFN(vmg->end - adjust->vm_start); + } else if (vmg->__adjust_next_start) { + adjust = vmg->next; + pgoff = adjust->vm_pgoff - PHYS_PFN(adjust->vm_start - vmg->end); + } else { + return; + } + + vma_set_range(adjust, vmg->end, adjust->vm_end, pgoff); +} + /* * Actually perform the VMA merge operation. * - * On success, returns the merged VMA. Otherwise returns NULL. + * Returns 0 on success, or an error value on failure. */ -static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) +static int commit_merge(struct vma_merge_struct *vmg) { struct vm_area_struct *vma; struct vma_prepare vp; - struct vm_area_struct *adjust; - long adj_start; - /* - * If modifying an existing VMA and we don't remove vmg->middle, then we - * shrink the adjacent VMA. - */ - if (vmg->__adjust_middle_start) { - vma = vmg->target; - adjust = vmg->middle; - /* The POSITIVE value by which we offset vmg->middle->vm_start. */ - adj_start = vmg->end - vmg->middle->vm_start; - - /* Note: vma iterator must be pointing to 'start'. */ - vma_iter_config(vmg->vmi, vmg->start, vmg->end); - } else if (vmg->__adjust_next_start) { - /* - * In this case alone, the VMA we manipulate is vmg->middle, but - * we ultimately return vmg->next. - */ + if (vmg->__adjust_next_start) { + /* We manipulate middle and adjust next, which is the target. */ vma = vmg->middle; - adjust = vmg->next; - /* The NEGATIVE value by which we offset vmg->next->vm_start. */ - adj_start = -(vmg->middle->vm_end - vmg->end); - - vma_iter_config(vmg->vmi, vmg->next->vm_start + adj_start, - vmg->next->vm_end); + vma_iter_config(vmg->vmi, vmg->end, vmg->next->vm_end); } else { vma = vmg->target; - adjust = NULL; - adj_start = 0; - /* Note: vma iterator must be pointing to 'start'. */ vma_iter_config(vmg->vmi, vmg->start, vmg->end); } @@ -691,22 +689,22 @@ static struct vm_area_struct *commit_merge(struct vma_merge_struct *vmg) init_multi_vma_prep(&vp, vma, vmg); if (vma_iter_prealloc(vmg->vmi, vma)) - return NULL; + return -ENOMEM; vma_prepare(&vp); - vma_adjust_trans_huge(vma, vmg->start, vmg->end, adj_start); + /* + * THP pages may need to do additional splits if we increase + * middle->vm_start. + */ + vma_adjust_trans_huge(vma, vmg->start, vmg->end, + vmg->__adjust_middle_start ? vmg->middle : NULL); vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); - - if (adj_start) { - adjust->vm_start += adj_start; - adjust->vm_pgoff += PHYS_PFN(adj_start); - } - + vmg_adjust_set_range(vmg); vma_iter_store(vmg->vmi, vmg->target); vma_complete(&vp, vmg->vmi, vma->vm_mm); - return vmg->target; + return 0; } /* We can only remove VMAs when merging if they do not have a close hook. */ @@ -749,7 +747,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( { struct vm_area_struct *middle = vmg->middle; struct vm_area_struct *prev = vmg->prev; - struct vm_area_struct *next, *res; + struct vm_area_struct *next; struct vm_area_struct *anon_dup = NULL; unsigned long start = vmg->start; unsigned long end = vmg->end; @@ -900,12 +898,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vmg->end = next->vm_end; vmg->pgoff = next->vm_pgoff - pglen; } else { - /* - * We shrink middle and expand next. - * - * IMPORTANT: This is the ONLY case where the final - * merged VMA is NOT vmg->target, but rather vmg->next. - */ + /* We shrink middle and expand next. */ vmg->__adjust_next_start = true; vmg->start = middle->vm_start; vmg->end = start; @@ -918,8 +911,10 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (err) goto abort; - res = commit_merge(vmg); - if (!res) { + err = commit_merge(vmg); + if (err) { + VM_WARN_ON(err != -ENOMEM); + if (anon_dup) unlink_anon_vmas(anon_dup); @@ -927,9 +922,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( return NULL; } - khugepaged_enter_vma(res, vmg->flags); + khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; - return res; + return vmg->target; abort: vma_iter_set(vmg->vmi, start); @@ -1092,7 +1087,7 @@ int vma_expand(struct vma_merge_struct *vmg) if (remove_next) vmg->__remove_next = true; - if (!commit_merge(vmg)) + if (commit_merge(vmg)) goto nomem; return 0; @@ -1132,7 +1127,7 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vma_prepare(&vp); - vma_adjust_trans_huge(vma, start, end, 0); + vma_adjust_trans_huge(vma, start, end, NULL); vma_iter_clear(vmi); vma_set_range(vma, start, end, pgoff); diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 1eae23039854..bb273927af0f 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -796,12 +796,12 @@ static inline void vma_start_write(struct vm_area_struct *vma) static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { (void)vma; (void)start; (void)end; - (void)adjust_next; + (void)next; } static inline void vma_iter_free(struct vma_iterator *vmi) From 51ff4d7486f0c0b4110a6da4af805b179dd7b11e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Feb 2025 15:18:00 -0800 Subject: [PATCH 0860/2157] mm: avoid extra mem_alloc_profiling_enabled() checks Refactor code to avoid extra mem_alloc_profiling_enabled() checks inside pgalloc_tag_get() function which is often called after that check was already done. Link: https://lkml.kernel.org/r/20250201231803.2661189-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Cc: David Wang <00107082@163.com> Cc: Steven Rostedt Cc: Kent Overstreet Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yu Zhao Cc: Zhenhua Huang Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 33 ++++++++++++++++++--------------- lib/alloc_tag.c | 6 +++--- mm/page_alloc.c | 3 +-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 3469c4b20105..4a82b6b4820e 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -205,28 +205,32 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) } } -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; + union pgtag_ref_handle handle; + union codetag_ref ref; - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub_check(&ref); - if (ref.ct) - tag = ct_to_alloc_tag(ref.ct); - put_page_tag_ref(handle); - } + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); + if (ref.ct) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); } return tag; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) +static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) { - if (mem_alloc_profiling_enabled() && tag) + struct alloc_tag *tag; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = __pgalloc_tag_get(page); + if (tag) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } @@ -241,8 +245,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} +static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 19b45617bdcf..1d893e313614 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -174,7 +174,7 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) if (!mem_alloc_profiling_enabled()) return; - tag = pgalloc_tag_get(&folio->page); + tag = __pgalloc_tag_get(&folio->page); if (!tag) return; @@ -200,10 +200,10 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old) if (!mem_alloc_profiling_enabled()) return; - tag_old = pgalloc_tag_get(&old->page); + tag_old = __pgalloc_tag_get(&old->page); if (!tag_old) return; - tag_new = pgalloc_tag_get(&new->page); + tag_new = __pgalloc_tag_get(&new->page); if (!tag_new) return; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 542d25f77be8..38c2b2d20b1d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4833,12 +4833,11 @@ void __free_pages(struct page *page, unsigned int order) { /* get PageHead before we drop reference */ int head = PageHead(page); - struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) free_frozen_pages(page, order); else if (!head) { - pgalloc_tag_sub_pages(tag, (1 << order) - 1); + pgalloc_tag_sub_pages(page, (1 << order) - 1); while (order-- > 0) free_frozen_pages(page + (1 << order), order); } From a642b27b991fd663de1676bf91583d1e2397d93d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Feb 2025 15:18:01 -0800 Subject: [PATCH 0861/2157] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator When a sizable code section is protected by a disabled static key, that code gets into the instruction cache even though it's not executed and consumes the cache, increasing cache misses. This can be remedied by moving such code into a separate uninlined function. On a Pixel6 phone, slab allocation profiling overhead measured with CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is: baseline modified Big core 3.31% 0.17% Medium core 3.79% 0.57% Little core 6.68% 1.28% This improvement comes at the expense of the configuration when profiling gets enabled, since there is now an additional function call. The overhead from this additional call on Pixel6 is: Big core 0.66% Middle core 1.23% Little core 2.42% However this is negligible when compared with the overall overhead of the memory allocation profiling when it is enabled. On x86 this patch does not make noticeable difference because the overhead with mem_alloc_profiling_key disabled is much lower (under 1%) to start with, so any improvement is less visible and hard to distinguish from the noise. The overhead from additional call when profiling is enabled is also within noise levels. Link: https://lkml.kernel.org/r/20250201231803.2661189-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Vlastimil Babka Reviewed-by: Shakeel Butt Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Steven Rostedt Cc: Yu Zhao Cc: Zhenhua Huang Signed-off-by: Andrew Morton --- mm/slub.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 1f50129dcfb3..184fd2b14758 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2000,7 +2000,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -static inline void free_slab_obj_exts(struct slab *slab) +/* Should be called only if mem_alloc_profiling_enabled() */ +static noinline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2077,33 +2078,37 @@ prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p) return slab_obj_exts(slab) + obj_to_index(s, slab, p); } -static inline void -alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) +/* Should be called only if mem_alloc_profiling_enabled() */ +static noinline void +__alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) { - struct slabobj_ext *obj_exts; + struct slabobj_ext *obj_exts; - obj_exts = prepare_slab_obj_exts_hook(s, flags, object); - /* - * Currently obj_exts is used only for allocation profiling. - * If other users appear then mem_alloc_profiling_enabled() - * check should be added before alloc_tag_add(). - */ - if (likely(obj_exts)) - alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size); - } + obj_exts = prepare_slab_obj_exts_hook(s, flags, object); + /* + * Currently obj_exts is used only for allocation profiling. + * If other users appear then mem_alloc_profiling_enabled() + * check should be added before alloc_tag_add(). + */ + if (likely(obj_exts)) + alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size); } static inline void -alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, - int objects) +alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) +{ + if (need_slab_obj_ext()) + __alloc_tagging_slab_alloc_hook(s, object, flags); +} + +/* Should be called only if mem_alloc_profiling_enabled() */ +static noinline void +__alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, + int objects) { struct slabobj_ext *obj_exts; int i; - if (!mem_alloc_profiling_enabled()) - return; - /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE)) return; @@ -2119,6 +2124,14 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, } } +static inline void +alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, + int objects) +{ + if (mem_alloc_profiling_enabled()) + __alloc_tagging_slab_free_hook(s, slab, p, objects); +} + #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void From 93d5440ece3c0aa341fb02e3a44a1b7ab44304c8 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Feb 2025 15:18:02 -0800 Subject: [PATCH 0862/2157] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator When a sizable code section is protected by a disabled static key, that code gets into the instruction cache even though it's not executed and consumes the cache, increasing cache misses. This can be remedied by moving such code into a separate uninlined function. On a Pixel6 phone, page allocation profiling overhead measured with CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is: baseline modified Big core 4.93% 1.53% Medium core 4.39% 1.41% Little core 1.02% 0.36% This improvement comes at the expense of the configuration when profiling gets enabled, since there is now an additional function call. The overhead from this additional call on Pixel6 is: Big core 0.24% Middle core 0.63% Little core 1.1% However this is negligible when compared with the overall overhead of the memory allocation profiling when it is enabled. On x86 this patch does not make noticeable difference because the overhead with mem_alloc_profiling_key disabled is much lower (under 1%) to start with, so any improvement is less visible and hard to distinguish from the noise. The overhead from additional call when profiling is enabled is also within noise levels. Link: https://lkml.kernel.org/r/20250201231803.2661189-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Steven Rostedt Cc: Vlastimil Babka Cc: Yu Zhao Cc: Zhenhua Huang Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 60 +++------------------------- mm/page_alloc.c | 78 +++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 4a82b6b4820e..c74077977830 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -162,47 +162,13 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code } } +/* Should be called only if mem_alloc_profiling_enabled() */ +void __clear_page_tag_ref(struct page *page); + static inline void clear_page_tag_ref(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - set_codetag_empty(&ref); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} - -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} - -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub(&ref, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } + if (mem_alloc_profiling_enabled()) + __clear_page_tag_ref(page); } /* Should be called only if mem_alloc_profiling_enabled() */ @@ -222,18 +188,6 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) -{ - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - tag = __pgalloc_tag_get(page); - if (tag) - this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); -} - void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -242,10 +196,6 @@ void __init alloc_tag_sec_init(void); #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void clear_page_tag_ref(struct page *page) {} -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) {} -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 38c2b2d20b1d..d875f055aa53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1041,6 +1041,84 @@ static void kernel_init_pages(struct page *page, int numpages) kasan_enable_current(); } +#ifdef CONFIG_MEM_ALLOC_PROFILING + +/* Should be called only if mem_alloc_profiling_enabled() */ +void __clear_page_tag_ref(struct page *page) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + set_codetag_empty(&ref); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } +} + +/* Should be called only if mem_alloc_profiling_enabled() */ +static noinline +void __pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } +} + +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) + __pgalloc_tag_add(page, task, nr); +} + +/* Should be called only if mem_alloc_profiling_enabled() */ +static noinline +void __pgalloc_tag_sub(struct page *page, unsigned int nr) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub(&ref, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } +} + +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) +{ + if (mem_alloc_profiling_enabled()) + __pgalloc_tag_sub(page, nr); +} + +static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) +{ + struct alloc_tag *tag; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = __pgalloc_tag_get(page); + if (tag) + this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); +} + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) {} +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} +static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + __always_inline bool free_pages_prepare(struct page *page, unsigned int order) { From 023fff71d893d9aa7814078f24d730afccbab9b9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 4 Feb 2025 22:53:43 +0000 Subject: [PATCH 0863/2157] selftests/mm: fix thuge-gen test name uniqueness The thuge-gen test_mmap() and test_shmget() tests are repeatedly run for a variety of sizes but always report the result of their test with the same name, meaning that automated sysetms running the tests are unable to distinguish between the various tests. Add the supplied sizes to the logged test names to distinguish between runs. My test automation was getting pretty confused about what was going on - the test names are a pretty important external interface. Link: https://lkml.kernel.org/r/20250204-kselftest-mm-fix-dups-v1-1-6afe417ef4bb@kernel.org Fixes: b38bd9b2c448 ("selftests/mm: thuge-gen: conform to TAP format output") Signed-off-by: Mark Brown Reviewed-by: Dev Jain Cc: Muhammad Usama Anjum Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/thuge-gen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index e4370b79b62f..cd5174d735be 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap\n", __func__); + "%s mmap %lu\n", __func__, size); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap\n", __func__); + "%s: mmap %lu\n", __func__, size); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } From 4cc39f91ef6c6f876651eb231974a59ffbcb3a21 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Feb 2025 22:15:14 -0800 Subject: [PATCH 0864/2157] mm/madvise: split out mmap locking operations for madvise() Patch series "mm/madvise: remove redundant mmap_lock operations from process_madvise()". process_madvise() calls do_madvise() for each address range. Then, each do_madvise() invocation holds and releases same mmap_lock. Optimize the redundant lock operations by splitting do_madvise() internal logic including the mmap_lock operations, and calling the small logic directly from process_madvise() in a sequence that removes the redundant locking. As a result of this change, process_madvise() becomes more efficient and less racy in terms of its results and latency. Note that the potential downside of this series is that other mmap_lock holders may take more time due to the increased length of mmap_lock critical section for process_madvise() calls. But there is maximum limit in the kernel space (IOV_MAX), and userspace can control the critical section length by setting the request size. Hence, the downside would be limited and controllable. Evaluation ========== I measured the time to apply MADV_DONTNEED advice to 256 MiB memory using multiple madvise() calls, 4 KiB per each call. I also do the same with process_madvise(), but with varying batch size (vlen) from 1 to 1024. The source code for the measurement is available at GitHub[1]. Because the microbenchmark result is not that stable, I ran each configuration five times and use the average. The measurement results are as below. 'sz_batches' column shows the batch size of process_madvise() calls. '0' batch size is for madvise() calls case. 'before' and 'after' columns are the measured time to apply MADV_DONTNEED to the 256 MiB memory buffer in nanoseconds, on kernels that built without and with the last patch of this series, respectively. So lower value means better efficiency. 'after/before' column is the ratio of 'after' to 'before'. sz_batches before after after/before 0 146294215.2 121280536.2 0.829017989769427 1 165851018.8 136305598.2 0.821855658085351 2 129469321.2 103740383.6 0.801273866569094 4 110369232.4 87835896.2 0.795836795182785 8 102906232.4 77420920.2 0.752344327397609 16 97551017.4 74959714.4 0.768415506038587 32 94809848.2 71200848.4 0.750985786305689 64 96087575.6 72593180 0.755489765942227 128 96154163.8 68517055.4 0.712575022154163 256 92901257.6 69054216.6 0.743307662177439 512 93646170.8 67053296.2 0.716028168874151 1024 92663219.2 70168196.8 0.75723892830177 Despite the unstable nature of the test program, the trend is as we expect. The measurement shows this patchset reduces the process_madvise() latency, proportional to the batching size. The latency gain was about 20% with the batch size 2, and it has increased to about 28% with the batch size 512, since more number of mmap locking is reduced with larger batch size. Note that the standard devitation of the measurements for each sz_batches configuration ranged from 1.9% to 7.2%. That is, this result is not very stable. The average of the standard deviations for different batch sizes were 4.62% and 4.70% for the 'before' and 'after' kernel measurements. Also note that this patch has somehow decreased latencies of madvise() and single batch size process_madvise(). Seems this code path is small enough to significantly be affected by compiler optimizations including inlining of split-out functions. Please focus on only the improvement amount that changed by the batch size. [1] https://github.com/sjp38/eval_proc_madvise This patch (of 4): Split out the madvise behavior-dependent mmap_lock operations from do_madvise(), for easier reuse of the logic in an upcoming change. [lorenzo.stoakes@oracle.com: fix madvise_[un]lock() issue] Link: https://lkml.kernel.org/r/2f448f7b-1da7-4099-aa9e-0179d47fde40@lucifer.local [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250206061517.2958-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250206061517.2958-2-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Shakeel Butt Reviewed-by: Lorenzo Stoakes Reviewed-by: Davidlohr Bueso Reviewed-by: Liam R. Howlett Cc: David Hildenbrand Cc: SeongJae Park Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 62 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 08b207f8e61e..fa5dae5a7723 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1574,6 +1574,50 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, madvise_vma_anon_name); } #endif /* CONFIG_ANON_VMA_NAME */ + +#ifdef CONFIG_MEMORY_FAILURE +static bool is_memory_failure(int behavior) +{ + switch (behavior) { + case MADV_HWPOISON: + case MADV_SOFT_OFFLINE: + return true; + default: + return false; + } +} +#else +static bool is_memory_failure(int behavior) +{ + return false; +} +#endif + +static int madvise_lock(struct mm_struct *mm, int behavior) +{ + if (is_memory_failure(behavior)) + return 0; + + if (madvise_need_mmap_write(behavior)) { + if (mmap_write_lock_killable(mm)) + return -EINTR; + } else { + mmap_read_lock(mm); + } + return 0; +} + +static void madvise_unlock(struct mm_struct *mm, int behavior) +{ + if (is_memory_failure(behavior)) + return; + + if (madvise_need_mmap_write(behavior)) + mmap_write_unlock(mm); + else + mmap_read_unlock(mm); +} + /* * The madvise(2) system call. * @@ -1650,7 +1694,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh { unsigned long end; int error; - int write; size_t len; struct blk_plug plug; @@ -1672,19 +1715,15 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh if (end == start) return 0; + error = madvise_lock(mm, behavior); + if (error) + return error; + #ifdef CONFIG_MEMORY_FAILURE if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) return madvise_inject_error(behavior, start, start + len_in); #endif - write = madvise_need_mmap_write(behavior); - if (write) { - if (mmap_write_lock_killable(mm)) - return -EINTR; - } else { - mmap_read_lock(mm); - } - start = untagged_addr_remote(mm, start); end = start + len; @@ -1701,10 +1740,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh } blk_finish_plug(&plug); - if (write) - mmap_write_unlock(mm); - else - mmap_read_unlock(mm); + madvise_unlock(mm, behavior); return error; } From dbb0020bbc2c9f563d68564b36d6e8d32f82008b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Feb 2025 22:15:15 -0800 Subject: [PATCH 0865/2157] mm/madvise: split out madvise input validity check Split out the madvise parameters validation logic from do_madvise(), for easy reuse of the logic from a future change. Link: https://lkml.kernel.org/r/20250206061517.2958-3-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Shakeel Butt Reviewed-by: Lorenzo Stoakes Reviewed-by: Davidlohr Bueso Reviewed-by: Liam R. Howlett Cc: David Hildenbrand Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index fa5dae5a7723..ca858b8a837b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1618,6 +1618,27 @@ static void madvise_unlock(struct mm_struct *mm, int behavior) mmap_read_unlock(mm); } +static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior) +{ + size_t len; + + if (!madvise_behavior_valid(behavior)) + return false; + + if (!PAGE_ALIGNED(start)) + return false; + len = PAGE_ALIGN(len_in); + + /* Check to see whether len was rounded up from small -ve to zero */ + if (len_in && !len) + return false; + + if (start + len < start) + return false; + + return true; +} + /* * The madvise(2) system call. * @@ -1697,20 +1718,11 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh size_t len; struct blk_plug plug; - if (!madvise_behavior_valid(behavior)) + if (!is_valid_madvise(start, len_in, behavior)) return -EINVAL; - if (!PAGE_ALIGNED(start)) - return -EINVAL; len = PAGE_ALIGN(len_in); - - /* Check to see whether len was rounded up from small -ve to zero */ - if (len_in && !len) - return -EINVAL; - end = start + len; - if (end < start) - return -EINVAL; if (end == start) return 0; From 457753da6462024ad821bcb4df2d828cf2ef18be Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Feb 2025 22:15:16 -0800 Subject: [PATCH 0866/2157] mm/madvise: split out madvise() behavior execution Split out the madvise behavior applying logic from do_madvise() to make it easier to reuse from the following change. Link: https://lkml.kernel.org/r/20250206061517.2958-4-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Shakeel Butt Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 53 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index ca858b8a837b..6e31e3202d71 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1639,6 +1639,35 @@ static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior) return true; } +static int madvise_do_behavior(struct mm_struct *mm, + unsigned long start, size_t len_in, size_t len, int behavior) +{ + struct blk_plug plug; + unsigned long end; + int error; + +#ifdef CONFIG_MEMORY_FAILURE + if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) + return madvise_inject_error(behavior, start, start + len_in); +#endif + start = untagged_addr_remote(mm, start); + end = start + len; + + blk_start_plug(&plug); + switch (behavior) { + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: + error = madvise_populate(mm, start, end, behavior); + break; + default: + error = madvise_walk_vmas(mm, start, end, behavior, + madvise_vma_behavior); + break; + } + blk_finish_plug(&plug); + return error; +} + /* * The madvise(2) system call. * @@ -1716,7 +1745,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh unsigned long end; int error; size_t len; - struct blk_plug plug; if (!is_valid_madvise(start, len_in, behavior)) return -EINVAL; @@ -1730,28 +1758,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh error = madvise_lock(mm, behavior); if (error) return error; - -#ifdef CONFIG_MEMORY_FAILURE - if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) - return madvise_inject_error(behavior, start, start + len_in); -#endif - - start = untagged_addr_remote(mm, start); - end = start + len; - - blk_start_plug(&plug); - switch (behavior) { - case MADV_POPULATE_READ: - case MADV_POPULATE_WRITE: - error = madvise_populate(mm, start, end, behavior); - break; - default: - error = madvise_walk_vmas(mm, start, end, behavior, - madvise_vma_behavior); - break; - } - blk_finish_plug(&plug); - + error = madvise_do_behavior(mm, start, len_in, len, behavior); madvise_unlock(mm, behavior); return error; From 4000e3d0a367c5ff2035a0394b01b93974be6cb1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Feb 2025 22:15:17 -0800 Subject: [PATCH 0867/2157] mm/madvise: remove redundant mmap_lock operations from process_madvise() Optimize redundant mmap lock operations from process_madvise() by directly doing the mmap locking first, and then the remaining works for all ranges in the loop. [akpm@linux-foundation.org: update comment, per Lorenzo] Link: https://lkml.kernel.org/r/20250206061517.2958-5-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Shakeel Butt Reviewed-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 6e31e3202d71..6ecead476a80 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1778,16 +1778,33 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, total_len = iov_iter_count(iter); + ret = madvise_lock(mm, behavior); + if (ret) + return ret; + while (iov_iter_count(iter)) { - ret = do_madvise(mm, (unsigned long)iter_iov_addr(iter), - iter_iov_len(iter), behavior); + unsigned long start = (unsigned long)iter_iov_addr(iter); + size_t len_in = iter_iov_len(iter); + size_t len; + + if (!is_valid_madvise(start, len_in, behavior)) { + ret = -EINVAL; + break; + } + + len = PAGE_ALIGN(len_in); + if (start + len == start) + ret = 0; + else + ret = madvise_do_behavior(mm, start, len_in, len, + behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat * the operation in aggregate, and would be surprising to the * user. * - * As we have already dropped locks, it is safe to just loop and + * We drop and reacquire locks so it is safe to just loop and * try again. We check for fatal signals in case we need exit * early anyway. */ @@ -1796,12 +1813,17 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, ret = -EINTR; break; } + + /* Drop and reacquire lock to unwind race. */ + madvise_unlock(mm, behavior); + madvise_lock(mm, behavior); continue; } if (ret < 0) break; iov_iter_advance(iter, iter_iov_len(iter)); } + madvise_unlock(mm, behavior); ret = (total_len - iov_iter_count(iter)) ? : ret; From 33c9b01ed2fcbc101cdfeb497f4581e981e7c1e7 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Thu, 6 Feb 2025 14:09:58 +0800 Subject: [PATCH 0868/2157] mm/memfd: fix spelling and grammatical issues The comment "If a private mapping then writability is irrelevant" contains a typo. It should be "If a private mapping then writability is irrelevant". The comment "SEAL_EXEC implys SEAL_WRITE, making W^X from the start." contains a typo. It should be "SEAL_EXEC implies SEAL_WRITE, making W^X from the start." Link: https://lkml.kernel.org/r/20250206060958.98010-1-liuye@kylinos.cn Signed-off-by: Liu Ye Signed-off-by: Andrew Morton --- mm/memfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memfd.c b/mm/memfd.c index 37f7be57c2f5..c64df1343059 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -259,7 +259,7 @@ static int memfd_add_seals(struct file *file, unsigned int seals) } /* - * SEAL_EXEC implys SEAL_WRITE, making W^X from the start. + * SEAL_EXEC implies SEAL_WRITE, making W^X from the start. */ if (seals & F_SEAL_EXEC && inode->i_mode & 0111) seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE; @@ -337,7 +337,7 @@ static int check_write_seal(unsigned long *vm_flags_ptr) unsigned long vm_flags = *vm_flags_ptr; unsigned long mask = vm_flags & (VM_SHARED | VM_WRITE); - /* If a private matting then writability is irrelevant. */ + /* If a private mapping then writability is irrelevant. */ if (!(mask & VM_SHARED)) return 0; From 81fe88a946051f1dceef72fb3f87bb0880392464 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:10 +0800 Subject: [PATCH 0869/2157] mm/swap_state.c: fix the obsolete code comment Patch series "Tiny cleanup and improvements about SWAP code". These are all made during review and from reading the patchset "[PATCH v3 00/13] mm, swap: rework of swap allocator locks" from Kairui. This patch (of 12): Since commit 85a1333417a7 ("mm/swap: use dedicated entry for swap in folio"), there's a dedicated field in folio for swap entry. Let's update the code comment above add_to_swap_cache() accordingly. Link: https://lkml.kernel.org/r/20250205092721.9395-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20250205092721.9395-2-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Kairui Song Cc: Baoquan he Cc: Chris Li (Google) Signed-off-by: Andrew Morton --- mm/swap_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 2e1acb210e57..aabde86d1f47 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -85,7 +85,7 @@ void *get_shadow_from_swap_cache(swp_entry_t entry) /* * add_to_swap_cache resembles filemap_add_folio on swapper_space, - * but sets SwapCache flag and private instead of mapping and index. + * but sets SwapCache flag and 'swap' instead of mapping and index. */ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, gfp_t gfp, void **shadowp) From cd57a3fb37f91ef6106bc970d2b5c5878d3b5627 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:11 +0800 Subject: [PATCH 0870/2157] mm/swap_state.c: optimize the code in clear_shadow_from_swap_cache() Use ALIGN to achieve the same effect and simplify the code. Link: https://lkml.kernel.org/r/20250205092721.9395-3-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swap_state.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index aabde86d1f47..8a84371980e9 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -270,9 +270,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin, xa_unlock_irq(&address_space->i_pages); /* search the next swapcache until we meet end */ - curr >>= SWAP_ADDRESS_SPACE_SHIFT; - curr++; - curr <<= SWAP_ADDRESS_SPACE_SHIFT; + curr = ALIGN((curr + 1), SWAP_ADDRESS_SPACE_PAGES); if (curr > end) break; } From 0eb7d2c337f9df3b6adbcbdce213595d94781e05 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:12 +0800 Subject: [PATCH 0871/2157] mm/swap: remove SWAP_FLAG_PRIO_SHIFT It doesn't make sense to have a zero value of shift. Remove it to avoid confusion. Link: https://lkml.kernel.org/r/20250205092721.9395-4-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 - mm/swapfile.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 9a48e79a0a52..bbd06cbd1f2b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -24,7 +24,6 @@ struct pagevec; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff -#define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ diff --git a/mm/swapfile.c b/mm/swapfile.c index df7c4e8b089c..3a17e40f4c95 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3453,8 +3453,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) - prio = - (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; + prio = swap_flags & SWAP_FLAG_PRIO_MASK; enable_swap_info(si, prio, swap_map, cluster_info, zeromap); pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n", From 9b9cba7289ba7e5076fbfe913860306b4672631c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:13 +0800 Subject: [PATCH 0872/2157] mm/swap: skip scanning cluster range if it's empty cluster Since ci->lock has been taken when isolating cluster from si->free_clusters or taking si->percpu_cluster->next[order], it's unnecessary to scan and check the cluster range availability if i'ts empty cluster, and this can accelerate the huge page swapping. Link: https://lkml.kernel.org/r/20250205092721.9395-5-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index 3a17e40f4c95..0a0c4118759e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -730,6 +730,9 @@ static bool cluster_scan_range(struct swap_info_struct *si, unsigned long offset, end = start + nr_pages; unsigned char *map = si->swap_map; + if (cluster_is_empty(ci)) + return true; + for (offset = start; offset < end; offset++) { switch (READ_ONCE(map[offset])) { case 0: From b4735d94c29f6a65362d3cce0d55aa65e0e319e3 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:14 +0800 Subject: [PATCH 0873/2157] mm/swap: rename swap_is_has_cache() to swap_only_has_cache() There are two predicates in the name of swap_is_has_cache() which is confusing. Renaming it to remove the confusion and can better reflect its functionality. Link: https://lkml.kernel.org/r/20250205092721.9395-6-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 0a0c4118759e..0a9c1efeffd6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -161,7 +161,7 @@ static long swap_usage_in_pages(struct swap_info_struct *si) /* Reclaim directly, bypass the slot cache and don't touch device lock */ #define TTRS_DIRECT 0x8 -static bool swap_is_has_cache(struct swap_info_struct *si, +static bool swap_only_has_cache(struct swap_info_struct *si, unsigned long offset, int nr_pages) { unsigned char *map = si->swap_map + offset; @@ -243,7 +243,7 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, * reference or pending writeback, and can't be allocated to others. */ ci = lock_cluster(si, offset); - need_reclaim = swap_is_has_cache(si, offset, nr_pages); + need_reclaim = swap_only_has_cache(si, offset, nr_pages); unlock_cluster(ci); if (!need_reclaim) goto out_unlock; @@ -1577,7 +1577,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry) return; ci = lock_cluster(si, offset); - if (swap_is_has_cache(si, offset, size)) + if (swap_only_has_cache(si, offset, size)) swap_entry_range_free(si, ci, entry, size); else { for (int i = 0; i < size; i++, entry.val++) { From f80ddc148ca6505060fbef6f8365ca151103e16f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:15 +0800 Subject: [PATCH 0874/2157] mm/swapfile.c: update the code comment above swap_count_continued() Now, swap_count_continued() has two callers, __swap_duplicate() and __swap_entry_free_locked(), the relevant code comment is stale. Update it to reflect the current situation. [bhe@redhat.com: v2] Link: https://lkml.kernel.org/r/Z6V0/UvG1fvkQ4t/@fedora Link: https://lkml.kernel.org/r/20250205092721.9395-7-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 0a9c1efeffd6..08debaacbbc0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3797,8 +3797,8 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. - * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster - * lock. + * Called while __swap_duplicate() or caller of __swap_entry_free_locked() + * holds cluster lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) From a46a6bc21c223b852ff324d7c7ef3da868b90fd0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:16 +0800 Subject: [PATCH 0875/2157] mm/swapfile.c: optimize code in setup_clusters() In the last 'for' loop inside setup_clusters(), using two local variable 'k' and 'j' are obvisouly redundant. Using 'j' is enough and simpler. And also move macro SWAP_CLUSTER_COLS close to its only user setup_clusters(). Link: https://lkml.kernel.org/r/20250205092721.9395-8-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 08debaacbbc0..cbee03aa74b8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3127,13 +3127,6 @@ static unsigned long read_swap_header(struct swap_info_struct *si, return maxpages; } -#define SWAP_CLUSTER_INFO_COLS \ - DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) -#define SWAP_CLUSTER_SPACE_COLS \ - DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) -#define SWAP_CLUSTER_COLS \ - max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) - static int setup_swap_map_and_extents(struct swap_info_struct *si, union swap_header *swap_header, unsigned char *swap_map, @@ -3173,13 +3166,20 @@ static int setup_swap_map_and_extents(struct swap_info_struct *si, return nr_extents; } +#define SWAP_CLUSTER_INFO_COLS \ + DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) +#define SWAP_CLUSTER_SPACE_COLS \ + DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) +#define SWAP_CLUSTER_COLS \ + max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) + static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, union swap_header *swap_header, unsigned long maxpages) { unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); struct swap_cluster_info *cluster_info; - unsigned long i, j, k, idx; + unsigned long i, j, idx; int cpu, err = -ENOMEM; cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL); @@ -3240,8 +3240,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, * Reduce false cache line sharing between cluster_info and * sharing same address space. */ - for (k = 0; k < SWAP_CLUSTER_COLS; k++) { - j = k % SWAP_CLUSTER_COLS; + for (j = 0; j < SWAP_CLUSTER_COLS; j++) { for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { struct swap_cluster_info *ci; idx = i * SWAP_CLUSTER_COLS + j; From e89c45c700e7d9ab4913ec98f1fe9f23d46c1397 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:17 +0800 Subject: [PATCH 0876/2157] mm/swap_state.c: remove the meaningless code comment Since commit 8d93b41c09d1 ("mm: Convert add_to_swap_cache to XArray"), there's no returned _EEXIT, so the code comment doesn't make sense any more. Link: https://lkml.kernel.org/r/20250205092721.9395-9-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swap_state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 8a84371980e9..718a8de0c07d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -203,10 +203,6 @@ bool add_to_swap(struct folio *folio) err = add_to_swap_cache(folio, entry, __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); if (err) - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. - */ goto fail; /* * Normally the folio will be dirtied in unmap because its From ac2d3268284ba4e254e4ca346bf6d63fb8a17518 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:18 +0800 Subject: [PATCH 0877/2157] mm/swapfile.c: remove the unneeded checking In free_swap_and_cache_nr(), invocation of get_swap_device() has done the checking if it's a swap entry. So remove the redundant checking here. Link: https://lkml.kernel.org/r/20250205092721.9395-10-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index cbee03aa74b8..45d25f170660 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1789,9 +1789,6 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr) bool any_only_cache = false; unsigned long offset; - if (non_swap_entry(entry)) - return; - si = get_swap_device(entry); if (!si) return; From c523aa890760b004eea47a0e00b5750a528f3ced Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:19 +0800 Subject: [PATCH 0878/2157] mm/swap: rename swap_swapcount() to swap_entry_swapped() The new function name can reflect the real behaviour of the function more clearly and more accurately. And the renaming avoids the confusion between swap_swapcount() and swp_swapcount(). Link: https://lkml.kernel.org/r/20250205092721.9395-11-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- include/linux/swap.h | 6 +++--- mm/swap_state.c | 2 +- mm/swapfile.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index bbd06cbd1f2b..2fe91c293636 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -499,7 +499,7 @@ int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); -extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); +extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; @@ -582,9 +582,9 @@ static inline int __swap_count(swp_entry_t entry) return 0; } -static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { - return 0; + return false; } static inline int swp_swapcount(swp_entry_t entry) diff --git a/mm/swap_state.c b/mm/swap_state.c index 718a8de0c07d..a54b035d6a6c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -457,7 +457,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * as SWAP_HAS_CACHE. That's done in later part of code or * else swap_off will be aborted if we return NULL. */ - if (!swap_swapcount(si, entry) && swap_slot_cache_enabled) + if (!swap_entry_swapped(si, entry) && swap_slot_cache_enabled) goto put_and_return; /* diff --git a/mm/swapfile.c b/mm/swapfile.c index 45d25f170660..d45349ed3ee1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1620,7 +1620,7 @@ int __swap_count(swp_entry_t entry) * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ -int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; @@ -1629,7 +1629,7 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); unlock_cluster(ci); - return count; + return !!count; } /* @@ -1715,7 +1715,7 @@ static bool folio_swapped(struct folio *folio) return false; if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) - return swap_swapcount(si, entry) != 0; + return swap_entry_swapped(si, entry); return swap_page_trans_huge_swapped(si, entry, folio_order(folio)); } From 4ccd4154fafff5516103051bc9162c33d832b880 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:20 +0800 Subject: [PATCH 0879/2157] mm/swapfile.c: remove the incorrect code comment Since commit eb085574a752 ("mm, swap: fix race between swapoff and some swap operations"), the non_swap_entry() checking has been taken off from function __swap_duplicate(). Hence, in the kernel-doc comment, the line 'swp_entry is migration entry -> EINVAL' is obsolete. Remove that line to avoid misleading people. Link: https://lkml.kernel.org/r/20250205092721.9395-12-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index d45349ed3ee1..c73e258bb588 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3529,7 +3529,6 @@ void si_swapinfo(struct sysinfo *val) * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL - * - swp_entry is migration entry -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM From 1d212293ffd145eebd795d5969a81a3b59f71bcb Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 5 Feb 2025 17:27:21 +0800 Subject: [PATCH 0880/2157] mm/swapfile.c: open code cluster_alloc_swap() It's only called in scan_swap_map_slots(). And also remove the stale code comment in scan_swap_map_slots() because it's not fit for the current cluster allocation mechanism. Link: https://lkml.kernel.org/r/20250205092721.9395-13-bhe@redhat.com Signed-off-by: Baoquan He Cc: Chris Li Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index c73e258bb588..cab68e57f4cc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1163,39 +1163,13 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_usage_sub(si, nr_entries); } -static int cluster_alloc_swap(struct swap_info_struct *si, - unsigned char usage, int nr, - swp_entry_t slots[], int order) -{ - int n_ret = 0; - - while (n_ret < nr) { - unsigned long offset = cluster_alloc_swap_entry(si, order, usage); - - if (!offset) - break; - slots[n_ret++] = swp_entry(si->type, offset); - } - - return n_ret; -} - static int scan_swap_map_slots(struct swap_info_struct *si, unsigned char usage, int nr, swp_entry_t slots[], int order) { unsigned int nr_pages = 1 << order; + int n_ret = 0; - /* - * We try to cluster swap pages by allocating them sequentially - * in swap. Once we've allocated SWAPFILE_CLUSTER pages this - * way, however, we resort to first-free allocation, starting - * a new cluster. This prevents us from scattering swap pages - * all over the entire swap partition, so that we reduce - * overall disk seek times between swap pages. -- sct - * But we do now try to find an empty cluster. -Andrea - * And we let swap pages go all over an SSD partition. Hugh - */ if (order > 0) { /* * Should not even be attempting large allocations when huge @@ -1215,7 +1189,15 @@ static int scan_swap_map_slots(struct swap_info_struct *si, return 0; } - return cluster_alloc_swap(si, usage, nr, slots, order); + while (n_ret < nr) { + unsigned long offset = cluster_alloc_swap_entry(si, order, usage); + + if (!offset) + break; + slots[n_ret++] = swp_entry(si->type, offset); + } + + return n_ret; } static bool get_swap_device_info(struct swap_info_struct *si) From 9a5b183941b52f84c0f9e5f27ce44e99318c9e0f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Feb 2025 13:26:33 +0100 Subject: [PATCH 0881/2157] mm, percpu: do not consider sleepable allocations atomic 28307d938fb2 ("percpu: make pcpu_alloc() aware of current gfp context") has fixed a reclaim recursion for scoped GFP_NOFS context. It has done that by avoiding taking pcpu_alloc_mutex. This is a correct solution as the worker context with full GFP_KERNEL allocation/reclaim power and which is using the same lock cannot block the NOFS pcpu_alloc caller. On the other hand this is a very conservative approach that could lead to failures because pcpu_alloc lockless implementation is quite limited. We have a bug report about premature failures when scsi array of 193 devices is scanned. Sometimes (not consistently) the scanning aborts because the iscsid daemon fails to create the queue for a random scsi device during the scan. iscsid itslef is running with PR_SET_IO_FLUSHER set so all allocations from this process context are GFP_NOIO. This in turn makes any pcpu_alloc lockless (without pcpu_alloc_mutex) which leads to pre-mature failures. It has turned out that iscsid has worked around this by dropping PR_SET_IO_FLUSHER (https://github.com/open-iscsi/open-iscsi/pull/382) when scanning host. But we can do better in this case on the kernel side and use pcpu_alloc_mutex for NOIO resp. NOFS constrained allocation scopes too. We just need the WQ worker to never trigger IO/FS reclaim. Achieve that by enforcing scoped GFP_NOIO for the whole execution of pcpu_balance_workfn (this will imply NOFS constrain as well). This will remove the dependency chain and preserve the full allocation power of the pcpu_alloc call. While at it make is_atomic really test for blockable allocations. Link: https://lkml.kernel.org/r/20250206122633.167896-1-mhocko@kernel.org Fixes: 28307d938fb2 ("percpu: make pcpu_alloc() aware of current gfp context") Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Dennis Zhou Cc: Filipe David Manana Cc: Tejun Heo Signed-off-by: Andrew Morton --- mm/percpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index ac61e3fc5f15..027fb6497495 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1745,7 +1745,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, gfp = current_gfp_context(gfp); /* whitelisted flags that can be passed to the backing allocators */ pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; + is_atomic = !gfpflags_allow_blocking(gfp); do_warn = !(gfp & __GFP_NOWARN); /* @@ -2191,7 +2191,12 @@ static void pcpu_balance_workfn(struct work_struct *work) * to grow other chunks. This then gives pcpu_reclaim_populated() time * to move fully free chunks to the active list to be freed if * appropriate. + * + * Enforce GFP_NOIO allocations because we have pcpu_alloc users + * constrained to GFP_NOIO/NOFS contexts and they could form lock + * dependency through pcpu_alloc_mutex */ + unsigned int flags = memalloc_noio_save(); mutex_lock(&pcpu_alloc_mutex); spin_lock_irq(&pcpu_lock); @@ -2202,6 +2207,7 @@ static void pcpu_balance_workfn(struct work_struct *work) spin_unlock_irq(&pcpu_lock); mutex_unlock(&pcpu_alloc_mutex); + memalloc_noio_restore(flags); } /** From 7ddeb91f5b03f25995861e9b2e1eb766aa528892 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Feb 2025 11:45:36 +0000 Subject: [PATCH 0882/2157] mm: kmemleak: add support for dumping physical and __percpu object info Patch series "mm: kmemleak: Usability improvements". Following a recent false positive tracking that led to commit 488b5b9eca68 ("mm: kmemleak: fix upper boundary check for physical address objects"), I needed kmemleak to give me more debug information about the objects it is tracking. This lead to the first patch of this series. The second patch changes the kmemleak-test module to show the raw pointers for debugging purposes. This patch (of 2): Currently, echo dump=... > /sys/kernel/debug/kmemleak only looks up the main virtual address object tree. However, for debugging, it's useful to dump information about physical address and __percpu objects. Search all three object trees for the dump= command and also print the type of the object if not virtual: "(phys)" or "(percpu)". In addition, allow search by alias (pointer within the object). Link: https://lkml.kernel.org/r/20250206114537.2597764-1-catalin.marinas@arm.com Link: https://lkml.kernel.org/r/20250206114537.2597764-2-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Andrew Morton --- mm/kmemleak.c | 54 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c6ed68604136..c12cef3eeb32 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -352,6 +352,15 @@ static bool unreferenced_object(struct kmemleak_object *object) jiffies_last_scan); } +static const char *__object_type_str(struct kmemleak_object *object) +{ + if (object->flags & OBJECT_PHYS) + return " (phys)"; + if (object->flags & OBJECT_PERCPU) + return " (percpu)"; + return ""; +} + /* * Printing of the unreferenced objects information to the seq file. The * print_unreferenced function must be called with the object->lock held. @@ -364,8 +373,9 @@ static void print_unreferenced(struct seq_file *seq, unsigned int nr_entries; nr_entries = stack_depot_fetch(object->trace_handle, &entries); - warn_or_seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n", - object->pointer, object->size); + warn_or_seq_printf(seq, "unreferenced object%s 0x%08lx (size %zu):\n", + __object_type_str(object), + object->pointer, object->size); warn_or_seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n", object->comm, object->pid, object->jiffies); hex_dump_object(seq, object); @@ -384,10 +394,10 @@ static void print_unreferenced(struct seq_file *seq, */ static void dump_object_info(struct kmemleak_object *object) { - pr_notice("Object 0x%08lx (size %zu):\n", - object->pointer, object->size); + pr_notice("Object%s 0x%08lx (size %zu):\n", + __object_type_str(object), object->pointer, object->size); pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", - object->comm, object->pid, object->jiffies); + object->comm, object->pid, object->jiffies); pr_notice(" min_count = %d\n", object->min_count); pr_notice(" count = %d\n", object->count); pr_notice(" flags = 0x%x\n", object->flags); @@ -1998,25 +2008,41 @@ static int kmemleak_open(struct inode *inode, struct file *file) return seq_open(file, &kmemleak_seq_ops); } -static int dump_str_object_info(const char *str) +static bool __dump_str_object_info(unsigned long addr, unsigned int objflags) { unsigned long flags; struct kmemleak_object *object; - unsigned long addr; - if (kstrtoul(str, 0, &addr)) - return -EINVAL; - object = find_and_get_object(addr, 0); - if (!object) { - pr_info("Unknown object at 0x%08lx\n", addr); - return -EINVAL; - } + object = __find_and_get_object(addr, 1, objflags); + if (!object) + return false; raw_spin_lock_irqsave(&object->lock, flags); dump_object_info(object); raw_spin_unlock_irqrestore(&object->lock, flags); put_object(object); + + return true; +} + +static int dump_str_object_info(const char *str) +{ + unsigned long addr; + bool found = false; + + if (kstrtoul(str, 0, &addr)) + return -EINVAL; + + found |= __dump_str_object_info(addr, 0); + found |= __dump_str_object_info(addr, OBJECT_PHYS); + found |= __dump_str_object_info(addr, OBJECT_PERCPU); + + if (!found) { + pr_info("Unknown object at 0x%08lx\n", addr); + return -EINVAL; + } + return 0; } From fe1136b4ccbfac9b8e72d4551d1ce788a67d59cb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Feb 2025 11:45:37 +0000 Subject: [PATCH 0883/2157] samples: kmemleak: print the raw pointers for debugging purposes The kmemleak-test.c module is meant to leak some pointers for debugging the kmemleak detection, pointer information dumping. It's no use if it prints the hashed values of such pointers. Change the printk() format from %p to %px. While at it, also display the raw __percpu pointer rather than this_cpu_ptr() since kmemleak now tracks such pointers independently of the standard allocations. Link: https://lkml.kernel.org/r/20250206114537.2597764-3-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Andrew Morton --- samples/kmemleak/kmemleak-test.c | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c index 544c36d51d56..8609812a37eb 100644 --- a/samples/kmemleak/kmemleak-test.c +++ b/samples/kmemleak/kmemleak-test.c @@ -40,25 +40,25 @@ static int kmemleak_test_init(void) pr_info("Kmemleak testing\n"); /* make some orphan objects */ - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); - pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); #ifndef CONFIG_MODULES - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); #endif - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); /* * Add elements to a list. They should only appear as orphan @@ -66,7 +66,7 @@ static int kmemleak_test_init(void) */ for (i = 0; i < 10; i++) { elem = kzalloc(sizeof(*elem), GFP_KERNEL); - pr_info("kzalloc(sizeof(*elem)) = %p\n", elem); + pr_info("kzalloc(sizeof(*elem)) = 0x%px\n", elem); if (!elem) return -ENOMEM; INIT_LIST_HEAD(&elem->list); @@ -75,11 +75,11 @@ static int kmemleak_test_init(void) for_each_possible_cpu(i) { per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL); - pr_info("kmalloc(129) = %p\n", + pr_info("kmalloc(129) = 0x%px\n", per_cpu(kmemleak_test_pointer, i)); } - pr_info("__alloc_percpu(64, 4) = %p\n", __alloc_percpu(64, 4)); + pr_info("__alloc_percpu(64, 4) = 0x%px\n", __alloc_percpu(64, 4)); return 0; } From 3a06696305e757f652dd0dcf4dfa2272eda39434 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Fri, 7 Feb 2025 13:20:32 -0800 Subject: [PATCH 0884/2157] mm/damon/ops: have damon_get_folio return folio even for tail pages Patch series "mm/damon/paddr: fix large folios access and schemes handling". DAMON operations set for physical address space, namely 'paddr', treats tail pages as unaccessed always. It can also apply DAMOS action to a large folio multiple times within single DAMOS' regions walking. As a result, the monitoring output has poor quality and DAMOS works in unexpected ways when large folios are being used. Fix those. The patches were parts of Usama's hugepage_size DAMOS filter patch series[1]. The first fix has collected from there with a slight commit message change for the subject prefix. The second fix is re-written by SJ and posted as an RFC before this series. The second one also got a slight commit message change for the subject prefix. [1] https://lore.kernel.org/20250203225604.44742-1-usamaarif642@gmail.com [2] https://lore.kernel.org/20250206231103.38298-1-sj@kernel.org This patch (of 2): This effectively adds support for large folios in damon for paddr, as damon_pa_mkold/young won't get a null folio from this function and won't ignore it, hence access will be checked and reported. This also means that larger folios will be considered for different DAMOS actions like pageout, prioritization and migration. As these DAMOS actions will consider larger folios, iterate through the region at folio_size and not PAGE_SIZE intervals. This should not have an affect on vaddr, as damon_young_pmd_entry considers pmd entries. Link: https://lkml.kernel.org/r/20250207212033.45269-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250207212033.45269-2-sj@kernel.org Fixes: a28397beb55b ("mm/damon: implement primitives for physical address space monitoring") Signed-off-by: Usama Arif Signed-off-by: SeongJae Park Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/ops-common.c | 2 +- mm/damon/paddr.c | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 86a50e8fbc80..0db1fc70c84d 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -26,7 +26,7 @@ struct folio *damon_get_folio(unsigned long pfn) struct page *page = pfn_to_online_page(pfn); struct folio *folio; - if (!page || PageTail(page)) + if (!page) return NULL; folio = page_folio(page); diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 1a89920efce9..2ac19ebc7076 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -277,11 +277,14 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, damos_add_filter(s, filter); } - for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + addr = r->ar.start; + while (addr < r->ar.end) { struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - if (!folio) + if (!folio) { + addr += PAGE_SIZE; continue; + } if (damos_pa_filter_out(s, folio)) goto put_folio; @@ -297,6 +300,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, else list_add(&folio->lru, &folio_list); put_folio: + addr += folio_size(folio); folio_put(folio); } if (install_young_filter) @@ -312,11 +316,14 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate( { unsigned long addr, applied = 0; - for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + addr = r->ar.start; + while (addr < r->ar.end) { struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - if (!folio) + if (!folio) { + addr += PAGE_SIZE; continue; + } if (damos_pa_filter_out(s, folio)) goto put_folio; @@ -329,6 +336,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate( folio_deactivate(folio); applied += folio_nr_pages(folio); put_folio: + addr += folio_size(folio); folio_put(folio); } return applied * PAGE_SIZE; @@ -475,11 +483,14 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s, unsigned long addr, applied; LIST_HEAD(folio_list); - for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + addr = r->ar.start; + while (addr < r->ar.end) { struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - if (!folio) + if (!folio) { + addr += PAGE_SIZE; continue; + } if (damos_pa_filter_out(s, folio)) goto put_folio; @@ -490,6 +501,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s, goto put_folio; list_add(&folio->lru, &folio_list); put_folio: + addr += folio_size(folio); folio_put(folio); } applied = damon_pa_migrate_pages(&folio_list, s->target_nid); From 94ba17adaba0f651fdcf745c8891a88e2e028cfa Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 7 Feb 2025 13:20:33 -0800 Subject: [PATCH 0885/2157] mm/damon: avoid applying DAMOS action to same entity multiple times 'paddr' DAMON operations set can apply a DAMOS scheme's action to a large folio multiple times in single DAMOS-regions-walk if the folio is laid on multiple DAMON regions. Add a field for DAMOS scheme object that can be used by the underlying ops to know what was the last entity that the scheme's action has applied. The core layer unsets the field when each DAMOS-regions-walk is done for the given scheme. And update 'paddr' ops to use the infrastructure to avoid the problem. Link: https://lkml.kernel.org/r/20250207212033.45269-3-sj@kernel.org Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme") Signed-off-by: SeongJae Park Reported-by: Usama Arif Closes: https://lore.kernel.org/20250203225604.44742-3-usamaarif642@gmail.com Cc: Signed-off-by: Andrew Morton --- include/linux/damon.h | 11 +++++++++++ mm/damon/core.c | 1 + mm/damon/paddr.c | 39 +++++++++++++++++++++++++++------------ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index c9074d569596..b4d37d9b9221 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -432,6 +432,7 @@ struct damos_access_pattern { * @wmarks: Watermarks for automated (in)activation of this scheme. * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. + * @last_applied: Last @action applied ops-managing entity. * @stat: Statistics of this scheme. * @list: List head for siblings. * @@ -454,6 +455,15 @@ struct damos_access_pattern { * implementation could check pages of the region and skip &action to respect * &filters * + * The minimum entity that @action can be applied depends on the underlying + * &struct damon_operations. Since it may not be aligned with the core layer + * abstract, namely &struct damon_region, &struct damon_operations could apply + * @action to same entity multiple times. Large folios that underlying on + * multiple &struct damon region objects could be such examples. The &struct + * damon_operations can use @last_applied to avoid that. DAMOS core logic + * unsets @last_applied when each regions walking for applying the scheme is + * finished. + * * After applying the &action to each region, &stat_count and &stat_sz is * updated to reflect the number of regions and total size of regions that the * &action is applied. @@ -482,6 +492,7 @@ struct damos { int target_nid; }; struct list_head filters; + void *last_applied; struct damos_stat stat; struct list_head list; }; diff --git a/mm/damon/core.c b/mm/damon/core.c index 384935ef4e65..dc8f94fe7c3b 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1856,6 +1856,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c) s->next_apply_sis = c->passed_sample_intervals + (s->apply_interval_us ? s->apply_interval_us : c->attrs.aggr_interval) / sample_interval; + s->last_applied = NULL; } } diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 2ac19ebc7076..eb10a723b0a7 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -254,6 +254,17 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) return false; } +static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s) +{ + if (!folio) + return true; + if (folio == s->last_applied) { + folio_put(folio); + return true; + } + return false; +} + static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, unsigned long *sz_filter_passed) { @@ -261,6 +272,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, LIST_HEAD(folio_list); bool install_young_filter = true; struct damos_filter *filter; + struct folio *folio; /* check access in page level again by default */ damos_for_each_filter(filter, s) { @@ -279,9 +291,8 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, addr = r->ar.start; while (addr < r->ar.end) { - struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - - if (!folio) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { addr += PAGE_SIZE; continue; } @@ -307,6 +318,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, damos_destroy_filter(filter); applied = reclaim_pages(&folio_list); cond_resched(); + s->last_applied = folio; return applied * PAGE_SIZE; } @@ -315,12 +327,12 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate( unsigned long *sz_filter_passed) { unsigned long addr, applied = 0; + struct folio *folio; addr = r->ar.start; while (addr < r->ar.end) { - struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - - if (!folio) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { addr += PAGE_SIZE; continue; } @@ -339,6 +351,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate( addr += folio_size(folio); folio_put(folio); } + s->last_applied = folio; return applied * PAGE_SIZE; } @@ -482,12 +495,12 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s, { unsigned long addr, applied; LIST_HEAD(folio_list); + struct folio *folio; addr = r->ar.start; while (addr < r->ar.end) { - struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - - if (!folio) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { addr += PAGE_SIZE; continue; } @@ -506,6 +519,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s, } applied = damon_pa_migrate_pages(&folio_list, s->target_nid); cond_resched(); + s->last_applied = folio; return applied * PAGE_SIZE; } @@ -523,15 +537,15 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s, { unsigned long addr; LIST_HEAD(folio_list); + struct folio *folio; if (!damon_pa_scheme_has_filter(s)) return 0; addr = r->ar.start; while (addr < r->ar.end) { - struct folio *folio = damon_get_folio(PHYS_PFN(addr)); - - if (!folio) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { addr += PAGE_SIZE; continue; } @@ -541,6 +555,7 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s, addr += folio_size(folio); folio_put(folio); } + s->last_applied = folio; return 0; } From e92b6e7bb6189d688ab8f9b27e3992cd8568ee4b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 7 Feb 2025 17:24:42 +0000 Subject: [PATCH 0886/2157] mm: use READ/WRITE_ONCE() for vma->vm_flags on migrate, mprotect According to the syzbot report referenced here, it is possible to encounter a race between mprotect() writing to the vma->vm_flags field and migration checking whether the VMA is locked. There is no real problem with timing here per se, only that torn reads/writes may occur. Therefore, as a proximate fix, ensure both operations READ_ONCE() and WRITE_ONCE() to avoid this. This race is possible due to the ability to look up VMAs via the rmap, which migration does in this case, which takes no mmap or VMA lock and therefore does not preclude an operation to modify a VMA. When the final update of VMA flags is performed by mprotect, this will cause the rmap lock to be taken while the VMA is inserted on split/merge. However the means by which we perform splits/merges in the kernel is that we perform the split/merge operation on the VMA, acquiring/releasing locks as needed, and only then, after having done so, modifying fields. We should carefully examine and determine whether we can combine the two operations so as to avoid such races, and whether it might be possible to otherwise annotate these rmap field accesses. Link: https://lkml.kernel.org/r/20250207172442.78836-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+c2e5712cbb14c95d4847@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67a34e60.050a0220.50516.0040.GAE@google.com/ Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/migrate.c | 2 +- mm/mprotect.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 97f0edf0c032..a991d3691bda 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -328,7 +328,7 @@ static bool remove_migration_pte(struct folio *folio, folio_add_file_rmap_pte(folio, new, vma); set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); } - if (vma->vm_flags & VM_LOCKED) + if (READ_ONCE(vma->vm_flags) & VM_LOCKED) mlock_drain_local(); trace_remove_migration_pte(pvmw.address, pte_val(pte), diff --git a/mm/mprotect.c b/mm/mprotect.c index 9cb6ab7c4048..1444878f7aeb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -599,7 +599,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, unsigned long start, unsigned long end, unsigned long newflags) { struct mm_struct *mm = vma->vm_mm; - unsigned long oldflags = vma->vm_flags; + unsigned long oldflags = READ_ONCE(vma->vm_flags); long nrpages = (end - start) >> PAGE_SHIFT; unsigned int mm_cp_flags = 0; unsigned long charged = 0; @@ -619,7 +619,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, * uncommon case, so doesn't need to be very optimized. */ if (arch_has_pfn_modify_check() && - (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && + (oldflags & (VM_PFNMAP|VM_MIXEDMAP)) && (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); @@ -668,7 +668,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, * held in write mode. */ vma_start_write(vma); - vm_flags_reset(vma, newflags); + vm_flags_reset_once(vma, newflags); if (vma_wants_manual_pte_write_upgrade(vma)) mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; vma_set_page_prot(vma); From 8d9a2f5d8abd3ba4355f25e60827c1ee082cd215 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 7 Feb 2025 10:04:53 +0000 Subject: [PATCH 0887/2157] mm/mm_init.c: use round_up() to align movable range Since MAX_ORDER_NR_PAGES is power of 2, let's use a faster version. Link: https://lkml.kernel.org/r/20250207100453.9989-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Anshuman Khandual Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- mm/mm_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 2630cc30147e..de18d3ad12e1 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -438,7 +438,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) * was requested by the user */ required_movablecore = - roundup(required_movablecore, MAX_ORDER_NR_PAGES); + round_up(required_movablecore, MAX_ORDER_NR_PAGES); required_movablecore = min(totalpages, required_movablecore); corepages = totalpages - required_movablecore; @@ -549,7 +549,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) unsigned long start_pfn, end_pfn; zone_movable_pfn[nid] = - roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + round_up(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); if (zone_movable_pfn[nid] >= end_pfn) From c32696ca5e8e9fff83c951f3aa45cac2e97b0667 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 10 Feb 2025 10:27:34 -0800 Subject: [PATCH 0888/2157] mm/damon/core: unset damos->walk_completed after confimed set Patch series "mm/damon/core: fix wrong and/or useless damos_walk() behaviors". damos_walk() can finish working earlier or later than expected, and start earlier than practical. First two behaviors are clearly wrong behavior (doesn't follow the documentation) and all three behaviors are only making the feature useless. Fix those. This patch (of 3): damos->walk_completed is only set, not unset. This can cause next damos_walk() finish earlier than expected. Unset it after all walk_completed is confirmed. Link: https://lkml.kernel.org/r/20250210182737.134994-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250210182737.134994-2-sj@kernel.org Fixes: bf0eaba0ff9c ("mm/damon/core: implement damos_walk()") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index dc8f94fe7c3b..8e4ae9901b19 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1494,6 +1494,9 @@ static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s) if (!siter->walk_completed) return; } + damon_for_each_scheme(siter, ctx) + siter->walk_completed = false; + complete(&control->completion); mutex_lock(&ctx->walk_control_lock); ctx->walk_control = NULL; From 40eb655b410d5c842313e556f743888033687865 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 10 Feb 2025 10:27:35 -0800 Subject: [PATCH 0889/2157] mm/damon/core: do not call damos_walk_control->walk() if walk is completed damos_walk() invokes callback functions of schemes until all schemes finishes at least one round of walks. If there are multiple DAMOS schemes having different apply_interval, the callback functions for longer apply interval scheme will be called for more than a round of the walk. The behavior is different from the document (see damos_walk() kernel-doc comment), and not useful. Make the behavior be same to the documented one, by stopping invoking the callback if the walk for the given scheme is completed. Link: https://lkml.kernel.org/r/20250210182737.134994-3-sj@kernel.org Fixes: bf0eaba0ff9c ("mm/damon/core: implement damos_walk()") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 8e4ae9901b19..e129fb785970 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1458,6 +1458,9 @@ static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t, { struct damos_walk_control *control; + if (s->walk_completed) + return; + mutex_lock(&ctx->walk_control_lock); control = ctx->walk_control; mutex_unlock(&ctx->walk_control_lock); From 6fa70372c86162608c522eeaa58201d6c11ab773 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 10 Feb 2025 10:27:36 -0800 Subject: [PATCH 0890/2157] mm/damon/core: do damos walking in entire regions granularity damos_walk_control can be installed while DAMOS is walking the regions. This means the walk callback function invocations can be started from a region at the middle of the regions list. This makes it hard to be used reliably. Particularly, DAMOS tried regions update for collecting monitoring results gets problematic results. Increase the walk_control_lock critical section to do walking in entire regions granularity. Link: https://lkml.kernel.org/r/20250210182737.134994-4-sj@kernel.org Fixes: bf0eaba0ff9c ("mm/damon/core: implement damos_walk()") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index e129fb785970..f663c8e99dfa 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1461,11 +1461,10 @@ static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t, if (s->walk_completed) return; - mutex_lock(&ctx->walk_control_lock); control = ctx->walk_control; - mutex_unlock(&ctx->walk_control_lock); if (!control) return; + control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed); } @@ -1485,9 +1484,7 @@ static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s) struct damos *siter; struct damos_walk_control *control; - mutex_lock(&ctx->walk_control_lock); control = ctx->walk_control; - mutex_unlock(&ctx->walk_control_lock); if (!control) return; @@ -1501,9 +1498,7 @@ static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s) siter->walk_completed = false; complete(&control->completion); - mutex_lock(&ctx->walk_control_lock); ctx->walk_control = NULL; - mutex_unlock(&ctx->walk_control_lock); } /* @@ -1850,6 +1845,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c) if (!has_schemes_to_apply) return; + mutex_lock(&c->walk_control_lock); damon_for_each_target(t, c) { damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); @@ -1864,6 +1860,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c) c->attrs.aggr_interval) / sample_interval; s->last_applied = NULL; } + mutex_unlock(&c->walk_control_lock); } /* From 6e80c0aaad469e0a923ea0d7018fb1464e992018 Mon Sep 17 00:00:00 2001 From: Bertrand Wlodarczyk Date: Mon, 10 Feb 2025 17:07:49 +0100 Subject: [PATCH 0891/2157] vmscan, cleanup: add for_each_managed_zone_pgdat macro The macro is introduced to eliminate redundancy in the repeated iteration over managed zones in pgdat data structure, reducing the potential for errors. This change doesn't introduce any functional modifications. Due to concentration of the pattern in vmscan.c the macro is placed locally in that file. Link: https://lkml.kernel.org/r/20250210160818.686-1-bertrand.wlodarczyk@intel.com Signed-off-by: Bertrand Wlodarczyk Reviewed-by: Tim Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Cc: Michal Hocko Signed-off-by: Andrew Morton --- mm/vmscan.c | 83 +++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 51 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index bc1826020159..fcca38bc640f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -271,6 +271,25 @@ static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg) } #endif +/* for_each_managed_zone_pgdat - helper macro to iterate over all managed zones in a pgdat up to + * and including the specified highidx + * @zone: The current zone in the iterator + * @pgdat: The pgdat which node_zones are being iterated + * @idx: The index variable + * @highidx: The index of the highest zone to return + * + * This macro iterates through all managed zones up to and including the specified highidx. + * The zone iterator enters an invalid state after macro call and must be reinitialized + * before it can be used again. + */ +#define for_each_managed_zone_pgdat(zone, pgdat, idx, highidx) \ + for ((idx) = 0, (zone) = (pgdat)->node_zones; \ + (idx) <= (highidx); \ + (idx)++, (zone)++) \ + if (!managed_zone(zone)) \ + continue; \ + else + static void set_task_reclaim_state(struct task_struct *task, struct reclaim_state *rs) { @@ -396,13 +415,9 @@ static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, { unsigned long size = 0; int zid; + struct zone *zone; - for (zid = 0; zid <= zone_idx; zid++) { - struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; - - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, lruvec_pgdat(lruvec), zid, zone_idx) { if (!mem_cgroup_disabled()) size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid); else @@ -495,7 +510,7 @@ static bool skip_throttle_noprogress(pg_data_t *pgdat) { int reclaimable = 0, write_pending = 0; int i; - + struct zone *zone; /* * If kswapd is disabled, reschedule if necessary but do not * throttle as the system is likely near OOM. @@ -508,12 +523,7 @@ static bool skip_throttle_noprogress(pg_data_t *pgdat) * throttle as throttling will occur when the folios cycle * towards the end of the LRU if still under writeback. */ - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, i, MAX_NR_ZONES - 1) { reclaimable += zone_reclaimable_pages(zone); write_pending += zone_page_state_snapshot(zone, NR_ZONE_WRITE_PENDING); @@ -2372,17 +2382,13 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc) unsigned long total_high_wmark = 0; unsigned long free, anon; int z; + struct zone *zone; free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); file = node_page_state(pgdat, NR_ACTIVE_FILE) + node_page_state(pgdat, NR_INACTIVE_FILE); - for (z = 0; z < MAX_NR_ZONES; z++) { - struct zone *zone = &pgdat->node_zones[z]; - - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, z, MAX_NR_ZONES - 1) { total_high_wmark += high_wmark_pages(zone); } @@ -5851,6 +5857,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long pages_for_compaction; unsigned long inactive_lru_pages; int z; + struct zone *zone; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -5870,11 +5877,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return false; /* If compaction would go ahead or the allocation would succeed, stop */ - for (z = 0; z <= sc->reclaim_idx; z++) { - struct zone *zone = &pgdat->node_zones[z]; - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, z, sc->reclaim_idx) { /* Allocation can already succeed, nothing to do */ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone), sc->reclaim_idx, 0)) @@ -6401,11 +6404,7 @@ static bool allow_direct_reclaim(pg_data_t *pgdat) if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) return true; - for (i = 0; i <= ZONE_NORMAL; i++) { - zone = &pgdat->node_zones[i]; - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, i, ZONE_NORMAL) { if (!zone_reclaimable_pages(zone)) continue; @@ -6710,12 +6709,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * Check watermarks bottom-up as lower zones are more likely to * meet watermarks. */ - for (i = 0; i <= highest_zoneidx; i++) { - zone = pgdat->node_zones + i; - - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) mark = promo_wmark_pages(zone); else @@ -6800,11 +6794,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, /* Reclaim a number of pages proportional to the number of zones */ sc->nr_to_reclaim = 0; - for (z = 0; z <= sc->reclaim_idx; z++) { - zone = pgdat->node_zones + z; - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, z, sc->reclaim_idx) { sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); } @@ -6835,12 +6825,7 @@ update_reclaim_active(pg_data_t *pgdat, int highest_zoneidx, bool active) int i; struct zone *zone; - for (i = 0; i <= highest_zoneidx; i++) { - zone = pgdat->node_zones + i; - - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { if (active) set_bit(ZONE_RECLAIM_ACTIVE, &zone->flags); else @@ -6901,11 +6886,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) * stall or direct reclaim until kswapd is finished. */ nr_boost_reclaim = 0; - for (i = 0; i <= highest_zoneidx; i++) { - zone = pgdat->node_zones + i; - if (!managed_zone(zone)) - continue; - + for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { nr_boost_reclaim += zone->watermark_boost; zone_boosts[i] = zone->watermark_boost; } From 67254c7d70b63797a54173fbe05cd6552aca11d4 Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Mon, 10 Feb 2025 02:10:23 +0800 Subject: [PATCH 0892/2157] maple_tree: correct comment for mas_start() There's no mas->status of "mas_start", what the function is checking is whether mas->status equals to "ma_start". Correct the comment for the function. Link: https://lkml.kernel.org/r/20250209181023.228856-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index f7153ade1be5..42c65974a56c 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1353,7 +1353,7 @@ static void mas_node_count(struct ma_state *mas, int count) * mas_start() - Sets up maple state for operations. * @mas: The maple state. * - * If mas->status == mas_start, then set the min, max and depth to + * If mas->status == ma_start, then set the min, max and depth to * defaults. * * Return: From c2661f5fe888c14eb1f09ed23e74213366fa70f3 Mon Sep 17 00:00:00 2001 From: David Laight Date: Sun, 9 Feb 2025 17:47:11 +0000 Subject: [PATCH 0893/2157] mm: remove the access_ok() call from gup_fast_fallback() Historiaclly the code relied on access_ok() to validate the address range. Commit 26f4c328079d7 added an explicit wrap check before access_ok(). Commit c28b1fc70390d then changed the wrap test to use check_add_overflow(). Commit 6014bc27561f2 relaxed the checks in x86-64's access_ok() and added an explicit check for TASK_SIZE here to make up for it. That left a pointless access_ok() call with its associated 'lfence' that can never actually fail. So just delete the test. Link: https://lkml.kernel.org/r/20250209174711.60889-1-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Cc: Thomas Gleixner Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jan Kara Cc: John Hubbard Cc: Linus Torvalds Cc: Peter Xu Cc: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton --- mm/gup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 61e751baf862..e42e4fdaf765 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2760,7 +2760,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * * *) ptes can be read atomically by the architecture. * - * *) access_ok is sufficient to validate userspace address ranges. + * *) valid user addesses are below TASK_MAX_SIZE * * The last two assumptions can be relaxed by the addition of helper functions. * @@ -3414,8 +3414,6 @@ static int gup_fast_fallback(unsigned long start, unsigned long nr_pages, return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; - if (unlikely(!access_ok((void __user *)start, len))) - return -EFAULT; nr_pinned = gup_fast(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) From 3fae696393c7f07aac5db249fc2c537e8744c831 Mon Sep 17 00:00:00 2001 From: Eric Salem Date: Sat, 8 Feb 2025 20:36:36 -0600 Subject: [PATCH 0894/2157] selftests: mm: fix typo Fix misspelling. Link: https://lkml.kernel.org/r/77e0e915-36c3-4c95-84b8-0b73aaa17951@gmail.com Signed-off-by: Eric Salem Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..31e0c8a3110d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -323,7 +323,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) ret = userfaultfd_open(&features); if (ret) { if (errmsg) - *errmsg = "possible lack of priviledge"; + *errmsg = "possible lack of privilege"; return ret; } From cedae19487a368d899301c16ab576b0aedb9396d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 8 Feb 2025 15:52:54 +0000 Subject: [PATCH 0895/2157] mm: refactor rmap_walk_file() to separate out traversal logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "expose mapping wrprotect, fix fb_defio use", v3. Right now the only means by which we can write-protect a range using the reverse mapping is via folio_mkclean(). However this is not always the appropriate means of doing so, specifically in the case of the framebuffer deferred I/O logic (fb_defio enabled by CONFIG_FB_DEFERRED_IO). There, kernel pages are mapped read-only and write-protect faults used to batch up I/O operations. Each time the deferred work is done, folio_mkclean() is used to mark the framebuffer page as having had I/O performed on it. However doing so requires the kernel page (perhaps allocated via vmalloc()) to have its page->mapping, index fields set so the rmap can find everything that maps it in order to write-protect. This is problematic as firstly, these fields should not be set for kernel-allocated memory, and secondly these are not folios (it's not user memory) and page->index, mapping fields are now deprecated and soon to be removed. The removal of these fields is imminent, rendering this series more urgent than it might first appear. The implementers cannot be blamed for having used this however, as there is simply no other way of performing this operation correctly. This series fixes this - we provide the mapping_wrprotect_range() function to allow the reverse mapping to be used to look up mappings from the page cache object (i.e. its address_space pointer) at a specific offset. The fb_defio logic already stores this offset, and can simply be expanded to keep track of the page cache object, so the change then becomes straight-forward. This series should have no functional change. This patch (of 3): In order to permit the traversal of the reverse mapping at a specified mapping and offset rather than those specified by an input folio, we need to separate out the portion of the rmap file logic which deals with this traversal from those parts of the logic which interact with the folio. This patch achieves this by adding a new static __rmap_walk_file() function which rmap_walk_file() invokes. This function permits the ability to pass NULL folio, on the assumption that the caller has provided for this correctly in the callbacks specified in the rmap_walk_control object. Though it provides for this, and adds debug asserts to ensure that, should a folio be specified, these are equal to the mapping and offset specified in the folio, there should be no functional change as a result of this patch. The reason for adding this is to enable for future changes to permit users to be able to traverse mappings of userland-mapped kernel memory, write-protecting those mappings to enable page_mkwrite() or pfn_mkwrite() fault handlers to be retriggered on subsequent dirty. Link: https://lkml.kernel.org/r/cover.1739029358.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/0d1acec0cba1e5a12f9b53efcabc397541c90517.1739029358.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Helge Deller Cc: Jaya Kumar Cc: Kajtar Zsolt Cc: Maíra Canal Cc: Matthew Wilcox [English fixes] Cc: Simona Vetter Cc: Thomas Zimemrmann Signed-off-by: Andrew Morton --- mm/rmap.c | 79 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 0f760b93fc0a..3da1ca49881c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2614,35 +2614,37 @@ static void rmap_walk_anon(struct folio *folio, anon_vma_unlock_read(anon_vma); } -/* - * rmap_walk_file - do something to file page using the object-based rmap method - * @folio: the folio to be handled - * @rwc: control variable according to each walk type - * @locked: caller holds relevant rmap lock +/** + * __rmap_walk_file() - Traverse the reverse mapping for a file-backed mapping + * of a page mapped within a specified page cache object at a specified offset. * - * Find all the mappings of a folio using the mapping pointer and the vma chains - * contained in the address_space struct it points to. + * @folio: Either the folio whose mappings to traverse, or if NULL, + * the callbacks specified in @rwc will be configured such + * as to be able to look up mappings correctly. + * @mapping: The page cache object whose mapping VMAs we intend to + * traverse. If @folio is non-NULL, this should be equal to + * folio_mapping(folio). + * @pgoff_start: The offset within @mapping of the page which we are + * looking up. If @folio is non-NULL, this should be equal + * to folio_pgoff(folio). + * @nr_pages: The number of pages mapped by the mapping. If @folio is + * non-NULL, this should be equal to folio_nr_pages(folio). + * @rwc: The reverse mapping walk control object describing how + * the traversal should proceed. + * @locked: Is the @mapping already locked? If not, we acquire the + * lock. */ -static void rmap_walk_file(struct folio *folio, - struct rmap_walk_control *rwc, bool locked) +static void __rmap_walk_file(struct folio *folio, struct address_space *mapping, + pgoff_t pgoff_start, unsigned long nr_pages, + struct rmap_walk_control *rwc, bool locked) { - struct address_space *mapping = folio_mapping(folio); - pgoff_t pgoff_start, pgoff_end; + pgoff_t pgoff_end = pgoff_start + nr_pages - 1; struct vm_area_struct *vma; - /* - * The page lock not only makes sure that page->mapping cannot - * suddenly be NULLified by truncation, it makes sure that the - * structure at mapping cannot be freed and reused yet, - * so we can safely take mapping->i_mmap_rwsem. - */ - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_WARN_ON_FOLIO(folio && mapping != folio_mapping(folio), folio); + VM_WARN_ON_FOLIO(folio && pgoff_start != folio_pgoff(folio), folio); + VM_WARN_ON_FOLIO(folio && nr_pages != folio_nr_pages(folio), folio); - if (!mapping) - return; - - pgoff_start = folio_pgoff(folio); - pgoff_end = pgoff_start + folio_nr_pages(folio) - 1; if (!locked) { if (i_mmap_trylock_read(mapping)) goto lookup; @@ -2657,8 +2659,7 @@ static void rmap_walk_file(struct folio *folio, lookup: vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) { - unsigned long address = vma_address(vma, pgoff_start, - folio_nr_pages(folio)); + unsigned long address = vma_address(vma, pgoff_start, nr_pages); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); @@ -2671,12 +2672,38 @@ static void rmap_walk_file(struct folio *folio, if (rwc->done && rwc->done(folio)) goto done; } - done: if (!locked) i_mmap_unlock_read(mapping); } +/* + * rmap_walk_file - do something to file page using the object-based rmap method + * @folio: the folio to be handled + * @rwc: control variable according to each walk type + * @locked: caller holds relevant rmap lock + * + * Find all the mappings of a folio using the mapping pointer and the vma chains + * contained in the address_space struct it points to. + */ +static void rmap_walk_file(struct folio *folio, + struct rmap_walk_control *rwc, bool locked) +{ + /* + * The folio lock not only makes sure that folio->mapping cannot + * suddenly be NULLified by truncation, it makes sure that the structure + * at mapping cannot be freed and reused yet, so we can safely take + * mapping->i_mmap_rwsem. + */ + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (!folio->mapping) + return; + + __rmap_walk_file(folio, folio->mapping, folio->index, + folio_nr_pages(folio), rwc, locked); +} + void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc) { if (unlikely(folio_test_ksm(folio))) From a4811f53bb89b3524629b1be41add9349fe0a750 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 8 Feb 2025 15:52:55 +0000 Subject: [PATCH 0896/2157] mm: provide mapping_wrprotect_range() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the fb_defio video driver, page dirty state is used to determine when frame buffer pages have been changed, allowing for batched, deferred I/O to be performed for efficiency. This implementation had only one means of doing so effectively - the use of the folio_mkclean() function. However, this use of the function is inappropriate, as the fb_defio implementation allocates kernel memory to back the framebuffer, and then is forced to specified page->index, mapping fields in order to permit the folio_mkclean() rmap traversal to proceed correctly. It is not correct to specify these fields on kernel-allocated memory, and moreover since these are not folios, page->index, mapping are deprecated fields, soon to be removed. We therefore need to provide a means by which we can correctly traverse the reverse mapping and write-protect mappings for a page backing an address_space page cache object at a given offset. This patch provides this - mapping_wrprotect_range() - which allows for this operation to be performed for a specified address_space, offset, PFN and size, without requiring a folio nor, of course, an inappropriate use of page->index, mapping. With this provided, we can subsequently adjust the fb_defio implementation to make use of this function and avoid incorrect invocation of folio_mkclean() and more importantly, incorrect manipulation of page->index and mapping fields. Link: https://lkml.kernel.org/r/e5bf969d64e7f2f2ae944d42341fc8994b736a81.1739029358.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Helge Deller Cc: Jaya Kumar Cc: Kajtar Zsolt Cc: Maíra Canal Cc: Matthew Wilcox Cc: Simona Vetter Cc: Thomas Zimemrmann Signed-off-by: Andrew Morton --- include/linux/rmap.h | 3 ++ mm/rmap.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 86425d42c1a9..69e9a431a40e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -738,6 +738,9 @@ unsigned long page_address_in_vma(const struct folio *folio, */ int folio_mkclean(struct folio *); +int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff, + unsigned long pfn, unsigned long nr_pages); + int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); diff --git a/mm/rmap.c b/mm/rmap.c index 3da1ca49881c..24bacce9971f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1135,6 +1135,80 @@ int folio_mkclean(struct folio *folio) } EXPORT_SYMBOL_GPL(folio_mkclean); +struct wrprotect_file_state { + int cleaned; + pgoff_t pgoff; + unsigned long pfn; + unsigned long nr_pages; +}; + +static bool mapping_wrprotect_range_one(struct folio *folio, + struct vm_area_struct *vma, unsigned long address, void *arg) +{ + struct wrprotect_file_state *state = (struct wrprotect_file_state *)arg; + struct page_vma_mapped_walk pvmw = { + .pfn = state->pfn, + .nr_pages = state->nr_pages, + .pgoff = state->pgoff, + .vma = vma, + .address = address, + .flags = PVMW_SYNC, + }; + + state->cleaned += page_vma_mkclean_one(&pvmw); + + return true; +} + +static void __rmap_walk_file(struct folio *folio, struct address_space *mapping, + pgoff_t pgoff_start, unsigned long nr_pages, + struct rmap_walk_control *rwc, bool locked); + +/** + * mapping_wrprotect_range() - Write-protect all mappings in a specified range. + * + * @mapping: The mapping whose reverse mapping should be traversed. + * @pgoff: The page offset at which @pfn is mapped within @mapping. + * @pfn: The PFN of the page mapped in @mapping at @pgoff. + * @nr_pages: The number of physically contiguous base pages spanned. + * + * Traverses the reverse mapping, finding all VMAs which contain a shared + * mapping of the pages in the specified range in @mapping, and write-protects + * them (that is, updates the page tables to mark the mappings read-only such + * that a write protection fault arises when the mappings are written to). + * + * The @pfn value need not refer to a folio, but rather can reference a kernel + * allocation which is mapped into userland. We therefore do not require that + * the page maps to a folio with a valid mapping or index field, rather the + * caller specifies these in @mapping and @pgoff. + * + * Return: the number of write-protected PTEs, or an error. + */ +int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff, + unsigned long pfn, unsigned long nr_pages) +{ + struct wrprotect_file_state state = { + .cleaned = 0, + .pgoff = pgoff, + .pfn = pfn, + .nr_pages = nr_pages, + }; + struct rmap_walk_control rwc = { + .arg = (void *)&state, + .rmap_one = mapping_wrprotect_range_one, + .invalid_vma = invalid_mkclean_vma, + }; + + if (!mapping) + return 0; + + __rmap_walk_file(/* folio = */NULL, mapping, pgoff, nr_pages, &rwc, + /* locked = */false); + + return state.cleaned; +} +EXPORT_SYMBOL_GPL(mapping_wrprotect_range); + /** * pfn_mkclean_range - Cleans the PTEs (including PMDs) mapped with range of * [@pfn, @pfn + @nr_pages) at the specific offset (@pgoff) From 6cdef2ddce2b7de8faca69061a6780080cfecc10 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 8 Feb 2025 15:52:56 +0000 Subject: [PATCH 0897/2157] fb_defio: do not use deprecated page->mapping, index fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of mapping_wrprotect_range() there is no need to use folio_mkclean() in order to write-protect mappings of frame buffer pages, and therefore no need to inappropriately set kernel-allocated page->index, mapping fields to permit this operation. Instead, store the pointer to the page cache object for the mapped driver in the fb_deferred_io object, and use the already stored page offset from the pageref object to look up mappings in order to write-protect them. This is justified, as for the page objects to store a mapping pointer at the point of assignment of pages, they must all reference the same underlying address_space object. Since the life time of the pagerefs is also the lifetime of the fb_deferred_io object, storing the pointer here makes sense. This eliminates the need for all of the logic around setting and maintaining page->index,mapping which we remove. This eliminates the use of folio_mkclean() entirely but otherwise should have no functional change. [lorenzo.stoakes@oracle.com: fixup unused variable warnings] Link: https://lkml.kernel.org/r/d4018405-2762-4385-a816-e54cc23839ac@lucifer.local Link: https://lkml.kernel.org/r/81171ab16c14e3df28f6de9d14982cee528d8519.1739029358.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Tested-by: Kajtar Zsolt Acked-by: Thomas Zimmermann Cc: David Hildenbrand Cc: Helge Deller Cc: Jaya Kumar Cc: Maíra Canal Cc: Matthew Wilcox Cc: Simona Vetter Signed-off-by: Andrew Morton --- drivers/video/fbdev/core/fb_defio.c | 43 ++++++++++------------------- include/linux/fb.h | 1 + 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 65363df8e81b..4fc93f253e06 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -69,14 +69,6 @@ static struct fb_deferred_io_pageref *fb_deferred_io_pageref_lookup(struct fb_in return pageref; } -static void fb_deferred_io_pageref_clear(struct fb_deferred_io_pageref *pageref) -{ - struct page *page = pageref->page; - - if (page) - page->mapping = NULL; -} - static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info *info, unsigned long offset, struct page *page) @@ -140,13 +132,10 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf) if (!page) return VM_FAULT_SIGBUS; - if (vmf->vma->vm_file) - page->mapping = vmf->vma->vm_file->f_mapping; - else - printk(KERN_ERR "no mapping available\n"); + if (!vmf->vma->vm_file) + fb_err(info, "no mapping available\n"); - BUG_ON(!page->mapping); - page->index = vmf->pgoff; /* for folio_mkclean() */ + BUG_ON(!info->fbdefio->mapping); vmf->page = page; return 0; @@ -194,9 +183,9 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long /* * We want the page to remain locked from ->page_mkwrite until - * the PTE is marked dirty to avoid folio_mkclean() being called - * before the PTE is updated, which would leave the page ignored - * by defio. + * the PTE is marked dirty to avoid mapping_wrprotect_range() + * being called before the PTE is updated, which would leave + * the page ignored by defio. * Do this by locking the page here and informing the caller * about it with VM_FAULT_LOCKED. */ @@ -274,15 +263,17 @@ static void fb_deferred_io_work(struct work_struct *work) struct fb_deferred_io_pageref *pageref, *next; struct fb_deferred_io *fbdefio = info->fbdefio; - /* here we mkclean the pages, then do all deferred IO */ + /* here we wrprotect the page's mappings, then do all deferred IO. */ mutex_lock(&fbdefio->lock); +#ifdef CONFIG_MMU list_for_each_entry(pageref, &fbdefio->pagereflist, list) { - struct folio *folio = page_folio(pageref->page); + struct page *page = pageref->page; + pgoff_t pgoff = pageref->offset >> PAGE_SHIFT; - folio_lock(folio); - folio_mkclean(folio); - folio_unlock(folio); + mapping_wrprotect_range(fbdefio->mapping, pgoff, + page_to_pfn(page), 1); } +#endif /* driver's callback with pagereflist */ fbdefio->deferred_io(info, &fbdefio->pagereflist); @@ -337,6 +328,7 @@ void fb_deferred_io_open(struct fb_info *info, { struct fb_deferred_io *fbdefio = info->fbdefio; + fbdefio->mapping = file->f_mapping; file->f_mapping->a_ops = &fb_deferred_io_aops; fbdefio->open_count++; } @@ -344,13 +336,7 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_open); static void fb_deferred_io_lastclose(struct fb_info *info) { - unsigned long i; - flush_delayed_work(&info->deferred_work); - - /* clear out the mapping that we setup */ - for (i = 0; i < info->npagerefs; ++i) - fb_deferred_io_pageref_clear(&info->pagerefs[i]); } void fb_deferred_io_release(struct fb_info *info) @@ -370,5 +356,6 @@ void fb_deferred_io_cleanup(struct fb_info *info) kvfree(info->pagerefs); mutex_destroy(&fbdefio->lock); + fbdefio->mapping = NULL; } EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup); diff --git a/include/linux/fb.h b/include/linux/fb.h index 5ba187e08cf7..cd653862ab99 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -225,6 +225,7 @@ struct fb_deferred_io { int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ + struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); From 6340584e489f1f8ddb65e44a7ad2ab9f3503c4d3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 11 Feb 2025 12:59:11 -0800 Subject: [PATCH 0898/2157] mm/vmstat: revert "fix a W=1 clang compiler warning" Commit 75b5ab134bb5 enabled -Wenum-enum-conversion in W=1 builds. Commit 30c2de0a267c ("mm/vmstat: fix a W=1 clang compiler warning") fixed a -Wenum-enum-conversion warning. Commit 8f6629c004b1 removed the -Wenum-enum-conversion option again from W=1 builds. Since the W=1 compiler warning fix is no longer necessary, revert it. Link: https://lkml.kernel.org/r/20250211205911.1707684-1-bvanassche@acm.org Signed-off-by: Bart Van Assche Acked-by: Vlastimil Babka Cc: Matthew Wilcox Cc: Ivan Shapovalov Cc: David Laight Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9f3a04345b86..d2761bf8ff32 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -515,7 +515,7 @@ static inline const char *node_stat_name(enum node_stat_item item) static inline const char *lru_list_name(enum lru_list lru) { - return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_" + return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) From 026e8b55aa05b728e5f5f7858cd91385bf0642e4 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Feb 2025 13:00:23 +0000 Subject: [PATCH 0899/2157] mm/mmu_gather: update comment on RCU freeing Some recent discussion on LMKL [0] brought up some interesting and useful additional context on RCU-freeing for pagetables. Note down some extra info in here, in particular a) be concrete about the reason why an arch might not have an IPI and b) add the interesting paravirt details. [0] https://lore.kernel.org/linux-kernel/20250206044346.3810242-2-riel@surriel.com/ Link: https://lkml.kernel.org/r/20250211-mmugather-comment-v1-1-1ac1e0c765d2@google.com Signed-off-by: Brendan Jackman Cc: "Aneesh Kumar K.V" Cc: Brendan Jackman Cc: Nicholas Piggin Cc: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/mmu_gather.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 7aa6f18c500b..db7ba4a725d6 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -246,8 +246,16 @@ static void __tlb_remove_table_free(struct mmu_table_batch *batch) * IRQs delays the completion of the TLB flush we can never observe an already * freed page. * - * Architectures that do not have this (PPC) need to delay the freeing by some - * other means, this is that means. + * Not all systems IPI every CPU for this purpose: + * + * - Some architectures have HW support for cross-CPU synchronisation of TLB + * flushes, so there's no IPI at all. + * + * - Paravirt guests can do this TLB flushing in the hypervisor, or coordinate + * with the hypervisor to defer flushing on preempted vCPUs. + * + * Such systems need to delay the freeing by some other means, this is that + * means. * * What we do is batch the freed directory pages (tables) and RCU free them. * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling From 0431c42622612a96cce97cdd4cfbe7d487606cf3 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Tue, 11 Feb 2025 12:43:40 +0000 Subject: [PATCH 0900/2157] mm/damon: introduce DAMOS filter type hugepage_size Patch series "mm/damon: add support for hugepage_size DAMOS filter", v5. hugepage_size DAMOS filter can be used to gather statistics to check if memory regions of specific access tempratures are backed by hugepages of a size in a specific range. This filter can help to observe and prove the effectivenes of different schemes for shrinking/collapsing hugepages. This patch (of 4): This is to gather statistics to check if memory regions of specific access tempratures are backed by pages of a size in a specific range. This filter can help to observe and prove the effectivenes of different schemes for shrinking/collapsing hugepages. [sj@kernel.org: add kernel-doc comment for damos_filter->sz_range] Link: https://lkml.kernel.org/r/20250218223058.52459-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250211124437.278873-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20250211124437.278873-2-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Johannes Weiner Cc: Usama Arif Signed-off-by: Andrew Morton --- include/linux/damon.h | 14 ++++++++++++++ mm/damon/core.c | 3 +++ mm/damon/paddr.c | 6 ++++++ mm/damon/sysfs-schemes.c | 1 + 4 files changed, 24 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index b4d37d9b9221..5e7ae7bca5dc 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -35,6 +35,16 @@ struct damon_addr_range { unsigned long end; }; +/** + * struct damon_size_range - Represents size for filter to operate on [@min, @max]. + * @min: Min size (inclusive). + * @max: Max size (inclusive). + */ +struct damon_size_range { + unsigned long min; + unsigned long max; +}; + /** * struct damon_region - Represents a monitoring target region. * @ar: The address range of the region. @@ -326,6 +336,7 @@ struct damos_stat { * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. + * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. * @DAMOS_FILTER_TYPE_ADDR: Address range. * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. @@ -345,6 +356,7 @@ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, DAMOS_FILTER_TYPE_MEMCG, DAMOS_FILTER_TYPE_YOUNG, + DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, DAMOS_FILTER_TYPE_ADDR, DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, @@ -360,6 +372,7 @@ enum damos_filter_type { * @target_idx: Index of the &struct damon_target of * &damon_ctx->adaptive_targets if @type is * DAMOS_FILTER_TYPE_TARGET. + * @sz_range: Size range if @type is DAMOS_FILTER_TYPE_HUGEPAGE_SIZE. * @list: List head for siblings. * * Before applying the &damos->action to a memory region, DAMOS checks if each @@ -376,6 +389,7 @@ struct damos_filter { unsigned short memcg_id; struct damon_addr_range addr_range; int target_idx; + struct damon_size_range sz_range; }; struct list_head list; }; diff --git a/mm/damon/core.c b/mm/damon/core.c index f663c8e99dfa..b1ce072b56f2 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -777,6 +777,9 @@ static void damos_commit_filter_arg( case DAMOS_FILTER_TYPE_TARGET: dst->target_idx = src->target_idx; break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + dst->sz_range = src->sz_range; + break; default: break; } diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index eb10a723b0a7..1a5974640b93 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -211,6 +211,7 @@ static bool damos_pa_filter_match(struct damos_filter *filter, { bool matched = false; struct mem_cgroup *memcg; + size_t folio_sz; switch (filter->type) { case DAMOS_FILTER_TYPE_ANON: @@ -230,6 +231,11 @@ static bool damos_pa_filter_match(struct damos_filter *filter, if (matched) damon_folio_mkold(folio); break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + folio_sz = folio_size(folio); + matched = filter->sz_range.min <= folio_sz && + folio_sz <= filter->sz_range.max; + break; default: break; } diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 98f93ae9f59e..9020bc9befac 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -329,6 +329,7 @@ static const char * const damon_sysfs_scheme_filter_type_strs[] = { "anon", "memcg", "young", + "hugepage_size", "addr", "target", }; From ea1f204ba29a65ec70464fdd4de727e76c6e4d9c Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Tue, 11 Feb 2025 12:43:41 +0000 Subject: [PATCH 0901/2157] mm/damon/sysfs-schemes: add files for setting damos_filter->sz_range Add min and max files for damon filters to let the userspace decide the min/max folio size to operate on. This will be needed to decide what folio sizes to give pa_stat for. Link: https://lkml.kernel.org/r/20250211124437.278873-3-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 54 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 9020bc9befac..881d00bb3a34 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -316,6 +316,7 @@ struct damon_sysfs_scheme_filter { bool allow; char *memcg_path; struct damon_addr_range addr_range; + struct damon_size_range sz_range; int target_idx; }; @@ -474,6 +475,44 @@ static ssize_t addr_end_store(struct kobject *kobj, return err ? err : count; } +static ssize_t min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->sz_range.min); +} + +static ssize_t min_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->sz_range.min); + + return err ? err : count; +} + +static ssize_t max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->sz_range.max); +} + +static ssize_t max_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->sz_range.max); + + return err ? err : count; +} + static ssize_t damon_target_idx_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -520,6 +559,12 @@ static struct kobj_attribute damon_sysfs_scheme_filter_addr_start_attr = static struct kobj_attribute damon_sysfs_scheme_filter_addr_end_attr = __ATTR_RW_MODE(addr_end, 0600); +static struct kobj_attribute damon_sysfs_scheme_filter_min_attr = + __ATTR_RW_MODE(min, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_max_attr = + __ATTR_RW_MODE(max, 0600); + static struct kobj_attribute damon_sysfs_scheme_filter_damon_target_idx_attr = __ATTR_RW_MODE(damon_target_idx, 0600); @@ -530,6 +575,8 @@ static struct attribute *damon_sysfs_scheme_filter_attrs[] = { &damon_sysfs_scheme_filter_memcg_path_attr.attr, &damon_sysfs_scheme_filter_addr_start_attr.attr, &damon_sysfs_scheme_filter_addr_end_attr.attr, + &damon_sysfs_scheme_filter_min_attr.attr, + &damon_sysfs_scheme_filter_max_attr.attr, &damon_sysfs_scheme_filter_damon_target_idx_attr.attr, NULL, }; @@ -1954,6 +2001,13 @@ static int damon_sysfs_add_scheme_filters(struct damos *scheme, filter->addr_range = sysfs_filter->addr_range; } else if (filter->type == DAMOS_FILTER_TYPE_TARGET) { filter->target_idx = sysfs_filter->target_idx; + } else if (filter->type == DAMOS_FILTER_TYPE_HUGEPAGE_SIZE) { + if (sysfs_filter->sz_range.min > + sysfs_filter->sz_range.max) { + damos_destroy_filter(filter); + return -EINVAL; + } + filter->sz_range = sysfs_filter->sz_range; } damos_add_filter(scheme, filter); From 807db03c59097db7f94b642cf4394946989d6417 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Tue, 11 Feb 2025 12:43:42 +0000 Subject: [PATCH 0902/2157] Docs/ABI/damon: document DAMOS sysfs files to set the min/max folio_size This will be used to decide the min and max folio size to operate on for pa_stat. Link: https://lkml.kernel.org/r/20250211124437.278873-4-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-kernel-mm-damon | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index b057eddefbfc..ccd13ca668c8 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -345,6 +345,20 @@ Description: If 'addr' is written to the 'type' file, writing to or reading from this file sets or gets the end address of the address range for the filter. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//min +Date: Feb 2025 +Contact: SeongJae Park +Description: If 'hugepage_size' is written to the 'type' file, writing to + or reading from this file sets or gets the minimum size of the + hugepage for the filter. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//max +Date: Feb 2025 +Contact: SeongJae Park +Description: If 'hugepage_size' is written to the 'type' file, writing to + or reading from this file sets or gets the maximum size of the + hugepage for the filter. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters//target_idx Date: Dec 2022 Contact: SeongJae Park From 4ddb20926842ff1e892bb8dce64ab93058102601 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Tue, 11 Feb 2025 12:43:43 +0000 Subject: [PATCH 0903/2157] Docs/admin-guide/mm/damon/usage: document hugepage_size filter type This includes both the 'hugepage_size' filter type and the min/max files used to decide range of sizes to filter on. Link: https://lkml.kernel.org/r/20250211124437.278873-5-usamaarif642@gmail.com Signed-off-by: Usama Arif Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 47a44bd348ab..51af66c208c5 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -83,7 +83,7 @@ comma (","). │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value │ │ │ │ │ │ │ :ref:`watermarks `/metric,interval_us,high,mid,low │ │ │ │ │ │ │ :ref:`filters `/nr_filters - │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx + │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max │ │ │ │ │ │ │ :ref:`stats `/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds │ │ │ │ │ │ │ :ref:`tried_regions `/total_bytes │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed @@ -406,13 +406,14 @@ number (``N``) to the file creates the number of child directories named ``0`` to ``N-1``. Each directory represents each filter. The filters are evaluated in the numeric order. -Each filter directory contains seven files, namely ``type``, ``matching``, -``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``. -To ``type`` file, you can write one of five special keywords: ``anon`` for -anonymous pages, ``memcg`` for specific memory cgroup, ``young`` for young -pages, ``addr`` for specific address range (an open-ended interval), or -``target`` for specific DAMON monitoring target filtering. Meaning of the -types are same to the description on the :ref:`design doc +Each filter directory contains nine files, namely ``type``, ``matching``, +``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max`` +and ``target_idx``. To ``type`` file, you can write one of six special +keywords: ``anon`` for anonymous pages, ``memcg`` for specific memory cgroup, +``young`` for young pages, ``addr`` for specific address range (an open-ended +interval), ``hugepage_size`` for large folios of a specific size range [``min``, +``max``] or ``target`` for specific DAMON monitoring target filtering. Meaning +of the types are same to the description on the :ref:`design doc `. In case of the memory cgroup filtering, you can specify the memory cgroup of From 4bc2e699e3d8f56f2aaafd14109ff77311f95336 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 11 Feb 2025 08:29:00 +0000 Subject: [PATCH 0904/2157] mm/mm_init.c: only align start of ZONE_MOVABLE on nodes with memory At the beginning of find_zone_movable_pfns_for_nodes(), it has properly set node_states[N_MEMORY] in early_calculate_totalpages(). Instead of iterating over all possible nodes, we can just do the alignment on nodes with memory. Link: https://lkml.kernel.org/r/20250211082900.10877-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: David Hildenbrand Reviewed-by: Dev Jain Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- mm/mm_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index de18d3ad12e1..6078b3651f2e 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -545,7 +545,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) out2: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ - for (nid = 0; nid < MAX_NUMNODES; nid++) { + for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; zone_movable_pfn[nid] = From 6fbea85271c6764d512a5c5b6c1b44b762693a18 Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Tue, 11 Feb 2025 15:18:50 +0800 Subject: [PATCH 0905/2157] maple_tree: use ma_dead_node() in mte_dead_node() Utilize ma_dead_node() in mte_dead_node(). It can prevent decoding the maple enode for a second time. Use the "node" to find parent for comparison. Link: https://lkml.kernel.org/r/20250211071850.330632-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Reviewed-by: Liam R. Howlett Cc: Ching-Chun (Jim) Huang Cc: Shuah khan Signed-off-by: Andrew Morton --- lib/maple_tree.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 42c65974a56c..60356ccb11ce 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -584,13 +584,10 @@ static __always_inline bool ma_dead_node(const struct maple_node *node) */ static __always_inline bool mte_dead_node(const struct maple_enode *enode) { - struct maple_node *parent, *node; + struct maple_node *node; node = mte_to_node(enode); - /* Do not reorder reads from the node prior to the parent check */ - smp_rmb(); - parent = mte_parent(enode); - return (parent == node); + return ma_dead_node(node); } /* From 1d23b9403aedea2d8eb4175e8eeb05fcd2967219 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 11 Feb 2025 18:13:39 +0800 Subject: [PATCH 0906/2157] mm/mmu_gather: remove unused __tlb_remove_page() Nobody is using __tlb_remove_page() now, clean it up. And also remove the code comment above tlb_remove_page() because it's not meaningful any more. Link: https://lkml.kernel.org/r/Z6si0/A/zzEF/bFJ@MiWiFi-R3L-srv Signed-off-by: Baoquan He Reviewed-by: Qi Zheng Cc: "Aneesh Kumar K.V" Cc: Nicholas Piggin Cc: Peter Zijlstra (Intel) Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e402aef79c93..1fac1985127d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -489,16 +489,6 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, tlb_flush_mmu(tlb); } -static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, - struct page *page, bool delay_rmap) -{ - return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE); -} - -/* tlb_remove_page - * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when - * required. - */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); From 7bd1fa0d5624e78628ad7ce70d7e69082c34c634 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 11 Feb 2025 11:43:48 +0800 Subject: [PATCH 0907/2157] mm/mmu_gather: clean up the stale code comment In commit d7f861b9c43a ("mm/mmu_gather: add __tlb_remove_folio_pages()"), helper function __tlb_remove_folio_pages_size() was added. And based on the helper, wrapper functions __tlb_remove_folio_pages() and __tlb_remove_page_size() are created and used by upper level functions. So let's update the code comment to reflect the current code about tlb_remove_page()/tlb_remove_page_size(), etc. Link: https://lkml.kernel.org/r/20250211034348.39531-2-bhe@redhat.com Signed-off-by: Baoquan He Cc: "Aneesh Kumar K.V" Cc: Nicholas Piggin Cc: Peter Zijlstra (Intel) Cc: Qi Zheng Cc: Will Deacon Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 1fac1985127d..d1adfba8387e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -67,22 +67,21 @@ * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * - * - tlb_remove_page() / __tlb_remove_page() - * - tlb_remove_page_size() / __tlb_remove_page_size() - * - __tlb_remove_folio_pages() + * - tlb_remove_page() / tlb_remove_page_size() + * - __tlb_remove_folio_pages() / __tlb_remove_page_size() + * - __tlb_remove_folio_pages_size() * - * __tlb_remove_page_size() is the basic primitive that queues a page for - * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a - * boolean indicating if the queue is (now) full and a call to - * tlb_flush_mmu() is required. + * __tlb_remove_folio_pages_size() is the basic primitive that queues pages + * for freeing. It will return a boolean indicating if the queue is (now) + * full and a call to tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * - * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however, - * instead of removing a single page, remove the given number of consecutive - * pages that are all part of the same (large) folio: just like calling - * __tlb_remove_page() on each page individually. + * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(), + * however, instead of removing a single page, assume PAGE_SIZE and remove + * the given number of consecutive pages that are all part of the + * same (large) folio. * * - tlb_change_page_size() * From 85968b6a2042cd19c6afd1fb49b64bd32f3a4d07 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Feb 2025 17:44:26 +0000 Subject: [PATCH 0908/2157] selftests/mm: allow tests to run with no huge pages support Currently the mm selftests refuse to run if huge pages are not available in the current system but this is an optional feature and not all the tests actually require them. Change the test during startup to be non-fatal and skip or omit tests which actually rely on having huge pages, allowing the other tests to be run. The gup_test does support using madvise() to configure huge pages but it ignores the error code so we just let it run. Link: https://lkml.kernel.org/r/20250212-kselftest-mm-no-hugepages-v1-2-44702f538522@kernel.org Signed-off-by: Mark Brown Reviewed-by: Nico Pache Cc: Mariano Pache Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 66 ++++++++++++++--------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 7cc71d942f83..93a606198751 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -187,9 +187,10 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" fi + HAVE_HUGEPAGES=1 else echo "no hugetlbfs support in kernel?" - exit 1 + HAVE_HUGEPAGES=0 fi # filter 64bit architectures @@ -218,13 +219,20 @@ pretty_name() { # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local skip=0 + # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then - echo 3 > /proc/sys/vm/drop_caches - sleep 2 - echo 1 > /proc/sys/vm/compact_memory - sleep 2 + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + echo 3 > /proc/sys/vm/drop_caches + sleep 2 + echo 1 > /proc/sys/vm/compact_memory + sleep 2 + else + echo "hugepages not supported" | tap_prefix + skip=1 + fi fi local test=$(pretty_name "$*") @@ -232,8 +240,12 @@ run_test() { local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - ("$@" 2>&1) | tap_prefix - local ret=${PIPESTATUS[0]} + if [ "${skip}" != "1" ]; then + ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + else + local ret=$ksft_skip + fi count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) @@ -271,13 +283,15 @@ CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise CATEGORY="hugetlb" run_test ./hugetlb_dio -nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) -# For this test, we need one and just one huge page -echo 1 > /proc/sys/vm/nr_hugepages -CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv -CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map -# Restore the previous number of huge pages, since further tests rely on it -echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) + # For this test, we need one and just one huge page + echo 1 > /proc/sys/vm/nr_hugepages + CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv + CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map + # Restore the previous number of huge pages, since further tests rely on it + echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +fi if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix @@ -393,7 +407,9 @@ CATEGORY="memfd_secret" run_test ./memfd_secret fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 -CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +fi # KSM KSM_MERGE_TIME test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages @@ -442,15 +458,17 @@ CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then - if test_selected "thp"; then - if grep xfs /proc/filesystems &>/dev/null; then - XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) - SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) - truncate -s 314572800 ${XFS_IMG} - mkfs.xfs -q ${XFS_IMG} - mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} - MOUNTED_XFS=1 - fi + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + if test_selected "thp"; then + if grep xfs /proc/filesystems &>/dev/null; then + XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) + SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) + truncate -s 314572800 ${XFS_IMG} + mkfs.xfs -q ${XFS_IMG} + mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} + MOUNTED_XFS=1 + fi + fi fi fi From bf40aa214195864eeb63165cfb02e959c464d409 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 12 Feb 2025 01:38:18 +0000 Subject: [PATCH 0909/2157] mm/mm_init.c: use round_up() to calculate usermap size Since pageblock_nr_pages and BITS_PER_LONG are power of 2, we could use round_up() to calculate it. Also we have renamed blockflags to pageblock_flags, adjust the comment accordingly. Link: https://lkml.kernel.org/r/20250212013818.873-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Suggested-by: Shivank Garg Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- mm/mm_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 6078b3651f2e..c767946e8f5f 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1431,7 +1431,7 @@ void __meminit init_currently_empty_zone(struct zone *zone, #ifndef CONFIG_SPARSEMEM /* - * Calculate the size of the zone->blockflags rounded to an unsigned long + * Calculate the size of the zone->pageblock_flags rounded to an unsigned long * Start by making sure zonesize is a multiple of pageblock_order by rounding * up. Then use 1 NR_PAGEBLOCK_BITS worth of bits per pageblock, finally * round what is now in bits to nearest long in bits, then return it in @@ -1442,10 +1442,10 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l unsigned long usemapsize; zonesize += zone_start_pfn & (pageblock_nr_pages-1); - usemapsize = roundup(zonesize, pageblock_nr_pages); + usemapsize = round_up(zonesize, pageblock_nr_pages); usemapsize = usemapsize >> pageblock_order; usemapsize *= NR_PAGEBLOCK_BITS; - usemapsize = roundup(usemapsize, BITS_PER_LONG); + usemapsize = round_up(usemapsize, BITS_PER_LONG); return usemapsize / BITS_PER_BYTE; } From f807123d578df4218e2580a1e1bb3436f4567c4a Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Feb 2025 18:17:00 +0000 Subject: [PATCH 0910/2157] mm: allow guard regions in file-backed and read-only mappings Patch series "mm: permit guard regions for file-backed/shmem mappings". The guard regions feature was initially implemented to support anonymous mappings only, excluding shmem. This was done so as to introduce the feature carefully and incrementally and to be conservative when considering the various caveats and corner cases that are applicable to file-backed mappings but not to anonymous ones. Now this feature has landed in 6.13, it is time to revisit this and to extend this functionality to file-backed and shmem mappings. In order to make this maximally useful, and since one may map file-backed mappings read-only (for instance ELF images), we also remove the restriction on read-only mappings and permit the establishment of guard regions in any non-hugetlb, non-mlock()'d mapping. It is permissible to permit the establishment of guard regions in read-only mappings because the guard regions only reduce access to the mapping, and when removed simply reinstate the existing attributes of the underlying VMA, meaning no access violations can occur. While the change in kernel code introduced in this series is small, the majority of the effort here is spent in extending the testing to assert that the feature works correctly across numerous file-backed mapping scenarios. Every single guard region self-test performed against anonymous memory (which is relevant and not anon-only) has now been updated to also be performed against shmem and a mapping of a file in the working directory. This confirms that all cases also function correctly for file-backed guard regions. In addition a number of other tests are added for specific file-backed mapping scenarios. There are a number of other concerns that one might have with regard to guard regions, addressed below: Readahead ~~~~~~~~~ Readahead is a process through which the page cache is populated on the assumption that sequential reads will occur, thus amortising I/O and, through a clever use of the PG_readahead folio flag establishing during major fault and checked upon minor fault, provides for asynchronous I/O to occur as dat is processed, reducing I/O stalls as data is faulted in. Guard regions do not alter this mechanism which operates at the folio and fault level, but does of course prevent the faulting of folios that would otherwise be mapped. In the instance of a major fault prior to a guard region, synchronous readahead will occur including populating folios in the page cache which the guard regions will, in the case of the mapping in question, prevent access to. In addition, if PG_readahead is placed in a folio that is now inaccessible, this will prevent asynchronous readahead from occurring as it would otherwise do. However, there are mechanisms for heuristically resetting this within readahead regardless, which will 'recover' correct readahead behaviour. Readahead presumes sequential data access, the presence of a guard region clearly indicates that, at least in the guard region, no such sequential access will occur, as it cannot occur there. So this should have very little impact on any real workload. The far more important point is as to whether readahead causes incorrect or inappropriate mapping of ranges disallowed by the presence of guard regions - this is not the case, as readahead does not 'pre-fault' memory in this fashion. At any rate, any mechanism which would attempt to do so would hit the usual page fault paths, which correctly handle PTE markers as with anonymous mappings. Fault-Around ~~~~~~~~~~~~ The fault-around logic, in a similar vein to readahead, attempts to improve efficiency with regard to file-backed memory mappings, however it differs in that it does not try to fetch folios into the page cache that are about to be accessed, but rather pre-maps a range of folios around the faulting address. Guard regions making use of PTE markers makes this relatively trivial, as this case is already handled - see filemap_map_folio_range() and filemap_map_order0_folio() - in both instances, the solution is to simply keep the established page table mappings and let the fault handler take care of PTE markers, as per the comment: /* * NOTE: If there're PTE markers, we'll leave them to be * handled in the specific fault path, and it'll prohibit * the fault-around logic. */ This works, as establishing guard regions results in page table mappings with PTE markers, and clearing them removes them. Truncation ~~~~~~~~~~ File truncation will not eliminate existing guard regions, as the truncation operation will ultimately zap the range via unmap_mapping_range(), which specifically excludes PTE markers. Zapping ~~~~~~~ Zapping is, as with anonymous mappings, handled by zap_nonpresent_ptes(), which specifically deals with guard entries, leaving them intact except in instances such as process teardown or munmap() where they need to be removed. Reclaim ~~~~~~~ When reclaim is performed on file-backed folios, it ultimately invokes try_to_unmap_one() via the rmap. If the folio is non-large, then map_pte() will ultimately abort the operation for the guard region mapping. If large, then check_pte() will determine that this is a non-device private entry/device-exclusive entry 'swap' PTE and thus abort the operation in that instance. Therefore, no odd things happen in the instance of reclaim being attempted upon a file-backed guard region. Hole Punching ~~~~~~~~~~~~~ This updates the page cache and ultimately invokes unmap_mapping_range(), which explicitly leaves PTE markers in place. Because the establishment of guard regions zapped any existing mappings to file-backed folios, once the guard regions are removed then the hole-punched region will be faulted in as usual and everything will behave as expected. One thing to note with this series is that it now implies file-backed VMAs which install guard regions will now have an anon_vma installed if not already present (i.e. if not post-CoW MAP_PRIVATE). I have audited kernel source for instances of vma->anon_vma checks and found nowhere where this would be problematic for pure file-backed mappings. I also discussed (off-list) with Matthew who confirmed he can't see any issue with this. In effect, we treat these VMAs as if they are MAP_PRIVATE, only with 0 CoW'd pages. As a result, the rmap never has a reason to reference the anon_vma from folios at any point and thus no unexpected or weird behaviour results. The anon_vma logic tries to avoid unnecessary anon_vma propagation on fork so we ought to at least minimise overhead. However, this is still overhead, and unwelcome overhead. The whole reason we do this (in madvise_guard_install()) is to ensure that fork _copies page tables_. Otherwise, in vma_needs_copy(), nothing will indicate that we should do so. This was already an unpleasant thing to have to do, but without a new VMA flag we really have no reasonable means of ensuring this happens. Going forward, I intend to add a new VMA flag, VM_MAYBE_GUARDED or something like this. This would have specific behaviour - for the purposes of merging, it would be ignored. However on both split and merge, it will be propagated. It is therefore 'sticky'. This is to avoid having to traverse page tables to determine which parts of a VMA contain guard regions and of course to maintain the desirable qualities of guard regions - the lack of VMA propagation (+ thus slab allocations of VMAs). Adding this flag and adjusting vma_needs_copy() to reference it would resolve the issue. However :) we have a VMA flag space issue, so it'd render this a 64-bit feature only. Having discussed with Matthew a plan by which to perhaps extend available flags for 32-bit we may going forward be able to avoid this. But this may be a longer term project. In the meantime, we'd have to resort to the anon_vma hack for 32-bit, using the flag for 64-bit. The issue with this however is if we do then intend to allow the flag to enable /proc/$pid/maps visibility (something this could allow), it would also end up being 64-bit only which would be a pity. Regardless - I wanted to highlight this behaviour as it is perhaps somewhat surprising. This patch (of 4): There is no reason to disallow guard regions in file-backed mappings - readahead and fault-around both function correctly in the presence of PTE markers, equally other operations relating to memory-mapped files function correctly. Additionally, read-only mappings if introducing guard-regions, only restrict the mapping further, which means there is no violation of any access rights by permitting this to be so. Removing this restriction allows for read-only mapped files (such as executable files) correctly which would otherwise not be permitted. Link: https://lkml.kernel.org/r/cover.1739469950.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/d885cb259174736c2830a5dfe07f81b214ef3faa.1739469950.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Jann Horn Cc: John Hubbard Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/madvise.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 6ecead476a80..e01e93e179a8 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1051,13 +1051,7 @@ static bool is_valid_guard_vma(struct vm_area_struct *vma, bool allow_locked) if (!allow_locked) disallowed |= VM_LOCKED; - if (!vma_is_anonymous(vma)) - return false; - - if ((vma->vm_flags & (VM_MAYWRITE | disallowed)) != VM_MAYWRITE) - return false; - - return true; + return !(vma->vm_flags & disallowed); } static bool is_guard_pte_marker(pte_t ptent) From ce1c0824fc2a2e81b384483e4ab66e06f8fc0f3c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Feb 2025 18:17:01 +0000 Subject: [PATCH 0911/2157] selftests/mm: rename guard-pages to guard-regions The feature formerly referred to as guard pages is more correctly referred to as 'guard regions', as in fact no pages are ever allocated in the process of installing the regions. To avoid confusion, rename the tests accordingly. [lorenzo.stoakes@oracle.com: fix guard regions invocation] Link: https://lkml.kernel.org/r/13426c71-d069-4407-9340-b227ff8b8736@lucifer.local Link: https://lkml.kernel.org/r/1c3cd04a3f69b5756b94bda701ac88325a9be18b.1739469950.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: John Hubbard Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/.gitignore | 2 +- tools/testing/selftests/mm/Makefile | 2 +- .../mm/{guard-pages.c => guard-regions.c} | 42 +++++++++---------- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) rename tools/testing/selftests/mm/{guard-pages.c => guard-regions.c} (98%) diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 121000c28c10..c5241b193db8 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -57,4 +57,4 @@ droppable hugetlb_dio pkey_sighandler_tests_32 pkey_sighandler_tests_64 -guard-pages +guard-regions diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 63ce39d024bb..8270895039d1 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -97,7 +97,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable -TEST_GEN_FILES += guard-pages +TEST_GEN_FILES += guard-regions ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-regions.c similarity index 98% rename from tools/testing/selftests/mm/guard-pages.c rename to tools/testing/selftests/mm/guard-regions.c index ece37212a8a2..7a41cf9ffbdf 100644 --- a/tools/testing/selftests/mm/guard-pages.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -107,12 +107,12 @@ static bool try_read_write_buf(char *ptr) return try_read_buf(ptr) && try_write_buf(ptr); } -FIXTURE(guard_pages) +FIXTURE(guard_regions) { unsigned long page_size; }; -FIXTURE_SETUP(guard_pages) +FIXTURE_SETUP(guard_regions) { struct sigaction act = { .sa_handler = &handle_fatal, @@ -126,7 +126,7 @@ FIXTURE_SETUP(guard_pages) self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); }; -FIXTURE_TEARDOWN(guard_pages) +FIXTURE_TEARDOWN(guard_regions) { struct sigaction act = { .sa_handler = SIG_DFL, @@ -137,7 +137,7 @@ FIXTURE_TEARDOWN(guard_pages) sigaction(SIGSEGV, &act, NULL); } -TEST_F(guard_pages, basic) +TEST_F(guard_regions, basic) { const unsigned long NUM_PAGES = 10; const unsigned long page_size = self->page_size; @@ -231,7 +231,7 @@ TEST_F(guard_pages, basic) } /* Assert that operations applied across multiple VMAs work as expected. */ -TEST_F(guard_pages, multi_vma) +TEST_F(guard_regions, multi_vma) { const unsigned long page_size = self->page_size; char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; @@ -367,7 +367,7 @@ TEST_F(guard_pages, multi_vma) * Assert that batched operations performed using process_madvise() work as * expected. */ -TEST_F(guard_pages, process_madvise) +TEST_F(guard_regions, process_madvise) { const unsigned long page_size = self->page_size; pid_t pid = getpid(); @@ -467,7 +467,7 @@ TEST_F(guard_pages, process_madvise) } /* Assert that unmapping ranges does not leave guard markers behind. */ -TEST_F(guard_pages, munmap) +TEST_F(guard_regions, munmap) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new1, *ptr_new2; @@ -505,7 +505,7 @@ TEST_F(guard_pages, munmap) } /* Assert that mprotect() operations have no bearing on guard markers. */ -TEST_F(guard_pages, mprotect) +TEST_F(guard_regions, mprotect) { const unsigned long page_size = self->page_size; char *ptr; @@ -553,7 +553,7 @@ TEST_F(guard_pages, mprotect) } /* Split and merge VMAs and make sure guard pages still behave. */ -TEST_F(guard_pages, split_merge) +TEST_F(guard_regions, split_merge) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; @@ -684,7 +684,7 @@ TEST_F(guard_pages, split_merge) } /* Assert that MADV_DONTNEED does not remove guard markers. */ -TEST_F(guard_pages, dontneed) +TEST_F(guard_regions, dontneed) { const unsigned long page_size = self->page_size; char *ptr; @@ -737,7 +737,7 @@ TEST_F(guard_pages, dontneed) } /* Assert that mlock()'ed pages work correctly with guard markers. */ -TEST_F(guard_pages, mlock) +TEST_F(guard_regions, mlock) { const unsigned long page_size = self->page_size; char *ptr; @@ -810,7 +810,7 @@ TEST_F(guard_pages, mlock) * * - Moving a mapping alone should retain markers as they are. */ -TEST_F(guard_pages, mremap_move) +TEST_F(guard_regions, mremap_move) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; @@ -857,7 +857,7 @@ TEST_F(guard_pages, mremap_move) * will have to remove guard pages manually to fix up (they'd have to do the * same if it were a PROT_NONE mapping). */ -TEST_F(guard_pages, mremap_expand) +TEST_F(guard_regions, mremap_expand) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; @@ -920,7 +920,7 @@ TEST_F(guard_pages, mremap_expand) * if the user were using a PROT_NONE mapping they'd have to manually fix this * up also so this is OK. */ -TEST_F(guard_pages, mremap_shrink) +TEST_F(guard_regions, mremap_shrink) { const unsigned long page_size = self->page_size; char *ptr; @@ -984,7 +984,7 @@ TEST_F(guard_pages, mremap_shrink) * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set * retain guard pages. */ -TEST_F(guard_pages, fork) +TEST_F(guard_regions, fork) { const unsigned long page_size = self->page_size; char *ptr; @@ -1039,7 +1039,7 @@ TEST_F(guard_pages, fork) * Assert expected behaviour after we fork populated ranges of anonymous memory * and then guard and unguard the range. */ -TEST_F(guard_pages, fork_cow) +TEST_F(guard_regions, fork_cow) { const unsigned long page_size = self->page_size; char *ptr; @@ -1110,7 +1110,7 @@ TEST_F(guard_pages, fork_cow) * Assert that forking a process with VMAs that do have VM_WIPEONFORK set * behave as expected. */ -TEST_F(guard_pages, fork_wipeonfork) +TEST_F(guard_regions, fork_wipeonfork) { const unsigned long page_size = self->page_size; char *ptr; @@ -1160,7 +1160,7 @@ TEST_F(guard_pages, fork_wipeonfork) } /* Ensure that MADV_FREE retains guard entries as expected. */ -TEST_F(guard_pages, lazyfree) +TEST_F(guard_regions, lazyfree) { const unsigned long page_size = self->page_size; char *ptr; @@ -1196,7 +1196,7 @@ TEST_F(guard_pages, lazyfree) } /* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ -TEST_F(guard_pages, populate) +TEST_F(guard_regions, populate) { const unsigned long page_size = self->page_size; char *ptr; @@ -1222,7 +1222,7 @@ TEST_F(guard_pages, populate) } /* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ -TEST_F(guard_pages, cold_pageout) +TEST_F(guard_regions, cold_pageout) { const unsigned long page_size = self->page_size; char *ptr; @@ -1268,7 +1268,7 @@ TEST_F(guard_pages, cold_pageout) } /* Ensure that guard pages do not break userfaultd. */ -TEST_F(guard_pages, uffd) +TEST_F(guard_regions, uffd) { const unsigned long page_size = self->page_size; int uffd; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 93a606198751..4b5e45a10219 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -395,7 +395,7 @@ CATEGORY="mremap" run_test ./mremap_dontunmap CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests -CATEGORY="madv_guard" run_test ./guard-pages +CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate From 272f37d3e99a223fbb51fc2f6e6135c9856dc104 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Feb 2025 18:17:02 +0000 Subject: [PATCH 0912/2157] tools/selftests: expand all guard region tests to file-backed Extend the guard region tests to allow for test fixture variants for anon, shmem, and local file files. This allows us to assert that each of the expected behaviours of anonymous memory also applies correctly to file-backed (both shmem and an a file created locally in the current working directory) and thus asserts the same correctness guarantees as all the remaining tests do. The fixture teardown is now performed in the parent process rather than child forked ones, meaning cleanup is always performed, including unlinking any generated temporary files. Additionally the variant fixture data type now contains an enum value indicating the type of backing store and the mmap() invocation is abstracted to allow for the mapping of whichever backing store the variant is testing. We adjust tests as necessary to account for the fact they may now reference files rather than anonymous memory. Link: https://lkml.kernel.org/r/ab42228d2bd9b8aa18e9faebcd5c88732a7e5820.1739469950.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: John Hubbard Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 290 +++++++++++++++------ 1 file changed, 205 insertions(+), 85 deletions(-) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 7a41cf9ffbdf..0469c783f4fa 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,79 @@ static sigjmp_buf signal_jmp_buf; */ #define FORCE_READ(x) (*(volatile typeof(x) *)x) +/* + * How is the test backing the mapping being tested? + */ +enum backing_type { + ANON_BACKED, + SHMEM_BACKED, + LOCAL_FILE_BACKED, +}; + +FIXTURE(guard_regions) +{ + unsigned long page_size; + char path[PATH_MAX]; + int fd; +}; + +FIXTURE_VARIANT(guard_regions) +{ + enum backing_type backing; +}; + +FIXTURE_VARIANT_ADD(guard_regions, anon) +{ + .backing = ANON_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, shmem) +{ + .backing = SHMEM_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, file) +{ + .backing = LOCAL_FILE_BACKED, +}; + +static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant) +{ + switch (variant->backing) { + case ANON_BACKED: + case SHMEM_BACKED: + return true; + default: + return false; + } +} + +static void *mmap_(FIXTURE_DATA(guard_regions) * self, + const FIXTURE_VARIANT(guard_regions) * variant, + void *addr, size_t length, int prot, int extra_flags, + off_t offset) +{ + int fd; + int flags = extra_flags; + + switch (variant->backing) { + case ANON_BACKED: + flags |= MAP_PRIVATE | MAP_ANON; + fd = -1; + break; + case SHMEM_BACKED: + case LOCAL_FILE_BACKED: + flags |= MAP_SHARED; + fd = self->fd; + break; + default: + ksft_exit_fail(); + break; + } + + return mmap(addr, length, prot, flags, fd, offset); +} + static int userfaultfd(int flags) { return syscall(SYS_userfaultfd, flags); @@ -107,12 +181,7 @@ static bool try_read_write_buf(char *ptr) return try_read_buf(ptr) && try_write_buf(ptr); } -FIXTURE(guard_regions) -{ - unsigned long page_size; -}; - -FIXTURE_SETUP(guard_regions) +static void setup_sighandler(void) { struct sigaction act = { .sa_handler = &handle_fatal, @@ -122,11 +191,9 @@ FIXTURE_SETUP(guard_regions) sigemptyset(&act.sa_mask); if (sigaction(SIGSEGV, &act, NULL)) ksft_exit_fail_perror("sigaction"); +} - self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); -}; - -FIXTURE_TEARDOWN(guard_regions) +static void teardown_sighandler(void) { struct sigaction act = { .sa_handler = SIG_DFL, @@ -137,6 +204,48 @@ FIXTURE_TEARDOWN(guard_regions) sigaction(SIGSEGV, &act, NULL); } +static int open_file(const char *prefix, char *path) +{ + int fd; + + snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix); + fd = mkstemp(path); + if (fd < 0) + ksft_exit_fail_perror("mkstemp"); + + return fd; +} + +FIXTURE_SETUP(guard_regions) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + setup_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + self->fd = open_file( + variant->backing == SHMEM_BACKED ? "/tmp/" : "", + self->path); + + /* We truncate file to at least 100 pages, tests can modify as needed. */ + ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); +}; + +FIXTURE_TEARDOWN_PARENT(guard_regions) +{ + teardown_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + if (self->fd >= 0) + close(self->fd); + + if (self->path[0] != '\0') + unlink(self->path); +} + TEST_F(guard_regions, basic) { const unsigned long NUM_PAGES = 10; @@ -144,8 +253,8 @@ TEST_F(guard_regions, basic) char *ptr; int i; - ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Trivially assert we can touch the first page. */ @@ -238,25 +347,23 @@ TEST_F(guard_regions, multi_vma) int i; /* Reserve a 100 page region over which we can install VMAs. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* Place a VMA of 10 pages size at the start of the region. */ - ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr1 = mmap_(self, variant, ptr_region, 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr1, MAP_FAILED); /* Place a VMA of 5 pages size 50 pages into the region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); /* Place a VMA of 20 pages size at the end of the region. */ - ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); /* Unmap gaps. */ @@ -326,13 +433,11 @@ TEST_F(guard_regions, multi_vma) } /* Now map incompatible VMAs in the gaps. */ - ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); - ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); /* @@ -379,8 +484,8 @@ TEST_F(guard_regions, process_madvise) ASSERT_NE(pidfd, -1); /* Reserve region to map over. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* @@ -388,9 +493,8 @@ TEST_F(guard_regions, process_madvise) * overwrite existing entries and test this code path against * overwriting existing entries. */ - ptr1 = mmap(&ptr_region[page_size], 10 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0); ASSERT_NE(ptr1, MAP_FAILED); /* We want guard markers at start/end of each VMA. */ vec[0].iov_base = ptr1; @@ -399,9 +503,8 @@ TEST_F(guard_regions, process_madvise) vec[1].iov_len = page_size; /* 5 pages offset 50 pages into reserve region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); vec[2].iov_base = ptr2; vec[2].iov_len = page_size; @@ -409,9 +512,8 @@ TEST_F(guard_regions, process_madvise) vec[3].iov_len = page_size; /* 20 pages offset 79 pages into reserve region. */ - ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); vec[4].iov_base = ptr3; vec[4].iov_len = page_size; @@ -472,8 +574,8 @@ TEST_F(guard_regions, munmap) const unsigned long page_size = self->page_size; char *ptr, *ptr_new1, *ptr_new2; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard first and last pages. */ @@ -489,11 +591,11 @@ TEST_F(guard_regions, munmap) ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); /* Map over them.*/ - ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED, 0); ASSERT_NE(ptr_new1, MAP_FAILED); - ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new2, MAP_FAILED); /* Assert that they are now not guarded. */ @@ -511,8 +613,8 @@ TEST_F(guard_regions, mprotect) char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the middle of the range. */ @@ -559,8 +661,8 @@ TEST_F(guard_regions, split_merge) char *ptr, *ptr_new; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the whole range. */ @@ -601,14 +703,14 @@ TEST_F(guard_regions, split_merge) } /* Now map them again - the unmap will have cleared the guards. */ - ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now make sure guard pages are established. */ @@ -690,8 +792,8 @@ TEST_F(guard_regions, dontneed) char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Back the whole range. */ @@ -721,8 +823,16 @@ TEST_F(guard_regions, dontneed) ASSERT_FALSE(result); } else { ASSERT_TRUE(result); - /* Make sure we really did get reset to zero page. */ - ASSERT_EQ(*curr, '\0'); + switch (variant->backing) { + case ANON_BACKED: + /* If anon, then we get a zero page. */ + ASSERT_EQ(*curr, '\0'); + break; + default: + /* Otherwise, we get the file data. */ + ASSERT_EQ(*curr, 'y'); + break; + } } /* Now write... */ @@ -743,8 +853,8 @@ TEST_F(guard_regions, mlock) char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Populate. */ @@ -816,8 +926,8 @@ TEST_F(guard_regions, mremap_move) char *ptr, *ptr_new; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -831,8 +941,7 @@ TEST_F(guard_regions, mremap_move) /* Map a new region we will move this range into. Doing this ensures * that we have reserved a range to map into. */ - ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, - -1, 0); + ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, @@ -863,8 +972,8 @@ TEST_F(guard_regions, mremap_expand) char *ptr, *ptr_new; /* Map 10 pages... */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* ...But unmap the last 5 so we can ensure we can expand into them. */ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); @@ -888,8 +997,7 @@ TEST_F(guard_regions, mremap_expand) ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); /* Reserve a region which we can move to and expand into. */ - ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now move and expand into it. */ @@ -927,8 +1035,8 @@ TEST_F(guard_regions, mremap_shrink) int i; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -992,8 +1100,8 @@ TEST_F(guard_regions, fork) int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Establish guard pages in the first 5 pages. */ @@ -1046,9 +1154,12 @@ TEST_F(guard_regions, fork_cow) pid_t pid; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "CoW only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Populate range. */ @@ -1117,9 +1228,12 @@ TEST_F(guard_regions, fork_wipeonfork) pid_t pid; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "Wipe on fork only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Mark wipe on fork. */ @@ -1166,9 +1280,12 @@ TEST_F(guard_regions, lazyfree) char *ptr; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "MADV_FREE only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1202,8 +1319,8 @@ TEST_F(guard_regions, populate) char *ptr; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1229,8 +1346,8 @@ TEST_F(guard_regions, cold_pageout) int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1281,6 +1398,9 @@ TEST_F(guard_regions, uffd) struct uffdio_register reg; struct uffdio_range range; + if (!is_anon_backed(variant)) + SKIP(return, "uffd only works on anon backing"); + /* Set up uffd. */ uffd = userfaultfd(0); if (uffd == -1 && errno == EPERM) @@ -1290,8 +1410,8 @@ TEST_F(guard_regions, uffd) ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Register the range with uffd. */ From 0b6d4853d1d7d8ab65de9e6958af4d5cdf6a2b75 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Feb 2025 18:17:03 +0000 Subject: [PATCH 0913/2157] tools/selftests: add file/shmem-backed mapping guard region tests Extend the guard region self tests to explicitly assert that guard regions work correctly for functionality specific to file-backed and shmem mappings. In addition to testing all of the existing guard region functionality that is currently tested against anonymous mappings against file-backed and shmem mappings (except those which are exclusive to anonymous mapping), we now also: * Test that MADV_SEQUENTIAL does not cause unexpected readahead behaviour. * Test that MAP_PRIVATE behaves as expected with guard regions installed in both a shared and private mapping of an fd. * Test that a read-only file can correctly establish guard regions. * Test a probable fault-around case does not interfere with guard regions (or vice-versa). * Test that truncation does not eliminate guard regions. * Test that hole punching functions as expected in the presence of guard regions. * Test that a read-only mapping of a memfd write sealed mapping can have guard regions established within it and function correctly without violation of the seal. * Test that guard regions installed into a mapping of the anonymous zero page function correctly. Link: https://lkml.kernel.org/r/90c16bec5fcaafcd1700dfa3e9988c3e1aa9ac1d.1739469950.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Jann Horn Cc: John Hubbard Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 595 +++++++++++++++++++++ 1 file changed, 595 insertions(+) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 0469c783f4fa..ea9b5815e828 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -216,6 +216,58 @@ static int open_file(const char *prefix, char *path) return fd; } +/* Establish a varying pattern in a buffer. */ +static void set_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + char *ptr2 = &ptr[i * page_size]; + + memset(ptr2, 'a' + (i % 26), page_size); + } +} + +/* + * Check that a buffer contains the pattern set by set_pattern(), starting at a + * page offset of pgoff within the buffer. + */ +static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size, + size_t pgoff) +{ + size_t i; + + for (i = 0; i < num_pages * page_size; i++) { + size_t offset = pgoff * page_size + i; + char actual = ptr[offset]; + char expected = 'a' + ((offset / page_size) % 26); + + if (actual != expected) + return false; + } + + return true; +} + +/* Check that a buffer contains the pattern set by set_pattern(). */ +static bool check_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + return check_pattern_offset(ptr, num_pages, page_size, 0); +} + +/* Determine if a buffer contains only repetitions of a specified char. */ +static bool is_buf_eq(char *buf, size_t size, char chr) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (buf[i] != chr) + return false; + } + + return true; +} + FIXTURE_SETUP(guard_regions) { self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); @@ -1437,4 +1489,547 @@ TEST_F(guard_regions, uffd) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we + * aggressively read-ahead, then install guard regions and assert that it + * behaves correctly. + * + * We page out using MADV_PAGEOUT before checking guard regions so we drop page + * cache folios, meaning we maximise the possibility of some broken readahead. + */ +TEST_F(guard_regions, madvise_sequential) +{ + char *ptr; + int i; + const unsigned long page_size = self->page_size; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern of data in the file. */ + set_pattern(ptr, 10, page_size); + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Mark it as being accessed sequentially. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0); + + /* Mark every other page a guard page. */ + for (i = 0; i < 10; i += 2) { + char *ptr2 = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now page it out. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Now make sure pages are as expected. */ + for (i = 0; i < 10; i++) { + char *chrp = &ptr[i * page_size]; + + if (i % 2 == 0) { + bool result = try_read_write_buf(chrp); + + ASSERT_FALSE(result); + } else { + ASSERT_EQ(*chrp, 'a' + i); + } + } + + /* Now remove guard pages. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure all data is as expected. */ + if (!check_pattern(ptr, 10, page_size)) + ASSERT_TRUE(false); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Check that file-backed mappings implement guard regions with MAP_PRIVATE + * correctly. + */ +TEST_F(guard_regions, map_private) +{ + const unsigned long page_size = self->page_size; + char *ptr_shared, *ptr_private; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MAP_PRIVATE test specific to file-backed"); + + ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr_shared, MAP_FAILED); + + /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */ + ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0); + ASSERT_NE(ptr_private, MAP_FAILED); + + /* Set pattern in shared mapping. */ + set_pattern(ptr_shared, 10, page_size); + + /* Install guard regions in every other page in the shared mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_shared[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Install guard regions in every other page in the private mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Every odd private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from shared mapping. */ + ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Every even private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from private mapping. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Ensure patterns are intact. */ + ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size)); + ASSERT_TRUE(check_pattern(ptr_private, 10, page_size)); + + /* Now write out every other page to MAP_PRIVATE. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + memset(ptr, 'a' + i, page_size); + } + + /* + * At this point the mapping is: + * + * 0123456789 + * SPSPSPSPSP + * + * Where S = shared, P = private mappings. + */ + + /* Now mark the beginning of the mapping guarded. */ + ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* + * This renders the mapping: + * + * 0123456789 + * xxxxxPSPSP + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* Ensure guard regions as expected. */ + ASSERT_EQ(try_read_buf(ptr), i >= 5); + /* The shared mapping should always succeed. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + } + + /* Remove the guard regions altogether. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * + * We now expect the mapping to be: + * + * 0123456789 + * SSSSSPSPSP + * + * As we removed guard regions, the private pages from the first 5 will + * have been zapped, so on fault will reestablish the shared mapping. + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* + * Assert that shared mappings in the MAP_PRIVATE mapping match + * the shared mapping. + */ + if (i < 5 || i % 2 == 0) { + char *ptr_s = &ptr_shared[i * page_size]; + + ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0); + continue; + } + + /* Everything else is a private mapping. */ + ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i)); + } + + ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0); +} + +/* Test that guard regions established over a read-only mapping function correctly. */ +TEST_F(guard_regions, readonly_file) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Read-only test specific to file-backed"); + + /* Map shared so we can populate with pattern, populate it, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + /* Close the fd so we can re-open read-only. */ + ASSERT_EQ(close(self->fd), 0); + + /* Re-open read-only. */ + self->fd = open(self->path, O_RDONLY); + ASSERT_NE(self->fd, -1); + /* Re-map read-only. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark every other page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Assert that the guard regions are in place.*/ + for (i = 0; i < 10; i++) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0); + } + + /* Remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure the data is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, fault_around) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Fault-around test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern in the backing file. */ + set_pattern(ptr, 10, page_size); + + /* + * Now drop it from the page cache so we get major faults when next we + * map it. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Unmap and remap 'to be sure'. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now fault in every odd page. This should trigger fault-around. */ + for (i = 1; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Finally, ensure that guard regions are intact as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, truncation) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Establish a pattern in the backing file, just so there is data + * there. + */ + set_pattern(ptr, 10, page_size); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to actually used size (initialised to 100). */ + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Here the guard regions will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to half the size, then truncate again to the full size. */ + ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Again, guard pages will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, hole_punch) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + /* Establish pattern in mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + + /* Install a guard region in the middle of the mapping. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_GUARD_INSTALL), 0); + + /* + * The buffer will now be: + * + * 0123456789 + * ***xxxx*** + * + * Where * is data and x is the guard region. + */ + + /* Ensure established. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now hole punch the guarded region. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_REMOVE), 0); + + /* Ensure guard regions remain. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now remove guard region throughout. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Check that the pattern exists in non-hole punched region. */ + ASSERT_TRUE(check_pattern(ptr, 3, page_size)); + /* Check that hole punched region is zeroed. */ + ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0')); + /* Check that the pattern exists in the remainder of the file. */ + ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Ensure that a memfd works correctly with guard regions, that we can write + * seal it then open the mapping read-only and still establish guard regions + * within, remove those guard regions and have everything work correctly. + */ +TEST_F(guard_regions, memfd_write_seal) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != SHMEM_BACKED) + SKIP(return, "memfd write seal test specific to shmem"); + + /* OK, we need a memfd, so close existing one. */ + ASSERT_EQ(close(self->fd), 0); + + /* Create and truncate memfd. */ + self->fd = memfd_create("guard_regions_memfd_seals_test", + MFD_ALLOW_SEALING); + ASSERT_NE(self->fd, -1); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Map, set pattern, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + + /* Write-seal the memfd. */ + ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0); + + /* Now map the memfd readonly. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Ensure write seal intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_FALSE(try_write_buf(ptr_p)); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + + +/* + * Since we are now permitted to establish guard regions in read-only anonymous + * mappings, for the sake of thoroughness, though it probably has no practical + * use, test that guard regions function with a mapping to the anonymous zero + * page. + */ +TEST_F(guard_regions, anon_zeropage) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (!is_anon_backed(variant)) + SKIP(return, "anon zero page test specific to anon/shmem"); + + /* Obtain a read-only i.e. anon zero page mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove all guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Ensure zero page...*/ + ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0')); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN From 18ea595a07bcf931ec6efae5c1f8a0d9a440ed97 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 13 Feb 2025 12:44:53 +0100 Subject: [PATCH 0914/2157] maple_tree: remove a BUG_ON() in mas_alloc_nodes() Remove a BUG_ON() right before a WARN_ON() with the same condition. Calling WARN_ON() and BUG_ON() here is definitely wrong. Since the goal is generally to remove BUG_ON() invocations from the kernel, keep only the WARN_ON(). Link: https://lkml.kernel.org/r/20250213114453.1078318-1-ptesarik@suse.com Fixes: 067311d33e65 ("maple_tree: separate ma_state node from status") Signed-off-by: Petr Tesarik Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 60356ccb11ce..d0bea23fa4bc 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1242,7 +1242,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) if (mas->mas_flags & MA_STATE_PREALLOC) { if (allocated) return; - BUG_ON(!allocated); WARN_ON(!allocated); } From b23ceebd63d85135c9a5061a535ea85210f94f3f Mon Sep 17 00:00:00 2001 From: Guanjun Date: Thu, 13 Feb 2025 13:56:12 +0800 Subject: [PATCH 0915/2157] filemap: remove redundant folio_test_large check in filemap_free_folio The folio_test_large() check in filemap_free_folio() is unnecessary because folio_nr_pages(), which is called internally already performs this check. Removing the redundant condition simplifies the code and avoids double validation. This change improves code readability and reduces unnecessary operations in the folio freeing path. Link: https://lkml.kernel.org/r/20250213055612.490993-1-guanjun@linux.alibaba.com Signed-off-by: Guanjun Acked-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/filemap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6e3d27993b67..152993a86de3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -227,15 +227,12 @@ void __filemap_remove_folio(struct folio *folio, void *shadow) void filemap_free_folio(struct address_space *mapping, struct folio *folio) { void (*free_folio)(struct folio *); - int refs = 1; free_folio = mapping->a_ops->free_folio; if (free_folio) free_folio(folio); - if (folio_test_large(folio)) - refs = folio_nr_pages(folio); - folio_put_refs(folio, refs); + folio_put_refs(folio, folio_nr_pages(folio)); } /** From faeb2831b517931f522f971e184b50ac82d29633 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 14 Feb 2025 22:30:12 +1300 Subject: [PATCH 0916/2157] mm: set folio swapbacked iff folios are dirty in try_to_unmap_one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: batched unmap lazyfree large folios during reclamation", v4. Commit 735ecdfaf4e8 ("mm/vmscan: avoid split lazyfree THP during shrink_folio_list()") prevents the splitting of MADV_FREE'd THP in madvise.c. However, those folios are still added to the deferred_split list in try_to_unmap_one() because we are unmapping PTEs and removing rmap entries one by one. Firstly, this has rendered the following counter somewhat confusing, /sys/kernel/mm/transparent_hugepage/hugepages-size/stats/split_deferred The split_deferred counter was originally designed to track operations such as partial unmap or madvise of large folios. However, in practice, most split_deferred cases arise from memory reclamation of aligned lazyfree mTHPs as observed by Tangquan. This discrepancy has made the split_deferred counter highly misleading. Secondly, this approach is slow because it requires iterating through each PTE and removing the rmap one by one for a large folio. In fact, all PTEs of a pte-mapped large folio should be unmapped at once, and the entire folio should be removed from the rmap as a whole. Thirdly, it also increases the risk of a race condition where lazyfree folios are incorrectly set back to swapbacked, as a speculative folio_get may occur in the shrinker's callback. deferred_split_scan() might call folio_try_get(folio) since we have added the folio to split_deferred list while removing rmap for the 1st subpage, and while we are scanning the 2nd to nr_pages PTEs of this folio in try_to_unmap_one(), the entire mTHP could be transitioned back to swap-backed because the reference count is incremented, which can make "ref_count == 1 + map_count" within try_to_unmap_one() false. /* * The only page refs must be one from isolation * plus the rmap(s) (dropped by discard:). */ if (ref_count == 1 + map_count && (!folio_test_dirty(folio) || ... (vma->vm_flags & VM_DROPPABLE))) { dec_mm_counter(mm, MM_ANONPAGES); goto discard; } This patchset resolves the issue by marking only genuinely dirty folios as swap-backed, as suggested by David, and transitioning to batched unmapping of entire folios in try_to_unmap_one(). Consequently, the deferred_split count drops to zero, and memory reclamation performance improves significantly — reclaiming 64KiB lazyfree large folios is now 2.5x faster(The specific data is embedded in the changelog of patch 3/4). By the way, while the patchset is primarily aimed at PTE-mapped large folios, Baolin and Lance also found that try_to_unmap_one() handles lazyfree redirtied PMD-mapped large folios inefficiently — it splits the PMD into PTEs and iterates over them. This patchset removes the unnecessary splitting, enabling us to skip redirtied PMD-mapped large folios 3.5X faster during memory reclamation. (The specific data can be found in the changelog of patch 4/4). This patch (of 4): The refcount may be temporarily or long-term increased, but this does not change the fundamental nature of the folio already being lazy- freed. Therefore, we only reset 'swapbacked' when we are certain the folio is dirty and not droppable. Link: https://lkml.kernel.org/r/20250214093015.51024-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20250214093015.51024-2-21cnbao@gmail.com Fixes: 6c8e2a256915 ("mm: fix race between MADV_FREE reclaim and blkdev direct IO read") Signed-off-by: Barry Song Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lance Yang Cc: Mauricio Faria de Oliveira Cc: Chis Li (Google) Cc: "Huang, Ying" Cc: Kairui Song Cc: Lorenzo Stoakes Cc: Ryan Roberts Cc: Tangquan Zheng Cc: Albert Ou Cc: Anshuman Khandual Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Gavin Shan Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kefeng Wang Cc: "Kirill A. Shutemov" Cc: Mark Rutland Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Shaoqin Huang Cc: Thomas Gleixner Cc: Will Deacon Cc: Yicong Yang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/rmap.c | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 24bacce9971f..5c208e1c8266 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1963,34 +1963,29 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ smp_rmb(); - /* - * The only page refs must be one from isolation - * plus the rmap(s) (dropped by discard:). - */ - if (ref_count == 1 + map_count && - (!folio_test_dirty(folio) || - /* - * Unlike MADV_FREE mappings, VM_DROPPABLE - * ones can be dropped even if they've - * been dirtied. - */ - (vma->vm_flags & VM_DROPPABLE))) { - dec_mm_counter(mm, MM_ANONPAGES); - goto discard; - } - - /* - * If the folio was redirtied, it cannot be - * discarded. Remap the page to page table. - */ - set_pte_at(mm, address, pvmw.pte, pteval); - /* - * Unlike MADV_FREE mappings, VM_DROPPABLE ones - * never get swap backed on failure to drop. - */ - if (!(vma->vm_flags & VM_DROPPABLE)) + if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) { + /* + * redirtied either using the page table or a previously + * obtained GUP reference. + */ + set_pte_at(mm, address, pvmw.pte, pteval); folio_set_swapbacked(folio); - goto walk_abort; + goto walk_abort; + } else if (ref_count != 1 + map_count) { + /* + * Additional reference. Could be a GUP reference or any + * speculative reference. GUP users must mark the folio + * dirty if there was a modification. This folio cannot be + * reclaimed right now either way, so act just like nothing + * happened. + * We'll come back here later and detect if the folio was + * dirtied when the additional reference is gone. + */ + set_pte_at(mm, address, pvmw.pte, pteval); + goto walk_abort; + } + dec_mm_counter(mm, MM_ANONPAGES); + goto discard; } if (swap_duplicate(entry) < 0) { From 2f4ab3ac10e1476abb6ed55f0b5f176cf635e776 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 14 Feb 2025 22:30:13 +1300 Subject: [PATCH 0917/2157] mm: support tlbbatch flush for a range of PTEs This patch lays the groundwork for supporting batch PTE unmapping in try_to_unmap_one(). It introduces range handling for TLB batch flushing, with the range currently set to the size of PAGE_SIZE. The function __flush_tlb_range_nosync() is architecture-specific and is only used within arch/arm64. This function requires the mm structure instead of the vma structure. To allow its reuse by arch_tlbbatch_add_pending(), which operates with mm but not vma, this patch modifies the argument of __flush_tlb_range_nosync() to take mm as its parameter. Link: https://lkml.kernel.org/r/20250214093015.51024-3-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Will Deacon Reviewed-by: Kefeng Wang Cc: Catalin Marinas Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Ryan Roberts Cc: Shaoqin Huang Cc: Gavin Shan Cc: Mark Rutland Cc: David Hildenbrand Cc: Lance Yang Cc: "Kirill A. Shutemov" Cc: Yosry Ahmed Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Yicong Yang Cc: Baolin Wang Cc: Chis Li Cc: "Huang, Ying" Cc: Kairui Song Cc: Lorenzo Stoakes Cc: Mauricio Faria de Oliveira Cc: Tangquan Zheng Signed-off-by: Andrew Morton --- arch/arm64/include/asm/tlbflush.h | 23 +++++++++++------------ arch/arm64/mm/contpte.c | 2 +- arch/riscv/include/asm/tlbflush.h | 3 +-- arch/riscv/mm/tlbflush.c | 3 +-- arch/x86/include/asm/tlbflush.h | 3 +-- mm/rmap.c | 10 +++++----- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bc94e036a26b..b7e1920570bd 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -322,13 +322,6 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) return true; } -static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) -{ - __flush_tlb_page_nosync(mm, uaddr); -} - /* * If mprotect/munmap/etc occurs during TLB batched flushing, we need to * synchronise all the TLBI issued with a DSB to avoid the race mentioned in @@ -448,7 +441,7 @@ static inline bool __flush_tlb_range_limit_excess(unsigned long start, return false; } -static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, +static inline void __flush_tlb_range_nosync(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) @@ -460,12 +453,12 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, pages = (end - start) >> PAGE_SHIFT; if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { - flush_tlb_mm(vma->vm_mm); + flush_tlb_mm(mm); return; } dsb(ishst); - asid = ASID(vma->vm_mm); + asid = ASID(mm); if (last_level) __flush_tlb_range_op(vale1is, start, pages, stride, asid, @@ -474,7 +467,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true, lpa2_is_enabled()); - mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void __flush_tlb_range(struct vm_area_struct *vma, @@ -482,7 +475,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long stride, bool last_level, int tlb_level) { - __flush_tlb_range_nosync(vma, start, end, stride, + __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, last_level, tlb_level); dsb(ish); } @@ -533,6 +526,12 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ish); isb(); } + +static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, unsigned long start, unsigned long end) +{ + __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3); +} #endif #endif diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index 55107d27d3f8..bcac4f55f9c1 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -335,7 +335,7 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, * eliding the trailing DSB applies here. */ addr = ALIGN_DOWN(addr, CONT_PTE_SIZE); - __flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE, + __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE, PAGE_SIZE, true, 3); } diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 72e559934952..ce0dd0fed764 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -60,8 +60,7 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, bool arch_tlbbatch_should_defer(struct mm_struct *mm); void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr); + struct mm_struct *mm, unsigned long start, unsigned long end); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 9b6e86ce3867..74dd9307fbf1 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -186,8 +186,7 @@ bool arch_tlbbatch_should_defer(struct mm_struct *mm) } void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) + struct mm_struct *mm, unsigned long start, unsigned long end) { cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 02fc2aa06e9e..29373da7b00a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -279,8 +279,7 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) } static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) + struct mm_struct *mm, unsigned long start, unsigned long end) { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); diff --git a/mm/rmap.c b/mm/rmap.c index 5c208e1c8266..765e541ac9be 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ void try_to_unmap_flush_dirty(void) (TLB_FLUSH_BATCH_PENDING_MASK / 2) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval, - unsigned long uaddr) + unsigned long start, unsigned long end) { struct tlbflush_unmap_batch *tlb_ubc = ¤t->tlb_ubc; int batch; @@ -681,7 +681,7 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval, if (!pte_accessible(mm, pteval)) return; - arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr); + arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, start, end); tlb_ubc->flush_required = true; /* @@ -757,7 +757,7 @@ void flush_tlb_batched_pending(struct mm_struct *mm) } #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval, - unsigned long uaddr) + unsigned long start, unsigned long end) { } @@ -1887,7 +1887,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ pteval = ptep_get_and_clear(mm, address, pvmw.pte); - set_tlb_ubc_flush_pending(mm, pteval, address); + set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE); } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } @@ -2270,7 +2270,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, */ pteval = ptep_get_and_clear(mm, address, pvmw.pte); - set_tlb_ubc_flush_pending(mm, pteval, address); + set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE); } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } From 354dffd29575cdf13154e8fb787322354aa9efc4 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 14 Feb 2025 22:30:14 +1300 Subject: [PATCH 0918/2157] mm: support batched unmap for lazyfree large folios during reclamation Currently, the PTEs and rmap of a large folio are removed one at a time. This is not only slow but also causes the large folio to be unnecessarily added to deferred_split, which can lead to races between the deferred_split shrinker callback and memory reclamation. This patch releases all PTEs and rmap entries in a batch. Currently, it only handles lazyfree large folios. The below microbench tries to reclaim 128MB lazyfree large folios whose sizes are 64KiB: #include #include #include #include #define SIZE 128*1024*1024 // 128 MB unsigned long read_split_deferred() { FILE *file = fopen("/sys/kernel/mm/transparent_hugepage" "/hugepages-64kB/stats/split_deferred", "r"); if (!file) { perror("Error opening file"); return 0; } unsigned long value; if (fscanf(file, "%lu", &value) != 1) { perror("Error reading value"); fclose(file); return 0; } fclose(file); return value; } int main(int argc, char *argv[]) { while(1) { volatile int *p = mmap(0, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); memset((void *)p, 1, SIZE); madvise((void *)p, SIZE, MADV_FREE); clock_t start_time = clock(); unsigned long start_split = read_split_deferred(); madvise((void *)p, SIZE, MADV_PAGEOUT); clock_t end_time = clock(); unsigned long end_split = read_split_deferred(); double elapsed_time = (double)(end_time - start_time) / CLOCKS_PER_SEC; printf("Time taken by reclamation: %f seconds, split_deferred: %ld\n", elapsed_time, end_split - start_split); munmap((void *)p, SIZE); } return 0; } w/o patch: ~ # ./a.out Time taken by reclamation: 0.177418 seconds, split_deferred: 2048 Time taken by reclamation: 0.178348 seconds, split_deferred: 2048 Time taken by reclamation: 0.174525 seconds, split_deferred: 2048 Time taken by reclamation: 0.171620 seconds, split_deferred: 2048 Time taken by reclamation: 0.172241 seconds, split_deferred: 2048 Time taken by reclamation: 0.174003 seconds, split_deferred: 2048 Time taken by reclamation: 0.171058 seconds, split_deferred: 2048 Time taken by reclamation: 0.171993 seconds, split_deferred: 2048 Time taken by reclamation: 0.169829 seconds, split_deferred: 2048 Time taken by reclamation: 0.172895 seconds, split_deferred: 2048 Time taken by reclamation: 0.176063 seconds, split_deferred: 2048 Time taken by reclamation: 0.172568 seconds, split_deferred: 2048 Time taken by reclamation: 0.171185 seconds, split_deferred: 2048 Time taken by reclamation: 0.170632 seconds, split_deferred: 2048 Time taken by reclamation: 0.170208 seconds, split_deferred: 2048 Time taken by reclamation: 0.174192 seconds, split_deferred: 2048 ... w/ patch: ~ # ./a.out Time taken by reclamation: 0.074231 seconds, split_deferred: 0 Time taken by reclamation: 0.071026 seconds, split_deferred: 0 Time taken by reclamation: 0.072029 seconds, split_deferred: 0 Time taken by reclamation: 0.071873 seconds, split_deferred: 0 Time taken by reclamation: 0.073573 seconds, split_deferred: 0 Time taken by reclamation: 0.071906 seconds, split_deferred: 0 Time taken by reclamation: 0.073604 seconds, split_deferred: 0 Time taken by reclamation: 0.075903 seconds, split_deferred: 0 Time taken by reclamation: 0.073191 seconds, split_deferred: 0 Time taken by reclamation: 0.071228 seconds, split_deferred: 0 Time taken by reclamation: 0.071391 seconds, split_deferred: 0 Time taken by reclamation: 0.071468 seconds, split_deferred: 0 Time taken by reclamation: 0.071896 seconds, split_deferred: 0 Time taken by reclamation: 0.072508 seconds, split_deferred: 0 Time taken by reclamation: 0.071884 seconds, split_deferred: 0 Time taken by reclamation: 0.072433 seconds, split_deferred: 0 Time taken by reclamation: 0.071939 seconds, split_deferred: 0 ... Link: https://lkml.kernel.org/r/20250214093015.51024-4-21cnbao@gmail.com Signed-off-by: Barry Song Cc: Albert Ou Cc: Anshuman Khandual Cc: Baolin Wang Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chis Li Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: "H. Peter Anvin" Cc: "Huang, Ying" Cc: Ingo Molnar Cc: Kairui Song Cc: Kefeng Wang Cc: "Kirill A. Shutemov" Cc: Lance Yang Cc: Lorenzo Stoakes Cc: Mark Rutland Cc: Mauricio Faria de Oliveira Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Shaoqin Huang Cc: Tangquan Zheng Cc: Thomas Gleixner Cc: Will Deacon Cc: Yicong Yang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/rmap.c | 72 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 765e541ac9be..7a93a7cd2c64 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1722,6 +1722,25 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page, #endif } +/* We support batch unmapping of PTEs for lazyfree large folios */ +static inline bool can_batch_unmap_folio_ptes(unsigned long addr, + struct folio *folio, pte_t *ptep) +{ + const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; + int max_nr = folio_nr_pages(folio); + pte_t pte = ptep_get(ptep); + + if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) + return false; + if (pte_unused(pte)) + return false; + if (pte_pfn(pte) != folio_pfn(folio)) + return false; + + return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, + NULL, NULL) == max_nr; +} + /* * @arg: enum ttu_flags will be passed to this argument */ @@ -1735,6 +1754,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, struct page *subpage; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; + unsigned long nr_pages = 1, end_addr; unsigned long pfn; unsigned long hsz = 0; @@ -1874,23 +1894,26 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (pte_dirty(pteval)) folio_mark_dirty(folio); } else if (likely(pte_present(pteval))) { - flush_cache_page(vma, address, pfn); - /* Nuke the page table entry. */ - if (should_defer_flush(mm, flags)) { - /* - * We clear the PTE but do not flush so potentially - * a remote CPU could still be writing to the folio. - * If the entry was previously clean then the - * architecture must guarantee that a clear->dirty - * transition on a cached TLB entry is written through - * and traps if the PTE is unmapped. - */ - pteval = ptep_get_and_clear(mm, address, pvmw.pte); + if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && + can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) + nr_pages = folio_nr_pages(folio); + end_addr = address + nr_pages * PAGE_SIZE; + flush_cache_range(vma, address, end_addr); - set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE); - } else { - pteval = ptep_clear_flush(vma, address, pvmw.pte); - } + /* Nuke the page table entry. */ + pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0); + /* + * We clear the PTE but do not flush so potentially + * a remote CPU could still be writing to the folio. + * If the entry was previously clean then the + * architecture must guarantee that a clear->dirty + * transition on a cached TLB entry is written through + * and traps if the PTE is unmapped. + */ + if (should_defer_flush(mm, flags)) + set_tlb_ubc_flush_pending(mm, pteval, address, end_addr); + else + flush_tlb_range(vma, address, end_addr); if (pte_dirty(pteval)) folio_mark_dirty(folio); } else { @@ -1968,7 +1991,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * redirtied either using the page table or a previously * obtained GUP reference. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_ptes(mm, address, pvmw.pte, pteval, nr_pages); folio_set_swapbacked(folio); goto walk_abort; } else if (ref_count != 1 + map_count) { @@ -1981,10 +2004,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * We'll come back here later and detect if the folio was * dirtied when the additional reference is gone. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_ptes(mm, address, pvmw.pte, pteval, nr_pages); goto walk_abort; } - dec_mm_counter(mm, MM_ANONPAGES); + add_mm_counter(mm, MM_ANONPAGES, -nr_pages); goto discard; } @@ -2049,13 +2072,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, dec_mm_counter(mm, mm_counter_file(folio)); } discard: - if (unlikely(folio_test_hugetlb(folio))) + if (unlikely(folio_test_hugetlb(folio))) { hugetlb_remove_rmap(folio); - else - folio_remove_rmap_pte(folio, subpage, vma); + } else { + folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); + folio_ref_sub(folio, nr_pages - 1); + } if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); + /* We have already batched the entire folio */ + if (nr_pages > 1) + goto walk_done; continue; walk_abort: ret = false; From 2f9b43d617e2685728568ca609c5c77e45d6f1e8 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 14 Feb 2025 22:30:15 +1300 Subject: [PATCH 0919/2157] mm: avoid splitting pmd for lazyfree pmd-mapped THP in try_to_unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The try_to_unmap_one() function currently handles PMD-mapped THPs inefficiently. It first splits the PMD into PTEs, copies the dirty state from the PMD to the PTEs, iterates over the PTEs to locate the dirty state, and then marks the THP as swap-backed. This process involves unnecessary PMD splitting and redundant iteration. Instead, this functionality can be efficiently managed in __discard_anon_folio_pmd_locked(), avoiding the extra steps and improving performance. The following microbenchmark redirties folios after invoking MADV_FREE, then measures the time taken to perform memory reclamation (actually set those folios swapbacked again) on the redirtied folios. #include #include #include #include #define SIZE 128*1024*1024 // 128 MB int main(int argc, char *argv[]) { while(1) { volatile int *p = mmap(0, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); memset((void *)p, 1, SIZE); madvise((void *)p, SIZE, MADV_FREE); /* redirty after MADV_FREE */ memset((void *)p, 1, SIZE); clock_t start_time = clock(); madvise((void *)p, SIZE, MADV_PAGEOUT); clock_t end_time = clock(); double elapsed_time = (double)(end_time - start_time) / CLOCKS_PER_SEC; printf("Time taken by reclamation: %f seconds\n", elapsed_time); munmap((void *)p, SIZE); } return 0; } Testing results are as below, w/o patch: ~ # ./a.out Time taken by reclamation: 0.007300 seconds Time taken by reclamation: 0.007226 seconds Time taken by reclamation: 0.007295 seconds Time taken by reclamation: 0.007731 seconds Time taken by reclamation: 0.007134 seconds Time taken by reclamation: 0.007285 seconds Time taken by reclamation: 0.007720 seconds Time taken by reclamation: 0.007128 seconds Time taken by reclamation: 0.007710 seconds Time taken by reclamation: 0.007712 seconds Time taken by reclamation: 0.007236 seconds Time taken by reclamation: 0.007690 seconds Time taken by reclamation: 0.007174 seconds Time taken by reclamation: 0.007670 seconds Time taken by reclamation: 0.007169 seconds Time taken by reclamation: 0.007305 seconds Time taken by reclamation: 0.007432 seconds Time taken by reclamation: 0.007158 seconds Time taken by reclamation: 0.007133 seconds … w/ patch ~ # ./a.out Time taken by reclamation: 0.002124 seconds Time taken by reclamation: 0.002116 seconds Time taken by reclamation: 0.002150 seconds Time taken by reclamation: 0.002261 seconds Time taken by reclamation: 0.002137 seconds Time taken by reclamation: 0.002173 seconds Time taken by reclamation: 0.002063 seconds Time taken by reclamation: 0.002088 seconds Time taken by reclamation: 0.002169 seconds Time taken by reclamation: 0.002124 seconds Time taken by reclamation: 0.002111 seconds Time taken by reclamation: 0.002224 seconds Time taken by reclamation: 0.002297 seconds Time taken by reclamation: 0.002260 seconds Time taken by reclamation: 0.002246 seconds Time taken by reclamation: 0.002272 seconds Time taken by reclamation: 0.002277 seconds Time taken by reclamation: 0.002462 seconds … This patch significantly speeds up try_to_unmap_one() by allowing it to skip redirtied THPs without splitting the PMD. Link: https://lkml.kernel.org/r/20250214093015.51024-5-21cnbao@gmail.com Signed-off-by: Barry Song Suggested-by: Baolin Wang Suggested-by: Lance Yang Reviewed-by: Baolin Wang Reviewed-by: Lance Yang Cc: Albert Ou Cc: Anshuman Khandual Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chis Li Cc: Dave Hansen Cc: David Hildenbrand Cc: Gavin Shan Cc: "H. Peter Anvin" Cc: "Huang, Ying" Cc: Ingo Molnar Cc: Kairui Song Cc: Kefeng Wang Cc: "Kirill A. Shutemov" Cc: Lorenzo Stoakes Cc: Mark Rutland Cc: Mauricio Faria de Oliveira Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Shaoqin Huang Cc: Tangquan Zheng Cc: Thomas Gleixner Cc: Will Deacon Cc: Yicong Yang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/huge_memory.c | 24 +++++++++++++++++------- mm/rmap.c | 13 ++++++++++--- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e7ac4f0dc21d..acb12653484e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3063,8 +3063,12 @@ static bool __discard_anon_folio_pmd_locked(struct vm_area_struct *vma, int ref_count, map_count; pmd_t orig_pmd = *pmdp; - if (folio_test_dirty(folio) || pmd_dirty(orig_pmd)) + if (pmd_dirty(orig_pmd)) + folio_set_dirty(folio); + if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) { + folio_set_swapbacked(folio); return false; + } orig_pmd = pmdp_huge_clear_flush(vma, addr, pmdp); @@ -3091,8 +3095,15 @@ static bool __discard_anon_folio_pmd_locked(struct vm_area_struct *vma, * * The only folio refs must be one from isolation plus the rmap(s). */ - if (folio_test_dirty(folio) || pmd_dirty(orig_pmd) || - ref_count != map_count + 1) { + if (pmd_dirty(orig_pmd)) + folio_set_dirty(folio); + if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) { + folio_set_swapbacked(folio); + set_pmd_at(mm, addr, pmdp, orig_pmd); + return false; + } + + if (ref_count != map_count + 1) { set_pmd_at(mm, addr, pmdp, orig_pmd); return false; } @@ -3112,12 +3123,11 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, { VM_WARN_ON_FOLIO(!folio_test_pmd_mappable(folio), folio); VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + VM_WARN_ON_FOLIO(folio_test_swapbacked(folio), folio); VM_WARN_ON_ONCE(!IS_ALIGNED(addr, HPAGE_PMD_SIZE)); - if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) - return __discard_anon_folio_pmd_locked(vma, addr, pmdp, folio); - - return false; + return __discard_anon_folio_pmd_locked(vma, addr, pmdp, folio); } static void remap_page(struct folio *folio, unsigned long nr, int flags) diff --git a/mm/rmap.c b/mm/rmap.c index 7a93a7cd2c64..333ecac049b2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1804,9 +1804,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, } if (!pvmw.pte) { - if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, - folio)) - goto walk_done; + if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) { + if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, folio)) + goto walk_done; + /* + * unmap_huge_pmd_locked has either already marked + * the folio as swap-backed or decided to retain it + * due to GUP or speculative references. + */ + goto walk_abort; + } if (flags & TTU_SPLIT_HUGE_PMD) { /* From b2ae5fccb8c0ec2167e6e6c5c5a5eb8d656f70ef Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:38 -0800 Subject: [PATCH 0920/2157] mm: introduce vma_start_read_locked{_nested} helpers Patch series "reimplement per-vma lock as a refcount", v10. Back when per-vma locks were introduces, vm_lock was moved out of vm_area_struct in [1] because of the performance regression caused by false cacheline sharing. Recent investigation [2] revealed that the regressions is limited to a rather old Broadwell microarchitecture and even there it can be mitigated by disabling adjacent cacheline prefetching, see [3]. Splitting single logical structure into multiple ones leads to more complicated management, extra pointer dereferences and overall less maintainable code. When that split-away part is a lock, it complicates things even further. With no performance benefits, there are no reasons for this split. Merging the vm_lock back into vm_area_struct also allows vm_area_struct to use SLAB_TYPESAFE_BY_RCU later in this patchset. This patchset: 1. moves vm_lock back into vm_area_struct, aligning it at the cacheline boundary and changing the cache to be cacheline-aligned to minimize cacheline sharing; 2. changes vm_area_struct initialization to mark new vma as detached until it is inserted into vma tree; 3. replaces vm_lock and vma->detached flag with a reference counter; 4. regroups vm_area_struct members to fit them into 3 cachelines; 5. changes vm_area_struct cache to SLAB_TYPESAFE_BY_RCU to allow for their reuse and to minimize call_rcu() calls. Pagefault microbenchmarks show performance improvement: Hmean faults/cpu-1 507926.5547 ( 0.00%) 506519.3692 * -0.28%* Hmean faults/cpu-4 479119.7051 ( 0.00%) 481333.6802 * 0.46%* Hmean faults/cpu-7 452880.2961 ( 0.00%) 455845.6211 * 0.65%* Hmean faults/cpu-12 347639.1021 ( 0.00%) 352004.2254 * 1.26%* Hmean faults/cpu-21 200061.2238 ( 0.00%) 229597.0317 * 14.76%* Hmean faults/cpu-30 145251.2001 ( 0.00%) 164202.5067 * 13.05%* Hmean faults/cpu-48 106848.4434 ( 0.00%) 120641.5504 * 12.91%* Hmean faults/cpu-56 92472.3835 ( 0.00%) 103464.7916 * 11.89%* Hmean faults/sec-1 507566.1468 ( 0.00%) 506139.0811 * -0.28%* Hmean faults/sec-4 1880478.2402 ( 0.00%) 1886795.6329 * 0.34%* Hmean faults/sec-7 3106394.3438 ( 0.00%) 3140550.7485 * 1.10%* Hmean faults/sec-12 4061358.4795 ( 0.00%) 4112477.0206 * 1.26%* Hmean faults/sec-21 3988619.1169 ( 0.00%) 4577747.1436 * 14.77%* Hmean faults/sec-30 3909839.5449 ( 0.00%) 4311052.2787 * 10.26%* Hmean faults/sec-48 4761108.4691 ( 0.00%) 5283790.5026 * 10.98%* Hmean faults/sec-56 4885561.4590 ( 0.00%) 5415839.4045 * 10.85%* This patch (of 18): Introduce helper functions which can be used to read-lock a VMA when holding mmap_lock for read. Replace direct accesses to vma->vm_lock with these new helpers. Link: https://lkml.kernel.org/r/20250213224655.1680278-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Reviewed-by: Davidlohr Bueso Reviewed-by: Shakeel Butt Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 24 ++++++++++++++++++++++++ mm/userfaultfd.c | 22 +++++----------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fd1e85b4b48a..6a4914bc1a38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -735,6 +735,30 @@ static inline bool vma_start_read(struct vm_area_struct *vma) return true; } +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) +{ + mmap_assert_locked(vma->vm_mm); + down_read_nested(&vma->vm_lock->lock, subclass); +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline void vma_start_read_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); + down_read(&vma->vm_lock->lock); +} + static inline void vma_end_read(struct vm_area_struct *vma) { rcu_read_lock(); /* keeps vma alive till the end of up_read */ diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index d06453fa8aba..48ac81bbfee6 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -85,16 +85,8 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm, mmap_read_lock(mm); vma = find_vma_and_prepare_anon(mm, address); - if (!IS_ERR(vma)) { - /* - * We cannot use vma_start_read() as it may fail due to - * false locked (see comment in vma_start_read()). We - * can avoid that by directly locking vm_lock under - * mmap_lock, which guarantees that nobody can lock the - * vma for write (vma_start_write()) under us. - */ - down_read(&vma->vm_lock->lock); - } + if (!IS_ERR(vma)) + vma_start_read_locked(vma); mmap_read_unlock(mm); return vma; @@ -1564,14 +1556,10 @@ static int uffd_move_lock(struct mm_struct *mm, mmap_read_lock(mm); err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap); if (!err) { - /* - * See comment in uffd_lock_vma() as to why not using - * vma_start_read() here. - */ - down_read(&(*dst_vmap)->vm_lock->lock); + vma_start_read_locked(*dst_vmap); if (*dst_vmap != *src_vmap) - down_read_nested(&(*src_vmap)->vm_lock->lock, - SINGLE_DEPTH_NESTING); + vma_start_read_locked_nested(*src_vmap, + SINGLE_DEPTH_NESTING); } mmap_read_unlock(mm); return err; From 7b6218ae1253491d56f21f4b1f3609f3dd873600 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:39 -0800 Subject: [PATCH 0921/2157] mm: move per-vma lock into vm_area_struct Back when per-vma locks were introduces, vm_lock was moved out of vm_area_struct in [1] because of the performance regression caused by false cacheline sharing. Recent investigation [2] revealed that the regressions is limited to a rather old Broadwell microarchitecture and even there it can be mitigated by disabling adjacent cacheline prefetching, see [3]. Splitting single logical structure into multiple ones leads to more complicated management, extra pointer dereferences and overall less maintainable code. When that split-away part is a lock, it complicates things even further. With no performance benefits, there are no reasons for this split. Merging the vm_lock back into vm_area_struct also allows vm_area_struct to use SLAB_TYPESAFE_BY_RCU later in this patchset. Move vm_lock back into vm_area_struct, aligning it at the cacheline boundary and changing the cache to be cacheline-aligned as well. With kernel compiled using defconfig, this causes VMA memory consumption to grow from 160 (vm_area_struct) + 40 (vm_lock) bytes to 256 bytes: slabinfo before: ... : ... vma_lock ... 40 102 1 : ... vm_area_struct ... 160 51 2 : ... slabinfo after moving vm_lock: ... : ... vm_area_struct ... 256 32 2 : ... Aggregate VMA memory consumption per 1000 VMAs grows from 50 to 64 pages, which is 5.5MB per 100000 VMAs. Note that the size of this structure is dependent on the kernel configuration and typically the original size is higher than 160 bytes. Therefore these calculations are close to the worst case scenario. A more realistic vm_area_struct usage before this change is: ... : ... vma_lock ... 40 102 1 : ... vm_area_struct ... 176 46 2 : ... Aggregate VMA memory consumption per 1000 VMAs grows from 54 to 64 pages, which is 3.9MB per 100000 VMAs. This memory consumption growth can be addressed later by optimizing the vm_lock. [1] https://lore.kernel.org/all/20230227173632.3292573-34-surenb@google.com/ [2] https://lore.kernel.org/all/ZsQyI%2F087V34JoIt@xsang-OptiPlex-9020/ [3] https://lore.kernel.org/all/CAJuCfpEisU8Lfe96AYJDZ+OM4NoPmnw9bP53cT_kbfP_pR+-2g@mail.gmail.com/ Link: https://lkml.kernel.org/r/20250213224655.1680278-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Reviewed-by: Shakeel Butt Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 28 ++++++++++-------- include/linux/mm_types.h | 6 ++-- kernel/fork.c | 49 ++++---------------------------- tools/testing/vma/vma_internal.h | 33 +++++---------------- 4 files changed, 32 insertions(+), 84 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6a4914bc1a38..bf3b6362927f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -697,6 +697,12 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_PER_VMA_LOCK +static inline void vma_lock_init(struct vm_area_struct *vma) +{ + init_rwsem(&vma->vm_lock.lock); + vma->vm_lock_seq = UINT_MAX; +} + /* * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to @@ -714,7 +720,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) return false; - if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) + if (unlikely(down_read_trylock(&vma->vm_lock.lock) == 0)) return false; /* @@ -729,7 +735,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * This pairs with RELEASE semantics in vma_end_write_all(). */ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { - up_read(&vma->vm_lock->lock); + up_read(&vma->vm_lock.lock); return false; } return true; @@ -744,7 +750,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) { mmap_assert_locked(vma->vm_mm); - down_read_nested(&vma->vm_lock->lock, subclass); + down_read_nested(&vma->vm_lock.lock, subclass); } /* @@ -756,13 +762,13 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int static inline void vma_start_read_locked(struct vm_area_struct *vma) { mmap_assert_locked(vma->vm_mm); - down_read(&vma->vm_lock->lock); + down_read(&vma->vm_lock.lock); } static inline void vma_end_read(struct vm_area_struct *vma) { rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->vm_lock->lock); + up_read(&vma->vm_lock.lock); rcu_read_unlock(); } @@ -791,7 +797,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) if (__is_vma_write_locked(vma, &mm_lock_seq)) return; - down_write(&vma->vm_lock->lock); + down_write(&vma->vm_lock.lock); /* * We should use WRITE_ONCE() here because we can have concurrent reads * from the early lockless pessimistic check in vma_start_read(). @@ -799,7 +805,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. */ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock->lock); + up_write(&vma->vm_lock.lock); } static inline void vma_assert_write_locked(struct vm_area_struct *vma) @@ -811,7 +817,7 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma) static inline void vma_assert_locked(struct vm_area_struct *vma) { - if (!rwsem_is_locked(&vma->vm_lock->lock)) + if (!rwsem_is_locked(&vma->vm_lock.lock)) vma_assert_write_locked(vma); } @@ -844,6 +850,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, #else /* CONFIG_PER_VMA_LOCK */ +static inline void vma_lock_init(struct vm_area_struct *vma) {} static inline bool vma_start_read(struct vm_area_struct *vma) { return false; } static inline void vma_end_read(struct vm_area_struct *vma) {} @@ -878,10 +885,6 @@ static inline void assert_fault_locked(struct vm_fault *vmf) extern const struct vm_operations_struct vma_dummy_vm_ops; -/* - * WARNING: vma_init does not initialize vma->vm_lock. - * Use vm_area_alloc()/vm_area_free() if vma needs locking. - */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { memset(vma, 0, sizeof(*vma)); @@ -890,6 +893,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); vma_numab_state_init(vma); + vma_lock_init(vma); } /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0234f14f2aa6..36dea20cd101 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -730,8 +730,6 @@ struct vm_area_struct { * slowpath. */ unsigned int vm_lock_seq; - /* Unstable RCU readers are allowed to read this. */ - struct vma_lock *vm_lock; #endif /* @@ -784,6 +782,10 @@ struct vm_area_struct { struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + struct vma_lock vm_lock ____cacheline_aligned_in_smp; +#endif } __randomize_layout; #ifdef CONFIG_NUMA diff --git a/kernel/fork.c b/kernel/fork.c index 735405a9c5f3..bdbabe73fb29 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -436,35 +436,6 @@ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -#ifdef CONFIG_PER_VMA_LOCK - -/* SLAB cache for vm_area_struct.lock */ -static struct kmem_cache *vma_lock_cachep; - -static bool vma_lock_alloc(struct vm_area_struct *vma) -{ - vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL); - if (!vma->vm_lock) - return false; - - init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = UINT_MAX; - - return true; -} - -static inline void vma_lock_free(struct vm_area_struct *vma) -{ - kmem_cache_free(vma_lock_cachep, vma->vm_lock); -} - -#else /* CONFIG_PER_VMA_LOCK */ - -static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; } -static inline void vma_lock_free(struct vm_area_struct *vma) {} - -#endif /* CONFIG_PER_VMA_LOCK */ - struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -474,10 +445,6 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) return NULL; vma_init(vma, mm); - if (!vma_lock_alloc(vma)) { - kmem_cache_free(vm_area_cachep, vma); - return NULL; - } return vma; } @@ -496,10 +463,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) * will be reinitialized. */ data_race(memcpy(new, orig, sizeof(*new))); - if (!vma_lock_alloc(new)) { - kmem_cache_free(vm_area_cachep, new); - return NULL; - } + vma_lock_init(new); INIT_LIST_HEAD(&new->anon_vma_chain); vma_numab_state_init(new); dup_anon_vma_name(orig, new); @@ -511,7 +475,6 @@ void __vm_area_free(struct vm_area_struct *vma) { vma_numab_state_free(vma); free_anon_vma_name(vma); - vma_lock_free(vma); kmem_cache_free(vm_area_cachep, vma); } @@ -522,7 +485,7 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) vm_rcu); /* The vma should not be locked while being destroyed. */ - VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma); + VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock.lock), vma); __vm_area_free(vma); } #endif @@ -3200,11 +3163,9 @@ void __init proc_caches_init(void) sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); - - vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); -#ifdef CONFIG_PER_VMA_LOCK - vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT); -#endif + vm_area_cachep = KMEM_CACHE(vm_area_struct, + SLAB_HWCACHE_ALIGN|SLAB_NO_MERGE|SLAB_PANIC| + SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); } diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index bb273927af0f..4506e6fb3c6f 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -275,10 +275,10 @@ struct vm_area_struct { /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock->lock (in write mode) + * - vm_lock.lock (in write mode) * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock->lock (in read or write mode) + * - vm_lock.lock (in read or write mode) * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -287,7 +287,7 @@ struct vm_area_struct { * slowpath. */ unsigned int vm_lock_seq; - struct vma_lock *vm_lock; + struct vma_lock vm_lock; #endif /* @@ -464,17 +464,10 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) return mas_find(&vmi->mas, ULONG_MAX); } -static inline bool vma_lock_alloc(struct vm_area_struct *vma) +static inline void vma_lock_init(struct vm_area_struct *vma) { - vma->vm_lock = calloc(1, sizeof(struct vma_lock)); - - if (!vma->vm_lock) - return false; - - init_rwsem(&vma->vm_lock->lock); + init_rwsem(&vma->vm_lock.lock); vma->vm_lock_seq = UINT_MAX; - - return true; } static inline void vma_assert_write_locked(struct vm_area_struct *); @@ -497,6 +490,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); + vma_lock_init(vma); } static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) @@ -507,10 +501,6 @@ static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) return NULL; vma_init(vma, mm); - if (!vma_lock_alloc(vma)) { - free(vma); - return NULL; - } return vma; } @@ -523,10 +513,7 @@ static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return NULL; memcpy(new, orig, sizeof(*new)); - if (!vma_lock_alloc(new)) { - free(new); - return NULL; - } + vma_lock_init(new); INIT_LIST_HEAD(&new->anon_vma_chain); return new; @@ -696,14 +683,8 @@ static inline void mpol_put(struct mempolicy *) { } -static inline void vma_lock_free(struct vm_area_struct *vma) -{ - free(vma->vm_lock); -} - static inline void __vm_area_free(struct vm_area_struct *vma) { - vma_lock_free(vma); free(vma); } From 8ef95d8f15f9e785341775814f0ed2ee22017aa5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:40 -0800 Subject: [PATCH 0922/2157] mm: mark vma as detached until it's added into vma tree Current implementation does not set detached flag when a VMA is first allocated. This does not represent the real state of the VMA, which is detached until it is added into mm's VMA tree. Fix this by marking new VMAs as detached and resetting detached flag only after VMA is added into a tree. Introduce vma_mark_attached() to make the API more readable and to simplify possible future cleanup when vma->vm_mm might be used to indicate detached vma and vma_mark_attached() will need an additional mm parameter. Link: https://lkml.kernel.org/r/20250213224655.1680278-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Reviewed-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 27 ++++++++++++++++++++------- kernel/fork.c | 4 ++++ mm/memory.c | 2 +- mm/vma.c | 6 +++--- mm/vma.h | 2 ++ tools/testing/vma/vma_internal.h | 17 ++++++++++++----- 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index bf3b6362927f..d333d4070362 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -821,12 +821,21 @@ static inline void vma_assert_locked(struct vm_area_struct *vma) vma_assert_write_locked(vma); } -static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +static inline void vma_mark_attached(struct vm_area_struct *vma) +{ + vma->detached = false; +} + +static inline void vma_mark_detached(struct vm_area_struct *vma) { /* When detaching vma should be write-locked */ - if (detached) - vma_assert_write_locked(vma); - vma->detached = detached; + vma_assert_write_locked(vma); + vma->detached = true; +} + +static inline bool is_vma_detached(struct vm_area_struct *vma) +{ + return vma->detached; } static inline void release_fault_lock(struct vm_fault *vmf) @@ -857,8 +866,8 @@ static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) { mmap_assert_write_locked(vma->vm_mm); } -static inline void vma_mark_detached(struct vm_area_struct *vma, - bool detached) {} +static inline void vma_mark_attached(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma) {} static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) @@ -891,7 +900,10 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_mark_detached(vma, false); +#ifdef CONFIG_PER_VMA_LOCK + /* vma is not locked, can't use vma_mark_detached() */ + vma->detached = true; +#endif vma_numab_state_init(vma); vma_lock_init(vma); } @@ -1086,6 +1098,7 @@ static inline int vma_iter_bulk_store(struct vma_iterator *vmi, if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; + vma_mark_attached(vma); return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index bdbabe73fb29..5bf3e407c795 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -465,6 +465,10 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) data_race(memcpy(new, orig, sizeof(*new))); vma_lock_init(new); INIT_LIST_HEAD(&new->anon_vma_chain); +#ifdef CONFIG_PER_VMA_LOCK + /* vma is not locked, can't use vma_mark_detached() */ + new->detached = true; +#endif vma_numab_state_init(new); dup_anon_vma_name(orig, new); diff --git a/mm/memory.c b/mm/memory.c index 94feb51a7983..6ef014220e09 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6374,7 +6374,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, goto inval; /* Check if the VMA got isolated after we found it */ - if (vma->detached) { + if (is_vma_detached(vma)) { vma_end_read(vma); count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ diff --git a/mm/vma.c b/mm/vma.c index 603e538a093f..702e2181f8b7 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -341,7 +341,7 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, if (vp->remove) { again: - vma_mark_detached(vp->remove, true); + vma_mark_detached(vp->remove); if (vp->file) { uprobe_munmap(vp->remove, vp->remove->vm_start, vp->remove->vm_end); @@ -1238,7 +1238,7 @@ static void reattach_vmas(struct ma_state *mas_detach) mas_set(mas_detach, 0); mas_for_each(mas_detach, vma, ULONG_MAX) - vma_mark_detached(vma, false); + vma_mark_attached(vma); __mt_destroy(mas_detach->tree); } @@ -1313,7 +1313,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, if (error) goto munmap_gather_failed; - vma_mark_detached(next, true); + vma_mark_detached(next); nrpages = vma_pages(next); vms->nr_pages += nrpages; diff --git a/mm/vma.h b/mm/vma.h index e55e68abfbe3..bffb56afce5f 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -205,6 +205,7 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; + vma_mark_attached(vma); return 0; } @@ -437,6 +438,7 @@ static inline void vma_iter_store(struct vma_iterator *vmi, __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_prealloc(&vmi->mas, vma); + vma_mark_attached(vma); } static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4506e6fb3c6f..f93f7f74f97b 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -471,12 +471,16 @@ static inline void vma_lock_init(struct vm_area_struct *vma) } static inline void vma_assert_write_locked(struct vm_area_struct *); -static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +static inline void vma_mark_attached(struct vm_area_struct *vma) +{ + vma->detached = false; +} + +static inline void vma_mark_detached(struct vm_area_struct *vma) { /* When detaching vma should be write-locked */ - if (detached) - vma_assert_write_locked(vma); - vma->detached = detached; + vma_assert_write_locked(vma); + vma->detached = true; } extern const struct vm_operations_struct vma_dummy_vm_ops; @@ -489,7 +493,8 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_mark_detached(vma, false); + /* vma is not locked, can't use vma_mark_detached() */ + vma->detached = true; vma_lock_init(vma); } @@ -515,6 +520,8 @@ static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) memcpy(new, orig, sizeof(*new)); vma_lock_init(new); INIT_LIST_HEAD(&new->anon_vma_chain); + /* vma is not locked, can't use vma_mark_detached() */ + new->detached = true; return new; } From 55e50223bf3e06abceaf68e2ad125458bb5f874f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:41 -0800 Subject: [PATCH 0923/2157] mm: introduce vma_iter_store_attached() to use with attached vmas vma_iter_store() functions can be used both when adding a new vma and when updating an existing one. However for existing ones we do not need to mark them attached as they are already marked that way. With vma->detached being a separate flag, double-marking a vmas as attached or detached is not an issue because the flag will simply be overwritten with the same value. However once we fold this flag into the refcount later in this series, re-attaching or re-detaching a vma becomes an issue since these operations will be incrementing/decrementing a refcount. Introduce vma_iter_store_new() and vma_iter_store_overwrite() to replace vma_iter_store() and avoid re-attaching a vma during vma update. Add assertions in vma_mark_attached()/vma_mark_detached() to catch invalid usage. Update vma tests to check for vma detached state correctness. Link: https://lkml.kernel.org/r/20250213224655.1680278-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 +++++++++++ mm/nommu.c | 4 +-- mm/vma.c | 12 ++++----- mm/vma.h | 11 +++++++-- tools/testing/vma/vma.c | 42 +++++++++++++++++++++++++------- tools/testing/vma/vma_internal.h | 10 ++++++++ 6 files changed, 74 insertions(+), 19 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d333d4070362..003c3e5c0a96 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -821,8 +821,19 @@ static inline void vma_assert_locked(struct vm_area_struct *vma) vma_assert_write_locked(vma); } +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(vma->detached); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!vma->detached); +} + static inline void vma_mark_attached(struct vm_area_struct *vma) { + vma_assert_detached(vma); vma->detached = false; } @@ -830,6 +841,7 @@ static inline void vma_mark_detached(struct vm_area_struct *vma) { /* When detaching vma should be write-locked */ vma_assert_write_locked(vma); + vma_assert_attached(vma); vma->detached = true; } @@ -866,6 +878,8 @@ static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) { mmap_assert_write_locked(vma->vm_mm); } +static inline void vma_assert_attached(struct vm_area_struct *vma) {} +static inline void vma_assert_detached(struct vm_area_struct *vma) {} static inline void vma_mark_attached(struct vm_area_struct *vma) {} static inline void vma_mark_detached(struct vm_area_struct *vma) {} diff --git a/mm/nommu.c b/mm/nommu.c index baa79abdaf03..8b31d8396297 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1191,7 +1191,7 @@ unsigned long do_mmap(struct file *file, setup_vma_to_mm(vma, current->mm); current->mm->map_count++; /* add the VMA to the tree */ - vma_iter_store(&vmi, vma); + vma_iter_store_new(&vmi, vma); /* we flush the region from the icache only when the first executable * mapping of it is made */ @@ -1356,7 +1356,7 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, setup_vma_to_mm(vma, mm); setup_vma_to_mm(new, mm); - vma_iter_store(vmi, new); + vma_iter_store_new(vmi, new); mm->map_count++; return 0; diff --git a/mm/vma.c b/mm/vma.c index 702e2181f8b7..7374c04eee9b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -320,7 +320,7 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, * us to insert it before dropping the locks * (it may either follow vma or precede it). */ - vma_iter_store(vmi, vp->insert); + vma_iter_store_new(vmi, vp->insert); mm->map_count++; } @@ -700,7 +700,7 @@ static int commit_merge(struct vma_merge_struct *vmg) vmg->__adjust_middle_start ? vmg->middle : NULL); vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); vmg_adjust_set_range(vmg); - vma_iter_store(vmg->vmi, vmg->target); + vma_iter_store_overwrite(vmg->vmi, vmg->target); vma_complete(&vp, vmg->vmi, vma->vm_mm); @@ -1711,7 +1711,7 @@ int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) return -ENOMEM; vma_start_write(vma); - vma_iter_store(&vmi, vma); + vma_iter_store_new(&vmi, vma); vma_link_file(vma); mm->map_count++; validate_mm(mm); @@ -2390,7 +2390,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap) /* Lock the VMA since it is modified after insertion into VMA tree */ vma_start_write(vma); - vma_iter_store(vmi, vma); + vma_iter_store_new(vmi, vma); map->mm->map_count++; vma_link_file(vma); @@ -2867,7 +2867,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) anon_vma_interval_tree_pre_update_vma(vma); vma->vm_end = address; /* Overwrite old entry in mtree. */ - vma_iter_store(&vmi, vma); + vma_iter_store_overwrite(&vmi, vma); anon_vma_interval_tree_post_update_vma(vma); perf_event_mmap(vma); @@ -2947,7 +2947,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) vma->vm_start = address; vma->vm_pgoff -= grow; /* Overwrite old entry in mtree. */ - vma_iter_store(&vmi, vma); + vma_iter_store_overwrite(&vmi, vma); anon_vma_interval_tree_post_update_vma(vma); perf_event_mmap(vma); diff --git a/mm/vma.h b/mm/vma.h index bffb56afce5f..55be77ff042f 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -413,9 +413,10 @@ static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi) } /* Store a VMA with preallocated memory */ -static inline void vma_iter_store(struct vma_iterator *vmi, - struct vm_area_struct *vma) +static inline void vma_iter_store_overwrite(struct vma_iterator *vmi, + struct vm_area_struct *vma) { + vma_assert_attached(vma); #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && @@ -438,7 +439,13 @@ static inline void vma_iter_store(struct vma_iterator *vmi, __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_prealloc(&vmi->mas, vma); +} + +static inline void vma_iter_store_new(struct vma_iterator *vmi, + struct vm_area_struct *vma) +{ vma_mark_attached(vma); + vma_iter_store_overwrite(vmi, vma); } static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index c7ffa71841ca..11f761769b5b 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -74,10 +74,22 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm, ret->vm_end = end; ret->vm_pgoff = pgoff; ret->__vm_flags = flags; + vma_assert_detached(ret); return ret; } +/* Helper function to allocate a VMA and link it to the tree. */ +static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) +{ + int res; + + res = vma_link(mm, vma); + if (!res) + vma_assert_attached(vma); + return res; +} + /* Helper function to allocate a VMA and link it to the tree. */ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, unsigned long start, @@ -90,7 +102,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, if (vma == NULL) return NULL; - if (vma_link(mm, vma)) { + if (attach_vma(mm, vma)) { vm_area_free(vma); return NULL; } @@ -108,6 +120,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, /* Helper function which provides a wrapper around a merge new VMA operation. */ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) { + struct vm_area_struct *vma; /* * For convenience, get prev and next VMAs. Which the new VMA operation * requires. @@ -116,7 +129,11 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) vmg->prev = vma_prev(vmg->vmi); vma_iter_next_range(vmg->vmi); - return vma_merge_new_range(vmg); + vma = vma_merge_new_range(vmg); + if (vma) + vma_assert_attached(vma); + + return vma; } /* @@ -125,7 +142,12 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) */ static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) { - return vma_merge_existing_range(vmg); + struct vm_area_struct *vma; + + vma = vma_merge_existing_range(vmg); + if (vma) + vma_assert_attached(vma); + return vma; } /* @@ -260,8 +282,8 @@ static bool test_simple_merge(void) .pgoff = 1, }; - ASSERT_FALSE(vma_link(&mm, vma_left)); - ASSERT_FALSE(vma_link(&mm, vma_right)); + ASSERT_FALSE(attach_vma(&mm, vma_left)); + ASSERT_FALSE(attach_vma(&mm, vma_right)); vma = merge_new(&vmg); ASSERT_NE(vma, NULL); @@ -285,7 +307,7 @@ static bool test_simple_modify(void) struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags); VMA_ITERATOR(vmi, &mm, 0x1000); - ASSERT_FALSE(vma_link(&mm, init_vma)); + ASSERT_FALSE(attach_vma(&mm, init_vma)); /* * The flags will not be changed, the vma_modify_flags() function @@ -351,7 +373,7 @@ static bool test_simple_expand(void) .pgoff = 0, }; - ASSERT_FALSE(vma_link(&mm, vma)); + ASSERT_FALSE(attach_vma(&mm, vma)); ASSERT_FALSE(expand_existing(&vmg)); @@ -372,7 +394,7 @@ static bool test_simple_shrink(void) struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); VMA_ITERATOR(vmi, &mm, 0); - ASSERT_FALSE(vma_link(&mm, vma)); + ASSERT_FALSE(attach_vma(&mm, vma)); ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); @@ -1522,11 +1544,11 @@ static bool test_copy_vma(void) vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); - ASSERT_NE(vma_new, vma); ASSERT_EQ(vma_new->vm_start, 0); ASSERT_EQ(vma_new->vm_end, 0x2000); ASSERT_EQ(vma_new->vm_pgoff, 0); + vma_assert_attached(vma_new); cleanup_mm(&mm, &vmi); @@ -1535,6 +1557,7 @@ static bool test_copy_vma(void) vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, flags); vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); + vma_assert_attached(vma_new); ASSERT_EQ(vma_new, vma_next); @@ -1576,6 +1599,7 @@ static bool test_expand_only_mode(void) ASSERT_EQ(vma->vm_pgoff, 3); ASSERT_TRUE(vma_write_started(vma)); ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); + vma_assert_attached(vma); cleanup_mm(&mm, &vmi); return true; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index f93f7f74f97b..34277842156c 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -470,6 +470,16 @@ static inline void vma_lock_init(struct vm_area_struct *vma) vma->vm_lock_seq = UINT_MAX; } +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(vma->detached); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!vma->detached); +} + static inline void vma_assert_write_locked(struct vm_area_struct *); static inline void vma_mark_attached(struct vm_area_struct *vma) { From fe605930f074fb381884456061c7628c1fd35742 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:42 -0800 Subject: [PATCH 0924/2157] mm: mark vmas detached upon exit When exit_mmap() removes vmas belonging to an exiting task, it does not mark them as detached since they can't be reached by other tasks and they will be freed shortly. Once we introduce vma reuse, all vmas will have to be in detached state before they are freed to ensure vma when reused is in a consistent state. Add missing vma_mark_detached() before freeing the vma. Link: https://lkml.kernel.org/r/20250213224655.1680278-6-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- mm/vma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index 7374c04eee9b..53f4d0efce4d 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -427,10 +427,12 @@ void remove_vma(struct vm_area_struct *vma, bool unreachable) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - if (unreachable) + if (unreachable) { + vma_mark_detached(vma); __vm_area_free(vma); - else + } else { vm_area_free(vma); + } } /* From 2c2bd11caba2a45445c1f0c722451fe29e59db79 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:43 -0800 Subject: [PATCH 0925/2157] types: move struct rcuwait into types.h Move rcuwait struct definition into types.h so that rcuwait can be used without including rcuwait.h which includes other headers. Without this change mm_types.h can't use rcuwait due to a the following circular dependency: mm_types.h -> rcuwait.h -> signal.h -> mm_types.h Link: https://lkml.kernel.org/r/20250213224655.1680278-7-surenb@google.com Suggested-by: Matthew Wilcox Signed-off-by: Suren Baghdasaryan Acked-by: Davidlohr Bueso Acked-by: Liam R. Howlett Acked-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/rcuwait.h | 13 +------------ include/linux/types.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 27343424225c..9ad134a04b41 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -4,18 +4,7 @@ #include #include - -/* - * rcuwait provides a way of blocking and waking up a single - * task in an rcu-safe manner. - * - * The only time @task is non-nil is when a user is blocked (or - * checking if it needs to) on a condition, and reset as soon as we - * know that the condition has succeeded and are awoken. - */ -struct rcuwait { - struct task_struct __rcu *task; -}; +#include #define __RCUWAIT_INITIALIZER(name) \ { .task = NULL, } diff --git a/include/linux/types.h b/include/linux/types.h index 1c509ce8f7f6..a3d2182c2686 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -248,5 +248,17 @@ typedef void (*swap_func_t)(void *a, void *b, int size); typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); typedef int (*cmp_func_t)(const void *a, const void *b); +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner. + * + * The only time @task is non-nil is when a user is blocked (or + * checking if it needs to) on a condition, and reset as soon as we + * know that the condition has succeeded and are awoken. + */ +struct rcuwait { + struct task_struct __rcu *task; +}; + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ From 7440adb405dfc4abe21bc95abf3481e6a6649c05 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:44 -0800 Subject: [PATCH 0926/2157] mm: allow vma_start_read_locked/vma_start_read_locked_nested to fail With upcoming replacement of vm_lock with vm_refcnt, we need to handle a possibility of vma_start_read_locked/vma_start_read_locked_nested failing due to refcount overflow. Prepare for such possibility by changing these APIs and adjusting their users. Link: https://lkml.kernel.org/r/20250213224655.1680278-8-surenb@google.com Signed-off-by: Suren Baghdasaryan Cc: Lokesh Gidra Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Vlastimil Babka Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- mm/userfaultfd.c | 30 +++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 003c3e5c0a96..09b48af68699 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,10 +747,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * not be used in such cases because it might fail due to mm_lock_seq overflow. * This functionality is used to obtain vma read lock and drop the mmap read lock. */ -static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) { mmap_assert_locked(vma->vm_mm); down_read_nested(&vma->vm_lock.lock, subclass); + return true; } /* @@ -759,10 +760,11 @@ static inline void vma_start_read_locked_nested(struct vm_area_struct *vma, int * not be used in such cases because it might fail due to mm_lock_seq overflow. * This functionality is used to obtain vma read lock and drop the mmap read lock. */ -static inline void vma_start_read_locked(struct vm_area_struct *vma) +static inline bool vma_start_read_locked(struct vm_area_struct *vma) { mmap_assert_locked(vma->vm_mm); down_read(&vma->vm_lock.lock); + return true; } static inline void vma_end_read(struct vm_area_struct *vma) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 48ac81bbfee6..fbf2cf62ab9f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -85,8 +85,12 @@ static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm, mmap_read_lock(mm); vma = find_vma_and_prepare_anon(mm, address); - if (!IS_ERR(vma)) - vma_start_read_locked(vma); + if (!IS_ERR(vma)) { + bool locked = vma_start_read_locked(vma); + + if (!locked) + vma = ERR_PTR(-EAGAIN); + } mmap_read_unlock(mm); return vma; @@ -1555,12 +1559,24 @@ static int uffd_move_lock(struct mm_struct *mm, mmap_read_lock(mm); err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap); - if (!err) { - vma_start_read_locked(*dst_vmap); - if (*dst_vmap != *src_vmap) - vma_start_read_locked_nested(*src_vmap, - SINGLE_DEPTH_NESTING); + if (err) + goto out; + + if (!vma_start_read_locked(*dst_vmap)) { + err = -EAGAIN; + goto out; } + + /* Nothing further to do if both vmas are locked. */ + if (*dst_vmap == *src_vmap) + goto out; + + if (!vma_start_read_locked_nested(*src_vmap, SINGLE_DEPTH_NESTING)) { + /* Undo dst_vmap locking if src_vmap failed to lock */ + vma_end_read(*dst_vmap); + err = -EAGAIN; + } +out: mmap_read_unlock(mm); return err; } From ce0853966085dd8eab7153ce0b815c4a07d86698 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:45 -0800 Subject: [PATCH 0927/2157] mm: move mmap_init_lock() out of the header file mmap_init_lock() is used only from mm_init() in fork.c, therefore it does not have to reside in the header file. This move lets us avoid including additional headers in mmap_lock.h later, when mmap_init_lock() needs to initialize rcuwait object. Link: https://lkml.kernel.org/r/20250213224655.1680278-9-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 6 ------ kernel/fork.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 45a21faa3ff6..4706c6769902 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -122,12 +122,6 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int #endif /* CONFIG_PER_VMA_LOCK */ -static inline void mmap_init_lock(struct mm_struct *mm) -{ - init_rwsem(&mm->mmap_lock); - mm_lock_seqcount_init(mm); -} - static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); diff --git a/kernel/fork.c b/kernel/fork.c index 5bf3e407c795..f1af413e5aa4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1230,6 +1230,12 @@ static void mm_init_uprobes_state(struct mm_struct *mm) #endif } +static void mmap_init_lock(struct mm_struct *mm) +{ + init_rwsem(&mm->mmap_lock); + mm_lock_seqcount_init(mm); +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, struct user_namespace *user_ns) { From 45ad9f5290dc4bb2249e951d4b3756d3ebda2d66 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:46 -0800 Subject: [PATCH 0928/2157] mm: uninline the main body of vma_start_write() vma_start_write() is used in many places and will grow in size very soon. It is not used in performance critical paths and uninlining it should limit the future code size growth. No functional changes. Link: https://lkml.kernel.org/r/20250213224655.1680278-10-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 +++--------- mm/memory.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 09b48af68699..c24c521e38a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -787,6 +787,8 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_l return (vma->vm_lock_seq == *mm_lock_seq); } +void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); + /* * Begin writing to a VMA. * Exclude concurrent readers under the per-VMA lock until the currently @@ -799,15 +801,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) if (__is_vma_write_locked(vma, &mm_lock_seq)) return; - down_write(&vma->vm_lock.lock); - /* - * We should use WRITE_ONCE() here because we can have concurrent reads - * from the early lockless pessimistic check in vma_start_read(). - * We don't really care about the correctness of that early check, but - * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. - */ - WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock.lock); + __vma_start_write(vma, mm_lock_seq); } static inline void vma_assert_write_locked(struct vm_area_struct *vma) diff --git a/mm/memory.c b/mm/memory.c index 6ef014220e09..f2f7dc215b6b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6353,6 +6353,20 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, #endif #ifdef CONFIG_PER_VMA_LOCK +void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq) +{ + down_write(&vma->vm_lock.lock); + /* + * We should use WRITE_ONCE() here because we can have concurrent reads + * from the early lockless pessimistic check in vma_start_read(). + * We don't really care about the correctness of that early check, but + * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. + */ + WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); + up_write(&vma->vm_lock.lock); +} +EXPORT_SYMBOL_GPL(__vma_start_write); + /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the From 7f8ceea0c58039dcea3d31b8d5da58aa5f6e12bf Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:47 -0800 Subject: [PATCH 0929/2157] refcount: provide ops for cases when object's memory can be reused For speculative lookups where a successful inc_not_zero() pins the object, but where we still need to double check if the object acquired is indeed the one we set out to acquire (identity check), needs this validation to happen *after* the increment. Similarly, when a new object is initialized and its memory might have been previously occupied by another object, all stores to initialize the object should happen *before* refcount initialization. Notably SLAB_TYPESAFE_BY_RCU is one such an example when this ordering is required for reference counting. Add refcount_{add|inc}_not_zero_acquire() to guarantee the proper ordering between acquiring a reference count on an object and performing the identity check for that object. Add refcount_set_release() to guarantee proper ordering between stores initializing object attributes and the store initializing the refcount. refcount_set_release() should be done after all other object attributes are initialized. Once refcount_set_release() is called, the object should be considered visible to other tasks even if it was not yet added into an object collection normally used to discover it. This is because other tasks might have discovered the object previously occupying the same memory and after memory reuse they can succeed in taking refcount for the new object and start using it. Object reuse example to consider: consumer: obj = lookup(collection, key); if (!refcount_inc_not_zero_acquire(&obj->ref)) return; if (READ_ONCE(obj->key) != key) { /* identity check */ put_ref(obj); return; } use(obj->value); producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; obj->value = new_value; refcount_set_release(obj->ref, 1); add(collection, new_key, obj); refcount_{add|inc}_not_zero_acquire() is required to prevent the following reordering when refcount_inc_not_zero() is used instead: consumer: obj = lookup(collection, key); if (READ_ONCE(obj->key) != key) { /* reordered identity check */ put_ref(obj); return; } producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; obj->value = new_value; refcount_set_release(obj->ref, 1); add(collection, new_key, obj); if (!refcount_inc_not_zero(&obj->ref)) return; use(obj->value); /* USING WRONG OBJECT */ refcount_set_release() is required to prevent the following reordering when refcount_set() is used instead: consumer: obj = lookup(collection, key); producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; /* new_key == old_key */ refcount_set(obj->ref, 1); if (!refcount_inc_not_zero_acquire(&obj->ref)) return; if (READ_ONCE(obj->key) != key) { /* pass since new_key == old_key */ put_ref(obj); return; } use(obj->value); /* USING STALE obj->value */ obj->value = new_value; /* reordered store */ add(collection, key, obj); [surenb@google.com: fix title underlines in refcount-vs-atomic.rst] Link: https://lkml.kernel.org/r/20250217161645.3137927-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-11-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Vlastimil Babka [slab] Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Peter Zijlstra Cc: Will Deacon Cc: Paul E. McKenney Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: Peter Xu Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- Documentation/RCU/whatisRCU.rst | 10 ++ Documentation/core-api/refcount-vs-atomic.rst | 37 +++++- include/linux/refcount.h | 106 ++++++++++++++++++ include/linux/slab.h | 9 ++ 4 files changed, 156 insertions(+), 6 deletions(-) diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 1ef5784c1b84..53faeed7c190 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -971,6 +971,16 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be initialized after each and every call to kmem_cache_alloc(), which renders reference-free spinlock acquisition completely unsafe. Therefore, when using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter. +If using refcount_t, the specialized refcount_{add|inc}_not_zero_acquire() +and refcount_set_release() APIs should be used to ensure correct operation +ordering when verifying object identity and when initializing newly +allocated objects. Acquire fence in refcount_{add|inc}_not_zero_acquire() +ensures that identity checks happen *after* reference count is taken. +refcount_set_release() should be called after a newly allocated object is +fully initialized and release fence ensures that new values are visible +*before* refcount can be successfully taken by other users. Once +refcount_set_release() is called, the object should be considered visible +by other tasks. (Those willing to initialize their locks in a kmem_cache constructor may also use locking, including cache-friendly sequence locking.) diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst index 79a009ce11df..94e628c1eb49 100644 --- a/Documentation/core-api/refcount-vs-atomic.rst +++ b/Documentation/core-api/refcount-vs-atomic.rst @@ -86,7 +86,19 @@ Memory ordering guarantee changes: * none (both fully unordered) -case 2) - increment-based ops that return no value +case 2) - non-"Read/Modify/Write" (RMW) ops with release ordering +----------------------------------------------------------------- + +Function changes: + + * atomic_set_release() --> refcount_set_release() + +Memory ordering guarantee changes: + + * none (both provide RELEASE ordering) + + +case 3) - increment-based ops that return no value -------------------------------------------------- Function changes: @@ -98,7 +110,7 @@ Memory ordering guarantee changes: * none (both fully unordered) -case 3) - decrement-based RMW ops that return no value +case 4) - decrement-based RMW ops that return no value ------------------------------------------------------ Function changes: @@ -110,7 +122,7 @@ Memory ordering guarantee changes: * fully unordered --> RELEASE ordering -case 4) - increment-based RMW ops that return a value +case 5) - increment-based RMW ops that return a value ----------------------------------------------------- Function changes: @@ -126,7 +138,20 @@ Memory ordering guarantees changes: result of obtaining pointer to the object! -case 5) - generic dec/sub decrement-based RMW ops that return a value +case 6) - increment-based RMW ops with acquire ordering that return a value +--------------------------------------------------------------------------- + +Function changes: + + * atomic_inc_not_zero() --> refcount_inc_not_zero_acquire() + * no atomic counterpart --> refcount_add_not_zero_acquire() + +Memory ordering guarantees changes: + + * fully ordered --> ACQUIRE ordering on success + + +case 7) - generic dec/sub decrement-based RMW ops that return a value --------------------------------------------------------------------- Function changes: @@ -139,7 +164,7 @@ Memory ordering guarantees changes: * fully ordered --> RELEASE ordering + ACQUIRE ordering on success -case 6) other decrement-based RMW ops that return a value +case 8) other decrement-based RMW ops that return a value --------------------------------------------------------- Function changes: @@ -154,7 +179,7 @@ Memory ordering guarantees changes: .. note:: atomic_add_unless() only provides full order on success. -case 7) - lock-based RMW +case 9) - lock-based RMW ------------------------ Function changes: diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 35f039ecb272..4589d2e7bfea 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -87,6 +87,15 @@ * The decrements dec_and_test() and sub_and_test() also provide acquire * ordering on success. * + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide + * acquire and release ordering for cases when the memory occupied by the + * object might be reused to store another object. This is important for the + * cases where secondary validation is required to detect such reuse, e.g. + * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after + * the refcount is taken, hence acquire order is necessary. Similarly, when the + * object is initialized, all stores to its attributes should be visible before + * the refcount is set, otherwise a stale attribute value might be used by + * another task which succeeds in taking a refcount to the new object. */ #ifndef _LINUX_REFCOUNT_H @@ -125,6 +134,31 @@ static inline void refcount_set(refcount_t *r, int n) atomic_set(&r->refs, n); } +/** + * refcount_set_release - set a refcount's value with release ordering + * @r: the refcount + * @n: value to which the refcount will be set + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides release memory ordering which will order previous memory operations + * against this store. This ensures all updates to this object are visible + * once the refcount is set and stale values from the object previously + * occupying this memory are overwritten with new ones. + * + * This function should be called only after new object is fully initialized. + * After this call the object should be considered visible to other tasks even + * if it was not yet added into an object collection normally used to discover + * it. This is because other tasks might have discovered the object previously + * occupying the same memory and after memory reuse they can succeed in taking + * refcount to the new object and start using it. + */ +static inline void refcount_set_release(refcount_t *r, int n) +{ + atomic_set_release(&r->refs, n); +} + /** * refcount_read - get a refcount's value * @r: the refcount @@ -178,6 +212,52 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) +{ + int old = refcount_read(r); + + do { + if (!old) + break; + } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i)); + + if (oldp) + *oldp = old; + + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); + + return old; +} + +/** + * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0 + * + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc_not_zero_acquire() should instead be used to increment a + * reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r) +{ + return __refcount_add_not_zero_acquire(i, r, NULL); +} + static inline __signed_wrap void __refcount_add(int i, refcount_t *r, int *oldp) { @@ -236,6 +316,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) return __refcount_inc_not_zero(r, NULL); } +static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_acquire(1, r, oldp); +} + +/** + * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0 + * @r: the refcount to increment + * + * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on + * success. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ +static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r) +{ + return __refcount_inc_not_zero_acquire(r, NULL); +} + static inline void __refcount_inc(refcount_t *r, int *oldp) { __refcount_add(1, r, oldp); diff --git a/include/linux/slab.h b/include/linux/slab.h index 09eedaecf120..ad902a2d692b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -136,6 +136,15 @@ enum _slab_flag_bits { * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that object identity check has to be done *after* acquiring a + * reference, therefore user has to ensure proper ordering for loads. + * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU, + * the newly allocated object has to be fully initialized *before* its + * refcount gets initialized and proper ordering for stores is required. + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are + * designed with the proper fences required for reference counting objects + * allocated with SLAB_TYPESAFE_BY_RCU. + * * Note that it is not possible to acquire a lock within a structure * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages From 4e0dbe105d5088c77eb09de6f049aaf44711a2ec Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:48 -0800 Subject: [PATCH 0930/2157] refcount: introduce __refcount_{add|inc}_not_zero_limited_acquire Introduce functions to increase refcount but with a top limit above which they will fail to increase (the limit is inclusive). Setting the limit to INT_MAX indicates no limit. Link: https://lkml.kernel.org/r/20250213224655.1680278-12-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/refcount.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4589d2e7bfea..80dc023ac2bf 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -213,13 +213,20 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) } static inline __must_check __signed_wrap -bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) +bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp, + int limit) { int old = refcount_read(r); do { if (!old) break; + + if (i > limit - old) { + if (oldp) + *oldp = old; + return false; + } } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i)); if (oldp) @@ -231,6 +238,18 @@ bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) return old; } +static inline __must_check bool +__refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit) +{ + return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit); +} + +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX); +} + /** * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0 * From f35ab95ca0af7a27feab57b9d7e906405bddb093 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:49 -0800 Subject: [PATCH 0931/2157] mm: replace vm_lock and detached flag with a reference count rw_semaphore is a sizable structure of 40 bytes and consumes considerable space for each vm_area_struct. However vma_lock has two important specifics which can be used to replace rw_semaphore with a simpler structure: 1. Readers never wait. They try to take the vma_lock and fall back to mmap_lock if that fails. 2. Only one writer at a time will ever try to write-lock a vma_lock because writers first take mmap_lock in write mode. Because of these requirements, full rw_semaphore functionality is not needed and we can replace rw_semaphore and the vma->detached flag with a refcount (vm_refcnt). When vma is in detached state, vm_refcnt is 0 and only a call to vma_mark_attached() can take it out of this state. Note that unlike before, now we enforce both vma_mark_attached() and vma_mark_detached() to be done only after vma has been write-locked. vma_mark_attached() changes vm_refcnt to 1 to indicate that it has been attached to the vma tree. When a reader takes read lock, it increments vm_refcnt, unless the top usable bit of vm_refcnt (0x40000000) is set, indicating presence of a writer. When writer takes write lock, it sets the top usable bit to indicate its presence. If there are readers, writer will wait using newly introduced mm->vma_writer_wait. Since all writers take mmap_lock in write mode first, there can be only one writer at a time. The last reader to release the lock will signal the writer to wake up. refcount might overflow if there are many competing readers, in which case read-locking will fail. Readers are expected to handle such failures. In summary: 1. all readers increment the vm_refcnt; 2. writer sets top usable (writer) bit of vm_refcnt; 3. readers cannot increment the vm_refcnt if the writer bit is set; 4. in the presence of readers, writer must wait for the vm_refcnt to drop to 1 (plus the VMA_LOCK_OFFSET writer bit), indicating an attached vma with no readers; 5. vm_refcnt overflow is handled by the readers. While this vm_lock replacement does not yet result in a smaller vm_area_struct (it stays at 256 bytes due to cacheline alignment), it allows for further size optimization by structure member regrouping to bring the size of vm_area_struct below 192 bytes. [surenb@google.com: fix a crash due to vma_end_read() that should have been removed] Link: https://lkml.kernel.org/r/20250220200208.323769-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-13-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Peter Zijlstra Suggested-by: Matthew Wilcox Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Vlastimil Babka Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 130 ++++++++++++++++++++----------- include/linux/mm_types.h | 22 +++--- kernel/fork.c | 13 ++-- mm/init-mm.c | 1 + mm/memory.c | 90 ++++++++++++++++++--- tools/testing/vma/linux/atomic.h | 5 ++ tools/testing/vma/vma_internal.h | 63 +++++++-------- 7 files changed, 218 insertions(+), 106 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index c24c521e38a2..06f179c844c3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -697,19 +698,54 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_PER_VMA_LOCK -static inline void vma_lock_init(struct vm_area_struct *vma) +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) { - init_rwsem(&vma->vm_lock.lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static struct lock_class_key lockdep_key; + + lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0); +#endif + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); vma->vm_lock_seq = UINT_MAX; } +static inline bool is_vma_writer_only(int refcnt) +{ + /* + * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma + * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on + * a detached vma happens only in vma_mark_detached() and is a rare + * case, therefore most of the time there will be no unnecessary wakeup. + */ + return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; +} + +static inline void vma_refcount_put(struct vm_area_struct *vma) +{ + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *mm = vma->vm_mm; + int oldcnt; + + rwsem_release(&vma->vmlock_dep_map, _RET_IP_); + if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) { + + if (is_vma_writer_only(oldcnt - 1)) + rcuwait_wake_up(&mm->vma_writer_wait); + } +} + /* * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to * using mmap_lock. The function should never yield false unlocked result. + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got + * detached. */ -static inline bool vma_start_read(struct vm_area_struct *vma) +static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) { + int oldcnt; + /* * Check before locking. A race might cause false locked result. * We can use READ_ONCE() for the mm_lock_seq here, and don't need @@ -718,15 +754,25 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * need ordering is below. */ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) - return false; - - if (unlikely(down_read_trylock(&vma->vm_lock.lock) == 0)) - return false; + return NULL; /* - * Overflow might produce false locked result. + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. + * Acquire fence is required here to avoid reordering against later + * vm_lock_seq check and checks inside lock_vma_under_rcu(). + */ + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) { + /* return EAGAIN if vma got detached from under us */ + return oldcnt ? NULL : ERR_PTR(-EAGAIN); + } + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + /* + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq * modification invalidates all existing locks. * * We must use ACQUIRE semantics for the mm_lock_seq so that if we are @@ -735,10 +781,11 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * This pairs with RELEASE semantics in vma_end_write_all(). */ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { - up_read(&vma->vm_lock.lock); - return false; + vma_refcount_put(vma); + return NULL; } - return true; + + return vma; } /* @@ -749,8 +796,14 @@ static inline bool vma_start_read(struct vm_area_struct *vma) */ static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) { + int oldcnt; + mmap_assert_locked(vma->vm_mm); - down_read_nested(&vma->vm_lock.lock, subclass); + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) + return false; + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); return true; } @@ -762,16 +815,12 @@ static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int */ static inline bool vma_start_read_locked(struct vm_area_struct *vma) { - mmap_assert_locked(vma->vm_mm); - down_read(&vma->vm_lock.lock); - return true; + return vma_start_read_locked_nested(vma, 0); } static inline void vma_end_read(struct vm_area_struct *vma) { - rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->vm_lock.lock); - rcu_read_unlock(); + vma_refcount_put(vma); } /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ @@ -813,38 +862,35 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma) static inline void vma_assert_locked(struct vm_area_struct *vma) { - if (!rwsem_is_locked(&vma->vm_lock.lock)) - vma_assert_write_locked(vma); + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && + !__is_vma_write_locked(vma, &mm_lock_seq), vma); } +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. + */ static inline void vma_assert_attached(struct vm_area_struct *vma) { - WARN_ON_ONCE(vma->detached); + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); } static inline void vma_assert_detached(struct vm_area_struct *vma) { - WARN_ON_ONCE(!vma->detached); + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); } static inline void vma_mark_attached(struct vm_area_struct *vma) { - vma_assert_detached(vma); - vma->detached = false; -} - -static inline void vma_mark_detached(struct vm_area_struct *vma) -{ - /* When detaching vma should be write-locked */ vma_assert_write_locked(vma); - vma_assert_attached(vma); - vma->detached = true; + vma_assert_detached(vma); + refcount_set(&vma->vm_refcnt, 1); } -static inline bool is_vma_detached(struct vm_area_struct *vma) -{ - return vma->detached; -} +void vma_mark_detached(struct vm_area_struct *vma); static inline void release_fault_lock(struct vm_fault *vmf) { @@ -867,9 +913,9 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, #else /* CONFIG_PER_VMA_LOCK */ -static inline void vma_lock_init(struct vm_area_struct *vma) {} -static inline bool vma_start_read(struct vm_area_struct *vma) - { return false; } +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} +static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) + { return NULL; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) @@ -910,12 +956,8 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); -#ifdef CONFIG_PER_VMA_LOCK - /* vma is not locked, can't use vma_mark_detached() */ - vma->detached = true; -#endif vma_numab_state_init(vma); - vma_lock_init(vma); + vma_lock_init(vma, false); } /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36dea20cd101..9de0a6cb3c2d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -629,9 +630,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) } #endif -struct vma_lock { - struct rw_semaphore lock; -}; +#define VMA_LOCK_OFFSET 0x40000000 +#define VMA_REF_LIMIT (VMA_LOCK_OFFSET - 1) struct vma_numab_state { /* @@ -709,19 +709,13 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* - * Flag to indicate areas detached from the mm->mm_mt tree. - * Unstable RCU readers are allowed to read this. - */ - bool detached; - /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock->lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock->lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -784,7 +778,10 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; #ifdef CONFIG_PER_VMA_LOCK /* Unstable RCU readers are allowed to read this. */ - struct vma_lock vm_lock ____cacheline_aligned_in_smp; + refcount_t vm_refcnt ____cacheline_aligned_in_smp; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map vmlock_dep_map; +#endif #endif } __randomize_layout; @@ -920,6 +917,7 @@ struct mm_struct { * by mmlist_lock */ #ifdef CONFIG_PER_VMA_LOCK + struct rcuwait vma_writer_wait; /* * This field has lock-like semantics, meaning it is sometimes * accessed with ACQUIRE/RELEASE semantics. diff --git a/kernel/fork.c b/kernel/fork.c index f1af413e5aa4..48a0038f606f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -463,12 +463,8 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) * will be reinitialized. */ data_race(memcpy(new, orig, sizeof(*new))); - vma_lock_init(new); + vma_lock_init(new, true); INIT_LIST_HEAD(&new->anon_vma_chain); -#ifdef CONFIG_PER_VMA_LOCK - /* vma is not locked, can't use vma_mark_detached() */ - new->detached = true; -#endif vma_numab_state_init(new); dup_anon_vma_name(orig, new); @@ -477,6 +473,8 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) void __vm_area_free(struct vm_area_struct *vma) { + /* The vma should be detached while being destroyed. */ + vma_assert_detached(vma); vma_numab_state_free(vma); free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); @@ -488,8 +486,6 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) struct vm_area_struct *vma = container_of(head, struct vm_area_struct, vm_rcu); - /* The vma should not be locked while being destroyed. */ - VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock.lock), vma); __vm_area_free(vma); } #endif @@ -1234,6 +1230,9 @@ static void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); mm_lock_seqcount_init(mm); +#ifdef CONFIG_PER_VMA_LOCK + rcuwait_init(&mm->vma_writer_wait); +#endif } static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, diff --git a/mm/init-mm.c b/mm/init-mm.c index 6af3ad675930..4600e7605cab 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -40,6 +40,7 @@ struct mm_struct init_mm = { .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), #ifdef CONFIG_PER_VMA_LOCK + .vma_writer_wait = __RCUWAIT_INITIALIZER(init_mm.vma_writer_wait), .mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq), #endif .user_ns = &init_user_ns, diff --git a/mm/memory.c b/mm/memory.c index f2f7dc215b6b..51f233404b02 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6353,9 +6353,47 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, #endif #ifdef CONFIG_PER_VMA_LOCK +static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching) +{ + unsigned int tgt_refcnt = VMA_LOCK_OFFSET; + + /* Additional refcnt if the vma is attached. */ + if (!detaching) + tgt_refcnt++; + + /* + * If vma is detached then only vma_mark_attached() can raise the + * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached(). + */ + if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt)) + return false; + + rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_); + rcuwait_wait_event(&vma->vm_mm->vma_writer_wait, + refcount_read(&vma->vm_refcnt) == tgt_refcnt, + TASK_UNINTERRUPTIBLE); + lock_acquired(&vma->vmlock_dep_map, _RET_IP_); + + return true; +} + +static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached) +{ + *detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt); + rwsem_release(&vma->vmlock_dep_map, _RET_IP_); +} + void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq) { - down_write(&vma->vm_lock.lock); + bool locked; + + /* + * __vma_enter_locked() returns false immediately if the vma is not + * attached, otherwise it waits until refcnt is indicating that vma + * is attached with no readers. + */ + locked = __vma_enter_locked(vma, false); + /* * We should use WRITE_ONCE() here because we can have concurrent reads * from the early lockless pessimistic check in vma_start_read(). @@ -6363,10 +6401,40 @@ void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq) * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. */ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock.lock); + + if (locked) { + bool detached; + + __vma_exit_locked(vma, &detached); + WARN_ON_ONCE(detached); /* vma should remain attached */ + } } EXPORT_SYMBOL_GPL(__vma_start_write); +void vma_mark_detached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_attached(vma); + + /* + * We are the only writer, so no need to use vma_refcount_put(). + * The condition below is unlikely because the vma has been already + * write-locked and readers can increment vm_refcnt only temporarily + * before they check vm_lock_seq, realize the vma is locked and drop + * back the vm_refcnt. That is a narrow window for observing a raised + * vm_refcnt. + */ + if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { + /* Wait until vma is detached with no readers. */ + if (__vma_enter_locked(vma, true)) { + bool detached; + + __vma_exit_locked(vma, &detached); + WARN_ON_ONCE(!detached); + } + } +} + /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the @@ -6384,15 +6452,17 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, if (!vma) goto inval; - if (!vma_start_read(vma)) - goto inval; + vma = vma_start_read(vma); + if (IS_ERR_OR_NULL(vma)) { + /* Check if the VMA got isolated after we found it */ + if (PTR_ERR(vma) == -EAGAIN) { + count_vm_vma_lock_event(VMA_LOCK_MISS); + /* The area was replaced with another one */ + goto retry; + } - /* Check if the VMA got isolated after we found it */ - if (is_vma_detached(vma)) { - vma_end_read(vma); - count_vm_vma_lock_event(VMA_LOCK_MISS); - /* The area was replaced with another one */ - goto retry; + /* Failed to lock the VMA */ + goto inval; } /* * At this point, we have a stable reference to a VMA: The VMA is diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index 3e1b6adc027b..788c597c4fde 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -9,4 +9,9 @@ #define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_relaxed uatomic_cmpxchg +#define atomic_cmpxchg_release uatomic_cmpxchg +#endif /* atomic_cmpxchg_relaxed */ + #endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 34277842156c..ba838097d3f6 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include extern unsigned long stack_guard_gap; #ifdef CONFIG_MMU @@ -135,10 +135,6 @@ typedef __bitwise unsigned int vm_fault_t; */ #define pr_warn_once pr_err -typedef struct refcount_struct { - atomic_t refs; -} refcount_t; - struct kref { refcount_t refcount; }; @@ -233,15 +229,12 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access */ }; -struct vma_lock { - struct rw_semaphore lock; -}; - - struct file { struct address_space *f_mapping; }; +#define VMA_LOCK_OFFSET 0x40000000 + struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -269,16 +262,13 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* Flag to indicate areas detached from the mm->mm_mt tree */ - bool detached; - /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock.lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock.lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -287,7 +277,6 @@ struct vm_area_struct { * slowpath. */ unsigned int vm_lock_seq; - struct vma_lock vm_lock; #endif /* @@ -340,6 +329,10 @@ struct vm_area_struct { struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + refcount_t vm_refcnt; +#endif } __randomize_layout; struct vm_fault {}; @@ -464,33 +457,40 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) return mas_find(&vmi->mas, ULONG_MAX); } -static inline void vma_lock_init(struct vm_area_struct *vma) -{ - init_rwsem(&vma->vm_lock.lock); - vma->vm_lock_seq = UINT_MAX; -} - +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. + */ static inline void vma_assert_attached(struct vm_area_struct *vma) { - WARN_ON_ONCE(vma->detached); + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); } static inline void vma_assert_detached(struct vm_area_struct *vma) { - WARN_ON_ONCE(!vma->detached); + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); } static inline void vma_assert_write_locked(struct vm_area_struct *); static inline void vma_mark_attached(struct vm_area_struct *vma) { - vma->detached = false; + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set(&vma->vm_refcnt, 1); } static inline void vma_mark_detached(struct vm_area_struct *vma) { - /* When detaching vma should be write-locked */ vma_assert_write_locked(vma); - vma->detached = true; + vma_assert_attached(vma); + /* We are the only writer, so no need to use vma_refcount_put(). */ + if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { + /* + * Reader must have temporarily raised vm_refcnt but it will + * drop it without using the vma since vma is write-locked. + */ + } } extern const struct vm_operations_struct vma_dummy_vm_ops; @@ -503,9 +503,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - /* vma is not locked, can't use vma_mark_detached() */ - vma->detached = true; - vma_lock_init(vma); + vma->vm_lock_seq = UINT_MAX; } static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) @@ -528,10 +526,9 @@ static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return NULL; memcpy(new, orig, sizeof(*new)); - vma_lock_init(new); + refcount_set(&new->vm_refcnt, 0); + new->vm_lock_seq = UINT_MAX; INIT_LIST_HEAD(&new->anon_vma_chain); - /* vma is not locked, can't use vma_mark_detached() */ - new->detached = true; return new; } From 6bef4c2f97221f3b595d08c8656eb5845ef80fe9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:50 -0800 Subject: [PATCH 0932/2157] mm: move lesser used vma_area_struct members into the last cacheline Move several vma_area_struct members which are rarely or never used during page fault handling into the last cacheline to better pack vm_area_struct. As a result vm_area_struct will fit into 3 as opposed to 4 cachelines. New typical vm_area_struct layout: struct vm_area_struct { union { struct { long unsigned int vm_start; /* 0 8 */ long unsigned int vm_end; /* 8 8 */ }; /* 0 16 */ freeptr_t vm_freeptr; /* 0 8 */ }; /* 0 16 */ struct mm_struct * vm_mm; /* 16 8 */ pgprot_t vm_page_prot; /* 24 8 */ union { const vm_flags_t vm_flags; /* 32 8 */ vm_flags_t __vm_flags; /* 32 8 */ }; /* 32 8 */ unsigned int vm_lock_seq; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head anon_vma_chain; /* 48 16 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct anon_vma * anon_vma; /* 64 8 */ const struct vm_operations_struct * vm_ops; /* 72 8 */ long unsigned int vm_pgoff; /* 80 8 */ struct file * vm_file; /* 88 8 */ void * vm_private_data; /* 96 8 */ atomic_long_t swap_readahead_info; /* 104 8 */ struct mempolicy * vm_policy; /* 112 8 */ struct vma_numab_state * numab_state; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ refcount_t vm_refcnt (__aligned__(64)); /* 128 4 */ /* XXX 4 bytes hole, try to pack */ struct { struct rb_node rb (__aligned__(8)); /* 136 24 */ long unsigned int rb_subtree_last; /* 160 8 */ } __attribute__((__aligned__(8))) shared; /* 136 32 */ struct anon_vma_name * anon_name; /* 168 8 */ struct vm_userfaultfd_ctx vm_userfaultfd_ctx; /* 176 8 */ /* size: 192, cachelines: 3, members: 18 */ /* sum members: 176, holes: 2, sum holes: 8 */ /* padding: 8 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 4 */ } __attribute__((__aligned__(64))); Memory consumption per 1000 VMAs becomes 48 pages: slabinfo after vm_area_struct changes: ... : ... vm_area_struct ... 192 42 2 : ... Link: https://lkml.kernel.org/r/20250213224655.1680278-14-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Vlastimil Babka Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 38 +++++++++++++++----------------- tools/testing/vma/vma_internal.h | 37 +++++++++++++++---------------- 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9de0a6cb3c2d..c3aa0e20be41 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -725,17 +725,6 @@ struct vm_area_struct { */ unsigned int vm_lock_seq; #endif - - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma @@ -755,14 +744,6 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -775,7 +756,6 @@ struct vm_area_struct { #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif - struct vm_userfaultfd_ctx vm_userfaultfd_ctx; #ifdef CONFIG_PER_VMA_LOCK /* Unstable RCU readers are allowed to read this. */ refcount_t vm_refcnt ____cacheline_aligned_in_smp; @@ -783,6 +763,24 @@ struct vm_area_struct { struct lockdep_map vmlock_dep_map; #endif #endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; #ifdef CONFIG_NUMA diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index ba838097d3f6..b385170fbb8f 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -279,16 +279,6 @@ struct vm_area_struct { unsigned int vm_lock_seq; #endif - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma @@ -308,14 +298,6 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -328,11 +310,28 @@ struct vm_area_struct { #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif - struct vm_userfaultfd_ctx vm_userfaultfd_ctx; #ifdef CONFIG_PER_VMA_LOCK /* Unstable RCU readers are allowed to read this. */ refcount_t vm_refcnt; #endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; struct vm_fault {}; From 3dd98c5c442358c5aefa13e2c91ee9dee32d776e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:51 -0800 Subject: [PATCH 0933/2157] mm/debug: print vm_refcnt state when dumping the vma vm_refcnt encodes a number of useful states: - whether vma is attached or detached - the number of current vma readers - presence of a vma writer Let's include it in the vma dump. Link: https://lkml.kernel.org/r/20250213224655.1680278-15-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Vlastimil Babka Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- mm/debug.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/debug.c b/mm/debug.c index e1282b85a877..2d1bd67d957b 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -181,11 +181,17 @@ void dump_vma(const struct vm_area_struct *vma) pr_emerg("vma %px start %px end %px mm %px\n" "prot %lx anon_vma %px vm_ops %px\n" "pgoff %lx file %px private_data %px\n" +#ifdef CONFIG_PER_VMA_LOCK + "refcnt %x\n" +#endif "flags: %#lx(%pGv)\n", vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_mm, (unsigned long)pgprot_val(vma->vm_page_prot), vma->anon_vma, vma->vm_ops, vma->vm_pgoff, vma->vm_file, vma->vm_private_data, +#ifdef CONFIG_PER_VMA_LOCK + refcount_read(&vma->vm_refcnt), +#endif vma->vm_flags, &vma->vm_flags); } EXPORT_SYMBOL(dump_vma); From e218d9fedd056a0b17468d6e19fddaf2c3550f97 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:52 -0800 Subject: [PATCH 0934/2157] mm: remove extra vma_numab_state_init() call vma_init() already memset's the whole vm_area_struct to 0, so there is no need to an additional vma_numab_state_init(). Link: https://lkml.kernel.org/r/20250213224655.1680278-16-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 06f179c844c3..aad932c4bcf0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -956,7 +956,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_numab_state_init(vma); vma_lock_init(vma, false); } From e49510bf00de4f832eaaebcfc113795f127ad519 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:53 -0800 Subject: [PATCH 0935/2157] mm: prepare lock_vma_under_rcu() for vma reuse possibility Once we make vma cache SLAB_TYPESAFE_BY_RCU, it will be possible for a vma to be reused and attached to another mm after lock_vma_under_rcu() locks the vma. lock_vma_under_rcu() should ensure that vma_start_read() is using the original mm and after locking the vma it should ensure that vma->vm_mm has not changed from under us. Link: https://lkml.kernel.org/r/20250213224655.1680278-17-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 ++++++++---- mm/memory.c | 7 ++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index aad932c4bcf0..e3f962de1677 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -739,10 +739,13 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to * using mmap_lock. The function should never yield false unlocked result. + * False locked result is possible if mm_lock_seq overflows or if vma gets + * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. */ -static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) { int oldcnt; @@ -753,7 +756,7 @@ static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) return NULL; /* @@ -780,7 +783,7 @@ static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { vma_refcount_put(vma); return NULL; } @@ -914,7 +917,8 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, #else /* CONFIG_PER_VMA_LOCK */ static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} -static inline struct vm_area_struct *vma_start_read(struct vm_area_struct *vma) +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) { return NULL; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} diff --git a/mm/memory.c b/mm/memory.c index 51f233404b02..39bceed7448f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6452,7 +6452,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, if (!vma) goto inval; - vma = vma_start_read(vma); + vma = vma_start_read(mm, vma); if (IS_ERR_OR_NULL(vma)) { /* Check if the VMA got isolated after we found it */ if (PTR_ERR(vma) == -EAGAIN) { @@ -6471,8 +6471,9 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, * fields are accessible for RCU readers. */ - /* Check since vm_start/vm_end might change before we lock the VMA */ - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + /* Check if the vma we locked is the right one. */ + if (unlikely(vma->vm_mm != mm || + address < vma->vm_start || address >= vma->vm_end)) goto inval_end_read; rcu_read_unlock(); From 3104138517fc66aad21f4a2487bb572e9fc2e3ec Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:54 -0800 Subject: [PATCH 0936/2157] mm: make vma cache SLAB_TYPESAFE_BY_RCU To enable SLAB_TYPESAFE_BY_RCU for vma cache we need to ensure that object reuse before RCU grace period is over will be detected by lock_vma_under_rcu(). Current checks are sufficient as long as vma is detached before it is freed. The only place this is not currently happening is in exit_mmap(). Add the missing vma_mark_detached() in exit_mmap(). Another issue which might trick lock_vma_under_rcu() during vma reuse is vm_area_dup(), which copies the entire content of the vma into a new one, overriding new vma's vm_refcnt and temporarily making it appear as attached. This might trick a racing lock_vma_under_rcu() to operate on a reused vma if it found the vma before it got reused. To prevent this situation, we should ensure that vm_refcnt stays at detached state (0) when it is copied and advances to attached state only after it is added into the vma tree. Introduce vm_area_init_from() which preserves new vma's vm_refcnt and use it in vm_area_dup(). Since all vmas are in detached state with no current readers when they are freed, lock_vma_under_rcu() will not be able to take vm_refcnt after vma got detached even if vma is reused. vma_mark_attached() in modified to include a release fence to ensure all stores to the vma happen before vm_refcnt gets initialized. Finally, make vm_area_cachep SLAB_TYPESAFE_BY_RCU. This will facilitate vm_area_struct reuse and will minimize the number of call_rcu() calls. [surenb@google.com: remove atomic_set_release() usage in tools/] Link: https://lkml.kernel.org/r/20250217054351.2973666-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-18-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Liam R. Howlett Cc: Lokesh Gidra Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 +- include/linux/mm_types.h | 13 ++++-- include/linux/slab.h | 6 --- kernel/fork.c | 73 ++++++++++++++++++++------------ mm/mmap.c | 3 +- mm/vma.c | 11 ++--- mm/vma.h | 2 +- tools/include/linux/refcount.h | 5 +++ tools/testing/vma/vma_internal.h | 9 +--- 9 files changed, 70 insertions(+), 56 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e3f962de1677..14115c9949d8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -258,8 +258,6 @@ void setup_initial_init_mm(void *start_code, void *end_code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); -/* Use only if VMA has no other users */ -void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; @@ -890,7 +888,7 @@ static inline void vma_mark_attached(struct vm_area_struct *vma) { vma_assert_write_locked(vma); vma_assert_detached(vma); - refcount_set(&vma->vm_refcnt, 1); + refcount_set_release(&vma->vm_refcnt, 1); } void vma_mark_detached(struct vm_area_struct *vma); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c3aa0e20be41..6a93abb4452b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -574,6 +574,12 @@ static inline void *folio_get_private(struct folio *folio) typedef unsigned long vm_flags_t; +/* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ +typedef struct { unsigned long v; } freeptr_t; + /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that @@ -677,6 +683,9 @@ struct vma_numab_state { * * Only explicitly marked struct members may be accessed by RCU readers before * getting a stable reference. + * + * WARNING: when adding new members, please update vm_area_init_from() to copy + * them during vm_area_struct content duplication. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -687,9 +696,7 @@ struct vm_area_struct { unsigned long vm_start; unsigned long vm_end; }; -#ifdef CONFIG_PER_VMA_LOCK - struct rcu_head vm_rcu; /* Used for deferred freeing. */ -#endif + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ }; /* diff --git a/include/linux/slab.h b/include/linux/slab.h index ad902a2d692b..f8924fd6ea26 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -243,12 +243,6 @@ enum _slab_flag_bits { #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED #endif -/* - * freeptr_t represents a SLUB freelist pointer, which might be encoded - * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. - */ -typedef struct { unsigned long v; } freeptr_t; - /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * diff --git a/kernel/fork.c b/kernel/fork.c index 48a0038f606f..364b2d4fd3ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -449,6 +449,42 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) return vma; } +static void vm_area_init_from(const struct vm_area_struct *src, + struct vm_area_struct *dest) +{ + dest->vm_mm = src->vm_mm; + dest->vm_ops = src->vm_ops; + dest->vm_start = src->vm_start; + dest->vm_end = src->vm_end; + dest->anon_vma = src->anon_vma; + dest->vm_pgoff = src->vm_pgoff; + dest->vm_file = src->vm_file; + dest->vm_private_data = src->vm_private_data; + vm_flags_init(dest, src->vm_flags); + memcpy(&dest->vm_page_prot, &src->vm_page_prot, + sizeof(dest->vm_page_prot)); + /* + * src->shared.rb may be modified concurrently when called from + * dup_mmap(), but the clone will reinitialize it. + */ + data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared))); + memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx, + sizeof(dest->vm_userfaultfd_ctx)); +#ifdef CONFIG_ANON_VMA_NAME + dest->anon_name = src->anon_name; +#endif +#ifdef CONFIG_SWAP + memcpy(&dest->swap_readahead_info, &src->swap_readahead_info, + sizeof(dest->swap_readahead_info)); +#endif +#ifndef CONFIG_MMU + dest->vm_region = src->vm_region; +#endif +#ifdef CONFIG_NUMA + dest->vm_policy = src->vm_policy; +#endif +} + struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) { struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); @@ -458,11 +494,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); ASSERT_EXCLUSIVE_WRITER(orig->vm_file); - /* - * orig->shared.rb may be modified concurrently, but the clone - * will be reinitialized. - */ - data_race(memcpy(new, orig, sizeof(*new))); + vm_area_init_from(orig, new); vma_lock_init(new, true); INIT_LIST_HEAD(&new->anon_vma_chain); vma_numab_state_init(new); @@ -471,7 +503,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return new; } -void __vm_area_free(struct vm_area_struct *vma) +void vm_area_free(struct vm_area_struct *vma) { /* The vma should be detached while being destroyed. */ vma_assert_detached(vma); @@ -480,25 +512,6 @@ void __vm_area_free(struct vm_area_struct *vma) kmem_cache_free(vm_area_cachep, vma); } -#ifdef CONFIG_PER_VMA_LOCK -static void vm_area_free_rcu_cb(struct rcu_head *head) -{ - struct vm_area_struct *vma = container_of(head, struct vm_area_struct, - vm_rcu); - - __vm_area_free(vma); -} -#endif - -void vm_area_free(struct vm_area_struct *vma) -{ -#ifdef CONFIG_PER_VMA_LOCK - call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); -#else - __vm_area_free(vma); -#endif -} - static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { @@ -3156,6 +3169,11 @@ void __init mm_cache_init(void) void __init proc_caches_init(void) { + struct kmem_cache_args args = { + .use_freeptr_offset = true, + .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr), + }; + sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -3172,8 +3190,9 @@ void __init proc_caches_init(void) sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); - vm_area_cachep = KMEM_CACHE(vm_area_struct, - SLAB_HWCACHE_ALIGN|SLAB_NO_MERGE|SLAB_PANIC| + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), &args, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); diff --git a/mm/mmap.c b/mm/mmap.c index 6401a1d73f4a..15d6cd7cc845 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1305,7 +1305,8 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); - remove_vma(vma, /* unreachable = */ true); + vma_mark_detached(vma); + remove_vma(vma); count++; cond_resched(); vma = vma_next(&vmi); diff --git a/mm/vma.c b/mm/vma.c index 53f4d0efce4d..5cdc5612bfc1 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -420,19 +420,14 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg, /* * Close a vm structure and free it. */ -void remove_vma(struct vm_area_struct *vma, bool unreachable) +void remove_vma(struct vm_area_struct *vma) { might_sleep(); vma_close(vma); if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - if (unreachable) { - vma_mark_detached(vma); - __vm_area_free(vma); - } else { - vm_area_free(vma); - } + vm_area_free(vma); } /* @@ -1218,7 +1213,7 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, /* Remove and clean up vmas */ mas_set(mas_detach, 0); mas_for_each(mas_detach, vma, ULONG_MAX) - remove_vma(vma, /* unreachable = */ false); + remove_vma(vma); vm_unacct_memory(vms->nr_accounted); validate_mm(mm); diff --git a/mm/vma.h b/mm/vma.h index 55be77ff042f..7356ca5a22d3 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -218,7 +218,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); -void remove_vma(struct vm_area_struct *vma, bool unreachable); +void remove_vma(struct vm_area_struct *vma); void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, struct vm_area_struct *next); diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h index 36cb29bc57c2..1f30956e070d 100644 --- a/tools/include/linux/refcount.h +++ b/tools/include/linux/refcount.h @@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n) atomic_set(&r->refs, n); } +static inline void refcount_set_release(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index b385170fbb8f..572ab2cea763 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -476,7 +476,7 @@ static inline void vma_mark_attached(struct vm_area_struct *vma) { vma_assert_write_locked(vma); vma_assert_detached(vma); - refcount_set(&vma->vm_refcnt, 1); + refcount_set_release(&vma->vm_refcnt, 1); } static inline void vma_mark_detached(struct vm_area_struct *vma) @@ -696,14 +696,9 @@ static inline void mpol_put(struct mempolicy *) { } -static inline void __vm_area_free(struct vm_area_struct *vma) -{ - free(vma); -} - static inline void vm_area_free(struct vm_area_struct *vma) { - __vm_area_free(vma); + free(vma); } static inline void lru_add_drain(void) From 795f29616e85aff32248e695c9cc1fbc8b4c9632 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 13 Feb 2025 14:46:55 -0800 Subject: [PATCH 0937/2157] docs/mm: document latest changes to vm_lock Change the documentation to reflect that vm_lock is integrated into vma and replaced with vm_refcnt. Document newly introduced vma_start_read_locked{_nested} functions. Link: https://lkml.kernel.org/r/20250213224655.1680278-19-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Tested-by: Shivank Garg Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Reviewed-by: Vlastimil Babka Cc: Christian Brauner Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Hugh Dickins Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Klara Modin Cc: Lokesh Gidra Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pasha Tatashin Cc: "Paul E . McKenney" Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Shakeel Butt Cc: Sourav Panda Cc: Suren Baghdasaryan Cc: Wei Yang Cc: Will Deacon Cc: Heiko Carstens Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- Documentation/mm/process_addrs.rst | 44 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/Documentation/mm/process_addrs.rst b/Documentation/mm/process_addrs.rst index 81417fa2ed20..e6756e78b476 100644 --- a/Documentation/mm/process_addrs.rst +++ b/Documentation/mm/process_addrs.rst @@ -716,9 +716,14 @@ calls :c:func:`!rcu_read_lock` to ensure that the VMA is looked up in an RCU critical section, then attempts to VMA lock it via :c:func:`!vma_start_read`, before releasing the RCU lock via :c:func:`!rcu_read_unlock`. -VMA read locks hold the read lock on the :c:member:`!vma->vm_lock` semaphore for -their duration and the caller of :c:func:`!lock_vma_under_rcu` must release it -via :c:func:`!vma_end_read`. +In cases when the user already holds mmap read lock, :c:func:`!vma_start_read_locked` +and :c:func:`!vma_start_read_locked_nested` can be used. These functions do not +fail due to lock contention but the caller should still check their return values +in case they fail for other reasons. + +VMA read locks increment :c:member:`!vma.vm_refcnt` reference counter for their +duration and the caller of :c:func:`!lock_vma_under_rcu` must drop it via +:c:func:`!vma_end_read`. VMA **write** locks are acquired via :c:func:`!vma_start_write` in instances where a VMA is about to be modified, unlike :c:func:`!vma_start_read` the lock is always @@ -726,9 +731,9 @@ acquired. An mmap write lock **must** be held for the duration of the VMA write lock, releasing or downgrading the mmap write lock also releases the VMA write lock so there is no :c:func:`!vma_end_write` function. -Note that a semaphore write lock is not held across a VMA lock. Rather, a -sequence number is used for serialisation, and the write semaphore is only -acquired at the point of write lock to update this. +Note that when write-locking a VMA lock, the :c:member:`!vma.vm_refcnt` is temporarily +modified so that readers can detect the presense of a writer. The reference counter is +restored once the vma sequence number used for serialisation is updated. This ensures the semantics we require - VMA write locks provide exclusive write access to the VMA. @@ -738,7 +743,7 @@ Implementation details The VMA lock mechanism is designed to be a lightweight means of avoiding the use of the heavily contended mmap lock. It is implemented using a combination of a -read/write semaphore and sequence numbers belonging to the containing +reference counter and sequence numbers belonging to the containing :c:struct:`!struct mm_struct` and the VMA. Read locks are acquired via :c:func:`!vma_start_read`, which is an optimistic @@ -779,28 +784,31 @@ release of any VMA locks on its release makes sense, as you would never want to keep VMAs locked across entirely separate write operations. It also maintains correct lock ordering. -Each time a VMA read lock is acquired, we acquire a read lock on the -:c:member:`!vma->vm_lock` read/write semaphore and hold it, while checking that -the sequence count of the VMA does not match that of the mm. +Each time a VMA read lock is acquired, we increment :c:member:`!vma.vm_refcnt` +reference counter and check that the sequence count of the VMA does not match +that of the mm. -If it does, the read lock fails. If it does not, we hold the lock, excluding -writers, but permitting other readers, who will also obtain this lock under RCU. +If it does, the read lock fails and :c:member:`!vma.vm_refcnt` is dropped. +If it does not, we keep the reference counter raised, excluding writers, but +permitting other readers, who can also obtain this lock under RCU. Importantly, maple tree operations performed in :c:func:`!lock_vma_under_rcu` are also RCU safe, so the whole read lock operation is guaranteed to function correctly. -On the write side, we acquire a write lock on the :c:member:`!vma->vm_lock` -read/write semaphore, before setting the VMA's sequence number under this lock, -also simultaneously holding the mmap write lock. +On the write side, we set a bit in :c:member:`!vma.vm_refcnt` which can't be +modified by readers and wait for all readers to drop their reference count. +Once there are no readers, the VMA's sequence number is set to match that of +the mm. During this entire operation mmap write lock is held. This way, if any read locks are in effect, :c:func:`!vma_start_write` will sleep until these are finished and mutual exclusion is achieved. -After setting the VMA's sequence number, the lock is released, avoiding -complexity with a long-term held write lock. +After setting the VMA's sequence number, the bit in :c:member:`!vma.vm_refcnt` +indicating a writer is cleared. From this point on, VMA's sequence number will +indicate VMA's write-locked state until mmap write lock is dropped or downgraded. -This clever combination of a read/write semaphore and sequence count allows for +This clever combination of a reference counter and sequence count allows for fast RCU-based per-VMA lock acquisition (especially on page fault, though utilised elsewhere) with minimal complexity around lock ordering. From fcd807a03b864e2c7b2aa5eaade185127c4e2414 Mon Sep 17 00:00:00 2001 From: Marcelo Moreira Date: Mon, 17 Feb 2025 18:54:31 -0300 Subject: [PATCH 0938/2157] Docs/mm/damon: fix spelling and grammar in monitoring_intervals_tuning_example.rst This patch fixes some spelling and grammar mistakes in the documentation, improving the readability. - multipled -> multiplied - idential -> identical - minuts -> minutes - efficieny -> efficiency Link: https://lkml.kernel.org/r/20250217215512.12833-1-marcelomoreira1905@gmail.com Signed-off-by: Marcelo Moreira Reviewed-by: SeongJae Park Cc: Shuah khan Signed-off-by: Andrew Morton --- .../mm/damon/monitoring_intervals_tuning_example.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/mm/damon/monitoring_intervals_tuning_example.rst b/Documentation/mm/damon/monitoring_intervals_tuning_example.rst index 334a854efb40..7207cbed591f 100644 --- a/Documentation/mm/damon/monitoring_intervals_tuning_example.rst +++ b/Documentation/mm/damon/monitoring_intervals_tuning_example.rst @@ -36,7 +36,7 @@ Then, list the DAMON-found regions of different access patterns, sorted by the "access temperature". "Access temperature" is a metric representing the access-hotness of a region. It is calculated as a weighted sum of the access frequency and the age of the region. If the access frequency is 0 %, the -temperature is multipled by minus one. That is, if a region is not accessed, +temperature is multiplied by minus one. That is, if a region is not accessed, it gets minus temperature and it gets lower as not accessed for longer time. The sorting is in temperature-ascendint order, so the region at the top of the list is the coldest, and the one at the bottom is the hottest one. :: @@ -58,11 +58,11 @@ list is the coldest, and the one at the bottom is the hottest one. :: The list shows not seemingly hot regions, and only minimum access pattern diversity. Every region has zero access frequency. The number of region is 10, which is the default ``min_nr_regions value``. Size of each region is also -nearly idential. We can suspect this is because “adaptive regions adjustment” +nearly identical. We can suspect this is because “adaptive regions adjustment” mechanism was not well working. As the guide suggested, we can get relative hotness of regions using ``age`` as the recency information. That would be better than nothing, but given the fact that the longest age is only about 6 -seconds while we waited about ten minuts, it is unclear how useful this will +seconds while we waited about ten minutes, it is unclear how useful this will be. The temperature ranges to total size of regions of each range histogram @@ -190,7 +190,7 @@ for sampling and aggregation intervals, respectively). :: The number of regions having different access patterns has significantly increased. Size of each region is also more varied. Total size of non-zero access frequency regions is also significantly increased. Maybe this is already -good enough to make some meaningful memory management efficieny changes. +good enough to make some meaningful memory management efficiency changes. 800ms/16s intervals: Another bias ================================= From 63a23847dc47113b879a5f53cc0ca5cedc881ffd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 19:20:06 +0000 Subject: [PATCH 0939/2157] fs: convert block_commit_write() to take a folio All callers now have a folio, so pass it in instead of converting folio->page->folio. Link: https://lkml.kernel.org/r/20250217192009.437916-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- fs/buffer.c | 14 ++++---------- fs/ext4/inline.c | 2 +- fs/ext4/move_extent.c | 2 +- fs/iomap/buffered-io.c | 2 +- fs/ocfs2/aops.c | 4 ++-- fs/ocfs2/file.c | 2 +- fs/udf/file.c | 2 +- include/linux/buffer_head.h | 2 +- 8 files changed, 12 insertions(+), 18 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index cc8452f60251..c66a59bb068b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2166,7 +2166,7 @@ int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, } EXPORT_SYMBOL(__block_write_begin); -static void __block_commit_write(struct folio *folio, size_t from, size_t to) +void block_commit_write(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; bool partial = false; @@ -2204,6 +2204,7 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to) if (!partial) folio_mark_uptodate(folio); } +EXPORT_SYMBOL(block_commit_write); /* * block_write_begin takes care of the basic task of block allocation and @@ -2262,7 +2263,7 @@ int block_write_end(struct file *file, struct address_space *mapping, flush_dcache_folio(folio); /* This could be a short (even 0-length) commit */ - __block_commit_write(folio, start, start + copied); + block_commit_write(folio, start, start + copied); return copied; } @@ -2578,13 +2579,6 @@ int cont_write_begin(struct file *file, struct address_space *mapping, } EXPORT_SYMBOL(cont_write_begin); -void block_commit_write(struct page *page, unsigned from, unsigned to) -{ - struct folio *folio = page_folio(page); - __block_commit_write(folio, from, to); -} -EXPORT_SYMBOL(block_commit_write); - /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must @@ -2630,7 +2624,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret)) goto out_unlock; - __block_commit_write(folio, 0, end); + block_commit_write(folio, 0, end); folio_mark_dirty(folio); folio_wait_stable(folio); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3536ca7e4fcc..0af474c8b260 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -637,7 +637,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, goto retry; if (folio) - block_commit_write(&folio->page, from, to); + block_commit_write(folio, from, to); out: if (folio) { folio_unlock(folio); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 898443e98efc..48649be64d6a 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -399,7 +399,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, bh = bh->b_this_page; } - block_commit_write(&folio[0]->page, from, from + replaced_size); + block_commit_write(folio[0], from, from + replaced_size); /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d303e6c8900c..f3904d13cda4 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1484,7 +1484,7 @@ static loff_t iomap_folio_mkwrite_iter(struct iomap_iter *iter, &iter->iomap); if (ret) return ret; - block_commit_write(&folio->page, 0, length); + block_commit_write(folio, 0, length); } else { WARN_ON_ONCE(!folio_test_uptodate(folio)); folio_mark_dirty(folio); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 5bbeb6fbb1ac..ee1d92ed950f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -920,7 +920,7 @@ static void ocfs2_write_failure(struct inode *inode, ocfs2_jbd2_inode_add_write(wc->w_handle, inode, user_pos, user_len); - block_commit_write(&folio->page, from, to); + block_commit_write(folio, from, to); } } } @@ -2012,7 +2012,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length); } - block_commit_write(&folio->page, from, to); + block_commit_write(folio, from, to); } } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e54f2c4b5a90..2056cf08ac1e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -813,7 +813,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* must not update i_size! */ - block_commit_write(&folio->page, block_start + 1, block_start + 1); + block_commit_write(folio, block_start + 1, block_start + 1); } /* diff --git a/fs/udf/file.c b/fs/udf/file.c index 412fe7c4d348..0d76c4f37b3e 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -69,7 +69,7 @@ static vm_fault_t udf_page_mkwrite(struct vm_fault *vmf) goto out_unlock; } - block_commit_write(&folio->page, 0, end); + block_commit_write(folio, 0, end); out_dirty: folio_mark_dirty(folio); folio_wait_stable(folio); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932139c5d46f..6672e1a5031c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -271,7 +271,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); -void block_commit_write(struct page *page, unsigned int from, unsigned int to); +void block_commit_write(struct folio *folio, size_t from, size_t to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); From 52d671a1a36a16f3a0dd9a2beff964e75bce9787 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 19:20:07 +0000 Subject: [PATCH 0940/2157] fs: remove page_file_mapping() This wrapper has no more callers. Delete it. Link: https://lkml.kernel.org/r/20250217192009.437916-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 47bfc6b1b632..975c56fb4f85 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -575,11 +575,6 @@ static inline struct address_space *folio_flush_mapping(struct folio *folio) return folio_mapping(folio); } -static inline struct address_space *page_file_mapping(struct page *page) -{ - return folio_file_mapping(page_folio(page)); -} - /** * folio_inode - Get the host inode for this folio. * @folio: The folio. From 0d40cfe63a2f19b9d375382e6d90b9ebd412901e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 19:20:08 +0000 Subject: [PATCH 0941/2157] fs: remove folio_file_mapping() No callers of this function remain as filesystems no longer see swapfile pages through their normal read/write paths. Link: https://lkml.kernel.org/r/20250217192009.437916-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 975c56fb4f85..ad7c0f615e9b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -535,26 +535,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); -/** - * folio_file_mapping - Find the mapping this folio belongs to. - * @folio: The folio. - * - * For folios which are in the page cache, return the mapping that this - * page belongs to. Folios in the swap cache return the mapping of the - * swap file or swap device where the data is stored. This is different - * from the mapping returned by folio_mapping(). The only reason to - * use it is if, like NFS, you return 0 from ->activate_swapfile. - * - * Do not call this for folios which aren't in the page cache or swap cache. - */ -static inline struct address_space *folio_file_mapping(struct folio *folio) -{ - if (unlikely(folio_test_swapcache(folio))) - return swapcache_mapping(folio); - - return folio->mapping; -} - /** * folio_flush_mapping - Find the file mapping this folio belongs to. * @folio: The folio. From 8e4909d693224134fb14ae082c379168b43112c9 Mon Sep 17 00:00:00 2001 From: Suchit K Date: Sat, 15 Feb 2025 22:30:42 +0530 Subject: [PATCH 0942/2157] Documentation/mm: fix spelling mistake The word watermark was misspelled as "watemark". Link: https://lkml.kernel.org/r/CAO9wTFhe4sf1eVVgijt2cdLPPsUHBj7B=HN-380_JSpve5KbvQ@mail.gmail.com Signed-off-by: Suchit Cc: Shuah Khan Signed-off-by: Andrew Morton --- Documentation/mm/balance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/mm/balance.rst b/Documentation/mm/balance.rst index abaa78561c31..c4962c89a7f5 100644 --- a/Documentation/mm/balance.rst +++ b/Documentation/mm/balance.rst @@ -81,7 +81,7 @@ Page stealing from process memory and shm is done if stealing the page would alleviate memory pressure on any zone in the page's node that has fallen below its watermark. -watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These +watermark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These are per-zone fields, used to determine when a zone needs to be balanced. When the number of pages falls below watermark[WMARK_MIN], the hysteric field low_on_memory gets set. This stays set till the number of free pages becomes From af3b45aac5c9d0bdaebf4e93e3fecddc3f363857 Mon Sep 17 00:00:00 2001 From: Ujwal Kundur Date: Sat, 15 Feb 2025 13:48:03 +0530 Subject: [PATCH 0943/2157] selftests/mm: fix spelling Fix misspelling flagged by codespell. Link: https://lkml.kernel.org/r/20250215081803.1793-1-ujwal.kundur@gmail.com Signed-off-by: Ujwal Kundur Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 31e0c8a3110d..5457a078690d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -348,7 +348,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) /* * After initialization of area_src, we must explicitly release pages * for area_dst to make sure it's fully empty. Otherwise we could have - * some area_dst pages be errornously initialized with zero pages, + * some area_dst pages be erroneously initialized with zero pages, * hence we could hit memory corruption later in the test. * * One example is when THP is globally enabled, above allocate_area() From 86758b504864913233f6a16076184ba784cd4466 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 18 Feb 2025 15:49:54 +0530 Subject: [PATCH 0944/2157] mm/ioremap: pass pgprot_t to ioremap_prot() instead of unsigned long ioremap_prot() currently accepts pgprot_val parameter as an unsigned long, thus implicitly assuming that pgprot_val and pgprot_t could never be bigger than unsigned long. But this assumption soon will not be true on arm64 when using D128 pgtables. In 128 bit page table configuration, unsigned long is 64 bit, but pgprot_t is 128 bit. Passing platform abstracted pgprot_t argument is better as compared to size based data types. Let's change the parameter to directly pass pgprot_t like another similar helper generic_ioremap_prot(). Without this change in place, D128 configuration does not work on arm64 as the top 64 bits gets silently stripped when passing the protection value to this function. Link: https://lkml.kernel.org/r/20250218101954.415331-1-anshuman.khandual@arm.com Signed-off-by: Ryan Roberts Co-developed-by: Anshuman Khandual Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas [arm64] Signed-off-by: Andrew Morton --- arch/arc/mm/ioremap.c | 6 ++---- arch/arm64/include/asm/io.h | 6 +++--- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/mm/ioremap.c | 3 +-- arch/csky/include/asm/io.h | 2 +- arch/loongarch/include/asm/io.h | 10 +++++----- arch/mips/include/asm/io.h | 8 ++++---- arch/mips/mm/ioremap.c | 4 ++-- arch/mips/mm/ioremap64.c | 4 ++-- arch/parisc/include/asm/io.h | 2 +- arch/parisc/mm/ioremap.c | 4 ++-- arch/powerpc/include/asm/io.h | 2 +- arch/powerpc/mm/ioremap.c | 4 ++-- arch/powerpc/platforms/ps3/spu.c | 4 ++-- arch/riscv/include/asm/io.h | 2 +- arch/riscv/kernel/acpi.c | 2 +- arch/s390/include/asm/io.h | 4 ++-- arch/s390/pci/pci.c | 4 ++-- arch/sh/boards/mach-landisk/setup.c | 2 +- arch/sh/boards/mach-lboxre2/setup.c | 2 +- arch/sh/boards/mach-sh03/setup.c | 2 +- arch/sh/include/asm/io.h | 2 +- arch/sh/mm/ioremap.c | 3 +-- arch/x86/include/asm/io.h | 2 +- arch/x86/mm/ioremap.c | 4 ++-- arch/xtensa/include/asm/io.h | 6 +++--- arch/xtensa/mm/ioremap.c | 4 ++-- include/asm-generic/io.h | 4 ++-- mm/ioremap.c | 4 ++-- mm/memory.c | 6 +++--- 30 files changed, 55 insertions(+), 59 deletions(-) diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index b07004d53267..fd8897a0e52c 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -32,7 +32,7 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) return (void __iomem *)(u32)paddr; return ioremap_prot(paddr, size, - pgprot_val(pgprot_noncached(PAGE_KERNEL))); + pgprot_noncached(PAGE_KERNEL)); } EXPORT_SYMBOL(ioremap); @@ -44,10 +44,8 @@ EXPORT_SYMBOL(ioremap); * might need finer access control (R/W/X) */ void __iomem *ioremap_prot(phys_addr_t paddr, size_t size, - unsigned long flags) + pgprot_t prot) { - pgprot_t prot = __pgprot(flags); - /* force uncached */ return generic_ioremap_prot(paddr, size, pgprot_noncached(prot)); } diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 76ebbdc6ffdd..9b96840fb979 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -270,9 +270,9 @@ int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); #define _PAGE_IOREMAP PROT_DEVICE_nGnRE #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), PROT_NORMAL_NC) + ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ - ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) + ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) /* * io{read,write}{16,32,64}be() macros @@ -293,7 +293,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(addr))) return (void __iomem *)__phys_to_virt(addr); - return ioremap_prot(addr, size, PROT_NORMAL); + return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); } /* diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e6f66491fbe9..b9a66fc146c9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -379,7 +379,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return ioremap_prot(phys, size, pgprot_val(prot)); + return ioremap_prot(phys, size, prot); } /* diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 6cc0b7e7eb03..10e246f11271 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -15,10 +15,9 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t pgprot) { unsigned long last_addr = phys_addr + size - 1; - pgprot_t pgprot = __pgprot(prot); /* Don't allow outside PHYS_MASK */ if (last_addr & ~PHYS_MASK) diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index ed53f0b47388..536d3bf32ff1 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -36,7 +36,7 @@ */ #define ioremap_wc(addr, size) \ ioremap_prot((addr), (size), \ - (_PAGE_IOREMAP & ~_CACHE_MASK) | _CACHE_UNCACHED) + __pgprot((_PAGE_IOREMAP & ~_CACHE_MASK) | _CACHE_UNCACHED)) #include diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index e77a56eaf906..eaff72b38dc8 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -23,9 +23,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); #ifdef CONFIG_ARCH_IOREMAP static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - switch (prot_val & _CACHE_MASK) { + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); case _CACHE_SUC: @@ -38,7 +38,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, } #define ioremap(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC)) + ioremap_prot((offset), (size), PAGE_KERNEL_SUC) #define iounmap(addr) ((void)(addr)) @@ -55,10 +55,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, */ #define ioremap_wc(offset, size) \ ioremap_prot((offset), (size), \ - pgprot_val(wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC)) + wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC) #define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((offset), (size), PAGE_KERNEL) #define mmiowb() wmb() diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 0bddb568af7c..4dacf40ebefd 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -126,7 +126,7 @@ static inline unsigned long isa_virt_to_bus(volatile void *address) } void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val); + pgprot_t prot); void iounmap(const volatile void __iomem *addr); /* @@ -141,7 +141,7 @@ void iounmap(const volatile void __iomem *addr); * address. */ #define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_UNCACHED) + ioremap_prot((offset), (size), __pgprot(_CACHE_UNCACHED)) /* * ioremap_cache - map bus memory into CPU space @@ -159,7 +159,7 @@ void iounmap(const volatile void __iomem *addr); * memory-like regions on I/O busses. */ #define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _page_cachable_default) + ioremap_prot((offset), (size), __pgprot(_page_cachable_default)) /* * ioremap_wc - map bus memory into CPU space @@ -180,7 +180,7 @@ void iounmap(const volatile void __iomem *addr); * _CACHE_UNCACHED option (see cpu_probe() method). */ #define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), boot_cpu_data.writecombine) + ioremap_prot((offset), (size), __pgprot(boot_cpu_data.writecombine)) #if defined(CONFIG_CPU_CAVIUM_OCTEON) #define war_io_reorder_wmb() wmb() diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index d8243d61ef32..c6c4576cd4a8 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -44,9 +44,9 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, * ioremap_prot gives the caller control over cache coherency attributes (CCA) */ void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - unsigned long flags = prot_val & _CACHE_MASK; + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; unsigned long offset, pfn, last_pfn; struct vm_struct *area; phys_addr_t last_addr; diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c index 15e7820d6a5f..acc03ba20098 100644 --- a/arch/mips/mm/ioremap64.c +++ b/arch/mips/mm/ioremap64.c @@ -3,9 +3,9 @@ #include void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - unsigned long flags = prot_val & _CACHE_MASK; + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); void __iomem *addr; diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 3143cf29ce27..04b783e2a6d1 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -131,7 +131,7 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr) _PAGE_ACCESSED | _PAGE_NO_CACHE) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), _PAGE_IOREMAP) + ioremap_prot((addr), (size), __pgprot(_PAGE_IOREMAP)) #define pci_iounmap pci_iounmap diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index fd996472dfe7..0b65c4b3baee 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -14,7 +14,7 @@ #include void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { #ifdef CONFIG_EISA unsigned long end = phys_addr + size - 1; @@ -41,6 +41,6 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, } } - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index fd92ac450169..0436cdc7cfcc 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -895,7 +895,7 @@ void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_cache(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((addr), (size), PAGE_KERNEL) #define iounmap iounmap diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 7b0afcabd89f..0d6615620ada 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -41,9 +41,9 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) return __ioremap_caller(addr, size, prot, caller); } -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, pgprot_t prot) { - pte_t pte = __pte(flags); + pte_t pte = __pte(pgprot_val(prot)); void *caller = __builtin_return_address(0); /* writeable implies dirty for kernel addresses */ diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 4a2520ec6d7f..61b37c9400b2 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -190,10 +190,10 @@ static void spu_unmap(struct spu *spu) static int __init setup_areas(struct spu *spu) { struct table {char* name; unsigned long addr; unsigned long size;}; - unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO)); spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr, - sizeof(struct spe_shadow), shadow_flags); + sizeof(struct spe_shadow), + pgprot_noncached_wc(PAGE_KERNEL_RO)); if (!spu_pdata(spu)->shadow) { pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__); goto fail_ioremap; diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 1c5c641075d2..0536846db9b6 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -137,7 +137,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #ifdef CONFIG_MMU #define arch_memremap_wb(addr, size) \ - ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) + ((__force void *)ioremap_prot((addr), (size), __pgprot(_PAGE_KERNEL))) #endif #endif /* _ASM_RISCV_IO_H */ diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 2fd29695a788..3f6d5a6789e8 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -305,7 +305,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) } } - return ioremap_prot(phys, size, pgprot_val(prot)); + return ioremap_prot(phys, size, prot); } #ifdef CONFIG_PCI diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fc9933a743d6..82f1043a4fc3 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -33,9 +33,9 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL)) #define ioremap_wt(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writethrough(PAGE_KERNEL)) static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 88f72745fa59..9fdcd733d40e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -255,7 +255,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { /* * When PCI MIO instructions are unavailable the "physical" address @@ -265,7 +265,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (!static_branch_unlikely(&have_mio)) return (void __iomem *)phys_addr; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 2c44b94f82fb..1b3f43c3ac46 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -58,7 +58,7 @@ static int __init landisk_devices_setup(void) /* open I/O area window */ paddrbase = virt_to_phys((void *)PA_AREA5_IO); prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, pgprot_val(prot)); + cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, prot); if (!cf_ide_base) { printk("allocate_cf_area : can't open CF I/O window!\n"); return -ENOMEM; diff --git a/arch/sh/boards/mach-lboxre2/setup.c b/arch/sh/boards/mach-lboxre2/setup.c index 20d01b430f2a..e95bde207adb 100644 --- a/arch/sh/boards/mach-lboxre2/setup.c +++ b/arch/sh/boards/mach-lboxre2/setup.c @@ -53,7 +53,7 @@ static int __init lboxre2_devices_setup(void) paddrbase = virt_to_phys((void*)PA_AREA5_IO); psize = PAGE_SIZE; prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf0_io_base = (u32)ioremap_prot(paddrbase, psize, pgprot_val(prot)); + cf0_io_base = (u32)ioremap_prot(paddrbase, psize, prot); if (!cf0_io_base) { printk(KERN_ERR "%s : can't open CF I/O window!\n" , __func__ ); return -ENOMEM; diff --git a/arch/sh/boards/mach-sh03/setup.c b/arch/sh/boards/mach-sh03/setup.c index 3901b6031ad5..5c9312f334d3 100644 --- a/arch/sh/boards/mach-sh03/setup.c +++ b/arch/sh/boards/mach-sh03/setup.c @@ -75,7 +75,7 @@ static int __init sh03_devices_setup(void) /* open I/O area window */ paddrbase = virt_to_phys((void *)PA_AREA5_IO); prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, pgprot_val(prot)); + cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, prot); if (!cf_ide_base) { printk("allocate_cf_area : can't open CF I/O window!\n"); return -ENOMEM; diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf5eab840d57..531ec49b878d 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -299,7 +299,7 @@ unsigned long long poke_real_address_q(unsigned long long addr, #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_NOCACHE) #define ioremap_cache(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((addr), (size), PAGE_KERNEL) #endif /* CONFIG_MMU */ #include diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 33d20f34560f..5bbde53fb32d 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -73,10 +73,9 @@ __ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) #endif /* CONFIG_29BIT */ void __iomem __ref *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t pgprot) { void __iomem *mapped; - pgprot_t pgprot = __pgprot(prot); mapped = __ioremap_trapped(phys_addr, size); if (mapped) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index ed580c7f9d0a..0794936ec187 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -170,7 +170,7 @@ extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 38ff7791a9c7..d501f0871aa5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -440,10 +440,10 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_cache); void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { return __ioremap_caller(phys_addr, size, - pgprot2cachemode(__pgprot(prot_val)), + pgprot2cachemode(prot), __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index 934e58399c8c..7cdcc2deab3e 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -29,7 +29,7 @@ * I/O memory mapping functions. */ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot); + pgprot_t prot); #define ioremap_prot ioremap_prot #define iounmap iounmap @@ -40,7 +40,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_BYPASS_VADDR); else return ioremap_prot(offset, size, - pgprot_val(pgprot_noncached(PAGE_KERNEL))); + pgprot_noncached(PAGE_KERNEL)); } #define ioremap ioremap @@ -51,7 +51,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset, && offset - XCHAL_KIO_PADDR < XCHAL_KIO_SIZE) return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_CACHED_VADDR); else - return ioremap_prot(offset, size, pgprot_val(PAGE_KERNEL)); + return ioremap_prot(offset, size, PAGE_KERNEL); } #define ioremap_cache ioremap_cache diff --git a/arch/xtensa/mm/ioremap.c b/arch/xtensa/mm/ioremap.c index 8ca660b7ab49..26f238fa9d0d 100644 --- a/arch/xtensa/mm/ioremap.c +++ b/arch/xtensa/mm/ioremap.c @@ -11,12 +11,12 @@ #include void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { unsigned long pfn = __phys_to_pfn((phys_addr)); WARN_ON(pfn_valid(pfn)); - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index a5cbbf3e26ec..402020b23423 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1111,7 +1111,7 @@ void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size, pgprot_t prot); void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot); + pgprot_t prot); void iounmap(volatile void __iomem *addr); void generic_iounmap(volatile void __iomem *addr); @@ -1120,7 +1120,7 @@ void generic_iounmap(volatile void __iomem *addr); static inline void __iomem *ioremap(phys_addr_t addr, size_t size) { /* _PAGE_IOREMAP needs to be supplied by the architecture */ - return ioremap_prot(addr, size, _PAGE_IOREMAP); + return ioremap_prot(addr, size, __pgprot(_PAGE_IOREMAP)); } #endif #endif /* !CONFIG_MMU || CONFIG_GENERIC_IOREMAP */ diff --git a/mm/ioremap.c b/mm/ioremap.c index 3e049dfb28bd..c36dd9f62fd5 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -50,9 +50,9 @@ void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size, #ifndef ioremap_prot void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); #endif diff --git a/mm/memory.c b/mm/memory.c index 39bceed7448f..270c9357475d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6727,7 +6727,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; - unsigned long prot = 0; + pgprot_t prot = __pgprot(0); void __iomem *maddr; int offset = offset_in_page(addr); int ret = -EINVAL; @@ -6737,7 +6737,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, retry: if (follow_pfnmap_start(&args)) return -EINVAL; - prot = pgprot_val(args.pgprot); + prot = args.pgprot; phys_addr = (resource_size_t)args.pfn << PAGE_SHIFT; writable = args.writable; follow_pfnmap_end(&args); @@ -6752,7 +6752,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, if (follow_pfnmap_start(&args)) goto out_unmap; - if ((prot != pgprot_val(args.pgprot)) || + if ((pgprot_val(prot) != pgprot_val(args.pgprot)) || (phys_addr != (args.pfn << PAGE_SHIFT)) || (writable != args.writable)) { follow_pfnmap_end(&args); From 381ff0341ac63d245035f274cacd3f1b8068d388 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Feb 2025 14:37:04 -0800 Subject: [PATCH 0945/2157] Docs/mm/damon/design: fix typo on DAMOS filters usage doc link Patch series "Docs/mm/damon: misc DAMOS filters documentation fixes and improves". Fix and improve DAMOS filters documentation by fixing a copy-paste typo, adding hugepage_size filter documentation on design doc, moving logic details from usage to design, clarify DAMOS filters handling sequence based on handling layer, and re-organizing the filters type list for easier understanding of the handling sequence. This patch (of 5): The link from DAMOS filters design doc to usage doc has a typo calling filters as watermarks. Fix it. Link: https://lkml.kernel.org/r/20250218223708.53437-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250218223708.53437-2-sj@kernel.org Fixes: d31f5626a0e1 ("Docs/mm/damon/design: add links to sections of DAMON sysfs interface usage doc") Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index e28c6a1b40ae..12ae7e1209c8 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -617,7 +617,7 @@ Below ``type`` of filters are currently supported. - Applied to pages that belonging to a given DAMON monitoring target. - Handled by the core logic. -To know how user-space can set the watermarks via :ref:`DAMON sysfs interface +To know how user-space can set the filters via :ref:`DAMON sysfs interface `, refer to :ref:`filters ` part of the documentation. From e52a942b47c8b32072e7f342aca600016bcfca33 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Feb 2025 14:37:05 -0800 Subject: [PATCH 0946/2157] Docs/mm/damon/design: document hugepage_size filter 'hugepage_size' DAMOS filter type is not documented on the design doc. Add a description of the type. Link: https://lkml.kernel.org/r/20250218223708.53437-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 12ae7e1209c8..a959c081bc59 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -610,6 +610,9 @@ Below ``type`` of filters are currently supported. - Applied to pages that are accessed after the last access check from the scheme. - Handled by operations set layer. Supported by only ``paddr`` set. +- pages that managed in a given size range + - Applied to pages that managed in a given size range. + - Handled by operations set layer. Supported by only ``paddr`` set. - address range - Applied to pages that belonging to a given address range. - Handled by the core logic. From 0f28583b28d84a5eaaf299e01b7301922ae8da46 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Feb 2025 14:37:06 -0800 Subject: [PATCH 0947/2157] Docs/damon: move DAMOS filter type names and meaning to design doc DAMON sysfs usage doc is describing DAMOS filter type names and their meanings in short. The design doc is providing the short meaning and detailed descriptions, too. This is unnecessary duplicates and confuses where to document new DAMOS filter types and features. Move the details from usage to design doc. Link: https://lkml.kernel.org/r/20250218223708.53437-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 26 +++++++++----------- Documentation/mm/damon/design.rst | 12 ++++----- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 51af66c208c5..dc37bba96273 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -408,21 +408,19 @@ in the numeric order. Each filter directory contains nine files, namely ``type``, ``matching``, ``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max`` -and ``target_idx``. To ``type`` file, you can write one of six special -keywords: ``anon`` for anonymous pages, ``memcg`` for specific memory cgroup, -``young`` for young pages, ``addr`` for specific address range (an open-ended -interval), ``hugepage_size`` for large folios of a specific size range [``min``, -``max``] or ``target`` for specific DAMON monitoring target filtering. Meaning -of the types are same to the description on the :ref:`design doc -`. +and ``target_idx``. To ``type`` file, you can write the type of the filter. +Refer to :ref:`the design doc ` for available type +names and their meanings. -In case of the memory cgroup filtering, you can specify the memory cgroup of -the interest by writing the path of the memory cgroup from the cgroups mount -point to ``memcg_path`` file. In case of the address range filtering, you can -specify the start and end address of the range to ``addr_start`` and -``addr_end`` files, respectively. For the DAMON monitoring target filtering, -you can specify the index of the target between the list of the DAMON context's -monitoring targets list to ``target_idx`` file. +For ``memcg`` type, you can specify the memory cgroup of the interest by +writing the path of the memory cgroup from the cgroups mount point to +``memcg_path`` file. For ``addr`` type, you can specify the start and end +address of the range (open-ended interval) to ``addr_start`` and ``addr_end`` +files, respectively. For ``hugepage_size`` type, you can specify the minimum +and maximum size of the range (closed interval) to ``min`` and ``max`` files, +respectively. For ``target`` type, you can specify the index of the target +between the list of the DAMON context's monitoring targets list to +``target_idx`` file. You can write ``Y`` or ``N`` to ``matching`` file to specify whether the filter is for memory that matches the ``type``. You can write ``Y`` or ``N`` to diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index a959c081bc59..7360e5ac0d06 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -600,23 +600,23 @@ counted as the scheme has tried. This difference affects the statistics. Below ``type`` of filters are currently supported. -- anonymous page +- anon - Applied to pages that containing data that not stored in files. - Handled by operations set layer. Supported by only ``paddr`` set. -- memory cgroup +- memcg - Applied to pages that belonging to a given cgroup. - Handled by operations set layer. Supported by only ``paddr`` set. -- young page +- young - Applied to pages that are accessed after the last access check from the scheme. - Handled by operations set layer. Supported by only ``paddr`` set. -- pages that managed in a given size range +- hugepage_size - Applied to pages that managed in a given size range. - Handled by operations set layer. Supported by only ``paddr`` set. -- address range +- addr - Applied to pages that belonging to a given address range. - Handled by the core logic. -- DAMON monitoring target +- target - Applied to pages that belonging to a given DAMON monitoring target. - Handled by the core logic. From 4a4d8e792506432270e27516cf03a8208cbbec8b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Feb 2025 14:37:07 -0800 Subject: [PATCH 0948/2157] Docs/mm/damon/design: clarify handling layer based filters evaluation sequence If an element of memory matches a DAMOS filter, filters that installed after that get no chance to make any effect to the element. Hence in what order DAMOS filters are handled is important, if both allow filters and reject filters are used together. The ordering is affected by both the installation order and which layter the filters are handled. The design document is not clearly documenting the latter part. Clarify it on the design doc. Link: https://lkml.kernel.org/r/20250218223708.53437-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 7360e5ac0d06..8b9727d91434 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -569,11 +569,21 @@ number of filters for each scheme. Each filter specifies - whether it is to allow (include) or reject (exclude) applying the scheme's action to the memory (``allow``). -When multiple filters are installed, each filter is evaluated in the installed -order. If a part of memory is matched to one of the filter, next filters are -ignored. If the memory passes through the filters evaluation stage because it -is not matched to any of the filters, applying the scheme's action to it is -allowed, same to the behavior when no filter exists. +For efficient handling of filters, some types of filters are handled by the +core layer, while others are handled by operations set. In the latter case, +hence, support of the filter types depends on the DAMON operations set. In +case of the core layer-handled filters, the memory regions that excluded by the +filter are not counted as the scheme has tried to the region. In contrast, if +a memory regions is filtered by an operations set layer-handled filter, it is +counted as the scheme has tried. This difference affects the statistics. + +When multiple filters are installed, the group of filters that handled by the +core layer are evaluated first. After that, the group of filters that handled +by the operations layer are evaluated. Filters in each of the groups are +evaluated in the installed order. If a part of memory is matched to one of the +filter, next filters are ignored. If the memory passes through the filters +evaluation stage because it is not matched to any of the filters, applying the +scheme's action to it is allowed, same to the behavior when no filter exists. For example, let's assume 1) a filter for allowing anonymous pages and 2) another filter for rejecting young pages are installed in the order. If a page @@ -590,14 +600,6 @@ filter-allowed or filters evaluation stage passed. It means that installing allow-filters at the end of the list makes no practical change but only filters-checking overhead. -For efficient handling of filters, some types of filters are handled by the -core layer, while others are handled by operations set. In the latter case, -hence, support of the filter types depends on the DAMON operations set. In -case of the core layer-handled filters, the memory regions that excluded by the -filter are not counted as the scheme has tried to the region. In contrast, if -a memory regions is filtered by an operations set layer-handled filter, it is -counted as the scheme has tried. This difference affects the statistics. - Below ``type`` of filters are currently supported. - anon From edab6ffd792a7774e7d5fd7eb1d6251e452010f5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Feb 2025 14:37:08 -0800 Subject: [PATCH 0949/2157] Docs/mm/damon/design: categorize DAMOS filter types based on handling layer On what DAMON layer a DAMOS filter is handled is important to expect in what order filters will be evaluated. Re-organize the DAMOS filter types list on the design doc to categorize types based on the handling layer, to let users more easily understand the handling order. Link: https://lkml.kernel.org/r/20250218223708.53437-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 34 ++++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 8b9727d91434..6a66aa0833fd 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -602,25 +602,21 @@ filters-checking overhead. Below ``type`` of filters are currently supported. -- anon - - Applied to pages that containing data that not stored in files. - - Handled by operations set layer. Supported by only ``paddr`` set. -- memcg - - Applied to pages that belonging to a given cgroup. - - Handled by operations set layer. Supported by only ``paddr`` set. -- young - - Applied to pages that are accessed after the last access check from the - scheme. - - Handled by operations set layer. Supported by only ``paddr`` set. -- hugepage_size - - Applied to pages that managed in a given size range. - - Handled by operations set layer. Supported by only ``paddr`` set. -- addr - - Applied to pages that belonging to a given address range. - - Handled by the core logic. -- target - - Applied to pages that belonging to a given DAMON monitoring target. - - Handled by the core logic. +- Core layer handled + - addr + - Applied to pages that belonging to a given address range. + - target + - Applied to pages that belonging to a given DAMON monitoring target. +- Operations layer handled, supported by only ``paddr`` operations set. + - anon + - Applied to pages that containing data that not stored in files. + - memcg + - Applied to pages that belonging to a given cgroup. + - young + - Applied to pages that are accessed after the last access check from the + scheme. + - hugepage_size + - Applied to pages that managed in a given size range. To know how user-space can set the filters via :ref:`DAMON sysfs interface `, refer to :ref:`filters ` part of the From 7365ff2c8eef4ea50b5f3ae2349fa180e3782ef1 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:02 +0000 Subject: [PATCH 0950/2157] mm/cma: export total and free number of pages for CMA areas Patch series "hugetlb/CMA improvements for large systems", v5. On large systems, we observed some issues with hugetlb and CMA: 1) When specifying a large number of hugetlb boot pages (hugepages= on the commandline), the kernel may run out of memory before it even gets to HVO. For example, if you have a 3072G system, and want to use 3024 1G hugetlb pages for VMs, that should leave you plenty of space for the hypervisor, provided you have the hugetlb vmemmap optimization (HVO) enabled. However, since the vmemmap pages are always allocated first, and then later in boot freed, you will actually run yourself out of memory before you can do HVO. This means not getting all the hugetlb pages you want, and worse, failure to boot if there is an allocation failure in the system from which it can't recover. 2) There is a system setup where you might want to use hugetlb_cma with a large value (say, again, 3024 out of 3072G like above), and then lower that if system usage allows it, to make room for non-hugetlb processes. For this, a variation of the problem above applies: the kernel runs out of unmovable space to allocate from before you finish boot, since your CMA area takes up all the space. 3) CMA wants to use one big contiguous area for allocations. Which fails if you have the aforementioned 3T system with a gap in the middle of physical memory (like the < 40bits BIOS DMA area seen on some AMD systems). You then won't be able to set up a CMA area for one of the NUMA nodes, leading to loss of half of your hugetlb CMA area. 4) Under the scenario mentioned in 2), when trying to grow the number of hugetlb pages after dropping it for a while, new CMA allocations may fail occasionally. This is not unexpected, some transient references on pages may prevent cma_alloc from succeeding under memory pressure. However, the hugetlb code then falls back to a normal contiguous alloc, which may end up succeeding. This is not always desired behavior. If you have a large CMA area, then the kernel has a restricted amount of memory it can do unmovable allocations from (a well known issue). A normal contiguous alloc may eat further in to this space. To resolve these issues, do the following: * Add hooks to the section init code to do custom initialization of memmap pages. Hugetlb bootmem (memblock) allocated pages can then be pre-HVOed. This avoids allocating a large number of vmemmap pages early in boot, only to have them be freed again later, and also avoids running out of memory as described under 1). Using these hooks for hugetlb is optional. It requires moving hugetlb bootmem allocation to an earlier spot by the architecture. This has been enabled on x86. * hugetlb_cma doesn't care about the CMA area it uses being one large contiguous range. Multiple smaller ranges are fine. The only requirements are that the areas should be on one NUMA node, and individual gigantic pages should be allocatable from them. So, implement multi-range support for CMA, avoiding issue 3). * Introduce a hugetlb_cma_only option on the commandline. This only allows allocations from CMA for gigantic pages, if hugetlb_cma= is also specified. * With hugetlb_cma_only active, it also makes sense to be able to pre-allocate gigantic hugetlb pages at boot time from the CMA area(s). Add a rudimentary early CMA allocation interface, that just grabs a piece of memblock-allocated space from the CMA area, which gets marked as allocated in the CMA bitmap when the CMA area is initialized. With this, hugepages= can be supported with hugetlb_cma=, making scenario 2) work. Additionally, fix some minor bugs, with one worth mentioning: since hugetlb gigantic bootmem pages are allocated by memblock, they may span multiple zones, as memblock doesn't (and mostly can't) know about zones. This can cause problems. A hugetlb page spanning multiple zones is bad, and it's worse with HVO, when the de-HVO step effectively sneakily re-assigns pages to a different zone than originally configured, since the tail pages all inherit the zone from the first 60 tail pages. This condition is not common, but can be easily reproduced using ZONE_MOVABLE. To fix this, add checks to see if gigantic bootmem pages intersect with multiple zones, and do not use them if they do, giving them back to the page allocator instead. The first patch is kind of along for the ride, except that maintaining an available_count for a CMA area is convenient for the multiple range support. This patch (of 27): In addition to the number of allocations and releases, system management software may like to be aware of the size of CMA areas, and how many pages are available in it. This information is currently not available, so export it in total_page and available_pages, respectively. The name 'available_pages' was picked over 'free_pages' because 'free' implies that the pages are unused. But they might not be, they just haven't been used by cma_alloc The number of available pages is tracked regardless of CONFIG_CMA_SYSFS, allowing for a few minor shortcuts in the code, avoiding bitmap operations. Link: https://lkml.kernel.org/r/20250228182928.2645936-2-fvdl@google.com Signed-off-by: Frank van der Linden Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Joao Martins Cc: Muchun Song Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Yu Zhao Cc: Zi Yan Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: Heiko Carstens Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-kernel-mm-cma | 13 +++++++++++ mm/cma.c | 22 ++++++++++++++----- mm/cma.h | 1 + mm/cma_debug.c | 5 +---- mm/cma_sysfs.c | 20 +++++++++++++++++ 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma index dfd755201142..aaf2a5d8b13b 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-cma +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma @@ -29,3 +29,16 @@ Date: Feb 2024 Contact: Anshuman Khandual Description: the number of pages CMA API succeeded to release + +What: /sys/kernel/mm/cma//total_pages +Date: Jun 2024 +Contact: Frank van der Linden +Description: + The size of the CMA area in pages. + +What: /sys/kernel/mm/cma//available_pages +Date: Jun 2024 +Contact: Frank van der Linden +Description: + The number of pages in the CMA area that are still + available for CMA allocation. diff --git a/mm/cma.c b/mm/cma.c index de5bc0c81fc2..95a8788e54d3 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -86,6 +86,7 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, spin_lock_irqsave(&cma->lock, flags); bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); + cma->available_count += count; spin_unlock_irqrestore(&cma->lock, flags); } @@ -133,7 +134,7 @@ static void __init cma_activate_area(struct cma *cma) free_reserved_page(pfn_to_page(pfn)); } totalcma_pages -= cma->count; - cma->count = 0; + cma->available_count = cma->count = 0; pr_err("CMA area %s could not be activated\n", cma->name); } @@ -206,7 +207,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); cma->base_pfn = PFN_DOWN(base); - cma->count = size >> PAGE_SHIFT; + cma->available_count = cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; *res_cma = cma; cma_area_count++; @@ -390,7 +391,7 @@ static void cma_debug_show_areas(struct cma *cma) { unsigned long next_zero_bit, next_set_bit, nr_zero; unsigned long start = 0; - unsigned long nr_part, nr_total = 0; + unsigned long nr_part; unsigned long nbits = cma_bitmap_maxno(cma); spin_lock_irq(&cma->lock); @@ -402,12 +403,12 @@ static void cma_debug_show_areas(struct cma *cma) next_set_bit = find_next_bit(cma->bitmap, nbits, next_zero_bit); nr_zero = next_set_bit - next_zero_bit; nr_part = nr_zero << cma->order_per_bit; - pr_cont("%s%lu@%lu", nr_total ? "+" : "", nr_part, + pr_cont("%s%lu@%lu", start ? "+" : "", nr_part, next_zero_bit); - nr_total += nr_part; start = next_zero_bit + nr_zero; } - pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count); + pr_cont("=> %lu free of %lu total pages\n", cma->available_count, + cma->count); spin_unlock_irq(&cma->lock); } @@ -444,6 +445,14 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, for (;;) { spin_lock_irq(&cma->lock); + /* + * If the request is larger than the available number + * of pages, stop right away. + */ + if (count > cma->available_count) { + spin_unlock_irq(&cma->lock); + break; + } bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap, bitmap_maxno, start, bitmap_count, mask, offset); @@ -452,6 +461,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, break; } bitmap_set(cma->bitmap, bitmap_no, bitmap_count); + cma->available_count -= count; /* * It's safe to drop the lock here. We've marked this region for * our exclusive use. If the migration fails we will take the diff --git a/mm/cma.h b/mm/cma.h index 8485ef893e99..3dd3376ae980 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -13,6 +13,7 @@ struct cma_kobject { struct cma { unsigned long base_pfn; unsigned long count; + unsigned long available_count; unsigned long *bitmap; unsigned int order_per_bit; /* Order of pages represented by one bit */ spinlock_t lock; diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 602fff89b15f..89236f22230a 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -34,13 +34,10 @@ DEFINE_DEBUGFS_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n"); static int cma_used_get(void *data, u64 *val) { struct cma *cma = data; - unsigned long used; spin_lock_irq(&cma->lock); - /* pages counter is smaller than sizeof(int) */ - used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma)); + *val = cma->count - cma->available_count; spin_unlock_irq(&cma->lock); - *val = (u64)used << cma->order_per_bit; return 0; } diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c index f50db3973171..97acd3e5a6a5 100644 --- a/mm/cma_sysfs.c +++ b/mm/cma_sysfs.c @@ -62,6 +62,24 @@ static ssize_t release_pages_success_show(struct kobject *kobj, } CMA_ATTR_RO(release_pages_success); +static ssize_t total_pages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct cma *cma = cma_from_kobj(kobj); + + return sysfs_emit(buf, "%lu\n", cma->count); +} +CMA_ATTR_RO(total_pages); + +static ssize_t available_pages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct cma *cma = cma_from_kobj(kobj); + + return sysfs_emit(buf, "%lu\n", cma->available_count); +} +CMA_ATTR_RO(available_pages); + static void cma_kobj_release(struct kobject *kobj) { struct cma *cma = cma_from_kobj(kobj); @@ -75,6 +93,8 @@ static struct attribute *cma_attrs[] = { &alloc_pages_success_attr.attr, &alloc_pages_fail_attr.attr, &release_pages_success_attr.attr, + &total_pages_attr.attr, + &available_pages_attr.attr, NULL, }; ATTRIBUTE_GROUPS(cma); From c009da4258f9885c5a3749fc004870db9c0e7a99 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:03 +0000 Subject: [PATCH 0951/2157] mm, cma: support multiple contiguous ranges, if requested Currently, CMA manages one range of physically contiguous memory. Creation of larger CMA areas with hugetlb_cma may run in to gaps in physical memory, so that they are not able to allocate that contiguous physical range from memblock when creating the CMA area. This can happen, for example, on an AMD system with > 1TB of memory, where there will be a gap just below the 1TB (40bit DMA) line. If you have set aside most of memory for potential hugetlb CMA allocation, cma_declare_contiguous_nid will fail. hugetlb_cma doesn't need the entire area to be one physically contiguous range. It just cares about being able to get physically contiguous chunks of a certain size (e.g. 1G), and it is fine to have the CMA area backed by multiple physical ranges, as long as it gets 1G contiguous allocations. Multi-range support is implemented by introducing an array of ranges, instead of just one big one. Each range has its own bitmap. Effectively, the allocate and release operations work as before, just per-range. So, instead of going through one large bitmap, they now go through a number of smaller ones. The maximum number of supported ranges is 8, as defined in CMA_MAX_RANGES. Since some current users of CMA expect a CMA area to just use one physically contiguous range, only allow for multiple ranges if a new interface, cma_declare_contiguous_nid_multi, is used. The other interfaces will work like before, creating only CMA areas with 1 range. cma_declare_contiguous_nid_multi works as follows, mimicking the default "bottom-up, above 4G" reservation approach: 0) Try cma_declare_contiguous_nid, which will use only one region. If this succeeds, return. This makes sure that for all the cases that currently work, the behavior remains unchanged even if the caller switches from cma_declare_contiguous_nid to cma_declare_contiguous_nid_multi. 1) Select the largest free memblock ranges above 4G, with a maximum number of CMA_MAX_RANGES. 2) If we did not find at most CMA_MAX_RANGES that add up to the total size requested, return -ENOMEM. 3) Sort the selected ranges by base address. 4) Reserve them bottom-up until we get what we wanted. Link: https://lkml.kernel.org/r/20250228182928.2645936-3-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Arnd Bergmann Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/cma_debugfs.rst | 10 +- include/linux/cma.h | 3 + mm/cma.c | 596 +++++++++++++++---- mm/cma.h | 27 +- mm/cma_debug.c | 56 +- 5 files changed, 551 insertions(+), 141 deletions(-) diff --git a/Documentation/admin-guide/mm/cma_debugfs.rst b/Documentation/admin-guide/mm/cma_debugfs.rst index 7367e6294ef6..4120e9cb0cd5 100644 --- a/Documentation/admin-guide/mm/cma_debugfs.rst +++ b/Documentation/admin-guide/mm/cma_debugfs.rst @@ -12,10 +12,16 @@ its CMA name like below: The structure of the files created under that directory is as follows: - - [RO] base_pfn: The base PFN (Page Frame Number) of the zone. + - [RO] base_pfn: The base PFN (Page Frame Number) of the CMA area. + This is the same as ranges/0/base_pfn. - [RO] count: Amount of memory in the CMA area. - [RO] order_per_bit: Order of pages represented by one bit. - - [RO] bitmap: The bitmap of page states in the zone. + - [RO] bitmap: The bitmap of allocated pages in the area. + This is the same as ranges/0/base_pfn. + - [RO] ranges/N/base_pfn: The base PFN of contiguous range N + in the CMA area. + - [RO] ranges/N/bitmap: The bit map of allocated pages in + range N in the CMA area. - [WO] alloc: Allocate N pages from that CMA area. For example:: echo 5 > /cma//alloc diff --git a/include/linux/cma.h b/include/linux/cma.h index d15b64f51336..863427c27dc2 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -40,6 +40,9 @@ static inline int __init cma_declare_contiguous(phys_addr_t base, return cma_declare_contiguous_nid(base, size, limit, alignment, order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); } +extern int __init cma_declare_contiguous_multi(phys_addr_t size, + phys_addr_t align, unsigned int order_per_bit, + const char *name, struct cma **res_cma, int nid); extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, diff --git a/mm/cma.c b/mm/cma.c index 95a8788e54d3..34caa6b29c99 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -35,9 +36,16 @@ struct cma cma_areas[MAX_CMA_AREAS]; unsigned int cma_area_count; static DEFINE_MUTEX(cma_mutex); +static int __init __cma_declare_contiguous_nid(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma, + int nid); + phys_addr_t cma_get_base(const struct cma *cma) { - return PFN_PHYS(cma->base_pfn); + WARN_ON_ONCE(cma->nranges != 1); + return PFN_PHYS(cma->ranges[0].base_pfn); } unsigned long cma_get_size(const struct cma *cma) @@ -63,9 +71,10 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, + const struct cma_memrange *cmr, unsigned int align_order) { - return (cma->base_pfn & ((1UL << align_order) - 1)) + return (cmr->base_pfn & ((1UL << align_order) - 1)) >> cma->order_per_bit; } @@ -75,46 +84,57 @@ static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; } -static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, - unsigned long count) +static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr, + unsigned long pfn, unsigned long count) { unsigned long bitmap_no, bitmap_count; unsigned long flags; - bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; + bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit; bitmap_count = cma_bitmap_pages_to_bits(cma, count); spin_lock_irqsave(&cma->lock, flags); - bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); + bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count); cma->available_count += count; spin_unlock_irqrestore(&cma->lock, flags); } static void __init cma_activate_area(struct cma *cma) { - unsigned long base_pfn = cma->base_pfn, pfn; + unsigned long pfn, base_pfn; + int allocrange, r; struct zone *zone; + struct cma_memrange *cmr; - cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL); - if (!cma->bitmap) - goto out_error; - - /* - * alloc_contig_range() requires the pfn range specified to be in the - * same zone. Simplify by forcing the entire CMA resv range to be in the - * same zone. - */ - WARN_ON_ONCE(!pfn_valid(base_pfn)); - zone = page_zone(pfn_to_page(base_pfn)); - for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - if (page_zone(pfn_to_page(pfn)) != zone) - goto not_in_zone; + for (allocrange = 0; allocrange < cma->nranges; allocrange++) { + cmr = &cma->ranges[allocrange]; + cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr), + GFP_KERNEL); + if (!cmr->bitmap) + goto cleanup; } - for (pfn = base_pfn; pfn < base_pfn + cma->count; - pfn += pageblock_nr_pages) - init_cma_reserved_pageblock(pfn_to_page(pfn)); + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + base_pfn = cmr->base_pfn; + + /* + * alloc_contig_range() requires the pfn range specified + * to be in the same zone. Simplify by forcing the entire + * CMA resv range to be in the same zone. + */ + WARN_ON_ONCE(!pfn_valid(base_pfn)); + zone = page_zone(pfn_to_page(base_pfn)); + for (pfn = base_pfn + 1; pfn < base_pfn + cmr->count; pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + if (page_zone(pfn_to_page(pfn)) != zone) + goto cleanup; + } + + for (pfn = base_pfn; pfn < base_pfn + cmr->count; + pfn += pageblock_nr_pages) + init_cma_reserved_pageblock(pfn_to_page(pfn)); + } spin_lock_init(&cma->lock); @@ -125,13 +145,19 @@ static void __init cma_activate_area(struct cma *cma) return; -not_in_zone: - bitmap_free(cma->bitmap); -out_error: +cleanup: + for (r = 0; r < allocrange; r++) + bitmap_free(cma->ranges[r].bitmap); + /* Expose all pages to the buddy, they are useless for CMA. */ if (!cma->reserve_pages_on_error) { - for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++) - free_reserved_page(pfn_to_page(pfn)); + for (r = 0; r < allocrange; r++) { + cmr = &cma->ranges[r]; + for (pfn = cmr->base_pfn; + pfn < cmr->base_pfn + cmr->count; + pfn++) + free_reserved_page(pfn_to_page(pfn)); + } } totalcma_pages -= cma->count; cma->available_count = cma->count = 0; @@ -154,6 +180,43 @@ void __init cma_reserve_pages_on_error(struct cma *cma) cma->reserve_pages_on_error = true; } +static int __init cma_new_area(const char *name, phys_addr_t size, + unsigned int order_per_bit, + struct cma **res_cma) +{ + struct cma *cma; + + if (cma_area_count == ARRAY_SIZE(cma_areas)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma = &cma_areas[cma_area_count]; + cma_area_count++; + + if (name) + snprintf(cma->name, CMA_MAX_NAME, "%s", name); + else + snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); + + cma->available_count = cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; + *res_cma = cma; + totalcma_pages += cma->count; + + return 0; +} + +static void __init cma_drop_area(struct cma *cma) +{ + totalcma_pages -= cma->count; + cma_area_count--; +} + /** * cma_init_reserved_mem() - create custom contiguous area from reserved memory * @base: Base address of the reserved area @@ -172,13 +235,9 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, struct cma **res_cma) { struct cma *cma; + int ret; /* Sanity checks */ - if (cma_area_count == ARRAY_SIZE(cma_areas)) { - pr_err("Not enough slots for CMA reserved regions!\n"); - return -ENOSPC; - } - if (!size || !memblock_is_region_reserved(base, size)) return -EINVAL; @@ -195,27 +254,263 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES)) return -EINVAL; - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma = &cma_areas[cma_area_count]; + ret = cma_new_area(name, size, order_per_bit, &cma); + if (ret != 0) + return ret; - if (name) - snprintf(cma->name, CMA_MAX_NAME, name); - else - snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); + cma->ranges[0].base_pfn = PFN_DOWN(base); + cma->ranges[0].count = cma->count; + cma->nranges = 1; - cma->base_pfn = PFN_DOWN(base); - cma->available_count = cma->count = size >> PAGE_SHIFT; - cma->order_per_bit = order_per_bit; *res_cma = cma; - cma_area_count++; - totalcma_pages += cma->count; return 0; } +/* + * Structure used while walking physical memory ranges and finding out + * which one(s) to use for a CMA area. + */ +struct cma_init_memrange { + phys_addr_t base; + phys_addr_t size; + struct list_head list; +}; + +/* + * Work array used during CMA initialization. + */ +static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata; + +static bool __init revsizecmp(struct cma_init_memrange *mlp, + struct cma_init_memrange *mrp) +{ + return mlp->size > mrp->size; +} + +static bool __init basecmp(struct cma_init_memrange *mlp, + struct cma_init_memrange *mrp) +{ + return mlp->base < mrp->base; +} + +/* + * Helper function to create sorted lists. + */ +static void __init list_insert_sorted( + struct list_head *ranges, + struct cma_init_memrange *mrp, + bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh)) +{ + struct list_head *mp; + struct cma_init_memrange *mlp; + + if (list_empty(ranges)) + list_add(&mrp->list, ranges); + else { + list_for_each(mp, ranges) { + mlp = list_entry(mp, struct cma_init_memrange, list); + if (cmp(mlp, mrp)) + break; + } + __list_add(&mrp->list, mlp->list.prev, &mlp->list); + } +} + +/* + * Create CMA areas with a total size of @total_size. A normal allocation + * for one area is tried first. If that fails, the biggest memblock + * ranges above 4G are selected, and allocated bottom up. + * + * The complexity here is not great, but this function will only be + * called during boot, and the lists operated on have fewer than + * CMA_MAX_RANGES elements (default value: 8). + */ +int __init cma_declare_contiguous_multi(phys_addr_t total_size, + phys_addr_t align, unsigned int order_per_bit, + const char *name, struct cma **res_cma, int nid) +{ + phys_addr_t start, end; + phys_addr_t size, sizesum, sizeleft; + struct cma_init_memrange *mrp, *mlp, *failed; + struct cma_memrange *cmrp; + LIST_HEAD(ranges); + LIST_HEAD(final_ranges); + struct list_head *mp, *next; + int ret, nr = 1; + u64 i; + struct cma *cma; + + /* + * First, try it the normal way, producing just one range. + */ + ret = __cma_declare_contiguous_nid(0, total_size, 0, align, + order_per_bit, false, name, res_cma, nid); + if (ret != -ENOMEM) + goto out; + + /* + * Couldn't find one range that fits our needs, so try multiple + * ranges. + * + * No need to do the alignment checks here, the call to + * cma_declare_contiguous_nid above would have caught + * any issues. With the checks, we know that: + * + * - @align is a power of 2 + * - @align is >= pageblock alignment + * - @size is aligned to @align and to @order_per_bit + * + * So, as long as we create ranges that have a base + * aligned to @align, and a size that is aligned to + * both @align and @order_to_bit, things will work out. + */ + nr = 0; + sizesum = 0; + failed = NULL; + + ret = cma_new_area(name, total_size, order_per_bit, &cma); + if (ret != 0) + goto out; + + align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); + /* + * Create a list of ranges above 4G, largest range first. + */ + for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { + if (upper_32_bits(start) == 0) + continue; + + start = ALIGN(start, align); + if (start >= end) + continue; + + end = ALIGN_DOWN(end, align); + if (end <= start) + continue; + + size = end - start; + size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit)); + if (!size) + continue; + sizesum += size; + + pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end); + + /* + * If we don't yet have used the maximum number of + * areas, grab a new one. + * + * If we can't use anymore, see if this range is not + * smaller than the smallest one already recorded. If + * not, re-use the smallest element. + */ + if (nr < CMA_MAX_RANGES) + mrp = &memranges[nr++]; + else { + mrp = list_last_entry(&ranges, + struct cma_init_memrange, list); + if (size < mrp->size) + continue; + list_del(&mrp->list); + sizesum -= mrp->size; + pr_debug("deleted %016llx - %016llx from the list\n", + (u64)mrp->base, (u64)mrp->base + size); + } + mrp->base = start; + mrp->size = size; + + /* + * Now do a sorted insert. + */ + list_insert_sorted(&ranges, mrp, revsizecmp); + pr_debug("added %016llx - %016llx to the list\n", + (u64)mrp->base, (u64)mrp->base + size); + pr_debug("total size now %llu\n", (u64)sizesum); + } + + /* + * There is not enough room in the CMA_MAX_RANGES largest + * ranges, so bail out. + */ + if (sizesum < total_size) { + cma_drop_area(cma); + ret = -ENOMEM; + goto out; + } + + /* + * Found ranges that provide enough combined space. + * Now, sorted them by address, smallest first, because we + * want to mimic a bottom-up memblock allocation. + */ + sizesum = 0; + list_for_each_safe(mp, next, &ranges) { + mlp = list_entry(mp, struct cma_init_memrange, list); + list_del(mp); + list_insert_sorted(&final_ranges, mlp, basecmp); + sizesum += mlp->size; + if (sizesum >= total_size) + break; + } + + /* + * Walk the final list, and add a CMA range for + * each range, possibly not using the last one fully. + */ + nr = 0; + sizeleft = total_size; + list_for_each(mp, &final_ranges) { + mlp = list_entry(mp, struct cma_init_memrange, list); + size = min(sizeleft, mlp->size); + if (memblock_reserve(mlp->base, size)) { + /* + * Unexpected error. Could go on to + * the next one, but just abort to + * be safe. + */ + failed = mlp; + break; + } + + pr_debug("created region %d: %016llx - %016llx\n", + nr, (u64)mlp->base, (u64)mlp->base + size); + cmrp = &cma->ranges[nr++]; + cmrp->base_pfn = PHYS_PFN(mlp->base); + cmrp->count = size >> PAGE_SHIFT; + + sizeleft -= size; + if (sizeleft == 0) + break; + } + + if (failed) { + list_for_each(mp, &final_ranges) { + mlp = list_entry(mp, struct cma_init_memrange, list); + if (mlp == failed) + break; + memblock_phys_free(mlp->base, mlp->size); + } + cma_drop_area(cma); + ret = -ENOMEM; + goto out; + } + + cma->nranges = nr; + *res_cma = cma; + +out: + if (ret != 0) + pr_err("Failed to reserve %lu MiB\n", + (unsigned long)total_size / SZ_1M); + else + pr_info("Reserved %lu MiB in %d range%s\n", + (unsigned long)total_size / SZ_1M, nr, + nr > 1 ? "s" : ""); + + return ret; +} + /** * cma_declare_contiguous_nid() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any @@ -241,6 +536,26 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, int nid) +{ + int ret; + + ret = __cma_declare_contiguous_nid(base, size, limit, alignment, + order_per_bit, fixed, name, res_cma, nid); + if (ret != 0) + pr_err("Failed to reserve %ld MiB\n", + (unsigned long)size / SZ_1M); + else + pr_info("Reserved %ld MiB at %pa\n", + (unsigned long)size / SZ_1M, &base); + + return ret; +} + +static int __init __cma_declare_contiguous_nid(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma, + int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; @@ -273,10 +588,9 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, /* Sanitise input arguments. */ alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES); if (fixed && base & (alignment - 1)) { - ret = -EINVAL; pr_err("Region at %pa must be aligned to %pa bytes\n", &base, &alignment); - goto err; + return -EINVAL; } base = ALIGN(base, alignment); size = ALIGN(size, alignment); @@ -294,10 +608,9 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, * low/high memory boundary. */ if (fixed && base < highmem_start && base + size > highmem_start) { - ret = -EINVAL; pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", &base, &highmem_start); - goto err; + return -EINVAL; } /* @@ -309,18 +622,16 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, limit = memblock_end; if (base + size > limit) { - ret = -EINVAL; pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", &size, &base, &limit); - goto err; + return -EINVAL; } /* Reserve memory */ if (fixed) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { - ret = -EBUSY; - goto err; + return -EBUSY; } } else { phys_addr_t addr = 0; @@ -357,10 +668,8 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, if (!addr) { addr = memblock_alloc_range_nid(size, alignment, base, limit, nid, true); - if (!addr) { - ret = -ENOMEM; - goto err; - } + if (!addr) + return -ENOMEM; } /* @@ -373,75 +682,67 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); if (ret) - goto free_mem; + memblock_phys_free(base, size); - pr_info("Reserved %ld MiB at %pa on node %d\n", (unsigned long)size / SZ_1M, - &base, nid); - return 0; - -free_mem: - memblock_phys_free(base, size); -err: - pr_err("Failed to reserve %ld MiB on node %d\n", (unsigned long)size / SZ_1M, - nid); return ret; } static void cma_debug_show_areas(struct cma *cma) { unsigned long next_zero_bit, next_set_bit, nr_zero; - unsigned long start = 0; + unsigned long start; unsigned long nr_part; - unsigned long nbits = cma_bitmap_maxno(cma); + unsigned long nbits; + int r; + struct cma_memrange *cmr; spin_lock_irq(&cma->lock); pr_info("number of available pages: "); - for (;;) { - next_zero_bit = find_next_zero_bit(cma->bitmap, nbits, start); - if (next_zero_bit >= nbits) - break; - next_set_bit = find_next_bit(cma->bitmap, nbits, next_zero_bit); - nr_zero = next_set_bit - next_zero_bit; - nr_part = nr_zero << cma->order_per_bit; - pr_cont("%s%lu@%lu", start ? "+" : "", nr_part, - next_zero_bit); - start = next_zero_bit + nr_zero; + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + + start = 0; + nbits = cma_bitmap_maxno(cma, cmr); + + pr_info("range %d: ", r); + for (;;) { + next_zero_bit = find_next_zero_bit(cmr->bitmap, + nbits, start); + if (next_zero_bit >= nbits) + break; + next_set_bit = find_next_bit(cmr->bitmap, nbits, + next_zero_bit); + nr_zero = next_set_bit - next_zero_bit; + nr_part = nr_zero << cma->order_per_bit; + pr_cont("%s%lu@%lu", start ? "+" : "", nr_part, + next_zero_bit); + start = next_zero_bit + nr_zero; + } + pr_info("\n"); } pr_cont("=> %lu free of %lu total pages\n", cma->available_count, cma->count); spin_unlock_irq(&cma->lock); } -static struct page *__cma_alloc(struct cma *cma, unsigned long count, - unsigned int align, gfp_t gfp) +static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, + unsigned long count, unsigned int align, + struct page **pagep, gfp_t gfp) { unsigned long mask, offset; unsigned long pfn = -1; unsigned long start = 0; unsigned long bitmap_maxno, bitmap_no, bitmap_count; - unsigned long i; + int ret = -EBUSY; struct page *page = NULL; - int ret = -ENOMEM; - const char *name = cma ? cma->name : NULL; - - trace_cma_alloc_start(name, count, align); - - if (!cma || !cma->count || !cma->bitmap) - return page; - - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__, - (void *)cma, cma->name, count, align); - - if (!count) - return page; mask = cma_bitmap_aligned_mask(cma, align); - offset = cma_bitmap_aligned_offset(cma, align); - bitmap_maxno = cma_bitmap_maxno(cma); + offset = cma_bitmap_aligned_offset(cma, cmr, align); + bitmap_maxno = cma_bitmap_maxno(cma, cmr); bitmap_count = cma_bitmap_pages_to_bits(cma, count); if (bitmap_count > bitmap_maxno) - return page; + goto out; for (;;) { spin_lock_irq(&cma->lock); @@ -453,14 +754,14 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, spin_unlock_irq(&cma->lock); break; } - bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap, + bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap, bitmap_maxno, start, bitmap_count, mask, offset); if (bitmap_no >= bitmap_maxno) { spin_unlock_irq(&cma->lock); break; } - bitmap_set(cma->bitmap, bitmap_no, bitmap_count); + bitmap_set(cmr->bitmap, bitmap_no, bitmap_count); cma->available_count -= count; /* * It's safe to drop the lock here. We've marked this region for @@ -469,7 +770,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, */ spin_unlock_irq(&cma->lock); - pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); + pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); mutex_lock(&cma_mutex); ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp); mutex_unlock(&cma_mutex); @@ -478,7 +779,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, break; } - cma_clear_bitmap(cma, pfn, count); + cma_clear_bitmap(cma, cmr, pfn, count); if (ret != -EBUSY) break; @@ -490,6 +791,38 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, /* try again with a bit different memory target */ start = bitmap_no + mask + 1; } +out: + *pagep = page; + return ret; +} + +static struct page *__cma_alloc(struct cma *cma, unsigned long count, + unsigned int align, gfp_t gfp) +{ + struct page *page = NULL; + int ret = -ENOMEM, r; + unsigned long i; + const char *name = cma ? cma->name : NULL; + + trace_cma_alloc_start(name, count, align); + + if (!cma || !cma->count) + return page; + + pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__, + (void *)cma, cma->name, count, align); + + if (!count) + return page; + + for (r = 0; r < cma->nranges; r++) { + page = NULL; + + ret = cma_range_alloc(cma, &cma->ranges[r], count, align, + &page, gfp); + if (ret != -EBUSY || page) + break; + } /* * CMA can allocate multiple page blocks, which results in different @@ -508,7 +841,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, } pr_debug("%s(): returned %p\n", __func__, page); - trace_cma_alloc_finish(name, pfn, page, count, align, ret); + trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0, + page, count, align, ret); if (page) { count_vm_event(CMA_ALLOC_SUCCESS); cma_sysfs_account_success_pages(cma, count); @@ -551,20 +885,31 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count) { - unsigned long pfn; + unsigned long pfn, end; + int r; + struct cma_memrange *cmr; + bool ret; - if (!cma || !pages) + if (!cma || !pages || count > cma->count) return false; pfn = page_to_pfn(pages); + ret = false; - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) { - pr_debug("%s(page %p, count %lu)\n", __func__, - (void *)pages, count); - return false; + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + end = cmr->base_pfn + cmr->count; + if (pfn >= cmr->base_pfn && pfn < end) { + ret = pfn + count <= end; + break; + } } - return true; + if (!ret) + pr_debug("%s(page %p, count %lu)\n", + __func__, (void *)pages, count); + + return ret; } /** @@ -580,19 +925,32 @@ bool cma_pages_valid(struct cma *cma, const struct page *pages, bool cma_release(struct cma *cma, const struct page *pages, unsigned long count) { - unsigned long pfn; + struct cma_memrange *cmr; + unsigned long pfn, end_pfn; + int r; + + pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); if (!cma_pages_valid(cma, pages, count)) return false; - pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); - pfn = page_to_pfn(pages); + end_pfn = pfn + count; - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + if (pfn >= cmr->base_pfn && + pfn < (cmr->base_pfn + cmr->count)) { + VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count); + break; + } + } + + if (r == cma->nranges) + return false; free_contig_range(pfn, count); - cma_clear_bitmap(cma, pfn, count); + cma_clear_bitmap(cma, cmr, pfn, count); cma_sysfs_account_release_pages(cma, count); trace_cma_release(cma->name, pfn, pages, count); diff --git a/mm/cma.h b/mm/cma.h index 3dd3376ae980..5f39dd1aac91 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -10,19 +10,35 @@ struct cma_kobject { struct cma *cma; }; +/* + * Multi-range support. This can be useful if the size of the allocation + * is not expected to be larger than the alignment (like with hugetlb_cma), + * and the total amount of memory requested, while smaller than the total + * amount of memory available, is large enough that it doesn't fit in a + * single physical memory range because of memory holes. + */ +struct cma_memrange { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; +#ifdef CONFIG_CMA_DEBUGFS + struct debugfs_u32_array dfs_bitmap; +#endif +}; +#define CMA_MAX_RANGES 8 + struct cma { - unsigned long base_pfn; unsigned long count; unsigned long available_count; - unsigned long *bitmap; unsigned int order_per_bit; /* Order of pages represented by one bit */ spinlock_t lock; #ifdef CONFIG_CMA_DEBUGFS struct hlist_head mem_head; spinlock_t mem_head_lock; - struct debugfs_u32_array dfs_bitmap; #endif char name[CMA_MAX_NAME]; + int nranges; + struct cma_memrange ranges[CMA_MAX_RANGES]; #ifdef CONFIG_CMA_SYSFS /* the number of CMA page successful allocations */ atomic64_t nr_pages_succeeded; @@ -39,9 +55,10 @@ struct cma { extern struct cma cma_areas[MAX_CMA_AREAS]; extern unsigned int cma_area_count; -static inline unsigned long cma_bitmap_maxno(struct cma *cma) +static inline unsigned long cma_bitmap_maxno(struct cma *cma, + struct cma_memrange *cmr) { - return cma->count >> cma->order_per_bit; + return cmr->count >> cma->order_per_bit; } #ifdef CONFIG_CMA_SYSFS diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 89236f22230a..fdf899532ca0 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -46,17 +46,26 @@ DEFINE_DEBUGFS_ATTRIBUTE(cma_used_fops, cma_used_get, NULL, "%llu\n"); static int cma_maxchunk_get(void *data, u64 *val) { struct cma *cma = data; + struct cma_memrange *cmr; unsigned long maxchunk = 0; - unsigned long start, end = 0; - unsigned long bitmap_maxno = cma_bitmap_maxno(cma); + unsigned long start, end; + unsigned long bitmap_maxno; + int r; spin_lock_irq(&cma->lock); - for (;;) { - start = find_next_zero_bit(cma->bitmap, bitmap_maxno, end); - if (start >= bitmap_maxno) - break; - end = find_next_bit(cma->bitmap, bitmap_maxno, start); - maxchunk = max(end - start, maxchunk); + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + bitmap_maxno = cma_bitmap_maxno(cma, cmr); + end = 0; + for (;;) { + start = find_next_zero_bit(cmr->bitmap, + bitmap_maxno, end); + if (start >= bitmap_maxno) + break; + end = find_next_bit(cmr->bitmap, bitmap_maxno, + start); + maxchunk = max(end - start, maxchunk); + } } spin_unlock_irq(&cma->lock); *val = (u64)maxchunk << cma->order_per_bit; @@ -159,24 +168,41 @@ DEFINE_DEBUGFS_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n"); static void cma_debugfs_add_one(struct cma *cma, struct dentry *root_dentry) { - struct dentry *tmp; + struct dentry *tmp, *dir, *rangedir; + int r; + char rdirname[12]; + struct cma_memrange *cmr; tmp = debugfs_create_dir(cma->name, root_dentry); debugfs_create_file("alloc", 0200, tmp, cma, &cma_alloc_fops); debugfs_create_file("free", 0200, tmp, cma, &cma_free_fops); - debugfs_create_file("base_pfn", 0444, tmp, - &cma->base_pfn, &cma_debugfs_fops); debugfs_create_file("count", 0444, tmp, &cma->count, &cma_debugfs_fops); debugfs_create_file("order_per_bit", 0444, tmp, &cma->order_per_bit, &cma_debugfs_fops); debugfs_create_file("used", 0444, tmp, cma, &cma_used_fops); debugfs_create_file("maxchunk", 0444, tmp, cma, &cma_maxchunk_fops); - cma->dfs_bitmap.array = (u32 *)cma->bitmap; - cma->dfs_bitmap.n_elements = DIV_ROUND_UP(cma_bitmap_maxno(cma), - BITS_PER_BYTE * sizeof(u32)); - debugfs_create_u32_array("bitmap", 0444, tmp, &cma->dfs_bitmap); + rangedir = debugfs_create_dir("ranges", tmp); + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + snprintf(rdirname, sizeof(rdirname), "%d", r); + dir = debugfs_create_dir(rdirname, rangedir); + debugfs_create_file("base_pfn", 0444, dir, + &cmr->base_pfn, &cma_debugfs_fops); + cmr->dfs_bitmap.array = (u32 *)cmr->bitmap; + cmr->dfs_bitmap.n_elements = + DIV_ROUND_UP(cma_bitmap_maxno(cma, cmr), + BITS_PER_BYTE * sizeof(u32)); + debugfs_create_u32_array("bitmap", 0444, dir, + &cmr->dfs_bitmap); + } + + /* + * Backward compatible symlinks to range 0 for base_pfn and bitmap. + */ + debugfs_create_symlink("base_pfn", tmp, "ranges/0/base_pfn"); + debugfs_create_symlink("bitmap", tmp, "ranges/0/bitmap"); } static int __init cma_debugfs_init(void) From 624ab90b7b8758090654520b03c97cecc438a414 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:04 +0000 Subject: [PATCH 0952/2157] mm/cma: introduce cma_intersects function Now that CMA areas can have multiple physical ranges, code can't assume a CMA struct represents a base_pfn plus a size, as returned from cma_get_base. Most cases are ok though, since they all explicitly refer to CMA areas that were created using existing interfaces (cma_declare_contiguous_nid or cma_init_reserved_mem), which guarantees they have just one physical range. An exception is the s390 code, which walks all CMA ranges to see if they intersect with a range of memory that is about to be hotremoved. So, in the future, it might run in to multi-range areas. To keep this check working, define a cma_intersects function. This just checks if a physaddr range intersects any of the ranges. Use it in the s390 check. Link: https://lkml.kernel.org/r/20250228182928.2645936-4-fvdl@google.com Signed-off-by: Frank van der Linden Acked-by: Alexander Gordeev Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/s390/mm/init.c | 13 +++++-------- include/linux/cma.h | 1 + mm/cma.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f2298f7a3f21..d88cb1c13f7d 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -239,16 +239,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, diff --git a/include/linux/cma.h b/include/linux/cma.h index 863427c27dc2..03d85c100dcc 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -53,6 +53,7 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); +extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern void cma_reserve_pages_on_error(struct cma *cma); diff --git a/mm/cma.c b/mm/cma.c index 34caa6b29c99..8dc46bfa3819 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -978,3 +978,24 @@ int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) return 0; } + +bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end) +{ + int r; + struct cma_memrange *cmr; + unsigned long rstart, rend; + + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + + rstart = PFN_PHYS(cmr->base_pfn); + rend = PFN_PHYS(cmr->base_pfn + cmr->count); + if (end < rstart) + continue; + if (start >= rend) + continue; + return true; + } + + return false; +} From 3dda0103e8ea4923d1a2f9d3c6b75aadc08aee73 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:05 +0000 Subject: [PATCH 0953/2157] mm, hugetlb: use cma_declare_contiguous_multi hugetlb_cma is fine with using multiple CMA ranges, as long as it can get its gigantic pages allocated from them. So, use cma_declare_contiguous_multi to allow for multiple ranges, increasing the chances of getting what we want on systems with gaps in physical memory. Link: https://lkml.kernel.org/r/20250228182928.2645936-5-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 318624c96584..2585d4da6c45 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7750,9 +7750,8 @@ void __init hugetlb_cma_reserve(int order) * may be returned to CMA allocator in the case of * huge page demotion. */ - res = cma_declare_contiguous_nid(0, size, 0, - PAGE_SIZE << order, - HUGETLB_PAGE_ORDER, false, name, + res = cma_declare_contiguous_multi(size, PAGE_SIZE << order, + HUGETLB_PAGE_ORDER, name, &hugetlb_cma[nid], nid); if (res) { pr_warn("hugetlb_cma: reservation failed: err %d, node %d", From 992e5491b6b83e4de28b14ee96347ff6bf16280a Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:06 +0000 Subject: [PATCH 0954/2157] mm/hugetlb: remove redundant __ClearPageReserved In hugetlb_folio_init_tail_vmemmap, the reserved flag is cleared for the tail page just before it is zeroed out, which is redundant. Remove the __ClearPageReserved call. Link: https://lkml.kernel.org/r/20250228182928.2645936-6-fvdl@google.com Signed-off-by: Frank van der Linden Reviewed-by: Oscar Salvador Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2585d4da6c45..d5f616d07e81 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3210,7 +3210,6 @@ static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio, for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); - __ClearPageReserved(folio_page(folio, pfn - head_pfn)); __init_single_page(page, pfn, zone, nid); prep_compound_tail((struct page *)folio, pfn - head_pfn); ret = page_ref_freeze(page, 1); From de55996d7188e7cd11b5e8d8f618467c8439feaa Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:07 +0000 Subject: [PATCH 0955/2157] mm/hugetlb: use online nodes for bootmem allocation Later commits will move hugetlb bootmem allocation to earlier in init, when N_MEMORY has not yet been set on nodes. Use online nodes instead. At most, this wastes just a few cycles once during boot (and most likely none). Link: https://lkml.kernel.org/r/20250228182928.2645936-7-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d5f616d07e81..38ff808fcc83 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3164,7 +3164,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) goto found; } /* allocate from next node when distributing huge pages */ - for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_MEMORY]) { + for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_ONLINE]) { m = memblock_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, node); @@ -4558,8 +4558,8 @@ void __init hugetlb_add_hstate(unsigned int order) for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&h->hugepage_freelists[i]); INIT_LIST_HEAD(&h->hugepage_activelist); - h->next_nid_to_alloc = first_memory_node; - h->next_nid_to_free = first_memory_node; + h->next_nid_to_alloc = first_online_node; + h->next_nid_to_free = first_online_node; snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", huge_page_size(h)/SZ_1K); From 5b47c02967ab770aa7661c8863a21b2fd59e35ff Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:08 +0000 Subject: [PATCH 0956/2157] mm/hugetlb: convert cmdline parameters from setup to early Convert the cmdline parameters (hugepagesz, hugepages, default_hugepagesz and hugetlb_free_vmemmap) to early parameters. Since parse_early_param might run before MMU setups on some platforms (powerpc), validation of huge page sizes as specified in command line parameters would fail. So instead, for the hstate-related values, just record the them and parse them on demand, from hugetlb_bootmem_alloc. The allocation of hugetlb bootmem pages is now done in hugetlb_bootmem_alloc, which is called explicitly at the start of mm_core_init(). core_initcall would be too late, as that happens with memblock already torn down. This change will allow earlier allocation and initialization of bootmem hugetlb pages later on. No functional change intended. Link: https://lkml.kernel.org/r/20250228182928.2645936-8-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 14 +- include/linux/hugetlb.h | 6 + mm/hugetlb.c | 133 ++++++++++++++---- mm/hugetlb_vmemmap.c | 6 +- mm/mm_init.c | 3 + 5 files changed, 126 insertions(+), 36 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8..ae21d911d1c7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1861,7 +1861,7 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. - hugepages= [HW] Number of HugeTLB pages to allocate at boot. + hugepages= [HW,EARLY] Number of HugeTLB pages to allocate at boot. If this follows hugepagesz (below), it specifies the number of pages of hugepagesz to be allocated. If this is the first HugeTLB parameter on the command @@ -1873,12 +1873,12 @@ :[,:] hugepagesz= - [HW] The size of the HugeTLB pages. This is used in - conjunction with hugepages (above) to allocate huge - pages of a specific size at boot. The pair - hugepagesz=X hugepages=Y can be specified once for - each supported huge page size. Huge page sizes are - architecture dependent. See also + [HW,EARLY] The size of the HugeTLB pages. This is + used in conjunction with hugepages (above) to + allocate huge pages of a specific size at boot. The + pair hugepagesz=X hugepages=Y can be specified once + for each supported huge page size. Huge page sizes + are architecture dependent. See also Documentation/admin-guide/mm/hugetlbpage.rst. Format: size[KMG] diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 76a75ec03dd6..a596aaa178d1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -174,6 +174,8 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages[MAX_NUMNODES]; +void hugetlb_bootmem_alloc(void); + /* arch callbacks */ #ifndef CONFIG_HIGHPTE @@ -1257,6 +1259,10 @@ static inline bool hugetlbfs_pagecache_present( { return false; } + +static inline void hugetlb_bootmem_alloc(void) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 38ff808fcc83..d1178059a52b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,24 @@ static unsigned long hugetlb_cma_size __initdata; __initdata struct list_head huge_boot_pages[MAX_NUMNODES]; +/* + * Due to ordering constraints across the init code for various + * architectures, hugetlb hstate cmdline parameters can't simply + * be early_param. early_param might call the setup function + * before valid hugetlb page sizes are determined, leading to + * incorrect rejection of valid hugepagesz= options. + * + * So, record the parameters early and consume them whenever the + * init code is ready for them, by calling hugetlb_parse_params(). + */ + +/* one (hugepagesz=,hugepages=) pair per hstate, one default_hugepagesz */ +#define HUGE_MAX_CMDLINE_ARGS (2 * HUGE_MAX_HSTATE + 1) +struct hugetlb_cmdline { + char *val; + int (*setup)(char *val); +}; + /* for command line parsing */ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; @@ -69,6 +88,20 @@ static bool __initdata parsed_valid_hugepagesz = true; static bool __initdata parsed_default_hugepagesz; static unsigned int default_hugepages_in_node[MAX_NUMNODES] __initdata; +static char hstate_cmdline_buf[COMMAND_LINE_SIZE] __initdata; +static int hstate_cmdline_index __initdata; +static struct hugetlb_cmdline hugetlb_params[HUGE_MAX_CMDLINE_ARGS] __initdata; +static int hugetlb_param_index __initdata; +static __init int hugetlb_add_param(char *s, int (*setup)(char *val)); +static __init void hugetlb_parse_params(void); + +#define hugetlb_early_param(str, func) \ +static __init int func##args(char *s) \ +{ \ + return hugetlb_add_param(s, func); \ +} \ +early_param(str, func##args) + /* * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages, * free_huge_pages, and surplus_huge_pages. @@ -3496,6 +3529,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < MAX_NUMNODES; i++) INIT_LIST_HEAD(&huge_boot_pages[i]); + h->next_nid_to_alloc = first_online_node; + h->next_nid_to_free = first_online_node; initialized = true; } @@ -4558,8 +4593,6 @@ void __init hugetlb_add_hstate(unsigned int order) for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&h->hugepage_freelists[i]); INIT_LIST_HEAD(&h->hugepage_activelist); - h->next_nid_to_alloc = first_online_node; - h->next_nid_to_free = first_online_node; snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", huge_page_size(h)/SZ_1K); @@ -4584,6 +4617,42 @@ static void __init hugepages_clear_pages_in_node(void) } } +static __init int hugetlb_add_param(char *s, int (*setup)(char *)) +{ + size_t len; + char *p; + + if (hugetlb_param_index >= HUGE_MAX_CMDLINE_ARGS) + return -EINVAL; + + len = strlen(s) + 1; + if (len + hstate_cmdline_index > sizeof(hstate_cmdline_buf)) + return -EINVAL; + + p = &hstate_cmdline_buf[hstate_cmdline_index]; + memcpy(p, s, len); + hstate_cmdline_index += len; + + hugetlb_params[hugetlb_param_index].val = p; + hugetlb_params[hugetlb_param_index].setup = setup; + + hugetlb_param_index++; + + return 0; +} + +static __init void hugetlb_parse_params(void) +{ + int i; + struct hugetlb_cmdline *hcp; + + for (i = 0; i < hugetlb_param_index; i++) { + hcp = &hugetlb_params[i]; + + hcp->setup(hcp->val); + } +} + /* * hugepages command line processing * hugepages normally follows a valid hugepagsz or default_hugepagsz @@ -4603,7 +4672,7 @@ static int __init hugepages_setup(char *s) if (!parsed_valid_hugepagesz) { pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s); parsed_valid_hugepagesz = true; - return 1; + return -EINVAL; } /* @@ -4657,24 +4726,16 @@ static int __init hugepages_setup(char *s) } } - /* - * Global state is always initialized later in hugetlb_init. - * But we need to allocate gigantic hstates here early to still - * use the bootmem allocator. - */ - if (hugetlb_max_hstate && hstate_is_gigantic(parsed_hstate)) - hugetlb_hstate_alloc_pages(parsed_hstate); - last_mhp = mhp; - return 1; + return 0; invalid: pr_warn("HugeTLB: Invalid hugepages parameter %s\n", p); hugepages_clear_pages_in_node(); - return 1; + return -EINVAL; } -__setup("hugepages=", hugepages_setup); +hugetlb_early_param("hugepages", hugepages_setup); /* * hugepagesz command line processing @@ -4693,7 +4754,7 @@ static int __init hugepagesz_setup(char *s) if (!arch_hugetlb_valid_size(size)) { pr_err("HugeTLB: unsupported hugepagesz=%s\n", s); - return 1; + return -EINVAL; } h = size_to_hstate(size); @@ -4708,7 +4769,7 @@ static int __init hugepagesz_setup(char *s) if (!parsed_default_hugepagesz || h != &default_hstate || default_hstate.max_huge_pages) { pr_warn("HugeTLB: hugepagesz=%s specified twice, ignoring\n", s); - return 1; + return -EINVAL; } /* @@ -4718,14 +4779,14 @@ static int __init hugepagesz_setup(char *s) */ parsed_hstate = h; parsed_valid_hugepagesz = true; - return 1; + return 0; } hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); parsed_valid_hugepagesz = true; - return 1; + return 0; } -__setup("hugepagesz=", hugepagesz_setup); +hugetlb_early_param("hugepagesz", hugepagesz_setup); /* * default_hugepagesz command line input @@ -4739,14 +4800,14 @@ static int __init default_hugepagesz_setup(char *s) parsed_valid_hugepagesz = false; if (parsed_default_hugepagesz) { pr_err("HugeTLB: default_hugepagesz previously specified, ignoring %s\n", s); - return 1; + return -EINVAL; } size = (unsigned long)memparse(s, NULL); if (!arch_hugetlb_valid_size(size)) { pr_err("HugeTLB: unsupported default_hugepagesz=%s\n", s); - return 1; + return -EINVAL; } hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); @@ -4763,17 +4824,33 @@ static int __init default_hugepagesz_setup(char *s) */ if (default_hstate_max_huge_pages) { default_hstate.max_huge_pages = default_hstate_max_huge_pages; - for_each_online_node(i) - default_hstate.max_huge_pages_node[i] = - default_hugepages_in_node[i]; - if (hstate_is_gigantic(&default_hstate)) - hugetlb_hstate_alloc_pages(&default_hstate); + /* + * Since this is an early parameter, we can't check + * NUMA node state yet, so loop through MAX_NUMNODES. + */ + for (i = 0; i < MAX_NUMNODES; i++) { + if (default_hugepages_in_node[i] != 0) + default_hstate.max_huge_pages_node[i] = + default_hugepages_in_node[i]; + } default_hstate_max_huge_pages = 0; } - return 1; + return 0; +} +hugetlb_early_param("default_hugepagesz", default_hugepagesz_setup); + +void __init hugetlb_bootmem_alloc(void) +{ + struct hstate *h; + + hugetlb_parse_params(); + + for_each_hstate(h) { + if (hstate_is_gigantic(h)) + hugetlb_hstate_alloc_pages(h); + } } -__setup("default_hugepagesz=", default_hugepagesz_setup); static unsigned int allowed_mems_nr(struct hstate *h) { diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 7735972add01..5b484758f813 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -444,7 +444,11 @@ DEFINE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); -core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); +static int __init hugetlb_vmemmap_optimize_param(char *buf) +{ + return kstrtobool(buf, &vmemmap_optimize_enabled); +} +early_param("hugetlb_free_vmemmap", hugetlb_vmemmap_optimize_param); static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio, unsigned long flags) diff --git a/mm/mm_init.c b/mm/mm_init.c index c767946e8f5f..45bc4b55fd6a 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "internal.h" #include "slab.h" #include "shuffle.h" @@ -2641,6 +2642,8 @@ static void __init mem_init_print_info(void) */ void __init mm_core_init(void) { + hugetlb_bootmem_alloc(); + /* Initializations relying on SMP setup */ BUILD_BUG_ON(MAX_ZONELISTS > 2); build_all_zonelists(NULL); From d3cd80c5879443c6986091717b6de889c60b3eb1 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:09 +0000 Subject: [PATCH 0957/2157] x86/mm: make register_page_bootmem_memmap handle PTE mappings register_page_bootmem_memmap expects that vmemmap pages handed to it are PMD-mapped, and that the number of pages to call get_page_bootmem on is PMD-aligned. This is currently a correct assumption, but will no longer be true once pre-HVO of hugetlb pages is implemented. Make it handle PTE-mapped vmemmap pages and a nr_pages argument that is not necessarily PAGES_PER_SECTION. Link: https://lkml.kernel.org/r/20250228182928.2645936-9-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Dan Carpenter Cc: Alexander Gordeev Cc: Arnd Bergmann Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/x86/mm/init_64.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 01ea7c6df303..6e8e4ef5312a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1599,11 +1599,14 @@ void register_page_bootmem_memmap(unsigned long section_nr, } get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); - if (!boot_cpu_has(X86_FEATURE_PSE)) { + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + + if (!boot_cpu_has(X86_FEATURE_PSE) || !pmd_leaf(*pmd)) { next = (addr + PAGE_SIZE) & PAGE_MASK; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - continue; get_page_bootmem(section_nr, pmd_page(*pmd), MIX_SECTION_INFO); @@ -1614,12 +1617,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, SECTION_INFO); } else { next = pmd_addr_end(addr, end); - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - continue; - - nr_pmd_pages = 1 << get_order(PMD_SIZE); + nr_pmd_pages = (next - addr) >> PAGE_SHIFT; page = pmd_page(*pmd); while (nr_pmd_pages--) get_page_bootmem(section_nr, page++, From 243a75e236802614075511266c8d1834d4bcffe9 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:10 +0000 Subject: [PATCH 0958/2157] mm/bootmem_info: export register_page_bootmem_memmap If other mm code wants to use this function for early memmap inialization (on the platforms that have it), it should be made available properly, not just unconditionally in mm.h Make this function available for such cases. Link: https://lkml.kernel.org/r/20250228182928.2645936-10-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/powerpc/mm/init_64.c | 4 ++++ include/linux/bootmem_info.h | 7 +++++++ include/linux/mm.h | 3 --- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d96bbc001e73..b6f3ae03ca9e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -386,10 +387,13 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, } #endif + +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) { } +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index d8a8d245824a..4c506e76a808 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -18,6 +18,8 @@ enum bootmem_type { #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE void __init register_page_bootmem_info_node(struct pglist_data *pgdat); +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, + unsigned long nr_pages); void get_page_bootmem(unsigned long info, struct page *page, enum bootmem_type type); @@ -58,6 +60,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } +static inline void register_page_bootmem_memmap(unsigned long section_nr, + struct page *map, unsigned long nr_pages) +{ +} + static inline void put_page_bootmem(struct page *page) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 14115c9949d8..e90ab0bdcc8c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4018,9 +4018,6 @@ static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, } #endif -void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, - unsigned long nr_pages); - enum mf_flags { MF_COUNT_INCREASED = 1 << 0, MF_ACTION_REQUIRED = 1 << 1, From d65917c42373f70159a3fc453f8f028fd665e04f Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:11 +0000 Subject: [PATCH 0959/2157] mm/sparse: allow for alternate vmemmap section init at boot Add functions that are called just before the per-section memmap is initialized and just before the memmap page structures are initialized. They are called sparse_vmemmap_init_nid_early and sparse_vmemmap_init_nid_late, respectively. This allows for mm subsystems to add calls to initialize memmap and page structures in a specific way, if using SPARSEMEM_VMEMMAP. Specifically, hugetlb can pre-HVO bootmem allocated pages that way, so that no time and resources are wasted on allocating vmemmap pages, only to free them later (and possibly unnecessarily running the system out of memory in the process). Refactor some code and export a few convenience functions for external use. In sparse_init_nid, skip any sections that are already initialized, e.g. they have been initialized by sparse_vmemmap_init_nid_early already. The hugetlb code to use these functions will be added in a later commit. Export section_map_size, as any alternate memmap init code will want to use it. The internal config option to enable this is SPARSEMEM_VMEMMAP_PREINIT, which is selected if an architecture-specific option, ARCH_WANT_HUGETLB_VMEMMAP_PREINIT, is set. In the future, if other subsystems want to do preinit too, they can do it in a similar fashion. The internal config option is there because a section flag is used, and the number of flags available is architecture-dependent (see mmzone.h). Architecures can decide if there is room for the flag when enabling options that select SPARSEMEM_VMEMMAP_PREINIT. Fortunately, as of right now, all sparse vmemmap using architectures do have room. Link: https://lkml.kernel.org/r/20250228182928.2645936-11-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Johannes Weiner Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- fs/Kconfig | 1 + include/linux/mm.h | 1 + include/linux/mmzone.h | 35 +++++++++++++++++ mm/Kconfig | 6 +++ mm/bootmem_info.c | 4 +- mm/mm_init.c | 3 ++ mm/sparse-vmemmap.c | 23 +++++++++++ mm/sparse.c | 87 ++++++++++++++++++++++++++++++++---------- 8 files changed, 138 insertions(+), 22 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 64d420e3c475..8bcd3a6f80ab 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -286,6 +286,7 @@ config HUGETLB_PAGE_OPTIMIZE_VMEMMAP def_bool HUGETLB_PAGE depends on ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP depends on SPARSEMEM_VMEMMAP + select SPARSEMEM_VMEMMAP_PREINIT if ARCH_WANT_HUGETLB_VMEMMAP_PREINIT config HUGETLB_PMD_PAGE_TABLE_SHARING def_bool HUGETLB_PAGE diff --git a/include/linux/mm.h b/include/linux/mm.h index e90ab0bdcc8c..03e4807bd911 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3928,6 +3928,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) #endif void *sparse_buffer_alloc(unsigned long size); +unsigned long section_map_size(void); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9540b41894da..44ecb2f90db4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1933,6 +1933,9 @@ enum { SECTION_IS_EARLY_BIT, #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, +#endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT + SECTION_IS_VMEMMAP_PREINIT_BIT, #endif SECTION_MAP_LAST_BIT, }; @@ -1944,6 +1947,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +#define SECTION_IS_VMEMMAP_PREINIT BIT(SECTION_IS_VMEMMAP_PREINIT_BIT) +#endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT @@ -1998,6 +2004,30 @@ static inline int online_device_section(struct mem_section *section) } #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return (section && + (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); +} + +void sparse_vmemmap_init_nid_early(int nid); +void sparse_vmemmap_init_nid_late(int nid); + +#else +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return 0; +} +static inline void sparse_vmemmap_init_nid_early(int nid) +{ +} + +static inline void sparse_vmemmap_init_nid_late(int nid) +{ +} +#endif + static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); @@ -2035,6 +2065,9 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) } #endif +void sparse_init_early_section(int nid, struct page *map, unsigned long pnum, + unsigned long flags); + #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN @@ -2116,6 +2149,8 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) +#define sparse_vmemmap_init_nid_early(_nid, _use) do {} while (0) +#define sparse_vmemmap_init_nid_late(_nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ diff --git a/mm/Kconfig b/mm/Kconfig index fba9757e5814..4c1640a197a0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -442,6 +442,9 @@ config SPARSEMEM_VMEMMAP SPARSEMEM_VMEMMAP uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn operations. This is the most efficient option when sufficient kernel resources are available. + +config SPARSEMEM_VMEMMAP_PREINIT + bool # # Select this config option from the architecture Kconfig, if it is preferred # to enable the feature of HugeTLB/dev_dax vmemmap optimization. @@ -452,6 +455,9 @@ config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP bool +config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT + bool + config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c index 95f288169a38..b0e2a9fa641f 100644 --- a/mm/bootmem_info.c +++ b/mm/bootmem_info.c @@ -88,7 +88,9 @@ static void __init register_page_bootmem_info_section(unsigned long start_pfn) memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); + if (!preinited_vmemmap_section(ms)) + register_page_bootmem_memmap(section_nr, memmap, + PAGES_PER_SECTION); usage = ms->usage; page = virt_to_page(usage); diff --git a/mm/mm_init.c b/mm/mm_init.c index 45bc4b55fd6a..56f4a8cfb764 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1862,6 +1862,9 @@ void __init free_area_init(unsigned long *max_zone_pfn) } } + for_each_node_state(nid, N_MEMORY) + sparse_vmemmap_init_nid_late(nid); + calc_nr_kernel_pages(); memmap_init(); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 3287ebadd167..8751c46c35e4 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -470,3 +470,26 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, return pfn_to_page(pfn); } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +/* + * This is called just before initializing sections for a NUMA node. + * Any special initialization that needs to be done before the + * generic initialization can be done from here. Sections that + * are initialized in hooks called from here will be skipped by + * the generic initialization. + */ +void __init sparse_vmemmap_init_nid_early(int nid) +{ +} + +/* + * This is called just before the initialization of page structures + * through memmap_init. Zones are now initialized, so any work that + * needs to be done that needs zone information can be done from + * here. + */ +void __init sparse_vmemmap_init_nid_late(int nid) +{ +} +#endif diff --git a/mm/sparse.c b/mm/sparse.c index 133b033d0cba..ee0234a77c7f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -408,13 +408,13 @@ static void __init check_usemap_section_nr(int nid, #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_SPARSEMEM_VMEMMAP -static unsigned long __init section_map_size(void) +unsigned long __init section_map_size(void) { return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); } #else -static unsigned long __init section_map_size(void) +unsigned long __init section_map_size(void) { return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); } @@ -495,6 +495,44 @@ void __weak __meminit vmemmap_populate_print_last(void) { } +static void *sparse_usagebuf __meminitdata; +static void *sparse_usagebuf_end __meminitdata; + +/* + * Helper function that is used for generic section initialization, and + * can also be used by any hooks added above. + */ +void __init sparse_init_early_section(int nid, struct page *map, + unsigned long pnum, unsigned long flags) +{ + BUG_ON(!sparse_usagebuf || sparse_usagebuf >= sparse_usagebuf_end); + check_usemap_section_nr(nid, sparse_usagebuf); + sparse_init_one_section(__nr_to_section(pnum), pnum, map, + sparse_usagebuf, SECTION_IS_EARLY | flags); + sparse_usagebuf = (void *)sparse_usagebuf + mem_section_usage_size(); +} + +static int __init sparse_usage_init(int nid, unsigned long map_count) +{ + unsigned long size; + + size = mem_section_usage_size() * map_count; + sparse_usagebuf = sparse_early_usemaps_alloc_pgdat_section( + NODE_DATA(nid), size); + if (!sparse_usagebuf) { + sparse_usagebuf_end = NULL; + return -ENOMEM; + } + + sparse_usagebuf_end = sparse_usagebuf + size; + return 0; +} + +static void __init sparse_usage_fini(void) +{ + sparse_usagebuf = sparse_usagebuf_end = NULL; +} + /* * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end) * And number of present sections in this node is map_count. @@ -503,47 +541,54 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count) { - struct mem_section_usage *usage; unsigned long pnum; struct page *map; + struct mem_section *ms; - usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), - mem_section_usage_size() * map_count); - if (!usage) { + if (sparse_usage_init(nid, map_count)) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } + sparse_buffer_init(map_count * section_map_size(), nid); + + sparse_vmemmap_init_nid_early(nid); + for_each_present_section_nr(pnum_begin, pnum) { unsigned long pfn = section_nr_to_pfn(pnum); if (pnum >= pnum_end) break; - map = __populate_section_memmap(pfn, PAGES_PER_SECTION, - nid, NULL, NULL); - if (!map) { - pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", - __func__, nid); - pnum_begin = pnum; - sparse_buffer_fini(); - goto failed; + ms = __nr_to_section(pnum); + if (!preinited_vmemmap_section(ms)) { + map = __populate_section_memmap(pfn, PAGES_PER_SECTION, + nid, NULL, NULL); + if (!map) { + pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", + __func__, nid); + pnum_begin = pnum; + sparse_usage_fini(); + sparse_buffer_fini(); + goto failed; + } + sparse_init_early_section(nid, map, pnum, 0); } - check_usemap_section_nr(nid, usage); - sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, - SECTION_IS_EARLY); - usage = (void *) usage + mem_section_usage_size(); } + sparse_usage_fini(); sparse_buffer_fini(); return; failed: - /* We failed to allocate, mark all the following pnums as not present */ + /* + * We failed to allocate, mark all the following pnums as not present, + * except the ones already initialized earlier. + */ for_each_present_section_nr(pnum_begin, pnum) { - struct mem_section *ms; - if (pnum >= pnum_end) break; ms = __nr_to_section(pnum); + if (!preinited_vmemmap_section(ms)) + ms->section_mem_map = 0; ms->section_mem_map = 0; } } From 3d61909cb7f89fd99523e94b7f25d34d87d86496 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:12 +0000 Subject: [PATCH 0960/2157] mm/hugetlb: set migratetype for bootmem folios The pageblocks that back memblock allocated hugetlb folios might not have the migrate type set, in the CONFIG_DEFERRED_STRUCT_PAGE_INIT case. memblock allocated hugetlb folios might be given to the buddy allocator eventually (if nr_hugepages is lowered), so make sure that the migrate type for the pageblocks contained in them is set when initializing them. Set it to the default that memmap init also uses (MIGRATE_MOVABLE). Link: https://lkml.kernel.org/r/20250228182928.2645936-12-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d1178059a52b..00facd2123e5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3266,6 +3266,26 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio, prep_compound_head((struct page *)folio, huge_page_order(h)); } +/* + * memblock-allocated pageblocks might not have the migrate type set + * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE) + * here. + * + * Note that this will not write the page struct, it is ok (and necessary) + * to do this on vmemmap optimized folios. + */ +static void __init hugetlb_bootmem_init_migratetype(struct folio *folio, + struct hstate *h) +{ + unsigned long nr_pages = pages_per_huge_page(h), i; + + WARN_ON_ONCE(!pageblock_aligned(folio_pfn(folio))); + + for (i = 0; i < nr_pages; i += pageblock_nr_pages) + set_pageblock_migratetype(folio_page(folio, i), + MIGRATE_MOVABLE); +} + static void __init prep_and_add_bootmem_folios(struct hstate *h, struct list_head *folio_list) { @@ -3287,6 +3307,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, HUGETLB_VMEMMAP_RESERVE_PAGES, pages_per_huge_page(h)); } + hugetlb_bootmem_init_migratetype(folio, h); /* Subdivide locks to achieve better parallel performance */ spin_lock_irqsave(&hugetlb_lock, flags); __prep_account_new_huge_page(h, folio_nid(folio)); From d69d8261a990843514c40aeef36a14add71daf4e Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:13 +0000 Subject: [PATCH 0961/2157] mm: define __init_reserved_page_zone function Sometimes page structs must be unconditionally initialized as reserved, regardless of DEFERRED_STRUCT_PAGE_INIT. Define a function, __init_reserved_page_zone, containing code that already did all of the work in init_reserved_page, and make it available for use. Link: https://lkml.kernel.org/r/20250228182928.2645936-13-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/internal.h | 1 + mm/mm_init.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 20b3535935a3..780c17b4003a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1449,6 +1449,7 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); +void __meminit __init_reserved_page_zone(unsigned long pfn, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, diff --git a/mm/mm_init.c b/mm/mm_init.c index 56f4a8cfb764..419b7db220d2 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -650,6 +650,28 @@ static inline void fixup_hashdist(void) static inline void fixup_hashdist(void) {} #endif /* CONFIG_NUMA */ +/* + * Initialize a reserved page unconditionally, finding its zone first. + */ +void __meminit __init_reserved_page_zone(unsigned long pfn, int nid) +{ + pg_data_t *pgdat; + int zid; + + pgdat = NODE_DATA(nid); + + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &pgdat->node_zones[zid]; + + if (zone_spans_pfn(zone, pfn)) + break; + } + __init_single_page(pfn_to_page(pfn), pfn, zid, nid); + + if (pageblock_aligned(pfn)) + set_pageblock_migratetype(pfn_to_page(pfn), MIGRATE_MOVABLE); +} + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static inline void pgdat_set_deferred_range(pg_data_t *pgdat) { @@ -708,24 +730,10 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) static void __meminit init_reserved_page(unsigned long pfn, int nid) { - pg_data_t *pgdat; - int zid; - if (early_page_initialised(pfn, nid)) return; - pgdat = NODE_DATA(nid); - - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - struct zone *zone = &pgdat->node_zones[zid]; - - if (zone_spans_pfn(zone, pfn)) - break; - } - __init_single_page(pfn_to_page(pfn), pfn, zid, nid); - - if (pageblock_aligned(pfn)) - set_pageblock_migratetype(pfn_to_page(pfn), MIGRATE_MOVABLE); + __init_reserved_page_zone(pfn, nid); } #else static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {} From 14ed3a595fa4e8f5bceddb91cbcd1ba566c9669b Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:14 +0000 Subject: [PATCH 0962/2157] mm/hugetlb: check bootmem pages for zone intersections Bootmem hugetlb pages are allocated using memblock, which isn't (and mostly can't be) aware of zones. So, they may end up crossing zone boundaries. This would create confusion, a hugetlb page that is part of multiple zones is bad. Worse, HVO might then end up stealthily re-assigning pages to a different zone when a hugetlb page is freed, since the tail page structures beyond the first vmemmap page would inherit the zone of the first page structures. While the chance of this happening is low, you can definitely create a configuration where this happens (especially using ZONE_MOVABLE). To avoid this issue, check if bootmem hugetlb pages intersect with multiple zones during the gather phase, and discard them, handing them to the page allocator, if they do. Record the number of invalid bootmem pages per node and subtract them from the number of available pages at the end, making it easier to do these checks in multiple places later on. Link: https://lkml.kernel.org/r/20250228182928.2645936-14-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- mm/internal.h | 2 ++ mm/mm_init.c | 25 +++++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 00facd2123e5..e4bf06f13178 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -62,6 +62,7 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; static unsigned long hugetlb_cma_size __initdata; __initdata struct list_head huge_boot_pages[MAX_NUMNODES]; +static unsigned long hstate_boot_nrinvalid[HUGE_MAX_HSTATE] __initdata; /* * Due to ordering constraints across the init code for various @@ -3316,6 +3317,44 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, } } +static bool __init hugetlb_bootmem_page_zones_valid(int nid, + struct huge_bootmem_page *m) +{ + unsigned long start_pfn; + bool valid; + + start_pfn = virt_to_phys(m) >> PAGE_SHIFT; + + valid = !pfn_range_intersects_zones(nid, start_pfn, + pages_per_huge_page(m->hstate)); + if (!valid) + hstate_boot_nrinvalid[hstate_index(m->hstate)]++; + + return valid; +} + +/* + * Free a bootmem page that was found to be invalid (intersecting with + * multiple zones). + * + * Since it intersects with multiple zones, we can't just do a free + * operation on all pages at once, but instead have to walk all + * pages, freeing them one by one. + */ +static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page, + struct hstate *h) +{ + unsigned long npages = pages_per_huge_page(h); + unsigned long pfn; + + while (npages--) { + pfn = page_to_pfn(page); + __init_reserved_page_zone(pfn, nid); + free_reserved_page(page); + page++; + } +} + /* * Put bootmem huge pages into the standard lists after mem_map is up. * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages. @@ -3323,14 +3362,25 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, static void __init gather_bootmem_prealloc_node(unsigned long nid) { LIST_HEAD(folio_list); - struct huge_bootmem_page *m; + struct huge_bootmem_page *m, *tm; struct hstate *h = NULL, *prev_h = NULL; - list_for_each_entry(m, &huge_boot_pages[nid], list) { + list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) { struct page *page = virt_to_page(m); struct folio *folio = (void *)page; h = m->hstate; + if (!hugetlb_bootmem_page_zones_valid(nid, m)) { + /* + * Can't use this page. Initialize the + * page structures if that hasn't already + * been done, and give them to the page + * allocator. + */ + hugetlb_bootmem_free_invalid_page(nid, page, h); + continue; + } + /* * It is possible to have multiple huge page sizes (hstates) * in this list. If so, process each size separately. @@ -3602,13 +3652,20 @@ static void __init hugetlb_init_hstates(void) static void __init report_hugepages(void) { struct hstate *h; + unsigned long nrinvalid; for_each_hstate(h) { char buf[32]; + nrinvalid = hstate_boot_nrinvalid[hstate_index(h)]; + h->max_huge_pages -= nrinvalid; + string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n", buf, h->free_huge_pages); + if (nrinvalid) + pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n", + buf, nrinvalid, nrinvalid > 1 ? "s" : ""); pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n", hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf); } diff --git a/mm/internal.h b/mm/internal.h index 780c17b4003a..8233c207d3f3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -658,6 +658,8 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, } void set_zone_contiguous(struct zone *zone); +bool pfn_range_intersects_zones(int nid, unsigned long start_pfn, + unsigned long nr_pages); static inline void clear_zone_contiguous(struct zone *zone) { diff --git a/mm/mm_init.c b/mm/mm_init.c index 419b7db220d2..3eec528afe43 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2287,6 +2287,31 @@ void set_zone_contiguous(struct zone *zone) zone->contiguous = true; } +/* + * Check if a PFN range intersects multiple zones on one or more + * NUMA nodes. Specify the @nid argument if it is known that this + * PFN range is on one node, NUMA_NO_NODE otherwise. + */ +bool pfn_range_intersects_zones(int nid, unsigned long start_pfn, + unsigned long nr_pages) +{ + struct zone *zone, *izone = NULL; + + for_each_zone(zone) { + if (nid != NUMA_NO_NODE && zone_to_nid(zone) != nid) + continue; + + if (zone_intersects(zone, start_pfn, nr_pages)) { + if (izone != NULL) + return true; + izone = zone; + } + + } + + return false; +} + static void __init mem_init_print_info(void); void __init page_alloc_init_late(void) { From 9eb6207b7812cee13431831c9661d21b53f3e93f Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:15 +0000 Subject: [PATCH 0963/2157] mm/sparse: add vmemmap_*_hvo functions Add a few functions to enable early HVO: vmemmap_populate_hvo vmemmap_undo_hvo vmemmap_wrprotect_hvo The populate and undo functions are expected to be used in early init, from the sparse_init_nid_early() function. The wrprotect function is to be used, potentially, later. To implement these functions, mostly re-use the existing compound pages vmemmap logic used by DAX. vmemmap_populate_address has its argument changed a bit in this commit: the page structure passed in to be reused in the mapping is replaced by a PFN and a flag. The flag indicates whether an extra ref should be taken on the vmemmap page containing the head page structure. Taking the ref is appropriate to for DAX / ZONE_DEVICE, but not for HugeTLB HVO. The HugeTLB vmemmap optimization maps tail page structure pages read-only. The vmemmap_wrprotect_hvo function that does this is implemented separately, because it cannot be guaranteed that reserved page structures will not be write accessed during memory initialization. Even with CONFIG_DEFERRED_STRUCT_PAGE_INIT, they might still be written to (if they are at the bottom of a zone). So, vmemmap_populate_hvo leaves the tail page structure pages RW initially, and then later during initialization, after memmap init is fully done, vmemmap_wrprotect_hvo must be called to finish the job. Subsequent commits will use these functions for early HugeTLB HVO. Link: https://lkml.kernel.org/r/20250228182928.2645936-15-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 ++- mm/sparse-vmemmap.c | 141 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 135 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 03e4807bd911..9a74a3ee68bc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3937,7 +3937,8 @@ p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, - struct vmem_altmap *altmap, struct page *reuse); + struct vmem_altmap *altmap, unsigned long ptpfn, + unsigned long flags); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, @@ -3953,6 +3954,12 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8751c46c35e4..8cc848c4b17c 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -30,6 +30,13 @@ #include #include +#include + +/* + * Flags for vmemmap_populate_range and friends. + */ +/* Get a ref on the head page struct page, for ZONE_DEVICE compound pages */ +#define VMEMMAP_POPULATE_PAGEREF 0x0001 #include "internal.h" @@ -144,17 +151,18 @@ void __meminit vmemmap_verify(pte_t *pte, int node, pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct vmem_altmap *altmap, - struct page *reuse) + unsigned long ptpfn, unsigned long flags) { pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(ptep_get(pte))) { pte_t entry; void *p; - if (!reuse) { + if (ptpfn == (unsigned long)-1) { p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (!p) return NULL; + ptpfn = PHYS_PFN(__pa(p)); } else { /* * When a PTE/PMD entry is freed from the init_mm @@ -165,10 +173,10 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, * and through vmemmap_populate_compound_pages() when * slab is available. */ - get_page(reuse); - p = page_to_virt(reuse); + if (flags & VMEMMAP_POPULATE_PAGEREF) + get_page(pfn_to_page(ptpfn)); } - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + entry = pfn_pte(ptpfn, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } return pte; @@ -238,7 +246,8 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node, struct vmem_altmap *altmap, - struct page *reuse) + unsigned long ptpfn, + unsigned long flags) { pgd_t *pgd; p4d_t *p4d; @@ -258,7 +267,7 @@ static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node, pmd = vmemmap_pmd_populate(pud, addr, node); if (!pmd) return NULL; - pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse); + pte = vmemmap_pte_populate(pmd, addr, node, altmap, ptpfn, flags); if (!pte) return NULL; vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); @@ -269,13 +278,15 @@ static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node, static int __meminit vmemmap_populate_range(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap, - struct page *reuse) + unsigned long ptpfn, + unsigned long flags) { unsigned long addr = start; pte_t *pte; for (; addr < end; addr += PAGE_SIZE) { - pte = vmemmap_populate_address(addr, node, altmap, reuse); + pte = vmemmap_populate_address(addr, node, altmap, + ptpfn, flags); if (!pte) return -ENOMEM; } @@ -286,7 +297,107 @@ static int __meminit vmemmap_populate_range(unsigned long start, int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - return vmemmap_populate_range(start, end, node, altmap, NULL); + return vmemmap_populate_range(start, end, node, altmap, -1, 0); +} + +/* + * Undo populate_hvo, and replace it with a normal base page mapping. + * Used in memory init in case a HVO mapping needs to be undone. + * + * This can happen when it is discovered that a memblock allocated + * hugetlb page spans multiple zones, which can only be verified + * after zones have been initialized. + * + * We know that: + * 1) The first @headsize / PAGE_SIZE vmemmap pages were individually + * allocated through memblock, and mapped. + * + * 2) The rest of the vmemmap pages are mirrors of the last head page. + */ +int __meminit vmemmap_undo_hvo(unsigned long addr, unsigned long end, + int node, unsigned long headsize) +{ + unsigned long maddr, pfn; + pte_t *pte; + int headpages; + + /* + * Should only be called early in boot, so nothing will + * be accessing these page structures. + */ + WARN_ON(!early_boot_irqs_disabled); + + headpages = headsize >> PAGE_SHIFT; + + /* + * Clear mirrored mappings for tail page structs. + */ + for (maddr = addr + headsize; maddr < end; maddr += PAGE_SIZE) { + pte = virt_to_kpte(maddr); + pte_clear(&init_mm, maddr, pte); + } + + /* + * Clear and free mappings for head page and first tail page + * structs. + */ + for (maddr = addr; headpages-- > 0; maddr += PAGE_SIZE) { + pte = virt_to_kpte(maddr); + pfn = pte_pfn(ptep_get(pte)); + pte_clear(&init_mm, maddr, pte); + memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE); + } + + flush_tlb_kernel_range(addr, end); + + return vmemmap_populate(addr, end, node, NULL); +} + +/* + * Write protect the mirrored tail page structs for HVO. This will be + * called from the hugetlb code when gathering and initializing the + * memblock allocated gigantic pages. The write protect can't be + * done earlier, since it can't be guaranteed that the reserved + * page structures will not be written to during initialization, + * even if CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled. + * + * The PTEs are known to exist, and nothing else should be touching + * these pages. The caller is responsible for any TLB flushing. + */ +void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end, + int node, unsigned long headsize) +{ + unsigned long maddr; + pte_t *pte; + + for (maddr = addr + headsize; maddr < end; maddr += PAGE_SIZE) { + pte = virt_to_kpte(maddr); + ptep_set_wrprotect(&init_mm, maddr, pte); + } +} + +/* + * Populate vmemmap pages HVO-style. The first page contains the head + * page and needed tail pages, the other ones are mirrors of the first + * page. + */ +int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end, + int node, unsigned long headsize) +{ + pte_t *pte; + unsigned long maddr; + + for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) { + pte = vmemmap_populate_address(maddr, node, NULL, -1, 0); + if (!pte) + return -ENOMEM; + } + + /* + * Reuse the last page struct page mapped above for the rest. + */ + return vmemmap_populate_range(maddr, end, node, NULL, + pte_pfn(ptep_get(pte)), 0); } void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, @@ -409,7 +520,8 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, * with just tail struct pages. */ return vmemmap_populate_range(start, end, node, NULL, - pte_page(ptep_get(pte))); + pte_pfn(ptep_get(pte)), + VMEMMAP_POPULATE_PAGEREF); } size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page)); @@ -417,13 +529,13 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, unsigned long next, last = addr + size; /* Populate the head page vmemmap page */ - pte = vmemmap_populate_address(addr, node, NULL, NULL); + pte = vmemmap_populate_address(addr, node, NULL, -1, 0); if (!pte) return -ENOMEM; /* Populate the tail pages vmemmap page */ next = addr + PAGE_SIZE; - pte = vmemmap_populate_address(next, node, NULL, NULL); + pte = vmemmap_populate_address(next, node, NULL, -1, 0); if (!pte) return -ENOMEM; @@ -433,7 +545,8 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, */ next += PAGE_SIZE; rc = vmemmap_populate_range(next, last, node, NULL, - pte_page(ptep_get(pte))); + pte_pfn(ptep_get(pte)), + VMEMMAP_POPULATE_PAGEREF); if (rc) return -ENOMEM; } From d58b2498200724e4f8c12d71a5953da03c8c8bdf Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:16 +0000 Subject: [PATCH 0964/2157] mm/hugetlb: deal with multiple calls to hugetlb_bootmem_alloc Architectures that want pre-HVO of hugetlb vmemmap pages will need to call hugetlb_bootmem_alloc from an earlier spot in boot (before sparse_init). To facilitate some architectures doing this, protect hugetlb_bootmem_alloc against multiple calls. Also provide a helper function to check if it's been called, so that the early HVO code, to be added later, can see if there is anything to do. Link: https://lkml.kernel.org/r/20250228182928.2645936-16-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 ++++++ mm/hugetlb.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a596aaa178d1..f0ab4ca4ecf2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,6 +175,7 @@ extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages[MAX_NUMNODES]; void hugetlb_bootmem_alloc(void); +bool hugetlb_bootmem_allocated(void); /* arch callbacks */ @@ -1263,6 +1264,11 @@ static inline bool hugetlbfs_pagecache_present( static inline void hugetlb_bootmem_alloc(void) { } + +static inline bool hugetlb_bootmem_allocated(void) +{ + return false; +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e4bf06f13178..826af96455aa 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4918,16 +4918,28 @@ static int __init default_hugepagesz_setup(char *s) } hugetlb_early_param("default_hugepagesz", default_hugepagesz_setup); +static bool __hugetlb_bootmem_allocated __initdata; + +bool __init hugetlb_bootmem_allocated(void) +{ + return __hugetlb_bootmem_allocated; +} + void __init hugetlb_bootmem_alloc(void) { struct hstate *h; + if (__hugetlb_bootmem_allocated) + return; + hugetlb_parse_params(); for_each_hstate(h) { if (hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); } + + __hugetlb_bootmem_allocated = true; } static unsigned int allowed_mems_nr(struct hstate *h) From 91ec71872a6d0c41abd7c53412f68111ffa060cd Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:17 +0000 Subject: [PATCH 0965/2157] mm/hugetlb: move huge_boot_pages list init to hugetlb_bootmem_alloc Instead of initializing the per-node hugetlb bootmem pages list from the alloc function, we can now do it in a somewhat cleaner way, since there is an explicit hugetlb_bootmem_alloc function. Initialize the lists there. Link: https://lkml.kernel.org/r/20250228182928.2645936-17-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 826af96455aa..f9287d87b8b7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3586,7 +3586,6 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) static void __init hugetlb_hstate_alloc_pages(struct hstate *h) { unsigned long allocated; - static bool initialized __initdata; /* skip gigantic hugepages allocation if hugetlb_cma enabled */ if (hstate_is_gigantic(h) && hugetlb_cma_size) { @@ -3594,17 +3593,6 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) return; } - /* hugetlb_hstate_alloc_pages will be called many times, initialize huge_boot_pages once */ - if (!initialized) { - int i = 0; - - for (i = 0; i < MAX_NUMNODES; i++) - INIT_LIST_HEAD(&huge_boot_pages[i]); - h->next_nid_to_alloc = first_online_node; - h->next_nid_to_free = first_online_node; - initialized = true; - } - /* do node specific alloc */ if (hugetlb_hstate_alloc_pages_specific_nodes(h)) return; @@ -4928,13 +4916,20 @@ bool __init hugetlb_bootmem_allocated(void) void __init hugetlb_bootmem_alloc(void) { struct hstate *h; + int i; if (__hugetlb_bootmem_allocated) return; + for (i = 0; i < MAX_NUMNODES; i++) + INIT_LIST_HEAD(&huge_boot_pages[i]); + hugetlb_parse_params(); for_each_hstate(h) { + h->next_nid_to_alloc = first_online_node; + h->next_nid_to_free = first_online_node; + if (hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); } From 752fe17af693323dba5622b19c858a46fed219a1 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:18 +0000 Subject: [PATCH 0966/2157] mm/hugetlb: add pre-HVO framework Define flags for pre-HVOed bootmem hugetlb pages, and act on them. The most important flag is the HVO flag, signalling that a bootmem allocated gigantic page has already been HVO-ed. If this flag is seen by the hugetlb bootmem gather code, the page is marked as HVO optimized. The HVO code will then not try to optimize it again. Instead, it will just map the tail page mirror pages read-only, completing the HVO steps. No functional change, as nothing sets the flags yet. Link: https://lkml.kernel.org/r/20250228182928.2645936-18-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/powerpc/mm/hugetlbpage.c | 1 + include/linux/hugetlb.h | 4 +++ mm/hugetlb.c | 24 ++++++++++++++++- mm/hugetlb_vmemmap.c | 50 +++++++++++++++++++++++++++++++++-- mm/hugetlb_vmemmap.h | 7 +++++ 5 files changed, 83 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 6b043180220a..d3c1b749dcfc 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -113,6 +113,7 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) gpage_freearray[nr_gpages] = 0; list_add(&m->list, &huge_boot_pages[0]); m->hstate = hstate; + m->flags = 0; return 1; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f0ab4ca4ecf2..bbccc3e6b9dd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -681,8 +681,12 @@ struct hstate { struct huge_bootmem_page { struct list_head list; struct hstate *hstate; + unsigned long flags; }; +#define HUGE_BOOTMEM_HVO 0x0001 +#define HUGE_BOOTMEM_ZONES_VALID 0x0002 + int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); void wait_for_freed_hugetlb_folios(void); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f9287d87b8b7..db0d35bc9b9b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3227,6 +3227,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages[node]); m->hstate = h; + m->flags = 0; return 1; } @@ -3294,7 +3295,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, struct folio *folio, *tmp_f; /* Send list for bulk vmemmap optimization processing */ - hugetlb_vmemmap_optimize_folios(h, folio_list); + hugetlb_vmemmap_optimize_bootmem_folios(h, folio_list); list_for_each_entry_safe(folio, tmp_f, folio_list, lru) { if (!folio_test_hugetlb_vmemmap_optimized(folio)) { @@ -3323,6 +3324,13 @@ static bool __init hugetlb_bootmem_page_zones_valid(int nid, unsigned long start_pfn; bool valid; + if (m->flags & HUGE_BOOTMEM_ZONES_VALID) { + /* + * Already validated, skip check. + */ + return true; + } + start_pfn = virt_to_phys(m) >> PAGE_SHIFT; valid = !pfn_range_intersects_zones(nid, start_pfn, @@ -3355,6 +3363,11 @@ static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page, } } +static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m) +{ + return (m->flags & HUGE_BOOTMEM_HVO); +} + /* * Put bootmem huge pages into the standard lists after mem_map is up. * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages. @@ -3395,6 +3408,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid) hugetlb_folio_init_vmemmap(folio, h, HUGETLB_VMEMMAP_RESERVE_PAGES); init_new_hugetlb_folio(h, folio); + + if (hugetlb_bootmem_page_prehvo(m)) + /* + * If pre-HVO was done, just set the + * flag, the HVO code will then skip + * this folio. + */ + folio_set_hugetlb_vmemmap_optimized(folio); + list_add(&folio->lru, &folio_list); /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 5b484758f813..be6b33ecbc8e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -649,14 +649,39 @@ static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *fol return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse); } -void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) +static void __hugetlb_vmemmap_optimize_folios(struct hstate *h, + struct list_head *folio_list, + bool boot) { struct folio *folio; + int nr_to_optimize; LIST_HEAD(vmemmap_pages); unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; + nr_to_optimize = 0; list_for_each_entry(folio, folio_list, lru) { - int ret = hugetlb_vmemmap_split_folio(h, folio); + int ret; + unsigned long spfn, epfn; + + if (boot && folio_test_hugetlb_vmemmap_optimized(folio)) { + /* + * Already optimized by pre-HVO, just map the + * mirrored tail page structs RO. + */ + spfn = (unsigned long)&folio->page; + epfn = spfn + pages_per_huge_page(h); + vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio), + HUGETLB_VMEMMAP_RESERVE_SIZE); + register_page_bootmem_memmap(pfn_to_section_nr(spfn), + &folio->page, + HUGETLB_VMEMMAP_RESERVE_SIZE); + static_branch_inc(&hugetlb_optimize_vmemmap_key); + continue; + } + + nr_to_optimize++; + + ret = hugetlb_vmemmap_split_folio(h, folio); /* * Spliting the PMD requires allocating a page, thus lets fail @@ -668,6 +693,16 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l break; } + if (!nr_to_optimize) + /* + * All pre-HVO folios, nothing left to do. It's ok if + * there is a mix of pre-HVO and not yet HVO-ed folios + * here, as __hugetlb_vmemmap_optimize_folio() will + * skip any folios that already have the optimized flag + * set, see vmemmap_should_optimize_folio(). + */ + goto out; + flush_tlb_all(); list_for_each_entry(folio, folio_list, lru) { @@ -693,10 +728,21 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l } } +out: flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); } +void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) +{ + __hugetlb_vmemmap_optimize_folios(h, folio_list, false); +} + +void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list) +{ + __hugetlb_vmemmap_optimize_folios(h, folio_list, true); +} + static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index 2fcae92d3359..71110a90275f 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -24,6 +24,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, struct list_head *non_hvo_folios); void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio); void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list); +void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list); + static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h) { @@ -64,6 +66,11 @@ static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list { } +static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, + struct list_head *folio_list) +{ +} + static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) { return 0; From eefd3d024a53c5d45e7e70a8677257b035e4de52 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:19 +0000 Subject: [PATCH 0967/2157] mm/hugetlb_vmemmap: fix hugetlb_vmemmap_restore_folios definition Make the hugetlb_vmemmap_restore_folios definition inline for the !CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP case, so that including this file in files other than hugetlb_vmemmap.c will work. Link: https://lkml.kernel.org/r/20250228182928.2645936-19-fvdl@google.com Fixes: cfb8c75099db ("hugetlb: perform vmemmap restoration on a list of pages") Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index 71110a90275f..62d3d645a793 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -50,7 +50,7 @@ static inline int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct f return 0; } -static long hugetlb_vmemmap_restore_folios(const struct hstate *h, +static inline long hugetlb_vmemmap_restore_folios(const struct hstate *h, struct list_head *folio_list, struct list_head *non_hvo_folios) { From b1222550fbf73840e31a103305d03ad53b8f5a59 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:20 +0000 Subject: [PATCH 0968/2157] mm/hugetlb: do pre-HVO for bootmem allocated pages For large systems, the overhead of vmemmap pages for hugetlb is substantial. It's about 1.5% of memory, which is about 45G for a 3T system. If you want to configure most of that system for hugetlb (e.g. to use as backing memory for VMs), there is a chance of running out of memory on boot, even though you know that the 45G will become available later. To avoid this scenario, and since it's a waste to first allocate and then free that 45G during boot, do pre-HVO for hugetlb bootmem allocated pages ('gigantic' pages). pre-HVO is done by adding functions that are called from sparse_init_nid_early and sparse_init_nid_late. The first is called before memmap allocation, so it takes care of allocating memmap HVO-style. The second verifies that all bootmem pages look good, specifically it checks that they do not intersect with multiple zones. This can only be done from sparse_init_nid_late path, when zones have been initialized. The hugetlb page size must be aligned to the section size, and aligned to the size of memory described by the number of page structures contained in one PMD (since pre-HVO is not prepared to split PMDs). This should be true for most 'gigantic' pages, it is for 1G pages on x86, where both of these alignment requirements are 128M. This will only have an effect if hugetlb_bootmem_alloc was called early in boot. If not, it won't do anything, and HVO for bootmem hugetlb pages works as before. Link: https://lkml.kernel.org/r/20250228182928.2645936-20-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 + mm/hugetlb.c | 17 ++++- mm/hugetlb_vmemmap.c | 143 ++++++++++++++++++++++++++++++++++++++++ mm/hugetlb_vmemmap.h | 14 ++++ mm/sparse-vmemmap.c | 4 ++ 5 files changed, 177 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index bbccc3e6b9dd..f6b82b0524ed 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -687,6 +687,8 @@ struct huge_bootmem_page { #define HUGE_BOOTMEM_HVO 0x0001 #define HUGE_BOOTMEM_ZONES_VALID 0x0002 +bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); + int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); void wait_for_freed_hugetlb_folios(void); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index db0d35bc9b9b..d1134e915927 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3223,7 +3223,18 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) */ memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE), huge_page_size(h) - PAGE_SIZE); - /* Put them into a private list first because mem_map is not up yet */ + + /* + * Put them into a private list first because mem_map is not up yet. + * + * For pre-HVO to work correctly, pages need to be on the list for + * the node they were actually allocated from. That node may be + * different in the case of fallback by memblock_alloc_try_nid_raw. + * So, extract the actual node first. + */ + if (nid == NUMA_NO_NODE) + node = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); + INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages[node]); m->hstate = h; @@ -3318,8 +3329,8 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, } } -static bool __init hugetlb_bootmem_page_zones_valid(int nid, - struct huge_bootmem_page *m) +bool __init hugetlb_bootmem_page_zones_valid(int nid, + struct huge_bootmem_page *m) { unsigned long start_pfn; bool valid; diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index be6b33ecbc8e..9a99dfa3c495 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -743,6 +743,149 @@ void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head __hugetlb_vmemmap_optimize_folios(h, folio_list, true); } +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT + +/* Return true of a bootmem allocated HugeTLB page should be pre-HVO-ed */ +static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m) +{ + unsigned long section_size, psize, pmd_vmemmap_size; + phys_addr_t paddr; + + if (!READ_ONCE(vmemmap_optimize_enabled)) + return false; + + if (!hugetlb_vmemmap_optimizable(m->hstate)) + return false; + + psize = huge_page_size(m->hstate); + paddr = virt_to_phys(m); + + /* + * Pre-HVO only works if the bootmem huge page + * is aligned to the section size. + */ + section_size = (1UL << PA_SECTION_SHIFT); + if (!IS_ALIGNED(paddr, section_size) || + !IS_ALIGNED(psize, section_size)) + return false; + + /* + * The pre-HVO code does not deal with splitting PMDS, + * so the bootmem page must be aligned to the number + * of base pages that can be mapped with one vmemmap PMD. + */ + pmd_vmemmap_size = (PMD_SIZE / (sizeof(struct page))) << PAGE_SHIFT; + if (!IS_ALIGNED(paddr, pmd_vmemmap_size) || + !IS_ALIGNED(psize, pmd_vmemmap_size)) + return false; + + return true; +} + +/* + * Initialize memmap section for a gigantic page, HVO-style. + */ +void __init hugetlb_vmemmap_init_early(int nid) +{ + unsigned long psize, paddr, section_size; + unsigned long ns, i, pnum, pfn, nr_pages; + unsigned long start, end; + struct huge_bootmem_page *m = NULL; + void *map; + + /* + * Noting to do if bootmem pages were not allocated + * early in boot, or if HVO wasn't enabled in the + * first place. + */ + if (!hugetlb_bootmem_allocated()) + return; + + if (!READ_ONCE(vmemmap_optimize_enabled)) + return; + + section_size = (1UL << PA_SECTION_SHIFT); + + list_for_each_entry(m, &huge_boot_pages[nid], list) { + if (!vmemmap_should_optimize_bootmem_page(m)) + continue; + + nr_pages = pages_per_huge_page(m->hstate); + psize = nr_pages << PAGE_SHIFT; + paddr = virt_to_phys(m); + pfn = PHYS_PFN(paddr); + map = pfn_to_page(pfn); + start = (unsigned long)map; + end = start + nr_pages * sizeof(struct page); + + if (vmemmap_populate_hvo(start, end, nid, + HUGETLB_VMEMMAP_RESERVE_SIZE) < 0) + continue; + + memmap_boot_pages_add(HUGETLB_VMEMMAP_RESERVE_SIZE / PAGE_SIZE); + + pnum = pfn_to_section_nr(pfn); + ns = psize / section_size; + + for (i = 0; i < ns; i++) { + sparse_init_early_section(nid, map, pnum, + SECTION_IS_VMEMMAP_PREINIT); + map += section_map_size(); + pnum++; + } + + m->flags |= HUGE_BOOTMEM_HVO; + } +} + +void __init hugetlb_vmemmap_init_late(int nid) +{ + struct huge_bootmem_page *m, *tm; + unsigned long phys, nr_pages, start, end; + unsigned long pfn, nr_mmap; + struct hstate *h; + void *map; + + if (!hugetlb_bootmem_allocated()) + return; + + if (!READ_ONCE(vmemmap_optimize_enabled)) + return; + + list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) { + if (!(m->flags & HUGE_BOOTMEM_HVO)) + continue; + + phys = virt_to_phys(m); + h = m->hstate; + pfn = PHYS_PFN(phys); + nr_pages = pages_per_huge_page(h); + + if (!hugetlb_bootmem_page_zones_valid(nid, m)) { + /* + * Oops, the hugetlb page spans multiple zones. + * Remove it from the list, and undo HVO. + */ + list_del(&m->list); + + map = pfn_to_page(pfn); + + start = (unsigned long)map; + end = start + nr_pages * sizeof(struct page); + + vmemmap_undo_hvo(start, end, nid, + HUGETLB_VMEMMAP_RESERVE_SIZE); + nr_mmap = end - start - HUGETLB_VMEMMAP_RESERVE_SIZE; + memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE)); + + memblock_phys_free(phys, huge_page_size(h)); + continue; + } else + m->flags |= HUGE_BOOTMEM_ZONES_VALID; + } +} +#endif + static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index 62d3d645a793..18b490825215 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -9,6 +9,8 @@ #ifndef _LINUX_HUGETLB_VMEMMAP_H #define _LINUX_HUGETLB_VMEMMAP_H #include +#include +#include /* * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See @@ -25,6 +27,10 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio); void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list); void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list); +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +void hugetlb_vmemmap_init_early(int nid); +void hugetlb_vmemmap_init_late(int nid); +#endif static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h) @@ -71,6 +77,14 @@ static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, { } +static inline void hugetlb_vmemmap_init_early(int nid) +{ +} + +static inline void hugetlb_vmemmap_init_late(int nid) +{ +} + static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) { return 0; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8cc848c4b17c..fd2ab5118e13 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -32,6 +32,8 @@ #include #include +#include "hugetlb_vmemmap.h" + /* * Flags for vmemmap_populate_range and friends. */ @@ -594,6 +596,7 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, */ void __init sparse_vmemmap_init_nid_early(int nid) { + hugetlb_vmemmap_init_early(nid); } /* @@ -604,5 +607,6 @@ void __init sparse_vmemmap_init_nid_early(int nid) */ void __init sparse_vmemmap_init_nid_late(int nid) { + hugetlb_vmemmap_init_late(nid); } #endif From 665eaf313314432b5bbb5f71ec71e275ff2358e0 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:21 +0000 Subject: [PATCH 0969/2157] x86/setup: call hugetlb_bootmem_alloc early Call hugetlb_bootmem_allloc in an earlier spot in setup, after hugelb_cma_reserve. This will make vmemmap preinit of the sections covered by the allocated hugetlb pages possible. Link: https://lkml.kernel.org/r/20250228182928.2645936-21-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Alexander Gordeev Cc: Arnd Bergmann Cc: Dan Carpenter Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/x86/kernel/setup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cebee310e200..ff8604007b08 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1108,8 +1108,10 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); - if (boot_cpu_has(X86_FEATURE_GBPAGES)) + if (boot_cpu_has(X86_FEATURE_GBPAGES)) { hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + hugetlb_bootmem_alloc(); + } /* * Reserve memory for crash kernel after SRAT is parsed so that it From 08efe293503098b539cb18f55528176b6b559348 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:22 +0000 Subject: [PATCH 0970/2157] x86/mm: set ARCH_WANT_HUGETLB_VMEMMAP_PREINIT Now that hugetlb bootmem pages are allocated earlier, and available for section preinit (HVO-style), set ARCH_WANT_HUGETLB_VMEMMAP_PREINIT for x86_64, so that is can be done. This enables pre-HVO on x86_64. Link: https://lkml.kernel.org/r/20250228182928.2645936-22-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Johannes Weiner Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0e27ebd7e36a..cf49c130d1d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -146,6 +146,7 @@ config X86 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 + select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64 select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT From b51d3db91d4d358f9787e3e75e0c79eb858e8197 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:23 +0000 Subject: [PATCH 0971/2157] mm/cma: simplify zone intersection check cma_activate_area walks all pages in the area, checking their zone individually to see if the area resides in more than one zone. Make this a little more efficient by using the recently introduced pfn_range_intersects_zones() function. Store the NUMA node id (if any) in the cma structure to facilitate this. Link: https://lkml.kernel.org/r/20250228182928.2645936-23-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/cma.c | 13 ++++++------- mm/cma.h | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index 8dc46bfa3819..61ad4fd2f62d 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -103,7 +103,6 @@ static void __init cma_activate_area(struct cma *cma) { unsigned long pfn, base_pfn; int allocrange, r; - struct zone *zone; struct cma_memrange *cmr; for (allocrange = 0; allocrange < cma->nranges; allocrange++) { @@ -124,12 +123,8 @@ static void __init cma_activate_area(struct cma *cma) * CMA resv range to be in the same zone. */ WARN_ON_ONCE(!pfn_valid(base_pfn)); - zone = page_zone(pfn_to_page(base_pfn)); - for (pfn = base_pfn + 1; pfn < base_pfn + cmr->count; pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - if (page_zone(pfn_to_page(pfn)) != zone) - goto cleanup; - } + if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) + goto cleanup; for (pfn = base_pfn; pfn < base_pfn + cmr->count; pfn += pageblock_nr_pages) @@ -261,6 +256,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, cma->ranges[0].base_pfn = PFN_DOWN(base); cma->ranges[0].count = cma->count; cma->nranges = 1; + cma->nid = NUMA_NO_NODE; *res_cma = cma; @@ -497,6 +493,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, } cma->nranges = nr; + cma->nid = nid; *res_cma = cma; out: @@ -684,6 +681,8 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t base, if (ret) memblock_phys_free(base, size); + (*res_cma)->nid = nid; + return ret; } diff --git a/mm/cma.h b/mm/cma.h index 5f39dd1aac91..ff79dba5508c 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -50,6 +50,8 @@ struct cma { struct cma_kobject *cma_kobj; #endif bool reserve_pages_on_error; + /* NUMA node (NUMA_NO_NODE if unspecified) */ + int nid; }; extern struct cma cma_areas[MAX_CMA_AREAS]; From 9320fa2717810a7d3451dd1a18c31f986a1d4068 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:24 +0000 Subject: [PATCH 0972/2157] mm/cma: introduce a cma validate function Define a function to check if a CMA area is valid, which means: do its ranges not cross any zone boundaries. Store the result in the newly created flags for each CMA area, so that multiple calls are dealt with. This allows for checking the validity of a CMA area early, which is needed later in order to be able to allocate hugetlb bootmem pages from it with pre-HVO. Link: https://lkml.kernel.org/r/20250228182928.2645936-24-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/cma.h | 5 ++++ mm/cma.c | 60 ++++++++++++++++++++++++++++++++++++--------- mm/cma.h | 8 +++++- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 03d85c100dcc..62d9c1cf6326 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -60,6 +60,7 @@ extern void cma_reserve_pages_on_error(struct cma *cma); #ifdef CONFIG_CMA struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); bool cma_free_folio(struct cma *cma, const struct folio *folio); +bool cma_validate_zones(struct cma *cma); #else static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) { @@ -70,6 +71,10 @@ static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) { return false; } +static inline bool cma_validate_zones(struct cma *cma) +{ + return false; +} #endif #endif diff --git a/mm/cma.c b/mm/cma.c index 61ad4fd2f62d..5e1d169e24fa 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -99,6 +99,49 @@ static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr, spin_unlock_irqrestore(&cma->lock, flags); } +/* + * Check if a CMA area contains no ranges that intersect with + * multiple zones. Store the result in the flags in case + * this gets called more than once. + */ +bool cma_validate_zones(struct cma *cma) +{ + int r; + unsigned long base_pfn; + struct cma_memrange *cmr; + bool valid_bit_set; + + /* + * If already validated, return result of previous check. + * Either the valid or invalid bit will be set if this + * check has already been done. If neither is set, the + * check has not been performed yet. + */ + valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags); + if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags)) + return valid_bit_set; + + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + base_pfn = cmr->base_pfn; + + /* + * alloc_contig_range() requires the pfn range specified + * to be in the same zone. Simplify by forcing the entire + * CMA resv range to be in the same zone. + */ + WARN_ON_ONCE(!pfn_valid(base_pfn)); + if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) { + set_bit(CMA_ZONES_INVALID, &cma->flags); + return false; + } + } + + set_bit(CMA_ZONES_VALID, &cma->flags); + + return true; +} + static void __init cma_activate_area(struct cma *cma) { unsigned long pfn, base_pfn; @@ -113,19 +156,12 @@ static void __init cma_activate_area(struct cma *cma) goto cleanup; } + if (!cma_validate_zones(cma)) + goto cleanup; + for (r = 0; r < cma->nranges; r++) { cmr = &cma->ranges[r]; base_pfn = cmr->base_pfn; - - /* - * alloc_contig_range() requires the pfn range specified - * to be in the same zone. Simplify by forcing the entire - * CMA resv range to be in the same zone. - */ - WARN_ON_ONCE(!pfn_valid(base_pfn)); - if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) - goto cleanup; - for (pfn = base_pfn; pfn < base_pfn + cmr->count; pfn += pageblock_nr_pages) init_cma_reserved_pageblock(pfn_to_page(pfn)); @@ -145,7 +181,7 @@ static void __init cma_activate_area(struct cma *cma) bitmap_free(cma->ranges[r].bitmap); /* Expose all pages to the buddy, they are useless for CMA. */ - if (!cma->reserve_pages_on_error) { + if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) { for (r = 0; r < allocrange; r++) { cmr = &cma->ranges[r]; for (pfn = cmr->base_pfn; @@ -172,7 +208,7 @@ core_initcall(cma_init_reserved_areas); void __init cma_reserve_pages_on_error(struct cma *cma) { - cma->reserve_pages_on_error = true; + set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags); } static int __init cma_new_area(const char *name, phys_addr_t size, diff --git a/mm/cma.h b/mm/cma.h index ff79dba5508c..bddc84b3cd96 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -49,11 +49,17 @@ struct cma { /* kobject requires dynamic object */ struct cma_kobject *cma_kobj; #endif - bool reserve_pages_on_error; + unsigned long flags; /* NUMA node (NUMA_NO_NODE if unspecified) */ int nid; }; +enum cma_flags { + CMA_RESERVE_PAGES_ON_ERROR, + CMA_ZONES_VALID, + CMA_ZONES_INVALID, +}; + extern struct cma cma_areas[MAX_CMA_AREAS]; extern unsigned int cma_area_count; From 85abcd023640067fcbb6e23c45e8a0014dbba11d Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:25 +0000 Subject: [PATCH 0973/2157] mm/cma: introduce interface for early reservations It can be desirable to reserve memory in a CMA area before it is activated, early in boot. Such reservations would effectively be memblock allocations, but they can be returned to the CMA area later. This functionality can be used to allow hugetlb bootmem allocations from a hugetlb CMA area. A new interface, cma_reserve_early is introduced. This allows for pageblock-aligned reservations. These reservations are skipped during the initial handoff of pages in a CMA area to the buddy allocator. The caller is responsible for making sure that the page structures are set up, and that the migrate type is set correctly, as with other memblock allocations that stick around. If the CMA area fails to activate (because it intersects with multiple zones), the reserved memory is not given to the buddy allocator, the caller needs to take care of that. Link: https://lkml.kernel.org/r/20250228182928.2645936-25-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/cma.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++----- mm/cma.h | 8 +++++ mm/internal.h | 16 ++++++++++ mm/mm_init.c | 9 ++++++ 4 files changed, 109 insertions(+), 7 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index 5e1d169e24fa..09322b8284bd 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -144,9 +144,10 @@ bool cma_validate_zones(struct cma *cma) static void __init cma_activate_area(struct cma *cma) { - unsigned long pfn, base_pfn; + unsigned long pfn, end_pfn; int allocrange, r; struct cma_memrange *cmr; + unsigned long bitmap_count, count; for (allocrange = 0; allocrange < cma->nranges; allocrange++) { cmr = &cma->ranges[allocrange]; @@ -161,8 +162,13 @@ static void __init cma_activate_area(struct cma *cma) for (r = 0; r < cma->nranges; r++) { cmr = &cma->ranges[r]; - base_pfn = cmr->base_pfn; - for (pfn = base_pfn; pfn < base_pfn + cmr->count; + if (cmr->early_pfn != cmr->base_pfn) { + count = cmr->early_pfn - cmr->base_pfn; + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + bitmap_set(cmr->bitmap, 0, bitmap_count); + } + + for (pfn = cmr->early_pfn; pfn < cmr->base_pfn + cmr->count; pfn += pageblock_nr_pages) init_cma_reserved_pageblock(pfn_to_page(pfn)); } @@ -173,6 +179,7 @@ static void __init cma_activate_area(struct cma *cma) INIT_HLIST_HEAD(&cma->mem_head); spin_lock_init(&cma->mem_head_lock); #endif + set_bit(CMA_ACTIVATED, &cma->flags); return; @@ -184,9 +191,8 @@ static void __init cma_activate_area(struct cma *cma) if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) { for (r = 0; r < allocrange; r++) { cmr = &cma->ranges[r]; - for (pfn = cmr->base_pfn; - pfn < cmr->base_pfn + cmr->count; - pfn++) + end_pfn = cmr->base_pfn + cmr->count; + for (pfn = cmr->early_pfn; pfn < end_pfn; pfn++) free_reserved_page(pfn_to_page(pfn)); } } @@ -290,6 +296,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, return ret; cma->ranges[0].base_pfn = PFN_DOWN(base); + cma->ranges[0].early_pfn = PFN_DOWN(base); cma->ranges[0].count = cma->count; cma->nranges = 1; cma->nid = NUMA_NO_NODE; @@ -509,6 +516,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, nr, (u64)mlp->base, (u64)mlp->base + size); cmrp = &cma->ranges[nr++]; cmrp->base_pfn = PHYS_PFN(mlp->base); + cmrp->early_pfn = cmrp->base_pfn; cmrp->count = size >> PAGE_SHIFT; sizeleft -= size; @@ -540,7 +548,6 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, pr_info("Reserved %lu MiB in %d range%s\n", (unsigned long)total_size / SZ_1M, nr, nr > 1 ? "s" : ""); - return ret; } @@ -1034,3 +1041,65 @@ bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end) return false; } + +/* + * Very basic function to reserve memory from a CMA area that has not + * yet been activated. This is expected to be called early, when the + * system is single-threaded, so there is no locking. The alignment + * checking is restrictive - only pageblock-aligned areas + * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function. + * This keeps things simple, and is enough for the current use case. + * + * The CMA bitmaps have not yet been allocated, so just start + * reserving from the bottom up, using a PFN to keep track + * of what has been reserved. Unreserving is not possible. + * + * The caller is responsible for initializing the page structures + * in the area properly, since this just points to memblock-allocated + * memory. The caller should subsequently use init_cma_pageblock to + * set the migrate type and CMA stats the pageblocks that were reserved. + * + * If the CMA area fails to activate later, memory obtained through + * this interface is not handed to the page allocator, this is + * the responsibility of the caller (e.g. like normal memblock-allocated + * memory). + */ +void __init *cma_reserve_early(struct cma *cma, unsigned long size) +{ + int r; + struct cma_memrange *cmr; + unsigned long available; + void *ret = NULL; + + if (!cma || !cma->count) + return NULL; + /* + * Can only be called early in init. + */ + if (test_bit(CMA_ACTIVATED, &cma->flags)) + return NULL; + + if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES)) + return NULL; + + if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit))) + return NULL; + + size >>= PAGE_SHIFT; + + if (size > cma->available_count) + return NULL; + + for (r = 0; r < cma->nranges; r++) { + cmr = &cma->ranges[r]; + available = cmr->count - (cmr->early_pfn - cmr->base_pfn); + if (size <= available) { + ret = phys_to_virt(PFN_PHYS(cmr->early_pfn)); + cmr->early_pfn += size; + cma->available_count -= size; + return ret; + } + } + + return ret; +} diff --git a/mm/cma.h b/mm/cma.h index bddc84b3cd96..df7fc623b7a6 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -16,9 +16,16 @@ struct cma_kobject { * and the total amount of memory requested, while smaller than the total * amount of memory available, is large enough that it doesn't fit in a * single physical memory range because of memory holes. + * + * Fields: + * @base_pfn: physical address of range + * @early_pfn: first PFN not reserved through cma_reserve_early + * @count: size of range + * @bitmap: bitmap of allocated (1 << order_per_bit)-sized chunks. */ struct cma_memrange { unsigned long base_pfn; + unsigned long early_pfn; unsigned long count; unsigned long *bitmap; #ifdef CONFIG_CMA_DEBUGFS @@ -58,6 +65,7 @@ enum cma_flags { CMA_RESERVE_PAGES_ON_ERROR, CMA_ZONES_VALID, CMA_ZONES_INVALID, + CMA_ACTIVATED, }; extern struct cma cma_areas[MAX_CMA_AREAS]; diff --git a/mm/internal.h b/mm/internal.h index 8233c207d3f3..31c626130883 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -848,6 +848,22 @@ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ +struct cma; + +#ifdef CONFIG_CMA +void *cma_reserve_early(struct cma *cma, unsigned long size); +void init_cma_pageblock(struct page *page); +#else +static inline void *cma_reserve_early(struct cma *cma, unsigned long size) +{ + return NULL; +} +static inline void init_cma_pageblock(struct page *page) +{ +} +#endif + + int find_suitable_fallback(struct free_area *area, unsigned int order, int migratetype, bool only_stealable, bool *can_steal); diff --git a/mm/mm_init.c b/mm/mm_init.c index 3eec528afe43..b5047c5ef7d6 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2263,6 +2263,15 @@ void __init init_cma_reserved_pageblock(struct page *page) adjust_managed_page_count(page, pageblock_nr_pages); page_zone(page)->cma_pages += pageblock_nr_pages; } +/* + * Similar to above, but only set the migrate type and stats. + */ +void __init init_cma_pageblock(struct page *page) +{ + set_pageblock_migratetype(page, MIGRATE_CMA); + adjust_managed_page_count(page, pageblock_nr_pages); + page_zone(page)->cma_pages += pageblock_nr_pages; +} #endif void set_zone_contiguous(struct zone *zone) From f866cfcec20cdc16955bca255424be255764d2e7 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:26 +0000 Subject: [PATCH 0974/2157] mm/hugetlb: add hugetlb_cma_only cmdline option Add an option to force hugetlb gigantic pages to be allocated using CMA only (if hugetlb_cma is enabled). This avoids a fallback to allocation from the rest of system memory if the CMA allocation fails. This makes the size of hugetlb_cma a hard upper boundary for gigantic hugetlb page allocations. This is useful because, with a large CMA area, the kernel's unmovable allocations will have less room to work with and it is undesirable for new hugetlb gigantic page allocations to be done from that remaining area. It will eat in to the space available for unmovable allocations, leading to unwanted system behavior (OOMs because the kernel fails to do unmovable allocations). So, with this enabled, an administrator can force a hard upper bound for runtime gigantic page allocations, and have more predictable system behavior. Link: https://lkml.kernel.org/r/20250228182928.2645936-26-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ mm/hugetlb.c | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ae21d911d1c7..491628ac071a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1892,6 +1892,13 @@ hugepages using the CMA allocator. If enabled, the boot-time allocation of gigantic hugepages is skipped. + hugetlb_cma_only= + [HW,CMA,EARLY] When allocating new HugeTLB pages, only + try to allocate from the CMA areas. + + This option does nothing if hugetlb_cma= is not also + specified. + hugetlb_free_vmemmap= [KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP enabled. diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d1134e915927..80d401593669 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -59,6 +59,7 @@ struct hstate hstates[HUGE_MAX_HSTATE]; static struct cma *hugetlb_cma[MAX_NUMNODES]; static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; #endif +static bool hugetlb_cma_only; static unsigned long hugetlb_cma_size __initdata; __initdata struct list_head huge_boot_pages[MAX_NUMNODES]; @@ -1510,6 +1511,9 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, } #endif if (!folio) { + if (hugetlb_cma_only) + return NULL; + folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask); if (!folio) return NULL; @@ -4750,6 +4754,9 @@ static __init void hugetlb_parse_params(void) hcp->setup(hcp->val); } + + if (!hugetlb_cma_size) + hugetlb_cma_only = false; } /* @@ -7862,6 +7869,13 @@ static int __init cmdline_parse_hugetlb_cma(char *p) early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); +static int __init cmdline_parse_hugetlb_cma_only(char *p) +{ + return kstrtobool(p, &hugetlb_cma_only); +} + +early_param("hugetlb_cma_only", cmdline_parse_hugetlb_cma_only); + void __init hugetlb_cma_reserve(int order) { unsigned long size, reserved, per_node; From d2d78671408061b5332d9ad357686812bbec5070 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:27 +0000 Subject: [PATCH 0975/2157] mm/hugetlb: enable bootmem allocation from CMA areas If hugetlb_cma_only is enabled, we know that hugetlb pages can only be allocated from CMA. Now that there is an interface to do early reservations from a CMA area (returning memblock memory), it can be used to allocate hugetlb pages from CMA. This also allows for doing pre-HVO on these pages (if enabled). Make sure to initialize the page structures and associated data correctly. Create a flag to signal that a hugetlb page has been allocated from CMA to make things a little easier. Some configurations of powerpc have a special hugetlb bootmem allocator, so introduce a boolean arch_specific_huge_bootmem_alloc that returns true if such an allocator is present. In that case, CMA bootmem allocations can't be used, so check that function before trying. Link: https://lkml.kernel.org/r/20250228182928.2645936-27-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 6 + include/linux/hugetlb.h | 17 ++ mm/hugetlb.c | 168 ++++++++++++++----- 3 files changed, 152 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index f0bba9c5f9c3..bb786694dd26 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -94,4 +94,10 @@ static inline int check_and_get_huge_psize(int shift) return mmu_psize; } +#define arch_has_huge_bootmem_alloc arch_has_huge_bootmem_alloc + +static inline bool arch_has_huge_bootmem_alloc(void) +{ + return (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()); +} #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f6b82b0524ed..8f3ac832ee7f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -591,6 +591,7 @@ enum hugetlb_page_flags { HPG_freed, HPG_vmemmap_optimized, HPG_raw_hwp_unreliable, + HPG_cma, __NR_HPAGEFLAGS, }; @@ -650,6 +651,7 @@ HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) +HPAGEFLAG(Cma, cma) #ifdef CONFIG_HUGETLB_PAGE @@ -678,14 +680,18 @@ struct hstate { char name[HSTATE_NAME_LEN]; }; +struct cma; + struct huge_bootmem_page { struct list_head list; struct hstate *hstate; unsigned long flags; + struct cma *cma; }; #define HUGE_BOOTMEM_HVO 0x0001 #define HUGE_BOOTMEM_ZONES_VALID 0x0002 +#define HUGE_BOOTMEM_CMA 0x0004 bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); @@ -824,6 +830,17 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +#ifndef arch_has_huge_bootmem_alloc +/* + * Some architectures do their own bootmem allocation, so they can't use + * early CMA allocation. + */ +static inline bool arch_has_huge_bootmem_alloc(void) +{ + return false; +} +#endif + static inline struct hstate *folio_hstate(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 80d401593669..8f753695438c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -131,8 +131,10 @@ static void hugetlb_free_folio(struct folio *folio) #ifdef CONFIG_CMA int nid = folio_nid(folio); - if (cma_free_folio(hugetlb_cma[nid], folio)) + if (folio_test_hugetlb_cma(folio)) { + WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio)); return; + } #endif folio_put(folio); } @@ -1508,6 +1510,9 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, break; } } + + if (folio) + folio_set_hugetlb_cma(folio); } #endif if (!folio) { @@ -3186,6 +3191,86 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } +static bool __init hugetlb_early_cma(struct hstate *h) +{ + if (arch_has_huge_bootmem_alloc()) + return false; + + return (hstate_is_gigantic(h) && hugetlb_cma_only); +} + +static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) +{ + struct huge_bootmem_page *m; + unsigned long flags; + struct cma *cma; + int listnode = nid; + +#ifdef CONFIG_CMA + if (hugetlb_early_cma(h)) { + flags = HUGE_BOOTMEM_CMA; + cma = hugetlb_cma[nid]; + m = cma_reserve_early(cma, huge_page_size(h)); + if (!m) { + int node; + + if (node_exact) + return NULL; + for_each_online_node(node) { + cma = hugetlb_cma[node]; + if (!cma || node == nid) + continue; + m = cma_reserve_early(cma, huge_page_size(h)); + if (m) { + listnode = node; + break; + } + } + } + } else +#endif + { + flags = 0; + cma = NULL; + if (node_exact) + m = memblock_alloc_exact_nid_raw(huge_page_size(h), + huge_page_size(h), 0, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); + else { + m = memblock_alloc_try_nid_raw(huge_page_size(h), + huge_page_size(h), 0, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); + /* + * For pre-HVO to work correctly, pages need to be on + * the list for the node they were actually allocated + * from. That node may be different in the case of + * fallback by memblock_alloc_try_nid_raw. So, + * extract the actual node first. + */ + if (m) + listnode = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); + } + } + + if (m) { + /* + * Use the beginning of the huge page to store the + * huge_bootmem_page struct (until gather_bootmem + * puts them into the mem_map). + * + * Put them into a private list first because mem_map + * is not up yet. + */ + INIT_LIST_HEAD(&m->list); + list_add(&m->list, &huge_boot_pages[listnode]); + m->hstate = h; + m->flags = flags; + m->cma = cma; + } + + return m; +} + int alloc_bootmem_huge_page(struct hstate *h, int nid) __attribute__ ((weak, alias("__alloc_bootmem_huge_page"))); int __alloc_bootmem_huge_page(struct hstate *h, int nid) @@ -3195,22 +3280,15 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) /* do node specific alloc */ if (nid != NUMA_NO_NODE) { - m = memblock_alloc_exact_nid_raw(huge_page_size(h), huge_page_size(h), - 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); + m = alloc_bootmem(h, node, true); if (!m) return 0; goto found; } + /* allocate from next node when distributing huge pages */ for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_ONLINE]) { - m = memblock_alloc_try_nid_raw( - huge_page_size(h), huge_page_size(h), - 0, MEMBLOCK_ALLOC_ACCESSIBLE, node); - /* - * Use the beginning of the huge page to store the - * huge_bootmem_page struct (until gather_bootmem - * puts them into the mem_map). - */ + m = alloc_bootmem(h, node, false); if (!m) return 0; goto found; @@ -3228,21 +3306,6 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE), huge_page_size(h) - PAGE_SIZE); - /* - * Put them into a private list first because mem_map is not up yet. - * - * For pre-HVO to work correctly, pages need to be on the list for - * the node they were actually allocated from. That node may be - * different in the case of fallback by memblock_alloc_try_nid_raw. - * So, extract the actual node first. - */ - if (nid == NUMA_NO_NODE) - node = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); - - INIT_LIST_HEAD(&m->list); - list_add(&m->list, &huge_boot_pages[node]); - m->hstate = h; - m->flags = 0; return 1; } @@ -3283,13 +3346,25 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio, prep_compound_head((struct page *)folio, huge_page_order(h)); } +static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m) +{ + return m->flags & HUGE_BOOTMEM_HVO; +} + +static bool __init hugetlb_bootmem_page_earlycma(struct huge_bootmem_page *m) +{ + return m->flags & HUGE_BOOTMEM_CMA; +} + /* * memblock-allocated pageblocks might not have the migrate type set * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE) - * here. + * here, or MIGRATE_CMA if this was a page allocated through an early CMA + * reservation. * - * Note that this will not write the page struct, it is ok (and necessary) - * to do this on vmemmap optimized folios. + * In case of vmemmap optimized folios, the tail vmemmap pages are mapped + * read-only, but that's ok - for sparse vmemmap this does not write to + * the page structure. */ static void __init hugetlb_bootmem_init_migratetype(struct folio *folio, struct hstate *h) @@ -3298,9 +3373,13 @@ static void __init hugetlb_bootmem_init_migratetype(struct folio *folio, WARN_ON_ONCE(!pageblock_aligned(folio_pfn(folio))); - for (i = 0; i < nr_pages; i += pageblock_nr_pages) - set_pageblock_migratetype(folio_page(folio, i), + for (i = 0; i < nr_pages; i += pageblock_nr_pages) { + if (folio_test_hugetlb_cma(folio)) + init_cma_pageblock(folio_page(folio, i)); + else + set_pageblock_migratetype(folio_page(folio, i), MIGRATE_MOVABLE); + } } static void __init prep_and_add_bootmem_folios(struct hstate *h, @@ -3346,10 +3425,16 @@ bool __init hugetlb_bootmem_page_zones_valid(int nid, return true; } + if (hugetlb_bootmem_page_earlycma(m)) { + valid = cma_validate_zones(m->cma); + goto out; + } + start_pfn = virt_to_phys(m) >> PAGE_SHIFT; valid = !pfn_range_intersects_zones(nid, start_pfn, pages_per_huge_page(m->hstate)); +out: if (!valid) hstate_boot_nrinvalid[hstate_index(m->hstate)]++; @@ -3378,11 +3463,6 @@ static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page, } } -static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m) -{ - return (m->flags & HUGE_BOOTMEM_HVO); -} - /* * Put bootmem huge pages into the standard lists after mem_map is up. * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages. @@ -3432,14 +3512,21 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid) */ folio_set_hugetlb_vmemmap_optimized(folio); + if (hugetlb_bootmem_page_earlycma(m)) + folio_set_hugetlb_cma(folio); + list_add(&folio->lru, &folio_list); /* * We need to restore the 'stolen' pages to totalram_pages * in order to fix confusing memory reports from free(1) and * other side-effects, like CommitLimit going negative. + * + * For CMA pages, this is done in init_cma_pageblock + * (via hugetlb_bootmem_init_migratetype), so skip it here. */ - adjust_managed_page_count(page, pages_per_huge_page(h)); + if (!folio_test_hugetlb_cma(folio)) + adjust_managed_page_count(page, pages_per_huge_page(h)); cond_resched(); } @@ -3624,8 +3711,11 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) { unsigned long allocated; - /* skip gigantic hugepages allocation if hugetlb_cma enabled */ - if (hstate_is_gigantic(h) && hugetlb_cma_size) { + /* + * Skip gigantic hugepages allocation if early CMA + * reservations are not available. + */ + if (hstate_is_gigantic(h) && hugetlb_cma_size && !hugetlb_early_cma(h)) { pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); return; } From 474fe91f213a400334d41397de1a447560be76a6 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Fri, 28 Feb 2025 18:29:28 +0000 Subject: [PATCH 0976/2157] mm/hugetlb: move hugetlb CMA code in to its own file hugetlb.c contained a number of CONFIG_CMA ifdefs, and the code inside them was large enough to merit being in its own file, so move it, cleaning up things a bit. Hide some direct variable access behind functions to accommodate the move. No functional change intended. Link: https://lkml.kernel.org/r/20250228182928.2645936-28-fvdl@google.com Signed-off-by: Frank van der Linden Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dan Carpenter Cc: Dave Hansen Cc: David Hildenbrand Cc: Heiko Carstens Cc: Joao Martins Cc: Johannes Weiner Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Roman Gushchin (Cruise) Cc: Usama Arif Cc: Vasily Gorbik Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- MAINTAINERS | 2 + mm/Makefile | 3 + mm/hugetlb.c | 269 +++------------------------------------------ mm/hugetlb_cma.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++ mm/hugetlb_cma.h | 57 ++++++++++ 5 files changed, 354 insertions(+), 252 deletions(-) create mode 100644 mm/hugetlb_cma.c create mode 100644 mm/hugetlb_cma.h diff --git a/MAINTAINERS b/MAINTAINERS index 60421d3e48a8..40f233b0fa9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10708,6 +10708,8 @@ F: fs/hugetlbfs/ F: include/linux/hugetlb.h F: include/trace/events/hugetlbfs.h F: mm/hugetlb.c +F: mm/hugetlb_cma.c +F: mm/hugetlb_cma.h F: mm/hugetlb_vmemmap.c F: mm/hugetlb_vmemmap.h F: tools/testing/selftests/cgroup/test_hugetlb_memcg.c diff --git a/mm/Makefile b/mm/Makefile index 53392d2af3a5..2600e94abd3c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -79,6 +79,9 @@ obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o obj-$(CONFIG_ZSWAP) += zswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o +ifdef CONFIG_CMA +obj-$(CONFIG_HUGETLBFS) += hugetlb_cma.o +endif obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) += hugetlb_vmemmap.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8f753695438c..7a96c6edeaef 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -49,19 +49,13 @@ #include #include "internal.h" #include "hugetlb_vmemmap.h" +#include "hugetlb_cma.h" #include int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; -#ifdef CONFIG_CMA -static struct cma *hugetlb_cma[MAX_NUMNODES]; -static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; -#endif -static bool hugetlb_cma_only; -static unsigned long hugetlb_cma_size __initdata; - __initdata struct list_head huge_boot_pages[MAX_NUMNODES]; static unsigned long hstate_boot_nrinvalid[HUGE_MAX_HSTATE] __initdata; @@ -128,14 +122,11 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma); static void hugetlb_free_folio(struct folio *folio) { -#ifdef CONFIG_CMA - int nid = folio_nid(folio); - if (folio_test_hugetlb_cma(folio)) { - WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio)); + hugetlb_cma_free_folio(folio); return; } -#endif + folio_put(folio); } @@ -1492,31 +1483,9 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, if (nid == NUMA_NO_NODE) nid = numa_mem_id(); retry: - folio = NULL; -#ifdef CONFIG_CMA - { - int node; - - if (hugetlb_cma[nid]) - folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask); - - if (!folio && !(gfp_mask & __GFP_THISNODE)) { - for_each_node_mask(node, *nodemask) { - if (node == nid || !hugetlb_cma[node]) - continue; - - folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask); - if (folio) - break; - } - } - - if (folio) - folio_set_hugetlb_cma(folio); - } -#endif + folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask); if (!folio) { - if (hugetlb_cma_only) + if (hugetlb_cma_exclusive_alloc()) return NULL; folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask); @@ -3191,47 +3160,14 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } -static bool __init hugetlb_early_cma(struct hstate *h) -{ - if (arch_has_huge_bootmem_alloc()) - return false; - - return (hstate_is_gigantic(h) && hugetlb_cma_only); -} - static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) { struct huge_bootmem_page *m; - unsigned long flags; - struct cma *cma; int listnode = nid; -#ifdef CONFIG_CMA - if (hugetlb_early_cma(h)) { - flags = HUGE_BOOTMEM_CMA; - cma = hugetlb_cma[nid]; - m = cma_reserve_early(cma, huge_page_size(h)); - if (!m) { - int node; - - if (node_exact) - return NULL; - for_each_online_node(node) { - cma = hugetlb_cma[node]; - if (!cma || node == nid) - continue; - m = cma_reserve_early(cma, huge_page_size(h)); - if (m) { - listnode = node; - break; - } - } - } - } else -#endif - { - flags = 0; - cma = NULL; + if (hugetlb_early_cma(h)) + m = hugetlb_cma_alloc_bootmem(h, &listnode, node_exact); + else { if (node_exact) m = memblock_alloc_exact_nid_raw(huge_page_size(h), huge_page_size(h), 0, @@ -3250,6 +3186,11 @@ static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) if (m) listnode = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); } + + if (m) { + m->flags = 0; + m->cma = NULL; + } } if (m) { @@ -3264,8 +3205,6 @@ static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages[listnode]); m->hstate = h; - m->flags = flags; - m->cma = cma; } return m; @@ -3715,7 +3654,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) * Skip gigantic hugepages allocation if early CMA * reservations are not available. */ - if (hstate_is_gigantic(h) && hugetlb_cma_size && !hugetlb_early_cma(h)) { + if (hstate_is_gigantic(h) && hugetlb_cma_total_size() && + !hugetlb_early_cma(h)) { pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); return; } @@ -3752,7 +3692,7 @@ static void __init hugetlb_init_hstates(void) */ if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) continue; - if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER) + if (hugetlb_cma_total_size() && h->order <= HUGETLB_PAGE_ORDER) continue; for_each_hstate(h2) { if (h2 == h) @@ -4654,14 +4594,6 @@ static void hugetlb_register_all_nodes(void) { } #endif -#ifdef CONFIG_CMA -static void __init hugetlb_cma_check(void); -#else -static inline __init void hugetlb_cma_check(void) -{ -} -#endif - static void __init hugetlb_sysfs_init(void) { struct hstate *h; @@ -4845,8 +4777,7 @@ static __init void hugetlb_parse_params(void) hcp->setup(hcp->val); } - if (!hugetlb_cma_size) - hugetlb_cma_only = false; + hugetlb_cma_validate_params(); } /* @@ -7916,169 +7847,3 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), ALIGN_DOWN(vma->vm_end, PUD_SIZE)); } - -#ifdef CONFIG_CMA -static bool cma_reserve_called __initdata; - -static int __init cmdline_parse_hugetlb_cma(char *p) -{ - int nid, count = 0; - unsigned long tmp; - char *s = p; - - while (*s) { - if (sscanf(s, "%lu%n", &tmp, &count) != 1) - break; - - if (s[count] == ':') { - if (tmp >= MAX_NUMNODES) - break; - nid = array_index_nospec(tmp, MAX_NUMNODES); - - s += count + 1; - tmp = memparse(s, &s); - hugetlb_cma_size_in_node[nid] = tmp; - hugetlb_cma_size += tmp; - - /* - * Skip the separator if have one, otherwise - * break the parsing. - */ - if (*s == ',') - s++; - else - break; - } else { - hugetlb_cma_size = memparse(p, &p); - break; - } - } - - return 0; -} - -early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); - -static int __init cmdline_parse_hugetlb_cma_only(char *p) -{ - return kstrtobool(p, &hugetlb_cma_only); -} - -early_param("hugetlb_cma_only", cmdline_parse_hugetlb_cma_only); - -void __init hugetlb_cma_reserve(int order) -{ - unsigned long size, reserved, per_node; - bool node_specific_cma_alloc = false; - int nid; - - /* - * HugeTLB CMA reservation is required for gigantic - * huge pages which could not be allocated via the - * page allocator. Just warn if there is any change - * breaking this assumption. - */ - VM_WARN_ON(order <= MAX_PAGE_ORDER); - cma_reserve_called = true; - - if (!hugetlb_cma_size) - return; - - for (nid = 0; nid < MAX_NUMNODES; nid++) { - if (hugetlb_cma_size_in_node[nid] == 0) - continue; - - if (!node_online(nid)) { - pr_warn("hugetlb_cma: invalid node %d specified\n", nid); - hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; - hugetlb_cma_size_in_node[nid] = 0; - continue; - } - - if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) { - pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n", - nid, (PAGE_SIZE << order) / SZ_1M); - hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; - hugetlb_cma_size_in_node[nid] = 0; - } else { - node_specific_cma_alloc = true; - } - } - - /* Validate the CMA size again in case some invalid nodes specified. */ - if (!hugetlb_cma_size) - return; - - if (hugetlb_cma_size < (PAGE_SIZE << order)) { - pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", - (PAGE_SIZE << order) / SZ_1M); - hugetlb_cma_size = 0; - return; - } - - if (!node_specific_cma_alloc) { - /* - * If 3 GB area is requested on a machine with 4 numa nodes, - * let's allocate 1 GB on first three nodes and ignore the last one. - */ - per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); - pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", - hugetlb_cma_size / SZ_1M, per_node / SZ_1M); - } - - reserved = 0; - for_each_online_node(nid) { - int res; - char name[CMA_MAX_NAME]; - - if (node_specific_cma_alloc) { - if (hugetlb_cma_size_in_node[nid] == 0) - continue; - - size = hugetlb_cma_size_in_node[nid]; - } else { - size = min(per_node, hugetlb_cma_size - reserved); - } - - size = round_up(size, PAGE_SIZE << order); - - snprintf(name, sizeof(name), "hugetlb%d", nid); - /* - * Note that 'order per bit' is based on smallest size that - * may be returned to CMA allocator in the case of - * huge page demotion. - */ - res = cma_declare_contiguous_multi(size, PAGE_SIZE << order, - HUGETLB_PAGE_ORDER, name, - &hugetlb_cma[nid], nid); - if (res) { - pr_warn("hugetlb_cma: reservation failed: err %d, node %d", - res, nid); - continue; - } - - reserved += size; - pr_info("hugetlb_cma: reserved %lu MiB on node %d\n", - size / SZ_1M, nid); - - if (reserved >= hugetlb_cma_size) - break; - } - - if (!reserved) - /* - * hugetlb_cma_size is used to determine if allocations from - * cma are possible. Set to zero if no cma regions are set up. - */ - hugetlb_cma_size = 0; -} - -static void __init hugetlb_cma_check(void) -{ - if (!hugetlb_cma_size || cma_reserve_called) - return; - - pr_warn("hugetlb_cma: the option isn't supported by current arch\n"); -} - -#endif /* CONFIG_CMA */ diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c new file mode 100644 index 000000000000..e0f2d5c3a84c --- /dev/null +++ b/mm/hugetlb_cma.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include +#include + +#include +#include "internal.h" +#include "hugetlb_cma.h" + + +static struct cma *hugetlb_cma[MAX_NUMNODES]; +static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; +static bool hugetlb_cma_only; +static unsigned long hugetlb_cma_size __initdata; + +void hugetlb_cma_free_folio(struct folio *folio) +{ + int nid = folio_nid(folio); + + WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio)); +} + + +struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask, + int nid, nodemask_t *nodemask) +{ + int node; + int order = huge_page_order(h); + struct folio *folio = NULL; + + if (hugetlb_cma[nid]) + folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask); + + if (!folio && !(gfp_mask & __GFP_THISNODE)) { + for_each_node_mask(node, *nodemask) { + if (node == nid || !hugetlb_cma[node]) + continue; + + folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask); + if (folio) + break; + } + } + + if (folio) + folio_set_hugetlb_cma(folio); + + return folio; +} + +struct huge_bootmem_page * __init +hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact) +{ + struct cma *cma; + struct huge_bootmem_page *m; + int node = *nid; + + cma = hugetlb_cma[*nid]; + m = cma_reserve_early(cma, huge_page_size(h)); + if (!m) { + if (node_exact) + return NULL; + + for_each_online_node(node) { + cma = hugetlb_cma[node]; + if (!cma || node == *nid) + continue; + m = cma_reserve_early(cma, huge_page_size(h)); + if (m) { + *nid = node; + break; + } + } + } + + if (m) { + m->flags = HUGE_BOOTMEM_CMA; + m->cma = cma; + } + + return m; +} + + +static bool cma_reserve_called __initdata; + +static int __init cmdline_parse_hugetlb_cma(char *p) +{ + int nid, count = 0; + unsigned long tmp; + char *s = p; + + while (*s) { + if (sscanf(s, "%lu%n", &tmp, &count) != 1) + break; + + if (s[count] == ':') { + if (tmp >= MAX_NUMNODES) + break; + nid = array_index_nospec(tmp, MAX_NUMNODES); + + s += count + 1; + tmp = memparse(s, &s); + hugetlb_cma_size_in_node[nid] = tmp; + hugetlb_cma_size += tmp; + + /* + * Skip the separator if have one, otherwise + * break the parsing. + */ + if (*s == ',') + s++; + else + break; + } else { + hugetlb_cma_size = memparse(p, &p); + break; + } + } + + return 0; +} + +early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); + +static int __init cmdline_parse_hugetlb_cma_only(char *p) +{ + return kstrtobool(p, &hugetlb_cma_only); +} + +early_param("hugetlb_cma_only", cmdline_parse_hugetlb_cma_only); + +void __init hugetlb_cma_reserve(int order) +{ + unsigned long size, reserved, per_node; + bool node_specific_cma_alloc = false; + int nid; + + /* + * HugeTLB CMA reservation is required for gigantic + * huge pages which could not be allocated via the + * page allocator. Just warn if there is any change + * breaking this assumption. + */ + VM_WARN_ON(order <= MAX_PAGE_ORDER); + cma_reserve_called = true; + + if (!hugetlb_cma_size) + return; + + for (nid = 0; nid < MAX_NUMNODES; nid++) { + if (hugetlb_cma_size_in_node[nid] == 0) + continue; + + if (!node_online(nid)) { + pr_warn("hugetlb_cma: invalid node %d specified\n", nid); + hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; + hugetlb_cma_size_in_node[nid] = 0; + continue; + } + + if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) { + pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n", + nid, (PAGE_SIZE << order) / SZ_1M); + hugetlb_cma_size -= hugetlb_cma_size_in_node[nid]; + hugetlb_cma_size_in_node[nid] = 0; + } else { + node_specific_cma_alloc = true; + } + } + + /* Validate the CMA size again in case some invalid nodes specified. */ + if (!hugetlb_cma_size) + return; + + if (hugetlb_cma_size < (PAGE_SIZE << order)) { + pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", + (PAGE_SIZE << order) / SZ_1M); + hugetlb_cma_size = 0; + return; + } + + if (!node_specific_cma_alloc) { + /* + * If 3 GB area is requested on a machine with 4 numa nodes, + * let's allocate 1 GB on first three nodes and ignore the last one. + */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); + } + + reserved = 0; + for_each_online_node(nid) { + int res; + char name[CMA_MAX_NAME]; + + if (node_specific_cma_alloc) { + if (hugetlb_cma_size_in_node[nid] == 0) + continue; + + size = hugetlb_cma_size_in_node[nid]; + } else { + size = min(per_node, hugetlb_cma_size - reserved); + } + + size = round_up(size, PAGE_SIZE << order); + + snprintf(name, sizeof(name), "hugetlb%d", nid); + /* + * Note that 'order per bit' is based on smallest size that + * may be returned to CMA allocator in the case of + * huge page demotion. + */ + res = cma_declare_contiguous_multi(size, PAGE_SIZE << order, + HUGETLB_PAGE_ORDER, name, + &hugetlb_cma[nid], nid); + if (res) { + pr_warn("hugetlb_cma: reservation failed: err %d, node %d", + res, nid); + continue; + } + + reserved += size; + pr_info("hugetlb_cma: reserved %lu MiB on node %d\n", + size / SZ_1M, nid); + + if (reserved >= hugetlb_cma_size) + break; + } + + if (!reserved) + /* + * hugetlb_cma_size is used to determine if allocations from + * cma are possible. Set to zero if no cma regions are set up. + */ + hugetlb_cma_size = 0; +} + +void __init hugetlb_cma_check(void) +{ + if (!hugetlb_cma_size || cma_reserve_called) + return; + + pr_warn("hugetlb_cma: the option isn't supported by current arch\n"); +} + +bool hugetlb_cma_exclusive_alloc(void) +{ + return hugetlb_cma_only; +} + +unsigned long __init hugetlb_cma_total_size(void) +{ + return hugetlb_cma_size; +} + +void __init hugetlb_cma_validate_params(void) +{ + if (!hugetlb_cma_size) + hugetlb_cma_only = false; +} + +bool __init hugetlb_early_cma(struct hstate *h) +{ + if (arch_has_huge_bootmem_alloc()) + return false; + + return hstate_is_gigantic(h) && hugetlb_cma_only; +} diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h new file mode 100644 index 000000000000..f7d7fb9880a2 --- /dev/null +++ b/mm/hugetlb_cma.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HUGETLB_CMA_H +#define _LINUX_HUGETLB_CMA_H + +#ifdef CONFIG_CMA +void hugetlb_cma_free_folio(struct folio *folio); +struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask, + int nid, nodemask_t *nodemask); +struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, + bool node_exact); +void hugetlb_cma_check(void); +bool hugetlb_cma_exclusive_alloc(void); +unsigned long hugetlb_cma_total_size(void); +void hugetlb_cma_validate_params(void); +bool hugetlb_early_cma(struct hstate *h); +#else +static inline void hugetlb_cma_free_folio(struct folio *folio) +{ +} + +static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h, + gfp_t gfp_mask, int nid, nodemask_t *nodemask) +{ + return NULL; +} + +static inline +struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, + bool node_exact) +{ + return NULL; +} + +static inline void hugetlb_cma_check(void) +{ +} + +static inline bool hugetlb_cma_exclusive_alloc(void) +{ + return false; +} + +static inline unsigned long hugetlb_cma_total_size(void) +{ + return 0; +} + +static inline void hugetlb_cma_validate_params(void) +{ +} + +static inline bool hugetlb_early_cma(struct hstate *h) +{ + return false; +} +#endif +#endif From 2560c8c3f41d7a53e1554d1e4f207bf966e7527f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 19 Feb 2025 12:24:02 +0100 Subject: [PATCH 0977/2157] arm/pgtable: remove duplicate included header file The header file asm-generic/pgtable-nopud.h is included whether CONFIG_MMU is defined or not. Include it only once before the #ifndef/#else/#endif preprocessor directives and remove the following make includecheck warning: asm-generic/pgtable-nopud.h is included more than once Link: https://lkml.kernel.org/r/20250219112403.3959-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index be91e376df79..6b986ef6042f 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -19,14 +19,13 @@ extern struct page *empty_zero_page; #define ZERO_PAGE(vaddr) (empty_zero_page) #endif -#ifndef CONFIG_MMU - #include + +#ifndef CONFIG_MMU #include #else -#include #include #include From f809b9f3046f702bc1ae40695acb27c1b0abc346 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 19 Feb 2025 14:01:45 -0800 Subject: [PATCH 0978/2157] mm/damon: implement a new DAMOS filter type for unmapped pages Patch series "mm/damon: introduce DAMOS filter type for unmapped pages". User decides whether their memory will be mapped or unmapped. It implies that the two types of memory can have different characteristics and management requirements. Provide the DAMON-observaibility DAMOS-operation capability for the different types by introducing a new DAMOS filter type for unmapped pages. This patch (of 2): Implement yet another DAMOS filter type for unmapped pages on DAMON kernel API, and add support of it from the physical address space DAMON operations set (paddr). Since it is for only unmapped pages, support from the virtual address spaces DAMON operations set (vaddr) is not required. Link: https://lkml.kernel.org/r/20250219220146.133650-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250219220146.133650-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ mm/damon/paddr.c | 3 +++ mm/damon/sysfs-schemes.c | 1 + 3 files changed, 6 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 5e7ae7bca5dc..242910b190c9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -337,6 +337,7 @@ struct damos_stat { * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. + * @DAMOS_FILTER_TYPE_UNMAPPED: Unmapped pages. * @DAMOS_FILTER_TYPE_ADDR: Address range. * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. @@ -357,6 +358,7 @@ enum damos_filter_type { DAMOS_FILTER_TYPE_MEMCG, DAMOS_FILTER_TYPE_YOUNG, DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, + DAMOS_FILTER_TYPE_UNMAPPED, DAMOS_FILTER_TYPE_ADDR, DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 1a5974640b93..d5db313ca717 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -236,6 +236,9 @@ static bool damos_pa_filter_match(struct damos_filter *filter, matched = filter->sz_range.min <= folio_sz && folio_sz <= filter->sz_range.max; break; + case DAMOS_FILTER_TYPE_UNMAPPED: + matched = !folio_mapped(folio) || !folio_raw_mapping(folio); + break; default: break; } diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 881d00bb3a34..66a1c46cee84 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -331,6 +331,7 @@ static const char * const damon_sysfs_scheme_filter_type_strs[] = { "memcg", "young", "hugepage_size", + "unmapped", "addr", "target", }; From 375c28a0df0ee8869d0980228d659d91f85455f9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 19 Feb 2025 14:01:46 -0800 Subject: [PATCH 0979/2157] Docs/mm/damon/design: document unmapped DAMOS filter type Document availability and meaning of unmapped DAMOS filter type on design document. Since introduction of the type requires no additional user ABI, usage and ABI document need no update. Link: https://lkml.kernel.org/r/20250219220146.133650-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 6a66aa0833fd..5af991551a86 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -617,6 +617,8 @@ Below ``type`` of filters are currently supported. scheme. - hugepage_size - Applied to pages that managed in a given size range. + - unmapped + - Applied to pages that unmapped. To know how user-space can set the filters via :ref:`DAMON sysfs interface `, refer to :ref:`filters ` part of the From 9fa26fb554baaf71826814804749f5cff130c4d6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Feb 2025 08:36:07 +0000 Subject: [PATCH 0980/2157] mm/mincore: improve performance by adding an unlikely hint Adding an unlikely() hint on the masked start comparison error return path improves run-time performance of the mincore system call. Benchmarking on an i9-12900 shows an improvement of 7ns on mincore calls on a 256KB mmap'd region where 50% of the pages we resident. Improvement was from ~970 ns down to 963 ns, so a small ~0.7% improvement. Results based on running 20 tests with turbo disabled (to reduce clock freq turbo changes), with 10 second run per test and comparing the number of mincores calls per second. The % standard deviation of the 20 tests was ~0.10%, so results are reliable. Link: https://lkml.kernel.org/r/20250219083607.5183-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Cc: Matthew Wilcow Signed-off-by: Andrew Morton --- mm/mincore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mincore.c b/mm/mincore.c index d6bd19e520fc..832f29f46767 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -239,7 +239,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, start = untagged_addr(start); /* Check the start address: needs to be page-aligned.. */ - if (start & ~PAGE_MASK) + if (unlikely(start & ~PAGE_MASK)) return -EINVAL; /* ..and we need to be passed a valid user-space range */ From 58abac769b05f6e972d34a02a46a04589d6e9853 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Wed, 12 Feb 2025 10:58:42 +0800 Subject: [PATCH 0981/2157] mm/folio_queue: delete __folio_order and use folio_order directly __folio_order is the same as folio_order, remove __folio_order and then just include mm.h and use folio_order directly. Link: https://lkml.kernel.org/r/20250212025843.80283-2-liuye@kylinos.cn Signed-off-by: Liu Ye Reviewed-by: Shivank Garg Reviewed-by: Dev Jain Acked-by: David Howells Cc: Christian Brauner Signed-off-by: Andrew Morton --- include/linux/folio_queue.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 4d3f8074c137..45ad2408a80c 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -15,6 +15,7 @@ #define _LINUX_FOLIO_QUEUE_H #include +#include /* * Segment in a queue of running buffers. Each segment can hold a number of @@ -216,13 +217,6 @@ static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks3); } -static inline unsigned int __folio_order(struct folio *folio) -{ - if (!folio_test_large(folio)) - return 0; - return folio->_flags_1 & 0xff; -} - /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to @@ -241,7 +235,7 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); return slot; } @@ -263,7 +257,7 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); folioq_mark(folioq, slot); return slot; } From bd175a1d84e3ff05da032160ca2399437c23a59f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:10 +0900 Subject: [PATCH 0982/2157] zram: sleepable entry locking Patch series "zsmalloc/zram: there be preemption", v10. Currently zram runs compression and decompression in non-preemptible sections, e.g. zcomp_stream_get() // grabs CPU local lock zcomp_compress() or zram_slot_lock() // grabs entry spin-lock zcomp_stream_get() // grabs CPU local lock zs_map_object() // grabs rwlock and CPU local lock zcomp_decompress() Potentially a little troublesome for a number of reasons. For instance, this makes it impossible to use async compression algorithms or/and H/W compression algorithms, which can wait for OP completion or resource availability. This also restricts what compression algorithms can do internally, for example, zstd can allocate internal state memory for C/D dictionaries: do_fsync() do_writepages() zram_bio_write() zram_write_page() // become non-preemptible zcomp_compress() zstd_compress() ZSTD_compress_usingCDict() ZSTD_compressBegin_usingCDict_internal() ZSTD_resetCCtx_usingCDict() ZSTD_resetCCtx_internal() zstd_custom_alloc() // memory allocation Not to mention that the system can be configured to maximize compression ratio at a cost of CPU/HW time (e.g. lz4hc or deflate with very high compression level) so zram can stay in non-preemptible section (even under spin-lock or/and rwlock) for an extended period of time. Aside from compression algorithms, this also restricts what zram can do. One particular example is zram_write_page() zsmalloc handle allocation, which has an optimistic allocation (disallowing direct reclaim) and a pessimistic fallback path, which then forces zram to compress the page one more time. This series changes zram to not directly impose atomicity restrictions on compression algorithms (and on itself), which makes zram write() fully preemptible; zram read(), sadly, is not always preemptible yet. There are still indirect atomicity restrictions imposed by zsmalloc(). One notable example is object mapping API, which returns with: a) local CPU lock held b) zspage rwlock held First, zsmalloc's zspage lock is converted from rwlock to a special type of RW-lookalike look with some extra guarantees/features. Second, a new handle mapping is introduced which doesn't use per-CPU buffers (and hence no local CPU lock), does fewer memcpy() calls, but requires users to provide a pointer to temp buffer for object copy-in (when needed). Third, zram is converted to the new zsmalloc mapping API and thus zram read() becomes preemptible. This patch (of 19): Concurrent modifications of meta table entries is now handled by per-entry spin-lock. This has a number of shortcomings. First, this imposes atomic requirements on compression backends. zram can call both zcomp_compress() and zcomp_decompress() under entry spin-lock, which implies that we can use only compression algorithms that don't schedule/sleep/wait during compression and decompression. This, for instance, makes it impossible to use some of the ASYNC compression algorithms (H/W compression, etc.) implementations. Second, this can potentially trigger watchdogs. For example, entry re-compression with secondary algorithms is performed under entry spin-lock. Given that we chain secondary compression algorithms and that some of them can be configured for best compression ratio (and worst compression speed) zram can stay under spin-lock for quite some time. Having a per-entry mutex (or, for instance, a rw-semaphore) significantly increases sizeof() of each entry and hence the meta table. Therefore entry locking returns back to bit locking, as before, however, this time also preempt-rt friendly, because if waits-on-bit instead of spinning-on-bit. Lock owners are also now permitted to schedule, which is a first step on the path of making zram non-atomic. Link: https://lkml.kernel.org/r/20250303022425.285971-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20250303022425.285971-2-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 54 ++++++++++++++++++++++++++++------- drivers/block/zram/zram_drv.h | 15 ++++++---- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9f5020b077c5..70599d41b828 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -58,19 +58,56 @@ static void zram_free_page(struct zram *zram, size_t index); static int zram_read_from_zspool(struct zram *zram, struct page *page, u32 index); -static int zram_slot_trylock(struct zram *zram, u32 index) +#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) + +static void zram_slot_lock_init(struct zram *zram, u32 index) { - return spin_trylock(&zram->table[index].lock); + static struct lock_class_key __key; + + lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock", + &__key, 0); +} + +/* + * entry locking rules: + * + * 1) Lock is exclusive + * + * 2) lock() function can sleep waiting for the lock + * + * 3) Lock owner can sleep + * + * 4) Use TRY lock variant when in atomic context + * - must check return value and handle locking failers + */ +static __must_check bool zram_slot_trylock(struct zram *zram, u32 index) +{ + unsigned long *lock = &zram->table[index].flags; + + if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { + mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); + lock_acquired(slot_dep_map(zram, index), _RET_IP_); + return true; + } + + return false; } static void zram_slot_lock(struct zram *zram, u32 index) { - spin_lock(&zram->table[index].lock); + unsigned long *lock = &zram->table[index].flags; + + mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); + wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); + lock_acquired(slot_dep_map(zram, index), _RET_IP_); } static void zram_slot_unlock(struct zram *zram, u32 index) { - spin_unlock(&zram->table[index].lock); + unsigned long *lock = &zram->table[index].flags; + + mutex_release(slot_dep_map(zram, index), _RET_IP_); + clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); } static inline bool init_done(struct zram *zram) @@ -93,7 +130,6 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) zram->table[index].handle = handle; } -/* flag operations require table entry bit_spin_lock() being held */ static bool zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { @@ -1473,15 +1509,11 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) huge_class_size = zs_huge_class_size(zram->mem_pool); for (index = 0; index < num_pages; index++) - spin_lock_init(&zram->table[index].lock); + zram_slot_lock_init(zram, index); + return true; } -/* - * To protect concurrent access to the same index entry, - * caller should hold this table index entry's bit_spinlock to - * indicate this index entry is accessing. - */ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index db78d7c01b9a..c804f78a7fa8 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -28,7 +28,6 @@ #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) - /* * ZRAM is mainly used for memory efficiency so we want to keep memory * footprint small and thus squeeze size and zram pageflags into a flags @@ -46,6 +45,7 @@ /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { ZRAM_SAME = ZRAM_FLAG_SHIFT, /* Page consists the same element */ + ZRAM_ENTRY_LOCK, /* entry access lock bit */ ZRAM_WB, /* page is stored on backing_device */ ZRAM_PP_SLOT, /* Selected for post-processing */ ZRAM_HUGE, /* Incompressible page */ @@ -58,16 +58,19 @@ enum zram_pageflags { __NR_ZRAM_PAGEFLAGS, }; -/*-- Data structures */ - -/* Allocated for each disk page */ +/* + * Allocated for each disk page. We use bit-lock (ZRAM_ENTRY_LOCK bit + * of flags) to save memory. There can be plenty of entries and standard + * locking primitives (e.g. mutex) will significantly increase sizeof() + * of each entry and hence of the meta table. + */ struct zram_table_entry { unsigned long handle; - unsigned int flags; - spinlock_t lock; + unsigned long flags; #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME ktime_t ac_time; #endif + struct lockdep_map dep_map; }; struct zram_stats { From 2efa9e9eb4db2725c9f419f241cb0e09fc3d8574 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:11 +0900 Subject: [PATCH 0983/2157] zram: permit preemption with active compression stream Currently, per-CPU stream access is done from a non-preemptible (atomic) section, which imposes the same atomicity requirements on compression backends as entry spin-lock, and makes it impossible to use algorithms that can schedule/wait/sleep during compression and decompression. Switch to preemptible per-CPU model, similar to the one used in zswap. Instead of a per-CPU local lock, each stream carries a mutex which is locked throughout entire time zram uses it for compression or decompression, so that cpu-dead event waits for zram to stop using a particular per-CPU stream and release it. Link: https://lkml.kernel.org/r/20250303022425.285971-3-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Suggested-by: Yosry Ahmed Reviewed-by: Yosry Ahmed Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- drivers/block/zram/zcomp.c | 41 +++++++++++++++++++++++++---------- drivers/block/zram/zcomp.h | 6 ++--- drivers/block/zram/zram_drv.c | 20 ++++++++--------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index bb514403e305..53e4c37441be 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -109,13 +109,29 @@ ssize_t zcomp_available_show(const char *comp, char *buf) struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { - local_lock(&comp->stream->lock); - return this_cpu_ptr(comp->stream); + for (;;) { + struct zcomp_strm *zstrm = raw_cpu_ptr(comp->stream); + + /* + * Inspired by zswap + * + * stream is returned with ->mutex locked which prevents + * cpu_dead() from releasing this stream under us, however + * there is still a race window between raw_cpu_ptr() and + * mutex_lock(), during which we could have been migrated + * from a CPU that has already destroyed its stream. If + * so then unlock and re-try on the current CPU. + */ + mutex_lock(&zstrm->lock); + if (likely(zstrm->buffer)) + return zstrm; + mutex_unlock(&zstrm->lock); + } } -void zcomp_stream_put(struct zcomp *comp) +void zcomp_stream_put(struct zcomp_strm *zstrm) { - local_unlock(&comp->stream->lock); + mutex_unlock(&zstrm->lock); } int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, @@ -151,12 +167,9 @@ int zcomp_decompress(struct zcomp *comp, struct zcomp_strm *zstrm, int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { struct zcomp *comp = hlist_entry(node, struct zcomp, node); - struct zcomp_strm *zstrm; + struct zcomp_strm *zstrm = per_cpu_ptr(comp->stream, cpu); int ret; - zstrm = per_cpu_ptr(comp->stream, cpu); - local_lock_init(&zstrm->lock); - ret = zcomp_strm_init(comp, zstrm); if (ret) pr_err("Can't allocate a compression stream\n"); @@ -166,16 +179,17 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) { struct zcomp *comp = hlist_entry(node, struct zcomp, node); - struct zcomp_strm *zstrm; + struct zcomp_strm *zstrm = per_cpu_ptr(comp->stream, cpu); - zstrm = per_cpu_ptr(comp->stream, cpu); + mutex_lock(&zstrm->lock); zcomp_strm_free(comp, zstrm); + mutex_unlock(&zstrm->lock); return 0; } static int zcomp_init(struct zcomp *comp, struct zcomp_params *params) { - int ret; + int ret, cpu; comp->stream = alloc_percpu(struct zcomp_strm); if (!comp->stream) @@ -186,6 +200,9 @@ static int zcomp_init(struct zcomp *comp, struct zcomp_params *params) if (ret) goto cleanup; + for_each_possible_cpu(cpu) + mutex_init(&per_cpu_ptr(comp->stream, cpu)->lock); + ret = cpuhp_state_add_instance(CPUHP_ZCOMP_PREPARE, &comp->node); if (ret < 0) goto cleanup; diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index ad5762813842..23b8236b9090 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -3,7 +3,7 @@ #ifndef _ZCOMP_H_ #define _ZCOMP_H_ -#include +#include #define ZCOMP_PARAM_NO_LEVEL INT_MIN @@ -31,7 +31,7 @@ struct zcomp_ctx { }; struct zcomp_strm { - local_lock_t lock; + struct mutex lock; /* compression buffer */ void *buffer; struct zcomp_ctx ctx; @@ -77,7 +77,7 @@ struct zcomp *zcomp_create(const char *alg, struct zcomp_params *params); void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); -void zcomp_stream_put(struct zcomp *comp); +void zcomp_stream_put(struct zcomp_strm *zstrm); int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, const void *src, unsigned int *dst_len); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 70599d41b828..dd669d48ae6f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1607,7 +1607,7 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index) ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); kunmap_local(dst); zs_unmap_object(zram->mem_pool, handle); - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); return ret; } @@ -1768,14 +1768,14 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) kunmap_local(mem); if (unlikely(ret)) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); pr_err("Compression failed! err=%d\n", ret); zs_free(zram->mem_pool, handle); return ret; } if (comp_len >= huge_class_size) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); return write_incompressible_page(zram, page, index); } @@ -1799,7 +1799,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR_VALUE(handle)) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); atomic64_inc(&zram->stats.writestall); handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | @@ -1811,7 +1811,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) } if (!zram_can_store_page(zram)) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); zs_free(zram->mem_pool, handle); return -ENOMEM; } @@ -1819,7 +1819,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); memcpy(dst, zstrm->buffer, comp_len); - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); zs_unmap_object(zram->mem_pool, handle); zram_slot_lock(zram, index); @@ -1978,7 +1978,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, kunmap_local(src); if (ret) { - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); return ret; } @@ -1988,7 +1988,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, /* Continue until we make progress */ if (class_index_new >= class_index_old || (threshold && comp_len_new >= threshold)) { - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); continue; } @@ -2046,13 +2046,13 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR_VALUE(handle_new)) { - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); return PTR_ERR((void *)handle_new); } dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); memcpy(dst, zstrm->buffer, comp_len_new); - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); zs_unmap_object(zram->mem_pool, handle_new); From be656187b8a9c07bd19e5268fa626ecb674876c6 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:12 +0900 Subject: [PATCH 0984/2157] zram: remove unused crypto include We stopped using crypto API (for the time being), so remove its include and replace CRYPTO_MAX_ALG_NAME with a local define. Link: https://lkml.kernel.org/r/20250303022425.285971-4-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zcomp.c | 1 - drivers/block/zram/zram_drv.c | 4 +++- drivers/block/zram/zram_drv.h | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 53e4c37441be..cfdde2e0748a 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "zcomp.h" diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index dd669d48ae6f..248dab7cc7f4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -44,6 +44,8 @@ static DEFINE_MUTEX(zram_index_mutex); static int zram_major; static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; +#define ZRAM_MAX_ALGO_NAME_SZ 128 + /* Module params (documentation at end) */ static unsigned int num_devices = 1; /* @@ -1148,7 +1150,7 @@ static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) size_t sz; sz = strlen(buf); - if (sz >= CRYPTO_MAX_ALG_NAME) + if (sz >= ZRAM_MAX_ALGO_NAME_SZ) return -E2BIG; compressor = kstrdup(buf, GFP_KERNEL); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c804f78a7fa8..7c11f9dab335 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -17,7 +17,6 @@ #include #include -#include #include "zcomp.h" From 4127e13c9302f6892f73793f133ea4b4fffb2964 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:13 +0900 Subject: [PATCH 0985/2157] zram: remove max_comp_streams device attr max_comp_streams device attribute has been defunct since May 2016 when zram switched to per-CPU compression streams, remove it. Link: https://lkml.kernel.org/r/20250303022425.285971-5-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-block-zram | 8 ----- Documentation/admin-guide/blockdev/zram.rst | 36 ++++++--------------- drivers/block/zram/zram_drv.c | 23 ------------- 3 files changed, 10 insertions(+), 57 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 1ef69e0271f9..36c57de0a10a 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -22,14 +22,6 @@ Description: device. The reset operation frees all the memory associated with this device. -What: /sys/block/zram/max_comp_streams -Date: February 2014 -Contact: Sergey Senozhatsky -Description: - The max_comp_streams file is read-write and specifies the - number of backend's zcomp_strm compression streams (number of - concurrent compress operations). - What: /sys/block/zram/comp_algorithm Date: February 2014 Contact: Sergey Senozhatsky diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 1576fb93f06c..9bdb30901a93 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -54,7 +54,7 @@ The list of possible return codes: If you use 'echo', the returned value is set by the 'echo' utility, and, in general case, something like:: - echo 3 > /sys/block/zram0/max_comp_streams + echo foo > /sys/block/zram0/comp_algorithm if [ $? -ne 0 ]; then handle_error fi @@ -73,21 +73,7 @@ This creates 4 devices: /dev/zram{0,1,2,3} num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. -2) Set max number of compression streams -======================================== - -Regardless of the value passed to this attribute, ZRAM will always -allocate multiple compression streams - one per online CPU - thus -allowing several concurrent compression operations. The number of -allocated compression streams goes down when some of the CPUs -become offline. There is no single-compression-stream mode anymore, -unless you are running a UP system or have only 1 CPU online. - -To find out how many streams are currently available:: - - cat /sys/block/zram0/max_comp_streams - -3) Select compression algorithm +2) Select compression algorithm =============================== Using comp_algorithm device attribute one can see available and @@ -107,7 +93,7 @@ Examples:: For the time being, the `comp_algorithm` content shows only compression algorithms that are supported by zram. -4) Set compression algorithm parameters: Optional +3) Set compression algorithm parameters: Optional ================================================= Compression algorithms may support specific parameters which can be @@ -138,7 +124,7 @@ better the compression ratio, it even can take negatives values for some algorithms), for other algorithms `level` is acceleration level (the higher the value the lower the compression ratio). -5) Set Disksize +4) Set Disksize =============== Set disk size by writing the value to sysfs node 'disksize'. @@ -158,7 +144,7 @@ There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. -6) Set memory limit: Optional +5) Set memory limit: Optional ============================= Set memory limit by writing the value to sysfs node 'mem_limit'. @@ -177,7 +163,7 @@ Examples:: # To disable memory limit echo 0 > /sys/block/zram0/mem_limit -7) Activate +6) Activate =========== :: @@ -188,7 +174,7 @@ Examples:: mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -8) Add/remove zram devices +7) Add/remove zram devices ========================== zram provides a control interface, which enables dynamic (on-demand) device @@ -208,7 +194,7 @@ execute:: echo X > /sys/class/zram-control/hot_remove -9) Stats +8) Stats ======== Per-device statistics are exported as various nodes under /sys/block/zram/ @@ -228,8 +214,6 @@ mem_limit WO specifies the maximum amount of memory ZRAM can writeback_limit WO specifies the maximum amount of write IO zram can write out to backing device as 4KB unit writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress - operations comp_algorithm RW show and change the compression algorithm algorithm_params WO setup compression algorithm parameters compact WO trigger memory compaction @@ -310,7 +294,7 @@ a single line of text and contains the following stats separated by whitespace: Unit: 4K bytes ============== ============================================================= -10) Deactivate +9) Deactivate ============== :: @@ -318,7 +302,7 @@ a single line of text and contains the following stats separated by whitespace: swapoff /dev/zram0 umount /dev/zram1 -11) Reset +10) Reset ========= Write any positive value to 'reset' sysfs node:: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 248dab7cc7f4..93cedc60ac16 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1103,27 +1103,6 @@ static void zram_debugfs_register(struct zram *zram) {}; static void zram_debugfs_unregister(struct zram *zram) {}; #endif -/* - * We switched to per-cpu streams and this attr is not needed anymore. - * However, we will keep it around for some time, because: - * a) we may revert per-cpu streams in the future - * b) it's visible to user space and we need to follow our 2 years - * retirement rule; but we already have a number of 'soon to be - * altered' attrs, so max_comp_streams need to wait for the next - * layoff cycle. - */ -static ssize_t max_comp_streams_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); -} - -static ssize_t max_comp_streams_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - return len; -} - static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) { /* Do not free statically defined compression algorithms */ @@ -2540,7 +2519,6 @@ static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_WO(idle); -static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK static DEVICE_ATTR_RW(backing_dev); @@ -2562,7 +2540,6 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, &dev_attr_idle.attr, - &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, #ifdef CONFIG_ZRAM_WRITEBACK &dev_attr_backing_dev.attr, From 80af56cb29332b78792ca1bc785157e404b10004 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:14 +0900 Subject: [PATCH 0986/2157] zram: remove second stage of handle allocation Previously zram write() was atomic which required us to pass __GFP_KSWAPD_RECLAIM to zsmalloc handle allocation on a fast path and attempt a slow path allocation (with recompression) if the fast path failed. Since we are not in atomic context anymore we can permit direct reclaim during handle allocation, and hence can have a single allocation path. There is no slow path anymore so we don't unlock per-CPU stream (and don't lose compressed data) which means that there is no need to do recompression now (which should reduce CPU and battery usage). Link: https://lkml.kernel.org/r/20250303022425.285971-6-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 39 +++++++---------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 93cedc60ac16..f043f35b17a4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1723,11 +1723,11 @@ static int write_incompressible_page(struct zram *zram, struct page *page, static int zram_write_page(struct zram *zram, struct page *page, u32 index) { int ret = 0; - unsigned long handle = -ENOMEM; - unsigned int comp_len = 0; + unsigned long handle; + unsigned int comp_len; void *dst, *mem; struct zcomp_strm *zstrm; - unsigned long element = 0; + unsigned long element; bool same_filled; /* First, free memory allocated to this slot (if any) */ @@ -1741,7 +1741,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) if (same_filled) return write_same_filled_page(zram, element, index); -compress_again: zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); mem = kmap_local_page(page); ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, @@ -1751,7 +1750,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) if (unlikely(ret)) { zcomp_stream_put(zstrm); pr_err("Compression failed! err=%d\n", ret); - zs_free(zram->mem_pool, handle); return ret; } @@ -1760,35 +1758,12 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) return write_incompressible_page(zram, page, index); } - /* - * handle allocation has 2 paths: - * a) fast path is executed with preemption disabled (for - * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, - * since we can't sleep; - * b) slow path enables preemption and attempts to allocate - * the page with __GFP_DIRECT_RECLAIM bit set. we have to - * put per-cpu compression stream and, thus, to re-do - * the compression once handle is allocated. - * - * if we have a 'non-null' handle here then we are coming - * from the slow path and handle has already been allocated. - */ - if (IS_ERR_VALUE(handle)) - handle = zs_malloc(zram->mem_pool, comp_len, - __GFP_KSWAPD_RECLAIM | - __GFP_NOWARN | - __GFP_HIGHMEM | - __GFP_MOVABLE); + handle = zs_malloc(zram->mem_pool, comp_len, + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR_VALUE(handle)) { zcomp_stream_put(zstrm); - atomic64_inc(&zram->stats.writestall); - handle = zs_malloc(zram->mem_pool, comp_len, - GFP_NOIO | __GFP_HIGHMEM | - __GFP_MOVABLE); - if (IS_ERR_VALUE(handle)) - return PTR_ERR((void *)handle); - - goto compress_again; + return PTR_ERR((void *)handle); } if (!zram_can_store_page(zram)) { From 9c7ccc8d99ad4afbfb7f6328dff0a88bed63bc27 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:15 +0900 Subject: [PATCH 0987/2157] zram: add GFP_NOWARN to incompressible zsmalloc handle allocation We normally use __GFP_NOWARN for zsmalloc handle allocations, add it to write_incompressible_page() allocation too. Link: https://lkml.kernel.org/r/20250303022425.285971-7-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f043f35b17a4..249a936b6aac 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1691,7 +1691,8 @@ static int write_incompressible_page(struct zram *zram, struct page *page, * like we do for compressible pages. */ handle = zs_malloc(zram->mem_pool, PAGE_SIZE, - GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR_VALUE(handle)) return PTR_ERR((void *)handle); From f3b0c6c8996a8b4e717f71e13ebb7501841f0eb7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:16 +0900 Subject: [PATCH 0988/2157] zram: remove writestall zram_stats member There is no zsmalloc handle allocation slow path now and writestall is not possible any longer. Remove it from zram_stats. Link: https://lkml.kernel.org/r/20250303022425.285971-8-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 3 +-- drivers/block/zram/zram_drv.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 249a936b6aac..fc9321af3ef4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1437,9 +1437,8 @@ static ssize_t debug_stat_show(struct device *dev, down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "version: %d\n%8llu %8llu\n", + "version: %d\n0 %8llu\n", version, - (u64)atomic64_read(&zram->stats.writestall), (u64)atomic64_read(&zram->stats.miss_free)); up_read(&zram->init_lock); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 7c11f9dab335..6cee93f9c0d0 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -82,7 +82,6 @@ struct zram_stats { atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ - atomic64_t writestall; /* no. of write slow paths */ atomic64_t miss_free; /* no. of missed free */ #ifdef CONFIG_ZRAM_WRITEBACK atomic64_t bd_count; /* no. of pages in backing device */ From d7fdc5a620aeecf96074cf8c1b852539b12cb067 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:17 +0900 Subject: [PATCH 0989/2157] zram: limit max recompress prio to num_active_comps Use the actual number of algorithms zram was configure with instead of theoretical limit of ZRAM_MAX_COMPS. Also make sure that min prio is not above max prio. Link: https://lkml.kernel.org/r/20250303022425.285971-9-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fc9321af3ef4..776c31606eec 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2027,16 +2027,19 @@ static ssize_t recompress_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; struct zram *zram = dev_to_zram(dev); char *args, *param, *val, *algo = NULL; u64 num_recomp_pages = ULLONG_MAX; struct zram_pp_ctl *ctl = NULL; struct zram_pp_slot *pps; u32 mode = 0, threshold = 0; + u32 prio, prio_max; struct page *page; ssize_t ret; + prio = ZRAM_SECONDARY_COMP; + prio_max = zram->num_active_comps; + args = skip_spaces(buf); while (*args) { args = next_arg(args, ¶m, &val); @@ -2089,7 +2092,7 @@ static ssize_t recompress_store(struct device *dev, if (prio == ZRAM_PRIMARY_COMP) prio = ZRAM_SECONDARY_COMP; - prio_max = min(prio + 1, ZRAM_MAX_COMPS); + prio_max = prio + 1; continue; } } @@ -2117,7 +2120,7 @@ static ssize_t recompress_store(struct device *dev, continue; if (!strcmp(zram->comp_algs[prio], algo)) { - prio_max = min(prio + 1, ZRAM_MAX_COMPS); + prio_max = prio + 1; found = true; break; } @@ -2129,6 +2132,12 @@ static ssize_t recompress_store(struct device *dev, } } + prio_max = min(prio_max, (u32)zram->num_active_comps); + if (prio >= prio_max) { + ret = -EINVAL; + goto release_init_lock; + } + page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; From 9724bef96df47ba0dec907416cfa0b5e1cd7202e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:18 +0900 Subject: [PATCH 0990/2157] zram: filter out recomp targets based on priority Do no select for post processing slots that are already compressed with same or higher priority compression algorithm. This should save some memory, as previously we would still put those entries into corresponding post-processing buckets and filter them out later in recompress_slot(). Link: https://lkml.kernel.org/r/20250303022425.285971-10-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 776c31606eec..6dee885bef9b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1823,7 +1823,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, #define RECOMPRESS_IDLE (1 << 0) #define RECOMPRESS_HUGE (1 << 1) -static int scan_slots_for_recompress(struct zram *zram, u32 mode, +static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, struct zram_pp_ctl *ctl) { unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; @@ -1855,6 +1855,10 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; + /* Already compressed with same of higher priority */ + if (zram_get_priority(zram, index) + 1 >= prio_max) + goto next; + pps->index = index; place_pp_slot(zram, ctl, pps); pps = NULL; @@ -1911,6 +1915,16 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, zram_clear_flag(zram, index, ZRAM_IDLE); class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); + + prio = max(prio, zram_get_priority(zram, index) + 1); + /* + * Recompression slots scan should not select slots that are + * already compressed with a higher priority algorithm, but + * just in case + */ + if (prio >= prio_max) + return 0; + /* * Iterate the secondary comp algorithms list (in order of priority) * and try to recompress the page. @@ -1919,13 +1933,6 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (!zram->comps[prio]) continue; - /* - * Skip if the object is already re-compressed with a higher - * priority algorithm (or same algorithm). - */ - if (prio <= zram_get_priority(zram, index)) - continue; - num_recomps++; zstrm = zcomp_stream_get(zram->comps[prio]); src = kmap_local_page(page); @@ -2150,7 +2157,7 @@ static ssize_t recompress_store(struct device *dev, goto release_init_lock; } - scan_slots_for_recompress(zram, mode, ctl); + scan_slots_for_recompress(zram, mode, prio_max, ctl); ret = len; while ((pps = select_pp_slot(ctl))) { From b0624f0b223420047625eaff66af9e9d6d3df85f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:19 +0900 Subject: [PATCH 0991/2157] zram: rework recompression loop This reworks recompression loop handling: - set a rule that stream-put NULLs the stream pointer If the loop returns with a non-NULL stream then it's a successful recompression, otherwise the stream should always be NULL. - do not count the number of recompressions Mark object as incompressible as soon as the algorithm with the highest priority failed to compress that object. - count compression errors as resource usage Even if compression has failed, we still need to bump num_recomp_pages counter. Link: https://lkml.kernel.org/r/20250303022425.285971-11-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 54 +++++++++++++---------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6dee885bef9b..bb88b63d193b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1888,9 +1888,8 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, unsigned int comp_len_new; unsigned int class_index_old; unsigned int class_index_new; - u32 num_recomps = 0; void *src, *dst; - int ret; + int ret = 0; handle_old = zram_get_handle(zram, index); if (!handle_old) @@ -1933,7 +1932,6 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (!zram->comps[prio]) continue; - num_recomps++; zstrm = zcomp_stream_get(zram->comps[prio]); src = kmap_local_page(page); ret = zcomp_compress(zram->comps[prio], zstrm, @@ -1942,7 +1940,8 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (ret) { zcomp_stream_put(zstrm); - return ret; + zstrm = NULL; + break; } class_index_new = zs_lookup_class_index(zram->mem_pool, @@ -1952,6 +1951,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (class_index_new >= class_index_old || (threshold && comp_len_new >= threshold)) { zcomp_stream_put(zstrm); + zstrm = NULL; continue; } @@ -1959,14 +1959,6 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, break; } - /* - * We did not try to recompress, e.g. when we have only one - * secondary algorithm and the page is already recompressed - * using that algorithm - */ - if (!zstrm) - return 0; - /* * Decrement the limit (if set) on pages we can recompress, even * when current recompression was unsuccessful or did not compress @@ -1976,38 +1968,32 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (*num_recomp_pages) *num_recomp_pages -= 1; - if (class_index_new >= class_index_old) { + /* Compression error */ + if (ret) + return ret; + + if (!zstrm) { /* * Secondary algorithms failed to re-compress the page - * in a way that would save memory, mark the object as - * incompressible so that we will not try to compress - * it again. + * in a way that would save memory. * - * We need to make sure that all secondary algorithms have - * failed, so we test if the number of recompressions matches - * the number of active secondary algorithms. + * Mark the object incompressible if the max-priority + * algorithm couldn't re-compress it. */ - if (num_recomps == zram->num_active_comps - 1) - zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + if (prio < zram->num_active_comps) + return 0; + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } - /* Successful recompression but above threshold */ - if (threshold && comp_len_new >= threshold) - return 0; - /* - * No direct reclaim (slow path) for handle allocation and no - * re-compression attempt (unlike in zram_write_bvec()) since - * we already have stored that object in zsmalloc. If we cannot - * alloc memory for recompressed object then we bail out and - * simply keep the old (existing) object in zsmalloc. + * We are holding per-CPU stream mutex and entry lock so better + * avoid direct reclaim. Allocation error is not fatal since + * we still have the old object in the mem_pool. */ handle_new = zs_malloc(zram->mem_pool, comp_len_new, - __GFP_KSWAPD_RECLAIM | - __GFP_NOWARN | - __GFP_HIGHMEM | - __GFP_MOVABLE); + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE); if (IS_ERR_VALUE(handle_new)) { zcomp_stream_put(zstrm); return PTR_ERR((void *)handle_new); From 7e1b0212d4d59281a80c836841539a91b724bf24 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:20 +0900 Subject: [PATCH 0992/2157] zram: move post-processing target allocation Allocate post-processing target in place_pp_slot(). This simplifies scan_slots_for_writeback() and scan_slots_for_recompress() loops because we don't need to track pps pointer state anymore. Previously we have to explicitly NULL the point if it has been added to a post-processing bucket or re-use previously allocated pointer otherwise and make sure we don't leak the memory in the end. We are also fine doing GFP_NOIO allocation, as post-processing can be called under memory pressure so we better pick as many slots as we can as soon as we can and start post-processing them, possibly saving the memory. Allocation failure there is not fatal, we will post-process whatever we put into the buckets on previous iterations. Link: https://lkml.kernel.org/r/20250303022425.285971-12-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 50 +++++++++++++++-------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bb88b63d193b..f6e887f94b71 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -295,15 +295,24 @@ static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) kfree(ctl); } -static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, - struct zram_pp_slot *pps) +static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, + u32 index) { - u32 idx; + struct zram_pp_slot *pps; + u32 bid; - idx = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; - list_add(&pps->entry, &ctl->pp_buckets[idx]); + pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN); + if (!pps) + return false; + + INIT_LIST_HEAD(&pps->entry); + pps->index = index; + + bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; + list_add(&pps->entry, &ctl->pp_buckets[bid]); zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); + return true; } static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) @@ -737,15 +746,8 @@ static int scan_slots_for_writeback(struct zram *zram, u32 mode, unsigned long index, struct zram_pp_ctl *ctl) { - struct zram_pp_slot *pps = NULL; - for (; nr_pages != 0; index++, nr_pages--) { - if (!pps) - pps = kmalloc(sizeof(*pps), GFP_KERNEL); - if (!pps) - return -ENOMEM; - - INIT_LIST_HEAD(&pps->entry); + bool ok = true; zram_slot_lock(zram, index); if (!zram_allocated(zram, index)) @@ -765,14 +767,13 @@ static int scan_slots_for_writeback(struct zram *zram, u32 mode, !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; - pps->index = index; - place_pp_slot(zram, ctl, pps); - pps = NULL; + ok = place_pp_slot(zram, ctl, index); next: zram_slot_unlock(zram, index); + if (!ok) + break; } - kfree(pps); return 0; } @@ -1827,16 +1828,10 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, struct zram_pp_ctl *ctl) { unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - struct zram_pp_slot *pps = NULL; unsigned long index; for (index = 0; index < nr_pages; index++) { - if (!pps) - pps = kmalloc(sizeof(*pps), GFP_KERNEL); - if (!pps) - return -ENOMEM; - - INIT_LIST_HEAD(&pps->entry); + bool ok = true; zram_slot_lock(zram, index); if (!zram_allocated(zram, index)) @@ -1859,14 +1854,13 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, if (zram_get_priority(zram, index) + 1 >= prio_max) goto next; - pps->index = index; - place_pp_slot(zram, ctl, pps); - pps = NULL; + ok = place_pp_slot(zram, ctl, index); next: zram_slot_unlock(zram, index); + if (!ok) + break; } - kfree(pps); return 0; } From 0d6fa44e4e25f57cca8f9a638369d5bf58412cb6 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:21 +0900 Subject: [PATCH 0993/2157] zsmalloc: rename pool lock The old name comes from the times when the pool did not have compaction (defragmentation). Rename it to ->lock because these days it synchronizes not only migration. Link: https://lkml.kernel.org/r/20250303022425.285971-13-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Reviewed-by: Yosry Ahmed Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 6d0e47f7ae33..2e338cde0d21 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -18,7 +18,7 @@ /* * lock ordering: * page_lock - * pool->migrate_lock + * pool->lock * class->lock * zspage->lock */ @@ -223,8 +223,8 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct work_struct free_work; #endif - /* protect page/zspage migration */ - rwlock_t migrate_lock; + /* protect zspage migration/compaction */ + rwlock_t lock; atomic_t compaction_in_progress; }; @@ -1206,7 +1206,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, BUG_ON(in_interrupt()); /* It guarantees it can get zspage from handle safely */ - read_lock(&pool->migrate_lock); + read_lock(&pool->lock); obj = handle_to_obj(handle); obj_to_location(obj, &zpdesc, &obj_idx); zspage = get_zspage(zpdesc); @@ -1218,7 +1218,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, * which is smaller granularity. */ migrate_read_lock(zspage); - read_unlock(&pool->migrate_lock); + read_unlock(&pool->lock); class = zspage_class(pool, zspage); off = offset_in_page(class->size * obj_idx); @@ -1450,16 +1450,16 @@ void zs_free(struct zs_pool *pool, unsigned long handle) return; /* - * The pool->migrate_lock protects the race with zpage's migration + * The pool->lock protects the race with zpage's migration * so it's safe to get the page from handle. */ - read_lock(&pool->migrate_lock); + read_lock(&pool->lock); obj = handle_to_obj(handle); obj_to_zpdesc(obj, &f_zpdesc); zspage = get_zspage(f_zpdesc); class = zspage_class(pool, zspage); spin_lock(&class->lock); - read_unlock(&pool->migrate_lock); + read_unlock(&pool->lock); class_stat_sub(class, ZS_OBJS_INUSE, 1); obj_free(class->size, obj); @@ -1796,7 +1796,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, * The pool migrate_lock protects the race between zpage migration * and zs_free. */ - write_lock(&pool->migrate_lock); + write_lock(&pool->lock); class = zspage_class(pool, zspage); /* @@ -1833,7 +1833,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, * Since we complete the data copy and set up new zspage structure, * it's okay to release migration_lock. */ - write_unlock(&pool->migrate_lock); + write_unlock(&pool->lock); spin_unlock(&class->lock); migrate_write_unlock(zspage); @@ -1956,7 +1956,7 @@ static unsigned long __zs_compact(struct zs_pool *pool, * protect the race between zpage migration and zs_free * as well as zpage allocation/free */ - write_lock(&pool->migrate_lock); + write_lock(&pool->lock); spin_lock(&class->lock); while (zs_can_compact(class)) { int fg; @@ -1983,14 +1983,14 @@ static unsigned long __zs_compact(struct zs_pool *pool, src_zspage = NULL; if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100 - || rwlock_is_contended(&pool->migrate_lock)) { + || rwlock_is_contended(&pool->lock)) { putback_zspage(class, dst_zspage); dst_zspage = NULL; spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); + write_unlock(&pool->lock); cond_resched(); - write_lock(&pool->migrate_lock); + write_lock(&pool->lock); spin_lock(&class->lock); } } @@ -2002,7 +2002,7 @@ static unsigned long __zs_compact(struct zs_pool *pool, putback_zspage(class, dst_zspage); spin_unlock(&class->lock); - write_unlock(&pool->migrate_lock); + write_unlock(&pool->lock); return pages_freed; } @@ -2014,10 +2014,10 @@ unsigned long zs_compact(struct zs_pool *pool) unsigned long pages_freed = 0; /* - * Pool compaction is performed under pool->migrate_lock so it is basically + * Pool compaction is performed under pool->lock so it is basically * single-threaded. Having more than one thread in __zs_compact() - * will increase pool->migrate_lock contention, which will impact other - * zsmalloc operations that need pool->migrate_lock. + * will increase pool->lock contention, which will impact other + * zsmalloc operations that need pool->lock. */ if (atomic_xchg(&pool->compaction_in_progress, 1)) return 0; @@ -2139,7 +2139,7 @@ struct zs_pool *zs_create_pool(const char *name) return NULL; init_deferred_free(pool); - rwlock_init(&pool->migrate_lock); + rwlock_init(&pool->lock); atomic_set(&pool->compaction_in_progress, 0); pool->name = kstrdup(name, GFP_KERNEL); From e27af3f9360ee130b7ab0b274088f92146a0855b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:22 +0900 Subject: [PATCH 0994/2157] zsmalloc: sleepable zspage reader-lock In order to implement preemptible object mapping we need a zspage lock that satisfies several preconditions: - it should be reader-write type of a lock - it should be possible to hold it from any context, but also being preemptible if the context allows it - we never sleep while acquiring but can sleep while holding in read mode An rwsemaphore doesn't suffice, due to atomicity requirements, rwlock doesn't satisfy due to reader-preemptability requirement. It's also worth to mention, that per-zspage rwsem is a little too memory heavy (we can easily have double digits megabytes used only on rwsemaphores). Switch over from rwlock_t to a atomic_t-based implementation of a reader-writer semaphore that satisfies all of the preconditions. The spin-lock based zspage_lock is suggested by Hillf Danton. Link: https://lkml.kernel.org/r/20250303022425.285971-14-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Suggested-by: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 166 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 114 insertions(+), 52 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2e338cde0d21..818bf381a517 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -257,6 +257,15 @@ static inline void free_zpdesc(struct zpdesc *zpdesc) __free_page(page); } +#define ZS_PAGE_UNLOCKED 0 +#define ZS_PAGE_WRLOCKED -1 + +struct zspage_lock { + spinlock_t lock; + int cnt; + struct lockdep_map dep_map; +}; + struct zspage { struct { unsigned int huge:HUGE_BITS; @@ -269,7 +278,7 @@ struct zspage { struct zpdesc *first_zpdesc; struct list_head list; /* fullness list */ struct zs_pool *pool; - rwlock_t lock; + struct zspage_lock zsl; }; struct mapping_area { @@ -279,6 +288,84 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; +static void zspage_lock_init(struct zspage *zspage) +{ + static struct lock_class_key __key; + struct zspage_lock *zsl = &zspage->zsl; + + lockdep_init_map(&zsl->dep_map, "zspage->lock", &__key, 0); + spin_lock_init(&zsl->lock); + zsl->cnt = ZS_PAGE_UNLOCKED; +} + +/* + * The zspage lock can be held from atomic contexts, but it needs to remain + * preemptible when held for reading because it remains held outside of those + * atomic contexts, otherwise we unnecessarily lose preemptibility. + * + * To achieve this, the following rules are enforced on readers and writers: + * + * - Writers are blocked by both writers and readers, while readers are only + * blocked by writers (i.e. normal rwlock semantics). + * + * - Writers are always atomic (to allow readers to spin waiting for them). + * + * - Writers always use trylock (as the lock may be held be sleeping readers). + * + * - Readers may spin on the lock (as they can only wait for atomic writers). + * + * - Readers may sleep while holding the lock (as writes only use trylock). + */ +static void zspage_read_lock(struct zspage *zspage) +{ + struct zspage_lock *zsl = &zspage->zsl; + + rwsem_acquire_read(&zsl->dep_map, 0, 0, _RET_IP_); + + spin_lock(&zsl->lock); + zsl->cnt++; + spin_unlock(&zsl->lock); + + lock_acquired(&zsl->dep_map, _RET_IP_); +} + +static void zspage_read_unlock(struct zspage *zspage) +{ + struct zspage_lock *zsl = &zspage->zsl; + + rwsem_release(&zsl->dep_map, _RET_IP_); + + spin_lock(&zsl->lock); + zsl->cnt--; + spin_unlock(&zsl->lock); +} + +static __must_check bool zspage_write_trylock(struct zspage *zspage) +{ + struct zspage_lock *zsl = &zspage->zsl; + + spin_lock(&zsl->lock); + if (zsl->cnt == ZS_PAGE_UNLOCKED) { + zsl->cnt = ZS_PAGE_WRLOCKED; + rwsem_acquire(&zsl->dep_map, 0, 1, _RET_IP_); + lock_acquired(&zsl->dep_map, _RET_IP_); + return true; + } + + spin_unlock(&zsl->lock); + return false; +} + +static void zspage_write_unlock(struct zspage *zspage) +{ + struct zspage_lock *zsl = &zspage->zsl; + + rwsem_release(&zsl->dep_map, _RET_IP_); + + zsl->cnt = ZS_PAGE_UNLOCKED; + spin_unlock(&zsl->lock); +} + /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ static void SetZsHugePage(struct zspage *zspage) { @@ -290,12 +377,6 @@ static bool ZsHugePage(struct zspage *zspage) return zspage->huge; } -static void migrate_lock_init(struct zspage *zspage); -static void migrate_read_lock(struct zspage *zspage); -static void migrate_read_unlock(struct zspage *zspage); -static void migrate_write_lock(struct zspage *zspage); -static void migrate_write_unlock(struct zspage *zspage); - #ifdef CONFIG_COMPACTION static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); @@ -992,7 +1073,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, return NULL; zspage->magic = ZSPAGE_MAGIC; - migrate_lock_init(zspage); + zspage->pool = pool; + zspage->class = class->index; + zspage_lock_init(zspage); for (i = 0; i < class->pages_per_zspage; i++) { struct zpdesc *zpdesc; @@ -1015,8 +1098,6 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, create_page_chain(class, zspage, zpdescs); init_zspage(class, zspage); - zspage->pool = pool; - zspage->class = class->index; return zspage; } @@ -1217,7 +1298,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, * zs_unmap_object API so delegate the locking from class to zspage * which is smaller granularity. */ - migrate_read_lock(zspage); + zspage_read_lock(zspage); read_unlock(&pool->lock); class = zspage_class(pool, zspage); @@ -1277,7 +1358,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) } local_unlock(&zs_map_area.lock); - migrate_read_unlock(zspage); + zspage_read_unlock(zspage); } EXPORT_SYMBOL_GPL(zs_unmap_object); @@ -1671,18 +1752,18 @@ static void lock_zspage(struct zspage *zspage) /* * Pages we haven't locked yet can be migrated off the list while we're * trying to lock them, so we need to be careful and only attempt to - * lock each page under migrate_read_lock(). Otherwise, the page we lock + * lock each page under zspage_read_lock(). Otherwise, the page we lock * may no longer belong to the zspage. This means that we may wait for * the wrong page to unlock, so we must take a reference to the page - * prior to waiting for it to unlock outside migrate_read_lock(). + * prior to waiting for it to unlock outside zspage_read_lock(). */ while (1) { - migrate_read_lock(zspage); + zspage_read_lock(zspage); zpdesc = get_first_zpdesc(zspage); if (zpdesc_trylock(zpdesc)) break; zpdesc_get(zpdesc); - migrate_read_unlock(zspage); + zspage_read_unlock(zspage); zpdesc_wait_locked(zpdesc); zpdesc_put(zpdesc); } @@ -1693,41 +1774,16 @@ static void lock_zspage(struct zspage *zspage) curr_zpdesc = zpdesc; } else { zpdesc_get(zpdesc); - migrate_read_unlock(zspage); + zspage_read_unlock(zspage); zpdesc_wait_locked(zpdesc); zpdesc_put(zpdesc); - migrate_read_lock(zspage); + zspage_read_lock(zspage); } } - migrate_read_unlock(zspage); + zspage_read_unlock(zspage); } #endif /* CONFIG_COMPACTION */ -static void migrate_lock_init(struct zspage *zspage) -{ - rwlock_init(&zspage->lock); -} - -static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock) -{ - read_lock(&zspage->lock); -} - -static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock) -{ - read_unlock(&zspage->lock); -} - -static void migrate_write_lock(struct zspage *zspage) -{ - write_lock(&zspage->lock); -} - -static void migrate_write_unlock(struct zspage *zspage) -{ - write_unlock(&zspage->lock); -} - #ifdef CONFIG_COMPACTION static const struct movable_operations zsmalloc_mops; @@ -1785,9 +1841,6 @@ static int zs_page_migrate(struct page *newpage, struct page *page, VM_BUG_ON_PAGE(!zpdesc_is_isolated(zpdesc), zpdesc_page(zpdesc)); - /* We're committed, tell the world that this is a Zsmalloc page. */ - __zpdesc_set_zsmalloc(newzpdesc); - /* The page is locked, so this pointer must remain valid */ zspage = get_zspage(zpdesc); pool = zspage->pool; @@ -1803,8 +1856,15 @@ static int zs_page_migrate(struct page *newpage, struct page *page, * the class lock protects zpage alloc/free in the zspage. */ spin_lock(&class->lock); - /* the migrate_write_lock protects zpage access via zs_map_object */ - migrate_write_lock(zspage); + /* the zspage write_lock protects zpage access via zs_map_object */ + if (!zspage_write_trylock(zspage)) { + spin_unlock(&class->lock); + write_unlock(&pool->lock); + return -EINVAL; + } + + /* We're committed, tell the world that this is a Zsmalloc page. */ + __zpdesc_set_zsmalloc(newzpdesc); offset = get_first_obj_offset(zpdesc); s_addr = kmap_local_zpdesc(zpdesc); @@ -1835,7 +1895,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, */ write_unlock(&pool->lock); spin_unlock(&class->lock); - migrate_write_unlock(zspage); + zspage_write_unlock(zspage); zpdesc_get(newzpdesc); if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) { @@ -1971,9 +2031,11 @@ static unsigned long __zs_compact(struct zs_pool *pool, if (!src_zspage) break; - migrate_write_lock(src_zspage); + if (!zspage_write_trylock(src_zspage)) + break; + migrate_zspage(pool, src_zspage, dst_zspage); - migrate_write_unlock(src_zspage); + zspage_write_unlock(src_zspage); fg = putback_zspage(class, src_zspage); if (fg == ZS_INUSE_RATIO_0) { From 44f76413496ec343da0d8292ceecdcabe3e6ec16 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:23 +0900 Subject: [PATCH 0995/2157] zsmalloc: introduce new object mapping API Current object mapping API is a little cumbersome. First, it's inconsistent, sometimes it returns with page-faults disabled and sometimes with page-faults enabled. Second, and most importantly, it enforces atomicity restrictions on its users. zs_map_object() has to return a liner object address which is not always possible because some objects span multiple physical (non-contiguous) pages. For such objects zsmalloc uses a per-CPU buffer to which object's data is copied before a pointer to that per-CPU buffer is returned back to the caller. This leads to another, final, issue - extra memcpy(). Since the caller gets a pointer to per-CPU buffer it can memcpy() data only to that buffer, and during zs_unmap_object() zsmalloc will memcpy() from that per-CPU buffer to physical pages that object in question spans across. New API splits functions by access mode: - zs_obj_read_begin(handle, local_copy) Returns a pointer to handle memory. For objects that span two physical pages a local_copy buffer is used to store object's data before the address is returned to the caller. Otherwise the object's page is kmap_local mapped directly. - zs_obj_read_end(handle, buf) Unmaps the page if it was kmap_local mapped by zs_obj_read_begin(). - zs_obj_write(handle, buf, len) Copies len-bytes from compression buffer to handle memory (takes care of objects that span two pages). This does not need any additional (e.g. per-CPU) buffers and writes the data directly to zsmalloc pool pages. In terms of performance, on a synthetic and completely reproducible test that allocates fixed number of objects of fixed sizes and iterates over those objects, first mapping in RO then in RW mode: OLD API ======= 3 first results out of 10 369,205,778 instructions # 0.80 insn per cycle 40,467,926 branches # 113.732 M/sec 369,002,122 instructions # 0.62 insn per cycle 40,426,145 branches # 189.361 M/sec 369,036,706 instructions # 0.63 insn per cycle 40,430,860 branches # 204.105 M/sec [..] NEW API ======= 3 first results out of 10 265,799,293 instructions # 0.51 insn per cycle 29,834,567 branches # 170.281 M/sec 265,765,970 instructions # 0.55 insn per cycle 29,829,019 branches # 161.602 M/sec 265,764,702 instructions # 0.51 insn per cycle 29,828,015 branches # 189.677 M/sec [..] T-test on all 10 runs ===================== Difference at 95.0% confidence -1.03219e+08 +/- 55308.7 -27.9705% +/- 0.0149878% (Student's t, pooled s = 58864.4) The old API will stay around until the remaining users switch to the new one. After that we'll also remove zsmalloc per-CPU buffer and CPU hotplug handling. The split of map(RO) and map(WO) into read_{begin/end}/write is suggested by Yosry Ahmed. Link: https://lkml.kernel.org/r/20250303022425.285971-15-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Suggested-by: Yosry Ahmed Reviewed-by: Yosry Ahmed Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- include/linux/zsmalloc.h | 8 +++ mm/zsmalloc.c | 125 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index a48cd0ffe57d..7d70983cf398 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -58,4 +58,12 @@ unsigned long zs_compact(struct zs_pool *pool); unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size); void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); + +void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, + void *local_copy); +void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, + void *handle_mem); +void zs_obj_write(struct zs_pool *pool, unsigned long handle, + void *handle_mem, size_t mem_len); + #endif diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 818bf381a517..63c99db71dc1 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1362,6 +1362,131 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) } EXPORT_SYMBOL_GPL(zs_unmap_object); +void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, + void *local_copy) +{ + struct zspage *zspage; + struct zpdesc *zpdesc; + unsigned long obj, off; + unsigned int obj_idx; + struct size_class *class; + void *addr; + + /* Guarantee we can get zspage from handle safely */ + read_lock(&pool->lock); + obj = handle_to_obj(handle); + obj_to_location(obj, &zpdesc, &obj_idx); + zspage = get_zspage(zpdesc); + + /* Make sure migration doesn't move any pages in this zspage */ + zspage_read_lock(zspage); + read_unlock(&pool->lock); + + class = zspage_class(pool, zspage); + off = offset_in_page(class->size * obj_idx); + + if (off + class->size <= PAGE_SIZE) { + /* this object is contained entirely within a page */ + addr = kmap_local_zpdesc(zpdesc); + addr += off; + } else { + size_t sizes[2]; + + /* this object spans two pages */ + sizes[0] = PAGE_SIZE - off; + sizes[1] = class->size - sizes[0]; + addr = local_copy; + + memcpy_from_page(addr, zpdesc_page(zpdesc), + off, sizes[0]); + zpdesc = get_next_zpdesc(zpdesc); + memcpy_from_page(addr + sizes[0], + zpdesc_page(zpdesc), + 0, sizes[1]); + } + + if (!ZsHugePage(zspage)) + addr += ZS_HANDLE_SIZE; + + return addr; +} +EXPORT_SYMBOL_GPL(zs_obj_read_begin); + +void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, + void *handle_mem) +{ + struct zspage *zspage; + struct zpdesc *zpdesc; + unsigned long obj, off; + unsigned int obj_idx; + struct size_class *class; + + obj = handle_to_obj(handle); + obj_to_location(obj, &zpdesc, &obj_idx); + zspage = get_zspage(zpdesc); + class = zspage_class(pool, zspage); + off = offset_in_page(class->size * obj_idx); + + if (off + class->size <= PAGE_SIZE) { + if (!ZsHugePage(zspage)) + off += ZS_HANDLE_SIZE; + handle_mem -= off; + kunmap_local(handle_mem); + } + + zspage_read_unlock(zspage); +} +EXPORT_SYMBOL_GPL(zs_obj_read_end); + +void zs_obj_write(struct zs_pool *pool, unsigned long handle, + void *handle_mem, size_t mem_len) +{ + struct zspage *zspage; + struct zpdesc *zpdesc; + unsigned long obj, off; + unsigned int obj_idx; + struct size_class *class; + + /* Guarantee we can get zspage from handle safely */ + read_lock(&pool->lock); + obj = handle_to_obj(handle); + obj_to_location(obj, &zpdesc, &obj_idx); + zspage = get_zspage(zpdesc); + + /* Make sure migration doesn't move any pages in this zspage */ + zspage_read_lock(zspage); + read_unlock(&pool->lock); + + class = zspage_class(pool, zspage); + off = offset_in_page(class->size * obj_idx); + + if (off + class->size <= PAGE_SIZE) { + /* this object is contained entirely within a page */ + void *dst = kmap_local_zpdesc(zpdesc); + + if (!ZsHugePage(zspage)) + off += ZS_HANDLE_SIZE; + memcpy(dst + off, handle_mem, mem_len); + kunmap_local(dst); + } else { + /* this object spans two pages */ + size_t sizes[2]; + + off += ZS_HANDLE_SIZE; + sizes[0] = PAGE_SIZE - off; + sizes[1] = mem_len - sizes[0]; + + memcpy_to_page(zpdesc_page(zpdesc), off, + handle_mem, sizes[0]); + zpdesc = get_next_zpdesc(zpdesc); + memcpy_to_page(zpdesc_page(zpdesc), 0, + handle_mem + sizes[0], sizes[1]); + } + + zspage_read_unlock(zspage); +} +EXPORT_SYMBOL_GPL(zs_obj_write); + /** * zs_huge_class_size() - Returns the size (in bytes) of the first huge * zsmalloc &size_class. From 82f91900c7222c6fae7884da133c5c81dca28d19 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:24 +0900 Subject: [PATCH 0996/2157] zram: switch to new zsmalloc object mapping API Use new read/write zsmalloc object API. For cases when RO mapped object spans two physical pages (requires temp buffer) compression streams now carry around one extra physical page. Link: https://lkml.kernel.org/r/20250303022425.285971-16-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zcomp.c | 4 +++- drivers/block/zram/zcomp.h | 2 ++ drivers/block/zram/zram_drv.c | 28 ++++++++++------------------ 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index cfdde2e0748a..a1d627054bb1 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -45,6 +45,7 @@ static const struct zcomp_ops *backends[] = { static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) { comp->ops->destroy_ctx(&zstrm->ctx); + vfree(zstrm->local_copy); vfree(zstrm->buffer); zstrm->buffer = NULL; } @@ -57,12 +58,13 @@ static int zcomp_strm_init(struct zcomp *comp, struct zcomp_strm *zstrm) if (ret) return ret; + zstrm->local_copy = vzalloc(PAGE_SIZE); /* * allocate 2 pages. 1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ zstrm->buffer = vzalloc(2 * PAGE_SIZE); - if (!zstrm->buffer) { + if (!zstrm->buffer || !zstrm->local_copy) { zcomp_strm_free(comp, zstrm); return -ENOMEM; } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 23b8236b9090..25339ed1e07e 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -34,6 +34,8 @@ struct zcomp_strm { struct mutex lock; /* compression buffer */ void *buffer; + /* local copy of handle memory */ + void *local_copy; struct zcomp_ctx ctx; }; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f6e887f94b71..62aef12417a4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1561,11 +1561,11 @@ static int read_incompressible_page(struct zram *zram, struct page *page, void *src, *dst; handle = zram_get_handle(zram, index); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); + src = zs_obj_read_begin(zram->mem_pool, handle, NULL); dst = kmap_local_page(page); copy_page(dst, src); kunmap_local(dst); - zs_unmap_object(zram->mem_pool, handle); + zs_obj_read_end(zram->mem_pool, handle, src); return 0; } @@ -1583,11 +1583,11 @@ static int read_compressed_page(struct zram *zram, struct page *page, u32 index) prio = zram_get_priority(zram, index); zstrm = zcomp_stream_get(zram->comps[prio]); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); + src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy); dst = kmap_local_page(page); ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); kunmap_local(dst); - zs_unmap_object(zram->mem_pool, handle); + zs_obj_read_end(zram->mem_pool, handle, src); zcomp_stream_put(zstrm); return ret; @@ -1683,7 +1683,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page, u32 index) { unsigned long handle; - void *src, *dst; + void *src; /* * This function is called from preemptible context so we don't need @@ -1701,11 +1701,9 @@ static int write_incompressible_page(struct zram *zram, struct page *page, return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); src = kmap_local_page(page); - memcpy(dst, src, PAGE_SIZE); + zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); kunmap_local(src); - zs_unmap_object(zram->mem_pool, handle); zram_slot_lock(zram, index); zram_set_flag(zram, index, ZRAM_HUGE); @@ -1726,7 +1724,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) int ret = 0; unsigned long handle; unsigned int comp_len; - void *dst, *mem; + void *mem; struct zcomp_strm *zstrm; unsigned long element; bool same_filled; @@ -1773,11 +1771,8 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); - - memcpy(dst, zstrm->buffer, comp_len); + zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); zcomp_stream_put(zstrm); - zs_unmap_object(zram->mem_pool, handle); zram_slot_lock(zram, index); zram_set_handle(zram, index, handle); @@ -1882,7 +1877,7 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, unsigned int comp_len_new; unsigned int class_index_old; unsigned int class_index_new; - void *src, *dst; + void *src; int ret = 0; handle_old = zram_get_handle(zram, index); @@ -1993,12 +1988,9 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, return PTR_ERR((void *)handle_new); } - dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); - memcpy(dst, zstrm->buffer, comp_len_new); + zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new); zcomp_stream_put(zstrm); - zs_unmap_object(zram->mem_pool, handle_new); - zram_free_page(zram, index); zram_set_handle(zram, index, handle_new); zram_set_obj_size(zram, index, comp_len_new); From f66140eb71053c0a444421c7f04c7aecc4e31716 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:25 +0900 Subject: [PATCH 0997/2157] zram: permit reclaim in zstd custom allocator When configured with pre-trained compression/decompression dictionary support, zstd requires custom memory allocator, which it calls internally from compression()/decompression() routines. That means allocation from atomic context (either under entry spin-lock, or per-CPU local-lock or both). Now, with non-atomic zram read()/write(), those limitations are relaxed and we can allow direct and indirect reclaim. Link: https://lkml.kernel.org/r/20250303022425.285971-17-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/backend_zstd.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/block/zram/backend_zstd.c b/drivers/block/zram/backend_zstd.c index 1184c0036f44..22c8067536f3 100644 --- a/drivers/block/zram/backend_zstd.c +++ b/drivers/block/zram/backend_zstd.c @@ -24,19 +24,10 @@ struct zstd_params { /* * For C/D dictionaries we need to provide zstd with zstd_custom_mem, * which zstd uses internally to allocate/free memory when needed. - * - * This means that allocator.customAlloc() can be called from zcomp_compress() - * under local-lock (per-CPU compression stream), in which case we must use - * GFP_ATOMIC. - * - * Another complication here is that we can be configured as a swap device. */ static void *zstd_custom_alloc(void *opaque, size_t size) { - if (!preemptible()) - return kvzalloc(size, GFP_ATOMIC); - - return kvzalloc(size, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN); + return kvzalloc(size, GFP_NOIO | __GFP_NOWARN); } static void zstd_custom_free(void *opaque, void *address) From 5b683d4e987d2a9067d9146d8724b6ae1633519e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:26 +0900 Subject: [PATCH 0998/2157] zram: do not leak page on recompress_store error path Ensure the page used for local object data is freed on error out path. Link: https://lkml.kernel.org/r/20250303022425.285971-18-senozhatsky@chromium.org Fixes: 3f909a60cec1 ("zram: rework recompress target selection strategy") Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 62aef12417a4..e50a5a216974 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2013,7 +2013,7 @@ static ssize_t recompress_store(struct device *dev, struct zram_pp_slot *pps; u32 mode = 0, threshold = 0; u32 prio, prio_max; - struct page *page; + struct page *page = NULL; ssize_t ret; prio = ZRAM_SECONDARY_COMP; @@ -2157,9 +2157,9 @@ static ssize_t recompress_store(struct device *dev, cond_resched(); } - __free_page(page); - release_init_lock: + if (page) + __free_page(page); release_pp_ctl(zram, ctl); atomic_set(&zram->pp_in_progress, 0); up_read(&zram->init_lock); From a6d2193b3ef53d5c5a2c56d397227f5e891f2df3 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:27 +0900 Subject: [PATCH 0999/2157] zram: do not leak page on writeback_store error path Ensure the page used for local object data is freed on error out path. Link: https://lkml.kernel.org/r/20250303022425.285971-19-senozhatsky@chromium.org Fixes: 330edc2bc059 (zram: rework writeback target selection strategy) Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e50a5a216974..fda7d8624889 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -787,7 +787,7 @@ static ssize_t writeback_store(struct device *dev, unsigned long index = 0; struct bio bio; struct bio_vec bio_vec; - struct page *page; + struct page *page = NULL; ssize_t ret = len; int mode, err; unsigned long blk_idx = 0; @@ -929,8 +929,10 @@ static ssize_t writeback_store(struct device *dev, if (blk_idx) free_block_bdev(zram, blk_idx); - __free_page(page); + release_init_lock: + if (page) + __free_page(page); release_pp_ctl(zram, ctl); atomic_set(&zram->pp_in_progress, 0); up_read(&zram->init_lock); From 2ad951865aa7849f203a771d59ec1a0f1356a34c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 3 Mar 2025 11:03:28 +0900 Subject: [PATCH 1000/2157] zram: add might_sleep to zcomp API Explicitly state that zcomp compress/decompress must be called from non-atomic context. Link: https://lkml.kernel.org/r/20250303022425.285971-20-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Cc: Hillf Danton Cc: Kairui Song Cc: Minchan Kim Cc: Sebastian Andrzej Siewior Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- drivers/block/zram/zcomp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index a1d627054bb1..d26a58c67e95 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -146,6 +146,7 @@ int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, }; int ret; + might_sleep(); ret = comp->ops->compress(comp->params, &zstrm->ctx, &req); if (!ret) *dst_len = req.dst_len; @@ -162,6 +163,7 @@ int zcomp_decompress(struct zcomp *comp, struct zcomp_strm *zstrm, .dst_len = PAGE_SIZE, }; + might_sleep(); return comp->ops->decompress(comp->params, &zstrm->ctx, &req); } From 800ddf3cd74bda8061105cab7cc169b9a2667f44 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:12 +0000 Subject: [PATCH 1001/2157] selftests/mm: report errno when things fail in gup_longterm Patch series "selftests/mm: Some cleanups from trying to run them", v4. I never had much luck running mm selftests so I spent a few hours digging into why. Looks like most of the reason is missing SKIP checks, so this series is just adding a bunch of those that I found. I did not do anything like all of them, just the ones I spotted in gup_longterm, gup_test, mmap, userfaultfd and memfd_secret. It's a bit unfortunate to have to skip those tests when ftruncate() fails, but I don't have time to dig deep enough into it to actually make them pass. I have observed the issue on 9pfs and heard rumours that NFS has a similar problem. I'm now able to run these test groups successfully: - mmap - gup_test - compaction - migration - page_frag - userfaultfd - mlock I've never gone past "Waiting for hugetlb memory to get depleted", in the hugetlb tests. I don't know if they are stuck or if they would eventually work if I was patient enough (testing on a 1G machine). I have not investigated further. I had some issues with mlock tests failing due to -ENOSRCH from mlock2(), I can no longer reproduce that though, things work OK now. Of the remaining tests there may be others that work fine, but there's no convenient way to survey the whole output of run_vmtests.sh so I'm just going test by test here. In my spare moments I am slowly chipping away at a setup to run these tests continuously in a reasonably hermetic QEMU environment via virtme-ng: https://github.com/bjackman/linux/blob/5fad4b9c592290f38e0f8bc73c9abb9c99d8787c/README.md Hopefully that will eventually offer a way to provide a "canned" environment where the tests are known to work, which can be fairly easily reproduced by any developer. This patch (of 12): Just reporting failure doesn't tell you what went wrong. This can fail in different ways so report errno to help the reader get started debugging. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-0-dec210a658f5@google.com Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-1-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 37 +++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 9423ad439a61..15335820656b 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -96,13 +96,13 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) int ret; if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed\n"); + ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); return; } if (fallocate(fd, 0, 0, size)) { if (size == pagesize) - ksft_test_result_fail("fallocate() failed\n"); + ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno)); else ksft_test_result_skip("need more free huge pages\n"); return; @@ -112,7 +112,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed\n"); + ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno)); else ksft_test_result_skip("need more free huge pages\n"); return; @@ -130,7 +130,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno)); goto munmap; } /* FALLTHROUGH */ @@ -165,18 +165,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n"); break; } else if (ret && errno == EFAULT) { ksft_test_result(!should_work, "Should have failed\n"); break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); break; } if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", + strerror(errno)); /* * TODO: if the kernel ever supports long-term R/W pinning on @@ -202,7 +204,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_test_result_skip("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); break; } /* @@ -215,13 +218,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed\n"); + ksft_test_result(!should_work, "Should have failed (%s)\n", + strerror(errno)); } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); } else { ksft_test_result(should_work, "Should have worked\n"); io_uring_unregister_buffers(&ring); @@ -249,7 +254,7 @@ static void run_with_memfd(test_fn fn, const char *desc) fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno)); return; } @@ -266,13 +271,13 @@ static void run_with_tmpfile(test_fn fn, const char *desc) file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno)); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_fail("fileno() failed\n"); + ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno)); goto close; } @@ -290,12 +295,12 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) fd = mkstemp(filename); if (fd < 0) { - ksft_test_result_fail("mkstemp() failed\n"); + ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno)); return; } if (unlink(filename)) { - ksft_test_result_fail("unlink() failed\n"); + ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno)); goto close; } @@ -317,7 +322,7 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno)); return; } From 0046dbed80e67f57014bdfdcabd7a8ae5e73824a Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:13 +0000 Subject: [PATCH 1002/2157] selftests/mm: skip uffd-stress if userfaultfd not available It's pretty obvious that the test wouldn't work if you don't have the feature enabled. But, it's still useful to SKIP instead of failing so the reader can immediately tell that this is the reason why. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-2-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 944d559ade21..91174e9425cd 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -412,8 +412,8 @@ static void parse_test_type_arg(const char *raw_type) * feature. */ - if (uffd_get_features(&features)) - err("failed to get available features"); + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); From f4b3e6c7f14c3e84c4faf228868a62289efed22b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:14 +0000 Subject: [PATCH 1003/2157] selftests/mm: skip uffd-wp-mremap if userfaultfd not available It's obvious that this should fail in that case, but still, save the reader the effort of figuring out that they've run into this by just SKIPping Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-3-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-wp-mremap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index 2c4f984bd73c..c2ba7d46c7b4 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -182,7 +182,10 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb /* Register range for uffd-wp. */ if (userfaultfd_open(&features)) { - ksft_test_result_fail("userfaultfd_open() failed\n"); + if (errno == ENOENT) + ksft_test_result_skip("userfaultfd not available\n"); + else + ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } if (uffd_register(uffd, mem, size, false, true, false)) { From f3b5535abce9f05318ee52de7f0a97be58d032a0 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:15 +0000 Subject: [PATCH 1004/2157] selftests/mm/uffd: rename nr_cpus -> nr_parallel A later commit will bound this variable so it no longer necessarily matches the number of CPUs. Rename it appropriately. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-4-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-common.c | 8 +++--- tools/testing/selftests/mm/uffd-common.h | 2 +- tools/testing/selftests/mm/uffd-stress.c | 28 ++++++++++---------- tools/testing/selftests/mm/uffd-unit-tests.c | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 5457a078690d..a37088a23ffe 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -10,7 +10,7 @@ #define BASE_PMD_ADDR ((void *)(1UL << 30)) volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; int uffd = -1, uffd_flags, finished, *pipefd, test_type; bool map_shared; @@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void) size_t i; if (pipefd) { - for (i = 0; i < nr_cpus * 2; ++i) { + for (i = 0; i < nr_parallel * 2; ++i) { if (close(pipefd[i])) err("close pipefd"); } @@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) */ uffd_test_ops->release_pages(area_dst); - pipefd = malloc(sizeof(int) * nr_cpus * 2); + pipefd = malloc(sizeof(int) * nr_parallel * 2); if (!pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index a70ae10b5f62..7700cbfa3975 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -98,7 +98,7 @@ struct uffd_test_case_ops { }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; extern bool map_shared; diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 91174e9425cd..d6b57e5a2e1d 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -180,12 +180,12 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; + pthread_t locking_threads[nr_parallel]; + pthread_t uffd_threads[nr_parallel]; + pthread_t background_threads[nr_parallel]; finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, locking_thread, (void *)cpu)) return 1; @@ -203,7 +203,7 @@ static int stress(struct uffd_args *args) background_thread, (void *)cpu)) return 1; } - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -219,11 +219,11 @@ static int stress(struct uffd_args *args) uffd_test_ops->release_pages(area_src); finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { if (write(pipefd[cpu*2+1], &c, 1) != 1) @@ -246,11 +246,11 @@ static int userfaultfd_stress(void) { void *area; unsigned long nr; - struct uffd_args args[nr_cpus]; + struct uffd_args args[nr_parallel]; uint64_t mem_size = nr_pages * page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + memset(args, 0, sizeof(struct uffd_args) * nr_parallel); if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; @@ -325,7 +325,7 @@ static int userfaultfd_stress(void) */ uffd_test_ops->release_pages(area_dst); - uffd_stats_reset(args, nr_cpus); + uffd_stats_reset(args, nr_parallel); /* bounce pass */ if (stress(args)) { @@ -359,7 +359,7 @@ static int userfaultfd_stress(void) swap(area_src_alias, area_dst_alias); - uffd_stats_report(args, nr_cpus); + uffd_stats_report(args, nr_parallel); } uffd_test_ctx_clear(); @@ -453,9 +453,9 @@ int main(int argc, char **argv) return KSFT_SKIP; } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + nr_parallel = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = bytes / page_size / nr_cpus; + nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { _err("invalid MiB"); usage(); @@ -466,7 +466,7 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_cpus; + nr_pages = nr_pages_per_cpu * nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 74c8bc02b506..24ea82ee2231 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -198,7 +198,7 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, nr_pages = UFFD_TEST_MEM_SIZE / page_size; /* TODO: remove this global var.. it's so ugly */ - nr_cpus = 1; + nr_parallel = 1; /* Initialize test arguments */ args->mem_type = mem_type; From db0f1c138f18296e9c1c91619a0517c05ee50f1b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:16 +0000 Subject: [PATCH 1005/2157] selftests/mm: print some details when uffd-stress gets bad params So this can be debugged more easily. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-5-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index d6b57e5a2e1d..4ba5bf13a010 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -457,7 +457,8 @@ int main(int argc, char **argv) nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { - _err("invalid MiB"); + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, page_size, nr_parallel); usage(); } From bf6d575e24ee91f7ba8a752c0354bb00db1d3bf2 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:17 +0000 Subject: [PATCH 1006/2157] selftests/mm: don't fail uffd-stress if too many CPUs This calculation divides a fixed parameter by an environment-dependent parameter i.e. the number of CPUs. The simple way to avoid machine-specific failures here is to just put a cap on the max value of the latter. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-6-dec210a658f5@google.com Signed-off-by: Brendan Jackman Suggested-by: Mateusz Guzik Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 4ba5bf13a010..40af7f67c407 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -435,6 +435,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + unsigned long nr_cpus; size_t bytes; if (argc < 4) @@ -453,7 +454,15 @@ int main(int argc, char **argv) return KSFT_SKIP; } - nr_parallel = sysconf(_SC_NPROCESSORS_ONLN); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + nr_parallel = 32; + } else { + nr_parallel = nr_cpus; + } nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { From 571a4b62ed63cace383619b0b4ef0c7e012237e1 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:18 +0000 Subject: [PATCH 1007/2157] selftests/mm: skip map_populate on weird filesystems It seems that 9pfs does not allow truncating unlinked files, Mark Brown has noted that NFS may also behave this way. It doesn't seem quite right to call this a "bug" but it's probably a special enough case that it makes sense for the test to just SKIP if it happens. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-7-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/map_populate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 5c8a53869b1b..433e54fb634f 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -87,6 +87,13 @@ int main(int argc, char **argv) BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); + if (ret < 0 && errno == ENOENT) { + /* + * This probably means tmpfile() made a file on a filesystem + * that doesn't handle temporary files the way we want. + */ + ksft_exit_skip("ftruncate(fileno(tmpfile())) gave ENOENT, weird filesystem?\n"); + } BUG_ON(ret, "ftruncate()"); smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, From 32b42970e8614c0b8652fcd441acec937bc2595e Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:19 +0000 Subject: [PATCH 1008/2157] selftests/mm: skip gup_longterm tests on weird filesystems Some filesystems don't support ftruncate()ing unlinked files. They return ENOENT. In that case, skip the test. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-8-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 15335820656b..03271442aae5 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -96,7 +96,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) int ret; if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + if (errno == ENOENT) { + /* + * This can happen if the file has been unlinked and the + * filesystem doesn't support truncating unlinked files. + */ + ksft_test_result_skip("ftruncate() failed with ENOENT\n"); + } else { + ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + } return; } From e9269b2cc403b7681980e7219cf2dc339fca8d38 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:20 +0000 Subject: [PATCH 1009/2157] selftests/mm: drop unnecessary sudo usage This script must be run as root anyway (see all the writing to privileged files in /proc etc). Remove the unnecessary use of sudo to avoid breaking on single-user systems that don't have sudo. This also avoids confusing readers. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-9-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 4b5e45a10219..31a576d70b57 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -402,7 +402,7 @@ CATEGORY="madv_populate" run_test ./madv_populate if [ -x ./memfd_secret ] then -(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret fi From f896c6de833342bda61b8fe39f612023f7daf2a5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:21 +0000 Subject: [PATCH 1010/2157] selftests/mm: ensure uffd-wp-mremap gets pages of each size This test allocates a page of every available size and doesn't have any SKIP logic if the allocation fails. So, ensure it's available and skip the test if we can't do so. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-10-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 31a576d70b57..e1c20dcf8486 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -325,9 +325,30 @@ CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 3 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 -CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +# uffd-wp-mremap requires at least one page of each size. +have_all_size_hugepgs=true +declare -A nr_size_hugepgs +for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do + old=$(cat $f) + nr_size_hugepgs["$f"]="$old" + if [ "$old" == 0 ]; then + echo 1 > "$f" + fi + if [ $(cat "$f") == 0 ]; then + have_all_size_hugepgs=false + break + fi +done +if $have_all_size_hugepgs; then + CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +else + echo "# SKIP ./uffd-wp-mremap" +fi #cleanup +for f in "${!nr_size_hugepgs[@]}"; do + echo "${nr_size_hugepgs["$f"]}" > "$f" +done echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test From 5d2146a3354f8eeb1f9f9581ee9a40e0a9d2c714 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:22 +0000 Subject: [PATCH 1011/2157] selftests/mm: skip mlock tests if nobody user can't read it If running from a directory that can't be read by unprivileged users, executing on-fault-test via the nobody user will fail. The kselftest build does give the file the correct permissions, but after being installed it might be in a directory without global execute permissions. Since the script can't safely fix that, just skip if it happens. Note that the stderr of the `ls` command is unfiltered meaning the user sees a "permission denied" error that can help inform them why the test was skipped. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-11-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index e1c20dcf8486..9aff33b10999 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -353,7 +353,7 @@ echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -if command -v sudo &> /dev/null; +if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null; then CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit else From 1ddae9d67ee11886f9a35b78ad837eb26559e9ab Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:23 +0000 Subject: [PATCH 1012/2157] selftests/mm/mlock: print error on failure It's not really possible to start diagnosing this without knowing the actual error. Also update the mlock2 helper to behave like libc would by setting errno and returning -1. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-12-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mlock-random-test.c | 4 ++-- tools/testing/selftests/mm/mlock2.h | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 1cd80b0f76c3..b8d7e966f44c 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size) MLOCK_ONFAULT); if (ret) - ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n", is_mlock ? "mlock" : "mlock2", - p, alloc_size, + strerror(errno), p, alloc_size, p + start_offset, lock_size); } diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..81e77fa41901 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -6,7 +6,13 @@ static int mlock2_(void *start, size_t len, int flags) { - return syscall(__NR_mlock2, start, len, flags); + int ret = syscall(__NR_mlock2, start, len, flags); + + if (ret) { + errno = ret; + return -1; + } + return 0; } static FILE *seek_to_smaps_entry(unsigned long addr) From 43e9bbc3bb19893377364379e224c35db0256b88 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:48 +0800 Subject: [PATCH 1013/2157] mm, swap: remove setting SWAP_MAP_BAD for discard cluster Before alloc from a cluster, we will aqcuire cluster's lock and make sure it is usable by cluster_is_usable(), so there is no need to set SWAP_MAP_BAD for cluster to be discarded. Link: https://lkml.kernel.org/r/20250222160850.505274-5-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index cab68e57f4cc..80e4ad24fe53 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -479,15 +479,6 @@ static void move_cluster(struct swap_info_struct *si, static void swap_cluster_schedule_discard(struct swap_info_struct *si, struct swap_cluster_info *ci) { - unsigned int idx = cluster_index(si, ci); - /* - * If scan_swap_map_slots() can't find a free cluster, it will check - * si->swap_map directly. To make sure the discarding cluster isn't - * taken by scan_swap_map_slots(), mark the swap entries bad (occupied). - * It will be cleared after discard - */ - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - SWAP_MAP_BAD, SWAPFILE_CLUSTER); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD); schedule_work(&si->discard_work); @@ -571,8 +562,6 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si) * return the cluster to allocation list. */ ci->flags = CLUSTER_FLAG_NONE; - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - 0, SWAPFILE_CLUSTER); __free_cluster(si, ci); spin_unlock(&ci->lock); ret = true; From 2310f0894225024397dfa193ccfc69b74366072e Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:49 +0800 Subject: [PATCH 1014/2157] mm, swap: correct comment in swap_usage_sub() We will add si back to plist in swap_usage_sub(), just correct the wrong comment which says we will remove si from plist in swap_usage_sub(). Link: https://lkml.kernel.org/r/20250222160850.505274-6-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 80e4ad24fe53..dc9f93b66f69 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1098,7 +1098,7 @@ static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries) /* * If device is not full, and SWAP_USAGE_OFFLIST_BIT is set, - * remove it from the plist. + * add it to the plist. */ if (unlikely(val & SWAP_USAGE_OFFLIST_BIT)) add_to_avail_list(si, false); From 0a8a5b6c4129e61070eb9a45979c395fb6ab31c4 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:50 +0800 Subject: [PATCH 1015/2157] mm: swap: remove stale comment of swap_reclaim_full_clusters() swap_reclaim_full_clusters() has no return value now, just remove the stale comment which says swap_reclaim_full_clusters() wil return a bool value. Link: https://lkml.kernel.org/r/20250222160850.505274-7-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index dc9f93b66f69..a7f60006c52c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -820,7 +820,6 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, return found; } -/* Return true if reclaimed a whole cluster */ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) { long to_scan = 1; From 8e2f2aeb8b48aceef6e6c07b2d9bede4eaa50c06 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 21 Feb 2025 12:05:22 +0000 Subject: [PATCH 1016/2157] fs/proc/task_mmu: add guard region bit to pagemap Patch series "fs/proc/task_mmu: add guard region bit to pagemap". Currently there is no means of determining whether a given page in a mapping range is designated a guard region (as installed via madvise() using the MADV_GUARD_INSTALL flag). This is generally not an issue, but in some instances users may wish to determine whether this is the case. This series adds this ability via /proc/$pid/pagemap, updates the documentation and adds a self test to assert that this functions correctly. This patch (of 2): Currently there is no means by which users can determine whether a given page in memory is in fact a guard region, that is having had the MADV_GUARD_INSTALL madvise() flag applied to it. This is intentional, as to provide this information in VMA metadata would contradict the intent of the feature (providing a means to change fault behaviour at a page table level rather than a VMA level), and would require VMA metadata operations to scan page tables, which is unacceptable. In many cases, users have no need to reflect and determine what regions have been designated guard regions, as it is the user who has established them in the first place. But in some instances, such as monitoring software, or software that relies upon being able to ascertain the nature of mappings within a remote process for instance, it becomes useful to be able to determine which pages have the guard region marker applied. This patch makes use of an unused pagemap bit (58) to provide this information. This patch updates the documentation at the same time as making the change such that the implementation of the feature and the documentation of it are tied together. Link: https://lkml.kernel.org/r/cover.1740139449.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/521d99c08b975fb06a1e7201e971cc24d68196d1.1740139449.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jonathan Corbet Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcow (Oracle) Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/pagemap.rst | 3 ++- fs/proc/task_mmu.c | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index caba0f52dd36..a297e824f990 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -21,7 +21,8 @@ There are four components to pagemap: * Bit 56 page exclusively mapped (since 4.2) * Bit 57 pte is uffd-wp write-protected (since 5.13) (see Documentation/admin-guide/mm/userfaultfd.rst) - * Bits 58-60 zero + * Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page) + * Bits 59-60 zero * Bit 61 page is file-page or shared-anon (since 3.5) * Bit 62 page swapped * Bit 63 page present diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f02cd362309a..c17615e21a5d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1632,6 +1632,7 @@ struct pagemapread { #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -1732,6 +1733,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; + if (is_guard_swp_entry(entry)) + flags |= PM_GUARD_REGION; } if (page) { @@ -1931,7 +1934,8 @@ static const struct mm_walk_ops pagemap_ops = { * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bit 57 pte is uffd-wp write-protected - * Bits 58-60 zero + * Bit 58 pte is a guard region + * Bits 59-60 zero * Bit 61 page is file-page or shared-anon * Bit 62 page swapped * Bit 63 page present From f3b92176f4f7100f7e150975f0378f31ea5ce040 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 21 Feb 2025 12:05:23 +0000 Subject: [PATCH 1017/2157] tools/selftests: add guard region test for /proc/$pid/pagemap Add a test to the guard region self tests to assert that the /proc/$pid/pagemap information now made availabile to the user correctly identifies and reports guard regions. As a part of this change, update vm_util.h to add the new bit (note there is no header file in the kernel where this is exposed, the user is expected to provide their own mask) and utilise the helper functions there for pagemap functionality. [lorenzo.stoakes@oracle.com: fixup define name] Link: https://lkml.kernel.org/r/32e83941-e6f5-42ee-9292-a44c16463cf1@lucifer.local Link: https://lkml.kernel.org/r/164feb0a43ae72650e6b20c3910213f469566311.1740139449.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Jonathan Corbet Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcow (Oracle) Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 47 ++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 1 + 2 files changed, 48 insertions(+) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index ea9b5815e828..280d1831bf73 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -19,6 +19,7 @@ #include #include #include +#include "vm_util.h" /* * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: @@ -2032,4 +2033,50 @@ TEST_F(guard_regions, anon_zeropage) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Assert that /proc/$pid/pagemap correctly identifies guard region ranges. + */ +TEST_F(guard_regions, pagemap) +{ + const unsigned long page_size = self->page_size; + int proc_fd; + char *ptr; + int i; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Read from pagemap, and assert no guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, 0); + } + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b60ac68a9dc8..0e629586556b 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -10,6 +10,7 @@ #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) From c2f6ea38fc1b640aa7a2e155cc1c0410ff91afa2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 24 Feb 2025 19:08:24 -0500 Subject: [PATCH 1018/2157] mm: page_alloc: don't steal single pages from biggest buddy The fallback code searches for the biggest buddy first in an attempt to steal the whole block and encourage type grouping down the line. The approach used to be this: - Non-movable requests will split the largest buddy and steal the remainder. This splits up contiguity, but it allows subsequent requests of this type to fall back into adjacent space. - Movable requests go and look for the smallest buddy instead. The thinking is that movable requests can be compacted, so grouping is less important than retaining contiguity. c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion") enforces freelist type hygiene, which restricts stealing to either claiming the whole block or just taking the requested chunk; no additional pages or buddy remainders can be stolen any more. The patch mishandled when to switch to finding the smallest buddy in that new reality. As a result, it may steal the exact request size, but from the biggest buddy. This causes fracturing for no good reason. Fix this by committing to the new behavior: either steal the whole block, or fall back to the smallest buddy. Remove single-page stealing from steal_suitable_fallback(). Rename it to try_to_steal_block() to make the intentions clear. If this fails, always fall back to the smallest buddy. The following is from 4 runs of mmtest's thpchallenge. "Pollute" is single page fallback, "steal" is conversion of a partially used block. The numbers for free block conversions (omitted) are comparable. vanilla patched @pollute[unmovable from reclaimable]: 27 106 @pollute[unmovable from movable]: 82 46 @pollute[reclaimable from unmovable]: 256 83 @pollute[reclaimable from movable]: 46 8 @pollute[movable from unmovable]: 4841 868 @pollute[movable from reclaimable]: 5278 12568 @steal[unmovable from reclaimable]: 11 12 @steal[unmovable from movable]: 113 49 @steal[reclaimable from unmovable]: 19 34 @steal[reclaimable from movable]: 47 21 @steal[movable from unmovable]: 250 183 @steal[movable from reclaimable]: 81 93 The allocator appears to do a better job at keeping stealing and polluting to the first fallback preference. As a result, the numbers for "from movable" - the least preferred fallback option, and most detrimental to compactability - are down across the board. Link: https://lkml.kernel.org/r/20250225001023.1494422-2-hannes@cmpxchg.org Fixes: c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion") Signed-off-by: Johannes Weiner Suggested-by: Vlastimil Babka Reviewed-by: Brendan Jackman Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/page_alloc.c | 80 +++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 46 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d875f055aa53..462f0e5342e5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1986,13 +1986,12 @@ static inline bool boost_watermark(struct zone *zone) * can claim the whole pageblock for the requested migratetype. If not, we check * the pageblock for constituent pages; if at least half of the pages are free * or compatible, we can still claim the whole block, so pages freed in the - * future will be put on the correct free list. Otherwise, we isolate exactly - * the order we need from the fallback block and leave its migratetype alone. + * future will be put on the correct free list. */ static struct page * -steal_suitable_fallback(struct zone *zone, struct page *page, - int current_order, int order, int start_type, - unsigned int alloc_flags, bool whole_block) +try_to_steal_block(struct zone *zone, struct page *page, + int current_order, int order, int start_type, + unsigned int alloc_flags) { int free_pages, movable_pages, alike_pages; unsigned long start_pfn; @@ -2005,7 +2004,7 @@ steal_suitable_fallback(struct zone *zone, struct page *page, * highatomic accounting. */ if (is_migrate_highatomic(block_type)) - goto single_page; + return NULL; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { @@ -2026,14 +2025,10 @@ steal_suitable_fallback(struct zone *zone, struct page *page, if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD)) set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); - /* We are not allowed to try stealing from the whole block */ - if (!whole_block) - goto single_page; - /* moving whole block can fail due to zone boundary conditions */ if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages, &movable_pages)) - goto single_page; + return NULL; /* * Determine how many pages are compatible with our allocation. @@ -2066,9 +2061,7 @@ steal_suitable_fallback(struct zone *zone, struct page *page, return __rmqueue_smallest(zone, order, start_type); } -single_page: - page_del_and_expand(zone, page, order, current_order, block_type); - return page; + return NULL; } /* @@ -2250,14 +2243,19 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, } /* - * Try finding a free buddy page on the fallback list and put it on the free - * list of requested migratetype, possibly along with other pages from the same - * block, depending on fragmentation avoidance heuristics. Returns true if - * fallback was found so that __rmqueue_smallest() can grab it. + * Try finding a free buddy page on the fallback list. + * + * This will attempt to steal a whole pageblock for the requested type + * to ensure grouping of such requests in the future. + * + * If a whole block cannot be stolen, regress to __rmqueue_smallest() + * logic to at least break up as little contiguity as possible. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. + * + * Return the stolen page, or NULL if none can be found. */ static __always_inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, @@ -2291,45 +2289,35 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, if (fallback_mt == -1) continue; - /* - * We cannot steal all free pages from the pageblock and the - * requested migratetype is movable. In that case it's better to - * steal and split the smallest available page instead of the - * largest available page, because even if the next movable - * allocation falls back into a different pageblock than this - * one, it won't cause permanent fragmentation. - */ - if (!can_steal && start_migratetype == MIGRATE_MOVABLE - && current_order > order) - goto find_smallest; + if (!can_steal) + break; - goto do_steal; + page = get_page_from_free_area(area, fallback_mt); + page = try_to_steal_block(zone, page, current_order, order, + start_migratetype, alloc_flags); + if (page) + goto got_one; } - return NULL; + if (alloc_flags & ALLOC_NOFRAGMENT) + return NULL; -find_smallest: + /* No luck stealing blocks. Find the smallest fallback page */ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, start_migratetype, false, &can_steal); - if (fallback_mt != -1) - break; + if (fallback_mt == -1) + continue; + + page = get_page_from_free_area(area, fallback_mt); + page_del_and_expand(zone, page, order, current_order, fallback_mt); + goto got_one; } - /* - * This should not happen - we already found a suitable fallback - * when looking for the largest page. - */ - VM_BUG_ON(current_order > MAX_PAGE_ORDER); - -do_steal: - page = get_page_from_free_area(area, fallback_mt); - - /* take off list, maybe claim block, expand remainder */ - page = steal_suitable_fallback(zone, page, current_order, order, - start_migratetype, alloc_flags, can_steal); + return NULL; +got_one: trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, fallback_mt); From 020396a581dc69be2d30939fabde6c029d847034 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 24 Feb 2025 19:08:25 -0500 Subject: [PATCH 1019/2157] mm: page_alloc: remove remnants of unlocked migratetype updates The freelist hygiene patches made migratetype accesses fully protected under the zone->lock. Remove remnants of handling the race conditions that existed before from the MIGRATE_HIGHATOMIC code. Link: https://lkml.kernel.org/r/20250225001023.1494422-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Brendan Jackman Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/page_alloc.c | 50 ++++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 462f0e5342e5..9e6f0db6c79f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1991,20 +1991,10 @@ static inline bool boost_watermark(struct zone *zone) static struct page * try_to_steal_block(struct zone *zone, struct page *page, int current_order, int order, int start_type, - unsigned int alloc_flags) + int block_type, unsigned int alloc_flags) { int free_pages, movable_pages, alike_pages; unsigned long start_pfn; - int block_type; - - block_type = get_pageblock_migratetype(page); - - /* - * This can happen due to races and we want to prevent broken - * highatomic accounting. - */ - if (is_migrate_highatomic(block_type)) - return NULL; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { @@ -2179,33 +2169,22 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &(zone->free_area[order]); - int mt; + unsigned long size; page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); if (!page) continue; - mt = get_pageblock_migratetype(page); /* - * In page freeing path, migratetype change is racy so - * we can counter several free pages in a pageblock - * in this loop although we changed the pageblock type - * from highatomic to ac->migratetype. So we should - * adjust the count once. + * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. */ - if (is_migrate_highatomic(mt)) { - unsigned long size; - /* - * It should never happen but changes to - * locking could inadvertently allow a per-cpu - * drain to add pages to MIGRATE_HIGHATOMIC - * while unreserving so be safe and watch for - * underflows. - */ - size = max(pageblock_nr_pages, 1UL << order); - size = min(size, zone->nr_reserved_highatomic); - zone->nr_reserved_highatomic -= size; - } + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; /* * Convert to ac->migratetype and avoid the normal @@ -2217,10 +2196,12 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * may increase. */ if (order < pageblock_order) - ret = move_freepages_block(zone, page, mt, + ret = move_freepages_block(zone, page, + MIGRATE_HIGHATOMIC, ac->migratetype); else { - move_to_free_list(page, zone, order, mt, + move_to_free_list(page, zone, order, + MIGRATE_HIGHATOMIC, ac->migratetype); change_pageblock_range(page, order, ac->migratetype); @@ -2294,7 +2275,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = get_page_from_free_area(area, fallback_mt); page = try_to_steal_block(zone, page, current_order, order, - start_migratetype, alloc_flags); + start_migratetype, fallback_mt, + alloc_flags); if (page) goto got_one; } From a4138a2702a4428317ecdb115934554df4b788b4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 24 Feb 2025 19:08:26 -0500 Subject: [PATCH 1020/2157] mm: page_alloc: group fallback functions together The way the fallback rules are spread out makes them hard to follow. Move the functions next to each other at least. Link: https://lkml.kernel.org/r/20250225001023.1494422-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Brendan Jackman Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/page_alloc.c | 394 ++++++++++++++++++++++++------------------------ 1 file changed, 197 insertions(+), 197 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9e6f0db6c79f..945437d7ac44 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1903,6 +1903,43 @@ static void change_pageblock_range(struct page *pageblock_page, } } +static inline bool boost_watermark(struct zone *zone) +{ + unsigned long max_boost; + + if (!watermark_boost_factor) + return false; + /* + * Don't bother in zones that are unlikely to produce results. + * On small machines, including kdump capture kernels running + * in a small area, boosting the watermark can cause an out of + * memory situation immediately. + */ + if ((pageblock_nr_pages * 4) > zone_managed_pages(zone)) + return false; + + max_boost = mult_frac(zone->_watermark[WMARK_HIGH], + watermark_boost_factor, 10000); + + /* + * high watermark may be uninitialised if fragmentation occurs + * very early in boot so do not boost. We do not fall + * through and boost by pageblock_nr_pages as failing + * allocations that early means that reclaim is not going + * to help and it may even be impossible to reclaim the + * boosted watermark resulting in a hang. + */ + if (!max_boost) + return false; + + max_boost = max(pageblock_nr_pages, max_boost); + + zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, + max_boost); + + return true; +} + /* * When we are falling back to another migratetype during allocation, try to * steal extra free pages from the same pageblocks to satisfy further @@ -1944,41 +1981,38 @@ static bool can_steal_fallback(unsigned int order, int start_mt) return false; } -static inline bool boost_watermark(struct zone *zone) +/* + * Check whether there is a suitable fallback freepage with requested order. + * If only_stealable is true, this function returns fallback_mt only if + * we can steal other freepages all together. This would help to reduce + * fragmentation due to mixed migratetype pages in one pageblock. + */ +int find_suitable_fallback(struct free_area *area, unsigned int order, + int migratetype, bool only_stealable, bool *can_steal) { - unsigned long max_boost; + int i; + int fallback_mt; - if (!watermark_boost_factor) - return false; - /* - * Don't bother in zones that are unlikely to produce results. - * On small machines, including kdump capture kernels running - * in a small area, boosting the watermark can cause an out of - * memory situation immediately. - */ - if ((pageblock_nr_pages * 4) > zone_managed_pages(zone)) - return false; + if (area->nr_free == 0) + return -1; - max_boost = mult_frac(zone->_watermark[WMARK_HIGH], - watermark_boost_factor, 10000); + *can_steal = false; + for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { + fallback_mt = fallbacks[migratetype][i]; + if (free_area_empty(area, fallback_mt)) + continue; - /* - * high watermark may be uninitialised if fragmentation occurs - * very early in boot so do not boost. We do not fall - * through and boost by pageblock_nr_pages as failing - * allocations that early means that reclaim is not going - * to help and it may even be impossible to reclaim the - * boosted watermark resulting in a hang. - */ - if (!max_boost) - return false; + if (can_steal_fallback(order, migratetype)) + *can_steal = true; - max_boost = max(pageblock_nr_pages, max_boost); + if (!only_stealable) + return fallback_mt; - zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, - max_boost); + if (*can_steal) + return fallback_mt; + } - return true; + return -1; } /* @@ -2054,175 +2088,6 @@ try_to_steal_block(struct zone *zone, struct page *page, return NULL; } -/* - * Check whether there is a suitable fallback freepage with requested order. - * If only_stealable is true, this function returns fallback_mt only if - * we can steal other freepages all together. This would help to reduce - * fragmentation due to mixed migratetype pages in one pageblock. - */ -int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal) -{ - int i; - int fallback_mt; - - if (area->nr_free == 0) - return -1; - - *can_steal = false; - for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { - fallback_mt = fallbacks[migratetype][i]; - if (free_area_empty(area, fallback_mt)) - continue; - - if (can_steal_fallback(order, migratetype)) - *can_steal = true; - - if (!only_stealable) - return fallback_mt; - - if (*can_steal) - return fallback_mt; - } - - return -1; -} - -/* - * Reserve the pageblock(s) surrounding an allocation request for - * exclusive use of high-order atomic allocations if there are no - * empty page blocks that contain a page with a suitable order - */ -static void reserve_highatomic_pageblock(struct page *page, int order, - struct zone *zone) -{ - int mt; - unsigned long max_managed, flags; - - /* - * The number reserved as: minimum is 1 pageblock, maximum is - * roughly 1% of a zone. But if 1% of a zone falls below a - * pageblock size, then don't reserve any pageblocks. - * Check is race-prone but harmless. - */ - if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages) - return; - max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages); - if (zone->nr_reserved_highatomic >= max_managed) - return; - - spin_lock_irqsave(&zone->lock, flags); - - /* Recheck the nr_reserved_highatomic limit under the lock */ - if (zone->nr_reserved_highatomic >= max_managed) - goto out_unlock; - - /* Yoink! */ - mt = get_pageblock_migratetype(page); - /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (!migratetype_is_mergeable(mt)) - goto out_unlock; - - if (order < pageblock_order) { - if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) - goto out_unlock; - zone->nr_reserved_highatomic += pageblock_nr_pages; - } else { - change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); - zone->nr_reserved_highatomic += 1 << order; - } - -out_unlock: - spin_unlock_irqrestore(&zone->lock, flags); -} - -/* - * Used when an allocation is about to fail under memory pressure. This - * potentially hurts the reliability of high-order allocations when under - * intense memory pressure but failed atomic allocations should be easier - * to recover from than an OOM. - * - * If @force is true, try to unreserve pageblocks even though highatomic - * pageblock is exhausted. - */ -static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, - bool force) -{ - struct zonelist *zonelist = ac->zonelist; - unsigned long flags; - struct zoneref *z; - struct zone *zone; - struct page *page; - int order; - int ret; - - for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, - ac->nodemask) { - /* - * Preserve at least one pageblock unless memory pressure - * is really high. - */ - if (!force && zone->nr_reserved_highatomic <= - pageblock_nr_pages) - continue; - - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < NR_PAGE_ORDERS; order++) { - struct free_area *area = &(zone->free_area[order]); - unsigned long size; - - page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); - if (!page) - continue; - - /* - * It should never happen but changes to - * locking could inadvertently allow a per-cpu - * drain to add pages to MIGRATE_HIGHATOMIC - * while unreserving so be safe and watch for - * underflows. - */ - size = max(pageblock_nr_pages, 1UL << order); - size = min(size, zone->nr_reserved_highatomic); - zone->nr_reserved_highatomic -= size; - - /* - * Convert to ac->migratetype and avoid the normal - * pageblock stealing heuristics. Minimally, the caller - * is doing the work and needs the pages. More - * importantly, if the block was always converted to - * MIGRATE_UNMOVABLE or another type then the number - * of pageblocks that cannot be completely freed - * may increase. - */ - if (order < pageblock_order) - ret = move_freepages_block(zone, page, - MIGRATE_HIGHATOMIC, - ac->migratetype); - else { - move_to_free_list(page, zone, order, - MIGRATE_HIGHATOMIC, - ac->migratetype); - change_pageblock_range(page, order, - ac->migratetype); - ret = 1; - } - /* - * Reserving the block(s) already succeeded, - * so this should not fail on zone boundaries. - */ - WARN_ON_ONCE(ret == -1); - if (ret > 0) { - spin_unlock_irqrestore(&zone->lock, flags); - return ret; - } - } - spin_unlock_irqrestore(&zone->lock, flags); - } - - return false; -} - /* * Try finding a free buddy page on the fallback list. * @@ -3143,6 +3008,141 @@ struct page *rmqueue(struct zone *preferred_zone, return page; } +/* + * Reserve the pageblock(s) surrounding an allocation request for + * exclusive use of high-order atomic allocations if there are no + * empty page blocks that contain a page with a suitable order + */ +static void reserve_highatomic_pageblock(struct page *page, int order, + struct zone *zone) +{ + int mt; + unsigned long max_managed, flags; + + /* + * The number reserved as: minimum is 1 pageblock, maximum is + * roughly 1% of a zone. But if 1% of a zone falls below a + * pageblock size, then don't reserve any pageblocks. + * Check is race-prone but harmless. + */ + if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages) + return; + max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages); + if (zone->nr_reserved_highatomic >= max_managed) + return; + + spin_lock_irqsave(&zone->lock, flags); + + /* Recheck the nr_reserved_highatomic limit under the lock */ + if (zone->nr_reserved_highatomic >= max_managed) + goto out_unlock; + + /* Yoink! */ + mt = get_pageblock_migratetype(page); + /* Only reserve normal pageblocks (i.e., they can merge with others) */ + if (!migratetype_is_mergeable(mt)) + goto out_unlock; + + if (order < pageblock_order) { + if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) + goto out_unlock; + zone->nr_reserved_highatomic += pageblock_nr_pages; + } else { + change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); + zone->nr_reserved_highatomic += 1 << order; + } + +out_unlock: + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Used when an allocation is about to fail under memory pressure. This + * potentially hurts the reliability of high-order allocations when under + * intense memory pressure but failed atomic allocations should be easier + * to recover from than an OOM. + * + * If @force is true, try to unreserve pageblocks even though highatomic + * pageblock is exhausted. + */ +static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, + bool force) +{ + struct zonelist *zonelist = ac->zonelist; + unsigned long flags; + struct zoneref *z; + struct zone *zone; + struct page *page; + int order; + int ret; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, + ac->nodemask) { + /* + * Preserve at least one pageblock unless memory pressure + * is really high. + */ + if (!force && zone->nr_reserved_highatomic <= + pageblock_nr_pages) + continue; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < NR_PAGE_ORDERS; order++) { + struct free_area *area = &(zone->free_area[order]); + unsigned long size; + + page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); + if (!page) + continue; + + /* + * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. + */ + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; + + /* + * Convert to ac->migratetype and avoid the normal + * pageblock stealing heuristics. Minimally, the caller + * is doing the work and needs the pages. More + * importantly, if the block was always converted to + * MIGRATE_UNMOVABLE or another type then the number + * of pageblocks that cannot be completely freed + * may increase. + */ + if (order < pageblock_order) + ret = move_freepages_block(zone, page, + MIGRATE_HIGHATOMIC, + ac->migratetype); + else { + move_to_free_list(page, zone, order, + MIGRATE_HIGHATOMIC, + ac->migratetype); + change_pageblock_range(page, order, + ac->migratetype); + ret = 1; + } + /* + * Reserving the block(s) already succeeded, + * so this should not fail on zone boundaries. + */ + WARN_ON_ONCE(ret == -1); + if (ret > 0) { + spin_unlock_irqrestore(&zone->lock, flags); + return ret; + } + } + spin_unlock_irqrestore(&zone->lock, flags); + } + + return false; +} + static inline long __zone_watermark_unusable_free(struct zone *z, unsigned int order, unsigned int alloc_flags) { From 442b1eca223b4860cc85ef970ae602d125aec5a4 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 24 Feb 2025 14:14:45 -0700 Subject: [PATCH 1021/2157] mm: make page_mapped_in_vma() hugetlb walk aware When a process consumes a UE in a page, the memory failure handler attempts to collect information for a potential SIGBUS. If the page is an anonymous page, page_mapped_in_vma(page, vma) is invoked in order to 1. retrieve the vaddr from the process' address space, 2. verify that the vaddr is indeed mapped to the poisoned page, where 'page' is the precise small page with UE. It's been observed that when injecting poison to a non-head subpage of an anonymous hugetlb page, no SIGBUS shows up, while injecting to the head page produces a SIGBUS. The cause is that, though hugetlb_walk() returns a valid pmd entry (on x86), but check_pte() detects mismatch between the head page per the pmd and the input subpage. Thus the vaddr is considered not mapped to the subpage and the process is not collected for SIGBUS purpose. This is the calling stack: collect_procs_anon page_mapped_in_vma page_vma_mapped_walk hugetlb_walk huge_pte_lock check_pte check_pte() header says that it "check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is mapped at the @pvmw->pte" but practically works only if pvmw->pfn is the head page pfn at pvmw->pte. Hindsight acknowledging that some pvmw->pte could point to a hugepage of some sort such that it makes sense to make check_pte() work for hugepage. Link: https://lkml.kernel.org/r/20250224211445.2663312-1-jane.chu@oracle.com Signed-off-by: Jane Chu Cc: Hugh Dickins Cc: Kirill A. Shuemov Cc: linmiaohe Cc: Matthew Wilcow (Oracle) Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/page_vma_mapped.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 32679be22d30..e463c3be934a 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -84,6 +84,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, * mapped at the @pvmw->pte * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range * for checking + * @pte_nr: the number of small pages described by @pvmw->pte. * * page_vma_mapped_walk() found a place where pfn range is *potentially* * mapped. check_pte() has to validate this. @@ -100,7 +101,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, * Otherwise, return false. * */ -static bool check_pte(struct page_vma_mapped_walk *pvmw) +static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr) { unsigned long pfn; pte_t ptent = ptep_get(pvmw->pte); @@ -132,7 +133,11 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) pfn = pte_pfn(ptent); } - return (pfn - pvmw->pfn) < pvmw->nr_pages; + if ((pfn + pte_nr - 1) < pvmw->pfn) + return false; + if (pfn > (pvmw->pfn + pvmw->nr_pages - 1)) + return false; + return true; } /* Returns true if the two ranges overlap. Careful to not overflow. */ @@ -207,7 +212,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return false; pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte); - if (!check_pte(pvmw)) + if (!check_pte(pvmw, pages_per_huge_page(hstate))) return not_found(pvmw); return true; } @@ -290,7 +295,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) goto next_pte; } this_pte: - if (check_pte(pvmw)) + if (check_pte(pvmw, 1)) return true; next_pte: do { From fae85955053130be0b8b3d10e19cadcc173c7e4b Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:29 +0800 Subject: [PATCH 1022/2157] mm, swap: avoid reclaiming irrelevant swap cache Patch series "mm, swap: remove swap slot cache", v3. Slot cache was initially introduced by commit 67afa38e012e ("mm/swap: add cache for swap slots allocation") to reduce the lock contention of si->lock. Previous series "mm, swap: rework of swap allocator locks" [1] removed swap slot cache for freeing path as freeing path no longer touches si->lock in most cased. Allocation path also have slight to none contention on si->lock since that series, but slot cache still helps to reduce other overheads, like counters and the plist. This series removes the slot cache from allocation path too, by using the cluster as allocation fast path and also reduce other overheads. Now slot cache is completely gone, the code is much simplified without obvious feature or performance change, also clean up related workaround. Also this should avoid other potential issues, e.g. the long pinning of swap slots: swap slot cache pins swap slots with HAS_CACHE, causing reclaim or allocation fail to use these slots on scanning. The only behavior change is the swap device allocation rotation mechanism, as explained in the patch "mm, swap: use percpu cluster as allocation fast path". Test results are looking good after deleting the swap slot cache: - vm-scalability with: `usemem --init-time -O -y -x -R -31 1G`, 12G memory cgroup using simulated pmem as SWAP (32G pmem, 32 CPUs), 16 test runs for each case, measuring the total throughput: Before (KB/s) (stdev) After (KB/s) (stdev) Random (4K): 424907.60 (24410.78) 414745.92 (34554.78) Random (64K): 163308.82 (11635.72) 167314.50 (18434.99) Sequential (4K, !-R): 6150056.79 (103205.90) 6321469.06 (115878.16) - Build linux kernel with make -j96, using 4K folio with 1.5G memory cgroup limit and 64K folio with 2G memory cgroup limit, on top of tmpfs, 12 test runs, measuring the system time: Before (s) (stdev) After (s) (stdev) make -j96 (4K): 6445.69 (61.95) 6408.80 (69.46) make -j96 (64K): 6841.71 (409.04) 6437.99 (435.55) The performance is unchanged, slightly better in some cases. [1] https://lore.kernel.org/linux-mm/20250113175732.48099-1-ryncsn@gmail.com/ This patch (of 7): Swap allocator will do swap cache reclaim to recycle HAS_CACHE slots for allocation. It initiates the reclaim from the offset to be reclaimed and looks up the corresponding folio. The lookup process is lockless, so it's possible the folio will be removed from the swap cache and given a different swap entry before the reclaim locks the folio. If it happens, the reclaim will end up reclaiming an irrelevant folio, and return wrong return value. This shouldn't cause any problem with correctness or stability, but it is indeed confusing and unexpected, and will increase fragmentation, decrease performance. Fix this by checking whether the folio is still pointing to the offset the allocator want to reclaim before reclaiming it. Link: https://lkml.kernel.org/r/20250313165935.63303-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20250313165935.63303-2-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baoquan He Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swapfile.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index a7f60006c52c..5618cd1c4b03 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -210,6 +210,7 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, int ret, nr_pages; bool need_reclaim; +again: folio = filemap_get_folio(address_space, swap_cache_index(entry)); if (IS_ERR(folio)) return 0; @@ -227,8 +228,16 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, if (!folio_trylock(folio)) goto out; - /* offset could point to the middle of a large folio */ + /* + * Offset could point to the middle of a large folio, or folio + * may no longer point to the expected offset before it's locked. + */ entry = folio->swap; + if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) { + folio_unlock(folio); + folio_put(folio); + goto again; + } offset = swp_offset(entry); need_reclaim = ((flags & TTRS_ANYWAY) || From 3123fb0a18d6c76b536a315b88553211cd3c2b1d Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:30 +0800 Subject: [PATCH 1023/2157] mm, swap: drop the flag TTRS_DIRECT This flag exists temporarily to allow the allocator to bypass the slot cache during freeing, so reclaiming one slot will free the slot immediately. But now we have already removed slot cache usage on freeing, so this flag has no effect now. Link: https://lkml.kernel.org/r/20250313165935.63303-3-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baoquan He Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swapfile.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 5618cd1c4b03..6f2de59c6355 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -158,8 +158,6 @@ static long swap_usage_in_pages(struct swap_info_struct *si) #define TTRS_UNMAPPED 0x2 /* Reclaim the swap entry if swap is getting full */ #define TTRS_FULL 0x4 -/* Reclaim directly, bypass the slot cache and don't touch device lock */ -#define TTRS_DIRECT 0x8 static bool swap_only_has_cache(struct swap_info_struct *si, unsigned long offset, int nr_pages) @@ -257,23 +255,8 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, if (!need_reclaim) goto out_unlock; - if (!(flags & TTRS_DIRECT)) { - /* Free through slot cache */ - delete_from_swap_cache(folio); - folio_set_dirty(folio); - ret = nr_pages; - goto out_unlock; - } - - xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(folio, entry, NULL); - xa_unlock_irq(&address_space->i_pages); - folio_ref_sub(folio, nr_pages); + delete_from_swap_cache(folio); folio_set_dirty(folio); - - ci = lock_cluster(si, offset); - swap_entry_range_free(si, ci, entry, nr_pages); - unlock_cluster(ci); ret = nr_pages; out_unlock: folio_unlock(folio); @@ -697,7 +680,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si, offset++; break; case SWAP_HAS_CACHE: - nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT); + nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); if (nr_reclaim > 0) offset += nr_reclaim; else @@ -849,7 +832,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { spin_unlock(&ci->lock); nr_reclaim = __try_to_reclaim_swap(si, offset, - TTRS_ANYWAY | TTRS_DIRECT); + TTRS_ANYWAY); spin_lock(&ci->lock); if (nr_reclaim) { offset += abs(nr_reclaim); From 78524b05f1a3e16a5d00cc9c6259c41a9d6003ce Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:31 +0800 Subject: [PATCH 1024/2157] mm, swap: avoid redundant swap device pinning Currently __read_swap_cache_async() has get/put_swap_device() calls to increase/decrease a swap device reference to prevent swapoff. While some of its callers have already held the swap device reference, e.g in do_swap_page() and shmem_swapin_folio() where __read_swap_cache_async() will finally called. Now there are only two callers not holding a swap device reference, so make them hold a reference instead. And drop the get/put_swap_device calls in __read_swap_cache_async. This should reduce the overhead for swap in during page fault slightly. Link: https://lkml.kernel.org/r/20250313165935.63303-4-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baoquan He Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swap_state.c | 14 ++++++++------ mm/zswap.c | 8 +++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index a54b035d6a6c..50840a2887a5 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -426,17 +426,13 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated, bool skip_if_exists) { - struct swap_info_struct *si; + struct swap_info_struct *si = swp_swap_info(entry); struct folio *folio; struct folio *new_folio = NULL; struct folio *result = NULL; void *shadow = NULL; *new_page_allocated = false; - si = get_swap_device(entry); - if (!si) - return NULL; - for (;;) { int err; /* @@ -532,7 +528,6 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, put_swap_folio(new_folio, entry); folio_unlock(new_folio); put_and_return: - put_swap_device(si); if (!(*new_page_allocated) && new_folio) folio_put(new_folio); return result; @@ -552,11 +547,16 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr, struct swap_iocb **plug) { + struct swap_info_struct *si; bool page_allocated; struct mempolicy *mpol; pgoff_t ilx; struct folio *folio; + si = get_swap_device(entry); + if (!si) + return NULL; + mpol = get_vma_policy(vma, addr, 0, &ilx); folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); @@ -564,6 +564,8 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (page_allocated) swap_read_folio(folio, plug); + + put_swap_device(si); return folio; } diff --git a/mm/zswap.c b/mm/zswap.c index 23365e76a3ce..8a1ded8fa973 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1051,14 +1051,20 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct folio *folio; struct mempolicy *mpol; bool folio_was_allocated; + struct swap_info_struct *si; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; /* try to allocate swap cache folio */ + si = get_swap_device(swpentry); + if (!si) + return -EEXIST; + mpol = get_task_policy(current); folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, - NO_INTERLEAVE_INDEX, &folio_was_allocated, true); + NO_INTERLEAVE_INDEX, &folio_was_allocated, true); + put_swap_device(si); if (!folio) return -ENOMEM; From 280cfccaa20c012f0979021939c68ada03c3d973 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:32 +0800 Subject: [PATCH 1025/2157] mm, swap: don't update the counter up-front The counter update before allocation design was useful to avoid unnecessary scan when device is full, so it will abort early if the counter indicates the device is full. But that is an uncommon case, and now scanning of a full device is very fast, so the up-front update is not helpful any more. Remove it and simplify the slot allocation logic. Link: https://lkml.kernel.org/r/20250313165935.63303-5-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baoquan He Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swapfile.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 6f2de59c6355..db836670c334 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1201,22 +1201,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) int order = swap_entry_order(entry_order); unsigned long size = 1 << order; struct swap_info_struct *si, *next; - long avail_pgs; int n_ret = 0; int node; spin_lock(&swap_avail_lock); - - avail_pgs = atomic_long_read(&nr_swap_pages) / size; - if (avail_pgs <= 0) { - spin_unlock(&swap_avail_lock); - goto noswap; - } - - n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); - - atomic_long_sub(n_goal * size, &nr_swap_pages); - start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { @@ -1250,10 +1238,8 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) spin_unlock(&swap_avail_lock); check_out: - if (n_ret < n_goal) - atomic_long_add((long)(n_goal - n_ret) * size, - &nr_swap_pages); -noswap: + atomic_long_sub(n_ret * size, &nr_swap_pages); + return n_ret; } From 1b7e90020eb770aa52991a34f21552b4c38ea690 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:33 +0800 Subject: [PATCH 1026/2157] mm, swap: use percpu cluster as allocation fast path Current allocation workflow first traverses the plist with a global lock held, after choosing a device, it uses the percpu cluster on that swap device. This commit moves the percpu cluster variable out of being tied to individual swap devices, making it a global percpu variable, and will be used directly for allocation as a fast path. The global percpu cluster variable will never point to a HDD device, and allocations on a HDD device are still globally serialized. This improves the allocator performance and prepares for removal of the slot cache in later commits. There shouldn't be much observable behavior change, except one thing: this changes how swap device allocation rotation works. Currently, each allocation will rotate the plist, and because of the existence of slot cache (one order 0 allocation usually returns 64 entries), swap devices of the same priority are rotated for every 64 order 0 entries consumed. High order allocations are different, they will bypass the slot cache, and so swap device is rotated for every 16K, 32K, or up to 2M allocation. The rotation rule was never clearly defined or documented, it was changed several times without mentioning. After this commit, and once slot cache is gone in later commits, swap device rotation will happen for every consumed cluster. Ideally non-HDD devices will be rotated if 2M space has been consumed for each order. Fragmented clusters will rotate the device faster, which seems OK. HDD devices is rotated for every allocation regardless of the allocation order, which should be OK too and trivial. This commit also slightly changes allocation behaviour for slot cache. The new added cluster allocation fast path may allocate entries from different device to the slot cache, this is not observable from user space, only impact performance very slightly, and slot cache will be just gone in next commit, so this can be ignored. Link: https://lkml.kernel.org/r/20250313165935.63303-6-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 11 ++- mm/swapfile.c | 158 ++++++++++++++++++++++++++++++++----------- 2 files changed, 121 insertions(+), 48 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fe91c293636..374bffc87427 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -284,12 +284,10 @@ enum swap_cluster_flags { #endif /* - * We assign a cluster to each CPU, so each CPU can allocate swap entry from - * its own cluster and swapout sequentially. The purpose is to optimize swapout - * throughput. + * We keep using same cluster for rotational device so IO will be sequential. + * The purpose is to optimize SWAP throughput on these device. */ -struct percpu_cluster { - local_lock_t lock; /* Protect the percpu_cluster above */ +struct swap_sequential_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; @@ -315,8 +313,7 @@ struct swap_info_struct { atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ atomic_long_t inuse_pages; /* number of those currently in use */ - struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ - struct percpu_cluster *global_cluster; /* Use one global cluster for rotating device */ + struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ spinlock_t global_cluster_lock; /* Serialize usage of global cluster */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ diff --git a/mm/swapfile.c b/mm/swapfile.c index db836670c334..8b296c4c636b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -116,6 +116,18 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0); atomic_t nr_rotate_swap = ATOMIC_INIT(0); +struct percpu_swap_cluster { + struct swap_info_struct *si[SWAP_NR_ORDERS]; + unsigned long offset[SWAP_NR_ORDERS]; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = { + .si = { NULL }, + .offset = { SWAP_ENTRY_INVALID }, + .lock = INIT_LOCAL_LOCK(), +}; + static struct swap_info_struct *swap_type_to_swap_info(int type) { if (type >= MAX_SWAPFILES) @@ -539,7 +551,7 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si) ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list); /* * Delete the cluster from list to prepare for discard, but keep - * the CLUSTER_FLAG_DISCARD flag, there could be percpu_cluster + * the CLUSTER_FLAG_DISCARD flag, percpu_swap_cluster could be * pointing to it, or ran into by relocate_cluster. */ list_del(&ci->list); @@ -805,10 +817,12 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, out: relocate_cluster(si, ci); unlock_cluster(ci); - if (si->flags & SWP_SOLIDSTATE) - __this_cpu_write(si->percpu_cluster->next[order], next); - else + if (si->flags & SWP_SOLIDSTATE) { + this_cpu_write(percpu_swap_cluster.offset[order], next); + this_cpu_write(percpu_swap_cluster.si[order], si); + } else { si->global_cluster->next[order] = next; + } return found; } @@ -862,20 +876,18 @@ static void swap_reclaim_work(struct work_struct *work) } /* - * Try to get swap entries with specified order from current cpu's swap entry - * pool (a cluster). This might involve allocating a new cluster for current CPU - * too. + * Try to allocate swap entries with specified order and try set a new + * cluster for current CPU too. */ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order, unsigned char usage) { struct swap_cluster_info *ci; - unsigned int offset, found = 0; + unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID; if (si->flags & SWP_SOLIDSTATE) { - /* Fast path using per CPU cluster */ - local_lock(&si->percpu_cluster->lock); - offset = __this_cpu_read(si->percpu_cluster->next[order]); + if (si == this_cpu_read(percpu_swap_cluster.si[order])) + offset = this_cpu_read(percpu_swap_cluster.offset[order]); } else { /* Serialize HDD SWAP allocation for each device. */ spin_lock(&si->global_cluster_lock); @@ -973,9 +985,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o } } done: - if (si->flags & SWP_SOLIDSTATE) - local_unlock(&si->percpu_cluster->lock); - else + if (!(si->flags & SWP_SOLIDSTATE)) spin_unlock(&si->global_cluster_lock); return found; } @@ -1196,6 +1206,51 @@ static bool get_swap_device_info(struct swap_info_struct *si) return true; } +/* + * Fast path try to get swap entries with specified order from current + * CPU's swap entry pool (a cluster). + */ +static int swap_alloc_fast(swp_entry_t entries[], + unsigned char usage, + int order, int n_goal) +{ + struct swap_cluster_info *ci; + struct swap_info_struct *si; + unsigned int offset, found; + int n_ret = 0; + + n_goal = min(n_goal, SWAP_BATCH); + + /* + * Once allocated, swap_info_struct will never be completely freed, + * so checking it's liveness by get_swap_device_info is enough. + */ + si = this_cpu_read(percpu_swap_cluster.si[order]); + offset = this_cpu_read(percpu_swap_cluster.offset[order]); + if (!si || !offset || !get_swap_device_info(si)) + return 0; + + while (offset) { + ci = lock_cluster(si, offset); + if (!cluster_is_usable(ci, order)) { + unlock_cluster(ci); + break; + } + if (cluster_is_empty(ci)) + offset = cluster_offset(si, ci); + found = alloc_swap_scan_cluster(si, ci, offset, order, usage); + if (!found) + break; + entries[n_ret++] = swp_entry(si->type, found); + if (n_ret == n_goal) + break; + offset = this_cpu_read(percpu_swap_cluster.offset[order]); + } + + put_swap_device(si); + return n_ret; +} + int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) { int order = swap_entry_order(entry_order); @@ -1204,19 +1259,36 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) int n_ret = 0; int node; + /* Fast path using percpu cluster */ + local_lock(&percpu_swap_cluster.lock); + n_ret = swap_alloc_fast(swp_entries, + SWAP_HAS_CACHE, + order, n_goal); + if (n_ret == n_goal) + goto out; + + n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH); + /* Rotate the device and switch to a new cluster */ spin_lock(&swap_avail_lock); start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { - /* requeue si to after same-priority siblings */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); if (get_swap_device_info(si)) { - n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, - n_goal, swp_entries, order); + /* + * For order 0 allocation, try best to fill the request + * as it's used by slot cache. + * + * For mTHP allocation, it always have n_goal == 1, + * and falling a mTHP swapin will just make the caller + * fallback to order 0 allocation, so just bail out. + */ + n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal, + swp_entries + n_ret, order); put_swap_device(si); if (n_ret || size > 1) - goto check_out; + goto out; } spin_lock(&swap_avail_lock); @@ -1234,12 +1306,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) if (plist_node_empty(&next->avail_lists[node])) goto start_over; } - spin_unlock(&swap_avail_lock); - -check_out: +out: + local_unlock(&percpu_swap_cluster.lock); atomic_long_sub(n_ret * size, &nr_swap_pages); - return n_ret; } @@ -2597,6 +2667,28 @@ static void wait_for_allocation(struct swap_info_struct *si) } } +/* + * Called after swap device's reference count is dead, so + * neither scan nor allocation will use it. + */ +static void flush_percpu_swap_cluster(struct swap_info_struct *si) +{ + int cpu, i; + struct swap_info_struct **pcp_si; + + for_each_possible_cpu(cpu) { + pcp_si = per_cpu_ptr(percpu_swap_cluster.si, cpu); + /* + * Invalidate the percpu swap cluster cache, si->users + * is dead, so no new user will point to it, just flush + * any existing user. + */ + for (i = 0; i < SWAP_NR_ORDERS; i++) + cmpxchg(&pcp_si[i], si, NULL); + } +} + + SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; @@ -2698,6 +2790,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) flush_work(&p->discard_work); flush_work(&p->reclaim_work); + flush_percpu_swap_cluster(p); destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) @@ -2725,8 +2818,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) arch_swap_invalidate_area(p->type); zswap_swapoff(p->type); mutex_unlock(&swapon_mutex); - free_percpu(p->percpu_cluster); - p->percpu_cluster = NULL; kfree(p->global_cluster); p->global_cluster = NULL; vfree(swap_map); @@ -3125,7 +3216,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); struct swap_cluster_info *cluster_info; unsigned long i, j, idx; - int cpu, err = -ENOMEM; + int err = -ENOMEM; cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL); if (!cluster_info) @@ -3134,20 +3225,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, for (i = 0; i < nr_clusters; i++) spin_lock_init(&cluster_info[i].lock); - if (si->flags & SWP_SOLIDSTATE) { - si->percpu_cluster = alloc_percpu(struct percpu_cluster); - if (!si->percpu_cluster) - goto err_free; - - for_each_possible_cpu(cpu) { - struct percpu_cluster *cluster; - - cluster = per_cpu_ptr(si->percpu_cluster, cpu); - for (i = 0; i < SWAP_NR_ORDERS; i++) - cluster->next[i] = SWAP_ENTRY_INVALID; - local_lock_init(&cluster->lock); - } - } else { + if (!(si->flags & SWP_SOLIDSTATE)) { si->global_cluster = kmalloc(sizeof(*si->global_cluster), GFP_KERNEL); if (!si->global_cluster) @@ -3424,8 +3502,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) bad_swap_unlock_inode: inode_unlock(inode); bad_swap: - free_percpu(si->percpu_cluster); - si->percpu_cluster = NULL; kfree(si->global_cluster); si->global_cluster = NULL; inode = NULL; From 0ff67f990bd45726e0d9e91111d998e7a3595b32 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:34 +0800 Subject: [PATCH 1027/2157] mm, swap: remove swap slot cache Slot cache is no longer needed now, removing it and all related code. - vm-scalability with: `usemem --init-time -O -y -x -R -31 1G`, 12G memory cgroup using simulated pmem as SWAP (32G pmem, 32 CPUs), 16 test runs for each case, measuring the total throughput: Before (KB/s) (stdev) After (KB/s) (stdev) Random (4K): 424907.60 (24410.78) 414745.92 (34554.78) Random (64K): 163308.82 (11635.72) 167314.50 (18434.99) Sequential (4K, !-R): 6150056.79 (103205.90) 6321469.06 (115878.16) The performance changes are below noise level. - Build linux kernel with make -j96, using 4K folio with 1.5G memory cgroup limit and 64K folio with 2G memory cgroup limit, on top of tmpfs, 12 test runs, measuring the system time: Before (s) (stdev) After (s) (stdev) make -j96 (4K): 6445.69 (61.95) 6408.80 (69.46) make -j96 (64K): 6841.71 (409.04) 6437.99 (435.55) Similar to above, 64k mTHP case showed a slight improvement. Link: https://lkml.kernel.org/r/20250313165935.63303-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baoquan He Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 - include/linux/swap_slots.h | 28 ---- mm/Makefile | 2 +- mm/swap_slots.c | 295 ------------------------------------- mm/swap_state.c | 8 +- mm/swapfile.c | 194 ++++++++---------------- 6 files changed, 67 insertions(+), 463 deletions(-) delete mode 100644 include/linux/swap_slots.h delete mode 100644 mm/swap_slots.c diff --git a/include/linux/swap.h b/include/linux/swap.h index 374bffc87427..c5856dcc263a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -465,7 +465,6 @@ void free_pages_and_swap_cache(struct encoded_page **, int); extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; -extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) @@ -483,13 +482,11 @@ swp_entry_t folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); -extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h deleted file mode 100644 index 840aec3523b2..000000000000 --- a/include/linux/swap_slots.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SWAP_SLOTS_H -#define _LINUX_SWAP_SLOTS_H - -#include -#include -#include - -#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH -#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) -#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) - -struct swap_slots_cache { - bool lock_initialized; - struct mutex alloc_lock; /* protects slots, nr, cur */ - swp_entry_t *slots; - int nr; - int cur; - int n_ret; -}; - -void disable_swap_slots_cache_lock(void); -void reenable_swap_slots_cache_unlock(void); -void enable_swap_slots_cache(void); - -extern bool swap_slot_cache_enabled; - -#endif /* _LINUX_SWAP_SLOTS_H */ diff --git a/mm/Makefile b/mm/Makefile index 2600e94abd3c..84b1127e43a5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -75,7 +75,7 @@ ifdef CONFIG_MMU obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o endif -obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o +obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_ZSWAP) += zswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o diff --git a/mm/swap_slots.c b/mm/swap_slots.c deleted file mode 100644 index 9c7c171df7ba..000000000000 --- a/mm/swap_slots.c +++ /dev/null @@ -1,295 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Manage cache of swap slots to be used for and returned from - * swap. - * - * Copyright(c) 2016 Intel Corporation. - * - * Author: Tim Chen - * - * We allocate the swap slots from the global pool and put - * it into local per cpu caches. This has the advantage - * of no needing to acquire the swap_info lock every time - * we need a new slot. - * - * There is also opportunity to simply return the slot - * to local caches without needing to acquire swap_info - * lock. We do not reuse the returned slots directly but - * move them back to the global pool in a batch. This - * allows the slots to coalesce and reduce fragmentation. - * - * The swap entry allocated is marked with SWAP_HAS_CACHE - * flag in map_count that prevents it from being allocated - * again from the global pool. - * - * The swap slots cache is protected by a mutex instead of - * a spin lock as when we search for slots with scan_swap_map, - * we can possibly sleep. - */ - -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); -static bool swap_slot_cache_active; -bool swap_slot_cache_enabled; -static bool swap_slot_cache_initialized; -static DEFINE_MUTEX(swap_slots_cache_mutex); -/* Serialize swap slots cache enable/disable operations */ -static DEFINE_MUTEX(swap_slots_cache_enable_mutex); - -static void __drain_swap_slots_cache(void); - -#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled) - -static void deactivate_swap_slots_cache(void) -{ - mutex_lock(&swap_slots_cache_mutex); - swap_slot_cache_active = false; - __drain_swap_slots_cache(); - mutex_unlock(&swap_slots_cache_mutex); -} - -static void reactivate_swap_slots_cache(void) -{ - mutex_lock(&swap_slots_cache_mutex); - swap_slot_cache_active = true; - mutex_unlock(&swap_slots_cache_mutex); -} - -/* Must not be called with cpu hot plug lock */ -void disable_swap_slots_cache_lock(void) -{ - mutex_lock(&swap_slots_cache_enable_mutex); - swap_slot_cache_enabled = false; - if (swap_slot_cache_initialized) { - /* serialize with cpu hotplug operations */ - cpus_read_lock(); - __drain_swap_slots_cache(); - cpus_read_unlock(); - } -} - -static void __reenable_swap_slots_cache(void) -{ - swap_slot_cache_enabled = has_usable_swap(); -} - -void reenable_swap_slots_cache_unlock(void) -{ - __reenable_swap_slots_cache(); - mutex_unlock(&swap_slots_cache_enable_mutex); -} - -static bool check_cache_active(void) -{ - long pages; - - if (!swap_slot_cache_enabled) - return false; - - pages = get_nr_swap_pages(); - if (!swap_slot_cache_active) { - if (pages > num_online_cpus() * - THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE) - reactivate_swap_slots_cache(); - goto out; - } - - /* if global pool of slot caches too low, deactivate cache */ - if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE) - deactivate_swap_slots_cache(); -out: - return swap_slot_cache_active; -} - -static int alloc_swap_slot_cache(unsigned int cpu) -{ - struct swap_slots_cache *cache; - swp_entry_t *slots; - - /* - * Do allocation outside swap_slots_cache_mutex - * as kvzalloc could trigger reclaim and folio_alloc_swap, - * which can lock swap_slots_cache_mutex. - */ - slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t), - GFP_KERNEL); - if (!slots) - return -ENOMEM; - - mutex_lock(&swap_slots_cache_mutex); - cache = &per_cpu(swp_slots, cpu); - if (cache->slots) { - /* cache already allocated */ - mutex_unlock(&swap_slots_cache_mutex); - - kvfree(slots); - - return 0; - } - - if (!cache->lock_initialized) { - mutex_init(&cache->alloc_lock); - cache->lock_initialized = true; - } - cache->nr = 0; - cache->cur = 0; - cache->n_ret = 0; - /* - * We initialized alloc_lock and free_lock earlier. We use - * !cache->slots or !cache->slots_ret to know if it is safe to acquire - * the corresponding lock and use the cache. Memory barrier below - * ensures the assumption. - */ - mb(); - cache->slots = slots; - mutex_unlock(&swap_slots_cache_mutex); - return 0; -} - -static void drain_slots_cache_cpu(unsigned int cpu, bool free_slots) -{ - struct swap_slots_cache *cache; - - cache = &per_cpu(swp_slots, cpu); - if (cache->slots) { - mutex_lock(&cache->alloc_lock); - swapcache_free_entries(cache->slots + cache->cur, cache->nr); - cache->cur = 0; - cache->nr = 0; - if (free_slots && cache->slots) { - kvfree(cache->slots); - cache->slots = NULL; - } - mutex_unlock(&cache->alloc_lock); - } -} - -static void __drain_swap_slots_cache(void) -{ - unsigned int cpu; - - /* - * This function is called during - * 1) swapoff, when we have to make sure no - * left over slots are in cache when we remove - * a swap device; - * 2) disabling of swap slot cache, when we run low - * on swap slots when allocating memory and need - * to return swap slots to global pool. - * - * We cannot acquire cpu hot plug lock here as - * this function can be invoked in the cpu - * hot plug path: - * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback - * -> memory allocation -> direct reclaim -> folio_alloc_swap - * -> drain_swap_slots_cache - * - * Hence the loop over current online cpu below could miss cpu that - * is being brought online but not yet marked as online. - * That is okay as we do not schedule and run anything on a - * cpu before it has been marked online. Hence, we will not - * fill any swap slots in slots cache of such cpu. - * There are no slots on such cpu that need to be drained. - */ - for_each_online_cpu(cpu) - drain_slots_cache_cpu(cpu, false); -} - -static int free_slot_cache(unsigned int cpu) -{ - mutex_lock(&swap_slots_cache_mutex); - drain_slots_cache_cpu(cpu, true); - mutex_unlock(&swap_slots_cache_mutex); - return 0; -} - -void enable_swap_slots_cache(void) -{ - mutex_lock(&swap_slots_cache_enable_mutex); - if (!swap_slot_cache_initialized) { - int ret; - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache", - alloc_swap_slot_cache, free_slot_cache); - if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating " - "without swap slots cache.\n", __func__)) - goto out_unlock; - - swap_slot_cache_initialized = true; - } - - __reenable_swap_slots_cache(); -out_unlock: - mutex_unlock(&swap_slots_cache_enable_mutex); -} - -/* called with swap slot cache's alloc lock held */ -static int refill_swap_slots_cache(struct swap_slots_cache *cache) -{ - if (!use_swap_slot_cache) - return 0; - - cache->cur = 0; - if (swap_slot_cache_active) - cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, - cache->slots, 0); - - return cache->nr; -} - -swp_entry_t folio_alloc_swap(struct folio *folio) -{ - swp_entry_t entry; - struct swap_slots_cache *cache; - - entry.val = 0; - - if (folio_test_large(folio)) { - if (IS_ENABLED(CONFIG_THP_SWAP)) - get_swap_pages(1, &entry, folio_order(folio)); - goto out; - } - - /* - * Preemption is allowed here, because we may sleep - * in refill_swap_slots_cache(). But it is safe, because - * accesses to the per-CPU data structure are protected by the - * mutex cache->alloc_lock. - * - * The alloc path here does not touch cache->slots_ret - * so cache->free_lock is not taken. - */ - cache = raw_cpu_ptr(&swp_slots); - - if (likely(check_cache_active() && cache->slots)) { - mutex_lock(&cache->alloc_lock); - if (cache->slots) { -repeat: - if (cache->nr) { - entry = cache->slots[cache->cur]; - cache->slots[cache->cur++].val = 0; - cache->nr--; - } else if (refill_swap_slots_cache(cache)) { - goto repeat; - } - } - mutex_unlock(&cache->alloc_lock); - if (entry.val) - goto out; - } - - get_swap_pages(1, &entry, 0); -out: - if (mem_cgroup_try_charge_swap(folio, entry)) { - put_swap_folio(folio, entry); - entry.val = 0; - } - return entry; -} diff --git a/mm/swap_state.c b/mm/swap_state.c index 50840a2887a5..2b5744e211cd 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -447,13 +446,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* * Just skip read ahead for unused swap slot. - * During swap_off when swap_slot_cache is disabled, - * we have to handle the race between putting - * swap entry in swap cache and marking swap slot - * as SWAP_HAS_CACHE. That's done in later part of code or - * else swap_off will be aborted if we return NULL. */ - if (!swap_entry_swapped(si, entry) && swap_slot_cache_enabled) + if (!swap_entry_swapped(si, entry)) goto put_and_return; /* diff --git a/mm/swapfile.c b/mm/swapfile.c index 8b296c4c636b..9bd95173865d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -885,16 +884,20 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o struct swap_cluster_info *ci; unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID; - if (si->flags & SWP_SOLIDSTATE) { - if (si == this_cpu_read(percpu_swap_cluster.si[order])) - offset = this_cpu_read(percpu_swap_cluster.offset[order]); - } else { + /* + * Swapfile is not block device so unable + * to allocate large entries. + */ + if (order && !(si->flags & SWP_BLKDEV)) + return 0; + + if (!(si->flags & SWP_SOLIDSTATE)) { /* Serialize HDD SWAP allocation for each device. */ spin_lock(&si->global_cluster_lock); offset = si->global_cluster->next[order]; - } + if (offset == SWAP_ENTRY_INVALID) + goto new_cluster; - if (offset) { ci = lock_cluster(si, offset); /* Cluster could have been used by another order */ if (cluster_is_usable(ci, order)) { @@ -1153,43 +1156,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_usage_sub(si, nr_entries); } -static int scan_swap_map_slots(struct swap_info_struct *si, - unsigned char usage, int nr, - swp_entry_t slots[], int order) -{ - unsigned int nr_pages = 1 << order; - int n_ret = 0; - - if (order > 0) { - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (!IS_ENABLED(CONFIG_THP_SWAP) || - nr_pages > SWAPFILE_CLUSTER) { - VM_WARN_ON_ONCE(1); - return 0; - } - - /* - * Swapfile is not block device so unable - * to allocate large entries. - */ - if (!(si->flags & SWP_BLKDEV)) - return 0; - } - - while (n_ret < nr) { - unsigned long offset = cluster_alloc_swap_entry(si, order, usage); - - if (!offset) - break; - slots[n_ret++] = swp_entry(si->type, offset); - } - - return n_ret; -} - static bool get_swap_device_info(struct swap_info_struct *si) { if (!percpu_ref_tryget_live(&si->users)) @@ -1210,16 +1176,13 @@ static bool get_swap_device_info(struct swap_info_struct *si) * Fast path try to get swap entries with specified order from current * CPU's swap entry pool (a cluster). */ -static int swap_alloc_fast(swp_entry_t entries[], +static int swap_alloc_fast(swp_entry_t *entry, unsigned char usage, - int order, int n_goal) + int order) { struct swap_cluster_info *ci; struct swap_info_struct *si; - unsigned int offset, found; - int n_ret = 0; - - n_goal = min(n_goal, SWAP_BATCH); + unsigned int offset, found = SWAP_ENTRY_INVALID; /* * Once allocated, swap_info_struct will never be completely freed, @@ -1228,46 +1191,48 @@ static int swap_alloc_fast(swp_entry_t entries[], si = this_cpu_read(percpu_swap_cluster.si[order]); offset = this_cpu_read(percpu_swap_cluster.offset[order]); if (!si || !offset || !get_swap_device_info(si)) - return 0; + return false; - while (offset) { - ci = lock_cluster(si, offset); - if (!cluster_is_usable(ci, order)) { - unlock_cluster(ci); - break; - } + ci = lock_cluster(si, offset); + if (cluster_is_usable(ci, order)) { if (cluster_is_empty(ci)) offset = cluster_offset(si, ci); found = alloc_swap_scan_cluster(si, ci, offset, order, usage); - if (!found) - break; - entries[n_ret++] = swp_entry(si->type, found); - if (n_ret == n_goal) - break; - offset = this_cpu_read(percpu_swap_cluster.offset[order]); + if (found) + *entry = swp_entry(si->type, found); + } else { + unlock_cluster(ci); } put_swap_device(si); - return n_ret; + return !!found; } -int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) +swp_entry_t folio_alloc_swap(struct folio *folio) { - int order = swap_entry_order(entry_order); - unsigned long size = 1 << order; + unsigned int order = folio_order(folio); + unsigned int size = 1 << order; struct swap_info_struct *si, *next; - int n_ret = 0; + swp_entry_t entry = {}; + unsigned long offset; int node; + if (order) { + /* + * Should not even be attempting large allocations when huge + * page swap is disabled. Warn and fail the allocation. + */ + if (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER) { + VM_WARN_ON_ONCE(1); + return entry; + } + } + /* Fast path using percpu cluster */ local_lock(&percpu_swap_cluster.lock); - n_ret = swap_alloc_fast(swp_entries, - SWAP_HAS_CACHE, - order, n_goal); - if (n_ret == n_goal) + if (swap_alloc_fast(&entry, SWAP_HAS_CACHE, order)) goto out; - n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH); /* Rotate the device and switch to a new cluster */ spin_lock(&swap_avail_lock); start_over: @@ -1276,18 +1241,13 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); if (get_swap_device_info(si)) { - /* - * For order 0 allocation, try best to fill the request - * as it's used by slot cache. - * - * For mTHP allocation, it always have n_goal == 1, - * and falling a mTHP swapin will just make the caller - * fallback to order 0 allocation, so just bail out. - */ - n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal, - swp_entries + n_ret, order); + offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE); put_swap_device(si); - if (n_ret || size > 1) + if (offset) { + entry = swp_entry(si->type, offset); + goto out; + } + if (order) goto out; } @@ -1309,8 +1269,14 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) spin_unlock(&swap_avail_lock); out: local_unlock(&percpu_swap_cluster.lock); - atomic_long_sub(n_ret * size, &nr_swap_pages); - return n_ret; + /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */ + if (mem_cgroup_try_charge_swap(folio, entry)) { + put_swap_folio(folio, entry); + entry.val = 0; + } + if (entry.val) + atomic_long_sub(size, &nr_swap_pages); + return entry; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) @@ -1606,25 +1572,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry) unlock_cluster(ci); } -void swapcache_free_entries(swp_entry_t *entries, int n) -{ - int i; - struct swap_cluster_info *ci; - struct swap_info_struct *si = NULL; - - if (n <= 0) - return; - - for (i = 0; i < n; ++i) { - si = _swap_info_get(entries[i]); - if (si) { - ci = lock_cluster(si, swp_offset(entries[i])); - swap_entry_range_free(si, ci, entries[i], 1); - unlock_cluster(ci); - } - } -} - int __swap_count(swp_entry_t entry) { struct swap_info_struct *si = swp_swap_info(entry); @@ -1865,6 +1812,7 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr) swp_entry_t get_swap_page_of_type(int type) { struct swap_info_struct *si = swap_type_to_swap_info(type); + unsigned long offset; swp_entry_t entry = {0}; if (!si) @@ -1872,8 +1820,13 @@ swp_entry_t get_swap_page_of_type(int type) /* This is called for allocating swap entry, not cache */ if (get_swap_device_info(si)) { - if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) - atomic_long_dec(&nr_swap_pages); + if (si->flags & SWP_WRITEOK) { + offset = cluster_alloc_swap_entry(si, 0, 1); + if (offset) { + entry = swp_entry(si->type, offset); + atomic_long_dec(&nr_swap_pages); + } + } put_swap_device(si); } fail: @@ -2634,21 +2587,6 @@ static void reinsert_swap_info(struct swap_info_struct *si) spin_unlock(&swap_lock); } -static bool __has_usable_swap(void) -{ - return !plist_head_empty(&swap_active_head); -} - -bool has_usable_swap(void) -{ - bool ret; - - spin_lock(&swap_lock); - ret = __has_usable_swap(); - spin_unlock(&swap_lock); - return ret; -} - /* * Called after clearing SWP_WRITEOK, ensures cluster_alloc_range * see the updated flags, so there will be no more allocations. @@ -2761,8 +2699,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) wait_for_allocation(p); - disable_swap_slots_cache_lock(); - set_current_oom_origin(); err = try_to_unuse(p->type); clear_current_oom_origin(); @@ -2770,12 +2706,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); - reenable_swap_slots_cache_unlock(); goto out_dput; } - reenable_swap_slots_cache_unlock(); - /* * Wait for swap operations protected by get/put_swap_device() * to complete. Because of synchronize_rcu() here, all swap @@ -3525,8 +3458,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) putname(name); if (inode) inode_unlock(inode); - if (!error) - enable_swap_slots_cache(); return error; } @@ -3922,6 +3853,11 @@ static void free_swap_count_continuations(struct swap_info_struct *si) } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +static bool __has_usable_swap(void) +{ + return !plist_head_empty(&swap_active_head); +} + void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; From b487a2da3575b6cdfb6d6559311830c8fea70bb9 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 14 Mar 2025 00:59:35 +0800 Subject: [PATCH 1028/2157] mm, swap: simplify folio swap allocation With slot cache gone, clean up the allocation helpers even more. folio_alloc_swap will be the only entry for allocation and adding the folio to swap cache (except suspend), making it opposite of folio_free_swap. Link: https://lkml.kernel.org/r/20250313165935.63303-8-ryncsn@gmail.com Signed-off-by: Kairui Song Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kalesh Singh Cc: Matthew Wilcow (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/swap.h | 8 ++- mm/shmem.c | 21 +++----- mm/swap.h | 6 --- mm/swap_state.c | 57 ---------------------- mm/swapfile.c | 113 ++++++++++++++++++++++++++++--------------- mm/vmscan.c | 16 +++++- 6 files changed, 96 insertions(+), 125 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index c5856dcc263a..9c99eee160f9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -478,7 +478,7 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -swp_entry_t folio_alloc_swap(struct folio *folio); +int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); @@ -586,11 +586,9 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline swp_entry_t folio_alloc_swap(struct folio *folio) +static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask) { - swp_entry_t entry; - entry.val = 0; - return entry; + return -EINVAL; } static inline bool folio_free_swap(struct folio *folio) diff --git a/mm/shmem.c b/mm/shmem.c index 15fa7fa9c8e8..b276ae233dfa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1533,7 +1533,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - swp_entry_t swap; pgoff_t index; int nr_pages; bool split = false; @@ -1615,14 +1614,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) folio_mark_uptodate(folio); } - swap = folio_alloc_swap(folio); - if (!swap.val) { - if (nr_pages > 1) - goto try_split; - - goto redirty; - } - /* * Add inode to shmem_unuse()'s list of swapped-out inodes, * if it's not already there. Do it now before the folio is @@ -1635,20 +1626,20 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) if (list_empty(&info->swaplist)) list_add(&info->swaplist, &shmem_swaplist); - if (add_to_swap_cache(folio, swap, - __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, - NULL) == 0) { + if (!folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN)) { shmem_recalc_inode(inode, 0, nr_pages); - swap_shmem_alloc(swap, nr_pages); - shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap)); + swap_shmem_alloc(folio->swap, nr_pages); + shmem_delete_from_page_cache(folio, swp_to_radix_entry(folio->swap)); mutex_unlock(&shmem_swaplist_mutex); BUG_ON(folio_mapped(folio)); return swap_writepage(&folio->page, wbc); } + list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); - put_swap_folio(folio, swap); + if (nr_pages > 1) + goto try_split; redirty: folio_mark_dirty(folio); if (wbc->for_reclaim) diff --git a/mm/swap.h b/mm/swap.h index ad2f121de970..0abb68091b4f 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -50,7 +50,6 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry) } void show_swap_cache_info(void); -bool add_to_swap(struct folio *folio); void *get_shadow_from_swap_cache(swp_entry_t entry); int add_to_swap_cache(struct folio *folio, swp_entry_t entry, gfp_t gfp, void **shadowp); @@ -163,11 +162,6 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping, return filemap_get_folio(mapping, index); } -static inline bool add_to_swap(struct folio *folio) -{ - return false; -} - static inline void *get_shadow_from_swap_cache(swp_entry_t entry) { return NULL; diff --git a/mm/swap_state.c b/mm/swap_state.c index 2b5744e211cd..68fd981b514f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -166,63 +166,6 @@ void __delete_from_swap_cache(struct folio *folio, __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr); } -/** - * add_to_swap - allocate swap space for a folio - * @folio: folio we want to move to swap - * - * Allocate swap space for the folio and add the folio to the - * swap cache. - * - * Context: Caller needs to hold the folio lock. - * Return: Whether the folio was added to the swap cache. - */ -bool add_to_swap(struct folio *folio) -{ - swp_entry_t entry; - int err; - - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); - - entry = folio_alloc_swap(folio); - if (!entry.val) - return false; - - /* - * XArray node allocations from PF_MEMALLOC contexts could - * completely exhaust the page allocator. __GFP_NOMEMALLOC - * stops emergency reserves from being allocated. - * - * TODO: this could cause a theoretical memory reclaim - * deadlock in the swap out path. - */ - /* - * Add it to the swap cache. - */ - err = add_to_swap_cache(folio, entry, - __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); - if (err) - goto fail; - /* - * Normally the folio will be dirtied in unmap because its - * pte should be dirty. A special case is MADV_FREE page. The - * page's pte could have dirty bit cleared but the folio's - * SwapBacked flag is still set because clearing the dirty bit - * and SwapBacked flag has no lock protected. For such folio, - * unmap will not set dirty bit for it, so folio reclaim will - * not write the folio out. This can cause data corruption when - * the folio is swapped in later. Always setting the dirty flag - * for the folio solves the problem. - */ - folio_mark_dirty(folio); - - return true; - -fail: - put_swap_folio(folio, entry); - return false; -} - /* * This must be called only on folios that have * been verified to be in the swap cache and locked. diff --git a/mm/swapfile.c b/mm/swapfile.c index 9bd95173865d..2eff8b51a945 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1176,9 +1176,8 @@ static bool get_swap_device_info(struct swap_info_struct *si) * Fast path try to get swap entries with specified order from current * CPU's swap entry pool (a cluster). */ -static int swap_alloc_fast(swp_entry_t *entry, - unsigned char usage, - int order) +static bool swap_alloc_fast(swp_entry_t *entry, + int order) { struct swap_cluster_info *ci; struct swap_info_struct *si; @@ -1197,7 +1196,7 @@ static int swap_alloc_fast(swp_entry_t *entry, if (cluster_is_usable(ci, order)) { if (cluster_is_empty(ci)) offset = cluster_offset(si, ci); - found = alloc_swap_scan_cluster(si, ci, offset, order, usage); + found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE); if (found) *entry = swp_entry(si->type, found); } else { @@ -1208,47 +1207,30 @@ static int swap_alloc_fast(swp_entry_t *entry, return !!found; } -swp_entry_t folio_alloc_swap(struct folio *folio) +/* Rotate the device and switch to a new cluster */ +static bool swap_alloc_slow(swp_entry_t *entry, + int order) { - unsigned int order = folio_order(folio); - unsigned int size = 1 << order; - struct swap_info_struct *si, *next; - swp_entry_t entry = {}; - unsigned long offset; int node; + unsigned long offset; + struct swap_info_struct *si, *next; - if (order) { - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER) { - VM_WARN_ON_ONCE(1); - return entry; - } - } - - /* Fast path using percpu cluster */ - local_lock(&percpu_swap_cluster.lock); - if (swap_alloc_fast(&entry, SWAP_HAS_CACHE, order)) - goto out; - - /* Rotate the device and switch to a new cluster */ + node = numa_node_id(); spin_lock(&swap_avail_lock); start_over: - node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { + /* Rotate the device and switch to a new cluster */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); if (get_swap_device_info(si)) { offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE); put_swap_device(si); if (offset) { - entry = swp_entry(si->type, offset); - goto out; + *entry = swp_entry(si->type, offset); + return true; } if (order) - goto out; + return false; } spin_lock(&swap_avail_lock); @@ -1267,16 +1249,67 @@ swp_entry_t folio_alloc_swap(struct folio *folio) goto start_over; } spin_unlock(&swap_avail_lock); -out: - local_unlock(&percpu_swap_cluster.lock); - /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */ - if (mem_cgroup_try_charge_swap(folio, entry)) { - put_swap_folio(folio, entry); - entry.val = 0; + return false; +} + +/** + * folio_alloc_swap - allocate swap space for a folio + * @folio: folio we want to move to swap + * @gfp: gfp mask for shadow nodes + * + * Allocate swap space for the folio and add the folio to the + * swap cache. + * + * Context: Caller needs to hold the folio lock. + * Return: Whether the folio was added to the swap cache. + */ +int folio_alloc_swap(struct folio *folio, gfp_t gfp) +{ + unsigned int order = folio_order(folio); + unsigned int size = 1 << order; + swp_entry_t entry = {}; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); + + /* + * Should not even be attempting large allocations when huge + * page swap is disabled. Warn and fail the allocation. + */ + if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) { + VM_WARN_ON_ONCE(1); + return -EINVAL; } - if (entry.val) - atomic_long_sub(size, &nr_swap_pages); - return entry; + + local_lock(&percpu_swap_cluster.lock); + if (!swap_alloc_fast(&entry, order)) + swap_alloc_slow(&entry, order); + local_unlock(&percpu_swap_cluster.lock); + + /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */ + if (mem_cgroup_try_charge_swap(folio, entry)) + goto out_free; + + if (!entry.val) + return -ENOMEM; + + /* + * XArray node allocations from PF_MEMALLOC contexts could + * completely exhaust the page allocator. __GFP_NOMEMALLOC + * stops emergency reserves from being allocated. + * + * TODO: this could cause a theoretical memory reclaim + * deadlock in the swap out path. + */ + if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL)) + goto out_free; + + atomic_long_sub(size, &nr_swap_pages); + return 0; + +out_free: + put_swap_folio(folio, entry); + return -ENOMEM; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) diff --git a/mm/vmscan.c b/mm/vmscan.c index fcca38bc640f..be00af3763b5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1289,7 +1289,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, split_folio_to_list(folio, folio_list)) goto activate_locked; } - if (!add_to_swap(folio)) { + if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN)) { int __maybe_unused order = folio_order(folio); if (!folio_test_large(folio)) @@ -1305,9 +1305,21 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, } #endif count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK); - if (!add_to_swap(folio)) + if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN)) goto activate_locked_split; } + /* + * Normally the folio will be dirtied in unmap because its + * pte should be dirty. A special case is MADV_FREE page. The + * page's pte could have dirty bit cleared but the folio's + * SwapBacked flag is still set because clearing the dirty bit + * and SwapBacked flag has no lock protected. For such folio, + * unmap will not set dirty bit for it, so folio reclaim will + * not write the folio out. This can cause data corruption when + * the folio is swapped in later. Always setting the dirty flag + * for the folio solves the problem. + */ + folio_mark_dirty(folio); } } From 88fb7794f69375b08ff5432d8b5bc79eeb3e1dbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Feb 2025 08:30:33 -0800 Subject: [PATCH 1029/2157] vmalloc: drop Christoph from Reviewers I haven't been doing as much review as I should. As part of reducing my inbox flow drop me from the official Reviewers. I might still chime in on patches occasionally. Link: https://lkml.kernel.org/r/20250224163033.350072-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 40f233b0fa9c..c13201979633 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25301,7 +25301,6 @@ F: tools/testing/vsock/ VMALLOC M: Andrew Morton R: Uladzislau Rezki -R: Christoph Hellwig L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From ebc29409c2966bd6a6215b27fc654de7f55ce099 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 25 Feb 2025 18:45:09 +0000 Subject: [PATCH 1030/2157] mm/page_alloc: warn on nr_reserved_highatomic underflow As documented in the comment this underflow should not happen. The locking has indeed changed here since the comment was written, see the migratetype hygiene patches[0]. However, those changes made the locking _safer_, so the underflow _really_ shouldn't happen now. So upgrade the comment to a warning. [0] https://lore.kernel.org/all/20240320180429.678181-7-hannes@cmpxchg.org/T/#m3da87e6cc3348a4640aa298137bc9f8f61b76c84 Link: https://lkml.kernel.org/r/20250225-warn-underflow-v1-1-3dc542941d3a@google.com Signed-off-by: Brendan Jackman Reviewed-by: Vlastimil Babka Cc: Johannes Weiner Signed-off-by: Andrew Morton --- mm/page_alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 945437d7ac44..3c5624380b6c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3095,6 +3095,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, if (!page) continue; + size = max(pageblock_nr_pages, 1UL << order); /* * It should never happen but changes to * locking could inadvertently allow a per-cpu @@ -3102,8 +3103,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * while unreserving so be safe and watch for * underflows. */ - size = max(pageblock_nr_pages, 1UL << order); - size = min(size, zone->nr_reserved_highatomic); + if (WARN_ON_ONCE(size > zone->nr_reserved_highatomic)) + size = zone->nr_reserved_highatomic; zone->nr_reserved_highatomic -= size; /* From 0c555a3c1bc9114ad91422b941dcd29e02490687 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Jan 2025 14:21:14 -0800 Subject: [PATCH 1031/2157] mm,procfs: allow read-only remote mm access under CAP_PERFMON It's very common for various tracing and profiling toolis to need to access /proc/PID/maps contents for stack symbolization needs to learn which shared libraries are mapped in memory, at which file offset, etc. Currently, access to /proc/PID/maps requires CAP_SYS_PTRACE (unless we are looking at data for our own process, which is a trivial case not too relevant for profilers use cases). Unfortunately, CAP_SYS_PTRACE implies way more than just ability to discover memory layout of another process: it allows to fully control arbitrary other processes. This is problematic from security POV for applications that only need read-only /proc/PID/maps (and other similar read-only data) access, and in large production settings CAP_SYS_PTRACE is frowned upon even for the system-wide profilers. On the other hand, it's already possible to access similar kind of information (and more) with just CAP_PERFMON capability. E.g., setting up PERF_RECORD_MMAP collection through perf_event_open() would give one similar information to what /proc/PID/maps provides. CAP_PERFMON, together with CAP_BPF, is already a very common combination for system-wide profiling and observability application. As such, it's reasonable and convenient to be able to access /proc/PID/maps with CAP_PERFMON capabilities instead of CAP_SYS_PTRACE. For procfs, these permissions are checked through common mm_access() helper, and so we augment that with cap_perfmon() check *only* if requested mode is PTRACE_MODE_READ. I.e., PTRACE_MODE_ATTACH wouldn't be permitted by CAP_PERFMON. So /proc/PID/mem, which uses PTRACE_MODE_ATTACH, won't be permitted by CAP_PERFMON, but /proc/PID/maps, /proc/PID/environ, and a bunch of other read-only contents will be allowable under CAP_PERFMON. Besides procfs itself, mm_access() is used by process_madvise() and process_vm_{readv,writev}() syscalls. The former one uses PTRACE_MODE_READ to avoid leaking ASLR metadata, and as such CAP_PERFMON seems like a meaningful allowable capability as well. process_vm_{readv,writev} currently assume PTRACE_MODE_ATTACH level of permissions (though for readv PTRACE_MODE_READ seems more reasonable, but that's outside the scope of this change), and as such won't be affected by this patch. Link: https://lkml.kernel.org/r/20250127222114.1132392-1-andrii@kernel.org Signed-off-by: Andrii Nakryiko Reviewed-by: Shakeel Butt Acked-by: Ingo Molnar Cc: Al Viro Cc: Christian Brauner Cc: Jann Horn Cc: Kees Cook Cc: Liam Howlett Cc: "Mike Rapoport (IBM)" Cc: Peter Zijlstra (Intel) Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- kernel/fork.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 735405a9c5f3..7757e74ebce4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1559,6 +1559,17 @@ struct mm_struct *get_task_mm(struct task_struct *task) } EXPORT_SYMBOL_GPL(get_task_mm); +static bool may_access_mm(struct mm_struct *mm, struct task_struct *task, unsigned int mode) +{ + if (mm == current->mm) + return true; + if (ptrace_may_access(task, mode)) + return true; + if ((mode & PTRACE_MODE_READ) && perfmon_capable()) + return true; + return false; +} + struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) { struct mm_struct *mm; @@ -1571,7 +1582,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mm = get_task_mm(task); if (!mm) { mm = ERR_PTR(-ESRCH); - } else if (mm != current->mm && !ptrace_may_access(task, mode)) { + } else if (!may_access_mm(mm, task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } From fc0d9d9afcd304e5402a73f6244926915e6de8e6 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 17 Jan 2025 22:20:13 +0800 Subject: [PATCH 1032/2157] MAINTAINERS: add Yang Yang as a co-maintainer of PER-TASK DELAY ACCOUNTING Balbir Singh is the unique maintainer of PER-TASK DELAY ACCOUNTING, and he had started work on cgroupstats a long time back, this subsystem then is not growing at a very rapid pace. With their excellent work delay accounting is still very useful for observing and optimizing system delay, but still needs continuous improvement. Yang Yang with his team had worked for most of the recent patches of the subsystem, and he has a strong willing to help, Balbir Singh is glad to see that, so add him as a co-maintainer. Link: https://lkml.kernel.org/r/20250117222013817zWHgBaSigRI_eRJt1hqnu@zte.com.cn Signed-off-by: Yang Yang Cc: Balbir Singh Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ed7aa6867674..a666d7e34fd3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18513,6 +18513,7 @@ F: mm/percpu*.c PER-TASK DELAY ACCOUNTING M: Balbir Singh +M: Yang Yang S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c From 35dac71cff8f68f065a6719631db919d0640d5e5 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Mon, 20 Jan 2025 16:04:26 -0300 Subject: [PATCH 1033/2157] scripts: add script to extract built-in firmware blobs There is currently no tool to extract a firmware blob that is built-in on vmlinux to the best of my knowledge. So if we have a kernel image containing the blobs, and we want to rebuild the kernel with some debug patches for example (and given that the image also has IKCONFIG=y), we currently can't do that for the same versions for all the firmware blobs, _unless_ we have exact commits of linux-firmware for the specific versions for each firmware included. Through the options CONFIG_EXTRA_FIRMWARE{_DIR} one is able to build a kernel including firmware blobs in a built-in fashion. This is usually the case of built-in drivers that require some blobs in order to work properly, for example, like in non-initrd based systems. Add hereby a script to extract these blobs from a non-stripped vmlinux, similar to the idea of "extract-ikconfig". The firmware loader interface saves such built-in blobs as rodata entries, having a field for the FW name as "_fw___bin"; the tool extracts files named "_" for each rodata firmware entry detected. It makes use of awk, bash, dd and readelf, pretty standard tooling for Linux development. With this tool, we can blindly extract the FWs and easily re-add them in the new debug kernel build, allowing a more deterministic testing without the burden of "hunting down" the proper version of each firmware binary. Link: https://lkml.kernel.org/r/20250120190436.127578-1-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Suggested-by: Thadeu Lima de Souza Cascardo Reviewed-by: Thadeu Lima de Souza Cascardo Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: Luis Chamberalin Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: "Rafael J. Wysocki" Cc: Russ Weight Signed-off-by: Andrew Morton --- scripts/extract-fwblobs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100755 scripts/extract-fwblobs diff --git a/scripts/extract-fwblobs b/scripts/extract-fwblobs new file mode 100755 index 000000000000..53729124e5a0 --- /dev/null +++ b/scripts/extract-fwblobs @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# ----------------------------------------------------------------------------- +# Extracts the vmlinux built-in firmware blobs - requires a non-stripped image +# ----------------------------------------------------------------------------- + +if [ -z "$1" ]; then + echo "Must provide a non-stripped vmlinux as argument" + exit 1 +fi + +read -r RD_ADDR_HEX RD_OFF_HEX <<< "$( readelf -SW "$1" |\ +grep -w rodata | awk '{print "0x"$5" 0x"$6}' )" + +FW_SYMS="$(readelf -sW "$1" |\ +awk -n '/fw_end/ { end=$2 ; print name " 0x" start " 0x" end; } { start=$2; name=$8; }')" + +while IFS= read -r entry; do + read -r FW_NAME FW_ADDR_ST_HEX FW_ADDR_END_HEX <<< "$entry" + + # Notice kernel prepends _fw_ and appends _bin to the FW name + # in rodata; hence we hereby filter that out. + FW_NAME=${FW_NAME:4:-4} + + FW_OFFSET="$(printf "%d" $((FW_ADDR_ST_HEX - RD_ADDR_HEX + RD_OFF_HEX)))" + FW_SIZE="$(printf "%d" $((FW_ADDR_END_HEX - FW_ADDR_ST_HEX)))" + + dd if="$1" of="./${FW_NAME}" bs="${FW_SIZE}" count=1 iflag=skip_bytes skip="${FW_OFFSET}" +done <<< "${FW_SYMS}" From 541da9f87ddbc68b183a6345284d55441e6d18ca Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Tue, 28 Jan 2025 19:34:30 -0600 Subject: [PATCH 1034/2157] .mailmap: remove redundant mappings of emails Remove two redundant mappings: changbin.du@intel.com -> changbin.du@intel.com viresh.kumar@linaro.org -> viresh.kumar@linaro.org Link: https://lkml.kernel.org/r/20250129013430.1117720-1-carlos.bilbao@kernel.org Signed-off-by: Carlos Bilbao Cc: Jonathan Corbet Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- .mailmap | 2 -- 1 file changed, 2 deletions(-) diff --git a/.mailmap b/.mailmap index 72bf830e1e58..0e373e743495 100644 --- a/.mailmap +++ b/.mailmap @@ -153,7 +153,6 @@ Carlos Bilbao Carlos Bilbao Carlos Bilbao Changbin Du -Changbin Du Chao Yu Chao Yu Chester Lin @@ -757,7 +756,6 @@ Vinod Koul Viresh Kumar Viresh Kumar Viresh Kumar -Viresh Kumar Viresh Kumar Vishnu Dasa Vivek Aknurwar From 87ad827a2780b8abe08e64a03553e4898a06b9fc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Jan 2025 16:17:47 -0800 Subject: [PATCH 1035/2157] docs,procfs: document /proc/PID/* access permission checks Add a paragraph explaining what sort of capabilities a process would need to read procfs data for some other process. Also mention that reading data for its own process doesn't require any extra permissions. Link: https://lkml.kernel.org/r/20250129001747.759990-1-andrii@kernel.org Signed-off-by: Andrii Nakryiko Reviewed-by: Shakeel Butt Cc: Al Viro Cc: Christian Brauner Cc: Steven Rostedt (VMware) Cc: Ingo Molnar Cc: Jann Horn Cc: Kees Cook Cc: Liam Howlett Cc: "Mike Rapoport (IBM)" Cc: Peter Zijlstra (Intel) Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 09f0aed5a08b..0e7825bb1e3a 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -128,6 +128,16 @@ process running on the system, which is named after the process ID (PID). The link 'self' points to the process reading the file system. Each process subdirectory has the entries listed in Table 1-1. +A process can read its own information from /proc/PID/* with no extra +permissions. When reading /proc/PID/* information for other processes, reading +process is required to have either CAP_SYS_PTRACE capability with +PTRACE_MODE_READ access permissions, or, alternatively, CAP_PERFMON +capability. This applies to all read-only information like `maps`, `environ`, +`pagemap`, etc. The only exception is `mem` file due to its read-write nature, +which requires CAP_SYS_PTRACE capabilities with more elevated +PTRACE_MODE_ATTACH permissions; CAP_PERFMON capability does not grant access +to /proc/PID/mem for other processes. + Note that an open file descriptor to /proc/ or to any of its contained files or subdirectories does not prevent being reused for some other process in the event that exits. Operations on From 95d4b3450ebe2e28a87980b7f7407a8d8d75d633 Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Sun, 19 Jan 2025 14:24:08 +0800 Subject: [PATCH 1036/2157] lib/plist.c: add shortcut for plist_requeue() In the operation of plist_requeue(), "node" is deleted from the list before queueing it back to the list again, which involves looping to find the tail of same-prio entries. If "node" is the head of same-prio entries which means its prio_list is on the priority list, then "node_next" can be retrieve immediately by the next entry of prio_list, instead of looping nodes on node_list. The shortcut implementation can benefit plist_requeue() running the below test, and the test result is shown in the following table. One can observe from the test result that when the number of nodes of same-prio entries is smaller, then the probability of hitting the shortcut can be bigger, thus the benefit can be more significant. While it tends to behave almost the same for long same-prio entries, since the probability of taking the shortcut is much smaller. ----------------------------------------------------------------------- | Test size | 200 | 400 | 600 | 800 | 1000 | ----------------------------------------------------------------------- | new_plist_requeue | 271521| 1007913| 2148033| 4346792| 12200940| ----------------------------------------------------------------------- | old_plist_requeue | 301395| 1105544| 2488301| 4632980| 12217275| ----------------------------------------------------------------------- The test is done on x86_64 architecture with v6.9 kernel and Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz. Test script( executed in kernel module mode ): int init_module(void) { unsigned int test_data[test_size]; /* Split the list into 10 different priority * , when test_size is larger, the number of * nodes within each priority is larger. */ for (i = 0; i < ARRAY_SIZE(test_data); i++) { test_data[i] = i % 10; } ktime_t start, end, time_elapsed = 0; plist_head_init(&test_head_local); for (i = 0; i < ARRAY_SIZE(test_node_local); i++) { plist_node_init(test_node_local + i, 0); test_node_local[i].prio = test_data[i]; } for (i = 0; i < ARRAY_SIZE(test_node_local); i++) { if (plist_node_empty(test_node_local + i)) { plist_add(test_node_local + i, &test_head_local); } } for (i = 0; i < ARRAY_SIZE(test_node_local); i += 1) { start = ktime_get(); plist_requeue(test_node_local + i, &test_head_local); end = ktime_get(); time_elapsed += (end - start); } pr_info("plist_requeue() elapsed time : %lld, size %d\n", time_elapsed, test_size); return 0; } [akpm@linux-foundation.org: tweak comment and code layout] Link: https://lkml.kernel.org/r/20250119062408.77638-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/plist.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/plist.c b/lib/plist.c index c6bce1226874..330febb4bd7d 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -171,12 +171,24 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) plist_del(node, head); + /* + * After plist_del(), iter is the replacement of the node. If the node + * was on prio_list, take shortcut to find node_next instead of looping. + */ + if (!list_empty(&iter->prio_list)) { + iter = list_entry(iter->prio_list.next, struct plist_node, + prio_list); + node_next = &iter->node_list; + goto queue; + } + plist_for_each_continue(iter, head) { if (node->prio != iter->prio) { node_next = &iter->node_list; break; } } +queue: list_add_tail(&node->node_list, node_next); plist_check_head(head); From 9986fb5164c8b21f6439cfd45ba36d8cc80c9710 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:24 +0530 Subject: [PATCH 1037/2157] kexec: initialize ELF lowest address to ULONG_MAX Patch series "powerpc/crash: use generic crashkernel reservation", v3. Commit 0ab97169aa05 ("crash_core: add generic function to do reservation") added a generic function to reserve crashkernel memory. So let's use the same function on powerpc and remove the architecture-specific code that essentially does the same thing. The generic crashkernel reservation also provides a way to split the crashkernel reservation into high and low memory reservations, which can be enabled for powerpc in the future. Additionally move powerpc to use generic APIs to locate memory hole for kexec segments while loading kdump kernel. This patch (of 7): kexec_elf_load() loads an ELF executable and sets the address of the lowest PT_LOAD section to the address held by the lowest_load_addr function argument. To determine the lowest PT_LOAD address, a local variable lowest_addr (type unsigned long) is initialized to UINT_MAX. After loading each PT_LOAD, its address is compared to lowest_addr. If a loaded PT_LOAD address is lower, lowest_addr is updated. However, setting lowest_addr to UINT_MAX won't work when the kernel image is loaded above 4G, as the returned lowest PT_LOAD address would be invalid. This is resolved by initializing lowest_addr to ULONG_MAX instead. This issue was discovered while implementing crashkernel high/low reservation on the PowerPC architecture. Link: https://lkml.kernel.org/r/20250131113830.925179-1-sourabhjain@linux.ibm.com Link: https://lkml.kernel.org/r/20250131113830.925179-2-sourabhjain@linux.ibm.com Fixes: a0458284f062 ("powerpc: Add support code for kexec_file_load()") Signed-off-by: Sourabh Jain Acked-by: Hari Bathini Acked-by: Baoquan He Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Signed-off-by: Andrew Morton --- kernel/kexec_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c index d3689632e8b9..3a5c25b2adc9 100644 --- a/kernel/kexec_elf.c +++ b/kernel/kexec_elf.c @@ -390,7 +390,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_buf *kbuf, unsigned long *lowest_load_addr) { - unsigned long lowest_addr = UINT_MAX; + unsigned long lowest_addr = ULONG_MAX; int ret; size_t i; From 7b54a96f30dec4b812841d179a26e88a7887dc06 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:25 +0530 Subject: [PATCH 1038/2157] crash: remove an unused argument from reserve_crashkernel_generic() cmdline argument is not used in reserve_crashkernel_generic() so remove it. Correspondingly, all the callers have been updated as well. No functional change intended. Link: https://lkml.kernel.org/r/20250131113830.925179-3-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Acked-by: Hari Bathini Acked-by: Baoquan He Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Signed-off-by: Andrew Morton --- arch/arm64/mm/init.c | 6 ++---- arch/loongarch/kernel/setup.c | 5 ++--- arch/riscv/mm/init.c | 6 ++---- arch/x86/kernel/setup.c | 6 ++---- include/linux/crash_reserve.h | 11 +++++------ kernel/crash_reserve.c | 9 ++++----- 6 files changed, 17 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ccdef53872a0..2e496209af80 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -98,21 +98,19 @@ static void __init arch_reserve_crashkernel(void) { unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 90cb3ca96f08..b99fbb388fe0 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -259,18 +259,17 @@ static void __init arch_reserve_crashkernel(void) int ret; unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static void __init fdt_setup(void) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..3ec9bfaa088a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1396,21 +1396,19 @@ static void __init arch_reserve_crashkernel(void) { unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } void __init paging_init(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cebee310e200..b8ca14d29b44 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -472,14 +472,13 @@ static void __init memblock_x86_reserve_range_setup_data(void) static void __init arch_reserve_crashkernel(void) { unsigned long long crash_base, crash_size, low_size = 0; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) @@ -490,8 +489,7 @@ static void __init arch_reserve_crashkernel(void) return; } - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static struct resource standard_io_resources[] = { diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 5a9df944fb80..1fe7e7d1b214 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -32,13 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, #define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() #endif -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high); +void __init reserve_crashkernel_generic(unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); #else -static inline void __init reserve_crashkernel_generic(char *cmdline, +static inline void __init reserve_crashkernel_generic( unsigned long long crash_size, unsigned long long crash_base, unsigned long long crash_low_size, diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index a620fb4b2116..aff7c0fdbefa 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -375,11 +375,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) return 0; } -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high) +void __init reserve_crashkernel_generic(unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) { unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; bool fixed_base = false; From 9f0552c978f8950b4661f1222290fb57af811a8b Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:26 +0530 Subject: [PATCH 1039/2157] crash: let arch decide usable memory range in reserved area Although the crashkernel area is reserved, on architectures like PowerPC, it is possible for the crashkernel reserved area to contain components like RTAS, TCE, OPAL, etc. To avoid placing kexec segments over these components, PowerPC has its own set of APIs to locate holes in the crashkernel reserved area. Add an arch hook in the generic locate mem hole APIs so that architectures can handle such special regions in the crashkernel area while locating memory holes for kexec segments using generic APIs. With this, a lot of redundant arch-specific code can be removed, as it performs the exact same job as the generic APIs. To keep the generic and arch-specific changes separate, the changes related to moving PowerPC to use the generic APIs and the removal of PowerPC-specific APIs for memory hole allocation are done in a subsequent patch titled "powerpc/crash: Use generic APIs to locate memory hole for kdump. Link: https://lkml.kernel.org/r/20250131113830.925179-4-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Acked-by: Baoquan He Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Signed-off-by: Andrew Morton --- include/linux/kexec.h | 9 +++++++++ kernel/kexec_file.c | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f0e9f8eda7a3..407f8b0346aa 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -205,6 +205,15 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif +#ifndef arch_check_excluded_range +static inline int arch_check_excluded_range(struct kimage *image, + unsigned long start, + unsigned long end) +{ + return 0; +} +#endif + #ifdef CONFIG_KEXEC_SIG #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 3eedb8c226ad..fba686487e3b 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -464,6 +464,12 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end, continue; } + /* Make sure this does not conflict with exclude range */ + if (arch_check_excluded_range(image, temp_start, temp_end)) { + temp_start = temp_start - PAGE_SIZE; + continue; + } + /* We found a suitable memory range */ break; } while (1); @@ -498,6 +504,12 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, continue; } + /* Make sure this does not conflict with exclude range */ + if (arch_check_excluded_range(image, temp_start, temp_end)) { + temp_start = temp_start + PAGE_SIZE; + continue; + } + /* We found a suitable memory range */ break; } while (1); From 6e5250eaa665fac681926251a8d7f5f418103399 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:27 +0530 Subject: [PATCH 1040/2157] powerpc/crash: use generic APIs to locate memory hole for kdump On PowerPC, the memory reserved for the crashkernel can contain components like RTAS, TCE, OPAL, etc., which should be avoided when loading kexec segments into crashkernel memory. Due to these special components, PowerPC has its own set of APIs to locate holes in the crashkernel memory for loading kexec segments for kdump. However, for loading kexec segments in the kexec case, PowerPC already uses generic APIs to locate holes. The previous patch in this series, titled "crash: Let arch decide usable memory range in reserved area," introduced arch-specific hook to handle such special regions in the crashkernel area. So, switch PowerPC to use the generic APIs to locate memory holes for kdump and remove the redundant PowerPC-specific APIs. Link: https://lkml.kernel.org/r/20250131113830.925179-5-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Cc: Baoquan he Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/kexec.h | 6 +- arch/powerpc/kexec/file_load_64.c | 259 ++---------------------------- 2 files changed, 13 insertions(+), 252 deletions(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 601e569303e1..1b800df5eb30 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,8 +94,10 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); -#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole +int arch_check_excluded_range(struct kimage *image, unsigned long start, + unsigned long end); +#define arch_check_excluded_range arch_check_excluded_range + int load_crashdump_segments_ppc64(struct kimage *image, struct kexec_buf *kbuf); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index dc65c1391157..e7ef8b2a2554 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -49,201 +49,18 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL }; -/** - * __locate_mem_hole_top_down - Looks top down for a large enough memory hole - * in the memory regions between buf_min & buf_max - * for the buffer. If found, sets kbuf->mem. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * - * Returns 0 on success, negative errno on error. - */ -static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max) +int arch_check_excluded_range(struct kimage *image, unsigned long start, + unsigned long end) { - int ret = -EADDRNOTAVAIL; - phys_addr_t start, end; - u64 i; + struct crash_mem *emem; + int i; - for_each_mem_range_rev(i, &start, &end) { - /* - * memblock uses [start, end) convention while it is - * [start, end] here. Fix the off-by-one to have the - * same convention. - */ - end -= 1; + emem = image->arch.exclude_ranges; + for (i = 0; i < emem->nr_ranges; i++) + if (start < emem->ranges[i].end && end > emem->ranges[i].start) + return 1; - if (start > buf_max) - continue; - - /* Memory hole not found */ - if (end < buf_min) - break; - - /* Adjust memory region based on the given range */ - if (start < buf_min) - start = buf_min; - if (end > buf_max) - end = buf_max; - - start = ALIGN(start, kbuf->buf_align); - if (start < end && (end - start + 1) >= kbuf->memsz) { - /* Suitable memory range found. Set kbuf->mem */ - kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, - kbuf->buf_align); - ret = 0; - break; - } - } - - return ret; -} - -/** - * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a - * suitable buffer with top down approach. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * @emem: Exclude memory ranges. - * - * Returns 0 on success, negative errno on error. - */ -static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max, - const struct crash_mem *emem) -{ - int i, ret = 0, err = -EADDRNOTAVAIL; - u64 start, end, tmin, tmax; - - tmax = buf_max; - for (i = (emem->nr_ranges - 1); i >= 0; i--) { - start = emem->ranges[i].start; - end = emem->ranges[i].end; - - if (start > tmax) - continue; - - if (end < tmax) { - tmin = (end < buf_min ? buf_min : end + 1); - ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); - if (!ret) - return 0; - } - - tmax = start - 1; - - if (tmax < buf_min) { - ret = err; - break; - } - ret = 0; - } - - if (!ret) { - tmin = buf_min; - ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); - } - return ret; -} - -/** - * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole - * in the memory regions between buf_min & buf_max - * for the buffer. If found, sets kbuf->mem. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * - * Returns 0 on success, negative errno on error. - */ -static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max) -{ - int ret = -EADDRNOTAVAIL; - phys_addr_t start, end; - u64 i; - - for_each_mem_range(i, &start, &end) { - /* - * memblock uses [start, end) convention while it is - * [start, end] here. Fix the off-by-one to have the - * same convention. - */ - end -= 1; - - if (end < buf_min) - continue; - - /* Memory hole not found */ - if (start > buf_max) - break; - - /* Adjust memory region based on the given range */ - if (start < buf_min) - start = buf_min; - if (end > buf_max) - end = buf_max; - - start = ALIGN(start, kbuf->buf_align); - if (start < end && (end - start + 1) >= kbuf->memsz) { - /* Suitable memory range found. Set kbuf->mem */ - kbuf->mem = start; - ret = 0; - break; - } - } - - return ret; -} - -/** - * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a - * suitable buffer with bottom up approach. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * @emem: Exclude memory ranges. - * - * Returns 0 on success, negative errno on error. - */ -static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max, - const struct crash_mem *emem) -{ - int i, ret = 0, err = -EADDRNOTAVAIL; - u64 start, end, tmin, tmax; - - tmin = buf_min; - for (i = 0; i < emem->nr_ranges; i++) { - start = emem->ranges[i].start; - end = emem->ranges[i].end; - - if (end < tmin) - continue; - - if (start > tmin) { - tmax = (start > buf_max ? buf_max : start - 1); - ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); - if (!ret) - return 0; - } - - tmin = end + 1; - - if (tmin > buf_max) { - ret = err; - break; - } - ret = 0; - } - - if (!ret) { - tmax = buf_max; - ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); - } - return ret; + return 0; } #ifdef CONFIG_CRASH_DUMP @@ -1004,64 +821,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem return ret; } -/** - * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, - * tce-table, reserved-ranges & such (exclude - * memory ranges) as they can't be used for kexec - * segment buffer. Sets kbuf->mem when a suitable - * memory hole is found. - * @kbuf: Buffer contents and memory parameters. - * - * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. - * - * Returns 0 on success, negative errno on error. - */ -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) -{ - struct crash_mem **emem; - u64 buf_min, buf_max; - int ret; - - /* Look up the exclude ranges list while locating the memory hole */ - emem = &(kbuf->image->arch.exclude_ranges); - if (!(*emem) || ((*emem)->nr_ranges == 0)) { - pr_warn("No exclude range list. Using the default locate mem hole method\n"); - return kexec_locate_mem_hole(kbuf); - } - - buf_min = kbuf->buf_min; - buf_max = kbuf->buf_max; - /* Segments for kdump kernel should be within crashkernel region */ - if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) { - buf_min = (buf_min < crashk_res.start ? - crashk_res.start : buf_min); - buf_max = (buf_max > crashk_res.end ? - crashk_res.end : buf_max); - } - - if (buf_min > buf_max) { - pr_err("Invalid buffer min and/or max values\n"); - return -EINVAL; - } - - if (kbuf->top_down) - ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, - *emem); - else - ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, - *emem); - - /* Add the buffer allocated to the exclude list for the next lookup */ - if (!ret) { - add_mem_range(emem, kbuf->mem, kbuf->memsz); - sort_memory_ranges(*emem, true); - } else { - pr_err("Failed to locate memory buffer of size %lu\n", - kbuf->memsz); - } - return ret; -} - /** * arch_kexec_kernel_image_probe - Does additional handling needed to setup * kexec segments. From 021b5b303c5e9bc6634bc4fc4607586904dc7775 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:28 +0530 Subject: [PATCH 1041/2157] powerpc/crash: preserve user-specified memory limit Commit 59d58189f3d9 ("crash: fix crash memory reserve exceed system memory bug") fails crashkernel parsing if the crash size is found to be higher than system RAM, which makes the memory_limit adjustment code ineffective due to an early exit from reserve_crashkernel(). Regardless lets not violate the user-specified memory limit by adjusting it. Remove this adjustment to ensure all reservations stay within the limit. Commit f94f5ac07983 ("powerpc/fadump: Don't update the user-specified memory limit") did the same for fadump. Link: https://lkml.kernel.org/r/20250131113830.925179-6-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Reviewed-by: Mahesh Salgaonkar Acked-by: Hari Bathini Cc: Baoquan he Cc: Madhavan Srinivasan Cc: Michael Ellerman Signed-off-by: Andrew Morton --- arch/powerpc/kexec/core.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 58a930a47422..f9be7bb5aa08 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -128,14 +128,6 @@ void __init reserve_crashkernel(void) return; } - /* Crash kernel trumps memory limit */ - if (memory_limit && memory_limit <= crashk_res.end) { - memory_limit = crashk_res.end + 1; - total_mem_sz = memory_limit; - printk("Adjusted memory limit for crashkernel, now 0x%llx\n", - memory_limit); - } - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), From bce074bdbc36e747b9e0783fe642f7b634cfb536 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:29 +0530 Subject: [PATCH 1042/2157] powerpc: insert System RAM resource to prevent crashkernel conflict The next patch in the series with title "powerpc/crash: use generic crashkernel reservation" enables powerpc to use generic crashkernel reservation instead of custom implementation. This leads to exporting of `Crash Kernel` memory to iomem_resource (/proc/iomem) via insert_crashkernel_resources():kernel/crash_reserve.c or at another place in the same file if HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY is set. The add_system_ram_resources():arch/powerpc/mm/mem.c adds `System RAM` to iomem_resource using request_resource(). This creates a conflict with `Crash Kernel`, which is added by the generic crashkernel reservation code. As a result, the kernel ultimately fails to add `System RAM` to iomem_resource. Consequently, it does not appear in /proc/iomem. There are multiple approches tried to avoid this: 1. Don't add Crash Kernel to iomem_resource: - This has two issues. First, it requires adding an architecture-specific hook in the generic code. There are already two code paths to choose when to add `Crash Kernel` to iomem_resource. This adds one more code path to skip it. Second, what if `Crash Kernel` is required in /proc/iomem in the future? Many architectures do export it. 2. Don't add `System RAM` to iomem_resource by reverting commit c40dd2f766440 ("powerpc: Add System RAM to /proc/iomem"): - It's not ideal to export `System RAM` via /proc/iomem, but since it already done ealier and userspace tools like kdump and kdump-utils rely on `System RAM` from /proc/iomem, removing it will break userspace. 3. Add Crash Kernel along with System RAM to /proc/iomem: This patch takes the third approach by updating add_system_ram_resources() to use insert_resource() instead of the request_resource() API to add the `System RAM` resource to iomem_resource. insert_resource() allows inserting resources even if they overlap with existing ones. Since `Crash Kernel` and `System RAM` resources are added to iomem_resource early in the boot, any other conflict is not expected. With the changes introduced here and in the next patch, "powerpc/crash: use generic crashkernel reservation," /proc/iomem now exports `System RAM` and `Crash Kernel` as shown below: $ cat /proc/iomem 00000000-3ffffffff : System RAM 10000000-4fffffff : Crash kernel The kdump script is capable of handling `System RAM` and `Crash Kernel` in the above format. The same format is used in other architectures. Link: https://lkml.kernel.org/r/20250131113830.925179-7-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Cc: Baoquan he Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c7708c8fad29..615966d71425 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -376,7 +376,7 @@ static int __init add_system_ram_resources(void) */ res->end = end - 1; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - WARN_ON(request_resource(&iomem_resource, res) < 0); + WARN_ON(insert_resource(&iomem_resource, res) < 0); } } From e3185ee438c28ee926cb3ef26f3bfb0aae510606 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 31 Jan 2025 17:08:30 +0530 Subject: [PATCH 1043/2157] powerpc/crash: use generic crashkernel reservation Commit 0ab97169aa05 ("crash_core: add generic function to do reservation") added a generic function to reserve crashkernel memory. So let's use the same function on powerpc and remove the architecture-specific code that essentially does the same thing. The generic crashkernel reservation also provides a way to split the crashkernel reservation into high and low memory reservations, which can be enabled for powerpc in the future. Along with moving to the generic crashkernel reservation, the code related to finding the base address for the crashkernel has been separated into its own function name get_crash_base() for better readability and maintainability. Link: https://lkml.kernel.org/r/20250131113830.925179-8-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Reviewed-by: Mahesh Salgaonkar Acked-by: Hari Bathini Cc: Baoquan he Cc: Madhavan Srinivasan Cc: Michael Ellerman Signed-off-by: Andrew Morton --- arch/powerpc/Kconfig | 3 + arch/powerpc/include/asm/crash_reserve.h | 8 +++ arch/powerpc/include/asm/kexec.h | 4 +- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kexec/core.c | 92 +++++++++++------------- 5 files changed, 54 insertions(+), 55 deletions(-) create mode 100644 arch/powerpc/include/asm/crash_reserve.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 424f188e62d9..b7e00bb8e556 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -721,6 +721,9 @@ config ARCH_SUPPORTS_CRASH_HOTPLUG def_bool y depends on PPC64 +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_RESERVE + config FA_DUMP bool "Firmware-assisted dump" depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV) diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h new file mode 100644 index 000000000000..6467ce29b1fa --- /dev/null +++ b/arch/powerpc/include/asm/crash_reserve.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_CRASH_RESERVE_H +#define _ASM_POWERPC_CRASH_RESERVE_H + +/* crash kernel regions are Page size agliged */ +#define CRASH_ALIGN PAGE_SIZE + +#endif /* _ASM_POWERPC_CRASH_RESERVE_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 1b800df5eb30..70f2f0517509 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -114,9 +114,9 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem #ifdef CONFIG_CRASH_RESERVE int __init overlaps_crashkernel(unsigned long start, unsigned long size); -extern void reserve_crashkernel(void); +extern void arch_reserve_crashkernel(void); #else -static inline void reserve_crashkernel(void) {} +static inline void arch_reserve_crashkernel(void) {} static inline int overlaps_crashkernel(unsigned long start, unsigned long size) { return 0; } #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e0059842a1c6..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -860,7 +860,7 @@ void __init early_init_devtree(void *params) */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); if (memory_limit > memblock_phys_mem_size()) diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index f9be7bb5aa08..00e9c267b912 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -58,38 +58,20 @@ void machine_kexec(struct kimage *image) } #ifdef CONFIG_CRASH_RESERVE -void __init reserve_crashkernel(void) + +static unsigned long long __init get_crash_base(unsigned long long crash_base) { - unsigned long long crash_size, crash_base, total_mem_sz; - int ret; - - total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size(); - /* use common parsing */ - ret = parse_crashkernel(boot_command_line, total_mem_sz, - &crash_size, &crash_base, NULL, NULL); - if (ret == 0 && crash_size > 0) { - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } - - if (crashk_res.end == crashk_res.start) { - crashk_res.start = crashk_res.end = 0; - return; - } - - /* We might have got these values via the command line or the - * device tree, either way sanitise them now. */ - - crash_size = resource_size(&crashk_res); #ifndef CONFIG_NONSTATIC_KERNEL - if (crashk_res.start != KDUMP_KERNELBASE) + if (crash_base != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); - crashk_res.start = KDUMP_KERNELBASE; + return KDUMP_KERNELBASE; #else - if (!crashk_res.start) { + unsigned long long crash_base_align; + + if (!crash_base) { #ifdef CONFIG_PPC64 /* * On the LPAR platform place the crash kernel to mid of @@ -101,45 +83,51 @@ void __init reserve_crashkernel(void) * kernel starts at 128MB offset on other platforms. */ if (firmware_has_feature(FW_FEATURE_LPAR)) - crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_512M); + crash_base = min_t(u64, ppc64_rma_size / 2, SZ_512M); else - crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_128M); + crash_base = min_t(u64, ppc64_rma_size / 2, SZ_128M); #else - crashk_res.start = KDUMP_KERNELBASE; + crash_base = KDUMP_KERNELBASE; #endif } - crash_base = PAGE_ALIGN(crashk_res.start); - if (crash_base != crashk_res.start) { - printk("Crash kernel base must be aligned to 0x%lx\n", - PAGE_SIZE); - crashk_res.start = crash_base; - } + crash_base_align = PAGE_ALIGN(crash_base); + if (crash_base != crash_base_align) + pr_warn("Crash kernel base must be aligned to 0x%lx\n", PAGE_SIZE); + return crash_base_align; #endif - crash_size = PAGE_ALIGN(crash_size); - crashk_res.end = crashk_res.start + crash_size - 1; +} + +void __init arch_reserve_crashkernel(void) +{ + unsigned long long crash_size, crash_base, crash_end; + unsigned long long kernel_start, kernel_size; + unsigned long long total_mem_sz; + int ret; + + total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size(); + + /* use common parsing */ + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, + &crash_base, NULL, NULL); + + if (ret) + return; + + crash_base = get_crash_base(crash_base); + crash_end = crash_base + crash_size - 1; + + kernel_start = __pa(_stext); + kernel_size = _end - _stext; /* The crash region must not overlap the current kernel */ - if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { - printk(KERN_WARNING - "Crash kernel can not overlap current kernel\n"); - crashk_res.start = crashk_res.end = 0; + if ((kernel_start + kernel_size > crash_base) && (kernel_start <= crash_end)) { + pr_warn("Crash kernel can not overlap current kernel\n"); return; } - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crashk_res.start >> 20), - (unsigned long)(total_mem_sz >> 20)); - - if (!memblock_is_region_memory(crashk_res.start, crash_size) || - memblock_reserve(crashk_res.start, crash_size)) { - pr_err("Failed to reserve memory for crashkernel!\n"); - crashk_res.start = crashk_res.end = 0; - return; - } + reserve_crashkernel_generic(crash_size, crash_base, 0, false); } int __init overlaps_crashkernel(unsigned long start, unsigned long size) From 9ad18c855518161d0a80f6a7de3ed495f746cb5d Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Feb 2025 12:13:16 +0100 Subject: [PATCH 1044/2157] get_maintainer: add --substatus for reporting subsystem status Patch series "get_maintainer: report subsystem status separately", v2. The subsystem status (S: field) can inform a patch submitter if the subsystem is well maintained or e.g. maintainers are missing. In get_maintainer, it is currently reported with --role(stats) by adjusting the maintainer role for any status different from Maintained. This has two downsides: - if a subsystem has only reviewers or mailing lists and no maintainers, the status is not reported. For example Orphan subsystems typically have no maintainers so there's nobody to report as orphan minder. - the Supported status means that someone is paid for maintaining, but it is reported as "supporter" for all the maintainers, which can be incorrect (only some of them may be paid). People (including myself) have been also confused about what "supporter" means. The second point has been brought up in 2022 and the discussion in the end resulted in adjusting documentation only [1]. I however agree with Ted's points that it's misleading to take the subsystem status and apply it to all maintainers [2]. The attempt to modify get_maintainer output was retracted after Joe objected that the status becomes not reported at all [3]. This series addresses that concern by reporting the status (unless it's the most common Maintained one) on separate lines that follow the reported emails, using a new --substatus parameter. Care is taken to reduce the noise to minimum by not reporting the most common Maintained status, by default require no opt-in that would need the users to discover the new parameter, and at the same time not to break existing git --cc-cmd usage. [1] https://lore.kernel.org/all/20221006162413.858527-1-bryan.odonoghue@linaro.org/ [2] https://lore.kernel.org/all/Yzen4X1Na0MKXHs9@mit.edu/ [3] https://lore.kernel.org/all/30776fe75061951777da8fa6618ae89bea7a8ce4.camel@perches.com/ This patch (of 2): The subsystem status is currently reported with --role(stats) by adjusting the maintainer role for any status different from Maintained. This has two downsides: - if a subsystem has only reviewers or mailing lists and no maintainers, the status is not reported (i.e. typically, Orphan subsystems have no maintainers) - the Supported status means that someone is paid for maintaining, but it is reported as "supporter" for all the maintainers, which can be incorrect. People have been also confused about what "supporter" means. This patch introduces a new --substatus option and functionality aimed to report the subsystem status separately, without adjusting the reported maintainer role. After the e-mails are output, the status of subsystems will follow, for example: ... linux-kernel@vger.kernel.org (open list:LIBRARY CODE) LIBRARY CODE status: Supported In order to allow replacing the role rewriting seamlessly, the new option works as follows: - it is automatically enabled when --email and --role are enabled (the defaults include --email and --rolestats which implies --role) - usages with --norolestats e.g. for git's --cc-cmd will thus need no adjustments - the most common Maintained status is not reported at all, to reduce unnecessary noise - THE REST catch-all section (contains lkml) status is not reported - the existing --subsystem and --status options are unaffected so their users will need no adjustments [vbabka@suse.cz: require that script output goes to a terminal] Link: https://lkml.kernel.org/r/66c2bf7a-9119-4850-b6b8-ac8f426966e1@suse.cz Link: https://lkml.kernel.org/r/20250203-b4-get_maintainer-v2-0-83ba008b491f@suse.cz Link: https://lkml.kernel.org/r/20250203-b4-get_maintainer-v2-1-83ba008b491f@suse.cz Fixes: c1565b6f7b53 ("get_maintainer: add --substatus for reporting subsystem status") Closes: https://lore.kernel.org/all/7aodxv46lj6rthjo4i5zhhx2lybrhb4uknpej2dyz3e7im5w3w@w23bz6fx3jnn/ Signed-off-by: Vlastimil Babka Acked-by: Lorenzo Stoakes Tested-by: Geert Uytterhoeven Tested-by: Uwe Kleine-K=F6nig Cc: Bryan O'Donoghue Cc: Joe Perches Cc: Kees Cook Cc: Ted Ts'o Cc: Thorsten Leemhuis Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- scripts/get_maintainer.pl | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 5ac02e198737..3eb922b4d22c 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -50,6 +50,7 @@ my $output_multiline = 1; my $output_separator = ", "; my $output_roles = 0; my $output_rolestats = 1; +my $output_substatus = undef; my $output_section_maxlen = 50; my $scm = 0; my $tree = 1; @@ -269,6 +270,7 @@ if (!GetOptions( 'separator=s' => \$output_separator, 'subsystem!' => \$subsystem, 'status!' => \$status, + 'substatus!' => \$output_substatus, 'scm!' => \$scm, 'tree!' => \$tree, 'web!' => \$web, @@ -314,6 +316,10 @@ $output_multiline = 0 if ($output_separator ne ", "); $output_rolestats = 1 if ($interactive); $output_roles = 1 if ($output_rolestats); +if (!defined $output_substatus) { + $output_substatus = $email && $output_roles && -t STDOUT; +} + if ($sections || $letters ne "") { $sections = 1; $email = 0; @@ -637,6 +643,7 @@ my @web = (); my @bug = (); my @subsystem = (); my @status = (); +my @substatus = (); my %deduplicate_name_hash = (); my %deduplicate_address_hash = (); @@ -651,6 +658,11 @@ if ($scm) { output(@scm); } +if ($output_substatus) { + @substatus = uniq(@substatus); + output(@substatus); +} + if ($status) { @status = uniq(@status); output(@status); @@ -859,6 +871,7 @@ sub get_maintainers { @bug = (); @subsystem = (); @status = (); + @substatus = (); %deduplicate_name_hash = (); %deduplicate_address_hash = (); if ($email_git_all_signature_types) { @@ -1073,6 +1086,7 @@ MAINTAINER field selection options: --remove-duplicates => minimize duplicate email names/addresses --roles => show roles (status:subsystem, git-signer, list, etc...) --rolestats => show roles and statistics (commits/total_commits, %) + --substatus => show subsystem status if not Maintained (default: match --roles when output is tty)" --file-emails => add email addresses found in -f file (default: 0 (off)) --fixes => for patches, add signatures of commits with 'Fixes: ' (default: 1 (on)) --scm => print SCM tree(s) if any @@ -1335,7 +1349,9 @@ sub add_categories { my $start = find_starting_index($index); my $end = find_ending_index($index); - push(@subsystem, $typevalue[$start]); + my $subsystem = $typevalue[$start]; + push(@subsystem, $subsystem); + my $status = "Unknown"; for ($i = $start + 1; $i < $end; $i++) { my $tv = $typevalue[$i]; @@ -1386,8 +1402,8 @@ sub add_categories { } } elsif ($ptype eq "R") { if ($email_reviewer) { - my $subsystem = get_subsystem_name($i); - push_email_addresses($pvalue, "reviewer:$subsystem" . $suffix); + my $subs = get_subsystem_name($i); + push_email_addresses($pvalue, "reviewer:$subs" . $suffix); } } elsif ($ptype eq "T") { push(@scm, $pvalue . $suffix); @@ -1397,9 +1413,14 @@ sub add_categories { push(@bug, $pvalue . $suffix); } elsif ($ptype eq "S") { push(@status, $pvalue . $suffix); + $status = $pvalue; } } } + + if ($subsystem ne "THE REST" and $status ne "Maintained") { + push(@substatus, $subsystem . " status: " . $status . $suffix) + } } sub email_inuse { @@ -1903,6 +1924,7 @@ EOT $done = 1; $output_rolestats = 0; $output_roles = 0; + $output_substatus = 0; last; } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) { $selected{$nr - 1} = !$selected{$nr - 1}; From 6ba31721aeef048922672d49d48a7f3826005f7d Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Feb 2025 12:13:17 +0100 Subject: [PATCH 1045/2157] get_maintainer: stop reporting subsystem status as maintainer role After introducing the --substatus option, we can stop adjusting the reported maintainer role by the subsystem's status. For compatibility with the --git-chief-penguins option, keep the "chief penguin" role. Link: https://lkml.kernel.org/r/20250203-b4-get_maintainer-v2-2-83ba008b491f@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Cc: Bryan O'Donoghue Cc: Joe Perches Cc: Kees Cook Cc: Ted Ts'o Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/get_maintainer.pl | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 3eb922b4d22c..4414194bedcf 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -1084,7 +1084,7 @@ MAINTAINER field selection options: --moderated => include moderated lists(s) if any (default: true) --s => include subscriber only list(s) if any (default: false) --remove-duplicates => minimize duplicate email names/addresses - --roles => show roles (status:subsystem, git-signer, list, etc...) + --roles => show roles (role:subsystem, git-signer, list, etc...) --rolestats => show roles and statistics (commits/total_commits, %) --substatus => show subsystem status if not Maintained (default: match --roles when output is tty)" --file-emails => add email addresses found in -f file (default: 0 (off)) @@ -1298,8 +1298,9 @@ sub get_maintainer_role { my $start = find_starting_index($index); my $end = find_ending_index($index); - my $role = "unknown"; + my $role = "maintainer"; my $subsystem = get_subsystem_name($index); + my $status = "unknown"; for ($i = $start + 1; $i < $end; $i++) { my $tv = $typevalue[$i]; @@ -1307,23 +1308,13 @@ sub get_maintainer_role { my $ptype = $1; my $pvalue = $2; if ($ptype eq "S") { - $role = $pvalue; + $status = $pvalue; } } } - $role = lc($role); - if ($role eq "supported") { - $role = "supporter"; - } elsif ($role eq "maintained") { - $role = "maintainer"; - } elsif ($role eq "odd fixes") { - $role = "odd fixer"; - } elsif ($role eq "orphan") { - $role = "orphan minder"; - } elsif ($role eq "obsolete") { - $role = "obsolete minder"; - } elsif ($role eq "buried alive in reporters") { + $status = lc($status); + if ($status eq "buried alive in reporters") { $role = "chief penguin"; } From b115b6eccdf5a09909b1db6f1c37074923337f5c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 5 Feb 2025 16:29:32 -0500 Subject: [PATCH 1046/2157] lib/zlib: drop EQUAL macro The macro is prehistoric, and only exists to help those readers who don't know what memcmp() returns if memory areas differ. This is pretty well documented, so the macro looks excessive. Now that the only user of the macro depends on DEBUG_ZLIB config, GCC warns about unused macro if the library is built with W=2 against defconfig. So drop it for good. Link: https://lkml.kernel.org/r/20250205212933.68695-1-yury.norov@gmail.com Signed-off-by: Yury Norov Reviewed-by: Sergey Senozhatsky Reviewed-by: Mikhail Zaslonko Cc: Heiko Carsten Cc: Ilya Leoshkevich Cc: Joe Perches Signed-off-by: Andrew Morton --- lib/zlib_deflate/deflate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index 3a1d8d34182e..8fb2a3e17c0e 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -151,9 +151,6 @@ static const config configuration_table[10] = { * meaning. */ -#define EQUAL 0 -/* result of memcmp for equal strings */ - /* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to UPDATE_HASH are made with consecutive @@ -713,8 +710,7 @@ static void check_match( ) { /* check that the match is indeed a match */ - if (memcmp((char *)s->window + match, - (char *)s->window + start, length) != EQUAL) { + if (memcmp((char *)s->window + match, (char *)s->window + start, length)) { fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); do { From 8c6bbda879b62f16bb03321a84554b4f63415c55 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 3 Feb 2025 16:05:22 +0100 Subject: [PATCH 1047/2157] rcu: provide a static initializer for hlist_nulls_head Patch series "ucount: Simplify refcounting with rcuref_t". I noticed that the atomic_dec_and_lock_irqsave() in put_ucounts() loops sometimes even during boot. Something like 2-3 iterations but still. This series replaces the refcounting with rcuref_t and adds a RCU lookup. This allows a lockless lookup in alloc_ucounts() if the entry is available and a cmpxchg()less put of the item. This patch (of 4): Provide a static initializer for hlist_nulls_head so that it can be used in statically defined data structures. Link: https://lkml.kernel.org/r/20250203150525.456525-1-bigeasy@linutronix.de Link: https://lkml.kernel.org/r/20250203150525.456525-2-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Boqun Feng Cc: Steven Rostedt Cc: Joel Fernandes Cc: Josh Triplett Cc: Lai jiangshan Cc: Mathieu Desnoyers Cc: Mengen Sun Cc: "Paul E . McKenney" Cc: "Uladzislau Rezki (Sony)" Cc: YueHong Wu Cc: Zqiang Signed-off-by: Andrew Morton --- include/linux/list_nulls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index fa6e8471bd22..248db9b77ee2 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -28,6 +28,7 @@ struct hlist_nulls_node { #define NULLS_MARKER(value) (1UL | (((long)value) << 1)) #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) +#define HLIST_NULLS_HEAD_INIT(nulls) {.first = (struct hlist_nulls_node *)NULLS_MARKER(nulls)} #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) From 328152e6774d9d801ad1d90af557b9113647b379 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 3 Feb 2025 16:05:23 +0100 Subject: [PATCH 1048/2157] ucount: replace get_ucounts_or_wrap() with atomic_inc_not_zero() get_ucounts_or_wrap() increments the counter and if the counter is negative then it decrements it again in order to reset the previous increment. This statement can be replaced with atomic_inc_not_zero() to only increment the counter if it is not yet 0. This simplifies the get function because the put (if the get failed) can be removed. atomic_inc_not_zero() is implement as a cmpxchg() loop which can be repeated several times if another get/put is performed in parallel. This will be optimized later. Increment the reference counter only if not yet dropped to zero. Link: https://lkml.kernel.org/r/20250203150525.456525-3-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Boqun Feng Cc: Joel Fernandes Cc: Josh Triplett Cc: Lai jiangshan Cc: Mathieu Desnoyers Cc: Mengen Sun Cc: Steven Rostedt Cc: "Uladzislau Rezki (Sony)" Cc: YueHong Wu Cc: Zqiang Signed-off-by: Andrew Morton --- kernel/ucount.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 86c5f1c0bad9..4aa501153825 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -146,25 +146,16 @@ static void hlist_add_ucounts(struct ucounts *ucounts) spin_unlock_irq(&ucounts_lock); } -static inline bool get_ucounts_or_wrap(struct ucounts *ucounts) -{ - /* Returns true on a successful get, false if the count wraps. */ - return !atomic_add_negative(1, &ucounts->count); -} - struct ucounts *get_ucounts(struct ucounts *ucounts) { - if (!get_ucounts_or_wrap(ucounts)) { - put_ucounts(ucounts); - ucounts = NULL; - } - return ucounts; + if (atomic_inc_not_zero(&ucounts->count)) + return ucounts; + return NULL; } struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); - bool wrapped; struct ucounts *ucounts, *new = NULL; spin_lock_irq(&ucounts_lock); @@ -189,14 +180,11 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } - - wrapped = !get_ucounts_or_wrap(ucounts); + if (!atomic_inc_not_zero(&ucounts->count)) + ucounts = NULL; spin_unlock_irq(&ucounts_lock); kfree(new); - if (wrapped) { - put_ucounts(ucounts); - return NULL; - } + return ucounts; } From 5f01a22c5b231dd590f61a2591b3090665733bcb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 3 Feb 2025 16:05:24 +0100 Subject: [PATCH 1049/2157] ucount: use RCU for ucounts lookups The ucounts element is looked up under ucounts_lock. This can be optimized by using RCU for a lockless lookup and return and element if the reference can be obtained. Replace hlist_head with hlist_nulls_head which is RCU compatible. Let find_ucounts() search for the required item within a RCU section and return the item if a reference could be obtained. This means alloc_ucounts() will always return an element (unless the memory allocation failed). Let put_ucounts() RCU free the element if the reference counter dropped to zero. Link: https://lkml.kernel.org/r/20250203150525.456525-4-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Boqun Feng Cc: Joel Fernandes Cc: Josh Triplett Cc: Lai jiangshan Cc: Mathieu Desnoyers Cc: Mengen Sun Cc: Steven Rostedt Cc: "Uladzislau Rezki (Sony)" Cc: YueHong Wu Cc: Zqiang Signed-off-by: Andrew Morton --- include/linux/user_namespace.h | 4 +- kernel/ucount.c | 75 ++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 7183e5aca282..ad4dbef92597 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -115,9 +116,10 @@ struct user_namespace { } __randomize_layout; struct ucounts { - struct hlist_node node; + struct hlist_nulls_node node; struct user_namespace *ns; kuid_t uid; + struct rcu_head rcu; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; diff --git a/kernel/ucount.c b/kernel/ucount.c index 4aa501153825..b6abaf68cdcc 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -15,7 +15,10 @@ struct ucounts init_ucounts = { }; #define UCOUNTS_HASHTABLE_BITS 10 -static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; +#define UCOUNTS_HASHTABLE_ENTRIES (1 << UCOUNTS_HASHTABLE_BITS) +static struct hlist_nulls_head ucounts_hashtable[UCOUNTS_HASHTABLE_ENTRIES] = { + [0 ... UCOUNTS_HASHTABLE_ENTRIES - 1] = HLIST_NULLS_HEAD_INIT(0) +}; static DEFINE_SPINLOCK(ucounts_lock); #define ucounts_hashfn(ns, uid) \ @@ -24,7 +27,6 @@ static DEFINE_SPINLOCK(ucounts_lock); #define ucounts_hashentry(ns, uid) \ (ucounts_hashtable + ucounts_hashfn(ns, uid)) - #ifdef CONFIG_SYSCTL static struct ctl_table_set * set_lookup(struct ctl_table_root *root) @@ -127,22 +129,28 @@ void retire_userns_sysctls(struct user_namespace *ns) #endif } -static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) +static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, + struct hlist_nulls_head *hashent) { struct ucounts *ucounts; + struct hlist_nulls_node *pos; - hlist_for_each_entry(ucounts, hashent, node) { - if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) - return ucounts; + guard(rcu)(); + hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) { + if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) { + if (atomic_inc_not_zero(&ucounts->count)) + return ucounts; + } } return NULL; } static void hlist_add_ucounts(struct ucounts *ucounts) { - struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); + struct hlist_nulls_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); + spin_lock_irq(&ucounts_lock); - hlist_add_head(&ucounts->node, hashent); + hlist_nulls_add_head_rcu(&ucounts->node, hashent); spin_unlock_irq(&ucounts_lock); } @@ -155,37 +163,33 @@ struct ucounts *get_ucounts(struct ucounts *ucounts) struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { - struct hlist_head *hashent = ucounts_hashentry(ns, uid); - struct ucounts *ucounts, *new = NULL; + struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid); + struct ucounts *ucounts, *new; + + ucounts = find_ucounts(ns, uid, hashent); + if (ucounts) + return ucounts; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->ns = ns; + new->uid = uid; + atomic_set(&new->count, 1); spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); - if (!ucounts) { + if (ucounts) { spin_unlock_irq(&ucounts_lock); - - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (!new) - return NULL; - - new->ns = ns; - new->uid = uid; - atomic_set(&new->count, 1); - - spin_lock_irq(&ucounts_lock); - ucounts = find_ucounts(ns, uid, hashent); - if (!ucounts) { - hlist_add_head(&new->node, hashent); - get_user_ns(new->ns); - spin_unlock_irq(&ucounts_lock); - return new; - } + kfree(new); + return ucounts; } - if (!atomic_inc_not_zero(&ucounts->count)) - ucounts = NULL; - spin_unlock_irq(&ucounts_lock); - kfree(new); - return ucounts; + hlist_nulls_add_head_rcu(&new->node, hashent); + get_user_ns(new->ns); + spin_unlock_irq(&ucounts_lock); + return new; } void put_ucounts(struct ucounts *ucounts) @@ -193,10 +197,11 @@ void put_ucounts(struct ucounts *ucounts) unsigned long flags; if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { - hlist_del_init(&ucounts->node); + hlist_nulls_del_rcu(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); + put_user_ns(ucounts->ns); - kfree(ucounts); + kfree_rcu(ucounts, rcu); } } From b4dc0bee2a749083028afba346910e198653f42a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 3 Feb 2025 16:05:25 +0100 Subject: [PATCH 1050/2157] ucount: use rcuref_t for reference counting Use rcuref_t for reference counting. This eliminates the cmpxchg loop in the get and put path. This also eliminates the need to acquire the lock in the put path because once the final user returns the reference, it can no longer be obtained anymore. Use rcuref_t for reference counting. Link: https://lkml.kernel.org/r/20250203150525.456525-5-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Boqun Feng Cc: Joel Fernandes Cc: Josh Triplett Cc: Lai jiangshan Cc: Mathieu Desnoyers Cc: Mengen Sun Cc: Steven Rostedt Cc: "Uladzislau Rezki (Sony)" Cc: YueHong Wu Cc: Zqiang Signed-off-by: Andrew Morton --- include/linux/user_namespace.h | 11 +++++++++-- kernel/ucount.c | 16 +++++----------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index ad4dbef92597..a0bb6d012137 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ struct ucounts { struct user_namespace *ns; kuid_t uid; struct rcu_head rcu; - atomic_t count; + rcuref_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; @@ -133,9 +134,15 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); -struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); +static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts) +{ + if (rcuref_get(&ucounts->count)) + return ucounts; + return NULL; +} + static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); diff --git a/kernel/ucount.c b/kernel/ucount.c index b6abaf68cdcc..8686e329b8f2 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -11,7 +11,7 @@ struct ucounts init_ucounts = { .ns = &init_user_ns, .uid = GLOBAL_ROOT_UID, - .count = ATOMIC_INIT(1), + .count = RCUREF_INIT(1), }; #define UCOUNTS_HASHTABLE_BITS 10 @@ -138,7 +138,7 @@ static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, guard(rcu)(); hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) { if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) { - if (atomic_inc_not_zero(&ucounts->count)) + if (rcuref_get(&ucounts->count)) return ucounts; } } @@ -154,13 +154,6 @@ static void hlist_add_ucounts(struct ucounts *ucounts) spin_unlock_irq(&ucounts_lock); } -struct ucounts *get_ucounts(struct ucounts *ucounts) -{ - if (atomic_inc_not_zero(&ucounts->count)) - return ucounts; - return NULL; -} - struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid); @@ -176,7 +169,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 1); + rcuref_init(&new->count, 1); spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -196,7 +189,8 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { + if (rcuref_put(&ucounts->count)) { + spin_lock_irqsave(&ucounts_lock, flags); hlist_nulls_del_rcu(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); From a406aff8c05115119127c962cbbbbd202e1973ef Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Fri, 14 Feb 2025 11:49:08 +0300 Subject: [PATCH 1051/2157] ocfs2: validate l_tree_depth to avoid out-of-bounds access The l_tree_depth field is 16-bit (__le16), but the actual maximum depth is limited to OCFS2_MAX_PATH_DEPTH. Add a check to prevent out-of-bounds access if l_tree_depth has an invalid value, which may occur when reading from a corrupted mounted disk [1]. Link: https://lkml.kernel.org/r/20250214084908.736528-1-kovalev@altlinux.org Fixes: ccd979bdbce9 ("[PATCH] OCFS2: The Second Oracle Cluster Filesystem") Signed-off-by: Vasiliy Kovalev Reported-by: syzbot+66c146268dc88f4341fd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=66c146268dc88f4341fd [1] Reviewed-by: Joseph Qi Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Kurt Hackel Cc: Mark Fasheh Cc: Vasiliy Kovalev Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 4414743b638e..b8ac85b548c7 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1803,6 +1803,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, el = root_el; while (el->l_tree_depth) { + if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) { + ocfs2_error(ocfs2_metadata_cache_get_super(ci), + "Owner %llu has invalid tree depth %u in extent list\n", + (unsigned long long)ocfs2_metadata_cache_owner(ci), + le16_to_cpu(el->l_tree_depth)); + ret = -EROFS; + goto out; + } if (le16_to_cpu(el->l_next_free_rec) == 0) { ocfs2_error(ocfs2_metadata_cache_get_super(ci), "Owner %llu has empty extent list at depth %u\n", From dbc3b6320eb3f7d775d4da41de693109060856d6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 13 Feb 2025 21:45:30 +0000 Subject: [PATCH 1052/2157] ocfs2: use memcpy_to_folio() in ocfs2_symlink_get_block() Replace use of kmap_atomic() with the higher-level construct memcpy_to_folio(). This removes a use of b_page and supports large folios as well as being easier to understand. It also removes the check for kmap_atomic() failing (because it can't). Link: https://lkml.kernel.org/r/20250213214533.2242224-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Joseph Qi Cc: Mark Tinguely Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 5bbeb6fbb1ac..ccf2930cd2e6 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -46,7 +46,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh = NULL; struct buffer_head *buffer_cache_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - void *kaddr; trace_ocfs2_symlink_get_block( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -91,17 +90,11 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, * could've happened. Since we've got a reference on * the bh, even if it commits while we're doing the * copy, the data is still good. */ - if (buffer_jbd(buffer_cache_bh) - && ocfs2_inode_is_new(inode)) { - kaddr = kmap_atomic(bh_result->b_page); - if (!kaddr) { - mlog(ML_ERROR, "couldn't kmap!\n"); - goto bail; - } - memcpy(kaddr + (bh_result->b_size * iblock), - buffer_cache_bh->b_data, - bh_result->b_size); - kunmap_atomic(kaddr); + if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) { + memcpy_to_folio(bh_result->b_folio, + bh_result->b_size * iblock, + buffer_cache_bh->b_data, + bh_result->b_size); set_buffer_uptodate(bh_result); } brelse(buffer_cache_bh); From bd0ee47da40afc73221de8d5490375931b1f93ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 13 Feb 2025 21:45:31 +0000 Subject: [PATCH 1053/2157] ocfs2: remove reference to bh->b_page Buffer heads are attached to folios, not to pages. Also flush_dcache_page() is now deprecated in favour of flush_dcache_folio(). Link: https://lkml.kernel.org/r/20250213214533.2242224-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Mark Tinguely Signed-off-by: Andrew Morton --- fs/ocfs2/quota_global.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 15d9acd456ec..e85b1ccf81be 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -273,7 +273,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, if (new) memset(bh->b_data, 0, sb->s_blocksize); memcpy(bh->b_data + offset, data, len); - flush_dcache_page(bh->b_page); + flush_dcache_folio(bh->b_folio); set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh); From 318f05a057157c400a92f17c5f2fa3dd96f57c7e Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:41 +0100 Subject: [PATCH 1054/2157] reboot: replace __hw_protection_shutdown bool action parameter with an enum Patch series "reboot: support runtime configuration of emergency hw_protection action", v3. We currently leave the decision of whether to shutdown or reboot to protect hardware in an emergency situation to the individual drivers. This works out in some cases, where the driver detecting the critical failure has inside knowledge: It binds to the system management controller for example or is guided by hardware description that defines what to do. This is inadequate in the general case though as a driver reporting e.g. an imminent power failure can't know whether a shutdown or a reboot would be more appropriate for a given hardware platform. To address this, this series adds a hw_protection kernel parameter and sysfs toggle that can be used to change the action from the shutdown default to reboot. A new hw_protection_trigger API then makes use of this default action. My particular use case is unattended embedded systems that don't have support for shutdown and that power on automatically when power is supplied: - A brief power cycle gets detected by the driver - The kernel powers down the system and SoC goes into shutdown mode - Power is restored - The system remains oblivious to the restored power - System needs to be manually power cycled for a duration long enough to drain the capacitors With this series, such systems can configure the kernel with hw_protection=reboot to have the boot firmware worry about critical conditions. This patch (of 12): Currently __hw_protection_shutdown() either reboots or shuts down the system according to its shutdown argument. To make the logic easier to follow, both inside __hw_protection_shutdown and at caller sites, lets replace the bool parameter with an enum. This will be extra useful, when in a later commit, a third action is added to the enumeration. No functional change. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-0-e1c09b090c0c@pengutronix.de Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-1-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Mark Brown Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- include/linux/reboot.h | 18 +++++++++++++++--- kernel/reboot.c | 14 ++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index abcdde4df697..e97f6b8e8586 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -177,16 +177,28 @@ void ctrl_alt_del(void); extern void orderly_poweroff(bool force); extern void orderly_reboot(void); -void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown); + +/** + * enum hw_protection_action - Hardware protection action + * + * @HWPROT_ACT_SHUTDOWN: + * The system should be shut down (powered off) for HW protection. + * @HWPROT_ACT_REBOOT: + * The system should be rebooted for HW protection. + */ +enum hw_protection_action { HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; + +void __hw_protection_shutdown(const char *reason, int ms_until_forced, + enum hw_protection_action action); static inline void hw_protection_reboot(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, false); + __hw_protection_shutdown(reason, ms_until_forced, HWPROT_ACT_REBOOT); } static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, true); + __hw_protection_shutdown(reason, ms_until_forced, HWPROT_ACT_SHUTDOWN); } /* diff --git a/kernel/reboot.c b/kernel/reboot.c index b5a8569e5d81..b20b53f08648 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -983,10 +983,7 @@ static void hw_failure_emergency_poweroff(int poweroff_delay_ms) * @ms_until_forced: Time to wait for orderly shutdown or reboot before * triggering it. Negative value disables the forced * shutdown or reboot. - * @shutdown: If true, indicates that a shutdown will happen - * after the critical tempeature is reached. - * If false, indicates that a reboot will happen - * after the critical tempeature is reached. + * @action: The hardware protection action to be taken. * * Initiate an emergency system shutdown or reboot in order to protect * hardware from further damage. Usage examples include a thermal protection. @@ -994,7 +991,8 @@ static void hw_failure_emergency_poweroff(int poweroff_delay_ms) * pending even if the previous request has given a large timeout for forced * shutdown/reboot. */ -void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown) +void __hw_protection_shutdown(const char *reason, int ms_until_forced, + enum hw_protection_action action) { static atomic_t allow_proceed = ATOMIC_INIT(1); @@ -1009,10 +1007,10 @@ void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shut * orderly_poweroff failure */ hw_failure_emergency_poweroff(ms_until_forced); - if (shutdown) - orderly_poweroff(true); - else + if (action == HWPROT_ACT_REBOOT) orderly_reboot(); + else + orderly_poweroff(true); } EXPORT_SYMBOL_GPL(__hw_protection_shutdown); From bbf0ec4f57e0a34983ca25f00ec90f4765572d78 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:42 +0100 Subject: [PATCH 1055/2157] reboot: reboot, not shutdown, on hw_protection_reboot timeout hw_protection_shutdown() will kick off an orderly shutdown and if that takes longer than a configurable amount of time, an emergency shutdown will occur. Recently, hw_protection_reboot() was added for those systems that don't implement a proper shutdown and are better served by rebooting and having the boot firmware worry about doing something about the critical condition. On timeout of the orderly reboot of hw_protection_reboot(), the system would go into shutdown, instead of reboot. This is not a good idea, as going into shutdown was explicitly not asked for. Fix this by always doing an emergency reboot if hw_protection_reboot() is called and the orderly reboot takes too long. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-2-e1c09b090c0c@pengutronix.de Fixes: 79fa723ba84c ("reboot: Introduce thermal_zone_device_critical_reboot()") Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Reviewed-by: Matti Vaittinen Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- kernel/reboot.c | 70 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index b20b53f08648..f348f1ba9e22 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -932,48 +932,76 @@ void orderly_reboot(void) } EXPORT_SYMBOL_GPL(orderly_reboot); +static const char *hw_protection_action_str(enum hw_protection_action action) +{ + switch (action) { + case HWPROT_ACT_SHUTDOWN: + return "shutdown"; + case HWPROT_ACT_REBOOT: + return "reboot"; + default: + return "undefined"; + } +} + +static enum hw_protection_action hw_failure_emergency_action; + /** - * hw_failure_emergency_poweroff_func - emergency poweroff work after a known delay - * @work: work_struct associated with the emergency poweroff function + * hw_failure_emergency_action_func - emergency action work after a known delay + * @work: work_struct associated with the emergency action function * * This function is called in very critical situations to force - * a kernel poweroff after a configurable timeout value. + * a kernel poweroff or reboot after a configurable timeout value. */ -static void hw_failure_emergency_poweroff_func(struct work_struct *work) +static void hw_failure_emergency_action_func(struct work_struct *work) { + const char *action_str = hw_protection_action_str(hw_failure_emergency_action); + + pr_emerg("Hardware protection timed-out. Trying forced %s\n", + action_str); + /* - * We have reached here after the emergency shutdown waiting period has - * expired. This means orderly_poweroff has not been able to shut off - * the system for some reason. + * We have reached here after the emergency action waiting period has + * expired. This means orderly_poweroff/reboot has not been able to + * shut off the system for some reason. * - * Try to shut down the system immediately using kernel_power_off - * if populated + * Try to shut off the system immediately if possible */ - pr_emerg("Hardware protection timed-out. Trying forced poweroff\n"); - kernel_power_off(); + + if (hw_failure_emergency_action == HWPROT_ACT_REBOOT) + kernel_restart(NULL); + else + kernel_power_off(); /* * Worst of the worst case trigger emergency restart */ - pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); + pr_emerg("Hardware protection %s failed. Trying emergency restart\n", + action_str); emergency_restart(); } -static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, - hw_failure_emergency_poweroff_func); +static DECLARE_DELAYED_WORK(hw_failure_emergency_action_work, + hw_failure_emergency_action_func); /** - * hw_failure_emergency_poweroff - Trigger an emergency system poweroff + * hw_failure_emergency_schedule - Schedule an emergency system shutdown or reboot + * + * @action: The hardware protection action to be taken + * @action_delay_ms: Time in milliseconds to elapse before triggering action * * This may be called from any critical situation to trigger a system shutdown - * after a given period of time. If time is negative this is not scheduled. + * or reboot after a given period of time. + * If time is negative this is not scheduled. */ -static void hw_failure_emergency_poweroff(int poweroff_delay_ms) +static void hw_failure_emergency_schedule(enum hw_protection_action action, + int action_delay_ms) { - if (poweroff_delay_ms <= 0) + if (action_delay_ms <= 0) return; - schedule_delayed_work(&hw_failure_emergency_poweroff_work, - msecs_to_jiffies(poweroff_delay_ms)); + hw_failure_emergency_action = action; + schedule_delayed_work(&hw_failure_emergency_action_work, + msecs_to_jiffies(action_delay_ms)); } /** @@ -1006,7 +1034,7 @@ void __hw_protection_shutdown(const char *reason, int ms_until_forced, * Queue a backup emergency shutdown in the event of * orderly_poweroff failure */ - hw_failure_emergency_poweroff(ms_until_forced); + hw_failure_emergency_schedule(action, ms_until_forced); if (action == HWPROT_ACT_REBOOT) orderly_reboot(); else From 06eaeaee5b780106c3578bfb9ff2babc19ccffb7 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:43 +0100 Subject: [PATCH 1056/2157] docs: thermal: sync hardware protection doc with code Originally, the thermal framework's only hardware protection action was to trigger a shutdown. This has been changed a little over a year ago to also support rebooting as alternative hardware protection action. Update the documentation to reflect this. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-3-e1c09b090c0c@pengutronix.de Fixes: 62e79e38b257 ("thermal/thermal_of: Allow rebooting after critical temp") Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Reviewed-by: Matti Vaittinen Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- .../driver-api/thermal/sysfs-api.rst | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index c803b89b7248..f73de211bdce 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -413,18 +413,21 @@ This function serves as an arbitrator to set the state of a cooling device. It sets the cooling device to the deepest cooling state if possible. -5. thermal_emergency_poweroff -============================= +5. Critical Events +================== -On an event of critical trip temperature crossing the thermal framework -shuts down the system by calling hw_protection_shutdown(). The -hw_protection_shutdown() first attempts to perform an orderly shutdown -but accepts a delay after which it proceeds doing a forced power-off -or as last resort an emergency_restart. +On an event of critical trip temperature crossing, the thermal framework +will trigger a hardware protection power-off (shutdown) or reboot, +depending on configuration. + +At first, the kernel will attempt an orderly power-off or reboot, but +accepts a delay after which it proceeds to do a forced power-off or +reboot, respectively. If this fails, ``emergency_restart()`` is invoked +as last resort. The delay should be carefully profiled so as to give adequate time for -orderly poweroff. +orderly power-off or reboot. -If the delay is set to 0 emergency poweroff will not be supported. So a -carefully profiled non-zero positive value is a must for emergency -poweroff to be triggered. +If the delay is set to 0, the emergency action will not be supported. So a +carefully profiled non-zero positive value is a must for the emergency +action to be triggered. From aafb1245b2feaeab50d5e20d5a76f7c00bc736a0 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:44 +0100 Subject: [PATCH 1057/2157] reboot: describe do_kernel_restart's cmd argument in kernel-doc A W=1 build rightfully complains about the function's kernel-doc being incomplete. Describe its single parameter to fix this. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-4-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- kernel/reboot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index f348f1ba9e22..6185cfe5d4ee 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -229,6 +229,9 @@ EXPORT_SYMBOL(unregister_restart_handler); /** * do_kernel_restart - Execute kernel restart handler call chain * + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * * Calls functions registered with register_restart_handler. * * Expected to be called from machine_restart as last step of the restart From 81cab0f94ab864f13e29e561382d722daec6a55a Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:45 +0100 Subject: [PATCH 1058/2157] reboot: rename now misleading __hw_protection_shutdown symbols The __hw_protection_shutdown function name has become misleading since it can cause either a shutdown (poweroff) or a reboot depending on its argument. To avoid further confusion, let's rename it, so it doesn't suggest that a poweroff is all it can do. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-5-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- include/linux/reboot.h | 8 ++++---- kernel/reboot.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index e97f6b8e8586..53c64e31b3cf 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -188,17 +188,17 @@ extern void orderly_reboot(void); */ enum hw_protection_action { HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; -void __hw_protection_shutdown(const char *reason, int ms_until_forced, - enum hw_protection_action action); +void __hw_protection_trigger(const char *reason, int ms_until_forced, + enum hw_protection_action action); static inline void hw_protection_reboot(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, HWPROT_ACT_REBOOT); + __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_REBOOT); } static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, HWPROT_ACT_SHUTDOWN); + __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_SHUTDOWN); } /* diff --git a/kernel/reboot.c b/kernel/reboot.c index 6185cfe5d4ee..c1f11d5e085e 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -1008,7 +1008,7 @@ static void hw_failure_emergency_schedule(enum hw_protection_action action, } /** - * __hw_protection_shutdown - Trigger an emergency system shutdown or reboot + * __hw_protection_trigger - Trigger an emergency system shutdown or reboot * * @reason: Reason of emergency shutdown or reboot to be printed. * @ms_until_forced: Time to wait for orderly shutdown or reboot before @@ -1022,8 +1022,8 @@ static void hw_failure_emergency_schedule(enum hw_protection_action action, * pending even if the previous request has given a large timeout for forced * shutdown/reboot. */ -void __hw_protection_shutdown(const char *reason, int ms_until_forced, - enum hw_protection_action action) +void __hw_protection_trigger(const char *reason, int ms_until_forced, + enum hw_protection_action action) { static atomic_t allow_proceed = ATOMIC_INIT(1); @@ -1043,7 +1043,7 @@ void __hw_protection_shutdown(const char *reason, int ms_until_forced, else orderly_poweroff(true); } -EXPORT_SYMBOL_GPL(__hw_protection_shutdown); +EXPORT_SYMBOL_GPL(__hw_protection_trigger); static int __init reboot_setup(char *str) { From 96201a8a984658169dfa23507b6e75311c9975a8 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:46 +0100 Subject: [PATCH 1059/2157] reboot: indicate whether it is a HARDWARE PROTECTION reboot or shutdown It currently depends on the caller, whether we attempt a hardware protection shutdown (poweroff) or a reboot. A follow-up commit will make this partially user-configurable, so it's a good idea to have the emergency message clearly state whether the kernel is going for a reboot or a shutdown. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-6-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- kernel/reboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index c1f11d5e085e..faf1ff422634 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -1027,7 +1027,8 @@ void __hw_protection_trigger(const char *reason, int ms_until_forced, { static atomic_t allow_proceed = ATOMIC_INIT(1); - pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); + pr_emerg("HARDWARE PROTECTION %s (%s)\n", + hw_protection_action_str(action), reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) From e016173f656b89f5f71b7d45dfc599ada8107eef Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:47 +0100 Subject: [PATCH 1060/2157] reboot: add support for configuring emergency hardware protection action We currently leave the decision of whether to shutdown or reboot to protect hardware in an emergency situation to the individual drivers. This works out in some cases, where the driver detecting the critical failure has inside knowledge: It binds to the system management controller for example or is guided by hardware description that defines what to do. In the general case, however, the driver detecting the issue can't know what the appropriate course of action is and shouldn't be dictating the policy of dealing with it. Therefore, add a global hw_protection toggle that allows the user to specify whether shutdown or reboot should be the default action when the driver doesn't set policy. This introduces no functional change yet as hw_protection_trigger() has no callers, but these will be added in subsequent commits. [arnd@arndb.de: hide unused hw_protection_attr] Link: https://lkml.kernel.org/r/20250224141849.1546019-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-7-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-kernel-reboot | 8 ++++ .../admin-guide/kernel-parameters.txt | 6 +++ include/linux/reboot.h | 22 ++++++++- include/uapi/linux/capability.h | 1 + kernel/reboot.c | 48 +++++++++++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot index 837330fb2511..e117aba46be0 100644 --- a/Documentation/ABI/testing/sysfs-kernel-reboot +++ b/Documentation/ABI/testing/sysfs-kernel-reboot @@ -30,3 +30,11 @@ KernelVersion: 5.11 Contact: Matteo Croce Description: Don't wait for any other CPUs on reboot and avoid anything that could hang. + +What: /sys/kernel/reboot/hw_protection +Date: April 2025 +KernelVersion: 6.15 +Contact: Ahmad Fatoum +Description: Hardware protection action taken on critical events like + overtemperature or imminent voltage loss. + Valid values are: reboot shutdown diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8..b2f04967876f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1933,6 +1933,12 @@ which allow the hypervisor to 'idle' the guest on lock contention. + hw_protection= [HW] + Format: reboot | shutdown + + Hardware protection action taken on critical events like + overtemperature or imminent voltage loss. + i2c_bus= [HW] Override the default board specific I2C bus speed or register an additional I2C bus that is not registered from board initialization code. diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 53c64e31b3cf..79e02876f2ba 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -181,16 +181,36 @@ extern void orderly_reboot(void); /** * enum hw_protection_action - Hardware protection action * + * @HWPROT_ACT_DEFAULT: + * The default action should be taken. This is HWPROT_ACT_SHUTDOWN + * by default, but can be overridden. * @HWPROT_ACT_SHUTDOWN: * The system should be shut down (powered off) for HW protection. * @HWPROT_ACT_REBOOT: * The system should be rebooted for HW protection. */ -enum hw_protection_action { HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; +enum hw_protection_action { HWPROT_ACT_DEFAULT, HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; void __hw_protection_trigger(const char *reason, int ms_until_forced, enum hw_protection_action action); +/** + * hw_protection_trigger - Trigger default emergency system hardware protection action + * + * @reason: Reason of emergency shutdown or reboot to be printed. + * @ms_until_forced: Time to wait for orderly shutdown or reboot before + * triggering it. Negative value disables the forced + * shutdown or reboot. + * + * Initiate an emergency system shutdown or reboot in order to protect + * hardware from further damage. The exact action taken is controllable at + * runtime and defaults to shutdown. + */ +static inline void hw_protection_trigger(const char *reason, int ms_until_forced) +{ + __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_DEFAULT); +} + static inline void hw_protection_reboot(const char *reason, int ms_until_forced) { __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_REBOOT); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 5bb906098697..2e21b5594f81 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -275,6 +275,7 @@ struct vfs_ns_cap_data { /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ /* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */ +/* Allow setting hardware protection emergency action */ #define CAP_SYS_ADMIN 21 diff --git a/kernel/reboot.c b/kernel/reboot.c index faf1ff422634..2d6a06fe6c66 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -36,6 +36,8 @@ enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; EXPORT_SYMBOL_GPL(reboot_mode); enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED; +static enum hw_protection_action hw_protection_action = HWPROT_ACT_SHUTDOWN; + /* * This variable is used privately to keep track of whether or not * reboot_type is still set to its default value (i.e., reboot= hasn't @@ -1027,6 +1029,9 @@ void __hw_protection_trigger(const char *reason, int ms_until_forced, { static atomic_t allow_proceed = ATOMIC_INIT(1); + if (action == HWPROT_ACT_DEFAULT) + action = hw_protection_action; + pr_emerg("HARDWARE PROTECTION %s (%s)\n", hw_protection_action_str(action), reason); @@ -1046,6 +1051,48 @@ void __hw_protection_trigger(const char *reason, int ms_until_forced, } EXPORT_SYMBOL_GPL(__hw_protection_trigger); +static bool hw_protection_action_parse(const char *str, + enum hw_protection_action *action) +{ + if (sysfs_streq(str, "shutdown")) + *action = HWPROT_ACT_SHUTDOWN; + else if (sysfs_streq(str, "reboot")) + *action = HWPROT_ACT_REBOOT; + else + return false; + + return true; +} + +static int __init hw_protection_setup(char *str) +{ + hw_protection_action_parse(str, &hw_protection_action); + return 1; +} +__setup("hw_protection=", hw_protection_setup); + +#ifdef CONFIG_SYSFS +static ssize_t hw_protection_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", + hw_protection_action_str(hw_protection_action)); +} +static ssize_t hw_protection_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!hw_protection_action_parse(buf, &hw_protection_action)) + return -EINVAL; + + return count; +} +static struct kobj_attribute hw_protection_attr = __ATTR_RW(hw_protection); +#endif + static int __init reboot_setup(char *str) { for (;;) { @@ -1305,6 +1352,7 @@ static struct kobj_attribute reboot_cpu_attr = __ATTR_RW(cpu); #endif static struct attribute *reboot_attrs[] = { + &hw_protection_attr.attr, &reboot_mode_attr.attr, #ifdef CONFIG_X86 &reboot_force_attr.attr, From b15388a2f0195c2921d80cb16eab91b8802af2b8 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:48 +0100 Subject: [PATCH 1061/2157] regulator: allow user configuration of hardware protection action When the core detects permanent regulator hardware failure or imminent power failure of a critical supply, it will call hw_protection_shutdown in an attempt to do a limited orderly shutdown followed by powering off the system. This doesn't work out well for many unattended embedded systems that don't have support for shutdown and that power on automatically when power is supplied: - A brief power cycle gets detected by the driver - The kernel powers down the system and SoC goes into shutdown mode - Power is restored - The system remains oblivious to the restored power - System needs to be manually power cycled for a duration long enough to drain the capacitors Allow users to fix this by calling the newly introduced hw_protection_trigger() instead: This way the hw_protection commandline or sysfs parameter is used to dictate the policy of dealing with the regulator fault. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-8-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Reviewed-by: Matti Vaittinen Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- drivers/regulator/core.c | 4 ++-- drivers/regulator/irq_helpers.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 4ddf0efead68..280559509dcf 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5262,8 +5262,8 @@ static void regulator_handle_critical(struct regulator_dev *rdev, if (!reason) return; - hw_protection_shutdown(reason, - rdev->constraints->uv_less_critical_window_ms); + hw_protection_trigger(reason, + rdev->constraints->uv_less_critical_window_ms); } /** diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c index 0aa188b2bbb2..5742faee8071 100644 --- a/drivers/regulator/irq_helpers.c +++ b/drivers/regulator/irq_helpers.c @@ -64,16 +64,16 @@ static void regulator_notifier_isr_work(struct work_struct *work) reread: if (d->fatal_cnt && h->retry_cnt > d->fatal_cnt) { if (!d->die) - return hw_protection_shutdown("Regulator HW failure? - no IC recovery", - REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); + return hw_protection_trigger("Regulator HW failure? - no IC recovery", + REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); ret = d->die(rid); /* * If the 'last resort' IC recovery failed we will have * nothing else left to do... */ if (ret) - return hw_protection_shutdown("Regulator HW failure. IC recovery failed", - REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); + return hw_protection_trigger("Regulator HW failure. IC recovery failed", + REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); /* * If h->die() was implemented we assume recovery has been @@ -263,14 +263,14 @@ static irqreturn_t regulator_notifier_isr(int irq, void *data) if (d->fatal_cnt && h->retry_cnt > d->fatal_cnt) { /* If we have no recovery, just try shut down straight away */ if (!d->die) { - hw_protection_shutdown("Regulator failure. Retry count exceeded", - REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); + hw_protection_trigger("Regulator failure. Retry count exceeded", + REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); } else { ret = d->die(rid); /* If die() failed shut down as a last attempt to save the HW */ if (ret) - hw_protection_shutdown("Regulator failure. Recovery failed", - REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); + hw_protection_trigger("Regulator failure. Recovery failed", + REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS); } } From cd9beb9eb395a0df631e5b1d1314b4f7f1e8579f Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:49 +0100 Subject: [PATCH 1062/2157] platform/chrome: cros_ec_lpc: prepare for hw_protection_shutdown removal In the general case, a driver doesn't know which of system shutdown or reboot is the better action to take to protect hardware in an emergency situation. For this reason, hw_protection_shutdown is going to be removed in favor of hw_protection_trigger, which defaults to shutdown, but may be configured at kernel runtime to be a reboot instead. The ChromeOS EC situation is different as we do know that shutdown is the correct action as the EC is programmed to force reset after the short period, thus replace hw_protection_shutdown with __hw_protection_trigger with HWPROT_ACT_SHUTDOWN as argument to maintain the same behavior. No functional change. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-9-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Acked-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- drivers/platform/chrome/cros_ec_lpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index 5a2f1d98b350..0b723c1e435c 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -454,7 +454,7 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data) blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev); kobject_uevent_env(&ec_dev->dev->kobj, KOBJ_CHANGE, (char **)env); /* Begin orderly shutdown. EC will force reset after a short period. */ - hw_protection_shutdown("CrOS EC Panic", -1); + __hw_protection_trigger("CrOS EC Panic", -1, HWPROT_ACT_SHUTDOWN); /* Do not query for other events after a panic is reported */ return; } From 738b7856933de7b9775ea7bf6a72a1002dbd2e12 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:50 +0100 Subject: [PATCH 1063/2157] dt-bindings: thermal: give OS some leeway in absence of critical-action An operating system may allow its user to configure the action to be undertaken on critical overtemperature events. However, the bindings currently mandate an absence of the critical-action property to be equal to critical-action = "shutdown", which would mean any differing user configuration would violate the bindings. Resolve this by documenting the absence of the property to mean that the OS gets to decide. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-10-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Acked-by: Rob Herring (Arm) Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Cc: Tzung-Bi Shih Signed-off-by: Andrew Morton --- Documentation/devicetree/bindings/thermal/thermal-zones.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 0f435be1dbd8..0de0a9757ccc 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -82,9 +82,8 @@ patternProperties: $ref: /schemas/types.yaml#/definitions/string description: | The action the OS should perform after the critical temperature is reached. - By default the system will shutdown as a safe action to prevent damage - to the hardware, if the property is not set. - The shutdown action should be always the default and preferred one. + If the property is not set, it is up to the system to select the correct + action. The recommended and preferred default is shutdown. Choose 'reboot' with care, as the hardware may be in thermal stress, thus leading to infinite reboots that may cause damage to the hardware. Make sure the firmware/bootloader will act as the last resort and take From 941a07cad2fdfe79e768936fb0f2652e4deb1766 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:51 +0100 Subject: [PATCH 1064/2157] thermal: core: allow user configuration of hardware protection action In the general case, we don't know which of system shutdown or reboot is the better action to take to protect hardware in an emergency situation. We thus allow the policy to come from the device-tree in the form of an optional critical-action OF property, but so far there was no way for the end user to configure this. With recent addition of the hw_protection parameter, the user can now choose a default action for the case, where the driver isn't fully sure what's the better course of action. Let's make use of this by passing HWPROT_ACT_DEFAULT in absence of the critical-action OF property. As HWPROT_ACT_DEFAULT is shutdown by default, this introduces no functional change for users, unless they start using the new parameter. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-11-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- drivers/thermal/thermal_core.c | 17 ++++++++++------- drivers/thermal/thermal_core.h | 1 + drivers/thermal/thermal_of.c | 7 +++++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2328ac0d8561..5847729419f2 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -369,7 +369,8 @@ void thermal_governor_update_tz(struct thermal_zone_device *tz, tz->governor->update_tz(tz, reason); } -static void thermal_zone_device_halt(struct thermal_zone_device *tz, bool shutdown) +static void thermal_zone_device_halt(struct thermal_zone_device *tz, + enum hw_protection_action action) { /* * poweroff_delay_ms must be a carefully profiled positive value. @@ -380,21 +381,23 @@ static void thermal_zone_device_halt(struct thermal_zone_device *tz, bool shutdo dev_emerg(&tz->device, "%s: critical temperature reached\n", tz->type); - if (shutdown) - hw_protection_shutdown(msg, poweroff_delay_ms); - else - hw_protection_reboot(msg, poweroff_delay_ms); + __hw_protection_trigger(msg, poweroff_delay_ms, action); } void thermal_zone_device_critical(struct thermal_zone_device *tz) { - thermal_zone_device_halt(tz, true); + thermal_zone_device_halt(tz, HWPROT_ACT_DEFAULT); } EXPORT_SYMBOL(thermal_zone_device_critical); +void thermal_zone_device_critical_shutdown(struct thermal_zone_device *tz) +{ + thermal_zone_device_halt(tz, HWPROT_ACT_SHUTDOWN); +} + void thermal_zone_device_critical_reboot(struct thermal_zone_device *tz) { - thermal_zone_device_halt(tz, false); + thermal_zone_device_halt(tz, HWPROT_ACT_REBOOT); } static void handle_critical_trips(struct thermal_zone_device *tz, diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 09866f0ce765..bdadd141aa24 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -262,6 +262,7 @@ int thermal_build_list_of_policies(char *buf); void __thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event); void thermal_zone_device_critical_reboot(struct thermal_zone_device *tz); +void thermal_zone_device_critical_shutdown(struct thermal_zone_device *tz); void thermal_governor_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason); diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 5401f03d6b6c..ce9260cc30f7 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -405,9 +405,12 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * of_ops.should_bind = thermal_of_should_bind; ret = of_property_read_string(np, "critical-action", &action); - if (!ret) - if (!of_ops.critical && !strcasecmp(action, "reboot")) + if (!ret && !of_ops.critical) { + if (!strcasecmp(action, "reboot")) of_ops.critical = thermal_zone_device_critical_reboot; + else if (!strcasecmp(action, "shutdown")) + of_ops.critical = thermal_zone_device_critical_shutdown; + } tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips, data, &of_ops, &tzp, From 5e40d0d196595874c3f0606f0642021e6ade8054 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 17 Feb 2025 21:39:52 +0100 Subject: [PATCH 1065/2157] reboot: retire hw_protection_reboot and hw_protection_shutdown helpers The hw_protection_reboot and hw_protection_shutdown functions mix mechanism with policy: They let the driver requesting an emergency action for hardware protection also decide how to deal with it. This is inadequate in the general case as a driver reporting e.g. an imminent power failure can't know whether a shutdown or a reboot would be more appropriate for a given hardware platform. With the addition of the hw_protection parameter, it's now possible to configure at runtime the default emergency action and drivers are expected to use hw_protection_trigger to have this parameter dictate policy. As no current users of either hw_protection_shutdown or hw_protection_shutdown helpers remain, remove them, as not to tempt driver authors to call them. Existing users now either defer to hw_protection_trigger or call __hw_protection_trigger with a suitable argument directly when they have inside knowledge on whether a reboot or shutdown would be more appropriate. Link: https://lkml.kernel.org/r/20250217-hw_protection-reboot-v3-12-e1c09b090c0c@pengutronix.de Signed-off-by: Ahmad Fatoum Reviewed-by: Tzung-Bi Shih Cc: Benson Leung Cc: Daniel Lezcano Cc: Fabio Estevam Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Liam Girdwood Cc: Lukasz Luba Cc: Mark Brown Cc: Matteo Croce Cc: Matti Vaittinen Cc: "Rafael J. Wysocki" Cc: Rob Herring (Arm) Cc: Rui Zhang Cc: Sascha Hauer Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- include/linux/reboot.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 79e02876f2ba..aa08c3bbbf59 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -211,16 +211,6 @@ static inline void hw_protection_trigger(const char *reason, int ms_until_forced __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_DEFAULT); } -static inline void hw_protection_reboot(const char *reason, int ms_until_forced) -{ - __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_REBOOT); -} - -static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) -{ - __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_SHUTDOWN); -} - /* * Emergency restart, callable from an interrupt handler. */ From 0ac451ecec6dd516fdac1ca064467e753ef6ae1a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 16 Feb 2025 00:56:18 +0800 Subject: [PATCH 1066/2157] lib min_heap: use size_t for array size and index variables Replace the int type with size_t for variables representing array sizes and indices in the min-heap implementation. Using size_t aligns with standard practices for size-related variables and avoids potential issues on platforms where int may be insufficient to represent all valid sizes or indices. Link: https://lkml.kernel.org/r/20250215165618.1757219-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: Yu-Chun Lin Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 12 ++++++------ lib/min_heap.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 1160bed6579e..79ddc0adbf2b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -218,7 +218,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) /* Initialize a min-heap. */ static __always_inline -void __min_heap_init_inline(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, size_t size) { heap->nr = 0; heap->size = size; @@ -254,7 +254,7 @@ bool __min_heap_full_inline(min_heap_char *heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { const unsigned long lsbit = elem_size & -elem_size; @@ -324,7 +324,7 @@ static __always_inline void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func, void *args) { - int i; + ssize_t i; for (i = heap->nr / 2 - 1; i >= 0; i--) __min_heap_sift_down_inline(heap, i, elem_size, func, args); @@ -379,7 +379,7 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele const struct min_heap_callbacks *func, void *args) { void *data = heap->data; - int pos; + size_t pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) return false; @@ -428,10 +428,10 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ __minheap_obj_size(_heap), _idx, _func, _args) -void __min_heap_init(min_heap_char *heap, void *data, int size); +void __min_heap_init(min_heap_char *heap, void *data, size_t size); void *__min_heap_peek(struct min_heap_char *heap); bool __min_heap_full(min_heap_char *heap); -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args); void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); diff --git a/lib/min_heap.c b/lib/min_heap.c index 4485372ff3b1..96f01a4c5fb6 100644 --- a/lib/min_heap.c +++ b/lib/min_heap.c @@ -2,7 +2,7 @@ #include #include -void __min_heap_init(min_heap_char *heap, void *data, int size) +void __min_heap_init(min_heap_char *heap, void *data, size_t size) { __min_heap_init_inline(heap, data, size); } @@ -20,7 +20,7 @@ bool __min_heap_full(min_heap_char *heap) } EXPORT_SYMBOL(__min_heap_full); -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { __min_heap_sift_down_inline(heap, pos, elem_size, func, args); From fa34ce0ad9417705e718343462ce010d85d70c9c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 18 Feb 2025 19:05:55 +0200 Subject: [PATCH 1067/2157] mailmap: remove never used @parity.io email As this employment lasted only four months and was never used over here, let's just rip it off for good. Link: https://lkml.kernel.org/r/20250218170557.68371-1-jarkko@kernel.org Signed-off-by: Jarkko Sakkinen Cc: Alex Elder Cc: Antonio Quartulli Cc: Eugen Hristev Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Mathieu Othacehe Cc: Naoya Horiguchi Cc: Quentin Monnet Cc: Satya Priya Kakitapalli Cc: Simon Wunderlich Signed-off-by: Andrew Morton --- .mailmap | 1 - 1 file changed, 1 deletion(-) diff --git a/.mailmap b/.mailmap index 0e373e743495..87e42ee58685 100644 --- a/.mailmap +++ b/.mailmap @@ -302,7 +302,6 @@ Jan Glauber Jan Kuliga Jarkko Sakkinen Jarkko Sakkinen -Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe From ee87af982405baec02a188f41f1a141eb3fbdf01 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 19 Feb 2025 16:17:02 +0900 Subject: [PATCH 1068/2157] MAINTAINERS: mailmap: update Hyeonggon's name and email address Update my Korean name and personal email address to my English name and Oracle email address. Link: https://lkml.kernel.org/r/20250219071702.964344-1-42.hyeyoo@gmail.com Signed-off-by: Hyeonggon Yoo (Oracle) <42.hyeyoo@gmail.com> Acked-by: Lorenzo Stoakes Cc: Dev Jain Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 87e42ee58685..f88e5e28ce00 100644 --- a/.mailmap +++ b/.mailmap @@ -269,6 +269,7 @@ Hamza Mahfooz Hanjun Guo Hans Verkuil Hans Verkuil +Harry Yoo <42.hyeyoo@gmail.com> Heiko Carstens Heiko Carstens Heiko Stuebner diff --git a/MAINTAINERS b/MAINTAINERS index a666d7e34fd3..7cd3650ca466 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21812,7 +21812,7 @@ M: Joonsoo Kim M: Andrew Morton M: Vlastimil Babka R: Roman Gushchin -R: Hyeonggon Yoo <42.hyeyoo@gmail.com> +R: Harry Yoo L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git From 4022918876f9a28fe5379e2d7b7840e84f7b56ed Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 20 Feb 2025 12:23:40 +0000 Subject: [PATCH 1069/2157] scripts/gdb: add $lx_per_cpu_ptr() We currently have $lx_per_cpu() which works fine for stuff that kernel code would access via per_cpu(). But this doesn't work for stuff that kernel code accesses via per_cpu_ptr(): (gdb) p $lx_per_cpu(node_data[1].node_zones[2]->per_cpu_pageset) Cannot access memory at address 0xffff11105fbd6c28 This is because we take the address of the pointer and use that as the offset, instead of using the stored value. Add a GDB version that mirrors the kernel API, which uses the pointer value. To be consistent with per_cpu_ptr(), we need to return the pointer value instead of dereferencing it for the user. Therefore, move the existing dereference out of the per_cpu() Python helper and do that only in the $lx_per_cpu() implementation. Link: https://lkml.kernel.org/r/20250220-lx-per-cpu-ptr-v2-1-945dee8d8d38@google.com Signed-off-by: Brendan Jackman Reviewed-by: Jan Kiszka Cc: Florian Rommel Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/cpus.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 13eb8b3901b8..1a50a4195def 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -46,7 +46,7 @@ def per_cpu(var_ptr, cpu): # !CONFIG_SMP case offset = 0 pointer = var_ptr.cast(utils.get_long_type()) + offset - return pointer.cast(var_ptr.type).dereference() + return pointer.cast(var_ptr.type) cpu_mask = {} @@ -149,11 +149,29 @@ Note that VAR has to be quoted as string.""" super(PerCpu, self).__init__("lx_per_cpu") def invoke(self, var, cpu=-1): - return per_cpu(var.address, cpu) + return per_cpu(var.address, cpu).dereference() PerCpu() + +class PerCpuPtr(gdb.Function): + """Return per-cpu pointer. + +$lx_per_cpu_ptr("VAR"[, CPU]): Return the per-cpu pointer called VAR for the +given CPU number. If CPU is omitted, the CPU of the current context is used. +Note that VAR has to be quoted as string.""" + + def __init__(self): + super(PerCpuPtr, self).__init__("lx_per_cpu_ptr") + + def invoke(self, var, cpu=-1): + return per_cpu(var, cpu) + + +PerCpuPtr() + + def get_current_task(cpu): task_ptr_type = task_type.get_type().pointer() From 758502918e77323bf999a3eddd71466bf6135173 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 21 Feb 2025 05:02:21 -0800 Subject: [PATCH 1070/2157] rhashtable: remove needless return in three void APIs Remove needless 'return' in the following void APIs: rhltable_walk_enter() rhltable_free_and_destroy() rhltable_destroy() Since both the API and callee involved are void functions. Link: https://lkml.kernel.org/r/20250221-rmv_return-v1-16-cc8dff275827@quicinc.com Signed-off-by: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/rhashtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8463a128e2f4..6c85b28ea30b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1259,7 +1259,7 @@ static inline int rhashtable_replace_fast( static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { - return rhashtable_walk_enter(&hlt->ht, iter); + rhashtable_walk_enter(&hlt->ht, iter); } /** @@ -1275,12 +1275,12 @@ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void *arg), void *arg) { - return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); + rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { - return rhltable_free_and_destroy(hlt, NULL, NULL); + rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */ From ede7cd607a1d206b9579264a7405d78316b2d7fd Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 21 Feb 2025 05:02:07 -0800 Subject: [PATCH 1071/2157] cpu: remove needless return in void API suspend_enable_secondary_cpus() Remove needless 'return' in void API suspend_enable_secondary_cpus() since both the API and thaw_secondary_cpus() are void functions. Link: https://lkml.kernel.org/r/20250221-rmv_return-v1-2-cc8dff275827@quicinc.com Signed-off-by: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6a0a8f1c7c90..e3049543008b 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -148,7 +148,7 @@ static inline int suspend_disable_secondary_cpus(void) } static inline void suspend_enable_secondary_cpus(void) { - return thaw_secondary_cpus(); + thaw_secondary_cpus(); } #else /* !CONFIG_PM_SLEEP_SMP */ From 15c200eaf569f804ba684cefbae437a6c731ba95 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:15 +0000 Subject: [PATCH 1072/2157] coccinelle: misc: secs_to_jiffies: Patch expressions too Patch series "Converge on using secs_to_jiffies() part two", v3. This is the second series that converts users of msecs_to_jiffies() that either use the multiply pattern of either of: - msecs_to_jiffies(N*1000) or - msecs_to_jiffies(N*MSEC_PER_SEC) where N is a constant or an expression, to avoid the multiplication. The conversion is made with Coccinelle with the secs_to_jiffies() script in scripts/coccinelle/misc. Attention is paid to what the best change can be rather than restricting to what the tool provides. This patch (of 16): Teach the script to suggest conversions for timeout patterns where the arguments to msecs_to_jiffies() are expressions as well. Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-0-a43967e36c88@linux.microsoft.com Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-1-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Mark Brown Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Takashi Iwai Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Xiubo Li Cc: Carlos Maiolino Cc: Marc Kleine-Budde Signed-off-by: Andrew Morton --- scripts/coccinelle/misc/secs_to_jiffies.cocci | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/coccinelle/misc/secs_to_jiffies.cocci b/scripts/coccinelle/misc/secs_to_jiffies.cocci index 8bbb2884ea5d..416f348174ca 100644 --- a/scripts/coccinelle/misc/secs_to_jiffies.cocci +++ b/scripts/coccinelle/misc/secs_to_jiffies.cocci @@ -20,3 +20,13 @@ virtual patch - msecs_to_jiffies(C * MSEC_PER_SEC) + secs_to_jiffies(C) + +@depends on patch@ expression E; @@ + +- msecs_to_jiffies(E * 1000) ++ secs_to_jiffies(E) + +@depends on patch@ expression E; @@ + +- msecs_to_jiffies(E * MSEC_PER_SEC) ++ secs_to_jiffies(E) From 84c34d0105877836a1c000b97bd6025d20f390e8 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:16 +0000 Subject: [PATCH 1073/2157] scsi: lpfc: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies(E * 1000) +secs_to_jiffies(E) -msecs_to_jiffies(E * MSEC_PER_SEC) +secs_to_jiffies(E) While here, convert some timeouts that are denominated in seconds manually. [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-2-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/scsi/lpfc/lpfc.h | 3 +-- drivers/scsi/lpfc/lpfc_els.c | 11 ++++----- drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- drivers/scsi/lpfc/lpfc_init.c | 10 ++++---- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++------ drivers/scsi/lpfc/lpfc_sli.c | 41 +++++++++++++------------------- drivers/scsi/lpfc/lpfc_vport.c | 2 +- 7 files changed, 34 insertions(+), 47 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index e5a9c5a323f8..19e227f8b398 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -74,8 +74,7 @@ struct lpfc_sli2_slim; * queue depths when there are driver resource error or Firmware * resource error. */ -/* 1 Second */ -#define QUEUE_RAMP_DOWN_INTERVAL (msecs_to_jiffies(1000 * 1)) +#define QUEUE_RAMP_DOWN_INTERVAL (secs_to_jiffies(1)) /* Number of exchanges reserved for discovery to complete */ #define LPFC_DISC_IOCB_BUFF_COUNT 20 diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1d7db49a8fe4..fcf8186ad5e3 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8045,8 +8045,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, if (test_bit(FC_DISC_TMO, &vport->fc_flag)) { tmo = ((phba->fc_ratov * 3) + 3); mod_timer(&vport->fc_disctmo, - jiffies + - msecs_to_jiffies(1000 * tmo)); + jiffies + secs_to_jiffies(tmo)); } return 0; } @@ -8081,7 +8080,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, if (test_bit(FC_DISC_TMO, &vport->fc_flag)) { tmo = ((phba->fc_ratov * 3) + 3); mod_timer(&vport->fc_disctmo, - jiffies + msecs_to_jiffies(1000 * tmo)); + jiffies + secs_to_jiffies(tmo)); } if ((rscn_cnt < FC_MAX_HOLD_RSCN) && !test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag)) { @@ -9511,7 +9510,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) if (!list_empty(&pring->txcmplq)) if (!test_bit(FC_UNLOADING, &phba->pport->load_flag)) mod_timer(&vport->els_tmofunc, - jiffies + msecs_to_jiffies(1000 * timeout)); + jiffies + secs_to_jiffies(timeout)); } /** @@ -10899,7 +10898,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) "3334 Delay fc port discovery for %d secs\n", phba->fc_ratov); mod_timer(&vport->delayed_disc_tmo, - jiffies + msecs_to_jiffies(1000 * phba->fc_ratov)); + jiffies + secs_to_jiffies(phba->fc_ratov)); return; } @@ -11156,7 +11155,7 @@ lpfc_retry_pport_discovery(struct lpfc_hba *phba) if (!ndlp) return; - mod_timer(&ndlp->nlp_delayfunc, jiffies + msecs_to_jiffies(1000)); + mod_timer(&ndlp->nlp_delayfunc, jiffies + secs_to_jiffies(1)); set_bit(NLP_DELAY_TMO, &ndlp->nlp_flag); ndlp->nlp_last_elscmd = ELS_CMD_FLOGI; phba->pport->port_state = LPFC_FLOGI; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 36e66df36a18..0c390d703ee3 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4973,7 +4973,7 @@ lpfc_set_disctmo(struct lpfc_vport *vport) tmo, vport->port_state, vport->fc_flag); } - mod_timer(&vport->fc_disctmo, jiffies + msecs_to_jiffies(1000 * tmo)); + mod_timer(&vport->fc_disctmo, jiffies + secs_to_jiffies(tmo)); set_bit(FC_DISC_TMO, &vport->fc_flag); /* Start Discovery Timer state */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index bcadf11414c8..aaf4004454ab 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -595,7 +595,7 @@ lpfc_config_port_post(struct lpfc_hba *phba) /* Set up ring-0 (ELS) timer */ timeout = phba->fc_ratov * 2; mod_timer(&vport->els_tmofunc, - jiffies + msecs_to_jiffies(1000 * timeout)); + jiffies + secs_to_jiffies(timeout)); /* Set up heart beat (HB) timer */ mod_timer(&phba->hb_tmofunc, jiffies + secs_to_jiffies(LPFC_HB_MBOX_INTERVAL)); @@ -604,7 +604,7 @@ lpfc_config_port_post(struct lpfc_hba *phba) phba->last_completion_time = jiffies; /* Set up error attention (ERATT) polling timer */ mod_timer(&phba->eratt_poll, - jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval)); + jiffies + secs_to_jiffies(phba->eratt_poll_interval)); if (test_bit(LINK_DISABLED, &phba->hba_flag)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, @@ -3361,8 +3361,8 @@ lpfc_block_mgmt_io(struct lpfc_hba *phba, int mbx_action) /* Determine how long we might wait for the active mailbox * command to be gracefully completed by firmware. */ - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, - phba->sli.mbox_active) * 1000) + jiffies; + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, + phba->sli.mbox_active)) + jiffies; } spin_unlock_irqrestore(&phba->hbalock, iflag); @@ -6909,7 +6909,7 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba, * re-instantiate the Vlink using FDISC. */ mod_timer(&ndlp->nlp_delayfunc, - jiffies + msecs_to_jiffies(1000)); + jiffies + secs_to_jiffies(1)); set_bit(NLP_DELAY_TMO, &ndlp->nlp_flag); ndlp->nlp_last_elscmd = ELS_CMD_FDISC; vport->port_state = LPFC_FDISC; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 055ed632c14d..f0158fc00f78 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5645,9 +5645,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) * cmd_flag is set to LPFC_DRIVER_ABORTED before we wait * for abort to complete. */ - wait_event_timeout(waitq, - (lpfc_cmd->pCmd != cmnd), - msecs_to_jiffies(2*vport->cfg_devloss_tmo*1000)); + wait_event_timeout(waitq, (lpfc_cmd->pCmd != cmnd), + secs_to_jiffies(2*vport->cfg_devloss_tmo)); spin_lock(&lpfc_cmd->buf_lock); @@ -5911,7 +5910,7 @@ lpfc_chk_tgt_mapped(struct lpfc_vport *vport, struct fc_rport *rport) * If target is not in a MAPPED state, delay until * target is rediscovered or devloss timeout expires. */ - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; + later = secs_to_jiffies(2 * vport->cfg_devloss_tmo) + jiffies; while (time_after(later, jiffies)) { if (!pnode) return FAILED; @@ -5957,7 +5956,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, lpfc_sli_abort_taskmgmt(vport, &phba->sli.sli3_ring[LPFC_FCP_RING], tgt_id, lun_id, context); - later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; + later = secs_to_jiffies(2 * vport->cfg_devloss_tmo) + jiffies; while (time_after(later, jiffies) && cnt) { schedule_timeout_uninterruptible(msecs_to_jiffies(20)); cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); @@ -6137,8 +6136,7 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) wait_event_timeout(waitq, !test_bit(NLP_WAIT_FOR_LOGO, &pnode->save_flags), - msecs_to_jiffies(dev_loss_tmo * - 1000)); + secs_to_jiffies(dev_loss_tmo)); if (test_and_clear_bit(NLP_WAIT_FOR_LOGO, &pnode->save_flags)) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 3fd9723cd271..af11b4ef13df 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1025,7 +1025,7 @@ lpfc_handle_rrq_active(struct lpfc_hba *phba) LIST_HEAD(send_rrq); clear_bit(HBA_RRQ_ACTIVE, &phba->hba_flag); - next_time = jiffies + msecs_to_jiffies(1000 * (phba->fc_ratov + 1)); + next_time = jiffies + secs_to_jiffies(phba->fc_ratov + 1); spin_lock_irqsave(&phba->rrq_list_lock, iflags); list_for_each_entry_safe(rrq, nextrrq, &phba->active_rrq_list, list) { @@ -1208,8 +1208,7 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, else rrq->send_rrq = 0; rrq->xritag = xritag; - rrq->rrq_stop_time = jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov + 1)); + rrq->rrq_stop_time = jiffies + secs_to_jiffies(phba->fc_ratov + 1); rrq->nlp_DID = ndlp->nlp_DID; rrq->vport = ndlp->vport; rrq->rxid = rxid; @@ -1736,8 +1735,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, BUG_ON(!piocb->vport); if (!test_bit(FC_UNLOADING, &piocb->vport->load_flag)) mod_timer(&piocb->vport->els_tmofunc, - jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); + jiffies + secs_to_jiffies(phba->fc_ratov << 1)); } return 0; @@ -3956,8 +3954,7 @@ void lpfc_poll_eratt(struct timer_list *t) else /* Restart the timer for next eratt poll */ mod_timer(&phba->eratt_poll, - jiffies + - msecs_to_jiffies(1000 * phba->eratt_poll_interval)); + jiffies + secs_to_jiffies(phba->eratt_poll_interval)); return; } @@ -9008,7 +9005,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) /* Start the ELS watchdog timer */ mod_timer(&vport->els_tmofunc, - jiffies + msecs_to_jiffies(1000 * (phba->fc_ratov * 2))); + jiffies + secs_to_jiffies(phba->fc_ratov * 2)); /* Start heart beat timer */ mod_timer(&phba->hb_tmofunc, @@ -9027,7 +9024,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) /* Start error attention (ERATT) polling timer */ mod_timer(&phba->eratt_poll, - jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval)); + jiffies + secs_to_jiffies(phba->eratt_poll_interval)); /* * The port is ready, set the host's link state to LINK_DOWN @@ -9504,8 +9501,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, goto out_not_finished; } /* timeout active mbox command */ - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, pmbox) * - 1000); + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, pmbox)); mod_timer(&psli->mbox_tmo, jiffies + timeout); } @@ -9629,8 +9625,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, drvr_flag); goto out_not_finished; } - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, pmbox) * - 1000) + jiffies; + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, pmbox)) + jiffies; i = 0; /* Wait for command to complete */ while (((word0 & OWN_CHIP) == OWN_CHIP) || @@ -9756,9 +9751,8 @@ lpfc_sli4_async_mbox_block(struct lpfc_hba *phba) * command to be gracefully completed by firmware. */ if (phba->sli.mbox_active) - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, - phba->sli.mbox_active) * - 1000) + jiffies; + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, + phba->sli.mbox_active)) + jiffies; spin_unlock_irq(&phba->hbalock); /* Make sure the mailbox is really active */ @@ -9881,8 +9875,7 @@ lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) } } - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq) - * 1000) + jiffies; + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq)) + jiffies; do { bmbx_reg.word0 = readl(phba->sli4_hba.BMBXregaddr); @@ -10230,7 +10223,7 @@ lpfc_sli4_post_async_mbox(struct lpfc_hba *phba) /* Start timer for the mbox_tmo and log some mailbox post messages */ mod_timer(&psli->mbox_tmo, (jiffies + - msecs_to_jiffies(1000 * lpfc_mbox_tmo_val(phba, mboxq)))); + secs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq)))); lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, "(%d):0355 Mailbox cmd x%x (x%x/x%x) issue Data: " @@ -13159,7 +13152,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, retval = lpfc_sli_issue_iocb(phba, ring_number, piocb, SLI_IOCB_RET_IOCB); if (retval == IOCB_SUCCESS) { - timeout_req = msecs_to_jiffies(timeout * 1000); + timeout_req = secs_to_jiffies(timeout); timeleft = wait_event_timeout(done_q, lpfc_chk_iocb_flg(phba, piocb, LPFC_IO_WAKE), timeout_req); @@ -13275,8 +13268,7 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, /* now issue the command */ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT); if (retval == MBX_BUSY || retval == MBX_SUCCESS) { - wait_for_completion_timeout(&mbox_done, - msecs_to_jiffies(timeout * 1000)); + wait_for_completion_timeout(&mbox_done, secs_to_jiffies(timeout)); spin_lock_irqsave(&phba->hbalock, flag); pmboxq->ctx_u.mbox_wait = NULL; @@ -13336,9 +13328,8 @@ lpfc_sli_mbox_sys_shutdown(struct lpfc_hba *phba, int mbx_action) * command to be gracefully completed by firmware. */ if (phba->sli.mbox_active) - timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, - phba->sli.mbox_active) * - 1000) + jiffies; + timeout = secs_to_jiffies(lpfc_mbox_tmo_val(phba, + phba->sli.mbox_active)) + jiffies; spin_unlock_irq(&phba->hbalock); /* Enable softirqs again, done with phba->hbalock */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 3d70cc517573..cc56a7334319 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -246,7 +246,7 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport) * fabric RA_TOV value and dev_loss tmo. The driver's * devloss_tmo is 10 giving this loop a 3x multiplier minimally. */ - wait_time_max = msecs_to_jiffies(((phba->fc_ratov * 3) + 3) * 1000); + wait_time_max = secs_to_jiffies((phba->fc_ratov * 3) + 3); wait_time_max += jiffies; start_time = jiffies; while (time_before(jiffies, wait_time_max)) { From 78cf56f8832a932ade20b8340a029ace14ac0e98 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:17 +0000 Subject: [PATCH 1074/2157] accel/habanalabs: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-3-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/accel/habanalabs/common/command_submission.c | 2 +- drivers/accel/habanalabs/common/debugfs.c | 2 +- drivers/accel/habanalabs/common/device.c | 2 +- drivers/accel/habanalabs/common/habanalabs_drv.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 59823e3c3bf7..dee487724918 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -2586,7 +2586,7 @@ int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) cs_seq = args->in.seq; timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT - ? msecs_to_jiffies(args->in.timeout * 1000) + ? secs_to_jiffies(args->in.timeout) : hpriv->hdev->timeout_jiffies; switch (cs_type) { diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index ca7677293a55..4b391807e5f2 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -1403,7 +1403,7 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf, return rc; if (value) - hdev->timeout_jiffies = msecs_to_jiffies(value * 1000); + hdev->timeout_jiffies = secs_to_jiffies(value); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 30277ae410d4..68eebed3b050 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -2091,7 +2091,7 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n", hdev->device_release_watchdog_timeout_sec); schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work, - msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000)); + secs_to_jiffies(hdev->device_release_watchdog_timeout_sec)); hdev->reset_info.watchdog_active = 1; out: spin_unlock(&hdev->reset_info.lock); diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index 596c52e8aa26..0035748f3228 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -386,7 +386,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; if (tmp_timeout) - hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); + hdev->timeout_jiffies = secs_to_jiffies(tmp_timeout); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; From 344825e17cfa9a7506ad4fd4fc62df6181162c4a Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:18 +0000 Subject: [PATCH 1075/2157] ALSA: ac97: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-4-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Takashi Iwai Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- sound/pci/ac97/ac97_codec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 6e710dce5c60..88ac37739b76 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -2461,8 +2461,7 @@ int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup) * (for avoiding loud click noises for many (OSS) apps * that open/close frequently) */ - schedule_delayed_work(&ac97->power_work, - msecs_to_jiffies(power_save * 1000)); + schedule_delayed_work(&ac97->power_work, secs_to_jiffies(power_save)); else { cancel_delayed_work(&ac97->power_work); update_power_regs(ac97); From 8ef9019ed2acdf74dce7c8b5618bf2bdabb47004 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:19 +0000 Subject: [PATCH 1076/2157] btrfs: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-5-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: David Sterba Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f09db62e61a1..ed2772d27919 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1568,7 +1568,7 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; - delay = msecs_to_jiffies(fs_info->commit_interval * 1000); + delay = secs_to_jiffies(fs_info->commit_interval); mutex_lock(&fs_info->transaction_kthread_mutex); spin_lock(&fs_info->trans_lock); @@ -1583,9 +1583,9 @@ static int transaction_kthread(void *arg) cur->state < TRANS_STATE_COMMIT_PREP && delta < fs_info->commit_interval) { spin_unlock(&fs_info->trans_lock); - delay -= msecs_to_jiffies((delta - 1) * 1000); + delay -= secs_to_jiffies(delta - 1); delay = min(delay, - msecs_to_jiffies(fs_info->commit_interval * 1000)); + secs_to_jiffies(fs_info->commit_interval)); goto sleep; } transid = cur->transid; From d1f6d6c537d488b1a8f04091c7751124985a0ab9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 14 Mar 2025 17:09:04 +0100 Subject: [PATCH 1077/2157] rust: pci: use to_result() in enable_device_mem() Simplify enable_device_mem() by using to_result() to handle the return value of the corresponding FFI call. Reviewed-by: Benno Lossin Signed-off-by: Danilo Krummrich Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20250314160932.100165-2-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/pci.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 4c98b5b9aa1e..386484dcf36e 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -382,12 +382,7 @@ pub fn device_id(&self) -> u16 { /// Enable memory resources for this device. pub fn enable_device_mem(&self) -> Result { // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. - let ret = unsafe { bindings::pci_enable_device_mem(self.as_raw()) }; - if ret != 0 { - Err(Error::from_errno(ret)) - } else { - Ok(()) - } + to_result(unsafe { bindings::pci_enable_device_mem(self.as_raw()) }) } /// Enable bus-mastering for this device. From 4d032779ab321baa1a430ff27791e5e0c98a0c2f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 14 Mar 2025 17:09:05 +0100 Subject: [PATCH 1078/2157] rust: device: implement device context marker Some bus device functions should only be called from bus callbacks, such as probe(), remove(), resume(), suspend(), etc. To ensure this add device context marker structs, that can be used as generics for bus device implementations. Reviewed-by: Benno Lossin Suggested-by: Benno Lossin Signed-off-by: Danilo Krummrich Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20250314160932.100165-3-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/device.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index db2d9658ba47..21b343a1dc4d 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -209,6 +209,32 @@ unsafe impl Send for Device {} // synchronization in `struct device`. unsafe impl Sync for Device {} +/// Marker trait for the context of a bus specific device. +/// +/// Some functions of a bus specific device should only be called from a certain context, i.e. bus +/// callbacks, such as `probe()`. +/// +/// This is the marker trait for structures representing the context of a bus specific device. +pub trait DeviceContext: private::Sealed {} + +/// The [`Normal`] context is the context of a bus specific device when it is not an argument of +/// any bus callback. +pub struct Normal; + +/// The [`Core`] context is the context of a bus specific device when it is supplied as argument of +/// any of the bus callbacks, such as `probe()`. +pub struct Core; + +mod private { + pub trait Sealed {} + + impl Sealed for super::Core {} + impl Sealed for super::Normal {} +} + +impl DeviceContext for Core {} +impl DeviceContext for Normal {} + #[doc(hidden)] #[macro_export] macro_rules! dev_printk { From 7b948a2af6b5d64a25c14da8f63d8084ea527cd9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 14 Mar 2025 17:09:06 +0100 Subject: [PATCH 1079/2157] rust: pci: fix unrestricted &mut pci::Device As by now, pci::Device is implemented as: #[derive(Clone)] pub struct Device(ARef); This may be convenient, but has the implication that drivers can call device methods that require a mutable reference concurrently at any point of time. Instead define pci::Device as pub struct Device( Opaque, PhantomData, ); and manually implement the AlwaysRefCounted trait. With this we can implement methods that should only be called from bus callbacks (such as probe()) for pci::Device. Consequently, we make this type accessible in bus callbacks only. Arbitrary references taken by the driver are still of type ARef and hence don't provide access to methods that are reserved for bus callbacks. Fixes: 1bd8b6b2c5d3 ("rust: pci: add basic PCI device / driver abstractions") Reviewed-by: Benno Lossin Signed-off-by: Danilo Krummrich Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20250314160932.100165-4-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/pci.rs | 136 ++++++++++++++++++++------------ samples/rust/rust_driver_pci.rs | 8 +- 2 files changed, 91 insertions(+), 53 deletions(-) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 386484dcf36e..0ac6cef74f81 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -6,7 +6,7 @@ use crate::{ alloc::flags::*, - bindings, container_of, device, + bindings, device, device_id::RawDeviceId, devres::Devres, driver, @@ -17,7 +17,11 @@ types::{ARef, ForeignOwnable, Opaque}, ThisModule, }; -use core::{ops::Deref, ptr::addr_of_mut}; +use core::{ + marker::PhantomData, + ops::Deref, + ptr::{addr_of_mut, NonNull}, +}; use kernel::prelude::*; /// An adapter for the registration of PCI drivers. @@ -60,17 +64,16 @@ extern "C" fn probe_callback( ) -> kernel::ffi::c_int { // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a // `struct pci_dev`. - let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; - // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call - // above. - let mut pdev = unsafe { Device::from_dev(dev) }; + // + // INVARIANT: `pdev` is valid for the duration of `probe_callback()`. + let pdev = unsafe { &*pdev.cast::>() }; // SAFETY: `DeviceId` is a `#[repr(transparent)` wrapper of `struct pci_device_id` and // does not add additional invariants, so it's safe to transmute. let id = unsafe { &*id.cast::() }; let info = T::ID_TABLE.info(id.index()); - match T::probe(&mut pdev, info) { + match T::probe(pdev, info) { Ok(data) => { // Let the `struct pci_dev` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a @@ -192,7 +195,7 @@ macro_rules! pci_device_table { /// # Example /// ///``` -/// # use kernel::{bindings, pci}; +/// # use kernel::{bindings, device::Core, pci}; /// /// struct MyDriver; /// @@ -210,7 +213,7 @@ macro_rules! pci_device_table { /// const ID_TABLE: pci::IdTable = &PCI_TABLE; /// /// fn probe( -/// _pdev: &mut pci::Device, +/// _pdev: &pci::Device, /// _id_info: &Self::IdInfo, /// ) -> Result>> { /// Err(ENODEV) @@ -234,20 +237,23 @@ pub trait Driver { /// /// Called when a new platform device is added or discovered. /// Implementers should attempt to initialize the device here. - fn probe(dev: &mut Device, id_info: &Self::IdInfo) -> Result>>; + fn probe(dev: &Device, id_info: &Self::IdInfo) -> Result>>; } /// The PCI device representation. /// -/// A PCI device is based on an always reference counted `device:Device` instance. Cloning a PCI -/// device, hence, also increments the base device' reference count. +/// This structure represents the Rust abstraction for a C `struct pci_dev`. The implementation +/// abstracts the usage of an already existing C `struct pci_dev` within Rust code that we get +/// passed from the C side. /// /// # Invariants /// -/// `Device` hold a valid reference of `ARef` whose underlying `struct device` is a -/// member of a `struct pci_dev`. -#[derive(Clone)] -pub struct Device(ARef); +/// A [`Device`] instance represents a valid `struct device` created by the C portion of the kernel. +#[repr(transparent)] +pub struct Device( + Opaque, + PhantomData, +); /// A PCI BAR to perform I/O-Operations on. /// @@ -256,13 +262,13 @@ pub trait Driver { /// `Bar` always holds an `IoRaw` inststance that holds a valid pointer to the start of the I/O /// memory mapped PCI bar and its size. pub struct Bar { - pdev: Device, + pdev: ARef, io: IoRaw, num: i32, } impl Bar { - fn new(pdev: Device, num: u32, name: &CStr) -> Result { + fn new(pdev: &Device, num: u32, name: &CStr) -> Result { let len = pdev.resource_len(num)?; if len == 0 { return Err(ENOMEM); @@ -300,12 +306,16 @@ fn new(pdev: Device, num: u32, name: &CStr) -> Result { // `pdev` is valid by the invariants of `Device`. // `ioptr` is guaranteed to be the start of a valid I/O mapped memory region. // `num` is checked for validity by a previous call to `Device::resource_len`. - unsafe { Self::do_release(&pdev, ioptr, num) }; + unsafe { Self::do_release(pdev, ioptr, num) }; return Err(err); } }; - Ok(Bar { pdev, io, num }) + Ok(Bar { + pdev: pdev.into(), + io, + num, + }) } /// # Safety @@ -351,20 +361,8 @@ fn deref(&self) -> &Self::Target { } impl Device { - /// Create a PCI Device instance from an existing `device::Device`. - /// - /// # Safety - /// - /// `dev` must be an `ARef` whose underlying `bindings::device` is a member of - /// a `bindings::pci_dev`. - pub unsafe fn from_dev(dev: ARef) -> Self { - Self(dev) - } - fn as_raw(&self) -> *mut bindings::pci_dev { - // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` - // embedded in `struct pci_dev`. - unsafe { container_of!(self.0.as_raw(), bindings::pci_dev, dev) as _ } + self.0.get() } /// Returns the PCI vendor ID. @@ -379,18 +377,6 @@ pub fn device_id(&self) -> u16 { unsafe { (*self.as_raw()).device } } - /// Enable memory resources for this device. - pub fn enable_device_mem(&self) -> Result { - // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. - to_result(unsafe { bindings::pci_enable_device_mem(self.as_raw()) }) - } - - /// Enable bus-mastering for this device. - pub fn set_master(&self) { - // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. - unsafe { bindings::pci_set_master(self.as_raw()) }; - } - /// Returns the size of the given PCI bar resource. pub fn resource_len(&self, bar: u32) -> Result { if !Bar::index_is_valid(bar) { @@ -410,7 +396,7 @@ pub fn iomap_region_sized( bar: u32, name: &CStr, ) -> Result>> { - let bar = Bar::::new(self.clone(), bar, name)?; + let bar = Bar::::new(self, bar, name)?; let devres = Devres::new(self.as_ref(), bar, GFP_KERNEL)?; Ok(devres) @@ -422,8 +408,60 @@ pub fn iomap_region(&self, bar: u32, name: &CStr) -> Result> { } } -impl AsRef for Device { - fn as_ref(&self) -> &device::Device { - &self.0 +impl Device { + /// Enable memory resources for this device. + pub fn enable_device_mem(&self) -> Result { + // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. + to_result(unsafe { bindings::pci_enable_device_mem(self.as_raw()) }) + } + + /// Enable bus-mastering for this device. + pub fn set_master(&self) { + // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`. + unsafe { bindings::pci_set_master(self.as_raw()) }; + } +} + +impl Deref for Device { + type Target = Device; + + fn deref(&self) -> &Self::Target { + let ptr: *const Self = self; + + // CAST: `Device` is a transparent wrapper of `Opaque`. + let ptr = ptr.cast::(); + + // SAFETY: `ptr` was derived from `&self`. + unsafe { &*ptr } + } +} + +impl From<&Device> for ARef { + fn from(dev: &Device) -> Self { + (&**dev).into() + } +} + +// SAFETY: Instances of `Device` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Device { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::pci_dev_get(self.as_raw()) }; + } + + unsafe fn dec_ref(obj: NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::pci_dev_put(obj.cast().as_ptr()) } + } +} + +impl AsRef for Device { + fn as_ref(&self) -> &device::Device { + // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid + // `struct pci_dev`. + let dev = unsafe { addr_of_mut!((*self.as_raw()).dev) }; + + // SAFETY: `dev` points to a valid `struct device`. + unsafe { device::Device::as_ref(dev) } } } diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index ddc52db71a82..6cee912b6a66 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -4,7 +4,7 @@ //! //! To make this driver probe, QEMU must be run with `-device pci-testdev`. -use kernel::{bindings, c_str, devres::Devres, pci, prelude::*}; +use kernel::{bindings, c_str, device::Core, devres::Devres, pci, prelude::*, types::ARef}; struct Regs; @@ -26,7 +26,7 @@ impl TestIndex { } struct SampleDriver { - pdev: pci::Device, + pdev: ARef, bar: Devres, } @@ -62,7 +62,7 @@ impl pci::Driver for SampleDriver { const ID_TABLE: pci::IdTable = &PCI_TABLE; - fn probe(pdev: &mut pci::Device, info: &Self::IdInfo) -> Result>> { + fn probe(pdev: &pci::Device, info: &Self::IdInfo) -> Result>> { dev_dbg!( pdev.as_ref(), "Probe Rust PCI driver sample (PCI ID: 0x{:x}, 0x{:x}).\n", @@ -77,7 +77,7 @@ fn probe(pdev: &mut pci::Device, info: &Self::IdInfo) -> Result>> let drvdata = KBox::new( Self { - pdev: pdev.clone(), + pdev: pdev.into(), bar, }, GFP_KERNEL, From 4d320e30ee04c25c660eca2bb33e846ebb71a79a Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 14 Mar 2025 17:09:07 +0100 Subject: [PATCH 1080/2157] rust: platform: fix unrestricted &mut platform::Device As by now, platform::Device is implemented as: #[derive(Clone)] pub struct Device(ARef); This may be convenient, but has the implication that drivers can call device methods that require a mutable reference concurrently at any point of time. Instead define platform::Device as pub struct Device( Opaque, PhantomData, ); and manually implement the AlwaysRefCounted trait. With this we can implement methods that should only be called from bus callbacks (such as probe()) for platform::Device. Consequently, we make this type accessible in bus callbacks only. Arbitrary references taken by the driver are still of type ARef and hence don't provide access to methods that are reserved for bus callbacks. Fixes: 683a63befc73 ("rust: platform: add basic platform device / driver abstractions") Reviewed-by: Benno Lossin Signed-off-by: Danilo Krummrich Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20250314160932.100165-5-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/platform.rs | 95 +++++++++++++++++++--------- samples/rust/rust_driver_platform.rs | 11 ++-- 2 files changed, 72 insertions(+), 34 deletions(-) diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 50e6b0421813..c77c9f2e9aea 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -5,7 +5,7 @@ //! C header: [`include/linux/platform_device.h`](srctree/include/linux/platform_device.h) use crate::{ - bindings, container_of, device, driver, + bindings, device, driver, error::{to_result, Result}, of, prelude::*, @@ -14,7 +14,11 @@ ThisModule, }; -use core::ptr::addr_of_mut; +use core::{ + marker::PhantomData, + ops::Deref, + ptr::{addr_of_mut, NonNull}, +}; /// An adapter for the registration of platform drivers. pub struct Adapter(T); @@ -54,14 +58,14 @@ unsafe fn unregister(pdrv: &Opaque) { impl Adapter { extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int { - // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`. - let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; - // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the - // call above. - let mut pdev = unsafe { Device::from_dev(dev) }; + // SAFETY: The platform bus only ever calls the probe callback with a valid pointer to a + // `struct platform_device`. + // + // INVARIANT: `pdev` is valid for the duration of `probe_callback()`. + let pdev = unsafe { &*pdev.cast::>() }; let info = ::id_info(pdev.as_ref()); - match T::probe(&mut pdev, info) { + match T::probe(pdev, info) { Ok(data) => { // Let the `struct platform_device` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a @@ -120,7 +124,7 @@ macro_rules! module_platform_driver { /// # Example /// ///``` -/// # use kernel::{bindings, c_str, of, platform}; +/// # use kernel::{bindings, c_str, device::Core, of, platform}; /// /// struct MyDriver; /// @@ -138,7 +142,7 @@ macro_rules! module_platform_driver { /// const OF_ID_TABLE: Option> = Some(&OF_TABLE); /// /// fn probe( -/// _pdev: &mut platform::Device, +/// _pdev: &platform::Device, /// _id_info: Option<&Self::IdInfo>, /// ) -> Result>> { /// Err(ENODEV) @@ -160,41 +164,72 @@ pub trait Driver { /// /// Called when a new platform device is added or discovered. /// Implementers should attempt to initialize the device here. - fn probe(dev: &mut Device, id_info: Option<&Self::IdInfo>) -> Result>>; + fn probe(dev: &Device, id_info: Option<&Self::IdInfo>) + -> Result>>; } /// The platform device representation. /// -/// A platform device is based on an always reference counted `device:Device` instance. Cloning a -/// platform device, hence, also increments the base device' reference count. +/// This structure represents the Rust abstraction for a C `struct platform_device`. The +/// implementation abstracts the usage of an already existing C `struct platform_device` within Rust +/// code that we get passed from the C side. /// /// # Invariants /// -/// `Device` holds a valid reference of `ARef` whose underlying `struct device` is a -/// member of a `struct platform_device`. -#[derive(Clone)] -pub struct Device(ARef); +/// A [`Device`] instance represents a valid `struct platform_device` created by the C portion of +/// the kernel. +#[repr(transparent)] +pub struct Device( + Opaque, + PhantomData, +); impl Device { - /// Convert a raw kernel device into a `Device` - /// - /// # Safety - /// - /// `dev` must be an `Aref` whose underlying `bindings::device` is a member of a - /// `bindings::platform_device`. - unsafe fn from_dev(dev: ARef) -> Self { - Self(dev) + fn as_raw(&self) -> *mut bindings::platform_device { + self.0.get() + } +} + +impl Deref for Device { + type Target = Device; + + fn deref(&self) -> &Self::Target { + let ptr: *const Self = self; + + // CAST: `Device` is a transparent wrapper of `Opaque`. + let ptr = ptr.cast::(); + + // SAFETY: `ptr` was derived from `&self`. + unsafe { &*ptr } + } +} + +impl From<&Device> for ARef { + fn from(dev: &Device) -> Self { + (&**dev).into() + } +} + +// SAFETY: Instances of `Device` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Device { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::get_device(self.as_ref().as_raw()) }; } - fn as_raw(&self) -> *mut bindings::platform_device { - // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` - // embedded in `struct platform_device`. - unsafe { container_of!(self.0.as_raw(), bindings::platform_device, dev) }.cast_mut() + unsafe fn dec_ref(obj: NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::platform_device_put(obj.cast().as_ptr()) } } } impl AsRef for Device { fn as_ref(&self) -> &device::Device { - &self.0 + // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid + // `struct platform_device`. + let dev = unsafe { addr_of_mut!((*self.as_raw()).dev) }; + + // SAFETY: `dev` points to a valid `struct device`. + unsafe { device::Device::as_ref(dev) } } } diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs index 8120609e2940..9bb66db0a4f4 100644 --- a/samples/rust/rust_driver_platform.rs +++ b/samples/rust/rust_driver_platform.rs @@ -2,10 +2,10 @@ //! Rust Platform driver sample. -use kernel::{c_str, of, platform, prelude::*}; +use kernel::{c_str, device::Core, of, platform, prelude::*, types::ARef}; struct SampleDriver { - pdev: platform::Device, + pdev: ARef, } struct Info(u32); @@ -21,14 +21,17 @@ impl platform::Driver for SampleDriver { type IdInfo = Info; const OF_ID_TABLE: Option> = Some(&OF_TABLE); - fn probe(pdev: &mut platform::Device, info: Option<&Self::IdInfo>) -> Result>> { + fn probe( + pdev: &platform::Device, + info: Option<&Self::IdInfo>, + ) -> Result>> { dev_dbg!(pdev.as_ref(), "Probe Rust Platform driver sample.\n"); if let Some(info) = info { dev_info!(pdev.as_ref(), "Probed with info: '{}'.\n", info.0); } - let drvdata = KBox::new(Self { pdev: pdev.clone() }, GFP_KERNEL)?; + let drvdata = KBox::new(Self { pdev: pdev.into() }, GFP_KERNEL)?; Ok(drvdata.into()) } From a58f3dcf20ea9e7e968ee8369fd782bbb53dff73 Mon Sep 17 00:00:00 2001 From: Seongjun Kim Date: Wed, 26 Feb 2025 10:42:04 -0800 Subject: [PATCH 1081/2157] samples/damon: a typo in the kconfig - sameple There is a typo in the Kconfig file of the damon sample module. Correct it: s/sameple/sample/ Link: https://lkml.kernel.org/r/20250226184204.29370-1-sj@kernel.org Signed-off-by: Seongjun Kim Signed-off-by: SeongJae Park Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- samples/damon/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/damon/Kconfig b/samples/damon/Kconfig index 63f6dcd71daa..564c49ed69a2 100644 --- a/samples/damon/Kconfig +++ b/samples/damon/Kconfig @@ -3,7 +3,7 @@ menu "DAMON Samples" config SAMPLE_DAMON_WSSE - bool "DAMON sameple module for working set size estimation" + bool "DAMON sample module for working set size estimation" depends on DAMON && DAMON_VADDR help This builds DAMON sample module for working set size estimation. @@ -15,7 +15,7 @@ config SAMPLE_DAMON_WSSE If unsure, say N. config SAMPLE_DAMON_PRCL - bool "DAMON sameple module for access-aware proactive reclamation" + bool "DAMON sample module for access-aware proactive reclamation" depends on DAMON && DAMON_VADDR help This builds DAMON sample module for access-aware proactive From 173a3dc051bda746e284ff3933fcf6771d7247b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 26 Feb 2025 15:36:12 +0000 Subject: [PATCH 1082/2157] mm: assert the folio is locked in folio_start_writeback() The folio must be locked when we start writeback in order to prevent writeback from being started twice on the same folio. I don't expect this to catch any problems, but it should be good documentation. Link: https://lkml.kernel.org/r/20250226153614.3774896-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: "Darrick J. Wong" Signed-off-by: Andrew Morton --- mm/page-writeback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index eb55ece39c56..8b325aa525eb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -3109,6 +3109,7 @@ void __folio_start_writeback(struct folio *folio, bool keep_write) int access_ret; VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (mapping && mapping_use_writeback_tags(mapping)) { XA_STATE(xas, &mapping->i_pages, folio_index(folio)); From 66add5e9093b4b4112aebacbc91d43ae4e030456 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 26 Feb 2025 14:22:53 +0100 Subject: [PATCH 1083/2157] lib/test_hmm: make dmirror_atomic_map() consume a single page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: cleanups for device-exclusive entries (hmm)", v2. Some smaller device-exclusive cleanups I have lying around. This patch (of 5): The caller now always passes a single page; let's simplify, and return "0" on success. Link: https://lkml.kernel.org/r/20250226132257.2826043-1-david@redhat.com Link: https://lkml.kernel.org/r/20250226132257.2826043-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Alistair Popple Cc: Jason Gunthorpe Cc: Jérôme Glisse Signed-off-by: Andrew Morton --- lib/test_hmm.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e4afca8d1880..39a2286f8592 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -706,34 +706,23 @@ static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start, return 0; } -static int dmirror_atomic_map(unsigned long start, unsigned long end, - struct page **pages, struct dmirror *dmirror) +static int dmirror_atomic_map(unsigned long addr, struct page *page, + struct dmirror *dmirror) { - unsigned long pfn, mapped = 0; - int i; + void *entry; /* Map the migrated pages into the device's page tables. */ mutex_lock(&dmirror->mutex); - for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) { - void *entry; - - if (!pages[i]) - continue; - - entry = pages[i]; - entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC); - entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); - if (xa_is_err(entry)) { - mutex_unlock(&dmirror->mutex); - return xa_err(entry); - } - - mapped++; + entry = xa_tag_pointer(page, DPT_XA_TAG_ATOMIC); + entry = xa_store(&dmirror->pt, addr >> PAGE_SHIFT, entry, GFP_ATOMIC); + if (xa_is_err(entry)) { + mutex_unlock(&dmirror->mutex); + return xa_err(entry); } mutex_unlock(&dmirror->mutex); - return mapped; + return 0; } static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, @@ -803,8 +792,7 @@ static int dmirror_exclusive(struct dmirror *dmirror, break; } - ret = dmirror_atomic_map(addr, addr + PAGE_SIZE, &page, dmirror); - ret = ret == 1 ? 0 : -EBUSY; + ret = dmirror_atomic_map(addr, page, dmirror); folio_unlock(folio); folio_put(folio); } From db0f6e674c2b61ff9d8880e7ae5ed11681fe9651 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 26 Feb 2025 14:22:54 +0100 Subject: [PATCH 1084/2157] mm/memory: remove PageAnonExclusive sanity-check in restore_exclusive_pte() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit b832a354d787 ("mm/memory: page_add_anon_rmap() -> folio_add_anon_rmap_pte()") we accidentally changed the sanity check to essentially ignore anonymous folio by mis-placing the "!" ... but we really always only get anonymous folios in restore_exclusive_pte(). However, in the meantime we removed the separate "writable device-exclusive entries" and always detect if the PTE can be writable using can_change_pte_writable() -- which also consults PageAnonExclusive. So let's just get rid of this sanity check completely. Link: https://lkml.kernel.org/r/20250226132257.2826043-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alistair Popple Cc: Jason Gunthorpe Cc: Jérôme Glisse Signed-off-by: Andrew Morton --- mm/memory.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 270c9357475d..b207e3175392 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -738,9 +738,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, pte = pte_mkdirty(pte); pte = pte_mkwrite(pte, vma); } - - VM_BUG_ON_FOLIO(pte_write(pte) && (!folio_test_anon(folio) && - PageAnonExclusive(page)), folio); set_pte_at(vma->vm_mm, address, ptep, pte); /* From 248624f9c6b454c352fd7a95921706e489ae990f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 26 Feb 2025 14:22:55 +0100 Subject: [PATCH 1085/2157] mm/memory: pass folio and pte to restore_exclusive_pte() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's pass the folio and the pte to restore_exclusive_pte(), so we can avoid repeated page_folio() and ptep_get(). To do that, pass the pte to try_restore_exclusive_pte() and use a folio in there already. While at it, just avoid the "swp_entry_t entry" variable in try_restore_exclusive_pte() and add a folio-locked check to restore_exclusive_pte(). Link: https://lkml.kernel.org/r/20250226132257.2826043-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Alistair Popple Cc: Jason Gunthorpe Cc: Jérôme Glisse Signed-off-by: Andrew Morton --- mm/memory.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b207e3175392..7b0317d07d4f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -717,14 +717,13 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, #endif static void restore_exclusive_pte(struct vm_area_struct *vma, - struct page *page, unsigned long address, - pte_t *ptep) + struct folio *folio, struct page *page, unsigned long address, + pte_t *ptep, pte_t orig_pte) { - struct folio *folio = page_folio(page); - pte_t orig_pte; pte_t pte; - orig_pte = ptep_get(ptep); + VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); + pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); @@ -751,16 +750,15 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, * Tries to restore an exclusive pte if the page lock can be acquired without * sleeping. */ -static int -try_restore_exclusive_pte(pte_t *src_pte, struct vm_area_struct *vma, - unsigned long addr) +static int try_restore_exclusive_pte(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, pte_t orig_pte) { - swp_entry_t entry = pte_to_swp_entry(ptep_get(src_pte)); - struct page *page = pfn_swap_entry_to_page(entry); + struct page *page = pfn_swap_entry_to_page(pte_to_swp_entry(orig_pte)); + struct folio *folio = page_folio(page); - if (trylock_page(page)) { - restore_exclusive_pte(vma, page, addr, src_pte); - unlock_page(page); + if (folio_trylock(folio)) { + restore_exclusive_pte(vma, folio, page, addr, ptep, orig_pte); + folio_unlock(folio); return 0; } @@ -866,7 +864,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, * (ie. COW) mappings. */ VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); - if (try_restore_exclusive_pte(src_pte, src_vma, addr)) + if (try_restore_exclusive_pte(src_vma, addr, src_pte, orig_pte)) return -EBUSY; return -ENOENT; } else if (is_pte_marker_entry(entry)) { @@ -3987,7 +3985,8 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) - restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); + restore_exclusive_pte(vma, folio, vmf->page, vmf->address, + vmf->pte, vmf->orig_pte); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); From 2f95381f8a4cb1fd19ed67523d4bc98d8a117ec1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 26 Feb 2025 14:22:56 +0100 Subject: [PATCH 1086/2157] mm/memory: document restore_exclusive_pte() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's document how this function is to be used, and why the folio lock is involved. Link: https://lkml.kernel.org/r/20250226132257.2826043-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alistair Popple Cc: Jason Gunthorpe Cc: Jérôme Glisse Signed-off-by: Andrew Morton --- mm/memory.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 7b0317d07d4f..5b475d31ce41 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -716,6 +716,32 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, } #endif +/** + * restore_exclusive_pte - Restore a device-exclusive entry + * @vma: VMA covering @address + * @folio: the mapped folio + * @page: the mapped folio page + * @address: the virtual address + * @ptep: pte pointer into the locked page table mapping the folio page + * @orig_pte: pte value at @ptep + * + * Restore a device-exclusive non-swap entry to an ordinary present pte. + * + * The folio and the page table must be locked, and MMU notifiers must have + * been called to invalidate any (exclusive) device mappings. + * + * Locking the folio makes sure that anybody who just converted the pte to + * a device-exclusive entry can map it into the device to make forward + * progress without others converting it back until the folio was unlocked. + * + * If the folio lock ever becomes an issue, we can stop relying on the folio + * lock; it might make some scenarios with heavy thrashing less likely to + * make forward progress, but these scenarios might not be valid use cases. + * + * Note that the folio lock does not protect against all cases of concurrent + * page table modifications (e.g., MADV_DONTNEED, mprotect), so device drivers + * must use MMU notifiers to sync against any concurrent changes. + */ static void restore_exclusive_pte(struct vm_area_struct *vma, struct folio *folio, struct page *page, unsigned long address, pte_t *ptep, pte_t orig_pte) From 720ba85040a6549674e5599685ca7df86a902448 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 26 Feb 2025 14:22:57 +0100 Subject: [PATCH 1087/2157] mm/mmu_notifier: use MMU_NOTIFY_CLEAR in remove_device_exclusive_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's limit the use of MMU_NOTIFY_EXCLUSIVE to the case where we convert a present PTE to device-exclusive. For the other case, we can simply use MMU_NOTIFY_CLEAR, because it really is clearing the device-exclusive entry first, to then install the present entry. Update the documentation of MMU_NOTIFY_EXCLUSIVE, to document the single use case more thoroughly. If ever required, we could add a separate MMU_NOTIFY_CLEAR_EXCLUSIVE; for now using MMU_NOTIFY_CLEAR seems to be sufficient. Link: https://lkml.kernel.org/r/20250226132257.2826043-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: Alistair Popple Cc: Jason Gunthorpe Cc: Jérôme Glisse Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 8 ++++---- mm/memory.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index d4e714661826..bc2402a45741 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -43,10 +43,10 @@ struct mmu_interval_notifier; * a device driver to possibly ignore the invalidation if the * owner field matches the driver's device private pgmap owner. * - * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no - * longer have exclusive access to the page. When sent during creation of an - * exclusive range the owner will be initialised to the value provided by the - * caller of make_device_exclusive(), otherwise the owner will be NULL. + * @MMU_NOTIFY_EXCLUSIVE: conversion of a page table entry to device-exclusive. + * The owner is initialized to the value provided by the caller of + * make_device_exclusive(), such that this caller can filter out these + * events. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, diff --git a/mm/memory.c b/mm/memory.c index 5b475d31ce41..4c12a05fabd9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4003,7 +4003,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) folio_put(folio); return ret; } - mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, + mmu_notifier_range_init_owner(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); mmu_notifier_invalidate_range_start(&range); From 9a4f9e2a81d1d8d159110e135dddd708266241f1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 26 Feb 2025 17:54:00 +0530 Subject: [PATCH 1088/2157] configs: drop GENERIC_PTDUMP from debug.config Patch series "mm: Rework generic PTDUMP configs", v3. The series reworks generic PTDUMP configs before eventually renaming them after some basic cleanups first. This patch (of 5): The platforms that support GENERIC_PTDUMP select the config explicitly. But enabling this feature on platforms that don't really support - does nothing or might cause a build failure. Hence just drop GENERIC_PTDUMP from generic debug.config Link: https://lkml.kernel.org/r/20250226122404.1927473-1-anshuman.khandual@arm.com Link: https://lkml.kernel.org/r/20250226122404.1927473-2-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Steven Price Cc: Christophe Leroy Cc: Mark Rutland Cc: Catalin Marinas Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Madhavan Srinivasan Cc: Marc Zyngier Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- kernel/configs/debug.config | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 20552f163930..8aafd050b754 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -73,7 +73,6 @@ CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_VM_RB=y CONFIG_DEBUG_VM_VMACACHE=y -CONFIG_GENERIC_PTDUMP=y CONFIG_KASAN=y CONFIG_KASAN_GENERIC=y CONFIG_KASAN_INLINE=y From 2c5e6ac2db64ace51f66a9f3b3b3ab9553d748e8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 26 Feb 2025 17:54:01 +0530 Subject: [PATCH 1089/2157] arch/powerpc: drop GENERIC_PTDUMP from mpc885_ads_defconfig GENERIC_PTDUMP gets selected on powerpc explicitly and hence can be dropped off from mpc885_ads_defconfig. Replace with CONFIG_PTDUMP_DEBUGFS instead. Link: https://lkml.kernel.org/r/20250226122404.1927473-3-anshuman.khandual@arm.com Fixes: e084728393a5 ("powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP") Signed-off-by: Anshuman Khandual Suggested-by: Christophe Leroy Reviewed-by: Christophe Leroy Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Catalin Marinas Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Marc Zyngier Cc: Mark Rutland Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Steven Price Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/powerpc/configs/mpc885_ads_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 77306be62e9e..129355f87f80 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -78,4 +78,4 @@ CONFIG_DEBUG_VM_PGTABLE=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_GENERIC_PTDUMP=y +CONFIG_PTDUMP_DEBUGFS=y From a5c96dfd47d88658ac9cdece96e98c2ef17ab465 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 26 Feb 2025 17:54:02 +0530 Subject: [PATCH 1090/2157] docs: arm64: drop PTDUMP config options from ptdump.rst Both GENERIC_PTDUMP and PTDUMP_CORE are not user selectable config options. Just drop these from documentation. Link: https://lkml.kernel.org/r/20250226122404.1927473-4-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Suggested-by: Steven Price Acked-by: Catalin Marinas Cc: Will Deacon Cc: Jonathan Corbet Cc: Christophe Leroy Cc: Heiko Carstens Cc: Ingo Molnar Cc: Madhavan Srinivasan Cc: Marc Zyngier Cc: Mark Rutland Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Gleixner Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- Documentation/arch/arm64/ptdump.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/arch/arm64/ptdump.rst b/Documentation/arch/arm64/ptdump.rst index 5dcfc5d7cddf..51eb902ba41a 100644 --- a/Documentation/arch/arm64/ptdump.rst +++ b/Documentation/arch/arm64/ptdump.rst @@ -22,8 +22,6 @@ offlining of memory being accessed by the ptdump code. In order to dump the kernel page tables, enable the following configurations and mount debugfs:: - CONFIG_GENERIC_PTDUMP=y - CONFIG_PTDUMP_CORE=y CONFIG_PTDUMP_DEBUGFS=y mount -t debugfs nodev /sys/kernel/debug From 3f54872454a927a2b5f9fb3e2d3cdbd51b3666b7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 26 Feb 2025 17:54:03 +0530 Subject: [PATCH 1091/2157] mm: make DEBUG_WX depdendent on GENERIC_PTDUMP DEBUG_WX selects PTDUMP_CORE without even ensuring that the given platform implements GENERIC_PTDUMP. This problem has been latent until now, as all the platforms subscribing ARCH_HAS_DEBUG_WX also subscribe GENERIC_PTDUMP. Link: https://lkml.kernel.org/r/20250226122404.1927473-5-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Steven Price Reviewed-by: Christophe Leroy Cc: Catalin Marinas Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Madhavan Srinivasan Cc: Marc Zyngier Cc: Mark Rutland Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 41a58536531d..a51a1149909a 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -186,6 +186,7 @@ config ARCH_HAS_DEBUG_WX config DEBUG_WX bool "Warn on W+X mappings at boot" depends on ARCH_HAS_DEBUG_WX + depends on GENERIC_PTDUMP depends on MMU select PTDUMP_CORE help From f9aad622006bd64c28fdf73c03a1c5139fcbf049 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 26 Feb 2025 17:54:04 +0530 Subject: [PATCH 1092/2157] mm: rename GENERIC_PTDUMP and PTDUMP_CORE Platforms subscribe into generic ptdump implementation via GENERIC_PTDUMP. But generic ptdump gets enabled via PTDUMP_CORE. These configs combination is confusing as they sound very similar and does not differentiate between platform's feature subscription and feature enablement for ptdump. Rename the configs as ARCH_HAS_PTDUMP and PTDUMP making it more clear and improve readability. Link: https://lkml.kernel.org/r/20250226122404.1927473-6-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Christophe Leroy (powerpc) Acked-by: Catalin Marinas [arm64] Cc: Will Deacon Cc: Jonathan Corbet Cc: Marc Zyngier Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Christophe Leroy Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Steven Price Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/ptdump.h | 4 ++-- arch/arm64/kvm/Kconfig | 4 ++-- arch/arm64/mm/Makefile | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/mm/Makefile | 2 +- arch/riscv/Kconfig | 2 +- arch/riscv/mm/Makefile | 2 +- arch/s390/Kconfig | 2 +- arch/s390/mm/Makefile | 2 +- arch/x86/Kconfig | 2 +- arch/x86/Kconfig.debug | 2 +- arch/x86/mm/Makefile | 2 +- mm/Kconfig.debug | 12 ++++++------ mm/Makefile | 2 +- 15 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 940343beb3d4..5cf688ee01b7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -41,6 +41,7 @@ config ARM64 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_HW_PTE_YOUNG @@ -157,7 +158,6 @@ config ARM64 select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP - select GENERIC_PTDUMP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 6cf4aae05219..b2931d1ae0fb 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -7,7 +7,7 @@ #include -#ifdef CONFIG_PTDUMP_CORE +#ifdef CONFIG_PTDUMP #include #include @@ -70,6 +70,6 @@ static inline void ptdump_debugfs_register(struct ptdump_info *info, #else static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { } -#endif /* CONFIG_PTDUMP_CORE */ +#endif /* CONFIG_PTDUMP */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index ead632ad01b4..096e45acadb2 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -71,8 +71,8 @@ config PTDUMP_STAGE2_DEBUGFS depends on KVM depends on DEBUG_KERNEL depends on DEBUG_FS - depends on GENERIC_PTDUMP - select PTDUMP_CORE + depends on ARCH_HAS_PTDUMP + select PTDUMP default n help Say Y here if you want to show the stage-2 kernel pagetables diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index fc92170a8f37..c26489cf96cd 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -5,7 +5,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o fixmap.o obj-$(CONFIG_ARM64_CONTPTE) += contpte.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 424f188e62d9..6f1ae41dcf85 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 @@ -206,7 +207,6 @@ config PPC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_TIME_NS diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 0fe2f085c05a..8c1582b2987d 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,5 +15,5 @@ obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump/ +obj-$(CONFIG_PTDUMP) += ptdump/ obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..353cf41d01f4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -43,6 +43,7 @@ config RISCV select ARCH_HAS_PMEM_API select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PREPARE_SYNC_CORE_CMD + select ARCH_HAS_PTDUMP if MMU select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU @@ -112,7 +113,6 @@ config RISCV select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP - select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index cbe4d775ef56..b916a68d324a 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -19,7 +19,7 @@ obj-y += context.o obj-y += pmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..dd9dd2f8e673 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -96,6 +96,7 @@ config S390 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -163,7 +164,6 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_TIME_NS diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..9726b91fe7e4 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,6 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cf49c130d1d0..bfd23a09b911 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,6 +26,7 @@ config X86_64 depends on 64BIT # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_PTDUMP select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE @@ -175,7 +176,6 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1eb4d23cdaae..c95c3aaadf97 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select PTDUMP_CORE + select PTDUMP help Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 690fbf48e853..e0c99a8760ca 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -39,7 +39,7 @@ CFLAGS_fault.o := -I $(src)/../include/asm/trace obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index a51a1149909a..32b65073d0cc 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -186,9 +186,9 @@ config ARCH_HAS_DEBUG_WX config DEBUG_WX bool "Warn on W+X mappings at boot" depends on ARCH_HAS_DEBUG_WX - depends on GENERIC_PTDUMP + depends on ARCH_HAS_PTDUMP depends on MMU - select PTDUMP_CORE + select PTDUMP help Generate a warning if any W+X mappings are found at boot. @@ -213,18 +213,18 @@ config DEBUG_WX If in doubt, say "Y". -config GENERIC_PTDUMP +config ARCH_HAS_PTDUMP bool -config PTDUMP_CORE +config PTDUMP bool config PTDUMP_DEBUGFS bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL depends on DEBUG_FS - depends on GENERIC_PTDUMP - select PTDUMP_CORE + depends on ARCH_HAS_PTDUMP + select PTDUMP help Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers diff --git a/mm/Makefile b/mm/Makefile index 84b1127e43a5..e7f6bbf8ae5f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -139,7 +139,7 @@ obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o From b9585a3f3e0b30b3b60c85dc39f27ed3b06fb623 Mon Sep 17 00:00:00 2001 From: Zeng Jingxiang Date: Thu, 27 Feb 2025 16:22:23 +0800 Subject: [PATCH 1093/2157] mm/list_lru: make the case where mlru is NULL as unlikely In the following memcg_list_lru_alloc() function, mlru here is almost always NULL, so in most cases this should save a function call, mark mlru as unlikely to optimize the code, and reusing the mlru for the next attempt when the tree insertion fails. do { xas_lock_irqsave(&xas, flags); if (!xas_load(&xas) && !css_is_dying(&pos->css)) { xas_store(&xas, mlru); if (!xas_error(&xas)) mlru = NULL; } xas_unlock_irqrestore(&xas, flags); } while (xas_nomem(&xas, GFP_KERNEL)); > if (mlru) kfree(mlru); Link: https://lkml.kernel.org/r/20250227082223.1173847-1-jingxiangzeng.cas@gmail.com Signed-off-by: Zeng Jingxiang Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202412290924.UTP7GH2Z-lkp@intel.com/ Suggested-by: Johannes Weiner Reviewed-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Chengming Zhou Cc: Jingxiang Zeng Cc: Kairui Song Cc: Michal Hocko Cc: Roman Gushchin Signed-off-by: Andrew Morton --- mm/list_lru.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index 7d69434c70e0..490473af3122 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -510,7 +510,7 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp) { unsigned long flags; - struct list_lru_memcg *mlru; + struct list_lru_memcg *mlru = NULL; struct mem_cgroup *pos, *parent; XA_STATE(xas, &lru->xa, 0); @@ -535,9 +535,11 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, parent = parent_mem_cgroup(pos); } - mlru = memcg_init_list_lru_one(lru, gfp); - if (!mlru) - return -ENOMEM; + if (!mlru) { + mlru = memcg_init_list_lru_one(lru, gfp); + if (!mlru) + return -ENOMEM; + } xas_set(&xas, pos->kmemcg_id); do { xas_lock_irqsave(&xas, flags); @@ -548,10 +550,11 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, } xas_unlock_irqrestore(&xas, flags); } while (xas_nomem(&xas, gfp)); - if (mlru) - kfree(mlru); } while (pos != memcg && !css_is_dying(&pos->css)); + if (unlikely(mlru)) + kfree(mlru); + return xas_error(&xas); } #else From 1eb3471bf5749ff3769ec52723bd9b8d773c7a62 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:19 -0800 Subject: [PATCH 1094/2157] mm/damon: add data structure for monitoring intervals auto-tuning Patch series "mm/damon: auto-tune aggregation interval". DAMON requires time-consuming and repetitive aggregation interval tuning. Introduce a feature for automating it using a feedback loop that aims an amount of observed access events, like auto-exposing cameras. Background: Access Frequency Monitoring and Aggregation Interval ================================================================ DAMON checks if each memory element (damon_region) is accessed or not for every user-specified time interval called 'sampling interval'. It aggregates the check intervals on per-element counter called 'nr_accesses'. DAMON users can read the counters to get the access temperature of a given element. The counters are reset for every another user-specified time interval called 'aggregation interval'. This can be illustrated as DAMON continuously capturing a snapshot of access events that happen and captured within the last aggregation interval. This implies the aggregation interval plays a key role for the quality of the snapshots, like the camera exposure time. If it is too short, the amount of access events that happened and captured for each snapshot is small, so each snapshot will show no many interesting things but just a cold and dark world with hopefuly one pale blue dot or two. If it is too long, too many events are aggregated in a single shot, so each snapshot will look like world of flames, or Muspellheim. It will be difficult to find practical insights in both cases. Problem: Time Consuming and Repetitive Tuning ============================================= The appropriate length of the aggregation interval depends on how frequently the system and workloads are making access events that DAMON can observe. Hence, users have to tune the interval with excessive amount of tests with the target system and workloads. If the system and workloads are changed, the tuning should be done again. If the characteristic of the workloads is dynamic, it becomes more challenging. It is therefore time-consuming and repetitive. The tuning challenge mainly stems from the wrong question. It is not asking users what quality of monitoring results they want, but how DAMON should operate for their hidden goal. To make the right answer, users need to fully understand DAMON's mechanisms and the characteristics of their workloads. Users shouldn't be asked to understand the underlying mechanism. Understanding the characteristics of the workloads shouldn't be the role of users but DAMON. Aim-oriented Feedback-driven Auto-Tuning ========================================= Fortunately, the appropriate length of the aggregation interval can be inferred using a feedback loop. If the current snapshots are showing no much intresting information, in other words, if it shows only rare access events, increasing the aggregation interval helps, and vice versa. We tested this theory on a few real-world workloads, and documented one of the experience with an official DAMON monitoring intervals tuning guideline. Since it is a simple theory that requires repeatable tries, it can be a good job for machines. Based on the guideline's theory, we design an automation of aggregation interval tuning, in a way similar to that of camera auto-exposure feature. It defines the amount of interesting information as the ratio of DAMON-observed access events that DAMON actually observed to theoretical maximum amount of it within each snapshot. Events are accounted in byte and sampling attempts granularity. For example, let's say there is a region of 'X' bytes size. DAMON tried access check smapling for the region 'Y' times in total for a given aggregation. Among the 'Y' attempts, 'Z' times it shown positive results. Then, the theoritical maximum number of access events for the region is 'X * Y'. And the number of access events that DAMON has observed for the region is 'X * Z'. The abount of the interesting information is '(X * Z / X * Y)'. Note that each snapshot would have multiple regions. Users can set an arbitrary value of the ratio as their target. Once the target is set, the automation periodically measures the current value of the ratio and increase or decrease the aggregation interval if the ratio value is lower or higher than the target. The amount of the change is proportion to the distance between the current adn the target values. To avoid auto-tuning goes too long way, let users set the minimum and the maximum aggregation interval times. Changing only aggregation interval while sampling interval is kept makes the maximum level of access frequency in each snapshot, or discernment of regions inconsistent. Also, unnecessarily short sampling interval causes meaningless monitoring overhed. The automation therefore adjusts the sampling interval together with aggregation interval, while keeping the ratio between the two intervals. Users can set the ratio, or the discernment. Discussion ========== The modified question (aimed amount of access events, or lights, in each snapshot) is easy to answer by both the users and the kernel. If users are interested in finding more cold regions, the value should be lower, and vice versa. If users have no idea, kernel can suggest a fair default value based on some theories and experiments. For example, based on the Pareto principle (80/20 rule), we could expect 20% target ratio will capture 80% of real access events. Since 80% might be too high, applying the rule once again, 4% (20% * 20%) may capture about 56% (80% * 80%) of real access events. Sampling to aggregation intervals ratio and min/max aggregation intervals are also arguably easy to answer. What users want is discernment of regions for efficient system operation, for examples, X amount of colder regions or Y amount of warmer regions, not exactly how many times each cache line is accessed in nanoseconds degree. The appropriate min/max aggregation interval can relatively naively set, and may better to set for aimed monitoring overhead. Since sampling interval is directly deciding the overhead, setting it based on the sampling interval can be easy. With my experiences, I'd argue the intervals ratio 0.05, and 5 milliseconds to 20 seconds sampling interval range (100 milliseconds to 400 seconds aggregation interval) can be a good default suggestion. Evaluation ========== On a machine running a real world server workload, I ran DAMON to monitor its physical address space for about 23 hours, with this feature turned on. We set it to tune sampling interval in a range from 5 milliseconds to 10 seconds, aiming 4 % DAMON-observed access ratio per three aggregation intervals. The exact command I used is as below. damo start --monitoring_intervals_goal 4% 3 5ms 10s --damos_action stat During the test run, DAMON continuously updated sampling and aggregation intervals as designed, within the given range. For all the time, DAMON was able to find the intervals that meets the target access events ratio in the given intervals range (sampling interval between 5 milliseconds and 10 seconds). For most of the time, tuned sampling interval was converged in 300-400 milliseconds. It made only small amount of changes within the range. The average of the tuned sampling interval during the test was about 380 milliseconds. The workload periodically gets less load and decreases its CPU usage. Presumably this also caused it making less memory access events. Reactively to such event,s DAMON also increased the intervals as expected. It was still able to find the optimum interval that satisfying the target access ratio within the given intervals range. Usually it was converged to about 5 seconds. Once the workload gets normal amount of load again, DAMON reactively reduced the intervals to the normal range. I collected and visualized DAMON's monitoring results on the server a few times. Every time the visualized access pattern looked not biased to only cold or hot pages but diverse and balanced. Let me show some of the snapshots that I collected at the nearly end of the test (after about 23 hours have passed since starting DAMON on the server). The recency histogram looks as below. Please note that this visualization shows only a very coarse grained information. For more details about the visualization format, please refer to DAMON user-space tool documentation[1]. # ./damo report access --style recency-sz-hist --tried_regions_of 0 0 0 --access_rate 0 0 [-19 h 7 m 45.514 s, -17 h 12 m 58.963 s) 6.198 GiB |**** | [-17 h 12 m 58.963 s, -15 h 18 m 12.412 s) 0 B | | [-15 h 18 m 12.412 s, -13 h 23 m 25.860 s) 0 B | | [-13 h 23 m 25.860 s, -11 h 28 m 39.309 s) 0 B | | [-11 h 28 m 39.309 s, -9 h 33 m 52.757 s) 0 B | | [-9 h 33 m 52.757 s, -7 h 39 m 6.206 s) 0 B | | [-7 h 39 m 6.206 s, -5 h 44 m 19.654 s) 0 B | | [-5 h 44 m 19.654 s, -3 h 49 m 33.103 s) 0 B | | [-3 h 49 m 33.103 s, -1 h 54 m 46.551 s) 0 B | | [-1 h 54 m 46.551 s, -0 ns) 16.967 GiB |********* | [-0 ns, --6886551440000 ns) 38.835 GiB |********************| memory bw estimate: 9.425 GiB per second total size: 62.000 GiB It shows about 38 GiB of memory was accessed at least once within last aggregation interval (given ~300 milliseconds tuned sampling interval, this is about six seconds). This is about 61 % of the total memory. In other words, DAMON found warmest 61 % memory of the system. The number is particularly interesting given our Pareto principle based theory for the tuning goal value. We set it as 20 % of 20 % (4 %), thinking it would capture 80 % of 80 % (64 %) real access events. And it foudn 61 % hot memory, or working set. Nevertheless, to make the theory clearer, much more discussion and tests would be needed. At the moment, nonetheless, we can say making the target value higher helps finding more hot memory regions. The histogram also shows an amount of cold memory. About 17 GiB memory of the system has not accessed at least for last aggregation interval (about six seconds), and at most for about last two hours. The real longest unaccessed time of the 17 GiB memory was about 19 minutes, though. This is a limitation of this visualization format. It further found very cold 6 GiB memory. It has not accessed at least for last 17 hours and at most 19 hours. What about hot memory distribution? To see this, I capture and visualize the snapshot in access temperature histogram. Again, please refer to the DAMON user-space tool documentation[1] for the format and what access temperature mean. Both the visualization and metric shows only very coarse grained and limited information. The resulting histogram look like below. # ./damo report access --style temperature-sz-hist --tried_regions_of 0 0 0 [-6,840,763,776,000, -5,501,580,939,800) 6.198 GiB |*** | [-5,501,580,939,800, -4,162,398,103,600) 0 B | | [-4,162,398,103,600, -2,823,215,267,400) 0 B | | [-2,823,215,267,400, -1,484,032,431,200) 0 B | | [-1,484,032,431,200, -144,849,595,000) 0 B | | [-144,849,595,000, 1,194,333,241,200) 55.802 GiB |********************| [1,194,333,241,200, 2,533,516,077,400) 4.000 KiB |* | [2,533,516,077,400, 3,872,698,913,600) 4.000 KiB |* | [3,872,698,913,600, 5,211,881,749,800) 8.000 KiB |* | [5,211,881,749,800, 6,551,064,586,000) 12.000 KiB |* | [6,551,064,586,000, 7,890,247,422,200) 4.000 KiB |* | memory bw estimate: 5.178 GiB per second total size: 62.000 GiB We can see most of the memory is in similar access temperature range, and definitely some pages are extremely hot. To see the picture in more detail, let's capture and visualize the snapshot per DAMON-region, sorted by their access temperature. The total number of the regions was about 300. Due to the limited space, I'm showing only a few parts of the output here. # ./damo report access --style hot --tried_regions_of 0 0 0 heatmap: 00000000888888889999999888888888888888888888888888888888888888888888888888888888 # min/max temperatures: -6,827,258,184,000, 17,589,052,500, column size: 793.600 MiB |999999999999999999999999999999999999999| 4.000 KiB access 100 % 18 h 9 m 43.918 s |999999999999999999999999999999999999999| 8.000 KiB access 100 % 17 h 56 m 5.351 s |999999999999999999999999999999999999999| 4.000 KiB access 100 % 15 h 24 m 19.634 s |999999999999999999999999999999999999999| 4.000 KiB access 100 % 14 h 10 m 55.606 s |999999999999999999999999999999999999999| 4.000 KiB access 100 % 11 h 34 m 18.993 s [...] |99999999999999999999999999999| 8.000 KiB access 100 % 1 m 27.945 s |11111111111111111111111111111| 80.000 KiB access 15 % 1 m 21.180 s |00000000000000000000000000000| 24.000 KiB access 5 % 1 m 21.180 s |00000000000000000000000000000| 5.919 GiB access 10 % 1 m 14.415 s |99999999999999999999999999999| 12.000 KiB access 100 % 1 m 7.650 s [...] |0| 4.000 KiB access 5 % 0 ns |0| 12.000 KiB access 5 % 0 ns |0| 188.000 KiB access 0 % 0 ns |0| 24.000 KiB access 0 % 0 ns |0| 48.000 KiB access 0 % 0 ns [...] |0000000000000000000000000000000| 8.000 KiB access 0 % 6 m 45.901 s |00000000000000000000000000000000| 36.000 KiB access 0 % 7 m 26.491 s |00000000000000000000000000000000| 4.000 KiB access 0 % 12 m 37.682 s |000000000000000000000000000000000| 8.000 KiB access 0 % 18 m 9.168 s |000000000000000000000000000000000| 16.000 KiB access 0 % 19 m 3.288 s |0000000000000000000000000000000000000000| 6.198 GiB access 0 % 18 h 57 m 52.582 s memory bw estimate: 8.798 GiB per second total size: 62.000 GiB We can see DAMON found small and extremely hot regions that accessed for all access check sampling (once per about 300 milliseconds) for more than 10 hours. The access temperature rapidly decreases. DAMON was also able to find small and big regions that not accessed for up to about 19 minutes. It even found an outlier cold region of 6 GiB that not accessed for about 19 hours. It is unclear what the outlier region is, as of this writing. For the testing, DAMON was consuming about 0.1% of single CPU time. This is again expected results, since DAMON was using about 370 milliseconds sampling interval in most case. # ps -p $kdamond_pid -o %cpu %CPU 0.1 I also ran similar tests against kernel build workload and an in-memory cache workload benchmark[2]. Detialed results including tuned intervals and captured access pattern were of course different sicne those depend on the workloads. But the auto-tuning feature was always working as expected like the above results for the real world workload. To wrap up, with intervals auto-tuning feature, DAMON was able to capture access pattern snapshots of a quality on a real world server workload. The auto-tuning feature was able to adaptively react to the dynamic access patterns of the workload and reliably provide consistent monitoring results without manual human interventions. Also, the auto-tuning made DAMON consumes only necessary amount of resource for the required quality. References ========== [1] https://github.com/damonitor/damo/blob/next/USAGE.md#access-report-styles [2] https://github.com/facebookresearch/DCPerf/blob/main/packages/tao_bench/README.md This patch (of 8): Add data structures for DAMON sampling and aggregation intervals automatic tuning that aims specific amount of DAMON-observed access events per snapshot. In more detail, define the data structure for the tuning goal, link it to the monitoring attributes data structure so that DAMON kernel API callers can make the request, and update parameters setup DAMON function to respect the new parameter. Link: https://lkml.kernel.org/r/20250303221726.484227-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250303221726.484227-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 27 +++++++++++++++++++++++++++ mm/damon/core.c | 22 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 242910b190c9..5f2609f24761 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -659,12 +659,38 @@ struct damon_call_control { bool canceled; }; +/** + * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. + * + * @access_bp: Access events observation ratio to achieve in bp. + * @aggrs: Number of aggregations to acheive @access_bp within. + * @min_sample_us: Minimum resulting sampling interval in microseconds. + * @max_sample_us: Maximum resulting sampling interval in microseconds. + * + * DAMON automatically tunes &damon_attrs->sample_interval and + * &damon_attrs->aggr_interval aiming the ratio in bp (1/10,000) of + * DAMON-observed access events to theoretical maximum amount within @aggrs + * aggregations be same to @access_bp. The logic increases + * &damon_attrs->aggr_interval and &damon_attrs->sampling_interval in same + * ratio if the current access events observation ratio is lower than the + * target for each @aggrs aggregations, and vice versa. + * + * If @aggrs is zero, the tuning is disabled and hence this struct is ignored. + */ +struct damon_intervals_goal { + unsigned long access_bp; + unsigned long aggrs; + unsigned long min_sample_us; + unsigned long max_sample_us; +}; + /** * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. * @ops_update_interval: The time between monitoring operations updates. + * @intervals_goal: Intervals auto-tuning goal. * @min_nr_regions: The minimum number of adaptive monitoring * regions. * @max_nr_regions: The maximum number of adaptive monitoring @@ -684,6 +710,7 @@ struct damon_attrs { unsigned long sample_interval; unsigned long aggr_interval; unsigned long ops_update_interval; + struct damon_intervals_goal intervals_goal; unsigned long min_nr_regions; unsigned long max_nr_regions; }; diff --git a/mm/damon/core.c b/mm/damon/core.c index b1ce072b56f2..ad3b5c065cb8 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -615,6 +615,25 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx, r, old_attrs, new_attrs); } +/* + * damon_valid_intervals_goal() - return if the intervals goal of @attrs is + * valid. + */ +static bool damon_valid_intervals_goal(struct damon_attrs *attrs) +{ + struct damon_intervals_goal *goal = &attrs->intervals_goal; + + /* tuning is disabled */ + if (!goal->aggrs) + return true; + if (goal->min_sample_us > goal->max_sample_us) + return false; + if (attrs->sample_interval < goal->min_sample_us || + goal->max_sample_us < attrs->sample_interval) + return false; + return true; +} + /** * damon_set_attrs() - Set attributes for the monitoring. * @ctx: monitoring context @@ -635,6 +654,9 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) attrs->sample_interval : 1; struct damos *s; + if (!damon_valid_intervals_goal(attrs)) + return -EINVAL; + if (attrs->min_nr_regions < 3) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) From f04b0fedbe714f822bd066b319a60faa39a985a1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:20 -0800 Subject: [PATCH 1095/2157] mm/damon/core: implement intervals auto-tuning Implement the DAMON sampling and aggregation intervals auto-tuning mechanism as briefly described on 'struct damon_intervals_goal'. The core part for deciding the direction and amount of the changes is implemented reusing the feedback loop function which is being used for DAMOS quotas auto-tuning. Unlike the DAMOS quotas auto-tuning use case, limit the maximum decreasing amount after the adjustment to 50% of the current value, though. This is because the intervals have no good merits at rapid reductions since it could unnecessarily increase the monitoring overhead. Link: https://lkml.kernel.org/r/20250303221726.484227-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 16 +++++++++ mm/damon/core.c | 76 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 5f2609f24761..b3e2c793c1f4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -713,6 +713,17 @@ struct damon_attrs { struct damon_intervals_goal intervals_goal; unsigned long min_nr_regions; unsigned long max_nr_regions; +/* private: internal use only */ + /* + * @aggr_interval to @sample_interval ratio. + * Core-external components call damon_set_attrs() with &damon_attrs + * that this field is unset. In the case, damon_set_attrs() sets this + * field of resulting &damon_attrs. Core-internal components such as + * kdamond_tune_intervals() calls damon_set_attrs() with &damon_attrs + * that this field is set. In the case, damon_set_attrs() just keep + * it. + */ + unsigned long aggr_samples; }; /** @@ -761,6 +772,11 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* + * number of sample intervals that should be passed before next + * intervals tuning + */ + unsigned long next_intervals_tune_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; /* for scheme quotas prioritization */ diff --git a/mm/damon/core.c b/mm/damon/core.c index ad3b5c065cb8..9d37d3664030 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -664,6 +664,10 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) if (attrs->sample_interval > attrs->aggr_interval) return -EINVAL; + /* calls from core-external doesn't set this. */ + if (!attrs->aggr_samples) + attrs->aggr_samples = attrs->aggr_interval / sample_interval; + ctx->next_aggregation_sis = ctx->passed_sample_intervals + attrs->aggr_interval / sample_interval; ctx->next_ops_update_sis = ctx->passed_sample_intervals + @@ -1301,6 +1305,65 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) } } +static unsigned long damon_get_intervals_score(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz_region, max_access_events = 0, access_events = 0; + unsigned long target_access_events; + unsigned long goal_bp = c->attrs.intervals_goal.access_bp; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + sz_region = damon_sz_region(r); + max_access_events += sz_region * c->attrs.aggr_samples; + access_events += sz_region * r->nr_accesses; + } + } + target_access_events = max_access_events * goal_bp / 10000; + return access_events * 10000 / target_access_events; +} + +static unsigned long damon_feed_loop_next_input(unsigned long last_input, + unsigned long score); + +static unsigned long damon_get_intervals_adaptation_bp(struct damon_ctx *c) +{ + unsigned long score_bp, adaptation_bp; + + score_bp = damon_get_intervals_score(c); + adaptation_bp = damon_feed_loop_next_input(100000000, score_bp) / + 10000; + /* + * adaptaion_bp ranges from 1 to 20,000. Avoid too rapid reduction of + * the intervals by rescaling [1,10,000] to [5000, 10,000]. + */ + if (adaptation_bp <= 10000) + adaptation_bp = 5000 + adaptation_bp / 2; + return adaptation_bp; +} + +static void kdamond_tune_intervals(struct damon_ctx *c) +{ + unsigned long adaptation_bp; + struct damon_attrs new_attrs; + struct damon_intervals_goal *goal; + + adaptation_bp = damon_get_intervals_adaptation_bp(c); + if (adaptation_bp == 10000) + return; + + new_attrs = c->attrs; + goal = &c->attrs.intervals_goal; + new_attrs.sample_interval = min(goal->max_sample_us, + c->attrs.sample_interval * adaptation_bp / 10000); + new_attrs.sample_interval = max(goal->min_sample_us, + new_attrs.sample_interval); + new_attrs.aggr_interval = new_attrs.sample_interval * + c->attrs.aggr_samples; + damon_set_attrs(c, &new_attrs); +} + static void damon_split_region_at(struct damon_target *t, struct damon_region *r, unsigned long sz_r); @@ -2209,6 +2272,8 @@ static void kdamond_init_intervals_sis(struct damon_ctx *ctx) ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / sample_interval; + ctx->next_intervals_tune_sis = ctx->next_aggregation_sis * + ctx->attrs.intervals_goal.aggrs; damon_for_each_scheme(scheme, ctx) { apply_interval = scheme->apply_interval_us ? @@ -2293,6 +2358,17 @@ static int kdamond_fn(void *data) sample_interval = ctx->attrs.sample_interval ? ctx->attrs.sample_interval : 1; if (ctx->passed_sample_intervals >= next_aggregation_sis) { + if (ctx->attrs.intervals_goal.aggrs && + ctx->passed_sample_intervals >= + ctx->next_intervals_tune_sis) { + ctx->next_intervals_tune_sis += + ctx->attrs.aggr_samples * + ctx->attrs.intervals_goal.aggrs; + kdamond_tune_intervals(ctx); + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + + } ctx->next_aggregation_sis = next_aggregation_sis + ctx->attrs.aggr_interval / sample_interval; From 8fbbcbeaafeb82498fd83f58c1e5ad1aff135212 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:21 -0800 Subject: [PATCH 1096/2157] mm/damon/sysfs: implement intervals tuning goal directory Implement DAMON sysfs interface directory and its files for setting DAMON sampling and aggregation intervals auto-tuning goal. Link: https://lkml.kernel.org/r/20250303221726.484227-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 189 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index deeab04d3b46..a772060300b4 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -408,6 +408,164 @@ static const struct kobj_type damon_sysfs_targets_ktype = { .default_groups = damon_sysfs_targets_groups, }; +/* + * intervals goal directory + */ + +struct damon_sysfs_intervals_goal { + struct kobject kobj; + unsigned long access_bp; + unsigned long aggrs; + unsigned long min_sample_us; + unsigned long max_sample_us; +}; + +static struct damon_sysfs_intervals_goal *damon_sysfs_intervals_goal_alloc( + unsigned long access_bp, unsigned long aggrs, + unsigned long min_sample_us, unsigned long max_sample_us) +{ + struct damon_sysfs_intervals_goal *goal = kmalloc(sizeof(*goal), + GFP_KERNEL); + + if (!goal) + return NULL; + + goal->kobj = (struct kobject){}; + goal->access_bp = access_bp; + goal->aggrs = aggrs; + goal->min_sample_us = min_sample_us; + goal->max_sample_us = max_sample_us; + return goal; +} + +static ssize_t access_bp_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->access_bp); +} + +static ssize_t access_bp_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->access_bp = nr; + return count; +} + +static ssize_t aggrs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->aggrs); +} + +static ssize_t aggrs_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->aggrs = nr; + return count; +} + +static ssize_t min_sample_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->min_sample_us); +} + +static ssize_t min_sample_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->min_sample_us = nr; + return count; +} + +static ssize_t max_sample_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->max_sample_us); +} + +static ssize_t max_sample_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->max_sample_us = nr; + return count; +} + +static void damon_sysfs_intervals_goal_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_intervals_goal, kobj)); +} + +static struct kobj_attribute damon_sysfs_intervals_goal_access_bp_attr = + __ATTR_RW_MODE(access_bp, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_aggrs_attr = + __ATTR_RW_MODE(aggrs, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_min_sample_us_attr = + __ATTR_RW_MODE(min_sample_us, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_max_sample_us_attr = + __ATTR_RW_MODE(max_sample_us, 0600); + +static struct attribute *damon_sysfs_intervals_goal_attrs[] = { + &damon_sysfs_intervals_goal_access_bp_attr.attr, + &damon_sysfs_intervals_goal_aggrs_attr.attr, + &damon_sysfs_intervals_goal_min_sample_us_attr.attr, + &damon_sysfs_intervals_goal_max_sample_us_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_intervals_goal); + +static const struct kobj_type damon_sysfs_intervals_goal_ktype = { + .release = damon_sysfs_intervals_goal_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_intervals_goal_groups, +}; + /* * intervals directory */ @@ -417,6 +575,7 @@ struct damon_sysfs_intervals { unsigned long sample_us; unsigned long aggr_us; unsigned long update_us; + struct damon_sysfs_intervals_goal *intervals_goal; }; static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc( @@ -436,6 +595,32 @@ static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc( return intervals; } +static int damon_sysfs_intervals_add_dirs(struct damon_sysfs_intervals *intervals) +{ + struct damon_sysfs_intervals_goal *goal; + int err; + + goal = damon_sysfs_intervals_goal_alloc(0, 0, 0, 0); + if (!goal) + return -ENOMEM; + + err = kobject_init_and_add(&goal->kobj, + &damon_sysfs_intervals_goal_ktype, &intervals->kobj, + "intervals_goal"); + if (err) { + kobject_put(&goal->kobj); + intervals->intervals_goal = NULL; + return err; + } + intervals->intervals_goal = goal; + return 0; +} + +static void damon_sysfs_intervals_rm_dirs(struct damon_sysfs_intervals *intervals) +{ + kobject_put(&intervals->intervals_goal->kobj); +} + static ssize_t sample_us_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -569,6 +754,9 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) err = kobject_init_and_add(&intervals->kobj, &damon_sysfs_intervals_ktype, &attrs->kobj, "intervals"); + if (err) + goto put_intervals_out; + err = damon_sysfs_intervals_add_dirs(intervals); if (err) goto put_intervals_out; attrs->intervals = intervals; @@ -599,6 +787,7 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs) { kobject_put(&attrs->nr_regions_range->kobj); + damon_sysfs_intervals_rm_dirs(attrs->intervals); kobject_put(&attrs->intervals->kobj); } From 0622c68d0a51f1268f3f9a171f4969c1bfc07c05 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:22 -0800 Subject: [PATCH 1097/2157] mm/damon/sysfs: commit intervals tuning goal Connect DAMON sysfs interface for sampling and aggregation intervals auto-tuning with DAMON core API, so that users can really use the feature using the sysfs files. Link: https://lkml.kernel.org/r/20250303221726.484227-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index a772060300b4..fa5f004f0670 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1273,11 +1273,18 @@ static int damon_sysfs_set_attrs(struct damon_ctx *ctx, struct damon_sysfs_attrs *sys_attrs) { struct damon_sysfs_intervals *sys_intervals = sys_attrs->intervals; + struct damon_sysfs_intervals_goal *sys_goal = + sys_intervals->intervals_goal; struct damon_sysfs_ul_range *sys_nr_regions = sys_attrs->nr_regions_range; struct damon_attrs attrs = { .sample_interval = sys_intervals->sample_us, .aggr_interval = sys_intervals->aggr_us, + .intervals_goal = { + .access_bp = sys_goal->access_bp, + .aggrs = sys_goal->aggrs, + .min_sample_us = sys_goal->min_sample_us, + .max_sample_us = sys_goal->max_sample_us}, .ops_update_interval = sys_intervals->update_us, .min_nr_regions = sys_nr_regions->min, .max_nr_regions = sys_nr_regions->max, From 1077605396b4da993327ebe40eabc28478e2be94 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:23 -0800 Subject: [PATCH 1098/2157] mm/damon/sysfs: implement a command to update auto-tuned monitoring intervals DAMON kernel API callers can show auto-tuned sampling and aggregation intervals from the monmitoring attributes data structure. That can be useful for debugging or tuning of the feature. DAMON user-space ABI users has no way to see that, though. Implement a new DAMON sysfs interface command, namely 'update_tuned_intervals', for the purpose. If the command is written to the kdamond state file, the tuned sampling and aggregation intervals will be updated to the corresponding sysfs interface files. Link: https://lkml.kernel.org/r/20250303221726.484227-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index fa5f004f0670..ccd435d234b9 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1213,6 +1213,11 @@ enum damon_sysfs_cmd { * effective size quota of the scheme in bytes. */ DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS, + /* + * @DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS: Update the tuned monitoring + * intevals. + */ + DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS, /* * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands. */ @@ -1230,6 +1235,7 @@ static const char * const damon_sysfs_cmd_strs[] = { "update_schemes_tried_regions", "clear_schemes_tried_regions", "update_schemes_effective_quotas", + "update_tuned_intervals", }; /* @@ -1502,6 +1508,17 @@ static int damon_sysfs_upd_schemes_effective_quotas(void *data) return 0; } +static int damon_sysfs_upd_tuned_intervals(void *data) +{ + struct damon_sysfs_kdamond *kdamond = data; + struct damon_ctx *ctx = kdamond->damon_ctx; + + kdamond->contexts->contexts_arr[0]->attrs->intervals->sample_us = + ctx->attrs.sample_interval; + kdamond->contexts->contexts_arr[0]->attrs->intervals->aggr_us = + ctx->attrs.aggr_interval; + return 0; +} /* * damon_sysfs_cmd_request_callback() - DAMON callback for handling requests. @@ -1723,6 +1740,9 @@ static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, return damon_sysfs_damon_call( damon_sysfs_upd_schemes_effective_quotas, kdamond); + case DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS: + return damon_sysfs_damon_call( + damon_sysfs_upd_tuned_intervals, kdamond); default: break; } From af03edb521f1ea5f66a2fa7cd3e4af7d9a1984e2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:24 -0800 Subject: [PATCH 1099/2157] Docs/mm/damon/design: document for intervals auto-tuning Document the design of DAMON sampling and aggregation intervals auto-tuning. [sj@kernel.org: fix a typo on 'intervals auto-tuning' section] Link: https://lkml.kernel.org/r/20250305182744.56125-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250303221726.484227-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 5af991551a86..5a8c1752dc8a 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -313,6 +313,10 @@ sufficient for the given purpose, it shouldn't be unnecessarily further lowered. It is recommended to be set proportional to ``aggregation interval``. By default, the ratio is set as ``1/20``, and it is still recommended. +Based on the manual tuning guide, DAMON provides more intuitive knob-based +intervals auto tuning mechanism. Please refer to :ref:`the design document of +the feature ` for detail. + Refer to below documents for an example tuning based on the above guide. .. toctree:: @@ -321,6 +325,48 @@ Refer to below documents for an example tuning based on the above guide. monitoring_intervals_tuning_example +.. _damon_design_monitoring_intervals_autotuning: + +Monitoring Intervals Auto-tuning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +DAMON provides automatic tuning of the ``sampling interval`` and ``aggregation +interval`` based on the :ref:`the tuning guide idea +`. The tuning mechanism allows +users to set the aimed amount of access events to observe via DAMON within +given time interval. The target can be specified by the user as a ratio of +DAMON-observed access events to the theoretical maximum amount of the events +(``access_bp``) that measured within a given number of aggregations +(``aggrs``). + +The DAMON-observed access events are calculated in byte granularity based on +DAMON :ref:`region assumption `. For +example, if a region of size ``X`` bytes of ``Y`` ``nr_accesses`` is found, it +means ``X * Y`` access events are observed by DAMON. Theoretical maximum +access events for the region is calculated in same way, but replacing ``Y`` +with theoretical maximum ``nr_accesses``, which can be calculated as +``aggregation interval / sampling interval``. + +The mechanism calculates the ratio of access events for ``aggrs`` aggregations, +and increases or decrease the ``sampleing interval`` and ``aggregation +interval`` in same ratio, if the observed access ratio is lower or higher than +the target, respectively. The ratio of the intervals change is decided in +proportion to the distance between current samples ratio and the target ratio. + +The user can further set the minimum and maximum ``sampling interval`` that can +be set by the tuning mechanism using two parameters (``min_sample_us`` and +``max_sample_us``). Because the tuning mechanism changes ``sampling interval`` +and ``aggregation interval`` in same ratio always, the minimum and maximum +``aggregation interval`` after each of the tuning changes can automatically set +together. + +The tuning is turned off by default, and need to be set explicitly by the user. +As a rule of thumbs and the Parreto principle, 4% access samples ratio target +is recommended. Note that Parreto principle (80/20 rule) has applied twice. +That is, assumes 4% (20% of 20%) DAMON-observed access events ratio (source) +to capture 64% (80% multipled by 80%) real access events (outcomes). + + .. _damon_design_damos: Operation Schemes From e2b23dc62369b76b68d8354f12baeaff14b6e24f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:25 -0800 Subject: [PATCH 1100/2157] Docs/ABI/damon: document intervals auto-tuning ABI Document the DAMON user-space ABI for DAMON sampling and aggregation intervals auto-tuning. Link: https://lkml.kernel.org/r/20250303221726.484227-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- .../ABI/testing/sysfs-kernel-mm-damon | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index ccd13ca668c8..76da77d7f7b6 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -91,6 +91,36 @@ Description: Writing a value to this file sets the update interval of the DAMON context in microseconds as the value. Reading this file returns the value. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/access_bp +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the monitoring intervals + auto-tuning target DAMON-observed access events ratio within + the given time interval (aggrs in same directory), in bp + (1/10,000). Reading this file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/aggrs +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the time interval to achieve + the monitoring intervals auto-tuning target DAMON-observed + access events ratio (access_bp in same directory) within. + Reading this file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/min_sample_us +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the minimum value of + auto-tuned sampling interval in microseconds. Reading this + file returns the value. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/intervals/intrvals_goal/max_sample_us +Date: Feb 2025 +Contact: SeongJae Park +Description: Writing a value to this file sets the maximum value of + auto-tuned sampling interval in microseconds. Reading this + file returns the value. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//monitoring_attrs/nr_regions/min WDate: Mar 2022 From b243d666d1079587daa3f41fffdabbabad8dd075 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 3 Mar 2025 14:17:26 -0800 Subject: [PATCH 1101/2157] Docs/admin-guide/mm/damon/usage: add intervals_goal directory on the hierarchy Document DAMON sysfs interface usage for DAMON sampling and aggregation intervals auto-tuning. Link: https://lkml.kernel.org/r/20250303221726.484227-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 25 ++++++++++++++++++++ Documentation/mm/damon/design.rst | 4 ++++ 2 files changed, 29 insertions(+) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index dc37bba96273..de549dd18107 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -64,6 +64,7 @@ comma (","). │ │ │ │ :ref:`0 `/avail_operations,operations │ │ │ │ │ :ref:`monitoring_attrs `/ │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us + │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us │ │ │ │ │ │ nr_regions/min,max │ │ │ │ │ :ref:`targets `/nr_targets │ │ │ │ │ │ :ref:`0 `/pid_target @@ -132,6 +133,11 @@ Users can write below commands for the kdamond to the ``state`` file. - ``off``: Stop running. - ``commit``: Read the user inputs in the sysfs files except ``state`` file again. +- ``update_tuned_intervals``: Update the contents of ``sample_us`` and + ``aggr_us`` files of the kdamond with the auto-tuning applied ``sampling + interval`` and ``aggregation interval`` for the files. Please refer to + :ref:`intervals_goal section ` + for more details. - ``commit_schemes_quota_goals``: Read the DAMON-based operation schemes' :ref:`quota goals `. - ``update_schemes_stats``: Update the contents of stats files for each @@ -213,6 +219,25 @@ writing to and rading from the files. For more details about the intervals and monitoring regions range, please refer to the Design document (:doc:`/mm/damon/design`). +.. _damon_usage_sysfs_monitoring_intervals_goal: + +contexts//monitoring_attrs/intervals/intervals_goal/ +------------------------------------------------------- + +Under the ``intervals`` directory, one directory for automated tuning of +``sample_us`` and ``aggr_us``, namely ``intervals_goal`` directory also exists. +Under the directory, four files for the auto-tuning control, namely +``access_bp``, ``aggrs``, ``min_sample_us`` and ``max_sample_us`` exist. +Please refer to the :ref:`design document of the feature +` for the internal of the tuning +mechanism. Reading and writing the four files under ``intervals_goal`` +directory shows and updates the tuning parameters that described in the +:ref:design doc ` with the same +names. The tuning starts with the user-set ``sample_us`` and ``aggr_us``. The +tuning-applied current values of the two intervals can be read from the +``sample_us`` and ``aggr_us`` files after writing ``update_tuned_intervals`` to +the ``state`` file. + .. _sysfs_targets: contexts//targets/ diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 5a8c1752dc8a..e6fd3b604e70 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -366,6 +366,10 @@ is recommended. Note that Parreto principle (80/20 rule) has applied twice. That is, assumes 4% (20% of 20%) DAMON-observed access events ratio (source) to capture 64% (80% multipled by 80%) real access events (outcomes). +To know how user-space can use this feature via :ref:`DAMON sysfs interface +`, refer to :ref:`intervals_goal ` part of +the documentation. + .. _damon_design_damos: From 691ee97e1a9de0cdb3efb893c1f180e3f4a35e32 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 3 Mar 2025 14:15:35 +0000 Subject: [PATCH 1102/2157] mm: fix lazy mmu docs and usage Patch series "Fix lazy mmu mode", v2. I'm planning to implement lazy mmu mode for arm64 to optimize vmalloc. As part of that, I will extend lazy mmu mode to cover kernel mappings in vmalloc table walkers. While lazy mmu mode is already used for kernel mappings in a few places, this will extend it's use significantly. Having reviewed the existing lazy mmu implementations in powerpc, sparc and x86, it looks like there are a bunch of bugs, some of which may be more likely to trigger once I extend the use of lazy mmu. So this series attempts to clarify the requirements and fix all the bugs in advance of that series. See patch #1 commit log for all the details. This patch (of 5): The docs, implementations and use of arch_[enter|leave]_lazy_mmu_mode() is a bit of a mess (to put it politely). There are a number of issues related to nesting of lazy mmu regions and confusion over whether the task, when in a lazy mmu region, is preemptible or not. Fix all the issues relating to the core-mm. Follow up commits will fix the arch-specific implementations. 3 arches implement lazy mmu; powerpc, sparc and x86. When arch_[enter|leave]_lazy_mmu_mode() was first introduced by commit 6606c3e0da53 ("[PATCH] paravirt: lazy mmu mode hooks.patch"), it was expected that lazy mmu regions would never nest and that the appropriate page table lock(s) would be held while in the region, thus ensuring the region is non-preemptible. Additionally lazy mmu regions were only used during manipulation of user mappings. Commit 38e0edb15bd0 ("mm/apply_to_range: call pte function with lazy updates") started invoking the lazy mmu mode in apply_to_pte_range(), which is used for both user and kernel mappings. For kernel mappings the region is no longer protected by any lock so there is no longer any guarantee about non-preemptibility. Additionally, for RT configs, the holding the PTL only implies no CPU migration, it doesn't prevent preemption. Commit bcc6cc832573 ("mm: add default definition of set_ptes()") added arch_[enter|leave]_lazy_mmu_mode() to the default implementation of set_ptes(), used by x86. So after this commit, lazy mmu regions can be nested. Additionally commit 1a10a44dfc1d ("sparc64: implement the new page table range API") and commit 9fee28baa601 ("powerpc: implement the new page table range API") did the same for the sparc and powerpc set_ptes() overrides. powerpc couldn't deal with preemption so avoids it in commit b9ef323ea168 ("powerpc/64s: Disable preemption in hash lazy mmu mode"), which explicitly disables preemption for the whole region in its implementation. x86 can support preemption (or at least it could until it tried to add support nesting; more on this below). Sparc looks to be totally broken in the face of preemption, as far as I can tell. powerpc can't deal with nesting, so avoids it in commit 47b8def9358c ("powerpc/mm: Avoid calling arch_enter/leave_lazy_mmu() in set_ptes"), which removes the lazy mmu calls from its implementation of set_ptes(). x86 attempted to support nesting in commit 49147beb0ccb ("x86/xen: allow nesting of same lazy mode") but as far as I can tell, this breaks its support for preemption. In short, it's all a mess; the semantics for arch_[enter|leave]_lazy_mmu_mode() are not clearly defined and as a result the implementations all have different expectations, sticking plasters and bugs. arm64 is aiming to start using these hooks, so let's clean everything up before adding an arm64 implementation. Update the documentation to state that lazy mmu regions can never be nested, must not be called in interrupt context and preemption may or may not be enabled for the duration of the region. And fix the generic implementation of set_ptes() to avoid nesting. arch-specific fixes to conform to the new spec will proceed this one. These issues were spotted by code review and I have no evidence of issues being reported in the wild. Link: https://lkml.kernel.org/r/20250303141542.3371656-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20250303141542.3371656-2-ryan.roberts@arm.com Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Acked-by: Juergen Gross Cc: Andreas Larsson Cc: Borislav Betkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juegren Gross Cc: Matthew Wilcow (Oracle) Cc: Thomas Gleinxer Cc: Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 94d267d02372..787c632ee2c9 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -222,10 +222,14 @@ static inline int pmd_dirty(pmd_t pmd) * hazard could result in the direct mode hypervisor case, since the actual * write to the page tables may not yet have taken place, so reads though * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. + * up to date. + * + * In the general case, no lock is guaranteed to be held between entry and exit + * of the lazy mode. So the implementation must assume preemption may be enabled + * and cpu migration is possible; it must take steps to be robust against this. + * (In practice, for user PTE updates, the appropriate page table lock(s) are + * held, but for kernel PTE updates, no lock is held). Nesting is not permitted + * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define arch_enter_lazy_mmu_mode() do {} while (0) @@ -287,7 +291,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, { page_table_check_ptes_set(mm, ptep, pte, nr); - arch_enter_lazy_mmu_mode(); for (;;) { set_pte(ptep, pte); if (--nr == 0) @@ -295,7 +298,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, ptep++; pte = pte_next_pfn(pte); } - arch_leave_lazy_mmu_mode(); } #endif #define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) From ad449d856bd7e7461ac740abb9b5d10a824e0166 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 3 Mar 2025 14:15:36 +0000 Subject: [PATCH 1103/2157] fs/proc/task_mmu: reduce scope of lazy mmu region Update the way arch_[enter|leave]_lazy_mmu_mode() is called in pagemap_scan_pmd_entry() to follow the normal pattern of holding the ptl for user space mappings. As a result the scope is reduced to only the pte table, but that's where most of the performance win is. While I believe there wasn't technically a bug here, the original scope made it easier to accidentally nest or, worse, accidentally call something like kmap() which would expect an immediate mode pte modification but it would end up deferred. Link: https://lkml.kernel.org/r/20250303141542.3371656-3-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Acked-by: Juergen Gross Cc: Andreas Larsson Cc: Borislav Betkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juegren Gross Cc: Matthew Wilcow (Oracle) Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c17615e21a5d..b0f189815512 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2459,22 +2459,19 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, spinlock_t *ptl; int ret; - arch_enter_lazy_mmu_mode(); - ret = pagemap_scan_thp_entry(pmd, start, end, walk); - if (ret != -ENOENT) { - arch_leave_lazy_mmu_mode(); + if (ret != -ENOENT) return ret; - } ret = 0; start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); if (!pte) { - arch_leave_lazy_mmu_mode(); walk->action = ACTION_AGAIN; return 0; } + arch_enter_lazy_mmu_mode(); + if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { @@ -2543,8 +2540,8 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, if (flush_end) flush_tlb_range(vma, start, addr); - pte_unmap_unlock(start_pte, ptl); arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(start_pte, ptl); cond_resched(); return ret; From a1d416bf9faf4f4871cb5a943614a07f80a7d70f Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 3 Mar 2025 14:15:37 +0000 Subject: [PATCH 1104/2157] sparc/mm: disable preemption in lazy mmu mode Since commit 38e0edb15bd0 ("mm/apply_to_range: call pte function with lazy updates") it's been possible for arch_[enter|leave]_lazy_mmu_mode() to be called without holding a page table lock (for the kernel mappings case), and therefore it is possible that preemption may occur while in the lazy mmu mode. The Sparc lazy mmu implementation is not robust to preemption since it stores the lazy mode state in a per-cpu structure and does not attempt to manage that state on task switch. Powerpc had the same issue and fixed it by explicitly disabling preemption in arch_enter_lazy_mmu_mode() and re-enabling in arch_leave_lazy_mmu_mode(). See commit b9ef323ea168 ("powerpc/64s: Disable preemption in hash lazy mmu mode"). Given Sparc's lazy mmu mode is based on powerpc's, let's fix it in the same way here. Link: https://lkml.kernel.org/r/20250303141542.3371656-4-ryan.roberts@arm.com Fixes: 38e0edb15bd0 ("mm/apply_to_range: call pte function with lazy updates") Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Acked-by: Andreas Larsson Acked-by: Juergen Gross Cc: Borislav Betkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juegren Gross Cc: Matthew Wilcow (Oracle) Cc: Thomas Gleinxer Cc: Signed-off-by: Andrew Morton --- arch/sparc/mm/tlb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 8648a50afe88..a35ddcca5e76 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -52,8 +52,10 @@ void flush_tlb_pending(void) void arch_enter_lazy_mmu_mode(void) { - struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); + struct tlb_batch *tb; + preempt_disable(); + tb = this_cpu_ptr(&tlb_batch); tb->active = 1; } @@ -64,6 +66,7 @@ void arch_leave_lazy_mmu_mode(void) if (tb->tlb_nr) flush_tlb_pending(); tb->active = 0; + preempt_enable(); } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, From eb61ad14c459b54f71f76331ca35d12fa3eb8f98 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 3 Mar 2025 14:15:38 +0000 Subject: [PATCH 1105/2157] sparc/mm: avoid calling arch_enter/leave_lazy_mmu() in set_ptes With commit 1a10a44dfc1d ("sparc64: implement the new page table range API") set_ptes was added to the sparc architecture. The implementation included calling arch_enter/leave_lazy_mmu() calls. The patch removes the usage of arch_enter/leave_lazy_mmu() since this implies nesting of lazy mmu regions which is not supported. Without this fix, lazy mmu mode is effectively disabled because we exit the mode after the first set_ptes: remap_pte_range() -> arch_enter_lazy_mmu() -> set_ptes() -> arch_enter_lazy_mmu() -> arch_leave_lazy_mmu() -> arch_leave_lazy_mmu() Powerpc suffered the same problem and fixed it in a corresponding way with commit 47b8def9358c ("powerpc/mm: Avoid calling arch_enter/leave_lazy_mmu() in set_ptes"). Link: https://lkml.kernel.org/r/20250303141542.3371656-5-ryan.roberts@arm.com Fixes: 1a10a44dfc1d ("sparc64: implement the new page table range API") Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Acked-by: Andreas Larsson Acked-by: Juergen Gross Cc: Borislav Betkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juegren Gross Cc: Matthew Wilcow (Oracle) Cc: Thomas Gleinxer Cc: Signed-off-by: Andrew Morton --- arch/sparc/include/asm/pgtable_64.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2b7f358762c1..dc28f2c4eee3 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -936,7 +936,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { - arch_enter_lazy_mmu_mode(); for (;;) { __set_pte_at(mm, addr, ptep, pte, 0); if (--nr == 0) @@ -945,7 +944,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_val(pte) += PAGE_SIZE; addr += PAGE_SIZE; } - arch_leave_lazy_mmu_mode(); } #define set_ptes set_ptes From c36549ff8d8423fbf1a0c5bbd72be4f902d74700 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 3 Mar 2025 14:15:39 +0000 Subject: [PATCH 1106/2157] Revert "x86/xen: allow nesting of same lazy mode" Commit 49147beb0ccb ("x86/xen: allow nesting of same lazy mode") was added as a solution for a core-mm code change where arch_[enter|leave]_lazy_mmu_mode() started to be called in a nested manner; see commit bcc6cc832573 ("mm: add default definition of set_ptes()"). However, now that we have fixed the API to avoid nesting, we no longer need this capability in the x86 implementation. Additionally, from code review, I don't believe the fix was ever robust in the case of preemption occurring while in the nested lazy mode. The implementation usually deals with preemption by calling arch_leave_lazy_mmu_mode() from xen_start_context_switch() for the outgoing task if we are in the lazy mmu mode. Then in xen_end_context_switch(), it restarts the lazy mode by calling arch_enter_lazy_mmu_mode() for an incoming task that was in the lazy mode when it was switched out. But arch_leave_lazy_mmu_mode() will only unwind a single level of nesting. If we are in the double nest, then it's not fully unwound and per-cpu variables are left in a bad state. So the correct solution is to remove the possibility of nesting from the higher level (which has now been done) and remove this x86-specific solution. Link: https://lkml.kernel.org/r/20250303141542.3371656-6-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Acked-by: Juergen Gross Cc: Andreas Larsson Cc: Borislav Betkov Cc: Boris Ostrovsky Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juegren Gross Cc: Matthew Wilcow (Oracle) Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- arch/x86/include/asm/xen/hypervisor.h | 15 ++------------- arch/x86/xen/enlighten_pv.c | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a9088250770f..bd0fc69a10a7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -72,18 +72,10 @@ enum xen_lazy_mode { }; DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); -DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); static inline void enter_lazy(enum xen_lazy_mode mode) { - enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); - - if (mode == old_mode) { - this_cpu_inc(xen_lazy_nesting); - return; - } - - BUG_ON(old_mode != XEN_LAZY_NONE); + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); this_cpu_write(xen_lazy_mode, mode); } @@ -92,10 +84,7 @@ static inline void leave_lazy(enum xen_lazy_mode mode) { BUG_ON(this_cpu_read(xen_lazy_mode) != mode); - if (this_cpu_read(xen_lazy_nesting) == 0) - this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); - else - this_cpu_dec(xen_lazy_nesting); + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); } enum xen_lazy_mode xen_get_lazy_mode(void); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5e57835e999d..919e4df9380b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -99,7 +99,6 @@ struct tls_descs { }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; -DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { From e47f1f56dd82cc6d91f5c4d914a534aa03cd12ca Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Fri, 28 Feb 2025 09:52:17 +0000 Subject: [PATCH 1107/2157] mm/page_alloc: clarify terminology in migratetype fallback code Patch series "mm/page_alloc: Some clarifications for migratetype fallback", v4. A couple of patches to try and make the code easier to follow. This patch (of 2): This code is rather confusing because: 1. "Steal" is sometimes used to refer to the general concept of allocating from a from a block of a fallback migratetype (steal_suitable_fallback()) but sometimes it refers specifically to converting a whole block's migratetype (can_steal_fallback()). 2. can_steal_fallback() sounds as though it's answering the question "am I functionally permitted to allocate from that other type" but in fact it is encoding a heuristic preference. 3. The same piece of data has different names in different places: can_steal vs whole_block. This reinforces point 2 because it looks like the different names reflect a shift in intent from "am I allowed to steal" to "do I want to steal", but no such shift exists. Fix 1. by avoiding the term "steal" in ambiguous contexts. Start using the term "claim" to refer to the special case of stealing the entire block. Fix 2. by using "should" instead of "can", and also rename its parameters and add some commentary to make it more explicit what they mean. Fix 3. by adopting the new "claim" terminology universally for this set of variables. Link: https://lkml.kernel.org/r/20250228-clarify-steal-v4-0-cb2ef1a4e610@google.com Link: https://lkml.kernel.org/r/20250228-clarify-steal-v4-1-cb2ef1a4e610@google.com Signed-off-by: Brendan Jackman Reviewed-by: Vlastimil Babka Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/compaction.c | 4 +-- mm/internal.h | 2 +- mm/page_alloc.c | 72 ++++++++++++++++++++++++------------------------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index a3203d97123e..2e2d4db33e68 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2332,7 +2332,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) ret = COMPACT_NO_SUITABLE_PAGE; for (order = cc->order; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &cc->zone->free_area[order]; - bool can_steal; + bool claim_block; /* Job done if page is free of the right migratetype */ if (!free_area_empty(area, migratetype)) @@ -2349,7 +2349,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) * other migratetype buddy lists. */ if (find_suitable_fallback(area, order, migratetype, - true, &can_steal) != -1) + true, &claim_block) != -1) /* * Movable pages are OK in any pageblock. If we are * stealing for a non-movable allocation, make sure diff --git a/mm/internal.h b/mm/internal.h index 31c626130883..70fa96e61c76 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -865,7 +865,7 @@ static inline void init_cma_pageblock(struct page *page) int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal); + int migratetype, bool claim_only, bool *claim_block); static inline bool free_area_empty(struct free_area *area, int migratetype) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c5624380b6c..0f0ecfe82f5a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1942,22 +1942,22 @@ static inline bool boost_watermark(struct zone *zone) /* * When we are falling back to another migratetype during allocation, try to - * steal extra free pages from the same pageblocks to satisfy further - * allocations, instead of polluting multiple pageblocks. + * claim entire blocks to satisfy further allocations, instead of polluting + * multiple pageblocks. * - * If we are stealing a relatively large buddy page, it is likely there will - * be more free pages in the pageblock, so try to steal them all. For - * reclaimable and unmovable allocations, we steal regardless of page size, - * as fragmentation caused by those allocations polluting movable pageblocks - * is worse than movable allocations stealing from unmovable and reclaimable - * pageblocks. + * If we are stealing a relatively large buddy page, it is likely there will be + * more free pages in the pageblock, so try to claim the whole block. For + * reclaimable and unmovable allocations, we try to claim the whole block + * regardless of page size, as fragmentation caused by those allocations + * polluting movable pageblocks is worse than movable allocations stealing from + * unmovable and reclaimable pageblocks. */ -static bool can_steal_fallback(unsigned int order, int start_mt) +static bool should_try_claim_block(unsigned int order, int start_mt) { /* * Leaving this order check is intended, although there is * relaxed order check in next check. The reason is that - * we can actually steal whole pageblock if this condition met, + * we can actually claim the whole pageblock if this condition met, * but, below check doesn't guarantee it and that is just heuristic * so could be changed anytime. */ @@ -1970,7 +1970,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt) * reclaimable pages that are closest to the request size. After a * while, memory compaction may occur to form large contiguous pages, * and the next movable allocation may not need to steal. Unmovable and - * reclaimable allocations need to actually steal pages. + * reclaimable allocations need to actually claim the whole block. */ if (order >= pageblock_order / 2 || start_mt == MIGRATE_RECLAIMABLE || @@ -1983,12 +1983,14 @@ static bool can_steal_fallback(unsigned int order, int start_mt) /* * Check whether there is a suitable fallback freepage with requested order. - * If only_stealable is true, this function returns fallback_mt only if - * we can steal other freepages all together. This would help to reduce + * Sets *claim_block to instruct the caller whether it should convert a whole + * pageblock to the returned migratetype. + * If only_claim is true, this function returns fallback_mt only if + * we would do this whole-block claiming. This would help to reduce * fragmentation due to mixed migratetype pages in one pageblock. */ int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal) + int migratetype, bool only_claim, bool *claim_block) { int i; int fallback_mt; @@ -1996,19 +1998,16 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, if (area->nr_free == 0) return -1; - *can_steal = false; + *claim_block = false; for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { fallback_mt = fallbacks[migratetype][i]; if (free_area_empty(area, fallback_mt)) continue; - if (can_steal_fallback(order, migratetype)) - *can_steal = true; + if (should_try_claim_block(order, migratetype)) + *claim_block = true; - if (!only_stealable) - return fallback_mt; - - if (*can_steal) + if (*claim_block || !only_claim) return fallback_mt; } @@ -2016,14 +2015,14 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, } /* - * This function implements actual steal behaviour. If order is large enough, we - * can claim the whole pageblock for the requested migratetype. If not, we check - * the pageblock for constituent pages; if at least half of the pages are free - * or compatible, we can still claim the whole block, so pages freed in the - * future will be put on the correct free list. + * This function implements actual block claiming behaviour. If order is large + * enough, we can claim the whole pageblock for the requested migratetype. If + * not, we check the pageblock for constituent pages; if at least half of the + * pages are free or compatible, we can still claim the whole block, so pages + * freed in the future will be put on the correct free list. */ static struct page * -try_to_steal_block(struct zone *zone, struct page *page, +try_to_claim_block(struct zone *zone, struct page *page, int current_order, int order, int start_type, int block_type, unsigned int alloc_flags) { @@ -2091,11 +2090,12 @@ try_to_steal_block(struct zone *zone, struct page *page, /* * Try finding a free buddy page on the fallback list. * - * This will attempt to steal a whole pageblock for the requested type + * This will attempt to claim a whole pageblock for the requested type * to ensure grouping of such requests in the future. * - * If a whole block cannot be stolen, regress to __rmqueue_smallest() - * logic to at least break up as little contiguity as possible. + * If a whole block cannot be claimed, steal an individual page, regressing to + * __rmqueue_smallest() logic to at least break up as little contiguity as + * possible. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop @@ -2112,7 +2112,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, int min_order = order; struct page *page; int fallback_mt; - bool can_steal; + bool claim_block; /* * Do not steal pages from freelists belonging to other pageblocks @@ -2131,15 +2131,15 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, --current_order) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); + start_migratetype, false, &claim_block); if (fallback_mt == -1) continue; - if (!can_steal) + if (!claim_block) break; page = get_page_from_free_area(area, fallback_mt); - page = try_to_steal_block(zone, page, current_order, order, + page = try_to_claim_block(zone, page, current_order, order, start_migratetype, fallback_mt, alloc_flags); if (page) @@ -2149,11 +2149,11 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, if (alloc_flags & ALLOC_NOFRAGMENT) return NULL; - /* No luck stealing blocks. Find the smallest fallback page */ + /* No luck claiming pageblock. Find the smallest fallback page */ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); + start_migratetype, false, &claim_block); if (fallback_mt == -1) continue; From a14efee04796dd3f614eaf5348ca1ac099c21349 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Fri, 28 Feb 2025 09:52:18 +0000 Subject: [PATCH 1108/2157] mm/page_alloc: clarify should_claim_block() commentary There's lots of text here but it's a little hard to follow, this is an attempt to break it up and align its structure more closely with the code. Reword the top-level function comment to just explain what question the function answers from the point of view of the caller. Break up the internal logic into different sections that can have their own commentary describing why that part of the rationale is present. Note the page_group_by_mobility_disabled logic is not explained in the commentary, that is outside the scope of this patch... Link: https://lkml.kernel.org/r/20250228-clarify-steal-v4-2-cb2ef1a4e610@google.com Signed-off-by: Brendan Jackman Reviewed-by: Vlastimil Babka Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/page_alloc.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0f0ecfe82f5a..57f959af79c5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1941,16 +1941,9 @@ static inline bool boost_watermark(struct zone *zone) } /* - * When we are falling back to another migratetype during allocation, try to - * claim entire blocks to satisfy further allocations, instead of polluting - * multiple pageblocks. - * - * If we are stealing a relatively large buddy page, it is likely there will be - * more free pages in the pageblock, so try to claim the whole block. For - * reclaimable and unmovable allocations, we try to claim the whole block - * regardless of page size, as fragmentation caused by those allocations - * polluting movable pageblocks is worse than movable allocations stealing from - * unmovable and reclaimable pageblocks. + * When we are falling back to another migratetype during allocation, should we + * try to claim an entire block to satisfy further allocations, instead of + * polluting multiple pageblocks? */ static bool should_try_claim_block(unsigned int order, int start_mt) { @@ -1965,19 +1958,32 @@ static bool should_try_claim_block(unsigned int order, int start_mt) return true; /* - * Movable pages won't cause permanent fragmentation, so when you alloc - * small pages, you just need to temporarily steal unmovable or - * reclaimable pages that are closest to the request size. After a - * while, memory compaction may occur to form large contiguous pages, - * and the next movable allocation may not need to steal. Unmovable and - * reclaimable allocations need to actually claim the whole block. + * Above a certain threshold, always try to claim, as it's likely there + * will be more free pages in the pageblock. */ - if (order >= pageblock_order / 2 || - start_mt == MIGRATE_RECLAIMABLE || - start_mt == MIGRATE_UNMOVABLE || - page_group_by_mobility_disabled) + if (order >= pageblock_order / 2) return true; + /* + * Unmovable/reclaimable allocations would cause permanent + * fragmentations if they fell back to allocating from a movable block + * (polluting it), so we try to claim the whole block regardless of the + * allocation size. Later movable allocations can always steal from this + * block, which is less problematic. + */ + if (start_mt == MIGRATE_RECLAIMABLE || start_mt == MIGRATE_UNMOVABLE) + return true; + + if (page_group_by_mobility_disabled) + return true; + + /* + * Movable pages won't cause permanent fragmentation, so when you alloc + * small pages, we just need to temporarily steal unmovable or + * reclaimable pages that are closest to the request size. After a + * while, memory compaction may occur to form large contiguous pages, + * and the next movable allocation may not need to steal. + */ return false; } From 645207a670a96ebd7b3cc9b85699a3a03ad35483 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 27 Feb 2025 23:58:06 -0800 Subject: [PATCH 1109/2157] memcg: don't call propagate_protected_usage() for v1 Patch series "page_counter cleanup and size reduction". Commit c6f53ed8f213a ("mm, memcg: cg2 memory{.swap,}.peak write handlers") accidently increased the size of struct page_counter. This series rearrange the fields to reduce its size and also has some cleanups. This patch (of 3): Memcg-v1 does not support memory protection (min/low) and thus there is no need to track protected memory usage for it. Link: https://lkml.kernel.org/r/20250228075808.207484-1-shakeel.butt@linux.dev Link: https://lkml.kernel.org/r/20250228075808.207484-2-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin (Cruise) Signed-off-by: Andrew Morton --- mm/memcontrol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 04973c084c63..9e0e00a2c941 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3610,6 +3610,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct mem_cgroup *parent = mem_cgroup_from_css(parent_css); struct mem_cgroup *memcg, *old_memcg; + bool memcg_on_dfl = cgroup_subsys_on_dfl(memory_cgrp_subsys); old_memcg = set_active_memcg(parent); memcg = mem_cgroup_alloc(parent); @@ -3627,7 +3628,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (parent) { WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent)); - page_counter_init(&memcg->memory, &parent->memory, true); + page_counter_init(&memcg->memory, &parent->memory, memcg_on_dfl); page_counter_init(&memcg->swap, &parent->swap, false); #ifdef CONFIG_MEMCG_V1 WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable)); @@ -3647,7 +3648,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; } - if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) + if (memcg_on_dfl && !cgroup_memory_nosocket) static_branch_inc(&memcg_sockets_enabled_key); if (!cgroup_memory_nobpf) From 0e2759afcaf9bff25a63201856fa89b64181749f Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 27 Feb 2025 23:58:07 -0800 Subject: [PATCH 1110/2157] page_counter: track failcnt only for legacy cgroups Currently page_counter tracks failcnt for counters used by v1 and v2 controllers. However failcnt is only exported for v1 deployment and thus there is no need to maintain it in v2. The oom report does expose failcnt for memory and swap in v2 but v2 already maintains MEMCG_MAX and MEMCG_SWAP_MAX event counters which can be used. Link: https://lkml.kernel.org/r/20250228075808.207484-3-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin (Cruise) Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 4 +++- mm/hugetlb_cgroup.c | 31 ++++++++++++++----------------- mm/memcontrol.c | 12 ++++++++++-- mm/page_counter.c | 4 +++- 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 46406f3fe34d..e4bd8fd427be 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -28,12 +28,13 @@ struct page_counter { unsigned long watermark; /* Latest cg2 reset watermark */ unsigned long local_watermark; - unsigned long failcnt; + unsigned long failcnt; /* v1-only field */ /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); bool protection_support; + bool track_failcnt; unsigned long min; unsigned long low; unsigned long high; @@ -58,6 +59,7 @@ static inline void page_counter_init(struct page_counter *counter, counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; + counter->track_failcnt = false; } static inline unsigned long page_counter_read(struct page_counter *counter) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index bb9578bd99f9..58e895f3899a 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -101,10 +101,9 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, int idx; for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { - struct page_counter *fault_parent = NULL; - struct page_counter *rsvd_parent = NULL; + struct page_counter *fault, *fault_parent = NULL; + struct page_counter *rsvd, *rsvd_parent = NULL; unsigned long limit; - int ret; if (parent_h_cgroup) { fault_parent = hugetlb_cgroup_counter_from_cgroup( @@ -112,24 +111,22 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( parent_h_cgroup, idx); } - page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, - idx), - fault_parent, false); - page_counter_init( - hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), - rsvd_parent, false); + fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx); + rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx); + + page_counter_init(fault, fault_parent, false); + page_counter_init(rsvd, rsvd_parent, false); + + if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) { + fault->track_failcnt = true; + rsvd->track_failcnt = true; + } limit = round_down(PAGE_COUNTER_MAX, pages_per_huge_page(&hstates[idx])); - ret = page_counter_set_max( - hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), - limit); - VM_BUG_ON(ret); - ret = page_counter_set_max( - hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), - limit); - VM_BUG_ON(ret); + VM_BUG_ON(page_counter_set_max(fault, limit)); + VM_BUG_ON(page_counter_set_max(rsvd, limit)); } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9e0e00a2c941..1c496b79de97 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1572,16 +1572,23 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) /* Use static buffer, for the caller is holding oom_lock. */ static char buf[SEQ_BUF_SIZE]; struct seq_buf s; + unsigned long memory_failcnt; lockdep_assert_held(&oom_lock); + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + memory_failcnt = atomic_long_read(&memcg->memory_events[MEMCG_MAX]); + else + memory_failcnt = memcg->memory.failcnt; + pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", K((u64)page_counter_read(&memcg->memory)), - K((u64)READ_ONCE(memcg->memory.max)), memcg->memory.failcnt); + K((u64)READ_ONCE(memcg->memory.max)), memory_failcnt); if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) pr_info("swap: usage %llukB, limit %llukB, failcnt %lu\n", K((u64)page_counter_read(&memcg->swap)), - K((u64)READ_ONCE(memcg->swap.max)), memcg->swap.failcnt); + K((u64)READ_ONCE(memcg->swap.max)), + atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX])); #ifdef CONFIG_MEMCG_V1 else { pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n", @@ -3631,6 +3638,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) page_counter_init(&memcg->memory, &parent->memory, memcg_on_dfl); page_counter_init(&memcg->swap, &parent->swap, false); #ifdef CONFIG_MEMCG_V1 + memcg->memory.track_failcnt = !memcg_on_dfl; WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable)); page_counter_init(&memcg->kmem, &parent->kmem, false); page_counter_init(&memcg->tcpmem, &parent->tcpmem, false); diff --git a/mm/page_counter.c b/mm/page_counter.c index af23f927611b..661e0f2a5127 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -121,6 +121,7 @@ bool page_counter_try_charge(struct page_counter *counter, { struct page_counter *c; bool protection = track_protection(counter); + bool track_failcnt = counter->track_failcnt; for (c = counter; c; c = c->parent) { long new; @@ -146,7 +147,8 @@ bool page_counter_try_charge(struct page_counter *counter, * inaccuracy in the failcnt which is only used * to report stats. */ - data_race(c->failcnt++); + if (track_failcnt) + data_race(c->failcnt++); *fail = c; goto failed; } From f7b0797d36e75d2a622580b56b9bfd3130d5d0e9 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 27 Feb 2025 23:58:08 -0800 Subject: [PATCH 1111/2157] page_counter: reduce struct page_counter size The struct page_counter has explicit padding for better cache alignment. The commit c6f53ed8f213a ("mm, memcg: cg2 memory{.swap,}.peak write handlers") added a field to the struct page_counter and accidently increased its size. Let's move the failcnt field which is v1-only field to the same cacheline of usage to reduce the size of struct page_counter. Link: https://lkml.kernel.org/r/20250228075808.207484-4-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin (Cruise) Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index e4bd8fd427be..d649b6bbbc87 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -9,10 +9,12 @@ struct page_counter { /* - * Make sure 'usage' does not share cacheline with any other field. The - * memcg->memory.usage is a hot member of struct mem_cgroup. + * Make sure 'usage' does not share cacheline with any other field in + * v2. The memcg->memory.usage is a hot member of struct mem_cgroup. */ atomic_long_t usage; + unsigned long failcnt; /* v1-only field */ + CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ @@ -28,7 +30,6 @@ struct page_counter { unsigned long watermark; /* Latest cg2 reset watermark */ unsigned long local_watermark; - unsigned long failcnt; /* v1-only field */ /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); From 3dc30ef64ba6b0f4c2a38aec7e87691f2d859b84 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 27 Feb 2025 18:23:54 -0800 Subject: [PATCH 1112/2157] memcg: bypass root memcg check for skmem charging The root memcg is never associated with a socket in mem_cgroup_sk_alloc, so there is no need to check if the given memcg is root for the skmem charging code path. Link: https://lkml.kernel.org/r/20250228022354.2624249-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin (Cruise) Signed-off-by: Andrew Morton --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1c496b79de97..b07eff78414b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4881,7 +4881,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return memcg1_charge_skmem(memcg, nr_pages, gfp_mask); - if (try_charge(memcg, gfp_mask, nr_pages) == 0) { + if (try_charge_memcg(memcg, gfp_mask, nr_pages) == 0) { mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); return true; } From c34b3eceeac64d59f8b475046501faa5d8daa5a4 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Thu, 27 Feb 2025 23:45:05 +0100 Subject: [PATCH 1113/2157] mm: hugetlb: improve parallel huge page allocation time Patch series "Add a command line option that enables control of how many threads should be used to allocate huge pages", v2. Allocating huge pages can take a very long time on servers with terabytes of memory even when they are allocated at boot time where the allocation happens in parallel. Before this series, the kernel used a hard coded value of 2 threads per NUMA node for these allocations. This value might have been good enough in the past but it is not sufficient to fully utilize newer systems. This series changes the default so the kernel uses 25% of the available hardware threads for these allocations. In addition, we allow the user that wish to micro-optimize the allocation time to override this value via a new kernel parameter. We tested this on 2 generations of Xeon CPUs and the results show a big improvement of the overall allocation time. +-----------------------+-------+-------+-------+-------+-------+ | threads | 8 | 16 | 32 | 64 | 128 | +-----------------------+-------+-------+-------+-------+-------+ | skylake 144 cpus | 44s | 22s | 16s | 19s | 20s | | cascade lake 192 cpus | 39s | 20s | 11s | 10s | 9s | +-----------------------+-------+-------+-------+-------+-------+ On skylake, we see an improvment of 2.75x when using 32 threads, on cascade lake we can get even better at 4.3x when we use 128 threads. This speedup is quite significant and users of large machines like these should have the option to make the machines boot as fast as possible. This patch (of 3): Before this patch, the kernel currently used a hard coded value of 2 threads per NUMA node for these allocations. This patch changes this policy and the kernel now uses 25% of the available hardware threads for the allocations. Link: https://lkml.kernel.org/r/20250227-hugepage-parameter-v2-0-7db8c6dc0453@cyberus-technology.de Link: https://lkml.kernel.org/r/20250227-hugepage-parameter-v2-1-7db8c6dc0453@cyberus-technology.de Signed-off-by: Thomas Prescher Cc: Jonathan Corbet Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a96c6edeaef..edb846452791 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -14,9 +14,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -3605,31 +3607,31 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) .numa_aware = true }; + unsigned int num_allocation_threads = max(num_online_cpus() / 4, 1); + job.thread_fn = hugetlb_pages_alloc_boot_node; job.start = 0; job.size = h->max_huge_pages; /* - * job.max_threads is twice the num_node_state(N_MEMORY), + * job.max_threads is 25% of the available cpu threads by default. * - * Tests below indicate that a multiplier of 2 significantly improves - * performance, and although larger values also provide improvements, - * the gains are marginal. + * On large servers with terabytes of memory, huge page allocation + * can consume a considerably amount of time. * - * Therefore, choosing 2 as the multiplier strikes a good balance between - * enhancing parallel processing capabilities and maintaining efficient - * resource management. + * Tests below show how long it takes to allocate 1 TiB of memory with 2MiB huge pages. + * 2MiB huge pages. Using more threads can significantly improve allocation time. * - * +------------+-------+-------+-------+-------+-------+ - * | multiplier | 1 | 2 | 3 | 4 | 5 | - * +------------+-------+-------+-------+-------+-------+ - * | 256G 2node | 358ms | 215ms | 157ms | 134ms | 126ms | - * | 2T 4node | 979ms | 679ms | 543ms | 489ms | 481ms | - * | 50G 2node | 71ms | 44ms | 37ms | 30ms | 31ms | - * +------------+-------+-------+-------+-------+-------+ + * +-----------------------+-------+-------+-------+-------+-------+ + * | threads | 8 | 16 | 32 | 64 | 128 | + * +-----------------------+-------+-------+-------+-------+-------+ + * | skylake 144 cpus | 44s | 22s | 16s | 19s | 20s | + * | cascade lake 192 cpus | 39s | 20s | 11s | 10s | 9s | + * +-----------------------+-------+-------+-------+-------+-------+ */ - job.max_threads = num_node_state(N_MEMORY) * 2; - job.min_chunk = h->max_huge_pages / num_node_state(N_MEMORY) / 2; + + job.max_threads = num_allocation_threads; + job.min_chunk = h->max_huge_pages / num_allocation_threads; padata_do_multithreaded(&job); return h->nr_huge_pages; From 71f745688985c59eee41ffe88497a9e62774a9bf Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Thu, 27 Feb 2025 23:45:06 +0100 Subject: [PATCH 1114/2157] mm: hugetlb: add hugetlb_alloc_threads cmdline option Add a command line option that enables control of how many threads should be used to allocate huge pages. [akpm@linux-foundation.org: tidy up a comment] Link: https://lkml.kernel.org/r/20250227-hugepage-parameter-v2-2-7db8c6dc0453@cyberus-technology.de Signed-off-by: Thomas Prescher Cc: Jonathan Corbet Cc: Muchun Song Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 9 +++++ Documentation/admin-guide/mm/hugetlbpage.rst | 10 ++++++ mm/hugetlb.c | 33 ++++++++++++++++--- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 491628ac071a..2758bc124f16 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1882,6 +1882,15 @@ Documentation/admin-guide/mm/hugetlbpage.rst. Format: size[KMG] + hugepage_alloc_threads= + [HW] The number of threads that should be used to + allocate hugepages during boot. This option can be + used to improve system bootup time when allocating + a large amount of huge pages. + The default value is 25% of the available hardware threads. + + Note that this parameter only applies to non-gigantic huge pages. + hugetlb_cma= [HW,CMA,EARLY] The size of a CMA area used for allocation of gigantic hugepages. Or using node format, the size of a CMA area per node can be specified. diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index f34a0d798d5b..67a941903fd2 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -145,7 +145,17 @@ hugepages It will allocate 1 2M hugepage on node0 and 2 2M hugepages on node1. If the node number is invalid, the parameter will be ignored. +hugepage_alloc_threads + Specify the number of threads that should be used to allocate hugepages + during boot. This parameter can be used to improve system bootup time + when allocating a large amount of huge pages. + The default value is 25% of the available hardware threads. + Example to use 8 allocation threads:: + + hugepage_alloc_threads=8 + + Note that this parameter only applies to non-gigantic huge pages. default_hugepagesz Specify the default huge page size. This parameter can only be specified once on the command line. default_hugepagesz can diff --git a/mm/hugetlb.c b/mm/hugetlb.c index edb846452791..486365cb2ece 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -85,6 +85,7 @@ static unsigned long __initdata default_hstate_max_huge_pages; static bool __initdata parsed_valid_hugepagesz = true; static bool __initdata parsed_default_hugepagesz; static unsigned int default_hugepages_in_node[MAX_NUMNODES] __initdata; +static unsigned long hugepage_allocation_threads __initdata; static char hstate_cmdline_buf[COMMAND_LINE_SIZE] __initdata; static int hstate_cmdline_index __initdata; @@ -3607,8 +3608,6 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) .numa_aware = true }; - unsigned int num_allocation_threads = max(num_online_cpus() / 4, 1); - job.thread_fn = hugetlb_pages_alloc_boot_node; job.start = 0; job.size = h->max_huge_pages; @@ -3629,9 +3628,13 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) * | cascade lake 192 cpus | 39s | 20s | 11s | 10s | 9s | * +-----------------------+-------+-------+-------+-------+-------+ */ + if (hugepage_allocation_threads == 0) { + hugepage_allocation_threads = num_online_cpus() / 4; + hugepage_allocation_threads = max(hugepage_allocation_threads, 1); + } - job.max_threads = num_allocation_threads; - job.min_chunk = h->max_huge_pages / num_allocation_threads; + job.max_threads = hugepage_allocation_threads; + job.min_chunk = h->max_huge_pages / hugepage_allocation_threads; padata_do_multithreaded(&job); return h->nr_huge_pages; @@ -5000,6 +5003,28 @@ void __init hugetlb_bootmem_alloc(void) __hugetlb_bootmem_allocated = true; } +/* + * hugepage_alloc_threads command line parsing. + * + * When set, use this specific number of threads for the boot + * allocation of hugepages. + */ +static int __init hugepage_alloc_threads_setup(char *s) +{ + unsigned long allocation_threads; + + if (kstrtoul(s, 0, &allocation_threads) != 0) + return 1; + + if (allocation_threads == 0) + return 1; + + hugepage_allocation_threads = allocation_threads; + + return 1; +} +__setup("hugepage_alloc_threads=", hugepage_alloc_threads_setup); + static unsigned int allowed_mems_nr(struct hstate *h) { int node; From 70478a5534af757f9bbc65bbb25b607bd0e69890 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Thu, 27 Feb 2025 23:45:07 +0100 Subject: [PATCH 1115/2157] mm: hugetlb: log time needed to allocate hugepages Having this information allows users to easily tune the hugepages_node_threads parameter. Link: https://lkml.kernel.org/r/20250227-hugepage-parameter-v2-3-7db8c6dc0453@cyberus-technology.de Signed-off-by: Thomas Prescher Cc: Jonathan Corbet Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 486365cb2ece..7a47a08e8526 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3608,6 +3608,9 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) .numa_aware = true }; + unsigned long jiffies_start; + unsigned long jiffies_end; + job.thread_fn = hugetlb_pages_alloc_boot_node; job.start = 0; job.size = h->max_huge_pages; @@ -3635,7 +3638,14 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h) job.max_threads = hugepage_allocation_threads; job.min_chunk = h->max_huge_pages / hugepage_allocation_threads; + + jiffies_start = jiffies; padata_do_multithreaded(&job); + jiffies_end = jiffies; + + pr_info("HugeTLB: allocation took %dms with hugepage_allocation_threads=%ld\n", + jiffies_to_msecs(jiffies_end - jiffies_start), + hugepage_allocation_threads); return h->nr_huge_pages; } From f1ab2831e2a4312046bca79256b2efc41d373eaf Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 4 Mar 2025 19:03:16 +0800 Subject: [PATCH 1116/2157] writeback: let trace_balance_dirty_pages() take struct dtc as parameter Patch series "Fix calculations in trace_balance_dirty_pages() for cgwb", v2. In my experiment, I found that the output of trace_balance_dirty_pages() in the cgroup writeback scenario was strange because trace_balance_dirty_pages() always uses global_wb_domain.dirty_limit for related calculations instead of the dirty_limit of the corresponding memcg's wb_domain. The basic idea of the fix is to store the hard dirty limit value computed in wb_position_ratio() into struct dirty_throttle_control and use it for calculations in trace_balance_dirty_pages(). This patch (of 3): Currently, trace_balance_dirty_pages() already has 12 parameters. In the patch #3, I initially attempted to introduce an additional parameter. However, in include/linux/trace_events.h, bpf_trace_run12() only supports up to 12 parameters and bpf_trace_run13() does not exist. To reduce the number of parameters in trace_balance_dirty_pages(), we can make it accept a pointer to struct dirty_throttle_control as a parameter. To achieve this, we need to move the definition of struct dirty_throttle_control from mm/page-writeback.c to include/linux/writeback.h. Link: https://lkml.kernel.org/r/20250304110318.159567-1-yizhou.tang@shopee.com Link: https://lkml.kernel.org/r/20250304110318.159567-2-yizhou.tang@shopee.com Signed-off-by: Tang Yizhou Cc: Alexei Starovoitov Cc: Christian Brauner Cc: Steven Rostedt Cc: Jan Kara Cc: "Masami Hiramatsu (Google)" Cc: Matthew Wilcow (Oracle) Cc: Tang Yizhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/writeback.h | 23 +++++++++++++++++++++ include/trace/events/writeback.h | 16 ++++++--------- mm/page-writeback.c | 35 ++------------------------------ 3 files changed, 31 insertions(+), 43 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d11b903c2edb..32095928365c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -313,6 +313,29 @@ static inline void cgroup_writeback_umount(struct super_block *sb) /* * mm/page-writeback.c */ +/* consolidated parameters for balance_dirty_pages() and its subroutines */ +struct dirty_throttle_control { +#ifdef CONFIG_CGROUP_WRITEBACK + struct wb_domain *dom; + struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ +#endif + struct bdi_writeback *wb; + struct fprop_local_percpu *wb_completions; + + unsigned long avail; /* dirtyable */ + unsigned long dirty; /* file_dirty + write + nfs */ + unsigned long thresh; /* dirty threshold */ + unsigned long bg_thresh; /* dirty background threshold */ + + unsigned long wb_dirty; /* per-wb counterparts */ + unsigned long wb_thresh; + unsigned long wb_bg_thresh; + + unsigned long pos_ratio; + bool freerun; + bool dirty_exceeded; +}; + void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_timer_fn(struct timer_list *t); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index a261e86e61fa..3213b9023794 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -629,11 +629,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, TRACE_EVENT(balance_dirty_pages, TP_PROTO(struct bdi_writeback *wb, - unsigned long thresh, - unsigned long bg_thresh, - unsigned long dirty, - unsigned long bdi_thresh, - unsigned long bdi_dirty, + struct dirty_throttle_control *dtc, unsigned long dirty_ratelimit, unsigned long task_ratelimit, unsigned long dirtied, @@ -641,7 +637,7 @@ TRACE_EVENT(balance_dirty_pages, long pause, unsigned long start_time), - TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, + TP_ARGS(wb, dtc, dirty_ratelimit, task_ratelimit, dirtied, period, pause, start_time), @@ -664,16 +660,16 @@ TRACE_EVENT(balance_dirty_pages, ), TP_fast_assign( - unsigned long freerun = (thresh + bg_thresh) / 2; + unsigned long freerun = (dtc->thresh + dtc->bg_thresh) / 2; strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + freerun) / 2; - __entry->dirty = dirty; + __entry->dirty = dtc->dirty; __entry->bdi_setpoint = __entry->setpoint * - bdi_thresh / (thresh + 1); - __entry->bdi_dirty = bdi_dirty; + dtc->wb_thresh / (dtc->thresh + 1); + __entry->bdi_dirty = dtc->wb_dirty; __entry->dirty_ratelimit = KBps(dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8b325aa525eb..149f8b815904 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -120,29 +120,6 @@ EXPORT_SYMBOL(laptop_mode); struct wb_domain global_wb_domain; -/* consolidated parameters for balance_dirty_pages() and its subroutines */ -struct dirty_throttle_control { -#ifdef CONFIG_CGROUP_WRITEBACK - struct wb_domain *dom; - struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ -#endif - struct bdi_writeback *wb; - struct fprop_local_percpu *wb_completions; - - unsigned long avail; /* dirtyable */ - unsigned long dirty; /* file_dirty + write + nfs */ - unsigned long thresh; /* dirty threshold */ - unsigned long bg_thresh; /* dirty background threshold */ - - unsigned long wb_dirty; /* per-wb counterparts */ - unsigned long wb_thresh; - unsigned long wb_bg_thresh; - - unsigned long pos_ratio; - bool freerun; - bool dirty_exceeded; -}; - /* * Length of period for aging writeout fractions of bdis. This is an * arbitrarily chosen number. The longer the period, the slower fractions will @@ -1962,11 +1939,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb, */ if (pause < min_pause) { trace_balance_dirty_pages(wb, - sdtc->thresh, - sdtc->bg_thresh, - sdtc->dirty, - sdtc->wb_thresh, - sdtc->wb_dirty, + sdtc, dirty_ratelimit, task_ratelimit, pages_dirtied, @@ -1991,11 +1964,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb, pause: trace_balance_dirty_pages(wb, - sdtc->thresh, - sdtc->bg_thresh, - sdtc->dirty, - sdtc->wb_thresh, - sdtc->wb_dirty, + sdtc, dirty_ratelimit, task_ratelimit, pages_dirtied, From 28c24ef9e04f95672b72b1297eff7dae91cceea8 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 4 Mar 2025 19:03:17 +0800 Subject: [PATCH 1117/2157] writeback: rename variables in trace_balance_dirty_pages() Rename bdi_setpoint and bdi_dirty in the tracepoint to wb_setpoint and wb_dirty, respectively. These changes were omitted by Tejun in the cgroup writeback patchset. Link: https://lkml.kernel.org/r/20250304110318.159567-3-yizhou.tang@shopee.com Signed-off-by: Tang Yizhou Cc: Alexei Starovoitov Cc: Christian Brauner Cc: Jan Kara Cc: "Masami Hiramatsu (Google)" Cc: Matthew Wilcow (Oracle) Cc: Steven Rostedt Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/trace/events/writeback.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 3213b9023794..3046ca6b08ea 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -646,8 +646,8 @@ TRACE_EVENT(balance_dirty_pages, __field(unsigned long, limit) __field(unsigned long, setpoint) __field(unsigned long, dirty) - __field(unsigned long, bdi_setpoint) - __field(unsigned long, bdi_dirty) + __field(unsigned long, wb_setpoint) + __field(unsigned long, wb_dirty) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned int, dirtied) @@ -667,9 +667,9 @@ TRACE_EVENT(balance_dirty_pages, __entry->setpoint = (global_wb_domain.dirty_limit + freerun) / 2; __entry->dirty = dtc->dirty; - __entry->bdi_setpoint = __entry->setpoint * + __entry->wb_setpoint = __entry->setpoint * dtc->wb_thresh / (dtc->thresh + 1); - __entry->bdi_dirty = dtc->wb_dirty; + __entry->wb_dirty = dtc->wb_dirty; __entry->dirty_ratelimit = KBps(dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; @@ -685,7 +685,7 @@ TRACE_EVENT(balance_dirty_pages, TP_printk("bdi %s: " "limit=%lu setpoint=%lu dirty=%lu " - "bdi_setpoint=%lu bdi_dirty=%lu " + "wb_setpoint=%lu wb_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%lu", @@ -693,8 +693,8 @@ TRACE_EVENT(balance_dirty_pages, __entry->limit, __entry->setpoint, __entry->dirty, - __entry->bdi_setpoint, - __entry->bdi_dirty, + __entry->wb_setpoint, + __entry->wb_dirty, __entry->dirty_ratelimit, __entry->task_ratelimit, __entry->dirtied, From 6cc4c3aa714bc58ec5d20f3054ca5f23534984d1 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 4 Mar 2025 19:03:18 +0800 Subject: [PATCH 1118/2157] writeback: fix calculations in trace_balance_dirty_pages() for cgwb In the commit dcc25ae76eb7 ("writeback: move global_dirty_limit into wb_domain") of the cgroup writeback backpressure propagation patchset, Tejun made some adaptations to trace_balance_dirty_pages() for cgroup writeback. However, this adaptation was incomplete and Tejun missed further adaptation in the subsequent patches. In the cgroup writeback scenario, if sdtc in balance_dirty_pages() is assigned to mdtc, then upon entering trace_balance_dirty_pages(), __entry->limit should be assigned based on the dirty_limit of the corresponding memcg's wb_domain, rather than global_wb_domain. To address this issue and simplify the implementation, introduce a 'limit' field in struct dirty_throttle_control to store the hard_limit value computed in wb_position_ratio() by calling hard_dirty_limit(). This field will then be used in trace_balance_dirty_pages() to assign the value to __entry->limit. Link: https://lkml.kernel.org/r/20250304110318.159567-4-yizhou.tang@shopee.com Fixes: dcc25ae76eb7 ("writeback: move global_dirty_limit into wb_domain") Signed-off-by: Tang Yizhou Acked-by: Tejun Heo Cc: Alexei Starovoitov Cc: Christian Brauner Cc: Jan Kara Cc: "Masami Hiramatsu (Google)" Cc: Matthew Wilcow (Oracle) Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/writeback.h | 1 + include/trace/events/writeback.h | 5 ++--- mm/page-writeback.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 32095928365c..58bda3347914 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -326,6 +326,7 @@ struct dirty_throttle_control { unsigned long dirty; /* file_dirty + write + nfs */ unsigned long thresh; /* dirty threshold */ unsigned long bg_thresh; /* dirty background threshold */ + unsigned long limit; /* hard dirty limit */ unsigned long wb_dirty; /* per-wb counterparts */ unsigned long wb_thresh; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 3046ca6b08ea..0ff388131fc9 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -663,9 +663,8 @@ TRACE_EVENT(balance_dirty_pages, unsigned long freerun = (dtc->thresh + dtc->bg_thresh) / 2; strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); - __entry->limit = global_wb_domain.dirty_limit; - __entry->setpoint = (global_wb_domain.dirty_limit + - freerun) / 2; + __entry->limit = dtc->limit; + __entry->setpoint = (dtc->limit + freerun) / 2; __entry->dirty = dtc->dirty; __entry->wb_setpoint = __entry->setpoint * dtc->wb_thresh / (dtc->thresh + 1); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 149f8b815904..18456ddd463b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1072,7 +1072,7 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc) struct bdi_writeback *wb = dtc->wb; unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); - unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); + unsigned long limit = dtc->limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); unsigned long wb_thresh = dtc->wb_thresh; unsigned long x_intercept; unsigned long setpoint; /* dirty pages' target balance point */ From ab82e57981d0e4cb46d2817e7b65b9d5fdcf3832 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:05 -0800 Subject: [PATCH 1119/2157] mm/damon/core: introduce damos->ops_filters Patch series "mm/damon: make allow filters after reject filters useful and intuitive". DAMOS filters do allow or reject elements of memory for given DAMOS scheme only if those match the filter criterias. For elements that don't match any DAMOS filter, 'allowing' is the default behavior. This makes allow-filters that don't have any reject-filter after them meaningless sources of overhead. The decision was made to keep the behavior consistent with that before the introduction of allow-filters. This, however, makes usage of DAMOS filters confusing and inefficient. It is more intuitive and still consistent behavior to reject by default unless there is no filter at all or the last filter is a reject filter. Update the filtering logic in the way and update documents to clarify the behavior. Note that this is changing the old behavior. But the old behavior for the problematic filter combination was definitely confusing, inefficient and anyway useless. Also, the behavior has relatively recently introduced. It is difficult to anticipate any user that depends on the behavior. Hence this is not a user-breaking behavior change but an obvious improvement. This patch (of 9): DAMOS filters can be categorized into two groups depending on which layer they are handled, namely core layer and ops layer. The groups are important because the filtering behavior depends on evaluation sequence of filters, and core layer-handled filters are evaluated before operations layer-handled ones. The behavior is clearly documented, but the implementation is bit inefficient and complicated. All filters are maintained in a single list (damos->filters) in mix. Filters evaluation logics in core layer and operations layer iterates all the filters on the list, while skipping filters that should be not handled by the layer of the logic. It is inefficient. Making future extensions having differentiations for filters of different handling layers will also be complicated. Add a new list that will be used for having all operations layer-handled DAMOS filters to DAMOS scheme data structure. Also add the support of its initialization and basic traversal functions. Link: https://lkml.kernel.org/r/20250304211913.53574-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250304211913.53574-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 8 ++++++++ mm/damon/core.c | 1 + 2 files changed, 9 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index b3e2c793c1f4..7f76e2e99f37 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -448,6 +448,7 @@ struct damos_access_pattern { * @wmarks: Watermarks for automated (in)activation of this scheme. * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. + * @ops_filters: ops layer handling &struct damos_filter objects list. * @last_applied: Last @action applied ops-managing entity. * @stat: Statistics of this scheme. * @list: List head for siblings. @@ -508,6 +509,7 @@ struct damos { int target_nid; }; struct list_head filters; + struct list_head ops_filters; void *last_applied; struct damos_stat stat; struct list_head list; @@ -858,6 +860,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damos_for_each_filter_safe(f, next, scheme) \ list_for_each_entry_safe(f, next, &(scheme)->filters, list) +#define damos_for_each_ops_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->ops_filters, list) + +#define damos_for_each_ops_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->ops_filters, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); diff --git a/mm/damon/core.c b/mm/damon/core.c index 9d37d3664030..5415b7603d01 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -375,6 +375,7 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, scheme->next_apply_sis = 0; scheme->walk_completed = false; INIT_LIST_HEAD(&scheme->filters); + INIT_LIST_HEAD(&scheme->ops_filters); scheme->stat = (struct damos_stat){}; INIT_LIST_HEAD(&scheme->list); From ac7b094bf4d6bd34cea84d1f97f4fe5c45984b6a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:06 -0800 Subject: [PATCH 1120/2157] mm/damon/paddr: support ops_filters DAMON keeps all DAMOS filters in damos->filters. Upcoming changes will make it to use damos->ops_filters for all operations layer handled DAMOS filters, though. DAMON physical address space operations set implementation (paddr) is not ready for the changes, since it handles only damos->filters. To avoid any breakage during the upcoming changes, make paddr to handle both lists. After the change is made, ->filters support on paddr can be safely removed. Link: https://lkml.kernel.org/r/20250304211913.53574-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index d5db313ca717..2b1ea568a431 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -260,6 +260,10 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) if (damos_pa_filter_match(filter, folio)) return !filter->allow; } + damos_for_each_ops_filter(filter, scheme) { + if (damos_pa_filter_match(filter, folio)) + return !filter->allow; + } return false; } @@ -290,6 +294,12 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, break; } } + damos_for_each_ops_filter(filter, s) { + if (filter->type == DAMOS_FILTER_TYPE_YOUNG) { + install_young_filter = false; + break; + } + } if (install_young_filter) { filter = damos_new_filter( DAMOS_FILTER_TYPE_YOUNG, true, false); @@ -538,6 +548,8 @@ static bool damon_pa_scheme_has_filter(struct damos *s) damos_for_each_filter(f, s) return true; + damos_for_each_ops_filter(f, s) + return true; return false; } From 3607cc590f183179dd804faac27ee7284f6b6bf8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:07 -0800 Subject: [PATCH 1121/2157] mm/damon/core: support committing ops_filters DAMON kernel API callers should use damon_commit_ctx() to install DAMON parameters including DAMOS filters. But damos_commit_ops_filters(), which is called by damon_commit_ctx() for filters installing, is not handling damos->ops_filters. Hence, no DAMON kernel API caller can use damos->ops_filters. Do the committing of the ops_filters to make it usable. Link: https://lkml.kernel.org/r/20250304211913.53574-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/core.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 5415b7603d01..1daccccb5d67 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -820,7 +820,7 @@ static void damos_commit_filter( damos_commit_filter_arg(dst, src); } -static int damos_commit_filters(struct damos *dst, struct damos *src) +static int damos_commit_core_filters(struct damos *dst, struct damos *src) { struct damos_filter *dst_filter, *next, *src_filter, *new_filter; int i = 0, j = 0; @@ -848,6 +848,44 @@ static int damos_commit_filters(struct damos *dst, struct damos *src) return 0; } +static int damos_commit_ops_filters(struct damos *dst, struct damos *src) +{ + struct damos_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0; + + damos_for_each_ops_filter_safe(dst_filter, next, dst) { + src_filter = damos_nth_filter(i++, src); + if (src_filter) + damos_commit_filter(dst_filter, src_filter); + else + damos_destroy_filter(dst_filter); + } + + damos_for_each_ops_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damos_new_filter( + src_filter->type, src_filter->matching, + src_filter->allow); + if (!new_filter) + return -ENOMEM; + damos_commit_filter_arg(new_filter, src_filter); + damos_add_filter(dst, new_filter); + } + return 0; +} + +static int damos_commit_filters(struct damos *dst, struct damos *src) +{ + int err; + + err = damos_commit_core_filters(dst, src); + if (err) + return err; + return damos_commit_ops_filters(dst, src); +} + static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx) { struct damos *s; From 2a689e4e83bdc90cd00ca21aa28d337d202f4950 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:08 -0800 Subject: [PATCH 1122/2157] mm/damon/core: put ops-handled filters to damos->ops_filters damos->ops_filters has introduced to be used for all operations layer handled filters. But DAMON kernel API callers can put any type of DAMOS filters to any of damos->filters and damos->ops_filters. DAMON user-space ABI users have no way to use ->ops_filters at all. Update damos_add_filter(), which should be used by API callers to install DAMOS filters, to add filters to ->filters and ->ops_filters depending on their handling layer. The change forces both API callers and ABI users to use proper lists since ABI users use the API internally. Link: https://lkml.kernel.org/r/20250304211913.53574-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/core.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 1daccccb5d67..3fbc31d17239 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -281,9 +281,24 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type, return filter; } +static bool damos_filter_for_ops(enum damos_filter_type type) +{ + switch (type) { + case DAMOS_FILTER_TYPE_ADDR: + case DAMOS_FILTER_TYPE_TARGET: + return false; + default: + break; + } + return true; +} + void damos_add_filter(struct damos *s, struct damos_filter *f) { - list_add_tail(&f->list, &s->filters); + if (damos_filter_for_ops(f->type)) + list_add_tail(&f->list, &s->ops_filters); + else + list_add_tail(&f->list, &s->filters); } static void damos_del_filter(struct damos_filter *f) From 627983a55221d429db4fe9ecb75c4ef2f04acd15 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:09 -0800 Subject: [PATCH 1123/2157] mm/damon/paddr: support only damos->ops_filters DAMON physical address space operation set implementation (paddr) started handling both damos->filters and damos->ops_filters to avoid breakage during the change for the ->ops_filters setup. Now the change is done, so paddr's support of ->filters is only a waste that can safely be dropped. Remove it. Link: https://lkml.kernel.org/r/20250304211913.53574-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 2b1ea568a431..dded659bb110 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -256,10 +256,6 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) if (scheme->core_filters_allowed) return false; - damos_for_each_filter(filter, scheme) { - if (damos_pa_filter_match(filter, folio)) - return !filter->allow; - } damos_for_each_ops_filter(filter, scheme) { if (damos_pa_filter_match(filter, folio)) return !filter->allow; @@ -288,12 +284,6 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, struct folio *folio; /* check access in page level again by default */ - damos_for_each_filter(filter, s) { - if (filter->type == DAMOS_FILTER_TYPE_YOUNG) { - install_young_filter = false; - break; - } - } damos_for_each_ops_filter(filter, s) { if (filter->type == DAMOS_FILTER_TYPE_YOUNG) { install_young_filter = false; @@ -546,8 +536,6 @@ static bool damon_pa_scheme_has_filter(struct damos *s) { struct damos_filter *f; - damos_for_each_filter(f, s) - return true; damos_for_each_ops_filter(f, s) return true; return false; From dd038b728c8a2a0e1a632b767a50f09f076dab79 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:10 -0800 Subject: [PATCH 1124/2157] mm/damon: add default allow/reject behavior fields to struct damos Current default allow/reject behavior of filters handling stage has made before introduction of the allow behavior. For allow-filters usage, it is confusing and inefficient. It is more intuitive to decide the default filtering stage allow/reject behavior as opposite to the last filter's behavior. The decision should be made separately for core and operations layers' filtering stages, since last core layer-handled filter is not really a last filter if there are operations layer handling filters. Keeping separate decisions for the two categories can make the logic simpler. Add fields for storing the two decisions. Link: https://lkml.kernel.org/r/20250304211913.53574-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 7f76e2e99f37..52559475dbe7 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -502,6 +502,9 @@ struct damos { * layer-handled filters. If true, operations layer allows it, too. */ bool core_filters_allowed; + /* whether to reject core/ops filters umatched regions */ + bool core_filters_default_reject; + bool ops_filters_default_reject; /* public: */ struct damos_quota quota; struct damos_watermarks wmarks; From 961df88e4688bf94cfa49d644e49b74d34806d3d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:11 -0800 Subject: [PATCH 1125/2157] mm/damon/core: set damos_filter default allowance behavior based on installed filters Decide whether to allow or reject by default on core and opertions layer handled filters evaluation stages. It is decided as the opposite of the last installed filter's behavior. If there is no filter at all, allow by default. If there is any operations layer handled filters, core layer's filtering stage sets allowing as the default behavior regardless of the last filter of core layer-handling ones, since the last filter of core layer handled filters in the case is not really the last filter of the entire filtering stage. Also, make the core layer's DAMOS filters handling stage uses the newly set behavior field. [sj@kernel.org: setup damos->{core,ops}_filters_default_reject for initial start] Link: https://lkml.kernel.org/r/20250315222610.35245-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250304211913.53574-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/core.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 3fbc31d17239..511c464adcc5 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -518,7 +518,7 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.ops_update_interval = 60 * 1000 * 1000; ctx->passed_sample_intervals = 0; - /* These will be set from kdamond_init_intervals_sis() */ + /* These will be set from kdamond_init_ctx() */ ctx->next_aggregation_sis = 0; ctx->next_ops_update_sis = 0; @@ -891,6 +891,32 @@ static int damos_commit_ops_filters(struct damos *dst, struct damos *src) return 0; } +/** + * damos_filters_default_reject() - decide whether to reject memory that didn't + * match with any given filter. + * @filters: Given DAMOS filters of a group. + */ +static bool damos_filters_default_reject(struct list_head *filters) +{ + struct damos_filter *last_filter; + + if (list_empty(filters)) + return false; + last_filter = list_last_entry(filters, struct damos_filter, list); + return last_filter->allow; +} + +static void damos_set_filters_default_reject(struct damos *s) +{ + if (!list_empty(&s->ops_filters)) + s->core_filters_default_reject = false; + else + s->core_filters_default_reject = + damos_filters_default_reject(&s->filters); + s->ops_filters_default_reject = + damos_filters_default_reject(&s->ops_filters); +} + static int damos_commit_filters(struct damos *dst, struct damos *src) { int err; @@ -898,7 +924,11 @@ static int damos_commit_filters(struct damos *dst, struct damos *src) err = damos_commit_core_filters(dst, src); if (err) return err; - return damos_commit_ops_filters(dst, src); + err = damos_commit_ops_filters(dst, src); + if (err) + return err; + damos_set_filters_default_reject(dst); + return 0; } static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx) @@ -1580,7 +1610,7 @@ static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, return !filter->allow; } } - return false; + return s->core_filters_default_reject; } /* @@ -2315,7 +2345,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) return -EBUSY; } -static void kdamond_init_intervals_sis(struct damon_ctx *ctx) +static void kdamond_init_ctx(struct damon_ctx *ctx) { unsigned long sample_interval = ctx->attrs.sample_interval ? ctx->attrs.sample_interval : 1; @@ -2333,6 +2363,7 @@ static void kdamond_init_intervals_sis(struct damon_ctx *ctx) apply_interval = scheme->apply_interval_us ? scheme->apply_interval_us : ctx->attrs.aggr_interval; scheme->next_apply_sis = apply_interval / sample_interval; + damos_set_filters_default_reject(scheme); } } @@ -2350,7 +2381,7 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); complete(&ctx->kdamond_started); - kdamond_init_intervals_sis(ctx); + kdamond_init_ctx(ctx); if (ctx->ops.init) ctx->ops.init(ctx); From a54c42f6873d0fc9d7667433112e34a732c3b228 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:12 -0800 Subject: [PATCH 1126/2157] mm/damon/paddr: respect ops_filters_default_reject Use damos->ops_filters_default_reject, which is set based on the installed filters' behaviors, from physical address space DAMON operations set. Link: https://lkml.kernel.org/r/20250304211913.53574-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index dded659bb110..fba8b3c8ba30 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -260,7 +260,7 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) if (damos_pa_filter_match(filter, folio)) return !filter->allow; } - return false; + return scheme->ops_filters_default_reject; } static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s) From 9ea705a54badbc3f33daf60c2da989c24c467e77 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 4 Mar 2025 13:19:13 -0800 Subject: [PATCH 1127/2157] Docs/mm/damon/design: update for changed filter-default behavior Update the design documentation for changed DAMOS filters default allowance behaviors. Link: https://lkml.kernel.org/r/20250304211913.53574-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index e6fd3b604e70..aae3a691ee69 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -631,9 +631,10 @@ When multiple filters are installed, the group of filters that handled by the core layer are evaluated first. After that, the group of filters that handled by the operations layer are evaluated. Filters in each of the groups are evaluated in the installed order. If a part of memory is matched to one of the -filter, next filters are ignored. If the memory passes through the filters +filter, next filters are ignored. If the part passes through the filters evaluation stage because it is not matched to any of the filters, applying the -scheme's action to it is allowed, same to the behavior when no filter exists. +scheme's action to it depends on the last filter's allowance type. If the last +filter was for allowing, the part of memory will be rejected, and vice versa. For example, let's assume 1) a filter for allowing anonymous pages and 2) another filter for rejecting young pages are installed in the order. If a page @@ -645,11 +646,6 @@ second reject-filter blocks it. If the page is neither anonymous nor young, the page will pass through the filters evaluation stage since there is no matching filter, and the action will be applied to the page. -Note that the action can equally be applied to memory that either explicitly -filter-allowed or filters evaluation stage passed. It means that installing -allow-filters at the end of the list makes no practical change but only -filters-checking overhead. - Below ``type`` of filters are currently supported. - Core layer handled From ac55b38fe2f9b486031439c5c4ed7fce07d0d838 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Wed, 5 Mar 2025 15:17:59 +0800 Subject: [PATCH 1128/2157] mm/shrinker: fix name consistency issue in shrinker_debugfs_rename() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After calling debugfs_change_name function, the return value should be checked and the old name restored. If debugfs_change_name fails, the new name memory should be freed. The effect is that the shrinker->name is not consistent with the name displayed in debugfs. Link: https://lkml.kernel.org/r/20250305071759.661055-1-liuye@kylinos.cn Signed-off-by: Liu Ye Reviewed-by: Muchun Song Reviewed-by:Qi Zheng Cc: Dave Chinner Cc: Muchun Song Cc: Qi Zheng Signed-off-by: Andrew Morton --- mm/shrinker_debug.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index 794bd433cce0..20eaee3e97f7 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -214,10 +214,14 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) ret = debugfs_change_name(shrinker->debugfs_entry, "%s-%d", shrinker->name, shrinker->debugfs_id); + if (ret) { + shrinker->name = old; + kfree_const(new); + } else { + kfree_const(old); + } mutex_unlock(&shrinker_mutex); - kfree_const(old); - return ret; } EXPORT_SYMBOL(shrinker_debugfs_rename); From 9bbe033c75a56d72fc35e7c8ca6f3258d9782fa5 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 5 Mar 2025 06:11:29 +0000 Subject: [PATCH 1129/2157] mm: zpool: add interfaces for object read/write APIs Patch series "Switch zswap to object read/write APIs". This patch series updates zswap to use the new object read/write APIs defined by zsmalloc in [1], and remove the old object mapping APIs and the related code from zpool and zsmalloc. This patch (of 5): Zsmalloc introduced new APIs to read/write objects besides mapping them. Add the necessary zpool interfaces. Link: https://lkml.kernel.org/r/20250305061134.4105762-1-yosry.ahmed@linux.dev Link: https://lkml.kernel.org/r/20250305061134.4105762-2-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Reviewed-by: Sergey Senozhatsky Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Chengming Zhou Cc: Herbert Xu Cc: Minchan Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/zpool.h | 17 +++++++++++++++ mm/zpool.c | 48 +++++++++++++++++++++++++++++++++++++++++++ mm/zsmalloc.c | 21 +++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 5e6dc46b8cc4..1784e735ee04 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -52,6 +52,16 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle, void zpool_unmap_handle(struct zpool *pool, unsigned long handle); + +void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, + void *local_copy); + +void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, + void *handle_mem); + +void zpool_obj_write(struct zpool *zpool, unsigned long handle, + void *handle_mem, size_t mem_len); + u64 zpool_get_total_pages(struct zpool *pool); @@ -90,6 +100,13 @@ struct zpool_driver { enum zpool_mapmode mm); void (*unmap)(void *pool, unsigned long handle); + void *(*obj_read_begin)(void *pool, unsigned long handle, + void *local_copy); + void (*obj_read_end)(void *pool, unsigned long handle, + void *handle_mem); + void (*obj_write)(void *pool, unsigned long handle, + void *handle_mem, size_t mem_len); + u64 (*total_pages)(void *pool); }; diff --git a/mm/zpool.c b/mm/zpool.c index 4bbd12d4b659..378c2d1e5638 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -320,6 +320,54 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) zpool->driver->unmap(zpool->pool, handle); } +/** + * zpool_obj_read_begin() - Start reading from a previously allocated handle. + * @zpool: The zpool that the handle was allocated from + * @handle: The handle to read from + * @local_copy: A local buffer to use if needed. + * + * This starts a read operation of a previously allocated handle. The passed + * @local_copy buffer may be used if needed by copying the memory into. + * zpool_obj_read_end() MUST be called after the read is completed to undo any + * actions taken (e.g. release locks). + * + * Returns: A pointer to the handle memory to be read, if @local_copy is used, + * the returned pointer is @local_copy. + */ +void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, + void *local_copy) +{ + return zpool->driver->obj_read_begin(zpool->pool, handle, local_copy); +} + +/** + * zpool_obj_read_end() - Finish reading from a previously allocated handle. + * @zpool: The zpool that the handle was allocated from + * @handle: The handle to read from + * @handle_mem: The pointer returned by zpool_obj_read_begin() + * + * Finishes a read operation previously started by zpool_obj_read_begin(). + */ +void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, + void *handle_mem) +{ + zpool->driver->obj_read_end(zpool->pool, handle, handle_mem); +} + +/** + * zpool_obj_write() - Write to a previously allocated handle. + * @zpool: The zpool that the handle was allocated from + * @handle: The handle to read from + * @handle_mem: The memory to copy from into the handle. + * @mem_len: The length of memory to be written. + * + */ +void zpool_obj_write(struct zpool *zpool, unsigned long handle, + void *handle_mem, size_t mem_len) +{ + zpool->driver->obj_write(zpool->pool, handle, handle_mem, mem_len); +} + /** * zpool_get_total_pages() - The total size of the pool * @zpool: The zpool to check diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 63c99db71dc1..d84b300db64e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -507,6 +507,24 @@ static void zs_zpool_unmap(void *pool, unsigned long handle) zs_unmap_object(pool, handle); } +static void *zs_zpool_obj_read_begin(void *pool, unsigned long handle, + void *local_copy) +{ + return zs_obj_read_begin(pool, handle, local_copy); +} + +static void zs_zpool_obj_read_end(void *pool, unsigned long handle, + void *handle_mem) +{ + zs_obj_read_end(pool, handle, handle_mem); +} + +static void zs_zpool_obj_write(void *pool, unsigned long handle, + void *handle_mem, size_t mem_len) +{ + zs_obj_write(pool, handle, handle_mem, mem_len); +} + static u64 zs_zpool_total_pages(void *pool) { return zs_get_total_pages(pool); @@ -522,6 +540,9 @@ static struct zpool_driver zs_zpool_driver = { .free = zs_zpool_free, .map = zs_zpool_map, .unmap = zs_zpool_unmap, + .obj_read_begin = zs_zpool_obj_read_begin, + .obj_read_end = zs_zpool_obj_read_end, + .obj_write = zs_zpool_obj_write, .total_pages = zs_zpool_total_pages, }; From 7d4c9629b74ff7ad3b58e57324e235d710e55c21 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 5 Mar 2025 06:11:30 +0000 Subject: [PATCH 1130/2157] mm: zswap: use object read/write APIs instead of object mapping APIs Use the new object read/write APIs instead of mapping APIs. On compress side, zpool_obj_write() is more concise and provides exactly what zswap needs to write the compressed object to the zpool, instead of map->copy->unmap. On the decompress side, zpool_obj_read_begin() is sleepable, which allows avoiding the memcpy() for zsmalloc and slightly simplifying the code by: - Avoiding checking if the zpool driver is sleepable, reducing special cases and shrinking the huge comment. - Having a single zpool_obj_read_end() call rather than multiple conditional zpool_unmap_handle() calls. The !virt_addr_valid() case can be removed in the future if the crypto API supports kmap addresses or by using kmap_to_page(), completely eliminating the memcpy() path in zswap_decompress(). This a step toward that. In that spirit, opportunistically make the comment more specific about the kmap case instead of generic non-linear addresses. This is the only case that needs to be handled in practice, and the generic comment makes it seem like a bigger problem that it actually is. Link: https://lkml.kernel.org/r/20250305061134.4105762-3-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Chengming Zhou Cc: Herbert Xu Cc: Minchan Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- mm/zswap.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 8a1ded8fa973..7de54f105d04 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -930,7 +930,6 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, unsigned int dlen = PAGE_SIZE; unsigned long handle; struct zpool *zpool; - char *buf; gfp_t gfp; u8 *dst; @@ -972,10 +971,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, if (alloc_ret) goto unlock; - buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO); - memcpy(buf, dst, dlen); - zpool_unmap_handle(zpool, handle); - + zpool_obj_write(zpool, handle, dst, dlen); entry->handle = handle; entry->length = dlen; @@ -996,24 +992,22 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; - u8 *src; + u8 *src, *obj; acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); - src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); + obj = zpool_obj_read_begin(zpool, entry->handle, acomp_ctx->buffer); + /* - * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer - * to do crypto_acomp_decompress() which might sleep. In such cases, we must - * resort to copying the buffer to a temporary one. - * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer, - * such as a kmap address of high memory or even ever a vmap address. - * However, sg_init_one is only equipped to handle linearly mapped low memory. - * In such cases, we also must copy the buffer to a temporary and lowmem one. + * zpool_obj_read_begin() might return a kmap address of highmem when + * acomp_ctx->buffer is not used. However, sg_init_one() does not + * handle highmem addresses, so copy the object to acomp_ctx->buffer. */ - if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) || - !virt_addr_valid(src)) { - memcpy(acomp_ctx->buffer, src, entry->length); + if (virt_addr_valid(obj)) { + src = obj; + } else { + WARN_ON_ONCE(obj == acomp_ctx->buffer); + memcpy(acomp_ctx->buffer, obj, entry->length); src = acomp_ctx->buffer; - zpool_unmap_handle(zpool, entry->handle); } sg_init_one(&input, src, entry->length); @@ -1023,8 +1017,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); - if (src != acomp_ctx->buffer) - zpool_unmap_handle(zpool, entry->handle); + zpool_obj_read_end(zpool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); } From fcbea574754c63f7035d0c4ef7dfb161b60b5bde Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 5 Mar 2025 06:11:31 +0000 Subject: [PATCH 1131/2157] mm: zpool: remove object mapping APIs zpool_map_handle(), zpool_unmap_handle(), and zpool_can_sleep_mapped() are no longer used. Remove them with the underlying driver callbacks. Link: https://lkml.kernel.org/r/20250305061134.4105762-4-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Reviewed-by: Sergey Senozhatsky Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Chengming Zhou Cc: Herbert Xu Cc: Minchan Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/zpool.h | 30 --------------------- mm/zpool.c | 61 ------------------------------------------- mm/zsmalloc.c | 27 ------------------- 3 files changed, 118 deletions(-) diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 1784e735ee04..2c8a9d2654f6 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -13,25 +13,6 @@ struct zpool; -/* - * Control how a handle is mapped. It will be ignored if the - * implementation does not support it. Its use is optional. - * Note that this does not refer to memory protection, it - * refers to how the memory will be copied in/out if copying - * is necessary during mapping; read-write is the safest as - * it copies the existing memory in on map, and copies the - * changed memory back out on unmap. Write-only does not copy - * in the memory and should only be used for initialization. - * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. - */ -enum zpool_mapmode { - ZPOOL_MM_RW, /* normal read-write mapping */ - ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ - ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ - - ZPOOL_MM_DEFAULT = ZPOOL_MM_RW -}; - bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp); @@ -47,12 +28,6 @@ int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, void zpool_free(struct zpool *pool, unsigned long handle); -void *zpool_map_handle(struct zpool *pool, unsigned long handle, - enum zpool_mapmode mm); - -void zpool_unmap_handle(struct zpool *pool, unsigned long handle); - - void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, void *local_copy); @@ -95,11 +70,6 @@ struct zpool_driver { unsigned long *handle); void (*free)(void *pool, unsigned long handle); - bool sleep_mapped; - void *(*map)(void *pool, unsigned long handle, - enum zpool_mapmode mm); - void (*unmap)(void *pool, unsigned long handle); - void *(*obj_read_begin)(void *pool, unsigned long handle, void *local_copy); void (*obj_read_end)(void *pool, unsigned long handle, diff --git a/mm/zpool.c b/mm/zpool.c index 378c2d1e5638..4fc665b42f5e 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -277,49 +277,6 @@ void zpool_free(struct zpool *zpool, unsigned long handle) zpool->driver->free(zpool->pool, handle); } -/** - * zpool_map_handle() - Map a previously allocated handle into memory - * @zpool: The zpool that the handle was allocated from - * @handle: The handle to map - * @mapmode: How the memory should be mapped - * - * This maps a previously allocated handle into memory. The @mapmode - * param indicates to the implementation how the memory will be - * used, i.e. read-only, write-only, read-write. If the - * implementation does not support it, the memory will be treated - * as read-write. - * - * This may hold locks, disable interrupts, and/or preemption, - * and the zpool_unmap_handle() must be called to undo those - * actions. The code that uses the mapped handle should complete - * its operations on the mapped handle memory quickly and unmap - * as soon as possible. As the implementation may use per-cpu - * data, multiple handles should not be mapped concurrently on - * any cpu. - * - * Returns: A pointer to the handle's mapped memory area. - */ -void *zpool_map_handle(struct zpool *zpool, unsigned long handle, - enum zpool_mapmode mapmode) -{ - return zpool->driver->map(zpool->pool, handle, mapmode); -} - -/** - * zpool_unmap_handle() - Unmap a previously mapped handle - * @zpool: The zpool that the handle was allocated from - * @handle: The handle to unmap - * - * This unmaps a previously mapped handle. Any locks or other - * actions that the implementation took in zpool_map_handle() - * will be undone here. The memory area returned from - * zpool_map_handle() should no longer be used after this. - */ -void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) -{ - zpool->driver->unmap(zpool->pool, handle); -} - /** * zpool_obj_read_begin() - Start reading from a previously allocated handle. * @zpool: The zpool that the handle was allocated from @@ -381,23 +338,5 @@ u64 zpool_get_total_pages(struct zpool *zpool) return zpool->driver->total_pages(zpool->pool); } -/** - * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped. - * @zpool: The zpool to test - * - * Some allocators enter non-preemptible context in ->map() callback (e.g. - * disable pagefaults) and exit that context in ->unmap(), which limits what - * we can do with the mapped object. For instance, we cannot wait for - * asynchronous crypto API to decompress such an object or take mutexes - * since those will call into the scheduler. This function tells us whether - * we use such an allocator. - * - * Returns: true if zpool can sleep; false otherwise. - */ -bool zpool_can_sleep_mapped(struct zpool *zpool) -{ - return zpool->driver->sleep_mapped; -} - MODULE_AUTHOR("Dan Streetman "); MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index d84b300db64e..56d6ed5c675b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -482,31 +482,6 @@ static void zs_zpool_free(void *pool, unsigned long handle) zs_free(pool, handle); } -static void *zs_zpool_map(void *pool, unsigned long handle, - enum zpool_mapmode mm) -{ - enum zs_mapmode zs_mm; - - switch (mm) { - case ZPOOL_MM_RO: - zs_mm = ZS_MM_RO; - break; - case ZPOOL_MM_WO: - zs_mm = ZS_MM_WO; - break; - case ZPOOL_MM_RW: - default: - zs_mm = ZS_MM_RW; - break; - } - - return zs_map_object(pool, handle, zs_mm); -} -static void zs_zpool_unmap(void *pool, unsigned long handle) -{ - zs_unmap_object(pool, handle); -} - static void *zs_zpool_obj_read_begin(void *pool, unsigned long handle, void *local_copy) { @@ -538,8 +513,6 @@ static struct zpool_driver zs_zpool_driver = { .malloc_support_movable = true, .malloc = zs_zpool_malloc, .free = zs_zpool_free, - .map = zs_zpool_map, - .unmap = zs_zpool_unmap, .obj_read_begin = zs_zpool_obj_read_begin, .obj_read_end = zs_zpool_obj_read_end, .obj_write = zs_zpool_obj_write, From 07864f1a57fb1f798a7d21f13e4929c9cb52daf7 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 5 Mar 2025 06:11:32 +0000 Subject: [PATCH 1132/2157] mm: zsmalloc: remove object mapping APIs and per-CPU map areas zs_map_object() and zs_unmap_object() are no longer used, remove them. Since these are the only users of per-CPU mapping_areas, remove them and the associated CPU hotplug callbacks too. [yosry.ahmed@linux.dev: update the docs] Link: https://lkml.kernel.org/r/Z8ier-ZZp8T6MOTH@google.com Link: https://lkml.kernel.org/r/20250305061134.4105762-5-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Acked-by: Sergey Senozhatsky Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Chengming Zhou Cc: Herbert Xu Cc: Minchan Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- Documentation/mm/zsmalloc.rst | 5 +- include/linux/cpuhotplug.h | 1 - include/linux/zsmalloc.h | 21 ---- mm/zsmalloc.c | 226 +--------------------------------- 4 files changed, 3 insertions(+), 250 deletions(-) diff --git a/Documentation/mm/zsmalloc.rst b/Documentation/mm/zsmalloc.rst index 76902835e68e..d2bbecd78e14 100644 --- a/Documentation/mm/zsmalloc.rst +++ b/Documentation/mm/zsmalloc.rst @@ -27,9 +27,8 @@ Instead, it returns an opaque handle (unsigned long) which encodes actual location of the allocated object. The reason for this indirection is that zsmalloc does not keep zspages permanently mapped since that would cause issues on 32-bit systems where the VA region for kernel space mappings -is very small. So, before using the allocating memory, the object has to -be mapped using zs_map_object() to get a usable pointer and subsequently -unmapped using zs_unmap_object(). +is very small. So, using the allocated memory should be done through the +proper handle-based APIs. stat ==== diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6cc5e484547c..1987400000b4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -116,7 +116,6 @@ enum cpuhp_state { CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, CPUHP_TRACE_RB_PREPARE, - CPUHP_MM_ZS_PREPARE, CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 7d70983cf398..c26baf9fb331 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -16,23 +16,6 @@ #include -/* - * zsmalloc mapping modes - * - * NOTE: These only make a difference when a mapped object spans pages. - */ -enum zs_mapmode { - ZS_MM_RW, /* normal read-write mapping */ - ZS_MM_RO, /* read-only (no copy-out at unmap time) */ - ZS_MM_WO /* write-only (no copy-in at map time) */ - /* - * NOTE: ZS_MM_WO should only be used for initializing new - * (uninitialized) allocations. Partial writes to already - * initialized allocations should use ZS_MM_RW to preserve the - * existing data. - */ -}; - struct zs_pool_stats { /* How many pages were migrated (freed) */ atomic_long_t pages_compacted; @@ -48,10 +31,6 @@ void zs_free(struct zs_pool *pool, unsigned long obj); size_t zs_huge_class_size(struct zs_pool *pool); -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm); -void zs_unmap_object(struct zs_pool *pool, unsigned long handle); - unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 56d6ed5c675b..cd1c2a8ffef0 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -281,13 +281,6 @@ struct zspage { struct zspage_lock zsl; }; -struct mapping_area { - local_lock_t lock; - char *vm_buf; /* copy buffer for objects that span pages */ - char *vm_addr; /* address of kmap_local_page()'ed pages */ - enum zs_mapmode vm_mm; /* mapping mode */ -}; - static void zspage_lock_init(struct zspage *zspage) { static struct lock_class_key __key; @@ -522,11 +515,6 @@ static struct zpool_driver zs_zpool_driver = { MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ -/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { - .lock = INIT_LOCAL_LOCK(lock), -}; - static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc) { return PagePrivate(zpdesc_page(zpdesc)); @@ -1111,93 +1099,6 @@ static struct zspage *find_get_zspage(struct size_class *class) return zspage; } -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm_buf) - return 0; - area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL); - if (!area->vm_buf) - return -ENOMEM; - return 0; -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - kfree(area->vm_buf); - area->vm_buf = NULL; -} - -static void *__zs_map_object(struct mapping_area *area, - struct zpdesc *zpdescs[2], int off, int size) -{ - size_t sizes[2]; - char *buf = area->vm_buf; - - /* disable page faults to match kmap_local_page() return conditions */ - pagefault_disable(); - - /* no read fastpath */ - if (area->vm_mm == ZS_MM_WO) - goto out; - - sizes[0] = PAGE_SIZE - off; - sizes[1] = size - sizes[0]; - - /* copy object to per-cpu buffer */ - memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]); - memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]); -out: - return area->vm_buf; -} - -static void __zs_unmap_object(struct mapping_area *area, - struct zpdesc *zpdescs[2], int off, int size) -{ - size_t sizes[2]; - char *buf; - - /* no write fastpath */ - if (area->vm_mm == ZS_MM_RO) - goto out; - - buf = area->vm_buf; - buf = buf + ZS_HANDLE_SIZE; - size -= ZS_HANDLE_SIZE; - off += ZS_HANDLE_SIZE; - - sizes[0] = PAGE_SIZE - off; - sizes[1] = size - sizes[0]; - - /* copy per-cpu buffer to object */ - memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]); - memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]); - -out: - /* enable page faults to match kunmap_local() return conditions */ - pagefault_enable(); -} - -static int zs_cpu_prepare(unsigned int cpu) -{ - struct mapping_area *area; - - area = &per_cpu(zs_map_area, cpu); - return __zs_cpu_up(area); -} - -static int zs_cpu_dead(unsigned int cpu) -{ - struct mapping_area *area; - - area = &per_cpu(zs_map_area, cpu); - __zs_cpu_down(area); - return 0; -} - static bool can_merge(struct size_class *prev, int pages_per_zspage, int objs_per_zspage) { @@ -1245,117 +1146,6 @@ unsigned long zs_get_total_pages(struct zs_pool *pool) } EXPORT_SYMBOL_GPL(zs_get_total_pages); -/** - * zs_map_object - get address of allocated object from handle. - * @pool: pool from which the object was allocated - * @handle: handle returned from zs_malloc - * @mm: mapping mode to use - * - * Before using an object allocated from zs_malloc, it must be mapped using - * this function. When done with the object, it must be unmapped using - * zs_unmap_object. - * - * Only one object can be mapped per cpu at a time. There is no protection - * against nested mappings. - * - * This function returns with preemption and page faults disabled. - */ -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm) -{ - struct zspage *zspage; - struct zpdesc *zpdesc; - unsigned long obj, off; - unsigned int obj_idx; - - struct size_class *class; - struct mapping_area *area; - struct zpdesc *zpdescs[2]; - void *ret; - - /* - * Because we use per-cpu mapping areas shared among the - * pools/users, we can't allow mapping in interrupt context - * because it can corrupt another users mappings. - */ - BUG_ON(in_interrupt()); - - /* It guarantees it can get zspage from handle safely */ - read_lock(&pool->lock); - obj = handle_to_obj(handle); - obj_to_location(obj, &zpdesc, &obj_idx); - zspage = get_zspage(zpdesc); - - /* - * migration cannot move any zpages in this zspage. Here, class->lock - * is too heavy since callers would take some time until they calls - * zs_unmap_object API so delegate the locking from class to zspage - * which is smaller granularity. - */ - zspage_read_lock(zspage); - read_unlock(&pool->lock); - - class = zspage_class(pool, zspage); - off = offset_in_page(class->size * obj_idx); - - local_lock(&zs_map_area.lock); - area = this_cpu_ptr(&zs_map_area); - area->vm_mm = mm; - if (off + class->size <= PAGE_SIZE) { - /* this object is contained entirely within a page */ - area->vm_addr = kmap_local_zpdesc(zpdesc); - ret = area->vm_addr + off; - goto out; - } - - /* this object spans two pages */ - zpdescs[0] = zpdesc; - zpdescs[1] = get_next_zpdesc(zpdesc); - BUG_ON(!zpdescs[1]); - - ret = __zs_map_object(area, zpdescs, off, class->size); -out: - if (likely(!ZsHugePage(zspage))) - ret += ZS_HANDLE_SIZE; - - return ret; -} -EXPORT_SYMBOL_GPL(zs_map_object); - -void zs_unmap_object(struct zs_pool *pool, unsigned long handle) -{ - struct zspage *zspage; - struct zpdesc *zpdesc; - unsigned long obj, off; - unsigned int obj_idx; - - struct size_class *class; - struct mapping_area *area; - - obj = handle_to_obj(handle); - obj_to_location(obj, &zpdesc, &obj_idx); - zspage = get_zspage(zpdesc); - class = zspage_class(pool, zspage); - off = offset_in_page(class->size * obj_idx); - - area = this_cpu_ptr(&zs_map_area); - if (off + class->size <= PAGE_SIZE) - kunmap_local(area->vm_addr); - else { - struct zpdesc *zpdescs[2]; - - zpdescs[0] = zpdesc; - zpdescs[1] = get_next_zpdesc(zpdesc); - BUG_ON(!zpdescs[1]); - - __zs_unmap_object(area, zpdescs, off, class->size); - } - local_unlock(&zs_map_area.lock); - - zspage_read_unlock(zspage); -} -EXPORT_SYMBOL_GPL(zs_unmap_object); - void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, void *local_copy) { @@ -1975,7 +1765,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page, * the class lock protects zpage alloc/free in the zspage. */ spin_lock(&class->lock); - /* the zspage write_lock protects zpage access via zs_map_object */ + /* the zspage write_lock protects zpage access via zs_obj_read/write() */ if (!zspage_write_trylock(zspage)) { spin_unlock(&class->lock); write_unlock(&pool->lock); @@ -2459,23 +2249,11 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); static int __init zs_init(void) { - int ret; - - ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare", - zs_cpu_prepare, zs_cpu_dead); - if (ret) - goto out; - #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); #endif - zs_stat_init(); - return 0; - -out: - return ret; } static void __exit zs_exit(void) @@ -2483,8 +2261,6 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); #endif - cpuhp_remove_state(CPUHP_MM_ZS_PREPARE); - zs_stat_exit(); } From 7b60041156079f256a7df3f7de469de7db618580 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 5 Mar 2025 06:11:33 +0000 Subject: [PATCH 1133/2157] mm: zpool: remove zpool_malloc_support_movable() zpool_malloc_support_movable() always returns true for zsmalloc, the only remaining zpool driver. Remove it and set the gfp flags in zswap_compress() accordingly. Opportunistically use GFP_NOWAIT instead of __GFP_NOWARN | __GFP_KSWAPD_RECLAIM for conciseness as they are equivalent. Link: https://lkml.kernel.org/r/20250305061134.4105762-6-yosry.ahmed@linux.dev Signed-off-by: Yosry Ahmed Reviewed-by: Sergey Senozhatsky Acked-by: Johannes Weiner Acked-by: Nhat Pham Cc: Thomas Gleixner Cc: Chengming Zhou Cc: Herbert Xu Cc: Minchan Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/zpool.h | 3 --- mm/zpool.c | 16 ---------------- mm/zsmalloc.c | 1 - mm/zswap.c | 4 +--- 4 files changed, 1 insertion(+), 23 deletions(-) diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 2c8a9d2654f6..52f30e526607 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -21,8 +21,6 @@ const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); -bool zpool_malloc_support_movable(struct zpool *pool); - int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, unsigned long *handle); @@ -65,7 +63,6 @@ struct zpool_driver { void *(*create)(const char *name, gfp_t gfp); void (*destroy)(void *pool); - bool malloc_support_movable; int (*malloc)(void *pool, size_t size, gfp_t gfp, unsigned long *handle); void (*free)(void *pool, unsigned long handle); diff --git a/mm/zpool.c b/mm/zpool.c index 4fc665b42f5e..6d6d88930932 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -220,22 +220,6 @@ const char *zpool_get_type(struct zpool *zpool) return zpool->driver->type; } -/** - * zpool_malloc_support_movable() - Check if the zpool supports - * allocating movable memory - * @zpool: The zpool to check - * - * This returns if the zpool supports allocating movable memory. - * - * Implementations must guarantee this to be thread-safe. - * - * Returns: true if the zpool supports allocating movable memory, false if not - */ -bool zpool_malloc_support_movable(struct zpool *zpool) -{ - return zpool->driver->malloc_support_movable; -} - /** * zpool_malloc() - Allocate memory * @zpool: The zpool to allocate from. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index cd1c2a8ffef0..961b270f023c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -503,7 +503,6 @@ static struct zpool_driver zs_zpool_driver = { .owner = THIS_MODULE, .create = zs_zpool_create, .destroy = zs_zpool_destroy, - .malloc_support_movable = true, .malloc = zs_zpool_malloc, .free = zs_zpool_free, .obj_read_begin = zs_zpool_obj_read_begin, diff --git a/mm/zswap.c b/mm/zswap.c index 7de54f105d04..5f0e62289444 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -964,9 +964,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, goto unlock; zpool = pool->zpool; - gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; - if (zpool_malloc_support_movable(zpool)) - gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; + gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE; alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle); if (alloc_ret) goto unlock; From c0d017896b72d7dd251dabf64765196ff9a46a0f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:17 +0800 Subject: [PATCH 1134/2157] mm: shmem: drop the unused macro Patch series "Some trivial cleanups for shmem". Patch 1 - Patch 5 do some trivial cleanups and refactoring for shmem. Patch 6 adds myself as shmem reviewer. This patch (of 6): Drop the unused 'BLOCKS_PER_PAGE' macro. Link: https://lkml.kernel.org/r/cover.1738918357.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/69264cee1d938442477e657004e4924f8a5c4dd4.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index b276ae233dfa..6d6d5fce2120 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -86,7 +86,6 @@ static struct vfsmount *shm_mnt __ro_after_init; #include "internal.h" -#define BLOCKS_PER_PAGE (PAGE_SIZE/512) #define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) /* Pretend that each entry is of this size in directory's i_size */ From 6d26a149f5483acdc8a9a7a8fcf5e737a324a2b6 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:18 +0800 Subject: [PATCH 1135/2157] mm: shmem: remove 'fadvise()' comments Similar to commit 255ff62d1586 ("docs: tmpfs: drop 'fadvise()' from the documentation"), fadvise() has no HUGEPAGE advise currently. Remove the confusing fadvise() comments. Link: https://lkml.kernel.org/r/fae702b9775f58b55b45be5eaad22d8586d0290a.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6d6d5fce2120..c63fd18cea50 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -525,9 +525,9 @@ static bool shmem_confirm_swap(struct address_space *mapping, * enables huge pages for the mount; * SHMEM_HUGE_WITHIN_SIZE: * only allocate huge pages if the page will be fully within i_size, - * also respect fadvise()/madvise() hints; + * also respect madvise() hints; * SHMEM_HUGE_ADVISE: - * only allocate huge pages if requested with fadvise()/madvise(); + * only allocate huge pages if requested with madvise(); */ #define SHMEM_HUGE_NEVER 0 From d5e4e147c0f59259cd527d58295ba1bfefbad481 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:19 +0800 Subject: [PATCH 1136/2157] mm: shmem: remove duplicate error validation Remove duplicate error code checks for 'start' and 'end', as the get_order_from_str() will only return -EINVAL if the cmdline string is configured incorrectly. Link: https://lkml.kernel.org/r/dfadaba4c8b24c5ae1467fe8b6744b654c65ec91.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index c63fd18cea50..51bdeea828a0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -5664,19 +5664,19 @@ static int __init setup_thp_shmem(char *str) THP_ORDERS_ALL_FILE_DEFAULT); } - if (start == -EINVAL) { + if (start < 0) { pr_err("invalid size %s in thp_shmem boot parameter\n", start_size); goto err; } - if (end == -EINVAL) { + if (end < 0) { pr_err("invalid size %s in thp_shmem boot parameter\n", end_size); goto err; } - if (start < 0 || end < 0 || start > end) + if (start > end) goto err; nr = end - start + 1; From cd81c424b53fa174df1e18b021806bc978c8fb7f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:20 +0800 Subject: [PATCH 1137/2157] mm: shmem: change the return value of shmem_find_swap_entries() The shmem_find_swap_entries() originally returned the index corresponding to the swap entry, but no callers used this return value. It should return the number of entries that were found like other functions, which can be used by the callers. No functional changes. Link: https://lkml.kernel.org/r/070489b5946b8379b2a2d25f78115cef167cd145.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 51bdeea828a0..f5a081563022 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1379,9 +1379,9 @@ static void shmem_evict_inode(struct inode *inode) #endif } -static int shmem_find_swap_entries(struct address_space *mapping, - pgoff_t start, struct folio_batch *fbatch, - pgoff_t *indices, unsigned int type) +static unsigned int shmem_find_swap_entries(struct address_space *mapping, + pgoff_t start, struct folio_batch *fbatch, + pgoff_t *indices, unsigned int type) { XA_STATE(xas, &mapping->i_pages, start); struct folio *folio; @@ -1414,7 +1414,7 @@ static int shmem_find_swap_entries(struct address_space *mapping, } rcu_read_unlock(); - return xas.xa_index; + return folio_batch_count(fbatch); } /* @@ -1461,8 +1461,8 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type) do { folio_batch_init(&fbatch); - shmem_find_swap_entries(mapping, start, &fbatch, indices, type); - if (folio_batch_count(&fbatch) == 0) { + if (!shmem_find_swap_entries(mapping, start, &fbatch, + indices, type)) { ret = 0; break; } From 086e66b690ae41c1c8c0ef0366cadeb089dff990 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:21 +0800 Subject: [PATCH 1138/2157] mm: shmem: factor out the within_size logic into a new helper Factor out the within_size logic into a new helper to remove duplicate code. Link: https://lkml.kernel.org/r/527dea9d7e32fe6b94c7fe00df2c126203017911.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Suggested-by: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index f5a081563022..8de9b4a07a8a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -590,6 +590,28 @@ shmem_mapping_size_orders(struct address_space *mapping, pgoff_t index, loff_t w return order > 0 ? BIT(order + 1) - 1 : 0; } +static unsigned int shmem_get_orders_within_size(struct inode *inode, + unsigned long within_size_orders, pgoff_t index, + loff_t write_end) +{ + pgoff_t aligned_index; + unsigned long order; + loff_t i_size; + + order = highest_order(within_size_orders); + while (within_size_orders) { + aligned_index = round_up(index + 1, 1 << order); + i_size = max(write_end, i_size_read(inode)); + i_size = round_up(i_size, PAGE_SIZE); + if (i_size >> PAGE_SHIFT >= aligned_index) + return within_size_orders; + + order = next_order(&within_size_orders, order); + } + + return 0; +} + static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index, loff_t write_end, bool shmem_huge_force, struct vm_area_struct *vma, @@ -598,9 +620,6 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index unsigned int maybe_pmd_order = HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER ? 0 : BIT(HPAGE_PMD_ORDER); unsigned long within_size_orders; - unsigned int order; - pgoff_t aligned_index; - loff_t i_size; if (!S_ISREG(inode->i_mode)) return 0; @@ -634,16 +653,11 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index within_size_orders = shmem_mapping_size_orders(inode->i_mapping, index, write_end); - order = highest_order(within_size_orders); - while (within_size_orders) { - aligned_index = round_up(index + 1, 1 << order); - i_size = max(write_end, i_size_read(inode)); - i_size = round_up(i_size, PAGE_SIZE); - if (i_size >> PAGE_SHIFT >= aligned_index) - return within_size_orders; + within_size_orders = shmem_get_orders_within_size(inode, within_size_orders, + index, write_end); + if (within_size_orders > 0) + return within_size_orders; - order = next_order(&within_size_orders, order); - } fallthrough; case SHMEM_HUGE_ADVISE: if (vm_flags & VM_HUGEPAGE) @@ -1747,10 +1761,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long vm_flags = vma ? vma->vm_flags : 0; - pgoff_t aligned_index; unsigned int global_orders; - loff_t i_size; - int order; if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags))) return 0; @@ -1776,17 +1787,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, return READ_ONCE(huge_shmem_orders_inherit); /* Allow mTHP that will be fully within i_size. */ - order = highest_order(within_size_orders); - while (within_size_orders) { - aligned_index = round_up(index + 1, 1 << order); - i_size = round_up(i_size_read(inode), PAGE_SIZE); - if (i_size >> PAGE_SHIFT >= aligned_index) { - mask |= within_size_orders; - break; - } - - order = next_order(&within_size_orders, order); - } + mask |= shmem_get_orders_within_size(inode, within_size_orders, index, 0); if (vm_flags & VM_HUGEPAGE) mask |= READ_ONCE(huge_shmem_orders_madvise); From a91aaf8dd549dcee9caab227ecaa6cbc243bbc5a Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 7 Feb 2025 17:44:22 +0800 Subject: [PATCH 1139/2157] MAINTAINERS: add Baolin as shmem reviewer In the past year, I've primarily focused on shmem and added several features to it, such as support for mTHP, large folio swap-out and swap-in support, mTHP collapse support, skipping swapcache, and tmpfs support for large folios, and so on. Meanwhile I've also been helping with testing and reviewing shmem related patches. So I am willing to continue assisting with testing and reviewing shmem related patches. Let me be Cc'd on patches related to shmem. Link: https://lkml.kernel.org/r/bcefbba9b2b44d4e661e6cc2c4187292a5beb467.1738918357.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c13201979633..fb408698086e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23952,6 +23952,7 @@ F: drivers/hwmon/tmp513.c TMPFS (SHMEM FILESYSTEM) M: Hugh Dickins +R: Baolin Wang L: linux-mm@kvack.org S: Maintained F: include/linux/shmem_fs.h From a24b18aa4fd619cc7961dae3395473c355bb5b6a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 13 Mar 2025 08:52:44 +0000 Subject: [PATCH 1140/2157] samples: rust_misc_device: fix markup in top-level docs The meaning of /// is to document the thing that comes after it, so currently the example is documentation for the `use core::pin::Pin;` statement. To write top-level docs (and have them rendered as such in the html by rustdoc), use //! instead. This does not change the contents of the docs at all. The only change is changing /// to //!. Signed-off-by: Alice Ryhl Fixes: 8d9b095b8f89 ("samples: rust_misc_device: Provide an example C program to exercise functionality") Reviewed-by: Benno Lossin Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/r/20250313-rust_misc_device_tld-v1-1-a519bced9a6d@google.com Signed-off-by: Greg Kroah-Hartman --- samples/rust/rust_misc_device.rs | 181 ++++++++++++++++--------------- 1 file changed, 91 insertions(+), 90 deletions(-) diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs index 40ad7266c225..17ae59979dd0 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -3,97 +3,98 @@ // Copyright (C) 2024 Google LLC. //! Rust misc device sample. +//! +//! Below is an example userspace C program that exercises this sample's functionality. +//! +//! ```c +//! #include +//! #include +//! #include +//! #include +//! #include +//! #include +//! +//! #define RUST_MISC_DEV_FAIL _IO('|', 0) +//! #define RUST_MISC_DEV_HELLO _IO('|', 0x80) +//! #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int) +//! #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int) +//! +//! int main() { +//! int value, new_value; +//! int fd, ret; +//! +//! // Open the device file +//! printf("Opening /dev/rust-misc-device for reading and writing\n"); +//! fd = open("/dev/rust-misc-device", O_RDWR); +//! if (fd < 0) { +//! perror("open"); +//! return errno; +//! } +//! +//! // Make call into driver to say "hello" +//! printf("Calling Hello\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL); +//! if (ret < 0) { +//! perror("ioctl: Failed to call into Hello"); +//! close(fd); +//! return errno; +//! } +//! +//! // Get initial value +//! printf("Fetching initial value\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the initial value"); +//! close(fd); +//! return errno; +//! } +//! +//! value++; +//! +//! // Set value to something different +//! printf("Submitting new value (%d)\n", value); +//! ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to submit new value"); +//! close(fd); +//! return errno; +//! } +//! +//! // Ensure new value was applied +//! printf("Fetching new value\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the new value"); +//! close(fd); +//! return errno; +//! } +//! +//! if (value != new_value) { +//! printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value); +//! close(fd); +//! return -1; +//! } +//! +//! // Call the unsuccessful ioctl +//! printf("Attempting to call in to an non-existent IOCTL\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL); +//! if (ret < 0) { +//! perror("ioctl: Succeeded to fail - this was expected"); +//! } else { +//! printf("ioctl: Failed to fail\n"); +//! close(fd); +//! return -1; +//! } +//! +//! // Close the device file +//! printf("Closing /dev/rust-misc-device\n"); +//! close(fd); +//! +//! printf("Success\n"); +//! return 0; +//! } +//! ``` -/// Below is an example userspace C program that exercises this sample's functionality. -/// -/// ```c -/// #include -/// #include -/// #include -/// #include -/// #include -/// #include -/// -/// #define RUST_MISC_DEV_FAIL _IO('|', 0) -/// #define RUST_MISC_DEV_HELLO _IO('|', 0x80) -/// #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int) -/// #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int) -/// -/// int main() { -/// int value, new_value; -/// int fd, ret; -/// -/// // Open the device file -/// printf("Opening /dev/rust-misc-device for reading and writing\n"); -/// fd = open("/dev/rust-misc-device", O_RDWR); -/// if (fd < 0) { -/// perror("open"); -/// return errno; -/// } -/// -/// // Make call into driver to say "hello" -/// printf("Calling Hello\n"); -/// ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL); -/// if (ret < 0) { -/// perror("ioctl: Failed to call into Hello"); -/// close(fd); -/// return errno; -/// } -/// -/// // Get initial value -/// printf("Fetching initial value\n"); -/// ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value); -/// if (ret < 0) { -/// perror("ioctl: Failed to fetch the initial value"); -/// close(fd); -/// return errno; -/// } -/// -/// value++; -/// -/// // Set value to something different -/// printf("Submitting new value (%d)\n", value); -/// ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value); -/// if (ret < 0) { -/// perror("ioctl: Failed to submit new value"); -/// close(fd); -/// return errno; -/// } -/// -/// // Ensure new value was applied -/// printf("Fetching new value\n"); -/// ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value); -/// if (ret < 0) { -/// perror("ioctl: Failed to fetch the new value"); -/// close(fd); -/// return errno; -/// } -/// -/// if (value != new_value) { -/// printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value); -/// close(fd); -/// return -1; -/// } -/// -/// // Call the unsuccessful ioctl -/// printf("Attempting to call in to an non-existent IOCTL\n"); -/// ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL); -/// if (ret < 0) { -/// perror("ioctl: Succeeded to fail - this was expected"); -/// } else { -/// printf("ioctl: Failed to fail\n"); -/// close(fd); -/// return -1; -/// } -/// -/// // Close the device file -/// printf("Closing /dev/rust-misc-device\n"); -/// close(fd); -/// -/// printf("Success\n"); -/// return 0; -/// } -/// ``` use core::pin::Pin; use kernel::{ From 75749d2c1d8cef439f8b69fa1f4f36d0fc3193e6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 4 Mar 2025 10:53:21 +0200 Subject: [PATCH 1141/2157] thunderbolt: Scan retimers after device router has been enumerated Thomas reported connection issues on AMD system with Pluggable UD-4VPD dock. After some experiments it looks like the device has some sort of internal timeout that triggers reconnect. This is completely against the USB4 spec, as there is no requirement for the host to enumerate the device right away or even at all. In Linux case the delay is caused by scanning of retimers on the link so we can work this around by doing the scanning after the device router has been enumerated. Reported-by: Thomas Lynema Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219748 Reviewed-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 390abcfe7188..8c527af98927 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1305,11 +1305,15 @@ static void tb_scan_port(struct tb_port *port) goto out_rpm_put; } - tb_retimer_scan(port, true); - sw = tb_switch_alloc(port->sw->tb, &port->sw->dev, tb_downstream_route(port)); if (IS_ERR(sw)) { + /* + * Make the downstream retimers available even if there + * is no router connected. + */ + tb_retimer_scan(port, true); + /* * If there is an error accessing the connected switch * it may be connected to another domain. Also we allow @@ -1359,6 +1363,14 @@ static void tb_scan_port(struct tb_port *port) upstream_port = tb_upstream_port(sw); tb_configure_link(port, upstream_port, sw); + /* + * Scan for downstream retimers. We only scan them after the + * router has been enumerated to avoid issues with certain + * Pluggable devices that expect the host to enumerate them + * within certain timeout. + */ + tb_retimer_scan(port, true); + /* * CL0s and CL1 are enabled and supported together. * Silently ignore CLx enabling in case CLx is not supported. From ad79c278e478ca8c1a3bf8e7a0afba8f862a48a1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 5 Mar 2025 14:56:20 +0200 Subject: [PATCH 1142/2157] thunderbolt: Do not add non-active NVM if NVM upgrade is disabled for retimer This is only used to write a new NVM in order to upgrade the retimer firmware. It does not make sense to expose it if upgrade is disabled. This also makes it consistent with the router NVM upgrade. Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 1f25529fe05d..361fece3d818 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -93,9 +93,11 @@ static int tb_retimer_nvm_add(struct tb_retimer *rt) if (ret) goto err_nvm; - ret = tb_nvm_add_non_active(nvm, nvm_write); - if (ret) - goto err_nvm; + if (!rt->no_nvm_upgrade) { + ret = tb_nvm_add_non_active(nvm, nvm_write); + if (ret) + goto err_nvm; + } rt->nvm = nvm; dev_dbg(&rt->dev, "NVM version %x.%x\n", nvm->major, nvm->minor); From de404fc19628352e507290f0a192ca1f537b4150 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 3 Mar 2025 23:35:56 +0100 Subject: [PATCH 1143/2157] rtc: mpfs: switch to devm_device_init_wakeup Switch to devm_device_init_wakeup to avoid a possible memory leak as wakeup is never disabled. Link: https://lore.kernel.org/r/20250303223600.1135142-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-mpfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mpfs.c b/drivers/rtc/rtc-mpfs.c index 5a38649cbd43..6aa3eae575d2 100644 --- a/drivers/rtc/rtc-mpfs.c +++ b/drivers/rtc/rtc-mpfs.c @@ -266,7 +266,7 @@ static int mpfs_rtc_probe(struct platform_device *pdev) writel(prescaler, rtcdev->base + PRESCALER_REG); dev_info(&pdev->dev, "prescaler set to: %lu\n", prescaler); - device_init_wakeup(&pdev->dev, true); + devm_device_init_wakeup(&pdev->dev); ret = devm_pm_set_wake_irq(&pdev->dev, wakeup_irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); From 252f49cd71ba9d66089b447a18be792ea272a57e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 3 Mar 2025 23:35:57 +0100 Subject: [PATCH 1144/2157] rtc: pm8xxx: fix possible race condition probe must not fail after devm_rtc_register_device is successful because the character device will be seen by userspace and may be opened right away. Call it last to avoid opening the race window. Link: https://lore.kernel.org/r/20250303223600.1135142-2-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 852d80188bd0..3b9709214a08 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -519,11 +519,11 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (rc < 0) return rc; - rc = devm_rtc_register_device(rtc_dd->rtc); + rc = devm_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); if (rc) return rc; - return devm_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); + return devm_rtc_register_device(rtc_dd->rtc); } static struct platform_driver pm8xxx_rtc_driver = { From 2eaa2998f8a2bbc1c120fe0e5d9da373b3084601 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 3 Mar 2025 23:35:58 +0100 Subject: [PATCH 1145/2157] rtc: pm8xxx: switch to devm_device_init_wakeup Switch to devm_device_init_wakeup to avoid a possible memory leak as wakeup is never disabled. Link: https://lore.kernel.org/r/20250303223600.1135142-3-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 3b9709214a08..a88073efffb3 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -503,7 +503,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); - device_init_wakeup(&pdev->dev, true); + devm_device_init_wakeup(&pdev->dev); rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc_dd->rtc)) From b0f9cb4a0706b0356e84d67e48500b77b343debe Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Mar 2025 22:42:41 +0100 Subject: [PATCH 1146/2157] rtc: rv3032: fix EERD location EERD is bit 2 in CTRL1 Link: https://lore.kernel.org/r/20250306214243.1167692-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3032.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 35b2e36b426a..cb01038a2e27 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -69,7 +69,7 @@ #define RV3032_CLKOUT2_FD_MSK GENMASK(6, 5) #define RV3032_CLKOUT2_OS BIT(7) -#define RV3032_CTRL1_EERD BIT(3) +#define RV3032_CTRL1_EERD BIT(2) #define RV3032_CTRL1_WADA BIT(5) #define RV3032_CTRL2_STOP BIT(0) From 9a25a657227279a7a627c317314108123cbc679c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Mar 2025 22:42:42 +0100 Subject: [PATCH 1147/2157] rtc: rv3032: drop WADA WADA doesn't actually exist in CTRL1 of the RV-3032, drop it. Link: https://lore.kernel.org/r/20250306214243.1167692-2-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3032.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index cb01038a2e27..2c6a8918acba 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -70,7 +70,6 @@ #define RV3032_CLKOUT2_OS BIT(7) #define RV3032_CTRL1_EERD BIT(2) -#define RV3032_CTRL1_WADA BIT(5) #define RV3032_CTRL2_STOP BIT(0) #define RV3032_CTRL2_EIE BIT(2) @@ -947,11 +946,6 @@ static int rv3032_probe(struct i2c_client *client) if (!client->irq) clear_bit(RTC_FEATURE_ALARM, rv3032->rtc->features); - ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL1, - RV3032_CTRL1_WADA, RV3032_CTRL1_WADA); - if (ret) - return ret; - rv3032_trickle_charger_setup(&client->dev, rv3032); set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3032->rtc->features); From 931a88914ad6da6731c0510aa11c15d297567616 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 19 Feb 2025 14:41:13 +0100 Subject: [PATCH 1148/2157] dt-bindings: rtc: qcom-pm8xxx: document qcom,no-alarm flag Qualcomm x1e80100 firmware sets the ownership of the RTC alarm to ADSP. Thus writing to RTC alarm registers and receiving alarm interrupts is not possible. Add a qcom,no-alarm flag to support RTC on this platform. Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20241015004945.3676-3-jonathan@marek.ca [ johan: move vendor property; use boolean; reword description ] Acked-by: Rob Herring (Arm) Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250219134118.31017-2-johan+linaro@kernel.org Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml index d274bb7a534b..68ef3208c886 100644 --- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml @@ -50,6 +50,11 @@ properties: items: - const: offset + qcom,no-alarm: + type: boolean + description: + RTC alarm is not owned by the OS + wakeup-source: true required: From bba38b874886b227283d56ecb2f7ebbaa01a781d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 Feb 2025 14:41:14 +0100 Subject: [PATCH 1149/2157] rtc: pm8xxx: add support for uefi offset On many Qualcomm platforms the PMIC RTC control and time registers are read-only so that the RTC time can not be updated. Instead an offset needs be stored in some machine-specific non-volatile memory, which the driver can take into account. Add support for storing a 32-bit offset from the GPS time epoch in a UEFI variable so that the RTC time can be set on such platforms. The UEFI variable is 882f8c2b-9646-435f-8de5-f208ff80c1bd-RTCInfo and holds a 12-byte structure where the first four bytes is a GPS time offset in little-endian byte order. Note that this format is not arbitrary as the variable is shared with the UEFI firmware (and Windows). Tested-by: Jens Glathe Tested-by: Steev Klimaszewski Tested-by: Joel Stanley Tested-by: Sebastian Reichel # Lenovo T14s Gen6 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250219134118.31017-3-johan+linaro@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 156 +++++++++++++++++++++++++++++++++------ include/linux/rtc.h | 1 + 2 files changed, 133 insertions(+), 24 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index a88073efffb3..ef47411c9429 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -5,6 +5,7 @@ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * Copyright (c) 2023, Linaro Limited */ +#include #include #include #include @@ -16,9 +17,10 @@ #include #include #include - #include +#include + /* RTC_CTRL register bit fields */ #define PM8xxx_RTC_ENABLE BIT(7) #define PM8xxx_RTC_ALARM_CLEAR BIT(0) @@ -46,14 +48,21 @@ struct pm8xxx_rtc_regs { unsigned int alarm_en; }; +struct qcom_uefi_rtc_info { + __le32 offset_gps; + u8 reserved[8]; +} __packed; + /** * struct pm8xxx_rtc - RTC driver internal structure * @rtc: RTC device * @regmap: regmap used to access registers * @allow_set_time: whether the time can be set + * @use_uefi: use UEFI variable as fallback for offset * @alarm_irq: alarm irq number * @regs: register description * @dev: device structure + * @rtc_info: qcom uefi rtc-info structure * @nvmem_cell: nvmem cell for offset * @offset: offset from epoch in seconds */ @@ -61,13 +70,101 @@ struct pm8xxx_rtc { struct rtc_device *rtc; struct regmap *regmap; bool allow_set_time; + bool use_uefi; int alarm_irq; const struct pm8xxx_rtc_regs *regs; struct device *dev; + struct qcom_uefi_rtc_info rtc_info; struct nvmem_cell *nvmem_cell; u32 offset; }; +#ifdef CONFIG_EFI + +MODULE_IMPORT_NS("EFIVAR"); + +#define QCOM_UEFI_NAME L"RTCInfo" +#define QCOM_UEFI_GUID EFI_GUID(0x882f8c2b, 0x9646, 0x435f, \ + 0x8d, 0xe5, 0xf2, 0x08, 0xff, 0x80, 0xc1, 0xbd) +#define QCOM_UEFI_ATTRS (EFI_VARIABLE_NON_VOLATILE | \ + EFI_VARIABLE_BOOTSERVICE_ACCESS | \ + EFI_VARIABLE_RUNTIME_ACCESS) + +static int pm8xxx_rtc_read_uefi_offset(struct pm8xxx_rtc *rtc_dd) +{ + struct qcom_uefi_rtc_info *rtc_info = &rtc_dd->rtc_info; + unsigned long size = sizeof(*rtc_info); + struct device *dev = rtc_dd->dev; + efi_status_t status; + u32 offset_gps; + int rc; + + rc = efivar_lock(); + if (rc) + return rc; + + status = efivar_get_variable(QCOM_UEFI_NAME, &QCOM_UEFI_GUID, NULL, + &size, rtc_info); + efivar_unlock(); + + if (status != EFI_SUCCESS) { + dev_dbg(dev, "failed to read UEFI offset: %lu\n", status); + return efi_status_to_err(status); + } + + if (size != sizeof(*rtc_info)) { + dev_dbg(dev, "unexpected UEFI structure size %lu\n", size); + return -EINVAL; + } + + dev_dbg(dev, "uefi_rtc_info = %*ph\n", (int)size, rtc_info); + + /* Convert from GPS to Unix time offset */ + offset_gps = le32_to_cpu(rtc_info->offset_gps); + rtc_dd->offset = offset_gps + (u32)RTC_TIMESTAMP_EPOCH_GPS; + + return 0; +} + +static int pm8xxx_rtc_write_uefi_offset(struct pm8xxx_rtc *rtc_dd, u32 offset) +{ + struct qcom_uefi_rtc_info *rtc_info = &rtc_dd->rtc_info; + unsigned long size = sizeof(*rtc_info); + struct device *dev = rtc_dd->dev; + efi_status_t status; + u32 offset_gps; + + /* Convert from Unix to GPS time offset */ + offset_gps = offset - (u32)RTC_TIMESTAMP_EPOCH_GPS; + + rtc_info->offset_gps = cpu_to_le32(offset_gps); + + dev_dbg(dev, "efi_rtc_info = %*ph\n", (int)size, rtc_info); + + status = efivar_set_variable(QCOM_UEFI_NAME, &QCOM_UEFI_GUID, + QCOM_UEFI_ATTRS, size, rtc_info); + if (status != EFI_SUCCESS) { + dev_dbg(dev, "failed to write UEFI offset: %lx\n", status); + return efi_status_to_err(status); + } + + return 0; +} + +#else /* CONFIG_EFI */ + +static int pm8xxx_rtc_read_uefi_offset(struct pm8xxx_rtc *rtc_dd) +{ + return -ENODEV; +} + +static int pm8xxx_rtc_write_uefi_offset(struct pm8xxx_rtc *rtc_dd, u32 offset) +{ + return -ENODEV; +} + +#endif /* CONFIG_EFI */ + static int pm8xxx_rtc_read_nvmem_offset(struct pm8xxx_rtc *rtc_dd) { size_t len; @@ -110,14 +207,6 @@ static int pm8xxx_rtc_write_nvmem_offset(struct pm8xxx_rtc *rtc_dd, u32 offset) return 0; } -static int pm8xxx_rtc_read_offset(struct pm8xxx_rtc *rtc_dd) -{ - if (!rtc_dd->nvmem_cell) - return 0; - - return pm8xxx_rtc_read_nvmem_offset(rtc_dd); -} - static int pm8xxx_rtc_read_raw(struct pm8xxx_rtc *rtc_dd, u32 *secs) { const struct pm8xxx_rtc_regs *regs = rtc_dd->regs; @@ -155,7 +244,7 @@ static int pm8xxx_rtc_update_offset(struct pm8xxx_rtc *rtc_dd, u32 secs) u32 offset; int rc; - if (!rtc_dd->nvmem_cell) + if (!rtc_dd->nvmem_cell && !rtc_dd->use_uefi) return -ENODEV; rc = pm8xxx_rtc_read_raw(rtc_dd, &raw_secs); @@ -167,7 +256,11 @@ static int pm8xxx_rtc_update_offset(struct pm8xxx_rtc *rtc_dd, u32 secs) if (offset == rtc_dd->offset) return 0; - rc = pm8xxx_rtc_write_nvmem_offset(rtc_dd, offset); + if (rtc_dd->nvmem_cell) + rc = pm8xxx_rtc_write_nvmem_offset(rtc_dd, offset); + else + rc = pm8xxx_rtc_write_uefi_offset(rtc_dd, offset); + if (rc) return rc; @@ -455,6 +548,30 @@ static const struct of_device_id pm8xxx_id_table[] = { }; MODULE_DEVICE_TABLE(of, pm8xxx_id_table); +static int pm8xxx_rtc_probe_offset(struct pm8xxx_rtc *rtc_dd) +{ + int rc; + + rtc_dd->nvmem_cell = devm_nvmem_cell_get(rtc_dd->dev, "offset"); + if (IS_ERR(rtc_dd->nvmem_cell)) { + rc = PTR_ERR(rtc_dd->nvmem_cell); + if (rc != -ENOENT) + return rc; + rtc_dd->nvmem_cell = NULL; + } else { + return pm8xxx_rtc_read_nvmem_offset(rtc_dd); + } + + /* Use UEFI storage as fallback if available */ + if (efivar_is_available()) { + rc = pm8xxx_rtc_read_uefi_offset(rtc_dd); + if (rc == 0) + rtc_dd->use_uefi = true; + } + + return 0; +} + static int pm8xxx_rtc_probe(struct platform_device *pdev) { const struct of_device_id *match; @@ -469,6 +586,9 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (rtc_dd == NULL) return -ENOMEM; + rtc_dd->regs = match->data; + rtc_dd->dev = &pdev->dev; + rtc_dd->regmap = dev_get_regmap(pdev->dev.parent, NULL); if (!rtc_dd->regmap) return -ENXIO; @@ -479,20 +599,8 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) rtc_dd->allow_set_time = of_property_read_bool(pdev->dev.of_node, "allow-set-time"); - - rtc_dd->nvmem_cell = devm_nvmem_cell_get(&pdev->dev, "offset"); - if (IS_ERR(rtc_dd->nvmem_cell)) { - rc = PTR_ERR(rtc_dd->nvmem_cell); - if (rc != -ENOENT) - return rc; - rtc_dd->nvmem_cell = NULL; - } - - rtc_dd->regs = match->data; - rtc_dd->dev = &pdev->dev; - if (!rtc_dd->allow_set_time) { - rc = pm8xxx_rtc_read_offset(rtc_dd); + rc = pm8xxx_rtc_probe_offset(rtc_dd); if (rc) return rc; } diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 3f4d315aaec9..95da051fb155 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -170,6 +170,7 @@ struct rtc_device { /* useful timestamps */ #define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ +#define RTC_TIMESTAMP_EPOCH_GPS 315964800LL /* 1980-01-06 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2079 3471292799LL /* 2079-12-31 23:59:59 */ From e853658de5ef87a6476987a30f38ce661270cb67 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 Feb 2025 14:41:15 +0100 Subject: [PATCH 1150/2157] rtc: pm8xxx: mitigate flash wear On many Qualcomm platforms the PMIC RTC control and time registers are read-only so that the RTC time can not be updated. Instead an offset needs be stored in some machine-specific non-volatile memory, which the driver can take into account. On machines like the Lenovo ThinkPad X13s the PMIC RTC drifts about one second every 3.5 hours, something which leads to repeated updates of the offset when NTP synchronisation is enabled. Reduce wear of the underlying flash storage (used for UEFI variables) by deferring writes until shutdown in case they appear to be due to clock drift. As an example, deferring writes when the new offset differs up to 30 s from the previous one reduces the number of writes on the X13s during a ten day session with the machine not suspending for more than four days in a row from up to 68 writes (every 3.5 h) to at most two (boot and shutdown). Tested-by: Jens Glathe Tested-by: Steev Klimaszewski Tested-by: Joel Stanley Tested-by: Sebastian Reichel # Lenovo T14s Gen6 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250219134118.31017-4-johan+linaro@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index ef47411c9429..a547729d2877 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -65,6 +65,7 @@ struct qcom_uefi_rtc_info { * @rtc_info: qcom uefi rtc-info structure * @nvmem_cell: nvmem cell for offset * @offset: offset from epoch in seconds + * @offset_dirty: offset needs to be stored on shutdown */ struct pm8xxx_rtc { struct rtc_device *rtc; @@ -77,6 +78,7 @@ struct pm8xxx_rtc { struct qcom_uefi_rtc_info rtc_info; struct nvmem_cell *nvmem_cell; u32 offset; + bool offset_dirty; }; #ifdef CONFIG_EFI @@ -256,6 +258,15 @@ static int pm8xxx_rtc_update_offset(struct pm8xxx_rtc *rtc_dd, u32 secs) if (offset == rtc_dd->offset) return 0; + /* + * Reduce flash wear by deferring updates due to clock drift until + * shutdown. + */ + if (abs_diff(offset, rtc_dd->offset) < 30) { + rtc_dd->offset_dirty = true; + goto out; + } + if (rtc_dd->nvmem_cell) rc = pm8xxx_rtc_write_nvmem_offset(rtc_dd, offset); else @@ -264,6 +275,8 @@ static int pm8xxx_rtc_update_offset(struct pm8xxx_rtc *rtc_dd, u32 secs) if (rc) return rc; + rtc_dd->offset_dirty = false; +out: rtc_dd->offset = offset; return 0; @@ -634,8 +647,21 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) return devm_rtc_register_device(rtc_dd->rtc); } +static void pm8xxx_shutdown(struct platform_device *pdev) +{ + struct pm8xxx_rtc *rtc_dd = platform_get_drvdata(pdev); + + if (rtc_dd->offset_dirty) { + if (rtc_dd->nvmem_cell) + pm8xxx_rtc_write_nvmem_offset(rtc_dd, rtc_dd->offset); + else + pm8xxx_rtc_write_uefi_offset(rtc_dd, rtc_dd->offset); + } +} + static struct platform_driver pm8xxx_rtc_driver = { .probe = pm8xxx_rtc_probe, + .shutdown = pm8xxx_shutdown, .driver = { .name = "rtc-pm8xxx", .of_match_table = pm8xxx_id_table, From 015b70bd6c759af9e4fd5824a4ffe145ccf6a615 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 19 Feb 2025 14:41:16 +0100 Subject: [PATCH 1151/2157] rtc: pm8xxx: implement qcom,no-alarm flag for non-HLOS owned alarm Qualcomm x1e80100 firmware sets the ownership of the RTC alarm to ADSP. Thus writing to RTC alarm registers and receiving alarm interrupts is not possible. Add a qcom,no-alarm flag to support RTC on this platform. Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20241015004945.3676-2-jonathan@marek.ca [ johan: drop no_alarm flag and restructure probe() ] Tested-by: Jens Glathe Tested-by: Steev Klimaszewski Tested-by: Joel Stanley Tested-by: Sebastian Reichel # Lenovo T14s Gen6 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250219134118.31017-5-johan+linaro@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index a547729d2877..3c1dddcc81df 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -606,9 +606,11 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (!rtc_dd->regmap) return -ENXIO; - rtc_dd->alarm_irq = platform_get_irq(pdev, 0); - if (rtc_dd->alarm_irq < 0) - return -ENXIO; + if (!of_property_read_bool(pdev->dev.of_node, "qcom,no-alarm")) { + rtc_dd->alarm_irq = platform_get_irq(pdev, 0); + if (rtc_dd->alarm_irq < 0) + return -ENXIO; + } rtc_dd->allow_set_time = of_property_read_bool(pdev->dev.of_node, "allow-set-time"); @@ -624,8 +626,6 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); - devm_device_init_wakeup(&pdev->dev); - rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc_dd->rtc)) return PTR_ERR(rtc_dd->rtc); @@ -633,16 +633,22 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) rtc_dd->rtc->ops = &pm8xxx_rtc_ops; rtc_dd->rtc->range_max = U32_MAX; - rc = devm_request_any_context_irq(&pdev->dev, rtc_dd->alarm_irq, - pm8xxx_alarm_trigger, - IRQF_TRIGGER_RISING, - "pm8xxx_rtc_alarm", rtc_dd); - if (rc < 0) - return rc; + if (rtc_dd->alarm_irq) { + rc = devm_request_any_context_irq(&pdev->dev, rtc_dd->alarm_irq, + pm8xxx_alarm_trigger, + IRQF_TRIGGER_RISING, + "pm8xxx_rtc_alarm", rtc_dd); + if (rc < 0) + return rc; - rc = devm_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); - if (rc) - return rc; + rc = devm_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq); + if (rc) + return rc; + + devm_device_init_wakeup(&pdev->dev); + } else { + clear_bit(RTC_FEATURE_ALARM, rtc_dd->rtc->features); + } return devm_rtc_register_device(rtc_dd->rtc); } From dbf00d7d89125802113a9d8ea4c77ab9f4de8866 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:19 -0700 Subject: [PATCH 1152/2157] iommufd/fault: Move two fault functions out of the header There is no need to keep them in the header. The vEVENTQ version of these two functions will turn out to be a different implementation and will not share with this fault version. Thus, move them out of the header. Link: https://patch.msgid.link/r/7eebe32f3d354799f5e28128c693c3c284740b21.1741719725.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 25 +++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 25 ------------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index c48d72c9668c..29e3a97c73c6 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -138,6 +138,31 @@ static void iommufd_compose_fault_message(struct iommu_fault *fault, hwpt_fault->cookie = cookie; } +/* Fetch the first node out of the fault->deliver list */ +static struct iopf_group * +iommufd_fault_deliver_fetch(struct iommufd_fault *fault) +{ + struct list_head *list = &fault->deliver; + struct iopf_group *group = NULL; + + spin_lock(&fault->lock); + if (!list_empty(list)) { + group = list_first_entry(list, struct iopf_group, node); + list_del(&group->node); + } + spin_unlock(&fault->lock); + return group; +} + +/* Restore a node back to the head of the fault->deliver list */ +static void iommufd_fault_deliver_restore(struct iommufd_fault *fault, + struct iopf_group *group) +{ + spin_lock(&fault->lock); + list_add(&group->node, &fault->deliver); + spin_unlock(&fault->lock); +} + static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 246297452a44..1c58f5fe17b4 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -472,31 +472,6 @@ struct iommufd_fault { struct wait_queue_head wait_queue; }; -/* Fetch the first node out of the fault->deliver list */ -static inline struct iopf_group * -iommufd_fault_deliver_fetch(struct iommufd_fault *fault) -{ - struct list_head *list = &fault->deliver; - struct iopf_group *group = NULL; - - spin_lock(&fault->lock); - if (!list_empty(list)) { - group = list_first_entry(list, struct iopf_group, node); - list_del(&group->node); - } - spin_unlock(&fault->lock); - return group; -} - -/* Restore a node back to the head of the fault->deliver list */ -static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault, - struct iopf_group *group) -{ - spin_lock(&fault->lock); - list_add(&group->node, &fault->deliver); - spin_unlock(&fault->lock); -} - struct iommufd_attach_handle { struct iommu_attach_handle handle; struct iommufd_device *idev; From 927dabc9aa4dbebf92b34da9b7acd7d8d5c6331b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:20 -0700 Subject: [PATCH 1153/2157] iommufd/fault: Add an iommufd_fault_init() helper The infrastructure of a fault object will be shared with a new vEVENTQ object in a following change. Add an iommufd_fault_init helper and an INIT_EVENTQ_FOPS marco for a vEVENTQ allocator to use too. Reorder the iommufd_ctx_get and refcount_inc, to keep them symmetrical with the iommufd_fault_fops_release(). Since the new vEVENTQ doesn't need "response" and its "mutex", so keep the xa_init_flags and mutex_init in their original locations. Link: https://patch.msgid.link/r/a9522c521909baeb1bd843950b2490478f3d06e0.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 70 +++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 29e3a97c73c6..5d8de98732b6 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -280,20 +280,49 @@ static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) return 0; } -static const struct file_operations iommufd_fault_fops = { - .owner = THIS_MODULE, - .open = nonseekable_open, - .read = iommufd_fault_fops_read, - .write = iommufd_fault_fops_write, - .poll = iommufd_fault_fops_poll, - .release = iommufd_fault_fops_release, -}; +#define INIT_FAULT_FOPS(read_op, write_op) \ + ((const struct file_operations){ \ + .owner = THIS_MODULE, \ + .open = nonseekable_open, \ + .read = read_op, \ + .write = write_op, \ + .poll = iommufd_fault_fops_poll, \ + .release = iommufd_fault_fops_release, \ + }) + +static int iommufd_fault_init(struct iommufd_fault *fault, char *name, + struct iommufd_ctx *ictx, + const struct file_operations *fops) +{ + struct file *filep; + int fdno; + + spin_lock_init(&fault->lock); + INIT_LIST_HEAD(&fault->deliver); + init_waitqueue_head(&fault->wait_queue); + + filep = anon_inode_getfile(name, fops, fault, O_RDWR); + if (IS_ERR(filep)) + return PTR_ERR(filep); + + fault->ictx = ictx; + iommufd_ctx_get(fault->ictx); + fault->filep = filep; + refcount_inc(&fault->obj.users); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) + fput(filep); + return fdno; +} + +static const struct file_operations iommufd_fault_fops = + INIT_FAULT_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write); int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) { struct iommu_fault_alloc *cmd = ucmd->cmd; struct iommufd_fault *fault; - struct file *filep; int fdno; int rc; @@ -304,28 +333,14 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) if (IS_ERR(fault)) return PTR_ERR(fault); - fault->ictx = ucmd->ictx; - INIT_LIST_HEAD(&fault->deliver); xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); mutex_init(&fault->mutex); - spin_lock_init(&fault->lock); - init_waitqueue_head(&fault->wait_queue); - filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, - fault, O_RDWR); - if (IS_ERR(filep)) { - rc = PTR_ERR(filep); - goto out_abort; - } - - refcount_inc(&fault->obj.users); - iommufd_ctx_get(fault->ictx); - fault->filep = filep; - - fdno = get_unused_fd_flags(O_CLOEXEC); + fdno = iommufd_fault_init(fault, "[iommufd-pgfault]", ucmd->ictx, + &iommufd_fault_fops); if (fdno < 0) { rc = fdno; - goto out_fput; + goto out_abort; } cmd->out_fault_id = fault->obj.id; @@ -341,8 +356,7 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) return 0; out_put_fdno: put_unused_fd(fdno); -out_fput: - fput(filep); + fput(fault->filep); out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); From 5426a78bebefbb32643ee85320e977f3971c5521 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:21 -0700 Subject: [PATCH 1154/2157] iommufd: Abstract an iommufd_eventq from iommufd_fault The fault object was designed exclusively for hwpt's IO page faults (PRI). But its queue implementation can be reused for other purposes too, such as hardware IRQ and event injections to user space. Meanwhile, a fault object holds a list of faults. So it's more accurate to call it a "fault queue". Combining the reusing idea above, abstract a new iommufd_eventq as a common structure embedded into struct iommufd_fault, similar to hwpt_paging holding a common hwpt. Add a common iommufd_eventq_ops and iommufd_eventq_init to prepare for an IOMMUFD_OBJ_VEVENTQ (vIOMMU Event Queue). Link: https://patch.msgid.link/r/e7336a857954209aabb466e0694aab323da95d90.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 111 +++++++++++++----------- drivers/iommu/iommufd/hw_pagetable.c | 6 +- drivers/iommu/iommufd/iommufd_private.h | 28 ++++-- 3 files changed, 82 insertions(+), 63 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 5d8de98732b6..f8e60e5879d1 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -17,6 +17,8 @@ #include "../iommu-priv.h" #include "iommufd_private.h" +/* IOMMUFD_OBJ_FAULT Functions */ + int iommufd_fault_iopf_enable(struct iommufd_device *idev) { struct device *dev = idev->dev; @@ -73,13 +75,13 @@ void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, INIT_LIST_HEAD(&free_list); mutex_lock(&fault->mutex); - spin_lock(&fault->lock); - list_for_each_entry_safe(group, next, &fault->deliver, node) { + spin_lock(&fault->common.lock); + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { if (group->attach_handle != &handle->handle) continue; list_move(&group->node, &free_list); } - spin_unlock(&fault->lock); + spin_unlock(&fault->common.lock); list_for_each_entry_safe(group, next, &free_list, node) { list_del(&group->node); @@ -99,7 +101,9 @@ void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, void iommufd_fault_destroy(struct iommufd_object *obj) { - struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_fault *fault = eventq_to_fault(eventq); struct iopf_group *group, *next; unsigned long index; @@ -109,7 +113,7 @@ void iommufd_fault_destroy(struct iommufd_object *obj) * accessing this pointer. Therefore, acquiring the mutex here * is unnecessary. */ - list_for_each_entry_safe(group, next, &fault->deliver, node) { + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { list_del(&group->node); iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); iopf_free_group(group); @@ -142,15 +146,15 @@ static void iommufd_compose_fault_message(struct iommu_fault *fault, static struct iopf_group * iommufd_fault_deliver_fetch(struct iommufd_fault *fault) { - struct list_head *list = &fault->deliver; + struct list_head *list = &fault->common.deliver; struct iopf_group *group = NULL; - spin_lock(&fault->lock); + spin_lock(&fault->common.lock); if (!list_empty(list)) { group = list_first_entry(list, struct iopf_group, node); list_del(&group->node); } - spin_unlock(&fault->lock); + spin_unlock(&fault->common.lock); return group; } @@ -158,16 +162,17 @@ iommufd_fault_deliver_fetch(struct iommufd_fault *fault) static void iommufd_fault_deliver_restore(struct iommufd_fault *fault, struct iopf_group *group) { - spin_lock(&fault->lock); - list_add(&group->node, &fault->deliver); - spin_unlock(&fault->lock); + spin_lock(&fault->common.lock); + list_add(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); } static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { size_t fault_size = sizeof(struct iommu_hwpt_pgfault); - struct iommufd_fault *fault = filep->private_data; + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); struct iommu_hwpt_pgfault data = {}; struct iommufd_device *idev; struct iopf_group *group; @@ -216,7 +221,8 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b size_t count, loff_t *ppos) { size_t response_size = sizeof(struct iommu_hwpt_page_response); - struct iommufd_fault *fault = filep->private_data; + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); struct iommu_hwpt_page_response response; struct iopf_group *group; size_t done = 0; @@ -256,59 +262,61 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b return done == 0 ? rc : done; } -static __poll_t iommufd_fault_fops_poll(struct file *filep, - struct poll_table_struct *wait) +/* Common Event Queue Functions */ + +static __poll_t iommufd_eventq_fops_poll(struct file *filep, + struct poll_table_struct *wait) { - struct iommufd_fault *fault = filep->private_data; + struct iommufd_eventq *eventq = filep->private_data; __poll_t pollflags = EPOLLOUT; - poll_wait(filep, &fault->wait_queue, wait); - spin_lock(&fault->lock); - if (!list_empty(&fault->deliver)) + poll_wait(filep, &eventq->wait_queue, wait); + spin_lock(&eventq->lock); + if (!list_empty(&eventq->deliver)) pollflags |= EPOLLIN | EPOLLRDNORM; - spin_unlock(&fault->lock); + spin_unlock(&eventq->lock); return pollflags; } -static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) +static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep) { - struct iommufd_fault *fault = filep->private_data; + struct iommufd_eventq *eventq = filep->private_data; - refcount_dec(&fault->obj.users); - iommufd_ctx_put(fault->ictx); + refcount_dec(&eventq->obj.users); + iommufd_ctx_put(eventq->ictx); return 0; } -#define INIT_FAULT_FOPS(read_op, write_op) \ +#define INIT_EVENTQ_FOPS(read_op, write_op) \ ((const struct file_operations){ \ .owner = THIS_MODULE, \ .open = nonseekable_open, \ .read = read_op, \ .write = write_op, \ - .poll = iommufd_fault_fops_poll, \ - .release = iommufd_fault_fops_release, \ + .poll = iommufd_eventq_fops_poll, \ + .release = iommufd_eventq_fops_release, \ }) -static int iommufd_fault_init(struct iommufd_fault *fault, char *name, - struct iommufd_ctx *ictx, - const struct file_operations *fops) +static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name, + struct iommufd_ctx *ictx, + const struct file_operations *fops) { struct file *filep; int fdno; - spin_lock_init(&fault->lock); - INIT_LIST_HEAD(&fault->deliver); - init_waitqueue_head(&fault->wait_queue); + spin_lock_init(&eventq->lock); + INIT_LIST_HEAD(&eventq->deliver); + init_waitqueue_head(&eventq->wait_queue); - filep = anon_inode_getfile(name, fops, fault, O_RDWR); + filep = anon_inode_getfile(name, fops, eventq, O_RDWR); if (IS_ERR(filep)) return PTR_ERR(filep); - fault->ictx = ictx; - iommufd_ctx_get(fault->ictx); - fault->filep = filep; - refcount_inc(&fault->obj.users); + eventq->ictx = ictx; + iommufd_ctx_get(eventq->ictx); + eventq->filep = filep; + refcount_inc(&eventq->obj.users); fdno = get_unused_fd_flags(O_CLOEXEC); if (fdno < 0) @@ -317,7 +325,7 @@ static int iommufd_fault_init(struct iommufd_fault *fault, char *name, } static const struct file_operations iommufd_fault_fops = - INIT_FAULT_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write); + INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write); int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) { @@ -329,36 +337,37 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) if (cmd->flags) return -EOPNOTSUPP; - fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); + fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT, + common.obj); if (IS_ERR(fault)) return PTR_ERR(fault); xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); mutex_init(&fault->mutex); - fdno = iommufd_fault_init(fault, "[iommufd-pgfault]", ucmd->ictx, - &iommufd_fault_fops); + fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]", + ucmd->ictx, &iommufd_fault_fops); if (fdno < 0) { rc = fdno; goto out_abort; } - cmd->out_fault_id = fault->obj.id; + cmd->out_fault_id = fault->common.obj.id; cmd->out_fault_fd = fdno; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) goto out_put_fdno; - iommufd_object_finalize(ucmd->ictx, &fault->obj); + iommufd_object_finalize(ucmd->ictx, &fault->common.obj); - fd_install(fdno, fault->filep); + fd_install(fdno, fault->common.filep); return 0; out_put_fdno: put_unused_fd(fdno); - fput(fault->filep); + fput(fault->common.filep); out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); + iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj); return rc; } @@ -371,11 +380,11 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) hwpt = group->attach_handle->domain->iommufd_hwpt; fault = hwpt->fault; - spin_lock(&fault->lock); - list_add_tail(&group->node, &fault->deliver); - spin_unlock(&fault->lock); + spin_lock(&fault->common.lock); + list_add_tail(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); - wake_up_interruptible(&fault->wait_queue); + wake_up_interruptible(&fault->common.wait_queue); return 0; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 1d4cfe3677dc..9a89f3a28dc5 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt) iommu_domain_free(hwpt->domain); if (hwpt->fault) - refcount_dec(&hwpt->fault->obj.users); + refcount_dec(&hwpt->fault->common.obj.users); } void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) @@ -415,8 +415,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) } hwpt->fault = fault; hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; - refcount_inc(&fault->obj.users); - iommufd_put_object(ucmd->ictx, &fault->obj); + refcount_inc(&fault->common.obj.users); + iommufd_put_object(ucmd->ictx, &fault->common.obj); } cmd->out_hwpt_id = hwpt->obj.id; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 1c58f5fe17b4..44fb30af10b0 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -454,20 +454,13 @@ void iopt_remove_access(struct io_pagetable *iopt, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); -/* - * An iommufd_fault object represents an interface to deliver I/O page faults - * to the user space. These objects are created/destroyed by the user space and - * associated with hardware page table objects during page-table allocation. - */ -struct iommufd_fault { +struct iommufd_eventq { struct iommufd_object obj; struct iommufd_ctx *ictx; struct file *filep; spinlock_t lock; /* protects the deliver list */ struct list_head deliver; - struct mutex mutex; /* serializes response flows */ - struct xarray response; struct wait_queue_head wait_queue; }; @@ -480,12 +473,29 @@ struct iommufd_attach_handle { /* Convert an iommu attach handle to iommufd handle. */ #define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. + */ +struct iommufd_fault { + struct iommufd_eventq common; + struct mutex mutex; /* serializes response flows */ + struct xarray response; +}; + +static inline struct iommufd_fault * +eventq_to_fault(struct iommufd_eventq *eventq) +{ + return container_of(eventq, struct iommufd_fault, common); +} + static inline struct iommufd_fault * iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_FAULT), - struct iommufd_fault, obj); + struct iommufd_fault, common.obj); } int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); From 0507f337fc0c3a10f802b42834e6532edcf605be Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:22 -0700 Subject: [PATCH 1155/2157] iommufd: Rename fault.c to eventq.c Rename the file, aligning with the new eventq object. Link: https://patch.msgid.link/r/d726397e2d08028e25a1cb6eb9febefac35a32ba.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 2 +- drivers/iommu/iommufd/{fault.c => eventq.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/iommu/iommufd/{fault.c => eventq.c} (100%) diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index cb784da6cddc..71d692c9a8f4 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ - fault.o \ + eventq.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/eventq.c similarity index 100% rename from drivers/iommu/iommufd/fault.c rename to drivers/iommu/iommufd/eventq.c From d5834614a4d97b2c88be83f736602b7c3dbc3a4d Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:22 +0000 Subject: [PATCH 1156/2157] ata: libata-zpodd: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-8-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Damien Le Moal Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/ata/libata-zpodd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 4b83b517caec..799531218ea2 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -160,8 +160,7 @@ void zpodd_on_suspend(struct ata_device *dev) return; } - expires = zpodd->last_ready + - msecs_to_jiffies(zpodd_poweroff_delay * 1000); + expires = zpodd->last_ready + secs_to_jiffies(zpodd_poweroff_delay); if (time_before(jiffies, expires)) return; From a877cd2a6487f9f3ba8aa701d4d780efdc034596 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:23 +0000 Subject: [PATCH 1157/2157] xfs: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-9-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_sysfs.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7b6c026d01a1..7a1feb8dc21f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -230,7 +230,7 @@ xfs_blockgc_queue( rcu_read_lock(); if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) queue_delayed_work(mp->m_blockgc_wq, &pag->pag_blockgc_work, - msecs_to_jiffies(xfs_blockgc_secs * 1000)); + secs_to_jiffies(xfs_blockgc_secs)); rcu_read_unlock(); } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 60cb5318fdae..c9ca8cd8a2f2 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -568,8 +568,8 @@ retry_timeout_seconds_store( if (val == -1) cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; else { - cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC); - ASSERT(msecs_to_jiffies(val * MSEC_PER_SEC) < LONG_MAX); + cfg->retry_timeout = secs_to_jiffies(val); + ASSERT(secs_to_jiffies(val) < LONG_MAX); } return count; } @@ -686,8 +686,8 @@ xfs_error_sysfs_init_class( if (init[i].retry_timeout == XFS_ERR_RETRY_FOREVER) cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; else - cfg->retry_timeout = msecs_to_jiffies( - init[i].retry_timeout * MSEC_PER_SEC); + cfg->retry_timeout = + secs_to_jiffies(init[i].retry_timeout); } return 0; From 2c54b24fefa8481e1c2e2330db636e5781acc229 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:24 +0000 Subject: [PATCH 1158/2157] power: supply: da9030: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-10-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/power/supply/da9030_battery.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/power/supply/da9030_battery.c b/drivers/power/supply/da9030_battery.c index ac2e319e9517..d25279c26030 100644 --- a/drivers/power/supply/da9030_battery.c +++ b/drivers/power/supply/da9030_battery.c @@ -502,8 +502,7 @@ static int da9030_battery_probe(struct platform_device *pdev) /* 10 seconds between monitor runs unless platform defines other interval */ - charger->interval = msecs_to_jiffies( - (pdata->batmon_interval ? : 10) * 1000); + charger->interval = secs_to_jiffies(pdata->batmon_interval ? : 10); charger->charge_milliamp = pdata->charge_milliamp; charger->charge_millivolt = pdata->charge_millivolt; From 6cf828d2fbebd41d26f8233624bab88e5afc323b Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:25 +0000 Subject: [PATCH 1159/2157] nvme: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-11-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Keith Busch Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/nvme/host/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f028913e2e62..24c3e1765d49 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4466,11 +4466,9 @@ static void nvme_fw_act_work(struct work_struct *work) nvme_auth_stop(ctrl); if (ctrl->mtfa) - fw_act_timeout = jiffies + - msecs_to_jiffies(ctrl->mtfa * 100); + fw_act_timeout = jiffies + msecs_to_jiffies(ctrl->mtfa * 100); else - fw_act_timeout = jiffies + - msecs_to_jiffies(admin_timeout * 1000); + fw_act_timeout = jiffies + secs_to_jiffies(admin_timeout); nvme_quiesce_io_queues(ctrl); while (nvme_ctrl_pp_status(ctrl)) { From eee6af2319f81497bf02389a2036c14e0cf01e38 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:26 +0000 Subject: [PATCH 1160/2157] spi: spi-fsl-lpspi: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-12-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Mark Brown Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/spi/spi-fsl-lpspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 40f5c8fdba76..5e3818445234 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -572,7 +572,7 @@ static int fsl_lpspi_calculate_timeout(struct fsl_lpspi_data *fsl_lpspi, timeout += 1; /* Double calculated timeout */ - return msecs_to_jiffies(2 * timeout * MSEC_PER_SEC); + return secs_to_jiffies(2 * timeout); } static int fsl_lpspi_dma_transfer(struct spi_controller *controller, From 7c9a8c57143c2163407967790fd46ab9545f51d4 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:27 +0000 Subject: [PATCH 1161/2157] spi: spi-imx: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-13-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Acked-by: Mark Brown Acked-by: Marc Kleine-Budde Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index eeb7d082c247..832d6e9009eb 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1449,7 +1449,7 @@ static int spi_imx_calculate_timeout(struct spi_imx_data *spi_imx, int size) timeout += 1; /* Double calculated timeout */ - return msecs_to_jiffies(2 * timeout * MSEC_PER_SEC); + return secs_to_jiffies(2 * timeout); } static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, From 8ba1b428cf1a0905c260b5da4e44502a8457edae Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:28 +0000 Subject: [PATCH 1162/2157] platform/x86/amd/pmf: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-14-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/platform/x86/amd/pmf/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/acpi.c b/drivers/platform/x86/amd/pmf/acpi.c index dd5780a1d06e..f75f7ecd8cd9 100644 --- a/drivers/platform/x86/amd/pmf/acpi.c +++ b/drivers/platform/x86/amd/pmf/acpi.c @@ -220,7 +220,7 @@ static void apmf_sbios_heartbeat_notify(struct work_struct *work) if (!info) return; - schedule_delayed_work(&dev->heart_beat, msecs_to_jiffies(dev->hb_interval * 1000)); + schedule_delayed_work(&dev->heart_beat, secs_to_jiffies(dev->hb_interval)); kfree(info); } From 66644d80a4f9835f3dae0bfee6c6e529f6b082fe Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:29 +0000 Subject: [PATCH 1163/2157] platform/x86: thinkpad_acpi: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-15-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/platform/x86/thinkpad_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1cc91173e012..76b1f7a00be3 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8513,7 +8513,7 @@ static void fan_watchdog_reset(void) if (fan_watchdog_maxinterval > 0 && tpacpi_lifecycle != TPACPI_LIFE_EXITING) mod_delayed_work(tpacpi_wq, &fan_watchdog_task, - msecs_to_jiffies(fan_watchdog_maxinterval * 1000)); + secs_to_jiffies(fan_watchdog_maxinterval)); else cancel_delayed_work(&fan_watchdog_task); } From f873136416293b786e7611d36226c9f5a8f6d20b Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 25 Feb 2025 20:17:30 +0000 Subject: [PATCH 1164/2157] RDMA/bnxt_re: convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies() to avoid the multiplication This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @depends on patch@ expression E; @@ -msecs_to_jiffies +secs_to_jiffies (E - * \( 1000 \| MSEC_PER_SEC \) ) Link: https://lkml.kernel.org/r/20250225-converge-secs-to-jiffies-part-two-v3-16-a43967e36c88@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Maiolino Cc: Carlos Maiolino Cc: Chris Mason Cc: Christoph Hellwig Cc: Damien Le Maol Cc: "Darrick J. Wong" Cc: David Sterba Cc: Dick Kennedy Cc: Dongsheng Yang Cc: Fabio Estevam Cc: Frank Li Cc: Hans de Goede Cc: Henrique de Moraes Holschuh Cc: Ilpo Jarvinen Cc: Ilya Dryomov Cc: James Bottomley Cc: James Smart Cc: Jaroslav Kysela Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Josef Bacik Cc: Julia Lawall Cc: Kalesh Anakkur Purayil Cc: Keith Busch Cc: Leon Romanovsky Cc: Marc Kleine-Budde Cc: Mark Brown Cc: "Martin K. Petersen" Cc: Nicolas Palix Cc: Niklas Cassel Cc: Oded Gabbay Cc: Sagi Grimberg Cc: Sascha Hauer Cc: Sebastian Reichel Cc: Selvin Thyparampil Xavier Cc: Shawn Guo Cc: Shyam-sundar S-k Cc: Takashi Iwai Cc: Takashi Iwai Cc: Xiubo Li Signed-off-by: Andrew Morton --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 17e62f22683b..b1a18c9cb7f6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -160,7 +160,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) wait_event_timeout(cmdq->waitq, !crsqe->is_in_used || test_bit(ERR_DEVICE_DETACHED, &cmdq->flags), - msecs_to_jiffies(rcfw->max_timeout * 1000)); + secs_to_jiffies(rcfw->max_timeout)); if (!crsqe->is_in_used) return 0; From e0349c46cb4fbbb507fa34476bd70f9c82bad359 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 21 Feb 2025 21:40:34 +0100 Subject: [PATCH 1165/2157] scripts/gdb/linux/symbols.py: address changes to module_sect_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When loading symbols from kernel modules we used to iterate from 0 to module_sect_attrs::nsections, in order to retrieve their name and address. However module_sect_attrs::nsections has been removed from the struct by a previous commit. Re-arrange the iteration by accessing all items in module_sect_attrs::grp::bin_attrs[] until NULL is found (it's a NULL terminated array). At the same time the symbol address cannot be extracted from module_sect_attrs::attrs[]::address anymore because it has also been deleted. Fetch it from module_sect_attrs::grp::bin_attrs[]::private as described in 4b2c11e4aaf7. Link: https://lkml.kernel.org/r/20250221204034.4430-1-antonio@mandelbit.com Fixes: d8959b947a8d ("module: sysfs: Drop member 'module_sect_attrs::nsections'") Fixes: 4b2c11e4aaf7 ("module: sysfs: Drop member 'module_sect_attr::address'") Signed-off-by: Antonio Quartulli Reviewed-by: Jan Kiszka Cc: Thomas Weißschuh Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/symbols.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index f6c1b063775a..15d76f7d8ebc 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -15,6 +15,7 @@ import gdb import os import re +from itertools import count from linux import modules, utils, constants @@ -95,10 +96,14 @@ lx-symbols command.""" except gdb.error: return str(module_addr) - attrs = sect_attrs['attrs'] - section_name_to_address = { - attrs[n]['battr']['attr']['name'].string(): attrs[n]['address'] - for n in range(int(sect_attrs['nsections']))} + section_name_to_address = {} + for i in count(): + # this is a NULL terminated array + if sect_attrs['grp']['bin_attrs'][i] == 0x0: + break + + attr = sect_attrs['grp']['bin_attrs'][i].dereference() + section_name_to_address[attr['attr']['name'].string()] = attr['private'] textaddr = section_name_to_address.get(".text", module_addr) args = [] From 8a56f26607418ab517476de4a27e38be51f6fbca Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 3 Mar 2025 14:49:08 +0100 Subject: [PATCH 1166/2157] signal: avoid clearing TIF_SIGPENDING in recalc_sigpending() if unset Clearing is an atomic op and the flag is not set most of the time. When creating and destroying threads in the same process with the pthread family, the primary bottleneck is calls to sigprocmask which take the process-wide sighand lock. Avoiding the atomic gives me a 2% bump in start/teardown rate at 24-core scale. [akpm@linux-foundation.org: add unlikely() as well] Link: https://lkml.kernel.org/r/20250303134908.423242-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton --- kernel/signal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 875e97f6205a..ce3fae2f1fb5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -176,9 +176,10 @@ static bool recalc_sigpending_tsk(struct task_struct *t) void recalc_sigpending(void) { - if (!recalc_sigpending_tsk(current) && !freezing(current)) - clear_thread_flag(TIF_SIGPENDING); - + if (!recalc_sigpending_tsk(current) && !freezing(current)) { + if (unlikely(test_thread_flag(TIF_SIGPENDING))) + clear_thread_flag(TIF_SIGPENDING); + } } EXPORT_SYMBOL(recalc_sigpending); From 28939c3e9925735166e7b6e42f9e1dc828093510 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 3 Mar 2025 12:03:58 +0100 Subject: [PATCH 1167/2157] scripts/gdb/symbols: determine KASLR offset on s390 Use QEMU's qemu.PhyMemMode [1] functionality to read vmcore from the physical memory the same way the existing dump tooling does this. Gracefully handle non-QEMU targets, early boot, and memory corruptions; print a warning if such situation is detected. [1] https://qemu-project.gitlab.io/qemu/system/gdb.html#examining-physical-memory Link: https://lkml.kernel.org/r/20250303110437.79070-1-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Acked-by: Jan Kiszka Cc: Alexander Gordeev Cc: Andrew Donnellan Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Kieran Bingham Cc: Nina Schoetterl-Glausch Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- scripts/gdb/linux/symbols.py | 31 ++++++++++++++++++++++++++++++- scripts/gdb/linux/utils.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 15d76f7d8ebc..b255177301e9 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -14,6 +14,7 @@ import gdb import os import re +import struct from itertools import count from linux import modules, utils, constants @@ -54,6 +55,29 @@ if hasattr(gdb, 'Breakpoint'): return False +def get_vmcore_s390(): + with utils.qemu_phy_mem_mode(): + vmcore_info = 0x0e0c + paddr_vmcoreinfo_note = gdb.parse_and_eval("*(unsigned long long *)" + + hex(vmcore_info)) + inferior = gdb.selected_inferior() + elf_note = inferior.read_memory(paddr_vmcoreinfo_note, 12) + n_namesz, n_descsz, n_type = struct.unpack(">III", elf_note) + desc_paddr = paddr_vmcoreinfo_note + len(elf_note) + n_namesz + 1 + return gdb.parse_and_eval("(char *)" + hex(desc_paddr)).string() + + +def get_kerneloffset(): + if utils.is_target_arch('s390'): + try: + vmcore_str = get_vmcore_s390() + except gdb.error as e: + gdb.write("{}\n".format(e)) + return None + return utils.parse_vmcore(vmcore_str).kerneloffset + return None + + class LxSymbols(gdb.Command): """(Re-)load symbols of Linux kernel and currently loaded modules. @@ -160,7 +184,12 @@ lx-symbols command.""" obj.filename.endswith('vmlinux.debug')): orig_vmlinux = obj.filename gdb.execute("symbol-file", to_string=True) - gdb.execute("symbol-file {0}".format(orig_vmlinux)) + kerneloffset = get_kerneloffset() + if kerneloffset is None: + offset_arg = "" + else: + offset_arg = " -o " + hex(kerneloffset) + gdb.execute("symbol-file {0}{1}".format(orig_vmlinux, offset_arg)) self.loaded_modules = [] module_list = modules.module_list() diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index 245ab297ea84..03ebdccf5f69 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -11,6 +11,11 @@ # This work is licensed under the terms of the GNU GPL version 2. # +import contextlib +import dataclasses +import re +import typing + import gdb @@ -216,3 +221,33 @@ def gdb_eval_or_none(expresssion): return gdb.parse_and_eval(expresssion) except gdb.error: return None + + +@contextlib.contextmanager +def qemu_phy_mem_mode(): + connection = gdb.selected_inferior().connection + orig = connection.send_packet("qqemu.PhyMemMode") + if orig not in b"01": + raise gdb.error("Unexpected qemu.PhyMemMode") + orig = orig.decode() + if connection.send_packet("Qqemu.PhyMemMode:1") != b"OK": + raise gdb.error("Failed to set qemu.PhyMemMode") + try: + yield + finally: + if connection.send_packet("Qqemu.PhyMemMode:" + orig) != b"OK": + raise gdb.error("Failed to restore qemu.PhyMemMode") + + +@dataclasses.dataclass +class VmCore: + kerneloffset: typing.Optional[int] + + +def parse_vmcore(s): + match = re.search(r"KERNELOFFSET=([0-9a-f]+)", s) + if match is None: + kerneloffset = None + else: + kerneloffset = int(match.group(1), 16) + return VmCore(kerneloffset=kerneloffset) From bc2f19d65373bc166c18c486e2ec2611f5a12d8f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 10 Mar 2025 12:22:27 +0100 Subject: [PATCH 1168/2157] checkpatch: describe --min-conf-desc-length Neither the warning nor the help message gives any hint on the unit for length: Could be meters, inches, bytes, characters or ... lines. Extend the output of `--help` to name the unit "lines" and the default: - --min-conf-desc-length=n set the min description length, if shorter, warn + --min-conf-desc-length=n set the minimum description length for config symbols + in lines, if shorter, warn (default 4) Include the minimum number of lines as other error messages already do: - WARNING: please write a help paragraph that fully describes the config symbol + WARNING: please write a help paragraph that fully describes the config symbol with at least 4 lines Link: https://lkml.kernel.org/r/c71c170c90eba26265951e248adfedd3245fe575.1741605695.git.p.hahn@avm.de Signed-off-by: Philipp Hahn Cc: Andy Whitcroft Cc: Joe Perches Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7b28ad331742..784912f570e9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -113,7 +113,8 @@ Options: --max-line-length=n set the maximum line length, (default $max_line_length) if exceeded, warn on patches requires --strict for use with --file - --min-conf-desc-length=n set the min description length, if shorter, warn + --min-conf-desc-length=n set the minimum description length for config symbols + in lines, if shorter, warn (default $min_conf_desc_length) --tab-size=n set the number of spaces for tab (default $tabsize) --root=PATH PATH to the kernel tree root --no-summary suppress the per-file summary @@ -3645,7 +3646,7 @@ sub process { $help_length < $min_conf_desc_length) { my $stat_real = get_stat_real($linenr, $ln - 1); WARN("CONFIG_DESCRIPTION", - "please write a help paragraph that fully describes the config symbol\n" . "$here\n$stat_real\n"); + "please write a help paragraph that fully describes the config symbol with at least $min_conf_desc_length lines\n" . "$here\n$stat_real\n"); } } From 4164e1525d37d463bbd0a808709fd75abcfc89a5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:32 +0000 Subject: [PATCH 1169/2157] lib/rbtree: enable userland test suite for rbtree related data structure Patch series "lib/interval_tree: add some test cases and cleanup", v2. Since rbtree/augmented tree/interval tree share similar data structure, besides new cases for interval tree, this patch set also does cleanup for others. This patch (of 7): Currently we have some tests for rbtree related data structure, e.g. rbtree, augmented rbtree, interval tree, in lib/ as kernel module. To facilitate the test and debug for those fundamental data structure, this patch enable those tests in userland. Link: https://lkml.kernel.org/r/20250310074938.26756-1-richard.weiyang@gmail.com Link: https://lkml.kernel.org/r/20250310074938.26756-2-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- tools/include/asm/timex.h | 13 +++++ tools/include/linux/container_of.h | 18 +++++++ tools/include/linux/kernel.h | 14 +---- tools/include/linux/math64.h | 5 ++ tools/include/linux/moduleparam.h | 7 +++ tools/include/linux/prandom.h | 51 ++++++++++++++++++ tools/include/linux/slab.h | 1 + tools/lib/slab.c | 16 ++++++ tools/testing/rbtree/Makefile | 31 +++++++++++ tools/testing/rbtree/interval_tree_test.c | 53 +++++++++++++++++++ tools/testing/rbtree/rbtree_test.c | 45 ++++++++++++++++ tools/testing/rbtree/test.h | 4 ++ tools/testing/shared/interval_tree-shim.c | 5 ++ tools/testing/shared/linux/interval_tree.h | 7 +++ .../shared/linux/interval_tree_generic.h | 2 + tools/testing/shared/linux/rbtree.h | 8 +++ tools/testing/shared/linux/rbtree_augmented.h | 7 +++ tools/testing/shared/linux/rbtree_types.h | 8 +++ tools/testing/shared/rbtree-shim.c | 6 +++ 19 files changed, 288 insertions(+), 13 deletions(-) create mode 100644 tools/include/asm/timex.h create mode 100644 tools/include/linux/container_of.h create mode 100644 tools/include/linux/moduleparam.h create mode 100644 tools/include/linux/prandom.h create mode 100644 tools/testing/rbtree/Makefile create mode 100644 tools/testing/rbtree/interval_tree_test.c create mode 100644 tools/testing/rbtree/rbtree_test.c create mode 100644 tools/testing/rbtree/test.h create mode 100644 tools/testing/shared/interval_tree-shim.c create mode 100644 tools/testing/shared/linux/interval_tree.h create mode 100644 tools/testing/shared/linux/interval_tree_generic.h create mode 100644 tools/testing/shared/linux/rbtree.h create mode 100644 tools/testing/shared/linux/rbtree_augmented.h create mode 100644 tools/testing/shared/linux/rbtree_types.h create mode 100644 tools/testing/shared/rbtree-shim.c diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h new file mode 100644 index 000000000000..5adfe3c6d326 --- /dev/null +++ b/tools/include/asm/timex.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_ASM_TIMEX_H +#define __TOOLS_LINUX_ASM_TIMEX_H + +#include + +#define cycles_t clock_t + +static inline cycles_t get_cycles(void) +{ + return clock(); +} +#endif // __TOOLS_LINUX_ASM_TIMEX_H diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h new file mode 100644 index 000000000000..c879e14c3dd6 --- /dev/null +++ b/tools/include/linux/container_of.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_CONTAINER_OF_H +#define _TOOLS_LINUX_CONTAINER_OF_H + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif /* _TOOLS_LINUX_CONTAINER_OF_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 07cfad817d53..c8c18d3908a9 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -25,19 +26,6 @@ #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif -#ifndef container_of -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h index 4ad45d5943dc..8a67d478bf19 100644 --- a/tools/include/linux/math64.h +++ b/tools/include/linux/math64.h @@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) } #endif +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + return dividend / divisor; +} + #endif /* _LINUX_MATH64_H */ diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h new file mode 100644 index 000000000000..4c4d05bef0cb --- /dev/null +++ b/tools/include/linux/moduleparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MODULE_PARAMS_H +#define _TOOLS_LINUX_MODULE_PARAMS_H + +#define MODULE_PARM_DESC(parm, desc) + +#endif // _TOOLS_LINUX_MODULE_PARAMS_H diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h new file mode 100644 index 000000000000..b745041ccd6a --- /dev/null +++ b/tools/include/linux/prandom.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_PRANDOM_H +#define __TOOLS_LINUX_PRANDOM_H + +#include + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/** + * prandom_u32_state - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use get_random_u32(). + */ +static inline u32 prandom_u32_state(struct rnd_state *state) +{ +#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b)) + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); + + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); +} +#endif // __TOOLS_LINUX_PRANDOM_H diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 51b25e9c4ec7..c87051e2b26f 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -12,6 +12,7 @@ void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); +void *kmalloc_array(size_t n, size_t size, gfp_t gfp); bool slab_is_available(void); diff --git a/tools/lib/slab.c b/tools/lib/slab.c index 959997fb0652..981a21404f32 100644 --- a/tools/lib/slab.c +++ b/tools/lib/slab.c @@ -36,3 +36,19 @@ void kfree(void *p) printf("Freeing %p to malloc\n", p); free(p); } + +void *kmalloc_array(size_t n, size_t size, gfp_t gfp) +{ + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = calloc(n, size); + uatomic_inc(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from calloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, n * size); + return ret; +} diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile new file mode 100644 index 000000000000..bac6931b499d --- /dev/null +++ b/tools/testing/rbtree/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 + +.PHONY: clean + +TARGETS = rbtree_test interval_tree_test +OFILES = $(LIBS) rbtree-shim.o interval_tree-shim.o +DEPS = ../../../include/linux/rbtree.h \ + ../../../include/linux/rbtree_types.h \ + ../../../include/linux/rbtree_augmented.h \ + ../../../include/linux/interval_tree.h \ + ../../../include/linux/interval_tree_generic.h \ + ../../../lib/rbtree.c \ + ../../../lib/interval_tree.c + +targets: $(TARGETS) + +include ../shared/shared.mk + +ifeq ($(DEBUG), 1) + CFLAGS += -g +endif + +$(TARGETS): $(OFILES) + +rbtree-shim.o: $(DEPS) +rbtree_test.o: ../../../lib/rbtree_test.c +interval_tree-shim.o: $(DEPS) +interval_tree_test.o: ../../../lib/interval_tree_test.c + +clean: + $(RM) $(TARGETS) *.o generated/* diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c new file mode 100644 index 000000000000..f1c41f5e28ba --- /dev/null +++ b/tools/testing/rbtree/interval_tree_test.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * interval_tree.c: Userspace Interval Tree test-suite + * Copyright (c) 2025 Wei Yang + */ +#include +#include +#include "shared.h" + +#include "../../../lib/interval_tree_test.c" + +int usage(void) +{ + fprintf(stderr, "Userland interval tree test cases\n"); + fprintf(stderr, " -n: Number of nodes in the interval tree\n"); + fprintf(stderr, " -p: Number of iterations modifying the tree\n"); + fprintf(stderr, " -q: Number of searches to the interval tree\n"); + fprintf(stderr, " -s: Number of iterations searching the tree\n"); + fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n"); + fprintf(stderr, " -m: Largest value for the interval's endpoint\n"); + exit(-1); +} + +void interval_tree_tests(void) +{ + interval_tree_test_init(); + interval_tree_test_exit(); +} + +int main(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "n:p:q:s:am:")) != -1) { + if (opt == 'n') + nnodes = strtoul(optarg, NULL, 0); + else if (opt == 'p') + perf_loops = strtoul(optarg, NULL, 0); + else if (opt == 'q') + nsearches = strtoul(optarg, NULL, 0); + else if (opt == 's') + search_loops = strtoul(optarg, NULL, 0); + else if (opt == 'a') + search_all = true; + else if (opt == 'm') + max_endpoint = strtoul(optarg, NULL, 0); + else + usage(); + } + + interval_tree_tests(); + return 0; +} diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c new file mode 100644 index 000000000000..c723e751b9a9 --- /dev/null +++ b/tools/testing/rbtree/rbtree_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rbtree_test.c: Userspace Red Black Tree test-suite + * Copyright (c) 2025 Wei Yang + */ +#include +#include +#include +#include "shared.h" + +#include "../../../lib/rbtree_test.c" + +int usage(void) +{ + fprintf(stderr, "Userland rbtree test cases\n"); + fprintf(stderr, " -n: Number of nodes in the rb-tree\n"); + fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n"); + fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n"); + exit(-1); +} + +void rbtree_tests(void) +{ + rbtree_test_init(); + rbtree_test_exit(); +} + +int main(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "n:p:c:")) != -1) { + if (opt == 'n') + nnodes = strtoul(optarg, NULL, 0); + else if (opt == 'p') + perf_loops = strtoul(optarg, NULL, 0); + else if (opt == 'c') + check_loops = strtoul(optarg, NULL, 0); + else + usage(); + } + + rbtree_tests(); + return 0; +} diff --git a/tools/testing/rbtree/test.h b/tools/testing/rbtree/test.h new file mode 100644 index 000000000000..f1f1b545b55a --- /dev/null +++ b/tools/testing/rbtree/test.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +void rbtree_tests(void); +void interval_tree_tests(void); diff --git a/tools/testing/shared/interval_tree-shim.c b/tools/testing/shared/interval_tree-shim.c new file mode 100644 index 000000000000..122e74756571 --- /dev/null +++ b/tools/testing/shared/interval_tree-shim.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Very simple shim around the interval tree. */ + +#include "../../../lib/interval_tree.c" diff --git a/tools/testing/shared/linux/interval_tree.h b/tools/testing/shared/linux/interval_tree.h new file mode 100644 index 000000000000..129faf9f1d0a --- /dev/null +++ b/tools/testing/shared/linux/interval_tree.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_INTERVAL_TREE_H +#define _TEST_INTERVAL_TREE_H + +#include "../../../../include/linux/interval_tree.h" + +#endif /* _TEST_INTERVAL_TREE_H */ diff --git a/tools/testing/shared/linux/interval_tree_generic.h b/tools/testing/shared/linux/interval_tree_generic.h new file mode 100644 index 000000000000..34cd654bee61 --- /dev/null +++ b/tools/testing/shared/linux/interval_tree_generic.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/interval_tree_generic.h" diff --git a/tools/testing/shared/linux/rbtree.h b/tools/testing/shared/linux/rbtree.h new file mode 100644 index 000000000000..d644bb7360bf --- /dev/null +++ b/tools/testing/shared/linux/rbtree.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_H +#define _TEST_RBTREE_H + +#include +#include "../../../../include/linux/rbtree.h" + +#endif /* _TEST_RBTREE_H */ diff --git a/tools/testing/shared/linux/rbtree_augmented.h b/tools/testing/shared/linux/rbtree_augmented.h new file mode 100644 index 000000000000..ad138fcf6652 --- /dev/null +++ b/tools/testing/shared/linux/rbtree_augmented.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_AUGMENTED_H +#define _TEST_RBTREE_AUGMENTED_H + +#include "../../../../include/linux/rbtree_augmented.h" + +#endif /* _TEST_RBTREE_AUGMENTED_H */ diff --git a/tools/testing/shared/linux/rbtree_types.h b/tools/testing/shared/linux/rbtree_types.h new file mode 100644 index 000000000000..194194a5bf92 --- /dev/null +++ b/tools/testing/shared/linux/rbtree_types.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_TYPES_H +#define _TEST_RBTREE_TYPES_H + +#include "../../../../include/linux/rbtree_types.h" + +#endif /* _TEST_RBTREE_TYPES_H */ + diff --git a/tools/testing/shared/rbtree-shim.c b/tools/testing/shared/rbtree-shim.c new file mode 100644 index 000000000000..7692a993e5f1 --- /dev/null +++ b/tools/testing/shared/rbtree-shim.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Very simple shim around the rbtree. */ + +#include "../../../lib/rbtree.c" + From 3e1d58cd5dae95de731dc3128a7bcffa7c5e7ed9 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:33 +0000 Subject: [PATCH 1170/2157] lib/rbtree: split tests Current tests are gathered in one big function. Split tests into its own function for better understanding and also it is a preparation for introducing new test cases. Link: https://lkml.kernel.org/r/20250310074938.26756-3-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- lib/interval_tree_test.c | 50 +++++++++++++++++++++++++++++----------- lib/rbtree_test.c | 29 ++++++++++++++++++----- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 837064b83a6c..12880d772945 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -59,26 +59,13 @@ static void init(void) queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint; } -static int interval_tree_test_init(void) +static int basic_check(void) { int i, j; - unsigned long results; cycles_t time1, time2, time; - nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), - GFP_KERNEL); - if (!nodes) - return -ENOMEM; - - queries = kmalloc_array(nsearches, sizeof(int), GFP_KERNEL); - if (!queries) { - kfree(nodes); - return -ENOMEM; - } - printk(KERN_ALERT "interval tree insert/remove"); - prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); @@ -96,8 +83,19 @@ static int interval_tree_test_init(void) time = div_u64(time, perf_loops); printk(" -> %llu cycles\n", (unsigned long long)time); + return 0; +} + +static int search_check(void) +{ + int i, j; + unsigned long results; + cycles_t time1, time2, time; + printk(KERN_ALERT "interval tree search"); + init(); + for (j = 0; j < nnodes; j++) interval_tree_insert(nodes + j, &root); @@ -120,6 +118,30 @@ static int interval_tree_test_init(void) printk(" -> %llu cycles (%lu results)\n", (unsigned long long)time, results); + for (j = 0; j < nnodes; j++) + interval_tree_remove(nodes + j, &root); + + return 0; +} + +static int interval_tree_test_init(void) +{ + nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), + GFP_KERNEL); + if (!nodes) + return -ENOMEM; + + queries = kmalloc_array(nsearches, sizeof(int), GFP_KERNEL); + if (!queries) { + kfree(nodes); + return -ENOMEM; + } + + prandom_seed_state(&rnd, 3141592653589793238ULL); + + basic_check(); + search_check(); + kfree(queries); kfree(nodes); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 8655a76d29a1..b0e0b26506cb 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -239,19 +239,14 @@ static void check_augmented(int nr_nodes) } } -static int __init rbtree_test_init(void) +static int basic_check(void) { int i, j; cycles_t time1, time2, time; struct rb_node *node; - nodes = kmalloc_array(nnodes, sizeof(*nodes), GFP_KERNEL); - if (!nodes) - return -ENOMEM; - printk(KERN_ALERT "rbtree testing"); - prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); @@ -343,6 +338,14 @@ static int __init rbtree_test_init(void) check(0); } + return 0; +} + +static int augmented_check(void) +{ + int i, j; + cycles_t time1, time2, time; + printk(KERN_ALERT "augmented rbtree testing"); init(); @@ -390,6 +393,20 @@ static int __init rbtree_test_init(void) check_augmented(0); } + return 0; +} + +static int __init rbtree_test_init(void) +{ + nodes = kmalloc_array(nnodes, sizeof(*nodes), GFP_KERNEL); + if (!nodes) + return -ENOMEM; + + prandom_seed_state(&rnd, 3141592653589793238ULL); + + basic_check(); + augmented_check(); + kfree(nodes); return -EAGAIN; /* Fail will directly unload the module */ From 16b1936ae6d1858252413bf4bae8bcf247eb4b4c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:34 +0000 Subject: [PATCH 1171/2157] lib/rbtree: add random seed Current test use pseudo rand function with fixed seed, which means the test data is the same pattern each time. Add random seed parameter to randomize the test. Link: https://lkml.kernel.org/r/20250310074938.26756-4-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/types.h | 1 + lib/interval_tree_test.c | 3 ++- lib/rbtree_test.c | 3 ++- tools/include/linux/types.h | 2 ++ tools/testing/rbtree/interval_tree_test.c | 5 ++++- tools/testing/rbtree/rbtree_test.c | 5 ++++- 6 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/types.h b/include/linux/types.h index 1c509ce8f7f6..864ab701f297 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -92,6 +92,7 @@ typedef unsigned char unchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; +typedef unsigned long long ullong; #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 12880d772945..51863077d4ec 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -19,6 +19,7 @@ __param(int, search_loops, 1000, "Number of iterations searching the tree"); __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); +__param(ullong, seed, 3141592653589793238ULL, "Random seed"); static struct rb_root_cached root = RB_ROOT_CACHED; static struct interval_tree_node *nodes = NULL; @@ -137,7 +138,7 @@ static int interval_tree_test_init(void) return -ENOMEM; } - prandom_seed_state(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, seed); basic_check(); search_check(); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index b0e0b26506cb..690cede46ac2 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -14,6 +14,7 @@ __param(int, nnodes, 100, "Number of nodes in the rb-tree"); __param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); __param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); +__param(ullong, seed, 3141592653589793238ULL, "Random seed"); struct test_node { u32 key; @@ -402,7 +403,7 @@ static int __init rbtree_test_init(void) if (!nodes) return -ENOMEM; - prandom_seed_state(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, seed); basic_check(); augmented_check(); diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 8519386acd23..4928e33d44ac 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -42,6 +42,8 @@ typedef __s16 s16; typedef __u8 u8; typedef __s8 s8; +typedef unsigned long long ullong; + #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c index f1c41f5e28ba..63775b831c1c 100644 --- a/tools/testing/rbtree/interval_tree_test.c +++ b/tools/testing/rbtree/interval_tree_test.c @@ -18,6 +18,7 @@ int usage(void) fprintf(stderr, " -s: Number of iterations searching the tree\n"); fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n"); fprintf(stderr, " -m: Largest value for the interval's endpoint\n"); + fprintf(stderr, " -r: Random seed\n"); exit(-1); } @@ -31,7 +32,7 @@ int main(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "n:p:q:s:am:")) != -1) { + while ((opt = getopt(argc, argv, "n:p:q:s:am:r:")) != -1) { if (opt == 'n') nnodes = strtoul(optarg, NULL, 0); else if (opt == 'p') @@ -44,6 +45,8 @@ int main(int argc, char **argv) search_all = true; else if (opt == 'm') max_endpoint = strtoul(optarg, NULL, 0); + else if (opt == 'r') + seed = strtoul(optarg, NULL, 0); else usage(); } diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c index c723e751b9a9..585c970f679e 100644 --- a/tools/testing/rbtree/rbtree_test.c +++ b/tools/testing/rbtree/rbtree_test.c @@ -16,6 +16,7 @@ int usage(void) fprintf(stderr, " -n: Number of nodes in the rb-tree\n"); fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n"); fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n"); + fprintf(stderr, " -r: Random seed\n"); exit(-1); } @@ -29,13 +30,15 @@ int main(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "n:p:c:")) != -1) { + while ((opt = getopt(argc, argv, "n:p:c:r:")) != -1) { if (opt == 'n') nnodes = strtoul(optarg, NULL, 0); else if (opt == 'p') perf_loops = strtoul(optarg, NULL, 0); else if (opt == 'c') check_loops = strtoul(optarg, NULL, 0); + else if (opt == 'r') + seed = strtoul(optarg, NULL, 0); else usage(); } From 82114e45131ff8006435ce40f4275c9c8910b404 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:35 +0000 Subject: [PATCH 1172/2157] lib/interval_tree: add test case for interval_tree_iter_xxx() helpers Verify interval_tree_iter_xxx() helpers could find intersection ranges as expected. [sfr@canb.auug.org.au: some of tools/ uses -Wno-unused-parameter] Link: https://lkml.kernel.org/r/20250312113612.31ac808e@canb.auug.org.au Link: https://lkml.kernel.org/r/20250310074938.26756-5-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- lib/interval_tree_test.c | 69 ++++++++++++++++++++++++++++++++++++ tools/include/linux/bitmap.h | 21 +++++++++++ tools/lib/bitmap.c | 20 +++++++++++ 3 files changed, 110 insertions(+) diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 51863077d4ec..7821379e2c21 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -5,6 +5,7 @@ #include #include #include +#include #define __param(type, name, init, msg) \ static type name = init; \ @@ -125,6 +126,73 @@ static int search_check(void) return 0; } +static int intersection_range_check(void) +{ + int i, j, k; + unsigned long start, last; + struct interval_tree_node *node; + unsigned long *intxn1; + unsigned long *intxn2; + + printk(KERN_ALERT "interval tree iteration\n"); + + intxn1 = bitmap_alloc(nnodes, GFP_KERNEL); + if (!intxn1) { + WARN_ON_ONCE("Failed to allocate intxn1\n"); + return -ENOMEM; + } + + intxn2 = bitmap_alloc(nnodes, GFP_KERNEL); + if (!intxn2) { + WARN_ON_ONCE("Failed to allocate intxn2\n"); + bitmap_free(intxn1); + return -ENOMEM; + } + + for (i = 0; i < search_loops; i++) { + /* Initialize interval tree for each round */ + init(); + for (j = 0; j < nnodes; j++) + interval_tree_insert(nodes + j, &root); + + /* Let's try nsearches different ranges */ + for (k = 0; k < nsearches; k++) { + /* Try whole range once */ + if (!k) { + start = 0UL; + last = ULONG_MAX; + } else { + last = (prandom_u32_state(&rnd) >> 4) % max_endpoint; + start = (prandom_u32_state(&rnd) >> 4) % last; + } + + /* Walk nodes to mark intersection nodes */ + bitmap_zero(intxn1, nnodes); + for (j = 0; j < nnodes; j++) { + node = nodes + j; + + if (start <= node->last && last >= node->start) + bitmap_set(intxn1, j, 1); + } + + /* Iterate tree to clear intersection nodes */ + bitmap_zero(intxn2, nnodes); + for (node = interval_tree_iter_first(&root, start, last); node; + node = interval_tree_iter_next(node, start, last)) + bitmap_set(intxn2, node - nodes, 1); + + WARN_ON_ONCE(!bitmap_equal(intxn1, intxn2, nnodes)); + } + + for (j = 0; j < nnodes; j++) + interval_tree_remove(nodes + j, &root); + } + + bitmap_free(intxn1); + bitmap_free(intxn2); + return 0; +} + static int interval_tree_test_init(void) { nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), @@ -142,6 +210,7 @@ static int interval_tree_test_init(void) basic_check(); search_check(); + intersection_range_check(); kfree(queries); kfree(nodes); diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 2a7f260ef9dc..d4d300040d01 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -19,6 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -79,6 +80,11 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) +{ + return malloc(bitmap_size(nbits)); +} + /** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits @@ -150,6 +156,21 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map |= GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0xff, nbits / 8); + else + __bitmap_set(map, start, nbits); +} + static inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 2178862bb114..51255c69754d 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -101,6 +101,26 @@ bool __bitmap_intersects(const unsigned long *bitmap1, return false; } +void __bitmap_set(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (len) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} + void __bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); From ccaf3efceefc2c3abc6c23277d5a93238efa5c58 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:36 +0000 Subject: [PATCH 1173/2157] lib/interval_tree: add test case for span iteration Verify interval_tree_span_iter_xxx() helpers works as expected. Link: https://lkml.kernel.org/r/20250310074938.26756-6-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- lib/interval_tree_test.c | 117 ++++++++++++++++++++++ tools/testing/rbtree/Makefile | 6 +- tools/testing/rbtree/interval_tree_test.c | 2 + 3 files changed, 123 insertions(+), 2 deletions(-) diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 7821379e2c21..5fd62656f42e 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -6,6 +6,7 @@ #include #include #include +#include #define __param(type, name, init, msg) \ static type name = init; \ @@ -193,6 +194,121 @@ static int intersection_range_check(void) return 0; } +#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER +/* + * Helper function to get span of current position from maple tree point of + * view. + */ +static void mas_cur_span(struct ma_state *mas, struct interval_tree_span_iter *state) +{ + unsigned long cur_start; + unsigned long cur_last; + int is_hole; + + if (mas->status == ma_overflow) + return; + + /* walk to current position */ + state->is_hole = mas_walk(mas) ? 0 : 1; + + cur_start = mas->index < state->first_index ? + state->first_index : mas->index; + + /* whether we have followers */ + do { + + cur_last = mas->last > state->last_index ? + state->last_index : mas->last; + + is_hole = mas_next_range(mas, state->last_index) ? 0 : 1; + + } while (mas->status != ma_overflow && is_hole == state->is_hole); + + if (state->is_hole) { + state->start_hole = cur_start; + state->last_hole = cur_last; + } else { + state->start_used = cur_start; + state->last_used = cur_last; + } + + /* advance position for next round */ + if (mas->status != ma_overflow) + mas_set(mas, cur_last + 1); +} + +static int span_iteration_check(void) +{ + int i, j, k; + unsigned long start, last; + struct interval_tree_span_iter span, mas_span; + + DEFINE_MTREE(tree); + + MA_STATE(mas, &tree, 0, 0); + + printk(KERN_ALERT "interval tree span iteration\n"); + + for (i = 0; i < search_loops; i++) { + /* Initialize interval tree for each round */ + init(); + for (j = 0; j < nnodes; j++) + interval_tree_insert(nodes + j, &root); + + /* Put all the range into maple tree */ + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + mt_set_in_rcu(&tree); + + for (j = 0; j < nnodes; j++) + WARN_ON_ONCE(mtree_store_range(&tree, nodes[j].start, + nodes[j].last, nodes + j, GFP_KERNEL)); + + /* Let's try nsearches different ranges */ + for (k = 0; k < nsearches; k++) { + /* Try whole range once */ + if (!k) { + start = 0UL; + last = ULONG_MAX; + } else { + last = (prandom_u32_state(&rnd) >> 4) % max_endpoint; + start = (prandom_u32_state(&rnd) >> 4) % last; + } + + mas_span.first_index = start; + mas_span.last_index = last; + mas_span.is_hole = -1; + mas_set(&mas, start); + + interval_tree_for_each_span(&span, &root, start, last) { + mas_cur_span(&mas, &mas_span); + + WARN_ON_ONCE(span.is_hole != mas_span.is_hole); + + if (span.is_hole) { + WARN_ON_ONCE(span.start_hole != mas_span.start_hole); + WARN_ON_ONCE(span.last_hole != mas_span.last_hole); + } else { + WARN_ON_ONCE(span.start_used != mas_span.start_used); + WARN_ON_ONCE(span.last_used != mas_span.last_used); + } + } + + } + + WARN_ON_ONCE(mas.status != ma_overflow); + + /* Cleanup maple tree for each round */ + mtree_destroy(&tree); + /* Cleanup interval tree for each round */ + for (j = 0; j < nnodes; j++) + interval_tree_remove(nodes + j, &root); + } + return 0; +} +#else +static inline int span_iteration_check(void) {return 0; } +#endif + static int interval_tree_test_init(void) { nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), @@ -211,6 +327,7 @@ static int interval_tree_test_init(void) basic_check(); search_check(); intersection_range_check(); + span_iteration_check(); kfree(queries); kfree(nodes); diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile index bac6931b499d..d7bbae2af4c7 100644 --- a/tools/testing/rbtree/Makefile +++ b/tools/testing/rbtree/Makefile @@ -3,7 +3,7 @@ .PHONY: clean TARGETS = rbtree_test interval_tree_test -OFILES = $(LIBS) rbtree-shim.o interval_tree-shim.o +OFILES = $(SHARED_OFILES) rbtree-shim.o interval_tree-shim.o maple-shim.o DEPS = ../../../include/linux/rbtree.h \ ../../../include/linux/rbtree_types.h \ ../../../include/linux/rbtree_augmented.h \ @@ -25,7 +25,9 @@ $(TARGETS): $(OFILES) rbtree-shim.o: $(DEPS) rbtree_test.o: ../../../lib/rbtree_test.c interval_tree-shim.o: $(DEPS) +interval_tree-shim.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER interval_tree_test.o: ../../../lib/interval_tree_test.c +interval_tree_test.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER clean: - $(RM) $(TARGETS) *.o generated/* + $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/* diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c index 63775b831c1c..49bc5b534330 100644 --- a/tools/testing/rbtree/interval_tree_test.c +++ b/tools/testing/rbtree/interval_tree_test.c @@ -6,6 +6,7 @@ #include #include #include "shared.h" +#include "maple-shared.h" #include "../../../lib/interval_tree_test.c" @@ -51,6 +52,7 @@ int main(int argc, char **argv) usage(); } + maple_tree_init(); interval_tree_tests(); return 0; } From 19811285784f7f3d4abaa6e94623f2e7d10219d0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:37 +0000 Subject: [PATCH 1174/2157] lib/interval_tree: skip the check before go to the right subtree The interval_tree_subtree_search() holds the loop invariant: start <= node->ITSUBTREE Let's say we have a following tree: node / \ left right So we know node->ITSUBTREE is contributed by one of the following: * left->ITSUBTREE * ITLAST(node) * right->ITSUBTREE When we come to the right node, we are sure the first two don't contribute to node->ITSUBTREE and it must be the right node does the job. So skip the check before go to the right subtree. Link: https://lkml.kernel.org/r/20250310074938.26756-7-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/interval_tree_generic.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index aaa8a0767aa3..1b400f26f63d 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ if (ITSTART(node) <= last) { /* Cond1 */ \ if (start <= ITLAST(node)) /* Cond2 */ \ return node; /* node is leftmost match */ \ - if (node->ITRB.rb_right) { \ - node = rb_entry(node->ITRB.rb_right, \ - ITSTRUCT, ITRB); \ - if (start <= node->ITSUBTREE) \ - continue; \ - } \ + node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \ + continue; \ } \ return NULL; /* No match */ \ } \ From ceb08ee965a20dd1eecc4cdcaa0328fc7c03b1e5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 10 Mar 2025 07:49:38 +0000 Subject: [PATCH 1175/2157] lib/interval_tree: fix the comment of interval_tree_span_iter_next_gap() The comment of interval_tree_span_iter_next_gap() is not exact, nodes[1] is not always !NULL. There are threes cases here. If there is an interior hole, the statement is correct. If there is a tailing hole or the contiguous used range span to the end, nodes[1] is NULL. Link: https://lkml.kernel.org/r/20250310074938.26756-8-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Jason Gunthorpe Cc: Matthew Wilcox Cc: Michel Lespinasse Signed-off-by: Andrew Morton --- lib/interval_tree.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 3412737ff365..324766e9bf63 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -20,9 +20,15 @@ EXPORT_SYMBOL_GPL(interval_tree_iter_next); /* * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous * span of nodes. This makes nodes[0]->last the end of that contiguous used span - * indexes that started at the original nodes[1]->start. nodes[1] is now the - * first node starting the next used span. A hole span is between nodes[0]->last - * and nodes[1]->start. nodes[1] must be !NULL. + * of indexes that started at the original nodes[1]->start. + * + * If there is an interior hole, nodes[1] is now the first node starting the + * next used span. A hole span is between nodes[0]->last and nodes[1]->start. + * + * If there is a tailing hole, nodes[1] is now NULL. A hole span is between + * nodes[0]->last and last_index. + * + * If the contiguous used range span to last_index, nodes[1] is set to NULL. */ static void interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state) From 6164be01f1799458b5c6f59a547d6241e4a4b4d6 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 13 Mar 2025 14:30:02 +0100 Subject: [PATCH 1176/2157] watchdog/perf: optimize bytes copied and remove manual NUL-termination Currently, up to 23 bytes of the source string are copied to the destination buffer (including the comma and anything after it), only to then manually NUL-terminate the destination buffer again at index 'len' (where the comma was found). Fix this by calling strscpy() with 'len' instead of the destination buffer size to copy only as many bytes from the source string as needed. Change the length check to allow 'len' to be less than or equal to the destination buffer size to fill the whole buffer if needed. Remove the if-check for the return value of strscpy(), because calling strscpy() with 'len' always truncates the source string at the comma as expected and NUL-terminates the destination buffer at the corresponding index instead. Remove the manual NUL-termination. No functional changes intended. Link: https://lkml.kernel.org/r/20250313133004.36406-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Cc: Song Liu Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- kernel/watchdog_perf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 59c1d86a73a2..b81167cb0dfc 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -294,12 +294,10 @@ void __init hardlockup_config_perf_event(const char *str) } else { unsigned int len = comma - str; - if (len >= sizeof(buf)) + if (len > sizeof(buf)) return; - if (strscpy(buf, str, sizeof(buf)) < 0) - return; - buf[len] = 0; + strscpy(buf, str, len); if (kstrtoull(buf, 16, &config)) return; } From caeb8ba598f0238b86ee967a77c54173e036469c Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Fri, 7 Mar 2025 10:44:11 +0200 Subject: [PATCH 1177/2157] kexec_core: accept unaccepted kexec segments' destination addresses The UEFI Specification version 2.9 introduces the concept of memory acceptance: some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP, require memory to be accepted before it can be used by the guest. Accepting memory is expensive. The memory must be allocated by the VMM and then brought to a known safe state: cache must be flushed, memory must be zeroed with the guest's encryption key, and associated metadata must be manipulated. These operations must be performed from a trusted environment (firmware or TDX module). Switching context to and from it also takes time. This cost adds up. On large confidential VMs, memory acceptance alone can take minutes. It is better to delay memory acceptance until the memory is actually needed. The kernel accepts memory when it is allocated from buddy allocator for the first time. This reduces boot time and decreases memory overhead as the VMM can allocate memory as needed. It does not work when the guest attempts to kexec into a new kernel. The kexec segments' destination addresses are not allocated by the buddy allocator. Instead, they are searched from normal system RAM (top-down or bottom-up) and exclude driver-managed memory, ACPI, persistent, and reserved memory. Unaccepted memory is normal system RAM from kernel point of view and kexec can place segments there. Kexec bypasses the code path in buddy allocator where memory gets accepted and it leads to a crash when kexec accesses segments' memory. Accept the destination addresses during the kexec load, immediately after they pass sanity checks. This ensures the code is located in a common place shared by both the kexec_load and kexec_file_load system calls. This will not conflict with the accounting in try_to_accept_memory_one() since the accounting is set during kernel boot and decremented when pages are moved to the freelists. There is no harm in invoking accept_memory() on a page before making it available to the buddy allocator. No need to worry about re-accepting memory since accept_memory() checks the unaccepted bitmap before accepting a memory page. Although a user may perform kexec loading without ever triggering the jump, it doesn't impact much since kexec loading is not in a performance-critical path. Additionally, the destination addresses are always searched and found in the same location on a given system. Changes to the destination address searching logic to locate only memory in either unaccepted or accepted status are unnecessary and complicated. [kirill.shutemov@linux.intel.com: update the commit message] Link: https://lkml.kernel.org/r/20250307084411.2150367-1-kirill.shutemov@linux.intel.com Signed-off-by: Yan Zhao Signed-off-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc: "Eric W. Biederman" Cc: Ashish Kalra Cc: Baoquan He Cc: Jianxiong Gao Signed-off-by: Andrew Morton --- kernel/kexec_core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index c0bdc1686154..9a2095216f4f 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -210,6 +210,16 @@ int sanity_check_segment_list(struct kimage *image) } #endif + /* + * The destination addresses are searched from system RAM rather than + * being allocated from the buddy allocator, so they are not guaranteed + * to be accepted by the current kernel. Accept the destination + * addresses before kexec swaps their content with the segments' source + * pages to avoid accessing memory before it is accepted. + */ + for (i = 0; i < nr_segments; i++) + accept_memory(image->segment[i].mem, image->segment[i].memsz); + return 0; } From 47acca884f714f41d95dc654f802845544554784 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 16:50:30 -0500 Subject: [PATCH 1178/2157] NFSv4: Don't trigger uneccessary scans for return-on-close delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. Avoid at least some loops by not requiring the NFSv4 state manager to scan for delegations that are marked for return-on-close. Instead, either mark them for immediate return (if possible) or else leave it up to nfs4_inode_return_delegation_on_close() to return them once the file is closed by the application. Fixes: b757144fd77c ("NFSv4: Be less aggressive about returning delegations for open files") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 4db912f56230..df77d68d9ff9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -590,17 +590,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { - struct inode *inode; - - spin_lock(&delegation->lock); - inode = delegation->inode; - if (inode && list_empty(&NFS_I(inode)->open_files)) - ret = true; - spin_unlock(&delegation->lock); - } - if (ret) - clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -878,11 +867,25 @@ int nfs4_inode_make_writeable(struct inode *inode) return nfs4_inode_return_delegation(inode); } -static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, - struct nfs_delegation *delegation) +static void +nfs_mark_return_if_closed_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); + struct inode *inode; + + if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) || + test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) + return; + spin_lock(&delegation->lock); + inode = delegation->inode; + if (!inode) + goto out; + if (list_empty(&NFS_I(inode)->open_files)) + nfs_mark_return_delegation(server, delegation); + else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out: + spin_unlock(&delegation->lock); } static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) From 35a566a24e58f1b5f89737edf60b77de58719ed0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 18:14:26 -0500 Subject: [PATCH 1179/2157] NFSv4: Avoid unnecessary scans of filesystems for returning delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to return delegations asynchronously, it should only scan those filesystems that hold delegations that need to be returned. Fixes: af3b61bf6131 ("NFSv4: Clean up nfs_client_return_marked_delegations()") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 5 +++++ include/linux/nfs_fs_sb.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index df77d68d9ff9..d1f5e497729c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -79,6 +79,7 @@ static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } @@ -608,6 +609,9 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server, struct nfs_delegation *place_holder_deleg = NULL; int err = 0; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN, + &server->delegation_flags)) + return 0; restart: /* * To avoid quadratic looping we hold a reference @@ -659,6 +663,7 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server, cond_resched(); if (!err) goto restart; + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); goto out; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index f00bfcee7120..4e9ad6f6e907 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -250,6 +250,8 @@ struct nfs_server { struct list_head ss_copies; struct list_head ss_src_copies; + unsigned long delegation_flags; +#define NFS4SERV_DELEGRETURN (1) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; From f163aa81a799e2d46d7f8f0b42a0e7770eaa0d06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 19:03:21 -0500 Subject: [PATCH 1180/2157] NFSv4: Avoid unnecessary scans of filesystems for expired delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to reap the expired delegations, it should scan only those filesystems that hold delegations that need to be reaped. Fixes: 7f156ef0bf45 ("NFSv4: Clean up nfs_delegation_reap_expired()") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 7 +++++++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 8 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d1f5e497729c..abd952cc47e4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1284,6 +1284,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server, return; clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); } @@ -1362,6 +1363,9 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, nfs4_stateid stateid; unsigned long gen = ++server->delegation_gen; + if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED, + &server->delegation_flags)) + return 0; restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &server->delegations, super_list) { @@ -1391,6 +1395,9 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, goto restart; } nfs_inode_mark_test_expired_delegation(server,inode); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, + &server->nfs_client->cl_state); iput(inode); return -EAGAIN; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4e9ad6f6e907..7d6f164036fa 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -252,6 +252,7 @@ struct nfs_server { unsigned long delegation_flags; #define NFS4SERV_DELEGRETURN (1) +#define NFS4SERV_DELEGATION_EXPIRED (2) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; From e767b59e29b8327d25edde65efc743f479f30d0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 18:37:51 -0500 Subject: [PATCH 1181/2157] NFSv4: Avoid unnecessary scans of filesystems for delayed delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to manage the delayed return of delegations, then only scan those filesystems containing delegations that were marked as being delayed. Fixes: be20037725d1 ("NFSv4: Fix delegation return in cases where we have to retry") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 18 ++++++++++++------ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index abd952cc47e4..325ba0663a6d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -331,14 +331,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi) } static void nfs_abort_delegation_return(struct nfs_delegation *delegation, - struct nfs_client *clp, int err) + struct nfs_server *server, int err) { - spin_lock(&delegation->lock); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); if (err == -EAGAIN) { set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state); + set_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags); + set_bit(NFS4CLNT_DELEGRETURN_DELAYED, + &server->nfs_client->cl_state); } spin_unlock(&delegation->lock); } @@ -548,7 +550,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, */ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); unsigned int mode = O_WRONLY | O_RDWR; int err = 0; @@ -570,11 +572,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation /* * Guard against state recovery */ - err = nfs4_wait_clnt_recover(clp); + err = nfs4_wait_clnt_recover(server->nfs_client); } if (err) { - nfs_abort_delegation_return(delegation, clp, err); + nfs_abort_delegation_return(delegation, server, err); goto out; } @@ -678,6 +680,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) struct nfs_delegation *d; bool ret = false; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags)) + goto out; list_for_each_entry_rcu (d, &server->delegations, super_list) { if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags)) continue; @@ -685,6 +690,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags); ret = true; } +out: return ret; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7d6f164036fa..108862d81b57 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -253,6 +253,7 @@ struct nfs_server { unsigned long delegation_flags; #define NFS4SERV_DELEGRETURN (1) #define NFS4SERV_DELEGATION_EXPIRED (2) +#define NFS4SERV_DELEGRETURN_DELAYED (3) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; From 43502f6e8d1e767d6736ea0676cc784025cf6eeb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 4 Dec 2024 13:53:09 +1100 Subject: [PATCH 1182/2157] NFS: fix open_owner_id_maxsz and related fields. A recent change increased the size of an NFSv4 open owner, but didn't increase the corresponding max_sz defines. This is not know to have caused failure, but should be fixed. This patch also fixes some relates _maxsz fields that are wrong. Note that the XXX_owner_id_maxsz values now are only the size of the id and do NOT include the len field that will always preceed the id in xdr encoding. I think this is clearer. Reported-by: David Disseldorp Fixes: d98f72272500 ("nfs: simplify and guarantee owner uniqueness.") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e8ac3f615f93..71f45cc0ca74 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -82,9 +82,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ #define pagepad_maxsz (1) -#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) -#define lock_owner_id_maxsz (1 + 1 + 4) -#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define open_owner_id_maxsz (2 + 1 + 2 + 2) +#define lock_owner_id_maxsz (2 + 1 + 2) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -185,7 +184,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_claim_null_maxsz (1 + nfs4_name_maxsz) #define encode_open_maxsz (op_encode_hdr_maxsz + \ 2 + encode_share_access_maxsz + 2 + \ - open_owner_id_maxsz + \ + 1 + open_owner_id_maxsz + \ encode_opentype_maxsz + \ encode_claim_null_maxsz) #define decode_space_limit_maxsz (3) @@ -255,13 +254,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) -#define encode_lockowner_maxsz (7) +#define encode_lockowner_maxsz (2 + 1 + lock_owner_id_maxsz) + #define encode_lock_maxsz (op_encode_hdr_maxsz + \ 7 + \ 1 + encode_stateid_maxsz + 1 + \ encode_lockowner_maxsz) #define decode_lock_denied_maxsz \ - (8 + decode_lockowner_maxsz) + (2 + 2 + 1 + 2 + 1 + lock_owner_id_maxsz) #define decode_lock_maxsz (op_decode_hdr_maxsz + \ decode_lock_denied_maxsz) #define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \ @@ -617,7 +617,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_lockowner_maxsz) #define NFS4_dec_release_lockowner_sz \ (compound_decode_hdr_maxsz + \ - decode_lockowner_maxsz) + decode_release_lockowner_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -1412,7 +1412,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena __be32 *p; /* * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4 = 32 + * owner 28 */ encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->share_access); @@ -5077,7 +5077,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) /* * We create the owner, so we know a proper owner.id length is 4. */ -static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) +static int decode_lock_denied(struct xdr_stream *xdr, struct file_lock *fl) { uint64_t offset, length, clientid; __be32 *p; From 8955e7ce61fb6ba28f88e0a38479c992991ba6ab Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:39 -0500 Subject: [PATCH 1183/2157] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR Add XDR encoding and decoding functions for the NFSv4.2 OFFLOAD_STATUS operation. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-13-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 86 +++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 5 ++- 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 5072d7ea72e9..b1b663468249 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -35,6 +35,11 @@ #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) +#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \ + 2 /* osr_count */ + \ + 2 /* osr_complete */) #define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 1 + /* nl4_type */ \ @@ -143,6 +148,14 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) +#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_offload_status_maxsz) +#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_offload_status_maxsz) #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -345,6 +358,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->osa_stateid); } +static void encode_offload_status(struct xdr_stream *xdr, + const struct nfs42_offload_status_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->osa_stateid); +} + static void encode_copy_notify(struct xdr_stream *xdr, const struct nfs42_copy_notify_args *args, struct compound_hdr *hdr) @@ -569,6 +590,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode OFFLOAD_STATUS request + */ +static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_offload_status_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->osa_seq_args, &hdr); + encode_putfh(xdr, args->osa_src_fh, &hdr); + encode_offload_status(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode COPY_NOTIFY request */ @@ -921,6 +961,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr, return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); } +static int decode_offload_status(struct xdr_stream *xdr, + struct nfs42_offload_status_res *res) +{ + ssize_t result; + int status; + + status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS); + if (status) + return status; + /* osr_count */ + if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0) + return -EIO; + /* osr_complete<1> */ + result = xdr_stream_decode_uint32_array(xdr, &res->osr_complete, 1); + if (result < 0) + return -EIO; + res->complete_count = result; + return 0; +} + static int decode_copy_notify(struct xdr_stream *xdr, struct nfs42_copy_notify_res *res) { @@ -1370,6 +1430,32 @@ static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp, return status; } +/* + * Decode OFFLOAD_STATUS response + */ +static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_offload_status_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->osr_seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_offload_status(xdr, res); + +out: + return status; +} + /* * Decode COPY_NOTIFY response */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 71f45cc0ca74..55bef5fbfa47 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7702,6 +7702,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(CLONE, enc_clone, dec_clone), PROC42(COPY, enc_copy, dec_copy), PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), + PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status), PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9ac83ca88326..5fa60fe441b5 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -691,6 +691,7 @@ enum { NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, NFSPROC4_CLNT_READ_PLUS, + NFSPROC4_CLNT_OFFLOAD_STATUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9155a6ffc370..dc5288111999 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1515,8 +1515,9 @@ struct nfs42_offload_status_args { struct nfs42_offload_status_res { struct nfs4_sequence_res osr_seq_res; - uint64_t osr_count; - int osr_status; + u64 osr_count; + int complete_count; + u32 osr_complete; }; struct nfs42_copy_notify_args { From 77dd8a302f56679178924f82a29eaf437857ea75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:40 -0500 Subject: [PATCH 1184/2157] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation Enable the Linux NFS client to observe the progress of an offloaded asynchronous COPY operation. This new operation will be put to use in a subsequent patch. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-14-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 103 ++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1924c4a2077b..3e359e16c324 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -21,6 +21,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid, + u64 *copied); static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) { @@ -582,6 +584,107 @@ static int nfs42_do_offload_cancel_async(struct file *dst, return status; } +static int +_nfs42_proc_offload_status(struct nfs_server *server, struct file *file, + struct nfs42_offload_data *data) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = ctx->cred, + }; + int status; + + status = nfs4_call_sync(server->client, server, &msg, + &data->args.osa_seq_args, + &data->res.osr_seq_res, 1); + switch (status) { + case 0: + break; + + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + /* + * Server does not recognize the COPY stateid. CB_OFFLOAD + * could have purged it, or server might have rebooted. + * Since COPY stateids don't have an associated inode, + * avoid triggering state recovery. + */ + status = -EBADF; + break; + case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: + case -EOPNOTSUPP: + server->caps &= ~NFS_CAP_OFFLOAD_STATUS; + status = -EOPNOTSUPP; + break; + } + + return status; +} + +/** + * nfs42_proc_offload_status - Poll completion status of an async copy operation + * @dst: handle of file being copied into + * @stateid: copy stateid (from async COPY result) + * @copied: OUT: number of bytes copied so far + * + * Return values: + * %0: Server returned an NFS4_OK completion status + * %-EINPROGRESS: Server returned no completion status + * %-EREMOTEIO: Server returned an error completion status + * %-EBADF: Server did not recognize the copy stateid + * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS + * %-ERESTARTSYS: Wait interrupted by signal + * + * Other negative errnos indicate the client could not complete the + * request. + */ +static int __maybe_unused +nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied) +{ + struct inode *inode = file_inode(dst); + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { + .inode = inode, + }; + struct nfs42_offload_data *data; + int status; + + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS)) + return -EOPNOTSUPP; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->seq_server = server; + data->args.osa_src_fh = NFS_FH(inode); + memcpy(&data->args.osa_stateid, stateid, + sizeof(data->args.osa_stateid)); + exception.stateid = &data->args.osa_stateid; + do { + status = _nfs42_proc_offload_status(server, dst, data); + if (status == -EOPNOTSUPP) + goto out; + status = nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + if (status) + goto out; + + *copied = data->res.osr_count; + if (!data->res.complete_count) + status = -EINPROGRESS; + else if (data->res.osr_complete != NFS_OK) + status = -EREMOTEIO; + +out: + kfree(data); + return status; +} + static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, struct nfs42_copy_notify_args *args, struct nfs42_copy_notify_res *res) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6e95db6c17e9..24406fc1352d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10781,7 +10781,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CLONE | NFS_CAP_LAYOUTERROR | NFS_CAP_READ_PLUS - | NFS_CAP_MOVEABLE, + | NFS_CAP_MOVEABLE + | NFS_CAP_OFFLOAD_STATUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 108862d81b57..1711eefcf24f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -293,6 +293,7 @@ struct nfs_server { #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) +#define NFS_CAP_OFFLOAD_STATUS (1U << 9) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) From adcc0aef9ed41a82590be4f0090bb404c28a2f2f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:41 -0500 Subject: [PATCH 1185/2157] NFS: Use NFSv4.2's OFFLOAD_STATUS operation We've found that there are cases where a transport disconnection results in the loss of callback RPCs. NFS servers typically do not retransmit callback operations after a disconnect. This can be a problem for the Linux NFS client's current implementation of asynchronous COPY, which waits indefinitely for a CB_OFFLOAD callback. If a transport disconnect occurs while an async COPY is running, there's a good chance the client will never get the completing CB_OFFLOAD. Fix this by implementing the OFFLOAD_STATUS operation so that the Linux NFS client can probe the NFS server if it doesn't see a CB_OFFLOAD in a reasonable amount of time. This patch implements a simplistic check. As future work, the client might also be able to detect whether there is no forward progress on the request asynchronous COPY operation, and CANCEL it. Suggested-by: Olga Kornievskaia Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735 Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-15-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 70 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3e359e16c324..82ada136ee6a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -175,6 +175,20 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } +static void nfs4_copy_dequeue_callback(struct nfs_server *dst_server, + struct nfs_server *src_server, + struct nfs4_copy_state *copy) +{ + spin_lock(&dst_server->nfs_client->cl_lock); + list_del_init(©->copies); + spin_unlock(&dst_server->nfs_client->cl_lock); + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_del_init(©->src_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } +} + static int handle_async_copy(struct nfs42_copy_res *res, struct nfs_server *dst_server, struct nfs_server *src_server, @@ -184,9 +198,12 @@ static int handle_async_copy(struct nfs42_copy_res *res, bool *restart) { struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter; - int status = NFS4_OK; struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); struct nfs_open_context *src_ctx = nfs_file_open_context(src); + struct nfs_client *clp = dst_server->nfs_client; + unsigned long timeout = 3 * HZ; + int status = NFS4_OK; + u64 copied; copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); if (!copy) @@ -224,15 +241,12 @@ static int handle_async_copy(struct nfs42_copy_res *res, spin_unlock(&src_server->nfs_client->cl_lock); } - status = wait_for_completion_interruptible(©->completion); - spin_lock(&dst_server->nfs_client->cl_lock); - list_del_init(©->copies); - spin_unlock(&dst_server->nfs_client->cl_lock); - if (dst_server != src_server) { - spin_lock(&src_server->nfs_client->cl_lock); - list_del_init(©->src_copies); - spin_unlock(&src_server->nfs_client->cl_lock); - } +wait: + status = wait_for_completion_interruptible_timeout(©->completion, + timeout); + if (!status) + goto timeout; + nfs4_copy_dequeue_callback(dst_server, src_server, copy); if (status == -ERESTARTSYS) { goto out_cancel; } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) { @@ -242,6 +256,7 @@ static int handle_async_copy(struct nfs42_copy_res *res, } out: res->write_res.count = copy->count; + /* Copy out the updated write verifier provided by CB_OFFLOAD. */ memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf)); status = -copy->error; @@ -253,6 +268,39 @@ static int handle_async_copy(struct nfs42_copy_res *res, if (!nfs42_files_from_same_server(src, dst)) nfs42_do_offload_cancel_async(src, src_stateid); goto out_free; +timeout: + timeout <<= 1; + if (timeout > (clp->cl_lease_time >> 1)) + timeout = clp->cl_lease_time >> 1; + status = nfs42_proc_offload_status(dst, ©->stateid, &copied); + if (status == -EINPROGRESS) + goto wait; + nfs4_copy_dequeue_callback(dst_server, src_server, copy); + switch (status) { + case 0: + /* The server recognized the copy stateid, so it hasn't + * rebooted. Don't overwrite the verifier returned in the + * COPY result. */ + res->write_res.count = copied; + goto out_free; + case -EREMOTEIO: + /* COPY operation failed on the server. */ + status = -EOPNOTSUPP; + res->write_res.count = copied; + goto out_free; + case -EBADF: + /* Server did not recognize the copy stateid. It has + * probably restarted and lost the plot. */ + res->write_res.count = 0; + status = -EOPNOTSUPP; + break; + case -EOPNOTSUPP: + /* RFC 7862 REQUIREs server to support OFFLOAD_STATUS when + * it has signed up for an async COPY, so server is not + * spec-compliant. */ + res->write_res.count = 0; + } + goto out_free; } static int process_copy_commit(struct file *dst, loff_t pos_dst, @@ -643,7 +691,7 @@ _nfs42_proc_offload_status(struct nfs_server *server, struct file *file, * Other negative errnos indicate the client could not complete the * request. */ -static int __maybe_unused +static int nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied) { struct inode *inode = file_inode(dst); From 2d6a194f4b27e0fe524bb6da433d6b0f7032e27d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:42 -0500 Subject: [PATCH 1186/2157] NFS: Refactor trace_nfs4_offload_cancel Add a trace_nfs4_offload_status trace point that looks just like trace_nfs4_offload_cancel. Promote that event to an event class to avoid duplicating code. An alternative approach would be to expand trace_nfs4_offload_status to report more of the actual OFFLOAD_STATUS result. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-16-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4trace.h | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 82ada136ee6a..5cf52ece96ac 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -648,6 +648,7 @@ _nfs42_proc_offload_status(struct nfs_server *server, struct file *file, status = nfs4_call_sync(server->client, server, &msg, &data->args.osa_seq_args, &data->res.osr_seq_res, 1); + trace_nfs4_offload_status(&data->args, status); switch (status) { case 0: break; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 22c973316f0b..bc67fe6801b1 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2608,7 +2608,7 @@ TRACE_EVENT(nfs4_copy_notify, ) ); -TRACE_EVENT(nfs4_offload_cancel, +DECLARE_EVENT_CLASS(nfs4_offload_class, TP_PROTO( const struct nfs42_offload_status_args *args, int error @@ -2640,6 +2640,15 @@ TRACE_EVENT(nfs4_offload_cancel, __entry->stateid_seq, __entry->stateid_hash ) ); +#define DEFINE_NFS4_OFFLOAD_EVENT(name) \ + DEFINE_EVENT(nfs4_offload_class, name, \ + TP_PROTO( \ + const struct nfs42_offload_status_args *args, \ + int error \ + ), \ + TP_ARGS(args, error)) +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel); +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status); DECLARE_EVENT_CLASS(nfs4_xattr_event, TP_PROTO( From 860be250fc32de9cb24154bf21b4e36f40925707 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 11 Mar 2025 17:06:21 -0600 Subject: [PATCH 1187/2157] vfio/pci: Handle INTx IRQ_NOTCONNECTED Some systems report INTx as not routed by setting pdev->irq to IRQ_NOTCONNECTED, resulting in a -ENOTCONN error when trying to setup eventfd signaling. Include this in the set of conditions for which the PIN register is virtualized to zero. Additionally consolidate vfio_pci_get_irq_count() to use this virtualized value in reporting INTx support via ioctl and sanity checking ioctl paths since pdev->irq is re-used when the device is in MSI mode. The combination of these results in both the config space of the device and the ioctl interface behaving as if the device does not support INTx. Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250311230623.1264283-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 3 ++- drivers/vfio/pci/vfio_pci_core.c | 10 +--------- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 94142581c98c..14437396d721 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1814,7 +1814,8 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) cpu_to_le16(PCI_COMMAND_MEMORY); } - if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx || + vdev->pdev->irq == IRQ_NOTCONNECTED) vconfig[PCI_INTERRUPT_PIN] = 0; ret = vfio_cap_init(vdev); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 586e49efb81b..c857630f447b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -727,15 +727,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { - u8 pin; - - if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || - vdev->nointx || vdev->pdev->is_virtfn) - return 0; - - pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - - return pin ? 1 : 0; + return vdev->vconfig[PCI_INTERRUPT_PIN] ? 1 : 0; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { u8 pos; u16 flags; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 8382c5834335..565966351dfa 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -259,7 +259,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, if (!is_irq_none(vdev)) return -EINVAL; - if (!pdev->irq) + if (!pdev->irq || pdev->irq == IRQ_NOTCONNECTED) return -ENODEV; name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); From 0a243de9d009087fc99f591faa2c494ff5907fcd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 11 Mar 2025 01:49:52 +0000 Subject: [PATCH 1188/2157] rtc: pcf50633: Remove The pcf50633 was used as part of the OpenMoko devices but the support for its main chip was recently removed in: commit 61b7f8920b17 ("ARM: s3c: remove all s3c24xx support") See https://lore.kernel.org/all/Z8z236h4B5A6Ki3D@gallifrey/ Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250311014959.743322-3-linux@treblig.org Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 7 - drivers/rtc/Makefile | 1 - drivers/rtc/rtc-pcf50633.c | 284 ------------------------------------- 3 files changed, 292 deletions(-) delete mode 100644 drivers/rtc/rtc-pcf50633.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 0bbbf778ecfa..838bdc138ffe 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1321,13 +1321,6 @@ config RTC_DRV_SPEAR If you say Y here you will get support for the RTC found on spear -config RTC_DRV_PCF50633 - depends on MFD_PCF50633 - tristate "NXP PCF50633 RTC" - help - If you say yes here you get support for the RTC subsystem of the - NXP PCF50633 used in embedded systems. - config RTC_DRV_AB8500 tristate "ST-Ericsson AB8500 RTC" depends on AB8500_CORE diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 489b4ab07068..31473b3276d9 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -126,7 +126,6 @@ obj-$(CONFIG_RTC_DRV_PALMAS) += rtc-palmas.o obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o obj-$(CONFIG_RTC_DRV_PCF2123) += rtc-pcf2123.o obj-$(CONFIG_RTC_DRV_PCF2127) += rtc-pcf2127.o -obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o obj-$(CONFIG_RTC_DRV_PCF85063) += rtc-pcf85063.o obj-$(CONFIG_RTC_DRV_PCF8523) += rtc-pcf8523.o obj-$(CONFIG_RTC_DRV_PCF85363) += rtc-pcf85363.o diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c deleted file mode 100644 index c019c4d91c7d..000000000000 --- a/drivers/rtc/rtc-pcf50633.c +++ /dev/null @@ -1,284 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* NXP PCF50633 RTC Driver - * - * (C) 2006-2008 by Openmoko, Inc. - * Author: Balaji Rao - * All rights reserved. - * - * Broken down from monstrous PCF50633 driver mainly by - * Harald Welte, Andy Green and Werner Almesberger - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define PCF50633_REG_RTCSC 0x59 /* Second */ -#define PCF50633_REG_RTCMN 0x5a /* Minute */ -#define PCF50633_REG_RTCHR 0x5b /* Hour */ -#define PCF50633_REG_RTCWD 0x5c /* Weekday */ -#define PCF50633_REG_RTCDT 0x5d /* Day */ -#define PCF50633_REG_RTCMT 0x5e /* Month */ -#define PCF50633_REG_RTCYR 0x5f /* Year */ -#define PCF50633_REG_RTCSCA 0x60 /* Alarm Second */ -#define PCF50633_REG_RTCMNA 0x61 /* Alarm Minute */ -#define PCF50633_REG_RTCHRA 0x62 /* Alarm Hour */ -#define PCF50633_REG_RTCWDA 0x63 /* Alarm Weekday */ -#define PCF50633_REG_RTCDTA 0x64 /* Alarm Day */ -#define PCF50633_REG_RTCMTA 0x65 /* Alarm Month */ -#define PCF50633_REG_RTCYRA 0x66 /* Alarm Year */ - -enum pcf50633_time_indexes { - PCF50633_TI_SEC, - PCF50633_TI_MIN, - PCF50633_TI_HOUR, - PCF50633_TI_WKDAY, - PCF50633_TI_DAY, - PCF50633_TI_MONTH, - PCF50633_TI_YEAR, - PCF50633_TI_EXTENT /* always last */ -}; - -struct pcf50633_time { - u_int8_t time[PCF50633_TI_EXTENT]; -}; - -struct pcf50633_rtc { - int alarm_enabled; - int alarm_pending; - - struct pcf50633 *pcf; - struct rtc_device *rtc_dev; -}; - -static void pcf2rtc_time(struct rtc_time *rtc, struct pcf50633_time *pcf) -{ - rtc->tm_sec = bcd2bin(pcf->time[PCF50633_TI_SEC]); - rtc->tm_min = bcd2bin(pcf->time[PCF50633_TI_MIN]); - rtc->tm_hour = bcd2bin(pcf->time[PCF50633_TI_HOUR]); - rtc->tm_wday = bcd2bin(pcf->time[PCF50633_TI_WKDAY]); - rtc->tm_mday = bcd2bin(pcf->time[PCF50633_TI_DAY]); - rtc->tm_mon = bcd2bin(pcf->time[PCF50633_TI_MONTH]) - 1; - rtc->tm_year = bcd2bin(pcf->time[PCF50633_TI_YEAR]) + 100; -} - -static void rtc2pcf_time(struct pcf50633_time *pcf, struct rtc_time *rtc) -{ - pcf->time[PCF50633_TI_SEC] = bin2bcd(rtc->tm_sec); - pcf->time[PCF50633_TI_MIN] = bin2bcd(rtc->tm_min); - pcf->time[PCF50633_TI_HOUR] = bin2bcd(rtc->tm_hour); - pcf->time[PCF50633_TI_WKDAY] = bin2bcd(rtc->tm_wday); - pcf->time[PCF50633_TI_DAY] = bin2bcd(rtc->tm_mday); - pcf->time[PCF50633_TI_MONTH] = bin2bcd(rtc->tm_mon + 1); - pcf->time[PCF50633_TI_YEAR] = bin2bcd(rtc->tm_year % 100); -} - -static int -pcf50633_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) -{ - struct pcf50633_rtc *rtc = dev_get_drvdata(dev); - int err; - - if (enabled) - err = pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_ALARM); - else - err = pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_ALARM); - - if (err < 0) - return err; - - rtc->alarm_enabled = enabled; - - return 0; -} - -static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm) -{ - struct pcf50633_rtc *rtc; - struct pcf50633_time pcf_tm; - int ret; - - rtc = dev_get_drvdata(dev); - - ret = pcf50633_read_block(rtc->pcf, PCF50633_REG_RTCSC, - PCF50633_TI_EXTENT, - &pcf_tm.time[0]); - if (ret != PCF50633_TI_EXTENT) { - dev_err(dev, "Failed to read time\n"); - return -EIO; - } - - dev_dbg(dev, "PCF_TIME: %02x.%02x.%02x %02x:%02x:%02x\n", - pcf_tm.time[PCF50633_TI_DAY], - pcf_tm.time[PCF50633_TI_MONTH], - pcf_tm.time[PCF50633_TI_YEAR], - pcf_tm.time[PCF50633_TI_HOUR], - pcf_tm.time[PCF50633_TI_MIN], - pcf_tm.time[PCF50633_TI_SEC]); - - pcf2rtc_time(tm, &pcf_tm); - - dev_dbg(dev, "RTC_TIME: %ptRr\n", tm); - - return 0; -} - -static int pcf50633_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - struct pcf50633_rtc *rtc; - struct pcf50633_time pcf_tm; - int alarm_masked, ret = 0; - - rtc = dev_get_drvdata(dev); - - dev_dbg(dev, "RTC_TIME: %ptRr\n", tm); - - rtc2pcf_time(&pcf_tm, tm); - - dev_dbg(dev, "PCF_TIME: %02x.%02x.%02x %02x:%02x:%02x\n", - pcf_tm.time[PCF50633_TI_DAY], - pcf_tm.time[PCF50633_TI_MONTH], - pcf_tm.time[PCF50633_TI_YEAR], - pcf_tm.time[PCF50633_TI_HOUR], - pcf_tm.time[PCF50633_TI_MIN], - pcf_tm.time[PCF50633_TI_SEC]); - - - alarm_masked = pcf50633_irq_mask_get(rtc->pcf, PCF50633_IRQ_ALARM); - - if (!alarm_masked) - pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_ALARM); - - /* Returns 0 on success */ - ret = pcf50633_write_block(rtc->pcf, PCF50633_REG_RTCSC, - PCF50633_TI_EXTENT, - &pcf_tm.time[0]); - - if (!alarm_masked) - pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_ALARM); - - return ret; -} - -static int pcf50633_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) -{ - struct pcf50633_rtc *rtc; - struct pcf50633_time pcf_tm; - int ret = 0; - - rtc = dev_get_drvdata(dev); - - alrm->enabled = rtc->alarm_enabled; - alrm->pending = rtc->alarm_pending; - - ret = pcf50633_read_block(rtc->pcf, PCF50633_REG_RTCSCA, - PCF50633_TI_EXTENT, &pcf_tm.time[0]); - if (ret != PCF50633_TI_EXTENT) { - dev_err(dev, "Failed to read time\n"); - return -EIO; - } - - pcf2rtc_time(&alrm->time, &pcf_tm); - - return rtc_valid_tm(&alrm->time); -} - -static int pcf50633_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) -{ - struct pcf50633_rtc *rtc; - struct pcf50633_time pcf_tm; - int alarm_masked, ret = 0; - - rtc = dev_get_drvdata(dev); - - rtc2pcf_time(&pcf_tm, &alrm->time); - - /* do like mktime does and ignore tm_wday */ - pcf_tm.time[PCF50633_TI_WKDAY] = 7; - - alarm_masked = pcf50633_irq_mask_get(rtc->pcf, PCF50633_IRQ_ALARM); - - /* disable alarm interrupt */ - if (!alarm_masked) - pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_ALARM); - - /* Returns 0 on success */ - ret = pcf50633_write_block(rtc->pcf, PCF50633_REG_RTCSCA, - PCF50633_TI_EXTENT, &pcf_tm.time[0]); - if (!alrm->enabled) - rtc->alarm_pending = 0; - - if (!alarm_masked || alrm->enabled) - pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_ALARM); - rtc->alarm_enabled = alrm->enabled; - - return ret; -} - -static const struct rtc_class_ops pcf50633_rtc_ops = { - .read_time = pcf50633_rtc_read_time, - .set_time = pcf50633_rtc_set_time, - .read_alarm = pcf50633_rtc_read_alarm, - .set_alarm = pcf50633_rtc_set_alarm, - .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable, -}; - -static void pcf50633_rtc_irq(int irq, void *data) -{ - struct pcf50633_rtc *rtc = data; - - rtc_update_irq(rtc->rtc_dev, 1, RTC_AF | RTC_IRQF); - rtc->alarm_pending = 1; -} - -static int pcf50633_rtc_probe(struct platform_device *pdev) -{ - struct pcf50633_rtc *rtc; - - rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); - if (!rtc) - return -ENOMEM; - - rtc->pcf = dev_to_pcf50633(pdev->dev.parent); - platform_set_drvdata(pdev, rtc); - rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "pcf50633-rtc", - &pcf50633_rtc_ops, THIS_MODULE); - - if (IS_ERR(rtc->rtc_dev)) - return PTR_ERR(rtc->rtc_dev); - - pcf50633_register_irq(rtc->pcf, PCF50633_IRQ_ALARM, - pcf50633_rtc_irq, rtc); - return 0; -} - -static void pcf50633_rtc_remove(struct platform_device *pdev) -{ - struct pcf50633_rtc *rtc; - - rtc = platform_get_drvdata(pdev); - pcf50633_free_irq(rtc->pcf, PCF50633_IRQ_ALARM); -} - -static struct platform_driver pcf50633_rtc_driver = { - .driver = { - .name = "pcf50633-rtc", - }, - .probe = pcf50633_rtc_probe, - .remove = pcf50633_rtc_remove, -}; - -module_platform_driver(pcf50633_rtc_driver); - -MODULE_DESCRIPTION("PCF50633 RTC driver"); -MODULE_AUTHOR("Balaji Rao "); -MODULE_LICENSE("GPL"); - From eea7791e00f33a2a7a0c56479805bf1642ba378d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 5 Mar 2025 11:08:16 +0100 Subject: [PATCH 1189/2157] rtc: rzn1: implement one-second accuracy for alarms The hardware alarm only supports one-minute accuracy which is coarse and disables UIE usage. Use the 1-second interrupt to achieve per-second accuracy. It is activated once we hit the per-minute alarm. The new feature is optional. When there is no 1-second interrupt, old behaviour with per-minute accuracy is used as before. With this feature, all tests of 'rtctest' are successfully passed. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250305101038.9933-2-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rzn1.c | 108 ++++++++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 17 deletions(-) diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c index cb220807d925..eeb9612a666f 100644 --- a/drivers/rtc/rtc-rzn1.c +++ b/drivers/rtc/rtc-rzn1.c @@ -19,6 +19,7 @@ #include #include #include +#include #define RZN1_RTC_CTL0 0x00 #define RZN1_RTC_CTL0_SLSB_SUBU 0 @@ -27,6 +28,7 @@ #define RZN1_RTC_CTL0_CE BIT(7) #define RZN1_RTC_CTL1 0x04 +#define RZN1_RTC_CTL1_1SE BIT(3) #define RZN1_RTC_CTL1_ALME BIT(4) #define RZN1_RTC_CTL2 0x08 @@ -58,6 +60,13 @@ struct rzn1_rtc { struct rtc_device *rtcdev; void __iomem *base; + /* + * Protects access to RZN1_RTC_CTL1 reg. rtc_lock with threaded_irqs + * would introduce race conditions when switching interrupts because + * of potential sleeps + */ + spinlock_t ctl1_access_lock; + struct rtc_time tm_alarm; }; static void rzn1_rtc_get_time_snapshot(struct rzn1_rtc *rtc, struct rtc_time *tm) @@ -135,8 +144,38 @@ static int rzn1_rtc_set_time(struct device *dev, struct rtc_time *tm) static irqreturn_t rzn1_rtc_alarm_irq(int irq, void *dev_id) { struct rzn1_rtc *rtc = dev_id; + u32 ctl1, set_irq_bits = 0; - rtc_update_irq(rtc->rtcdev, 1, RTC_AF | RTC_IRQF); + if (rtc->tm_alarm.tm_sec == 0) + rtc_update_irq(rtc->rtcdev, 1, RTC_AF | RTC_IRQF); + else + /* Switch to 1s interrupts */ + set_irq_bits = RZN1_RTC_CTL1_1SE; + + guard(spinlock)(&rtc->ctl1_access_lock); + + ctl1 = readl(rtc->base + RZN1_RTC_CTL1); + ctl1 &= ~RZN1_RTC_CTL1_ALME; + ctl1 |= set_irq_bits; + writel(ctl1, rtc->base + RZN1_RTC_CTL1); + + return IRQ_HANDLED; +} + +static irqreturn_t rzn1_rtc_1s_irq(int irq, void *dev_id) +{ + struct rzn1_rtc *rtc = dev_id; + u32 ctl1; + + if (readl(rtc->base + RZN1_RTC_SECC) == bin2bcd(rtc->tm_alarm.tm_sec)) { + guard(spinlock)(&rtc->ctl1_access_lock); + + ctl1 = readl(rtc->base + RZN1_RTC_CTL1); + ctl1 &= ~RZN1_RTC_CTL1_1SE; + writel(ctl1, rtc->base + RZN1_RTC_CTL1); + + rtc_update_irq(rtc->rtcdev, 1, RTC_AF | RTC_IRQF); + } return IRQ_HANDLED; } @@ -144,14 +183,38 @@ static irqreturn_t rzn1_rtc_alarm_irq(int irq, void *dev_id) static int rzn1_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { struct rzn1_rtc *rtc = dev_get_drvdata(dev); - u32 ctl1 = readl(rtc->base + RZN1_RTC_CTL1); + struct rtc_time *tm = &rtc->tm_alarm, tm_now; + u32 ctl1; + int ret; - if (enable) - ctl1 |= RZN1_RTC_CTL1_ALME; - else - ctl1 &= ~RZN1_RTC_CTL1_ALME; + guard(spinlock_irqsave)(&rtc->ctl1_access_lock); - writel(ctl1, rtc->base + RZN1_RTC_CTL1); + ctl1 = readl(rtc->base + RZN1_RTC_CTL1); + + if (enable) { + /* + * Use alarm interrupt if alarm time is at least a minute away + * or less than a minute but in the next minute. Otherwise use + * 1 second interrupt to wait for the proper second + */ + do { + ctl1 &= ~(RZN1_RTC_CTL1_ALME | RZN1_RTC_CTL1_1SE); + + ret = rzn1_rtc_read_time(dev, &tm_now); + if (ret) + return ret; + + if (rtc_tm_sub(tm, &tm_now) > 59 || tm->tm_min != tm_now.tm_min) + ctl1 |= RZN1_RTC_CTL1_ALME; + else + ctl1 |= RZN1_RTC_CTL1_1SE; + + writel(ctl1, rtc->base + RZN1_RTC_CTL1); + } while (readl(rtc->base + RZN1_RTC_SECC) != bin2bcd(tm_now.tm_sec)); + } else { + ctl1 &= ~(RZN1_RTC_CTL1_ALME | RZN1_RTC_CTL1_1SE); + writel(ctl1, rtc->base + RZN1_RTC_CTL1); + } return 0; } @@ -185,7 +248,7 @@ static int rzn1_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) } ctl1 = readl(rtc->base + RZN1_RTC_CTL1); - alrm->enabled = !!(ctl1 & RZN1_RTC_CTL1_ALME); + alrm->enabled = !!(ctl1 & (RZN1_RTC_CTL1_ALME | RZN1_RTC_CTL1_1SE)); return 0; } @@ -216,6 +279,8 @@ static int rzn1_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) writel(bin2bcd(tm->tm_hour), rtc->base + RZN1_RTC_ALH); writel(BIT(wday), rtc->base + RZN1_RTC_ALW); + rtc->tm_alarm = alrm->time; + rzn1_rtc_alarm_irq_enable(dev, alrm->enabled); return 0; @@ -304,7 +369,7 @@ static const struct rtc_class_ops rzn1_rtc_ops = { static int rzn1_rtc_probe(struct platform_device *pdev) { struct rzn1_rtc *rtc; - int alarm_irq; + int irq; int ret; rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); @@ -317,9 +382,9 @@ static int rzn1_rtc_probe(struct platform_device *pdev) if (IS_ERR(rtc->base)) return dev_err_probe(&pdev->dev, PTR_ERR(rtc->base), "Missing reg\n"); - alarm_irq = platform_get_irq(pdev, 0); - if (alarm_irq < 0) - return alarm_irq; + irq = platform_get_irq_byname(pdev, "alarm"); + if (irq < 0) + return irq; rtc->rtcdev = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc->rtcdev)) @@ -329,8 +394,6 @@ static int rzn1_rtc_probe(struct platform_device *pdev) rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099; rtc->rtcdev->alarm_offset_max = 7 * 86400; rtc->rtcdev->ops = &rzn1_rtc_ops; - set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features); - clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features); ret = devm_pm_runtime_enable(&pdev->dev); if (ret < 0) @@ -349,13 +412,24 @@ static int rzn1_rtc_probe(struct platform_device *pdev) /* Disable all interrupts */ writel(0, rtc->base + RZN1_RTC_CTL1); - ret = devm_request_irq(&pdev->dev, alarm_irq, rzn1_rtc_alarm_irq, 0, - dev_name(&pdev->dev), rtc); + spin_lock_init(&rtc->ctl1_access_lock); + + ret = devm_request_irq(&pdev->dev, irq, rzn1_rtc_alarm_irq, 0, "RZN1 RTC Alarm", rtc); if (ret) { - dev_err(&pdev->dev, "RTC timer interrupt not available\n"); + dev_err(&pdev->dev, "RTC alarm interrupt not available\n"); goto dis_runtime_pm; } + irq = platform_get_irq_byname_optional(pdev, "pps"); + if (irq >= 0) + ret = devm_request_irq(&pdev->dev, irq, rzn1_rtc_1s_irq, 0, "RZN1 RTC 1s", rtc); + + if (irq < 0 || ret) { + set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features); + dev_warn(&pdev->dev, "RTC pps interrupt not available. Alarm has only minute accuracy\n"); + } + ret = devm_rtc_register_device(rtc->rtcdev); if (ret) goto dis_runtime_pm; From 6c2b833525ebce518ff7056b2700dfe3d0244d8b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 17 Mar 2025 09:03:56 -0300 Subject: [PATCH 1190/2157] dt-bindings: rtc: pcf2127: Reference spi-peripheral-props.yaml PCF2127 is an SPI device, thus its binding should reference spi-peripheral-props.yaml. Add a reference to spi-peripheral-props.yaml to fix the following dt-schema warning: imx7d-flex-concentrator.dtb: rtc@0: 'spi-max-frequency' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Fabio Estevam Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250317120356.2195670-1-festevam@gmail.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml index 2d9fe5a75b06..11fcf0ca1ae0 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml @@ -8,6 +8,7 @@ title: NXP PCF2127 Real Time Clock allOf: - $ref: rtc.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# maintainers: - Alexandre Belloni @@ -34,7 +35,7 @@ required: - compatible - reg -additionalProperties: false +unevaluatedProperties: false examples: - | From 0176188220a7822b7a5f57f971d8af291d05e98c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 14 Mar 2025 19:00:54 +1030 Subject: [PATCH 1191/2157] rtc: cros-ec: Avoid a couple of -Wflex-array-member-not-at-end warnings Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of a flexible structure where the size of the flexible-array member is known at compile-time, and refactor the rest of the code, accordingly. So, with these changes, fix the following warning: drivers/rtc/rtc-cros-ec.c:62:40: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/rtc/rtc-cros-ec.c:40:40: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/Z9PpPg06OK8ghNvm@kspp Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 865c2e82c7a5..e956505a06fb 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -35,21 +35,18 @@ struct cros_ec_rtc { static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, u32 *response) { + DEFINE_RAW_FLEX(struct cros_ec_command, msg, data, + sizeof(struct ec_response_rtc)); int ret; - struct { - struct cros_ec_command msg; - struct ec_response_rtc data; - } __packed msg; - memset(&msg, 0, sizeof(msg)); - msg.msg.command = command; - msg.msg.insize = sizeof(msg.data); + msg->command = command; + msg->insize = sizeof(struct ec_response_rtc); - ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); + ret = cros_ec_cmd_xfer_status(cros_ec, msg); if (ret < 0) return ret; - *response = msg.data.time; + *response = ((struct ec_response_rtc *)msg->data)->time; return 0; } @@ -57,18 +54,15 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command, u32 param) { + DEFINE_RAW_FLEX(struct cros_ec_command, msg, data, + sizeof(struct ec_response_rtc)); int ret; - struct { - struct cros_ec_command msg; - struct ec_response_rtc data; - } __packed msg; - memset(&msg, 0, sizeof(msg)); - msg.msg.command = command; - msg.msg.outsize = sizeof(msg.data); - msg.data.time = param; + msg->command = command; + msg->outsize = sizeof(struct ec_response_rtc); + ((struct ec_response_rtc *)msg->data)->time = param; - ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); + ret = cros_ec_cmd_xfer_status(cros_ec, msg); if (ret < 0) return ret; return 0; From d6cb667b8e15bac47f19aec4bd81f3916b2ca9f5 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 10 Mar 2025 10:33:04 +0800 Subject: [PATCH 1192/2157] i3c: master: svc: Fix i3c_master_get_free_addr return check The return value of i3c_master_get_free_addr is assigned to a variable with wrong type, so it can't be negative. Use a signed integer for the return value. If the value is negative, break the process and propagate the error code. This commit also fixes the uninitialized symbol 'dyn_addr', reported by Smatch static checker. Fixes: 4008a74e0f9b ("i3c: master: svc: Fix npcm845 FIFO empty issue") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/029e5ac0-5444-4a8e-bca4-cec55950d2b9@stanley.mountain/ Signed-off-by: Stanley Chu Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250310023304.2335792-1-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index f22fb9e75142..1d1f351b9a85 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -940,7 +940,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, u8 *addrs, unsigned int *count) { u64 prov_id[SVC_I3C_MAX_DEVS] = {}, nacking_prov_id = 0; - unsigned int dev_nb = 0, last_addr = 0, dyn_addr; + unsigned int dev_nb = 0, last_addr = 0, dyn_addr = 0; u32 reg; int ret, i; @@ -998,10 +998,11 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, * filling within a few hundred nanoseconds, which is significantly * faster compared to the 64 SCL clock cycles. */ - dyn_addr = i3c_master_get_free_addr(&master->base, last_addr + 1); - if (dyn_addr < 0) - return -ENOSPC; + ret = i3c_master_get_free_addr(&master->base, last_addr + 1); + if (ret < 0) + break; + dyn_addr = ret; writel(dyn_addr, master->regs + SVC_I3C_MWDATAB); /* From 00cdfdcfa0806202aea56b02cedbf87ef1e75df8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Mar 2025 22:06:04 -0400 Subject: [PATCH 1193/2157] hypfs_create_cpu_files(): add missing check for hypfs_mkdir() failure Signed-off-by: Al Viro --- arch/s390/hypfs/hypfs_diag_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 00a6d370a280..280266a74f37 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -208,6 +208,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); From 995abaaadd30e2f9e49127694d41403839d3e1bd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Dec 2024 15:53:55 +0000 Subject: [PATCH 1194/2157] dax: remove access to page->index This looks like a complete mess (why are we setting page->index at page fault time?), but I no longer care about DAX, and there's no reason to let DAX hold us back from removing page->index. Link: https://lkml.kernel.org/r/20241216155408.8102-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jane Chu Reviewed-by: Dan Williams Cc: Alistair Popple Cc: Dave Jiang Cc: Vishal Verma Signed-off-by: Andrew Morton --- drivers/dax/device.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6d74e62bbee0..bc871a34b9cd 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -89,14 +89,13 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn, ALIGN_DOWN(vmf->address, fault_size)); for (i = 0; i < nr_pages; i++) { - struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i); + struct folio *folio = pfn_folio(pfn_t_to_pfn(pfn) + i); - page = compound_head(page); - if (page->mapping) + if (folio->mapping) continue; - page->mapping = filp->f_mapping; - page->index = pgoff + i; + folio->mapping = filp->f_mapping; + folio->index = pgoff + i; } } From 37cd93fc61037889461d6437a1e1a5caa730ef38 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Dec 2024 15:53:56 +0000 Subject: [PATCH 1195/2157] dax: use folios more widely within DAX Convert from pfn to folio instead of page and use those folios throughout to avoid accesses to page->index and page->mapping. Link: https://lkml.kernel.org/r/20241216155408.8102-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jane Chu Cc: Dan Willaims Cc: Dave Jiang Cc: Vishal Verma Cc: Alistair Popple Signed-off-by: Andrew Morton --- fs/dax.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 21b47402b3dc..972febc6fb9d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -320,38 +320,39 @@ static unsigned long dax_end_pfn(void *entry) for (pfn = dax_to_pfn(entry); \ pfn < dax_end_pfn(entry); pfn++) -static inline bool dax_page_is_shared(struct page *page) +static inline bool dax_folio_is_shared(struct folio *folio) { - return page->mapping == PAGE_MAPPING_DAX_SHARED; + return folio->mapping == PAGE_MAPPING_DAX_SHARED; } /* - * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the + * Set the folio->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the * refcount. */ -static inline void dax_page_share_get(struct page *page) +static inline void dax_folio_share_get(struct folio *folio) { - if (page->mapping != PAGE_MAPPING_DAX_SHARED) { + if (folio->mapping != PAGE_MAPPING_DAX_SHARED) { /* * Reset the index if the page was already mapped * regularly before. */ - if (page->mapping) - page->share = 1; - page->mapping = PAGE_MAPPING_DAX_SHARED; + if (folio->mapping) + folio->page.share = 1; + folio->mapping = PAGE_MAPPING_DAX_SHARED; } - page->share++; + folio->page.share++; } -static inline unsigned long dax_page_share_put(struct page *page) +static inline unsigned long dax_folio_share_put(struct folio *folio) { - return --page->share; + return --folio->page.share; } /* - * When it is called in dax_insert_entry(), the shared flag will indicate that - * whether this entry is shared by multiple files. If so, set the page->mapping - * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount. + * When it is called in dax_insert_entry(), the shared flag will indicate + * that whether this entry is shared by multiple files. If so, set + * the folio->mapping PAGE_MAPPING_DAX_SHARED, and use page->share + * as refcount. */ static void dax_associate_entry(void *entry, struct address_space *mapping, struct vm_area_struct *vma, unsigned long address, bool shared) @@ -364,14 +365,14 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, index = linear_page_index(vma, address & ~(size - 1)); for_each_mapped_pfn(entry, pfn) { - struct page *page = pfn_to_page(pfn); + struct folio *folio = pfn_folio(pfn); if (shared) { - dax_page_share_get(page); + dax_folio_share_get(folio); } else { - WARN_ON_ONCE(page->mapping); - page->mapping = mapping; - page->index = index + i++; + WARN_ON_ONCE(folio->mapping); + folio->mapping = mapping; + folio->index = index + i++; } } } @@ -385,17 +386,17 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping, return; for_each_mapped_pfn(entry, pfn) { - struct page *page = pfn_to_page(pfn); + struct folio *folio = pfn_folio(pfn); - WARN_ON_ONCE(trunc && page_ref_count(page) > 1); - if (dax_page_is_shared(page)) { + WARN_ON_ONCE(trunc && folio_ref_count(folio) > 1); + if (dax_folio_is_shared(folio)) { /* keep the shared flag if this page is still shared */ - if (dax_page_share_put(page) > 0) + if (dax_folio_share_put(folio) > 0) continue; } else - WARN_ON_ONCE(page->mapping && page->mapping != mapping); - page->mapping = NULL; - page->index = 0; + WARN_ON_ONCE(folio->mapping && folio->mapping != mapping); + folio->mapping = NULL; + folio->index = 0; } } From 7851bf649d423edd7286b292739f2eefded3d35c Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:30:56 +1100 Subject: [PATCH 1196/2157] fuse: fix dax truncate/punch_hole fault path Patch series "fs/dax: Fix ZONE_DEVICE page reference counts", v9. Device and FS DAX pages have always maintained their own page reference counts without following the normal rules for page reference counting. In particular pages are considered free when the refcount hits one rather than zero and refcounts are not added when mapping the page. Tracking this requires special PTE bits (PTE_DEVMAP) and a secondary mechanism for allowing GUP to hold references on the page (see get_dev_pagemap). However there doesn't seem to be any reason why FS DAX pages need their own reference counting scheme. By treating the refcounts on these pages the same way as normal pages we can remove a lot of special checks. In particular pXd_trans_huge() becomes the same as pXd_leaf(), although I haven't made that change here. It also frees up a valuable SW define PTE bit on architectures that have devmap PTE bits defined. It also almost certainly allows further clean-up of the devmap managed functions, but I have left that as a future improvment. It also enables support for compound ZONE_DEVICE pages which is one of my primary motivators for doing this work. This patch (of 20): FS DAX requires file systems to call into the DAX layout prior to unlinking inodes to ensure there is no ongoing DMA or other remote access to the direct mapped page. The fuse file system implements fuse_dax_break_layouts() to do this which includes a comment indicating that passing dmap_end == 0 leads to unmapping of the whole file. However this is not true - passing dmap_end == 0 will not unmap anything before dmap_start, and further more dax_layout_busy_page_range() will not scan any of the range to see if there maybe ongoing DMA access to the range. Fix this by passing -1 for dmap_end to fuse_dax_break_layouts() which will invalidate the entire file range to dax_layout_busy_page_range(). Link: https://lkml.kernel.org/r/cover.8068ad144a7eea4a813670301f4d2a86a8e68ec4.1740713401.git-series.apopple@nvidia.com Link: https://lkml.kernel.org/r/f09a34b6c40032022e4ddee6fadb7cc676f08867.1740713401.git-series.apopple@nvidia.com Fixes: 6ae330cad6ef ("virtiofs: serialize truncate/punch_hole and dax fault path") Signed-off-by: Alistair Popple Co-developed-by: Dan Williams Signed-off-by: Dan Williams Reviewed-by: Balbir Singh Tested-by: Alison Schofield Cc: Vivek Goyal Cc: Alexander Gordeev Cc: Asahi Lina Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/fuse/dax.c | 1 - fs/fuse/dir.c | 2 +- fs/fuse/file.c | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 0b6ee6dd1fd6..b7f805d2a14f 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -682,7 +682,6 @@ static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, 0, 0, fuse_wait_dax_page(inode)); } -/* dmap_end == 0 leads to unmapping of whole file */ int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3805f9b06c9d..3b031d24d369 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1940,7 +1940,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (FUSE_IS_DAX(inode) && is_truncate) { filemap_invalidate_lock(mapping); fault_blocked = true; - err = fuse_dax_break_layouts(inode, 0, 0); + err = fuse_dax_break_layouts(inode, 0, -1); if (err) { filemap_invalidate_unlock(mapping); return err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d63e56fd3dd2..754378dd9f71 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -253,7 +253,7 @@ static int fuse_open(struct inode *inode, struct file *file) if (dax_truncate) { filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); + err = fuse_dax_break_layouts(inode, 0, -1); if (err) goto out_inode_unlock; } @@ -3205,7 +3205,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, inode_lock(inode); if (block_faults) { filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); + err = fuse_dax_break_layouts(inode, 0, -1); if (err) goto out; } From cee91fa13a8e95f49d0ee6be020e68a71a209117 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:30:57 +1100 Subject: [PATCH 1197/2157] fs/dax: return unmapped busy pages from dax_layout_busy_page_range() dax_layout_busy_page_range() is used by file systems to scan the DAX page-cache to unmap mapping pages from user-space and to determine if any pages in the given range are busy, either due to ongoing DMA or other get_user_pages() usage. Currently it checks to see the file mapping is mapped into user-space with mapping_mapped() and returns early if not, skipping the check for DMA busy pages. This is wrong as pages may still be undergoing DMA access even if they have subsequently been unmapped from user-space. Fix this by dropping the check for mapping_mapped(). Link: https://lkml.kernel.org/r/d85ce6c2d1400ff111ed7302d9eef223d0243c57.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Suggested-by: Dan Williams Reviewed-by: Dan Williams Reviewed-by: Balbir Singh Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 972febc6fb9d..b35f538c4330 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -691,7 +691,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) return NULL; - if (!dax_mapping(mapping) || !mapping_mapped(mapping)) + if (!dax_mapping(mapping)) return NULL; /* If end == LLONG_MAX, all pages from start to till end of file */ From 6be3e21d25ca2dbb7ca4f3f7db808a3e1a944bd1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:30:58 +1100 Subject: [PATCH 1198/2157] fs/dax: don't skip locked entries when scanning entries Several functions internal to FS DAX use the following pattern when trying to obtain an unlocked entry: xas_for_each(&xas, entry, end_idx) { if (dax_is_locked(entry)) entry = get_unlocked_entry(&xas, 0); This is problematic because get_unlocked_entry() will get the next present entry in the range, and the next entry may not be locked. Therefore any processing of the original locked entry will be skipped. This can cause dax_layout_busy_page_range() to miss DMA-busy pages in the range, leading file systems to free blocks whilst DMA operations are ongoing which can lead to file system corruption. Instead callers from within a xas_for_each() loop should be waiting for the current entry to be unlocked without advancing the XArray state so a new function is introduced to wait. Also while we are here rename get_unlocked_entry() to get_next_unlocked_entry() to make it clear that it may advance the iterator state. Link: https://lkml.kernel.org/r/b11b2baed7157dc900bf07a4571bf71b7cd82d97.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b35f538c4330..f5fdb43f5de3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -206,7 +206,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, * * Must be called with the i_pages lock held. */ -static void *get_unlocked_entry(struct xa_state *xas, unsigned int order) +static void *get_next_unlocked_entry(struct xa_state *xas, unsigned int order) { void *entry; struct wait_exceptional_entry_queue ewait; @@ -235,6 +235,37 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order) } } +/* + * Wait for the given entry to become unlocked. Caller must hold the i_pages + * lock and call either put_unlocked_entry() if it did not lock the entry or + * dax_unlock_entry() if it did. Returns an unlocked entry if still present. + */ +static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry) +{ + struct wait_exceptional_entry_queue ewait; + wait_queue_head_t *wq; + + init_wait(&ewait.wait); + ewait.wait.func = wake_exceptional_entry_func; + + while (unlikely(dax_is_locked(entry))) { + wq = dax_entry_waitqueue(xas, entry, &ewait.key); + prepare_to_wait_exclusive(wq, &ewait.wait, + TASK_UNINTERRUPTIBLE); + xas_pause(xas); + xas_unlock_irq(xas); + schedule(); + finish_wait(wq, &ewait.wait); + xas_lock_irq(xas); + entry = xas_load(xas); + } + + if (xa_is_internal(entry)) + return NULL; + + return entry; +} + /* * The only thing keeping the address space around is the i_pages lock * (it's cycled in clear_inode() after removing the entries from i_pages) @@ -250,7 +281,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) wq = dax_entry_waitqueue(xas, entry, &ewait.key); /* - * Unlike get_unlocked_entry() there is no guarantee that this + * Unlike get_next_unlocked_entry() there is no guarantee that this * path ever successfully retrieves an unlocked entry before an * inode dies. Perform a non-exclusive wait in case this path * never successfully performs its own wake up. @@ -581,7 +612,7 @@ static void *grab_mapping_entry(struct xa_state *xas, retry: pmd_downgrade = false; xas_lock_irq(xas); - entry = get_unlocked_entry(xas, order); + entry = get_next_unlocked_entry(xas, order); if (entry) { if (dax_is_conflict(entry)) @@ -717,8 +748,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, xas_for_each(&xas, entry, end_idx) { if (WARN_ON_ONCE(!xa_is_value(entry))) continue; - if (unlikely(dax_is_locked(entry))) - entry = get_unlocked_entry(&xas, 0); + entry = wait_entry_unlocked_exclusive(&xas, entry); if (entry) page = dax_busy_page(entry); put_unlocked_entry(&xas, entry, WAKE_NEXT); @@ -751,7 +781,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, void *entry; xas_lock_irq(&xas); - entry = get_unlocked_entry(&xas, 0); + entry = get_next_unlocked_entry(&xas, 0); if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) goto out; if (!trunc && @@ -777,7 +807,9 @@ static int __dax_clear_dirty_range(struct address_space *mapping, xas_lock_irq(&xas); xas_for_each(&xas, entry, end) { - entry = get_unlocked_entry(&xas, 0); + entry = wait_entry_unlocked_exclusive(&xas, entry); + if (!entry) + continue; xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); put_unlocked_entry(&xas, entry, WAKE_NEXT); @@ -941,7 +973,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, if (unlikely(dax_is_locked(entry))) { void *old_entry = entry; - entry = get_unlocked_entry(xas, 0); + entry = get_next_unlocked_entry(xas, 0); /* Entry got punched out / reallocated? */ if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) @@ -1950,7 +1982,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) vm_fault_t ret; xas_lock_irq(&xas); - entry = get_unlocked_entry(&xas, order); + entry = get_next_unlocked_entry(&xas, order); /* Did we race with someone splitting entry or so? */ if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { From e6fa3963a30d53427d33a577c5ba4bfd3373bc93 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:30:59 +1100 Subject: [PATCH 1199/2157] fs/dax: refactor wait for dax idle page A FS DAX page is considered idle when its refcount drops to one. This is currently open-coded in all file systems supporting FS DAX. Move the idle detection to a common function to make future changes easier. Link: https://lkml.kernel.org/r/c2c9d269110b90224eeb1dc661ffbc1d82aa20c9.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Dan Williams Acked-by: Theodore Ts'o Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/ext4/inode.c | 5 +---- fs/fuse/dax.c | 4 +--- fs/xfs/xfs_inode.c | 4 +--- include/linux/dax.h | 8 ++++++++ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c54ae5fcbd4..cc1acb1fdec6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3922,10 +3922,7 @@ int ext4_break_layouts(struct inode *inode) if (!page) return 0; - error = ___wait_var_event(&page->_refcount, - atomic_read(&page->_refcount) == 1, - TASK_INTERRUPTIBLE, 0, 0, - ext4_wait_dax_page(inode)); + error = dax_wait_page_idle(page, ext4_wait_dax_page, inode); } while (error == 0); return error; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index b7f805d2a14f..bf6faa3536a4 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -677,9 +677,7 @@ static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, return 0; *retry = true; - return ___wait_var_event(&page->_refcount, - atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, - 0, 0, fuse_wait_dax_page(inode)); + return dax_wait_page_idle(page, fuse_wait_dax_page, inode); } int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b1f9f156ec88..1b5613dfed7f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3020,9 +3020,7 @@ xfs_break_dax_layouts( return 0; *retry = true; - return ___wait_var_event(&page->_refcount, - atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, - 0, 0, xfs_wait_dax_page(inode)); + return dax_wait_page_idle(page, xfs_wait_dax_page, inode); } int diff --git a/include/linux/dax.h b/include/linux/dax.h index df41a0017b31..9b1ce984d410 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -207,6 +207,14 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); +static inline int dax_wait_page_idle(struct page *page, + void (cb)(struct inode *), + struct inode *inode) +{ + return ___wait_var_event(page, page_ref_count(page) == 1, + TASK_INTERRUPTIBLE, 0, 0, cb(inode)); +} + #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); From d5b3afea22a52517e6bc835432e6dfd079b8bf7c Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:00 +1100 Subject: [PATCH 1200/2157] fs/dax: create a common implementation to break DAX layouts Prior to freeing a block file systems supporting FS DAX must check that the associated pages are both unmapped from user-space and not undergoing DMA or other access from eg. get_user_pages(). This is achieved by unmapping the file range and scanning the FS DAX page-cache to see if any pages within the mapping have an elevated refcount. This is done using two functions - dax_layout_busy_page_range() which returns a page to wait for the refcount to become idle on. Rather than open-code this introduce a common implementation to both unmap and wait for the page to become idle. Link: https://lkml.kernel.org/r/c4d381e41fc618296cee2820403c166d80599d5c.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 33 +++++++++++++++++++++++++++++++++ fs/ext4/inode.c | 13 +------------ fs/fuse/dax.c | 27 +++------------------------ fs/xfs/xfs_inode.c | 26 +++++++------------------- fs/xfs/xfs_inode.h | 2 +- include/linux/dax.h | 23 ++++++++++++++++++----- 6 files changed, 63 insertions(+), 61 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index f5fdb43f5de3..f1945aa65eb0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -846,6 +846,39 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) return ret; } +static int wait_page_idle(struct page *page, + void (cb)(struct inode *), + struct inode *inode) +{ + return ___wait_var_event(page, dax_page_is_idle(page), + TASK_INTERRUPTIBLE, 0, 0, cb(inode)); +} + +/* + * Unmaps the inode and waits for any DMA to complete prior to deleting the + * DAX mapping entries for the range. + */ +int dax_break_layout(struct inode *inode, loff_t start, loff_t end, + void (cb)(struct inode *)) +{ + struct page *page; + int error = 0; + + if (!dax_mapping(inode->i_mapping)) + return 0; + + do { + page = dax_layout_busy_page_range(inode->i_mapping, start, end); + if (!page) + break; + + error = wait_page_idle(page, cb, inode); + } while (error == 0); + + return error; +} +EXPORT_SYMBOL_GPL(dax_break_layout); + /* * Invalidate DAX entry if it is clean. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cc1acb1fdec6..2342bac14a9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3911,21 +3911,10 @@ static void ext4_wait_dax_page(struct inode *inode) int ext4_break_layouts(struct inode *inode) { - struct page *page; - int error; - if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock))) return -EINVAL; - do { - page = dax_layout_busy_page(inode->i_mapping); - if (!page) - return 0; - - error = dax_wait_page_idle(page, ext4_wait_dax_page, inode); - } while (error == 0); - - return error; + return dax_break_layout_inode(inode, ext4_wait_dax_page); } /* diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index bf6faa3536a4..0502bf3cdf6a 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -666,33 +666,12 @@ static void fuse_wait_dax_page(struct inode *inode) filemap_invalidate_lock(inode->i_mapping); } -/* Should be called with mapping->invalidate_lock held exclusively */ -static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, - loff_t start, loff_t end) -{ - struct page *page; - - page = dax_layout_busy_page_range(inode->i_mapping, start, end); - if (!page) - return 0; - - *retry = true; - return dax_wait_page_idle(page, fuse_wait_dax_page, inode); -} - +/* Should be called with mapping->invalidate_lock held exclusively. */ int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end) { - bool retry; - int ret; - - do { - retry = false; - ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, - dmap_end); - } while (ret == 0 && retry); - - return ret; + return dax_break_layout(inode, dmap_start, dmap_end, + fuse_wait_dax_page); } ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1b5613dfed7f..d4f07e02b28b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2735,21 +2735,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( struct xfs_inode *ip2) { int error; - bool retry; struct page *page; if (ip1->i_ino > ip2->i_ino) swap(ip1, ip2); again: - retry = false; /* Lock the first inode */ xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); - error = xfs_break_dax_layouts(VFS_I(ip1), &retry); - if (error || retry) { + error = xfs_break_dax_layouts(VFS_I(ip1)); + if (error) { xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); - if (error == 0 && retry) - goto again; return error; } @@ -2764,7 +2760,7 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( * for this nested lock case. */ page = dax_layout_busy_page(VFS_I(ip2)->i_mapping); - if (page && page_ref_count(page) != 1) { + if (!dax_page_is_idle(page)) { xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); goto again; @@ -3008,19 +3004,11 @@ xfs_wait_dax_page( int xfs_break_dax_layouts( - struct inode *inode, - bool *retry) + struct inode *inode) { - struct page *page; - xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL); - page = dax_layout_busy_page(inode->i_mapping); - if (!page) - return 0; - - *retry = true; - return dax_wait_page_idle(page, xfs_wait_dax_page, inode); + return dax_break_layout_inode(inode, xfs_wait_dax_page); } int @@ -3038,8 +3026,8 @@ xfs_break_layouts( retry = false; switch (reason) { case BREAK_UNMAP: - error = xfs_break_dax_layouts(inode, &retry); - if (error || retry) + error = xfs_break_dax_layouts(inode); + if (error) break; fallthrough; case BREAK_WRITE: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c08093a65352..123dfa965c6e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -603,7 +603,7 @@ xfs_itruncate_extents( return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0); } -int xfs_break_dax_layouts(struct inode *inode, bool *retry); +int xfs_break_dax_layouts(struct inode *inode); int xfs_break_layouts(struct inode *inode, uint *iolock, enum layout_break_reason reason); diff --git a/include/linux/dax.h b/include/linux/dax.h index 9b1ce984d410..a6b277f1e13a 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -207,12 +207,9 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); -static inline int dax_wait_page_idle(struct page *page, - void (cb)(struct inode *), - struct inode *inode) +static inline bool dax_page_is_idle(struct page *page) { - return ___wait_var_event(page, page_ref_count(page) == 1, - TASK_INTERRUPTIBLE, 0, 0, cb(inode)); + return page && page_ref_count(page) == 1; } #if IS_ENABLED(CONFIG_DAX) @@ -228,6 +225,15 @@ static inline void dax_read_unlock(int id) { } #endif /* CONFIG_DAX */ + +#if !IS_ENABLED(CONFIG_FS_DAX) +static inline int __must_check dax_break_layout(struct inode *inode, + loff_t start, loff_t end, void (cb)(struct inode *)) +{ + return 0; +} +#endif + bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, @@ -251,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int __must_check dax_break_layout(struct inode *inode, loff_t start, + loff_t end, void (cb)(struct inode *)); +static inline int __must_check dax_break_layout_inode(struct inode *inode, + void (cb)(struct inode *)) +{ + return dax_break_layout(inode, 0, LLONG_MAX, cb); +} int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, From bde708f1a65d025c45575bfe1e7bf7bdf7e71e87 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:01 +1100 Subject: [PATCH 1201/2157] fs/dax: always remove DAX page-cache entries when breaking layouts Prior to any truncation operations file systems call dax_break_mapping() to ensure pages in the range are not under going DMA. Later DAX page-cache entries will be removed by truncate_folio_batch_exceptionals() in the generic page-cache code. However this makes it possible for folios to be removed from the page-cache even though they are still DMA busy if the file-system hasn't called dax_break_mapping(). It also means they can never be waited on in future because FS DAX will lose track of them once the page-cache entry has been deleted. Instead it is better to delete the FS DAX entry when the file-system calls dax_break_mapping() as part of it's truncate operation. This ensures only idle pages can be removed from the FS DAX page-cache and makes it easy to detect if a file-system hasn't called dax_break_mapping() prior to a truncate operation. Link: https://lkml.kernel.org/r/3be6115eaaa8d28fee37fcba3287be4f226a7d24.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode.c | 5 ++--- include/linux/dax.h | 2 ++ mm/truncate.c | 16 +++++++++++++++- 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index f1945aa65eb0..14fbe5163037 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -846,6 +846,36 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) return ret; } +void dax_delete_mapping_range(struct address_space *mapping, + loff_t start, loff_t end) +{ + void *entry; + pgoff_t start_idx = start >> PAGE_SHIFT; + pgoff_t end_idx; + XA_STATE(xas, &mapping->i_pages, start_idx); + + /* If end == LLONG_MAX, all pages from start to till end of file */ + if (end == LLONG_MAX) + end_idx = ULONG_MAX; + else + end_idx = end >> PAGE_SHIFT; + + xas_lock_irq(&xas); + xas_for_each(&xas, entry, end_idx) { + if (!xa_is_value(entry)) + continue; + entry = wait_entry_unlocked_exclusive(&xas, entry); + if (!entry) + continue; + dax_disassociate_entry(entry, mapping, true); + xas_store(&xas, NULL); + mapping->nrpages -= 1UL << dax_entry_order(entry); + put_unlocked_entry(&xas, entry, WAKE_ALL); + } + xas_unlock_irq(&xas); +} +EXPORT_SYMBOL_GPL(dax_delete_mapping_range); + static int wait_page_idle(struct page *page, void (cb)(struct inode *), struct inode *inode) @@ -857,6 +887,9 @@ static int wait_page_idle(struct page *page, /* * Unmaps the inode and waits for any DMA to complete prior to deleting the * DAX mapping entries for the range. + * + * For NOWAIT behavior, pass @cb as NULL to early-exit on first found + * busy page */ int dax_break_layout(struct inode *inode, loff_t start, loff_t end, void (cb)(struct inode *)) @@ -871,10 +904,17 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end, page = dax_layout_busy_page_range(inode->i_mapping, start, end); if (!page) break; + if (!cb) { + error = -ERESTARTSYS; + break; + } error = wait_page_idle(page, cb, inode); } while (error == 0); + if (!page) + dax_delete_mapping_range(inode->i_mapping, start, end); + return error; } EXPORT_SYMBOL_GPL(dax_break_layout); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d4f07e02b28b..80083376a1d0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2735,7 +2735,6 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( struct xfs_inode *ip2) { int error; - struct page *page; if (ip1->i_ino > ip2->i_ino) swap(ip1, ip2); @@ -2759,8 +2758,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout( * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable * for this nested lock case. */ - page = dax_layout_busy_page(VFS_I(ip2)->i_mapping); - if (!dax_page_is_idle(page)) { + error = dax_break_layout(VFS_I(ip2), 0, -1, NULL); + if (error) { xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); goto again; diff --git a/include/linux/dax.h b/include/linux/dax.h index a6b277f1e13a..2fbb262092ca 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -255,6 +255,8 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +void dax_delete_mapping_range(struct address_space *mapping, + loff_t start, loff_t end); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); int __must_check dax_break_layout(struct inode *inode, loff_t start, diff --git a/mm/truncate.c b/mm/truncate.c index 76d8fcd89bd0..79570045071c 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -78,8 +78,22 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping, if (dax_mapping(mapping)) { for (i = j; i < nr; i++) { - if (xa_is_value(fbatch->folios[i])) + if (xa_is_value(fbatch->folios[i])) { + /* + * File systems should already have called + * dax_break_layout_entry() to remove all DAX + * entries while holding a lock to prevent + * establishing new entries. Therefore we + * shouldn't find any here. + */ + WARN_ON_ONCE(1); + + /* + * Delete the mapping so truncate_pagecache() + * doesn't loop forever. + */ dax_delete_mapping_entry(mapping, indices[i]); + } } goto out; } From 0e2f80afcfa699ce722c01afc9286a942bd57211 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:02 +1100 Subject: [PATCH 1202/2157] fs/dax: ensure all pages are idle prior to filesystem unmount File systems call dax_break_mapping() prior to reallocating file system blocks to ensure the page is not undergoing any DMA or other accesses. Generally this is needed when a file is truncated to ensure that if a block is reallocated nothing is writing to it. However filesystems currently don't call this when an FS DAX inode is evicted. This can cause problems when the file system is unmounted as a page can continue to be under going DMA or other remote access after unmount. This means if the file system is remounted any truncate or other operation which requires the underlying file system block to be freed will not wait for the remote access to complete. Therefore a busy block may be reallocated to a new file leading to corruption. Link: https://lkml.kernel.org/r/2d3cf575bbd095084993154be2f0aa7442e5cd28.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Wiliams Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 27 +++++++++++++++++++++++++++ fs/ext4/inode.c | 2 ++ fs/xfs/xfs_super.c | 12 ++++++++++++ include/linux/dax.h | 5 +++++ 4 files changed, 46 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 14fbe5163037..bc538ba56058 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -884,6 +884,13 @@ static int wait_page_idle(struct page *page, TASK_INTERRUPTIBLE, 0, 0, cb(inode)); } +static void wait_page_idle_uninterruptible(struct page *page, + struct inode *inode) +{ + ___wait_var_event(page, dax_page_is_idle(page), + TASK_UNINTERRUPTIBLE, 0, 0, schedule()); +} + /* * Unmaps the inode and waits for any DMA to complete prior to deleting the * DAX mapping entries for the range. @@ -919,6 +926,26 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end, } EXPORT_SYMBOL_GPL(dax_break_layout); +void dax_break_layout_final(struct inode *inode) +{ + struct page *page; + + if (!dax_mapping(inode->i_mapping)) + return; + + do { + page = dax_layout_busy_page_range(inode->i_mapping, 0, + LLONG_MAX); + if (!page) + break; + + wait_page_idle_uninterruptible(page, inode); + } while (true); + + dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX); +} +EXPORT_SYMBOL_GPL(dax_break_layout_final); + /* * Invalidate DAX entry if it is clean. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2342bac14a9e..3cc8da6357aa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -181,6 +181,8 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); + dax_break_layout_final(inode); + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0055066fb1d9..37898f89b3ea 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -751,6 +751,17 @@ xfs_fs_drop_inode( return generic_drop_inode(inode); } +STATIC void +xfs_fs_evict_inode( + struct inode *inode) +{ + if (IS_DAX(inode)) + dax_break_layout_final(inode); + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); +} + static void xfs_mount_free( struct xfs_mount *mp) @@ -1215,6 +1226,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .drop_inode = xfs_fs_drop_inode, + .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, diff --git a/include/linux/dax.h b/include/linux/dax.h index 2fbb262092ca..2333c30f6d36 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -232,6 +232,10 @@ static inline int __must_check dax_break_layout(struct inode *inode, { return 0; } + +static inline void dax_break_layout_final(struct inode *inode) +{ +} #endif bool dax_alive(struct dax_device *dax_dev); @@ -266,6 +270,7 @@ static inline int __must_check dax_break_layout_inode(struct inode *inode, { return dax_break_layout(inode, 0, LLONG_MAX, cb); } +void dax_break_layout_final(struct inode *inode); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, From cbe298d82cf70aa6bb16cfc8ae4db522f39a0034 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:03 +1100 Subject: [PATCH 1203/2157] fs/dax: remove PAGE_MAPPING_DAX_SHARED mapping flag The page ->mapping pointer can have magic values like PAGE_MAPPING_DAX_SHARED and PAGE_MAPPING_ANON for page owner specific usage. Currently PAGE_MAPPING_DAX_SHARED and PAGE_MAPPING_ANON alias to the same value. This isn't a problem because FS DAX pages are never seen by the anonymous mapping code and vice versa. However a future change will make FS DAX pages more like normal pages, so folio_test_anon() must not return true for a FS DAX page. We could explicitly test for a FS DAX page in folio_test_anon(), etc. however the PAGE_MAPPING_DAX_SHARED flag isn't actually needed. Instead we can use the page->mapping field to implicitly track the first mapping of a page. If page->mapping is non-NULL it implies the page is associated with a single mapping at page->index. If the page is associated with a second mapping clear page->mapping and set page->share to 1. This is possible because a shared mapping implies the file-system implements dax_holder_operations which makes the ->mapping and ->index, which is a union with ->share, unused. The page is considered shared when page->mapping == NULL and page->share > 0 or page->mapping != NULL, implying it is present in at least one address space. This also makes it easier for a future change to detect when a page is first mapped into an address space which requires special handling. Link: https://lkml.kernel.org/r/c22f699202db0acee2f7039eb026e68261ce42d6.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Tested-by: Alison Schofield Cc: Asahi Lina Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Wiliams Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Ted Ts'o Cc: Vishal Verma Cc: WANG Xuerui Cc: Will Deacon Cc: Alexander Gordeev Cc: Balbir Singh Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Jason Gunthorpe Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vivek Goyal Signed-off-by: Andrew Morton --- fs/dax.c | 55 +++++++++++++++++++++++--------------- include/linux/page-flags.h | 6 ----- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index bc538ba56058..6674540363e8 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -351,27 +351,40 @@ static unsigned long dax_end_pfn(void *entry) for (pfn = dax_to_pfn(entry); \ pfn < dax_end_pfn(entry); pfn++) +/* + * A DAX folio is considered shared if it has no mapping set and ->share (which + * shares the ->index field) is non-zero. Note this may return false even if the + * page is shared between multiple files but has not yet actually been mapped + * into multiple address spaces. + */ static inline bool dax_folio_is_shared(struct folio *folio) { - return folio->mapping == PAGE_MAPPING_DAX_SHARED; + return !folio->mapping && folio->page.share; } /* - * Set the folio->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the - * refcount. + * When it is called by dax_insert_entry(), the shared flag will indicate + * whether this entry is shared by multiple files. If the page has not + * previously been associated with any mappings the ->mapping and ->index + * fields will be set. If it has already been associated with a mapping + * the mapping will be cleared and the share count set. It's then up to + * reverse map users like memory_failure() to call back into the filesystem to + * recover ->mapping and ->index information. For example by implementing + * dax_holder_operations. */ -static inline void dax_folio_share_get(struct folio *folio) +static void dax_folio_make_shared(struct folio *folio) { - if (folio->mapping != PAGE_MAPPING_DAX_SHARED) { - /* - * Reset the index if the page was already mapped - * regularly before. - */ - if (folio->mapping) - folio->page.share = 1; - folio->mapping = PAGE_MAPPING_DAX_SHARED; - } - folio->page.share++; + /* + * folio is not currently shared so mark it as shared by clearing + * folio->mapping. + */ + folio->mapping = NULL; + + /* + * folio has previously been mapped into one address space so set the + * share count. + */ + folio->page.share = 1; } static inline unsigned long dax_folio_share_put(struct folio *folio) @@ -379,12 +392,6 @@ static inline unsigned long dax_folio_share_put(struct folio *folio) return --folio->page.share; } -/* - * When it is called in dax_insert_entry(), the shared flag will indicate - * that whether this entry is shared by multiple files. If so, set - * the folio->mapping PAGE_MAPPING_DAX_SHARED, and use page->share - * as refcount. - */ static void dax_associate_entry(void *entry, struct address_space *mapping, struct vm_area_struct *vma, unsigned long address, bool shared) { @@ -398,8 +405,12 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, for_each_mapped_pfn(entry, pfn) { struct folio *folio = pfn_folio(pfn); - if (shared) { - dax_folio_share_get(folio); + if (shared && (folio->mapping || folio->page.share)) { + if (folio->mapping) + dax_folio_make_shared(folio); + + WARN_ON_ONCE(!folio->page.share); + folio->page.share++; } else { WARN_ON_ONCE(folio->mapping); folio->mapping = mapping; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 36d283552f80..cab382bd965e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -673,12 +673,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -/* - * Different with flags above, this flag is used only for fsdax mode. It - * indicates that this page->mapping is now under reflink case. - */ -#define PAGE_MAPPING_DAX_SHARED ((void *)0x1) - static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; From a58c6fb6623d8aef36c0c4e0b8a48770deef3213 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:04 +1100 Subject: [PATCH 1204/2157] mm/gup: remove redundant check for PCI P2PDMA page PCI P2PDMA pages are not mapped with pXX_devmap PTEs therefore the check in __gup_device_huge() is redundant. Remove it Link: https://lkml.kernel.org/r/260e3dcfaf05ff1c734a49698ed4332b5dae04c2.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Wiliams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/gup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index e42e4fdaf765..e5d6454df41d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3013,11 +3013,6 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, break; } - if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); - break; - } - folio = try_grab_folio_fast(page, 1, flags); if (!folio) { gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); From b7e2823787735ca009e63f35f164b46df0ef096c Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:05 +1100 Subject: [PATCH 1205/2157] mm/mm_init: move p2pdma page refcount initialisation to p2pdma Currently ZONE_DEVICE page reference counts are initialised by core memory management code in __init_zone_device_page() as part of the memremap() call which driver modules make to obtain ZONE_DEVICE pages. This initialises page refcounts to 1 before returning them to the driver. This was presumably done because it drivers had a reference of sorts on the page. It also ensured the page could always be mapped with vm_insert_page() for example and would never get freed (ie. have a zero refcount), freeing drivers of manipulating page reference counts. However it complicates figuring out whether or not a page is free from the mm perspective because it is no longer possible to just look at the refcount. Instead the page type must be known and if GUP is used a secondary pgmap reference is also sometimes needed. To simplify this it is desirable to remove the page reference count for the driver, so core mm can just use the refcount without having to account for page type or do other types of tracking. This is possible because drivers can always assume the page is valid as core kernel will never offline or remove the struct page. This means it is now up to drivers to initialise the page refcount as required. P2PDMA uses vm_insert_page() to map the page, and that requires a non-zero reference count when initialising the page so set that when the page is first mapped. Link: https://lkml.kernel.org/r/6aedb0ac2886dcc4503cb705273db5b3863a0b66.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- drivers/pci/p2pdma.c | 13 +++++++++++-- mm/memremap.c | 17 +++++++++++++---- mm/mm_init.c | 22 ++++++++++++++++++---- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 0cb7e0aaba0e..04773a865819 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -140,13 +140,22 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj, rcu_read_unlock(); for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { - ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr)); + struct page *page = virt_to_page(kaddr); + + /* + * Initialise the refcount for the freshly allocated page. As + * we have just allocated the page no one else should be + * using it. + */ + VM_WARN_ON_ONCE_PAGE(!page_ref_count(page), page); + set_page_count(page, 1); + ret = vm_insert_page(vma, vaddr, page); if (ret) { gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len); return ret; } percpu_ref_get(ref); - put_page(virt_to_page(kaddr)); + put_page(page); kaddr += PAGE_SIZE; len -= PAGE_SIZE; } diff --git a/mm/memremap.c b/mm/memremap.c index 40d4547ce514..07bbe0eed084 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -488,15 +488,24 @@ void free_zone_device_folio(struct folio *folio) folio->mapping = NULL; folio->page.pgmap->ops->page_free(folio_page(folio, 0)); - if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE && - folio->page.pgmap->type != MEMORY_DEVICE_COHERENT) + switch (folio->page.pgmap->type) { + case MEMORY_DEVICE_PRIVATE: + case MEMORY_DEVICE_COHERENT: + put_dev_pagemap(folio->page.pgmap); + break; + + case MEMORY_DEVICE_FS_DAX: + case MEMORY_DEVICE_GENERIC: /* * Reset the refcount to 1 to prepare for handing out the page * again. */ folio_set_count(folio, 1); - else - put_dev_pagemap(folio->page.pgmap); + break; + + case MEMORY_DEVICE_PCI_P2PDMA: + break; + } } void zone_device_page_init(struct page *page) diff --git a/mm/mm_init.c b/mm/mm_init.c index b5047c5ef7d6..dbb92fdf36fe 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1026,12 +1026,26 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, } /* - * ZONE_DEVICE pages are released directly to the driver page allocator - * which will set the page count to 1 when allocating the page. + * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC and + * MEMORY_TYPE_FS_DAX pages are released directly to the driver page + * allocator which will set the page count to 1 when allocating the + * page. + * + * MEMORY_TYPE_GENERIC and MEMORY_TYPE_FS_DAX pages automatically have + * their refcount reset to one whenever they are freed (ie. after + * their refcount drops to 0). */ - if (pgmap->type == MEMORY_DEVICE_PRIVATE || - pgmap->type == MEMORY_DEVICE_COHERENT) + switch (pgmap->type) { + case MEMORY_DEVICE_PRIVATE: + case MEMORY_DEVICE_COHERENT: + case MEMORY_DEVICE_PCI_P2PDMA: set_page_count(page, 0); + break; + + case MEMORY_DEVICE_FS_DAX: + case MEMORY_DEVICE_GENERIC: + break; + } } /* From 82ba975e4c43d98afebced82d940ddb7aec42a9d Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:06 +1100 Subject: [PATCH 1206/2157] mm: allow compound zone device pages Zone device pages are used to represent various type of device memory managed by device drivers. Currently compound zone device pages are not supported. This is because MEMORY_DEVICE_FS_DAX pages are the only user of higher order zone device pages and have their own page reference counting. A future change will unify FS DAX reference counting with normal page reference counting rules and remove the special FS DAX reference counting. Supporting that requires compound zone device pages. Supporting compound zone device pages requires compound_head() to distinguish between head and tail pages whilst still preserving the special struct page fields that are specific to zone device pages. A tail page is distinguished by having bit zero being set in page->compound_head, with the remaining bits pointing to the head page. For zone device pages page->compound_head is shared with page->pgmap. The page->pgmap field must be common to all pages within a folio, even if the folio spans memory sections. Therefore pgmap is the same for both head and tail pages and can be moved into the folio and we can use the standard scheme to find compound_head from a tail page. Link: https://lkml.kernel.org/r/67055d772e6102accf85161d0b57b0b3944292bf.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Signed-off-by: Balbir Singh Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 ++- drivers/pci/p2pdma.c | 6 +++--- include/linux/memremap.h | 6 +++--- include/linux/migrate.h | 4 ++-- include/linux/mm_types.h | 9 +++++++-- include/linux/mmzone.h | 12 +++++++++++- lib/test_hmm.c | 3 ++- mm/hmm.c | 2 +- mm/memory.c | 4 +++- mm/memremap.c | 14 +++++++------- mm/migrate_device.c | 18 ++++++++++++------ mm/mlock.c | 2 ++ mm/mm_init.c | 2 +- 13 files changed, 56 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1a072568cef6..61d0f411ef84 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -88,7 +88,8 @@ struct nouveau_dmem { static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) { - return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap); + return container_of(page_pgmap(page), struct nouveau_dmem_chunk, + pagemap); } static struct nouveau_drm *page_to_drm(struct page *page) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 04773a865819..19214ec81fbb 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -202,7 +202,7 @@ static const struct attribute_group p2pmem_group = { static void p2pdma_page_free(struct page *page) { - struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap); + struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page)); /* safe to dereference while a reference is held to the percpu ref */ struct pci_p2pdma *p2pdma = rcu_dereference_protected(pgmap->provider->p2pdma, 1); @@ -1025,8 +1025,8 @@ enum pci_p2pdma_map_type pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, struct scatterlist *sg) { - if (state->pgmap != sg_page(sg)->pgmap) { - state->pgmap = sg_page(sg)->pgmap; + if (state->pgmap != page_pgmap(sg_page(sg))) { + state->pgmap = page_pgmap(sg_page(sg)); state->map = pci_p2pdma_map_type(state->pgmap, dev); state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; } diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 3f7143ade32c..0256a4218dc3 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; + page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; + page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_COHERENT; + page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 29919faea2f1..61899ec7a9a3 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -205,8 +205,8 @@ struct migrate_vma { unsigned long end; /* - * Set to the owner value also stored in page->pgmap->owner for - * migrating out of device private memory. The flags also need to + * Set to the owner value also stored in page_pgmap(page)->owner + * for migrating out of device private memory. The flags also need to * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE. * The caller should always set this field when using mmu notifier * callbacks to avoid device MMU invalidations for device private diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6a93abb4452b..0fa7907d437e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -130,8 +130,11 @@ struct page { unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ - /** @pgmap: Points to the hosting device page map. */ - struct dev_pagemap *pgmap; + /* + * The first word is used for compound_head or folio + * pgmap + */ + void *_unused_pgmap_compound_head; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being @@ -300,6 +303,7 @@ typedef struct { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @pgmap: Metadata for ZONE_DEVICE mappings * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). @@ -338,6 +342,7 @@ struct folio { /* private: */ }; /* public: */ + struct dev_pagemap *pgmap; }; struct address_space *mapping; pgoff_t index; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 44ecb2f90db4..550dbba92521 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1158,6 +1158,12 @@ static inline bool is_zone_device_page(const struct page *page) return page_zonenum(page) == ZONE_DEVICE; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + return page_folio(page)->pgmap; +} + /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different @@ -1173,7 +1179,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, return false; if (!is_zone_device_page(a)) return true; - return a->pgmap == b->pgmap; + return page_pgmap(a) == page_pgmap(b); } extern void memmap_init_zone_device(struct zone *, unsigned long, @@ -1188,6 +1194,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, { return true; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + return NULL; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 39a2286f8592..5b144bc5c4ec 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -195,7 +195,8 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp) static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page) { - return container_of(page->pgmap, struct dmirror_chunk, pagemap); + return container_of(page_pgmap(page), struct dmirror_chunk, + pagemap); } static struct dmirror_device *dmirror_page_to_device(struct page *page) diff --git a/mm/hmm.c b/mm/hmm.c index 7e0229ae4a5a..082f7b7c0b9e 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -248,7 +248,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, * just report the PFN. */ if (is_device_private_entry(entry) && - pfn_swap_entry_to_page(entry)->pgmap->owner == + page_pgmap(pfn_swap_entry_to_page(entry))->owner == range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; if (is_writable_device_private_entry(entry)) diff --git a/mm/memory.c b/mm/memory.c index 4c12a05fabd9..c9ddd991abda 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4338,6 +4338,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vmf->page = pfn_swap_entry_to_page(entry); ret = remove_device_exclusive_entry(vmf); } else if (is_device_private_entry(entry)) { + struct dev_pagemap *pgmap; if (vmf->flags & FAULT_FLAG_VMA_LOCK) { /* * migrate_to_ram is not yet ready to operate @@ -4362,7 +4363,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); + pgmap = page_pgmap(vmf->page); + ret = pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/memremap.c b/mm/memremap.c index 07bbe0eed084..68099af9df4c 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -458,8 +458,8 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap); void free_zone_device_folio(struct folio *folio) { - if (WARN_ON_ONCE(!folio->page.pgmap->ops || - !folio->page.pgmap->ops->page_free)) + if (WARN_ON_ONCE(!folio->pgmap->ops || + !folio->pgmap->ops->page_free)) return; mem_cgroup_uncharge(folio); @@ -486,12 +486,12 @@ void free_zone_device_folio(struct folio *folio) * to clear folio->mapping. */ folio->mapping = NULL; - folio->page.pgmap->ops->page_free(folio_page(folio, 0)); + folio->pgmap->ops->page_free(folio_page(folio, 0)); - switch (folio->page.pgmap->type) { + switch (folio->pgmap->type) { case MEMORY_DEVICE_PRIVATE: case MEMORY_DEVICE_COHERENT: - put_dev_pagemap(folio->page.pgmap); + put_dev_pagemap(folio->pgmap); break; case MEMORY_DEVICE_FS_DAX: @@ -514,7 +514,7 @@ void zone_device_page_init(struct page *page) * Drivers shouldn't be allocating pages after calling * memunmap_pages(). */ - WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref)); + WARN_ON_ONCE(!percpu_ref_tryget_live(&page_pgmap(page)->ref)); set_page_count(page, 1); lock_page(page); } @@ -523,7 +523,7 @@ EXPORT_SYMBOL_GPL(zone_device_page_init); #ifdef CONFIG_FS_DAX bool __put_devmap_managed_folio_refs(struct folio *folio, int refs) { - if (folio->page.pgmap->type != MEMORY_DEVICE_FS_DAX) + if (folio->pgmap->type != MEMORY_DEVICE_FS_DAX) return false; /* diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 5bd888223cc8..7d0d64f67cdf 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -106,6 +106,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, arch_enter_lazy_mmu_mode(); for (; addr < end; addr += PAGE_SIZE, ptep++) { + struct dev_pagemap *pgmap; unsigned long mpfn = 0, pfn; struct folio *folio; struct page *page; @@ -133,9 +134,10 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, goto next; page = pfn_swap_entry_to_page(entry); + pgmap = page_pgmap(page); if (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) || - page->pgmap->owner != migrate->pgmap_owner) + pgmap->owner != migrate->pgmap_owner) goto next; mpfn = migrate_pfn(page_to_pfn(page)) | @@ -152,12 +154,16 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, } page = vm_normal_page(migrate->vma, addr, pte); if (page && !is_zone_device_page(page) && - !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) - goto next; - else if (page && is_device_coherent_page(page) && - (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) || - page->pgmap->owner != migrate->pgmap_owner)) + !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) { goto next; + } else if (page && is_device_coherent_page(page)) { + pgmap = page_pgmap(page); + + if (!(migrate->flags & + MIGRATE_VMA_SELECT_DEVICE_COHERENT) || + pgmap->owner != migrate->pgmap_owner) + goto next; + } mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; } diff --git a/mm/mlock.c b/mm/mlock.c index cde076fa7d5e..3cb72b579ffd 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -368,6 +368,8 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, if (is_huge_zero_pmd(*pmd)) goto out; folio = pmd_folio(*pmd); + if (folio_is_zone_device(folio)) + goto out; if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else diff --git a/mm/mm_init.c b/mm/mm_init.c index dbb92fdf36fe..73e97ce95f58 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1007,7 +1007,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, * and zone_device_data. It is a bug if a ZONE_DEVICE page is * ever freed or placed on a driver-private list. */ - page->pgmap = pgmap; + page_folio(page)->pgmap = pgmap; page->zone_device_data = NULL; /* From 15a64311e0ae069196b7d3b88d0c11e0b2ad733e Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:07 +1100 Subject: [PATCH 1207/2157] mm/memory: enhance insert_page_into_pte_locked() to create writable mappings In preparation for using insert_page() for DAX, enhance insert_page_into_pte_locked() to handle establishing writable mappings. Recall that DAX returns VM_FAULT_NOPAGE after installing a PTE which bypasses the typical set_pte_range() in finish_fault. Link: https://lkml.kernel.org/r/f7354fd9c2f5d0c2fa321733039f9f87e791023e.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Suggested-by: Dan Williams Reviewed-by: Dan Williams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- mm/memory.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index c9ddd991abda..705c902cf56a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2145,19 +2145,39 @@ static int validate_page_before_insert(struct vm_area_struct *vma, } static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte, - unsigned long addr, struct page *page, pgprot_t prot) + unsigned long addr, struct page *page, + pgprot_t prot, bool mkwrite) { struct folio *folio = page_folio(page); - pte_t pteval; + pte_t pteval = ptep_get(pte); + + if (!pte_none(pteval)) { + if (!mkwrite) + return -EBUSY; + + /* see insert_pfn(). */ + if (pte_pfn(pteval) != page_to_pfn(page)) { + WARN_ON_ONCE(!is_zero_pfn(pte_pfn(pteval))); + return -EFAULT; + } + pteval = maybe_mkwrite(pteval, vma); + pteval = pte_mkyoung(pteval); + if (ptep_set_access_flags(vma, addr, pte, pteval, 1)) + update_mmu_cache(vma, addr, pte); + return 0; + } - if (!pte_none(ptep_get(pte))) - return -EBUSY; /* Ok, finally just insert the thing.. */ pteval = mk_pte(page, prot); if (unlikely(is_zero_folio(folio))) { pteval = pte_mkspecial(pteval); } else { folio_get(folio); + pteval = mk_pte(page, prot); + if (mkwrite) { + pteval = pte_mkyoung(pteval); + pteval = maybe_mkwrite(pte_mkdirty(pteval), vma); + } inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); folio_add_file_rmap_pte(folio, page, vma); } @@ -2166,7 +2186,7 @@ static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte, } static int insert_page(struct vm_area_struct *vma, unsigned long addr, - struct page *page, pgprot_t prot) + struct page *page, pgprot_t prot, bool mkwrite) { int retval; pte_t *pte; @@ -2179,7 +2199,8 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte = get_locked_pte(vma->vm_mm, addr, &ptl); if (!pte) goto out; - retval = insert_page_into_pte_locked(vma, pte, addr, page, prot); + retval = insert_page_into_pte_locked(vma, pte, addr, page, prot, + mkwrite); pte_unmap_unlock(pte, ptl); out: return retval; @@ -2193,7 +2214,7 @@ static int insert_page_in_batch_locked(struct vm_area_struct *vma, pte_t *pte, err = validate_page_before_insert(vma, page); if (err) return err; - return insert_page_into_pte_locked(vma, pte, addr, page, prot); + return insert_page_into_pte_locked(vma, pte, addr, page, prot, false); } /* insert_pages() amortizes the cost of spinlock operations @@ -2329,7 +2350,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, BUG_ON(vma->vm_flags & VM_PFNMAP); vm_flags_set(vma, VM_MIXEDMAP); } - return insert_page(vma, addr, page, vma->vm_page_prot); + return insert_page(vma, addr, page, vma->vm_page_prot, false); } EXPORT_SYMBOL(vm_insert_page); @@ -2609,7 +2630,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, * result in pfn_t_has_page() == false. */ page = pfn_to_page(pfn_t_to_pfn(pfn)); - err = insert_page(vma, addr, page, pgprot); + err = insert_page(vma, addr, page, pgprot, mkwrite); } else { return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } From ec2e0cc67f9c3ed5926da390ad86b25c142a2e4a Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:08 +1100 Subject: [PATCH 1208/2157] mm/memory: add vmf_insert_page_mkwrite() Currently to map a DAX page the DAX driver calls vmf_insert_pfn. This creates a special devmap PTE entry for the pfn but does not take a reference on the underlying struct page for the mapping. This is because DAX page refcounts are treated specially, as indicated by the presence of a devmap entry. To allow DAX page refcounts to be managed the same as normal page refcounts introduce vmf_insert_page_mkwrite(). This will take a reference on the underlying page much the same as vmf_insert_page, except it also permits upgrading an existing mapping to be writable if requested/possible. Link: https://lkml.kernel.org/r/4ce3aa984c060f370105e0bfef1035869578be47.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Wiliams Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ mm/memory.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a74a3ee68bc..c6fb9300f6c0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3644,6 +3644,8 @@ int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num); +vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page, + bool write); vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/memory.c b/mm/memory.c index 705c902cf56a..f09b4a1d09a8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2643,6 +2643,26 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page, + bool write) +{ + pgprot_t pgprot = vmf->vma->vm_page_prot; + unsigned long addr = vmf->address; + int err; + + if (addr < vmf->vma->vm_start || addr >= vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + + err = insert_page(vmf->vma, addr, page, pgprot, write); + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(vmf_insert_page_mkwrite); + vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { From 349994cf61e6eaa5996d53e45ffd2272d32d5e4e Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:09 +1100 Subject: [PATCH 1209/2157] mm/rmap: add support for PUD sized mappings to rmap The rmap doesn't currently support adding a PUD mapping of a folio. This patch adds support for entire PUD mappings of folios, primarily to allow for more standard refcounting of device DAX folios. Currently DAX is the only user of this and it doesn't require support for partially mapped PUD-sized folios so we don't support for that for now. Link: https://lkml.kernel.org/r/248582c07896e30627d1aeaeebc6949cfd91b851.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Dan Williams Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/rmap.h | 15 ++++++++++ mm/rmap.c | 67 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 69e9a431a40e..6abf7960077a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -192,6 +192,7 @@ typedef int __bitwise rmap_t; enum rmap_level { RMAP_LEVEL_PTE = 0, RMAP_LEVEL_PMD, + RMAP_LEVEL_PUD, }; static inline void __folio_rmap_sanity_checks(const struct folio *folio, @@ -228,6 +229,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; + case RMAP_LEVEL_PUD: + /* + * Assume that we are creating a single "entire" mapping of the + * folio. + */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); + break; default: VM_WARN_ON_ONCE(true); } @@ -251,12 +260,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, folio_add_file_rmap_ptes(folio, page, 1, vma) void folio_add_file_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_add_file_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_remove_rmap_pte(folio, page, vma) \ folio_remove_rmap_ptes(folio, page, 1, vma) void folio_remove_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_remove_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); @@ -341,6 +354,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); atomic_inc(&folio->_large_mapcount); break; @@ -437,6 +451,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; diff --git a/mm/rmap.c b/mm/rmap.c index 333ecac049b2..bcec8677f68d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1269,12 +1269,19 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: first = atomic_inc_and_test(&folio->_entire_mapcount); if (first) { nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) { - *nr_pmdmapped = folio_nr_pages(folio); - nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); + nr_pages = folio_nr_pages(folio); + /* + * We only track PMD mappings of PMD-sized + * folios separately. + */ + if (level == RMAP_LEVEL_PMD) + *nr_pmdmapped = nr_pages; + nr = nr_pages - (nr & FOLIO_PAGES_MAPPED); /* Raced ahead of a remove and another add? */ if (unlikely(nr < 0)) nr = 0; @@ -1420,6 +1427,13 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio, case RMAP_LEVEL_PMD: SetPageAnonExclusive(page); break; + case RMAP_LEVEL_PUD: + /* + * Keep the compiler happy, we don't support anonymous + * PUD mappings. + */ + WARN_ON_ONCE(1); + break; } } for (i = 0; i < nr_pages; i++) { @@ -1613,6 +1627,27 @@ void folio_add_file_rmap_pmd(struct folio *folio, struct page *page, #endif } +/** + * folio_add_file_rmap_pud - add a PUD mapping to a page range of a folio + * @folio: The folio to add the mapping to + * @page: The first page to add + * @vma: The vm area in which the mapping is added + * + * The page range of the folio is defined by [page, page + HPAGE_PUD_NR) + * + * The caller needs to hold the page table lock. + */ +void folio_add_file_rmap_pud(struct folio *folio, struct page *page, + struct vm_area_struct *vma) +{ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + __folio_add_file_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD); +#else + WARN_ON_ONCE(true); +#endif +} + static __always_inline void __folio_remove_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, enum rmap_level level) @@ -1642,13 +1677,16 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, partially_mapped = nr && atomic_read(mapped); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: atomic_dec(&folio->_large_mapcount); last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED)) { - nr_pmdmapped = folio_nr_pages(folio); - nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); + nr_pages = folio_nr_pages(folio); + if (level == RMAP_LEVEL_PMD) + nr_pmdmapped = nr_pages; + nr = nr_pages - (nr & FOLIO_PAGES_MAPPED); /* Raced ahead of another remove and an add? */ if (unlikely(nr < 0)) nr = 0; @@ -1722,6 +1760,27 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page, #endif } +/** + * folio_remove_rmap_pud - remove a PUD mapping from a page range of a folio + * @folio: The folio to remove the mapping from + * @page: The first page to remove + * @vma: The vm area from which the mapping is removed + * + * The page range of the folio is defined by [page, page + HPAGE_PUD_NR) + * + * The caller needs to hold the page table lock. + */ +void folio_remove_rmap_pud(struct folio *folio, struct page *page, + struct vm_area_struct *vma) +{ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + __folio_remove_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD); +#else + WARN_ON_ONCE(true); +#endif +} + /* We support batch unmapping of PTEs for lazyfree large folios */ static inline bool can_batch_unmap_folio_ptes(unsigned long addr, struct folio *folio, pte_t *ptep) From dbe54153296d0931144a63295fc9367794bbe797 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:10 +1100 Subject: [PATCH 1210/2157] mm/huge_memory: add vmf_insert_folio_pud() Currently DAX folio/page reference counts are managed differently to normal pages. To allow these to be managed the same as normal pages introduce vmf_insert_folio_pud. This will map the entire PUD-sized folio and take references as it would for a normally mapped page. This is distinct from the current mechanism, vmf_insert_pfn_pud, which simply inserts a special devmap PUD entry into the page table without holding a reference to the page for the mapping. Link: https://lkml.kernel.org/r/649a1ef91d556593948351e94f51ef73a14f6794.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 + mm/huge_memory.c | 99 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 12 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e1bea54820ff..4eeb54b50621 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, + bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_UNSUPPORTED, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index acb12653484e..4cd79784f841 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1482,19 +1482,17 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm = vma->vm_mm; pgprot_t prot = vma->vm_page_prot; pud_t entry; - spinlock_t *ptl; - ptl = pud_lock(mm, pud); if (!pud_none(*pud)) { if (write) { if (WARN_ON_ONCE(pud_pfn(*pud) != pfn_t_to_pfn(pfn))) - goto out_unlock; + return; entry = pud_mkyoung(*pud); entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); if (pudp_set_access_flags(vma, addr, pud, entry, 1)) update_mmu_cache_pud(vma, addr, pud); } - goto out_unlock; + return; } entry = pud_mkhuge(pfn_t_pud(pfn, prot)); @@ -1508,9 +1506,6 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); - -out_unlock: - spin_unlock(ptl); } /** @@ -1528,6 +1523,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) unsigned long addr = vmf->address & PUD_MASK; struct vm_area_struct *vma = vmf->vma; pgprot_t pgprot = vma->vm_page_prot; + spinlock_t *ptl; /* * If we had pud_special, we could avoid all these restrictions, @@ -1545,10 +1541,57 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) track_pfn_insert(vma, &pgprot, pfn); + ptl = pud_lock(vma->vm_mm, vmf->pud); insert_pfn_pud(vma, addr, vmf->pud, pfn, write); + spin_unlock(ptl); + return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); + +/** + * vmf_insert_folio_pud - insert a pud size folio mapped by a pud entry + * @vmf: Structure describing the fault + * @folio: folio to insert + * @write: whether it's a write fault + * + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, + bool write) +{ + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address & PUD_MASK; + pud_t *pud = vmf->pud; + struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + + if (WARN_ON_ONCE(folio_order(folio) != PUD_ORDER)) + return VM_FAULT_SIGBUS; + + ptl = pud_lock(mm, pud); + + /* + * If there is already an entry present we assume the folio is + * already mapped, hence no need to take another reference. We + * still call insert_pfn_pud() though in case the mapping needs + * upgrading to writeable. + */ + if (pud_none(*vmf->pud)) { + folio_get(folio); + folio_add_file_rmap_pud(folio, &folio->page, vma); + add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR); + } + insert_pfn_pud(vma, addr, vmf->pud, pfn_to_pfn_t(folio_pfn(folio)), + write); + spin_unlock(ptl); + + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(vmf_insert_folio_pud); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ void touch_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -2146,7 +2189,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); } else if (is_huge_zero_pmd(orig_pmd)) { - zap_deposited_table(tlb->mm, pmd); + if (!vma_is_dax(vma) || arch_needs_pgtable_deposit()) + zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); } else { struct folio *folio = NULL; @@ -2646,12 +2690,24 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, orig_pud = pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); arch_check_zapped_pud(vma, orig_pud); tlb_remove_pud_tlb_entry(tlb, pud, addr); - if (vma_is_special_huge(vma)) { + if (!vma_is_dax(vma) && vma_is_special_huge(vma)) { spin_unlock(ptl); /* No zero page support yet */ } else { - /* No support for anonymous PUD pages yet */ - BUG(); + struct page *page = NULL; + struct folio *folio; + + /* No support for anonymous PUD pages or migration yet */ + VM_WARN_ON_ONCE(vma_is_anonymous(vma) || + !pud_present(orig_pud)); + + page = pud_page(orig_pud); + folio = page_folio(page); + folio_remove_rmap_pud(folio, page, vma); + add_mm_counter(tlb->mm, mm_counter_file(folio), -HPAGE_PUD_NR); + + spin_unlock(ptl); + tlb_remove_page_size(tlb, page, HPAGE_PUD_SIZE); } return 1; } @@ -2659,6 +2715,10 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, unsigned long haddr) { + struct folio *folio; + struct page *page; + pud_t old_pud; + VM_BUG_ON(haddr & ~HPAGE_PUD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); @@ -2666,7 +2726,22 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, count_vm_event(THP_SPLIT_PUD); - pudp_huge_clear_flush(vma, haddr, pud); + old_pud = pudp_huge_clear_flush(vma, haddr, pud); + + if (!vma_is_dax(vma)) + return; + + page = pud_page(old_pud); + folio = page_folio(page); + + if (!folio_test_dirty(folio) && pud_dirty(old_pud)) + folio_mark_dirty(folio); + if (!folio_test_referenced(folio) && pud_young(old_pud)) + folio_set_referenced(folio); + folio_remove_rmap_pud(folio, page, vma); + folio_put(folio); + add_mm_counter(vma->vm_mm, mm_counter_file(folio), + -HPAGE_PUD_NR); } void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, From 6c88f72691f88fd1fc493a3c2eed97f91b82b3f0 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:11 +1100 Subject: [PATCH 1211/2157] mm/huge_memory: add vmf_insert_folio_pmd() Currently DAX folio/page reference counts are managed differently to normal pages. To allow these to be managed the same as normal pages introduce vmf_insert_folio_pmd. This will map the entire PMD-sized folio and take references as it would for a normally mapped page. This is distinct from the current mechanism, vmf_insert_pfn_pmd, which simply inserts a special devmap PMD entry into the page table without holding a reference to the page for the mapping. It is not currently useful to implement a more generic vmf_insert_folio() which selects the correct behaviour based on folio_order(). This is because PTE faults require only a subpage of the folio to be PTE mapped rather than the entire folio. It would be possible to add this context somewhere but callers already need to handle PTE faults and PMD faults separately so a more generic function is not useful. Link: https://lkml.kernel.org/r/7bf92a2e68225d13ea368d53bbfee327314d1c40.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Wiliams Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 ++ mm/huge_memory.c | 65 +++++++++++++++++++++++++++++++++-------- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4eeb54b50621..e57e811cfd3c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, + bool write); vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, bool write); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4cd79784f841..e6f4189c1c94 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1375,20 +1375,20 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) return __do_huge_pmd_anonymous_page(vmf); } -static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, +static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write, pgtable_t pgtable) { struct mm_struct *mm = vma->vm_mm; pmd_t entry; - spinlock_t *ptl; - ptl = pmd_lock(mm, pmd); + lockdep_assert_held(pmd_lockptr(mm, pmd)); + if (!pmd_none(*pmd)) { if (write) { if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); - goto out_unlock; + return -EEXIST; } entry = pmd_mkyoung(*pmd); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); @@ -1396,7 +1396,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, update_mmu_cache_pmd(vma, addr, pmd); } - goto out_unlock; + return -EEXIST; } entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); @@ -1412,16 +1412,11 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); mm_inc_nr_ptes(mm); - pgtable = NULL; } set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); - -out_unlock: - spin_unlock(ptl); - if (pgtable) - pte_free(mm, pgtable); + return 0; } /** @@ -1440,6 +1435,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) struct vm_area_struct *vma = vmf->vma; pgprot_t pgprot = vma->vm_page_prot; pgtable_t pgtable = NULL; + spinlock_t *ptl; + int error; /* * If we had pmd_special, we could avoid all these restrictions, @@ -1462,12 +1459,56 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) } track_pfn_insert(vma, &pgprot, pfn); + ptl = pmd_lock(vma->vm_mm, vmf->pmd); + error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, + pgtable); + spin_unlock(ptl); + if (error && pgtable) + pte_free(vma->vm_mm, pgtable); - insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, + bool write) +{ + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address & PMD_MASK; + struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; + pgtable_t pgtable = NULL; + int error; + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + + if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER)) + return VM_FAULT_SIGBUS; + + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm); + if (!pgtable) + return VM_FAULT_OOM; + } + + ptl = pmd_lock(mm, vmf->pmd); + if (pmd_none(*vmf->pmd)) { + folio_get(folio); + folio_add_file_rmap_pmd(folio, &folio->page, vma); + add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR); + } + error = insert_pfn_pmd(vma, addr, vmf->pmd, + pfn_to_pfn_t(folio_pfn(folio)), vma->vm_page_prot, + write, pgtable); + spin_unlock(ptl); + if (error && pgtable) + pte_free(mm, pgtable); + + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd); + #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) { From e5cb23256347469c80138a2a8804887239465d0b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:12 +1100 Subject: [PATCH 1212/2157] mm/gup: don't allow FOLL_LONGTERM pinning of FS DAX pages Longterm pinning of FS DAX pages should already be disallowed by various pXX_devmap checks. However a future change will cause these checks to be invalid for FS DAX pages so make folio_is_longterm_pinnable() return false for FS DAX pages. Link: https://lkml.kernel.org/r/250a31876704b79f7c65b159f3c835e547f052df.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: John Hubbard Reviewed-by: Dan Williams Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/memremap.h | 11 +++++++++++ include/linux/mm.h | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 0256a4218dc3..4aa151914eab 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -187,6 +187,17 @@ static inline bool folio_is_device_coherent(const struct folio *folio) return is_device_coherent_page(&folio->page); } +static inline bool is_fsdax_page(const struct page *page) +{ + return is_zone_device_page(page) && + page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; +} + +static inline bool folio_is_fsdax(const struct folio *folio) +{ + return is_fsdax_page(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fb9300f6c0..009484a07074 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2115,6 +2115,13 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) if (folio_is_device_coherent(folio)) return false; + /* + * Filesystems can only tolerate transient delays to truncate and + * hole-punch operations + */ + if (folio_is_fsdax(folio)) + return false; + /* Otherwise, non-movable zone folios can be pinned. */ return !folio_is_zone_movable(folio); From 653d7825c149932f254e0cd22153ccc945e7e545 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Feb 2025 14:31:13 +1100 Subject: [PATCH 1213/2157] dcssblk: mark DAX broken, remove FS_DAX_LIMITED support The dcssblk driver has long needed special case supoprt to enable limited dax operation, so called CONFIG_FS_DAX_LIMITED. This mode works around the incomplete support for ZONE_DEVICE on s390 by forgoing the ability of dax-mapped pages to support GUP. Now, pending cleanups to fsdax that fix its reference counting [1] depend on the ability of all dax drivers to supply ZONE_DEVICE pages. To allow that work to move forward, dax support needs to be paused for dcssblk until ZONE_DEVICE support arrives. That work has been known for a few years [2], and the removal of "pte_devmap" requirements [3] makes the conversion easier. For now, place the support behind CONFIG_BROKEN, and remove PFN_SPECIAL (dcssblk was the only user). Link: http://lore.kernel.org/cover.9f0e45d52f5cff58807831b6b867084d0b14b61c.1725941415.git-series.apopple@nvidia.com [1] Link: http://lore.kernel.org/20210820210318.187742e8@thinkpad/ [2] Link: http://lore.kernel.org/4511465a4f8429f45e2ac70d2e65dc5e1df1eb47.1725941415.git-series.apopple@nvidia.com [3] Link: https://lkml.kernel.org/r/33eef2379c0d240f40cc15453fad2df1a4ae34c8.1740713401.git-series.apopple@nvidia.com Signed-off-by: Dan Williams Reviewed-by: Gerald Schaefer Tested-by: Alexander Gordeev Acked-by: David Hildenbrand Tested-by: Alison Schofield Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Jan Kara Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Alistair Popple Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Huacai Chen Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Ted Ts'o Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- Documentation/filesystems/dax.rst | 1 - drivers/s390/block/Kconfig | 12 ++++++++++-- drivers/s390/block/dcssblk.c | 27 +++++++++++++++++---------- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/dax.rst b/Documentation/filesystems/dax.rst index 719e90f1988e..08dd5e254cc5 100644 --- a/Documentation/filesystems/dax.rst +++ b/Documentation/filesystems/dax.rst @@ -207,7 +207,6 @@ implement direct_access. These block devices may be used for inspiration: - brd: RAM backed block device driver -- dcssblk: s390 dcss block device driver - pmem: NVDIMM persistent memory driver diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index e3710a762aba..4bfe469c04aa 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -4,13 +4,21 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m - select FS_DAX_LIMITED - select DAX prompt "DCSSBLK support" depends on S390 && BLOCK help Support for dcss block device +config DCSSBLK_DAX + def_bool y + depends on DCSSBLK + # requires S390 ZONE_DEVICE support + depends on BROKEN + select DAX + prompt "DCSSBLK DAX support" + help + Enable DAX operation for the dcss block device + config DASD def_tristate y prompt "Support for DASD devices" diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 0f14d279d30b..7248e547fefb 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -534,6 +534,21 @@ static const struct attribute_group *dcssblk_dev_attr_groups[] = { NULL, }; +static int dcssblk_setup_dax(struct dcssblk_dev_info *dev_info) +{ + struct dax_device *dax_dev; + + if (!IS_ENABLED(CONFIG_DCSSBLK_DAX)) + return 0; + + dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops); + if (IS_ERR(dax_dev)) + return PTR_ERR(dax_dev); + set_dax_synchronous(dax_dev); + dev_info->dax_dev = dax_dev; + return dax_add_host(dev_info->dax_dev, dev_info->gd); +} + /* * device attribute for adding devices */ @@ -547,7 +562,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char int rc, i, j, num_of_segments; struct dcssblk_dev_info *dev_info; struct segment_info *seg_info, *temp; - struct dax_device *dax_dev; char *local_buf; unsigned long seg_byte_size; @@ -674,14 +688,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char if (rc) goto put_dev; - dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops); - if (IS_ERR(dax_dev)) { - rc = PTR_ERR(dax_dev); - goto put_dev; - } - set_dax_synchronous(dax_dev); - dev_info->dax_dev = dax_dev; - rc = dax_add_host(dev_info->dax_dev, dev_info->gd); + rc = dcssblk_setup_dax(dev_info); if (rc) goto out_dax; @@ -917,7 +924,7 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, *kaddr = __va(dev_info->start + offset); if (pfn) *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), - PFN_DEV|PFN_SPECIAL); + PFN_DEV); return (dev_sz - offset) / PAGE_SIZE; } From 38607c62b34b46317c46d5baf1df03ac6e48a1c6 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:14 +1100 Subject: [PATCH 1214/2157] fs/dax: properly refcount fs dax pages Currently fs dax pages are considered free when the refcount drops to one and their refcounts are not increased when mapped via PTEs or decreased when unmapped. This requires special logic in mm paths to detect that these pages should not be properly refcounted, and to detect when the refcount drops to one instead of zero. On the other hand get_user_pages(), etc. will properly refcount fs dax pages by taking a reference and dropping it when the page is unpinned. Tracking this special behaviour requires extra PTE bits (eg. pte_devmap) and introduces rules that are potentially confusing and specific to FS DAX pages. To fix this, and to possibly allow removal of the special PTE bits in future, convert the fs dax page refcounts to be zero based and instead take a reference on the page each time it is mapped as is currently the case for normal pages. This may also allow a future clean-up to remove the pgmap refcounting that is currently done in mm/gup.c. Link: https://lkml.kernel.org/r/c7d886ad7468a20452ef6e0ddab6cfe220874e7c.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Reviewed-by: Dan Williams Tested-by: Alison Schofield Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- drivers/nvdimm/pmem.c | 4 +- fs/dax.c | 186 ++++++++++++++++++++++++--------------- fs/fuse/virtio_fs.c | 3 +- include/linux/dax.h | 2 +- include/linux/mm.h | 27 +----- include/linux/mm_types.h | 7 +- mm/gup.c | 9 +- mm/huge_memory.c | 6 +- mm/internal.h | 2 - mm/memory-failure.c | 6 +- mm/memory.c | 6 +- mm/memremap.c | 47 +++++----- mm/mm_init.c | 9 +- mm/swap.c | 2 - 14 files changed, 165 insertions(+), 151 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d81faa9d89c9..785b2d2dbc82 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -513,7 +513,7 @@ static int pmem_attach_disk(struct device *dev, pmem->disk = disk; pmem->pgmap.owner = pmem; - pmem->pfn_flags = PFN_DEV; + pmem->pfn_flags = 0; if (is_nd_pfn(dev)) { pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; pmem->pgmap.ops = &fsdax_pagemap_ops; @@ -522,7 +522,6 @@ static int pmem_attach_disk(struct device *dev, pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); pmem->pfn_pad = resource_size(res) - range_len(&pmem->pgmap.range); - pmem->pfn_flags |= PFN_MAP; bb_range = pmem->pgmap.range; bb_range.start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { @@ -532,7 +531,6 @@ static int pmem_attach_disk(struct device *dev, pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); - pmem->pfn_flags |= PFN_MAP; bb_range = pmem->pgmap.range; } else { addr = devm_memremap(dev, pmem->phys_addr, diff --git a/fs/dax.c b/fs/dax.c index 6674540363e8..cf96f3dd4e5f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -71,6 +71,11 @@ static unsigned long dax_to_pfn(void *entry) return xa_to_value(entry) >> DAX_SHIFT; } +static struct folio *dax_to_folio(void *entry) +{ + return page_folio(pfn_to_page(dax_to_pfn(entry))); +} + static void *dax_make_entry(pfn_t pfn, unsigned long flags) { return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT)); @@ -338,19 +343,6 @@ static unsigned long dax_entry_size(void *entry) return PAGE_SIZE; } -static unsigned long dax_end_pfn(void *entry) -{ - return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE; -} - -/* - * Iterate through all mapped pfns represented by an entry, i.e. skip - * 'empty' and 'zero' entries. - */ -#define for_each_mapped_pfn(entry, pfn) \ - for (pfn = dax_to_pfn(entry); \ - pfn < dax_end_pfn(entry); pfn++) - /* * A DAX folio is considered shared if it has no mapping set and ->share (which * shares the ->index field) is non-zero. Note this may return false even if the @@ -359,7 +351,7 @@ static unsigned long dax_end_pfn(void *entry) */ static inline bool dax_folio_is_shared(struct folio *folio) { - return !folio->mapping && folio->page.share; + return !folio->mapping && folio->share; } /* @@ -384,75 +376,117 @@ static void dax_folio_make_shared(struct folio *folio) * folio has previously been mapped into one address space so set the * share count. */ - folio->page.share = 1; + folio->share = 1; } -static inline unsigned long dax_folio_share_put(struct folio *folio) +static inline unsigned long dax_folio_put(struct folio *folio) { - return --folio->page.share; + unsigned long ref; + int order, i; + + if (!dax_folio_is_shared(folio)) + ref = 0; + else + ref = --folio->share; + + if (ref) + return ref; + + folio->mapping = NULL; + order = folio_order(folio); + if (!order) + return 0; + + for (i = 0; i < (1UL << order); i++) { + struct dev_pagemap *pgmap = page_pgmap(&folio->page); + struct page *page = folio_page(folio, i); + struct folio *new_folio = (struct folio *)page; + + ClearPageHead(page); + clear_compound_head(page); + + new_folio->mapping = NULL; + /* + * Reset pgmap which was over-written by + * prep_compound_page(). + */ + new_folio->pgmap = pgmap; + new_folio->share = 0; + WARN_ON_ONCE(folio_ref_count(new_folio)); + } + + return ref; +} + +static void dax_folio_init(void *entry) +{ + struct folio *folio = dax_to_folio(entry); + int order = dax_entry_order(entry); + + /* + * Folio should have been split back to order-0 pages in + * dax_folio_put() when they were removed from their + * final mapping. + */ + WARN_ON_ONCE(folio_order(folio)); + + if (order > 0) { + prep_compound_page(&folio->page, order); + if (order > 1) + INIT_LIST_HEAD(&folio->_deferred_list); + WARN_ON_ONCE(folio_ref_count(folio)); + } } static void dax_associate_entry(void *entry, struct address_space *mapping, - struct vm_area_struct *vma, unsigned long address, bool shared) + struct vm_area_struct *vma, + unsigned long address, bool shared) { - unsigned long size = dax_entry_size(entry), pfn, index; - int i = 0; + unsigned long size = dax_entry_size(entry), index; + struct folio *folio = dax_to_folio(entry); if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) return; index = linear_page_index(vma, address & ~(size - 1)); - for_each_mapped_pfn(entry, pfn) { - struct folio *folio = pfn_folio(pfn); + if (shared && (folio->mapping || dax_folio_is_shared(folio))) { + if (folio->mapping) + dax_folio_make_shared(folio); - if (shared && (folio->mapping || folio->page.share)) { - if (folio->mapping) - dax_folio_make_shared(folio); - - WARN_ON_ONCE(!folio->page.share); - folio->page.share++; - } else { - WARN_ON_ONCE(folio->mapping); - folio->mapping = mapping; - folio->index = index + i++; - } + WARN_ON_ONCE(!folio->share); + WARN_ON_ONCE(dax_entry_order(entry) != folio_order(folio)); + folio->share++; + } else { + WARN_ON_ONCE(folio->mapping); + dax_folio_init(entry); + folio = dax_to_folio(entry); + folio->mapping = mapping; + folio->index = index; } } static void dax_disassociate_entry(void *entry, struct address_space *mapping, - bool trunc) + bool trunc) { - unsigned long pfn; + struct folio *folio = dax_to_folio(entry); if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) return; - for_each_mapped_pfn(entry, pfn) { - struct folio *folio = pfn_folio(pfn); - - WARN_ON_ONCE(trunc && folio_ref_count(folio) > 1); - if (dax_folio_is_shared(folio)) { - /* keep the shared flag if this page is still shared */ - if (dax_folio_share_put(folio) > 0) - continue; - } else - WARN_ON_ONCE(folio->mapping && folio->mapping != mapping); - folio->mapping = NULL; - folio->index = 0; - } + dax_folio_put(folio); } static struct page *dax_busy_page(void *entry) { - unsigned long pfn; + struct folio *folio = dax_to_folio(entry); - for_each_mapped_pfn(entry, pfn) { - struct page *page = pfn_to_page(pfn); + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) + return NULL; - if (page_ref_count(page) > 1) - return page; - } - return NULL; + if (folio_ref_count(folio) - folio_mapcount(folio)) + return &folio->page; + else + return NULL; } /** @@ -785,7 +819,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) EXPORT_SYMBOL_GPL(dax_layout_busy_page); static int __dax_invalidate_entry(struct address_space *mapping, - pgoff_t index, bool trunc) + pgoff_t index, bool trunc) { XA_STATE(xas, &mapping->i_pages, index); int ret = 0; @@ -953,7 +987,8 @@ void dax_break_layout_final(struct inode *inode) wait_page_idle_uninterruptible(page, inode); } while (true); - dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX); + if (!page) + dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX); } EXPORT_SYMBOL_GPL(dax_break_layout_final); @@ -1039,8 +1074,10 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, void *old; dax_disassociate_entry(entry, mapping, false); - dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, - shared); + if (!(flags & DAX_ZERO_PAGE)) + dax_associate_entry(new_entry, mapping, vmf->vma, + vmf->address, shared); + /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -1228,9 +1265,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, goto out; if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1)) goto out; - /* For larger pages we need devmap */ - if (length > 1 && !pfn_t_devmap(*pfnp)) - goto out; + rc = 0; out_check_addr: @@ -1337,7 +1372,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf, *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE); - ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); + ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false); trace_dax_load_hole(inode, vmf, ret); return ret; } @@ -1808,7 +1843,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; bool write = iter->flags & IOMAP_WRITE; unsigned long entry_flags = pmd ? DAX_PMD : 0; - int err = 0; + struct folio *folio; + int ret, err = 0; pfn_t pfn; void *kaddr; @@ -1840,17 +1876,19 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return dax_fault_return(err); } + folio = dax_to_folio(*entry); if (dax_fault_is_synchronous(iter, vmf->vma)) return dax_fault_synchronous_pfnp(pfnp, pfn); - /* insert PMD pfn */ + folio_ref_inc(folio); if (pmd) - return vmf_insert_pfn_pmd(vmf, pfn, write); + ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)), + write); + else + ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write); + folio_put(folio); - /* insert PTE pfn */ - if (write) - return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); - return vmf_insert_mixed(vmf->vma, vmf->address, pfn); + return ret; } static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, @@ -2089,6 +2127,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order); + struct folio *folio; void *entry; vm_fault_t ret; @@ -2106,14 +2145,17 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) xas_set_mark(&xas, PAGECACHE_TAG_DIRTY); dax_lock_entry(&xas, entry); xas_unlock_irq(&xas); + folio = pfn_folio(pfn_t_to_pfn(pfn)); + folio_ref_inc(folio); if (order == 0) - ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); + ret = vmf_insert_page_mkwrite(vmf, &folio->page, true); #ifdef CONFIG_FS_DAX_PMD else if (order == PMD_ORDER) - ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); + ret = vmf_insert_folio_pmd(vmf, folio, FAULT_FLAG_WRITE); #endif else ret = VM_FAULT_FALLBACK; + folio_put(folio); dax_unlock_entry(&xas, entry); trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret); return ret; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 82afe78ec542..2c7b24cb67ad 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1017,8 +1017,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (kaddr) *kaddr = fs->window_kaddr + offset; if (pfn) - *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, - PFN_DEV | PFN_MAP); + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 0); return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; } diff --git a/include/linux/dax.h b/include/linux/dax.h index 2333c30f6d36..dcc9fcdf14e4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -209,7 +209,7 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, static inline bool dax_page_is_idle(struct page *page) { - return page && page_ref_count(page) == 1; + return page && page_ref_count(page) == 0; } #if IS_ENABLED(CONFIG_DAX) diff --git a/include/linux/mm.h b/include/linux/mm.h index 009484a07074..de008efd96aa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1192,6 +1192,8 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +extern void prep_compound_page(struct page *page, unsigned int order); + /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be @@ -1513,25 +1515,6 @@ vm_fault_t finish_fault(struct vm_fault *vmf); * back into memory. */ -#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) -DECLARE_STATIC_KEY_FALSE(devmap_managed_key); - -bool __put_devmap_managed_folio_refs(struct folio *folio, int refs); -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - if (!static_branch_unlikely(&devmap_managed_key)) - return false; - if (!folio_is_zone_device(folio)) - return false; - return __put_devmap_managed_folio_refs(folio, refs); -} -#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - return false; -} -#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ - /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) @@ -1652,12 +1635,6 @@ static inline void put_page(struct page *page) if (folio_test_slab(folio)) return; - /* - * For some devmap managed pages we need to catch refcount transition - * from 2 to 1: - */ - if (put_devmap_managed_folio_refs(folio, 1)) - return; folio_put(folio); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0fa7907d437e..b1827d78ff89 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -296,6 +296,8 @@ typedef struct { * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. + * @share: number of DAX mappings that reference this folio. See + * dax_associate_entry. * @private: Filesystem per-folio data (see folio_attach_private()). * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to @@ -345,7 +347,10 @@ struct folio { struct dev_pagemap *pgmap; }; struct address_space *mapping; - pgoff_t index; + union { + pgoff_t index; + unsigned long share; + }; union { void *private; swp_entry_t swap; diff --git a/mm/gup.c b/mm/gup.c index e5d6454df41d..e5040657870e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -96,8 +96,7 @@ static inline struct folio *try_get_folio(struct page *page, int refs) * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); goto retry; } @@ -116,8 +115,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); } /** @@ -565,8 +563,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs, */ if (unlikely((flags & FOLL_LONGTERM) && !folio_is_longterm_pinnable(folio))) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); return NULL; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e6f4189c1c94..9a15fd3453ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2225,7 +2225,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb->fullmm); arch_check_zapped_pmd(vma, orig_pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); - if (vma_is_special_huge(vma)) { + if (!vma_is_dax(vma) && vma_is_special_huge(vma)) { if (arch_needs_pgtable_deposit()) zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); @@ -2882,13 +2882,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, */ if (arch_needs_pgtable_deposit()) zap_deposited_table(mm, pmd); - if (vma_is_special_huge(vma)) + if (!vma_is_dax(vma) && vma_is_special_huge(vma)) return; if (unlikely(is_pmd_migration_entry(old_pmd))) { swp_entry_t entry; entry = pmd_to_swp_entry(old_pmd); folio = pfn_swap_entry_folio(entry); + } else if (is_huge_zero_pmd(old_pmd)) { + return; } else { page = pmd_page(old_pmd); folio = page_folio(page); diff --git a/mm/internal.h b/mm/internal.h index 70fa96e61c76..aa30282a774a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -737,8 +737,6 @@ static inline void prep_compound_tail(struct page *head, int tail_idx) set_page_private(p, 0); } -extern void prep_compound_page(struct page *page, unsigned int order); - void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern bool free_pages_prepare(struct page *page, unsigned int order); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 327e02fdc029..6257c7f5e941 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -419,18 +419,18 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma, pud = pud_offset(p4d, address); if (!pud_present(*pud)) return 0; - if (pud_devmap(*pud)) + if (pud_trans_huge(*pud)) return PUD_SHIFT; pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; - if (pmd_devmap(*pmd)) + if (pmd_trans_huge(*pmd)) return PMD_SHIFT; pte = pte_offset_map(pmd, address); if (!pte) return 0; ptent = ptep_get(pte); - if (pte_present(ptent) && pte_devmap(ptent)) + if (pte_present(ptent)) ret = PAGE_SHIFT; pte_unmap(pte); return ret; diff --git a/mm/memory.c b/mm/memory.c index f09b4a1d09a8..8d1ea1dd6b52 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3848,13 +3848,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a - * VM_PFNMAP VMA. + * VM_PFNMAP VMA. FS DAX also wants ops->pfn_mkwrite called. * * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ - if (!vmf->page) + if (!vmf->page || is_fsdax_page(vmf->page)) { + vmf->page = NULL; return wp_pfn_shared(vmf); + } return wp_page_shared(vmf, folio); } diff --git a/mm/memremap.c b/mm/memremap.c index 68099af9df4c..9a8879bf1ae4 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -458,8 +458,13 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap); void free_zone_device_folio(struct folio *folio) { - if (WARN_ON_ONCE(!folio->pgmap->ops || - !folio->pgmap->ops->page_free)) + struct dev_pagemap *pgmap = folio->pgmap; + + if (WARN_ON_ONCE(!pgmap->ops)) + return; + + if (WARN_ON_ONCE(pgmap->type != MEMORY_DEVICE_FS_DAX && + !pgmap->ops->page_free)) return; mem_cgroup_uncharge(folio); @@ -484,26 +489,36 @@ void free_zone_device_folio(struct folio *folio) * For other types of ZONE_DEVICE pages, migration is either * handled differently or not done at all, so there is no need * to clear folio->mapping. + * + * FS DAX pages clear the mapping when the folio->share count hits + * zero which indicating the page has been removed from the file + * system mapping. */ - folio->mapping = NULL; - folio->pgmap->ops->page_free(folio_page(folio, 0)); + if (pgmap->type != MEMORY_DEVICE_FS_DAX) + folio->mapping = NULL; - switch (folio->pgmap->type) { + switch (pgmap->type) { case MEMORY_DEVICE_PRIVATE: case MEMORY_DEVICE_COHERENT: - put_dev_pagemap(folio->pgmap); + pgmap->ops->page_free(folio_page(folio, 0)); + put_dev_pagemap(pgmap); break; - case MEMORY_DEVICE_FS_DAX: case MEMORY_DEVICE_GENERIC: /* * Reset the refcount to 1 to prepare for handing out the page * again. */ + pgmap->ops->page_free(folio_page(folio, 0)); folio_set_count(folio, 1); break; + case MEMORY_DEVICE_FS_DAX: + wake_up_var(&folio->page); + break; + case MEMORY_DEVICE_PCI_P2PDMA: + pgmap->ops->page_free(folio_page(folio, 0)); break; } } @@ -519,21 +534,3 @@ void zone_device_page_init(struct page *page) lock_page(page); } EXPORT_SYMBOL_GPL(zone_device_page_init); - -#ifdef CONFIG_FS_DAX -bool __put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - if (folio->pgmap->type != MEMORY_DEVICE_FS_DAX) - return false; - - /* - * fsdax page refcounts are 1-based, rather than 0-based: if - * refcount is 1, then the page is free and the refcount is - * stable because nobody holds a reference on the page. - */ - if (folio_ref_sub_return(folio, refs) == 1) - wake_up_var(&folio->_refcount); - return true; -} -EXPORT_SYMBOL(__put_devmap_managed_folio_refs); -#endif /* CONFIG_FS_DAX */ diff --git a/mm/mm_init.c b/mm/mm_init.c index 73e97ce95f58..133640a93d1d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1026,23 +1026,22 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, } /* - * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC and - * MEMORY_TYPE_FS_DAX pages are released directly to the driver page - * allocator which will set the page count to 1 when allocating the - * page. + * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC are released + * directly to the driver page allocator which will set the page count + * to 1 when allocating the page. * * MEMORY_TYPE_GENERIC and MEMORY_TYPE_FS_DAX pages automatically have * their refcount reset to one whenever they are freed (ie. after * their refcount drops to 0). */ switch (pgmap->type) { + case MEMORY_DEVICE_FS_DAX: case MEMORY_DEVICE_PRIVATE: case MEMORY_DEVICE_COHERENT: case MEMORY_DEVICE_PCI_P2PDMA: set_page_count(page, 0); break; - case MEMORY_DEVICE_FS_DAX: case MEMORY_DEVICE_GENERIC: break; } diff --git a/mm/swap.c b/mm/swap.c index fc8281ef4241..7523b65d8caa 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -956,8 +956,6 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } - if (put_devmap_managed_folio_refs(folio, nr_refs)) - continue; if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_folio(folio); continue; From aed877c2b4257a25b2429f165542f86125871071 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:15 +1100 Subject: [PATCH 1215/2157] device/dax: properly refcount device dax pages when mapping Device DAX pages are currently not reference counted when mapped, instead relying on the devmap PTE bit to ensure mapping code will not get/put references. This requires special handling in various page table walkers, particularly GUP, to manage references on the underlying pgmap to ensure the pages remain valid. However there is no reason these pages can't be refcounted properly at map time. Doning so eliminates the need for the devmap PTE bit, freeing up a precious PTE bit. It also simplifies GUP as it no longer needs to manage the special pgmap references and can instead just treat the pages normally as defined by vm_normal_page(). Link: https://lkml.kernel.org/r/968d3a8e9157e7492e85d065765c027e525f9fc9.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Wiliams Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcow (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- drivers/dax/device.c | 15 +++++++++------ mm/memremap.c | 14 +++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index bc871a34b9cd..328231cfb028 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -125,11 +125,12 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, 0); dax_set_mapping(vmf, pfn, fault_size); - return vmf_insert_mixed(vmf->vma, vmf->address, pfn); + return vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), + vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, @@ -168,11 +169,12 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, 0); dax_set_mapping(vmf, pfn, fault_size); - return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_folio_pmd(vmf, page_folio(pfn_t_to_page(pfn)), + vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD @@ -213,11 +215,12 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, 0); dax_set_mapping(vmf, pfn, fault_size); - return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_folio_pud(vmf, page_folio(pfn_t_to_page(pfn)), + vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, diff --git a/mm/memremap.c b/mm/memremap.c index 9a8879bf1ae4..2aebc1b192da 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -460,11 +460,7 @@ void free_zone_device_folio(struct folio *folio) { struct dev_pagemap *pgmap = folio->pgmap; - if (WARN_ON_ONCE(!pgmap->ops)) - return; - - if (WARN_ON_ONCE(pgmap->type != MEMORY_DEVICE_FS_DAX && - !pgmap->ops->page_free)) + if (WARN_ON_ONCE(!pgmap)) return; mem_cgroup_uncharge(folio); @@ -494,12 +490,15 @@ void free_zone_device_folio(struct folio *folio) * zero which indicating the page has been removed from the file * system mapping. */ - if (pgmap->type != MEMORY_DEVICE_FS_DAX) + if (pgmap->type != MEMORY_DEVICE_FS_DAX && + pgmap->type != MEMORY_DEVICE_GENERIC) folio->mapping = NULL; switch (pgmap->type) { case MEMORY_DEVICE_PRIVATE: case MEMORY_DEVICE_COHERENT: + if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->page_free)) + break; pgmap->ops->page_free(folio_page(folio, 0)); put_dev_pagemap(pgmap); break; @@ -509,7 +508,6 @@ void free_zone_device_folio(struct folio *folio) * Reset the refcount to 1 to prepare for handing out the page * again. */ - pgmap->ops->page_free(folio_page(folio, 0)); folio_set_count(folio, 1); break; @@ -518,6 +516,8 @@ void free_zone_device_folio(struct folio *folio) break; case MEMORY_DEVICE_PCI_P2PDMA: + if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->page_free)) + break; pgmap->ops->page_free(folio_page(folio, 0)); break; } From 937582ee8e8d227c30ec147629a0179131feaa80 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:34 +0000 Subject: [PATCH 1216/2157] mm/mremap: correctly handle partial mremap() of VMA starting at 0 Patch series "refactor mremap and fix bug", v3. The existing mremap() logic has grown organically over a very long period of time, resulting in code that is in many parts, very difficult to follow and full of subtleties and sources of confusion. In addition, it is difficult to thread state through the operation correctly, as function arguments have expanded, some parameters are expected to be temporarily altered during the operation, others are intended to remain static and some can be overridden. This series completely refactors the mremap implementation, sensibly separating functions, adding comments to explain the more subtle aspects of the implementation and making use of small structs to thread state through everything. The reason for doing so is to lay the groundwork for planned future changes to the mremap logic, changes which require the ability to easily pass around state. Additionally, it would be unhelpful to add yet more logic to code that is already difficult to follow without first refactoring it like this. The first patch in this series additionally fixes a bug when a VMA with start address zero is partially remapped. Tested on real hardware under heavy workload and all self tests are passing. This patch (of 3): Consider the case of a partial mremap() (that results in a VMA split) of an accountable VMA (i.e. which has the VM_ACCOUNT flag set) whose start address is zero, with the MREMAP_MAYMOVE flag specified and a scenario where a move does in fact occur: addr end | | v v |-------------| | vma | |-------------| 0 This move is affected by unmapping the range [addr, end). In order to prevent an incorrect decrement of accounted memory which has already been determined, the mremap() code in move_vma() clears VM_ACCOUNT from the VMA prior to doing so, before reestablishing it in each of the VMAs post-split: addr end | | v v |---| |---| | A | | B | |---| |---| Commit 6b73cff239e5 ("mm: change munmap splitting order and move_vma()") changed this logic such as to determine whether there is a need to do so by establishing account_start and account_end and, in the instance where such an operation is required, assigning them to vma->vm_start and vma->vm_end. Later the code checks if the operation is required for 'A' referenced above thusly: if (account_start) { ... } However, if the VMA described above has vma->vm_start == 0, which is now assigned to account_start, this branch will not be executed. As a result, the VMA 'A' above will remain stripped of its VM_ACCOUNT flag, incorrectly. The fix is to simply convert these variables to booleans and set them as required. Link: https://lkml.kernel.org/r/cover.1741639347.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/dc55cb6db25d97c3d9e460de4986a323fa959676.1741639347.git.lorenzo.stoakes@oracle.com Fixes: 6b73cff239e5 ("mm: change munmap splitting order and move_vma()") Signed-off-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index cff7f552f909..c3e4c86d0b8d 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -705,8 +705,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, unsigned long vm_flags = vma->vm_flags; unsigned long new_pgoff; unsigned long moved_len; - unsigned long account_start = 0; - unsigned long account_end = 0; + bool account_start = false; + bool account_end = false; unsigned long hiwater_vm; int err = 0; bool need_rmap_locks; @@ -790,9 +790,9 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) { vm_flags_clear(vma, VM_ACCOUNT); if (vma->vm_start < old_addr) - account_start = vma->vm_start; + account_start = true; if (vma->vm_end > old_addr + old_len) - account_end = vma->vm_end; + account_end = true; } /* @@ -832,7 +832,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, /* OOM: unable to split vma, just get accounts right */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) vm_acct_memory(old_len >> PAGE_SHIFT); - account_start = account_end = 0; + account_start = account_end = false; } if (vm_flags & VM_LOCKED) { From 85ea6bdd88a27317875088b54119bec209c6c09c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:35 +0000 Subject: [PATCH 1217/2157] mm/mremap: refactor mremap() system call implementation Place checks into a separate function so the mremap() system call is less egregiously long, remove unnecessary mremap_to() offset_in_page() check and just check that earlier so we keep all such basic checks together. Separate out the VMA in-place expansion, hugetlb and expand/move logic into separate, readable functions. De-duplicate code where possible, add comments and ensure that all error handling explicitly specifies the error at the point of it occurring rather than setting a prefixed error value and implicitly setting (which is bug prone). This lays the groundwork for subsequent patches further simplifying and extending the mremap() implementation. Link: https://lkml.kernel.org/r/fc4a925396dc3cc36791ec92c4d329209e816308.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Harry Yoo Reviewed-by: Vlastimil Babka Cc: Liam R. Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 405 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 251 insertions(+), 154 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index c3e4c86d0b8d..c4abda8dfc57 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -942,33 +942,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long ret; unsigned long map_flags = 0; - if (offset_in_page(new_addr)) - return -EINVAL; - + /* Is the new length or address silly? */ if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) return -EINVAL; - /* Ensure the old/new locations do not overlap */ + /* Ensure the old/new locations do not overlap. */ if (addr + old_len > new_addr && new_addr + new_len > addr) return -EINVAL; - /* - * move_vma() need us to stay 4 maps below the threshold, otherwise - * it will bail out at the very beginning. - * That is a problem if we have already unmaped the regions here - * (new_addr, and old_addr), because userspace will not know the - * state of the vma's after it gets -ENOMEM. - * So, to avoid such scenario we can pre-compute if the whole - * operation has high chances to success map-wise. - * Worst-scenario case is when both vma's (new_addr and old_addr) get - * split in 3 before unmapping it. - * That means 2 more maps (1 for each) to the ones we already hold. - * Check whether current map count plus 2 still leads us to 4 maps below - * the threshold, otherwise return -ENOMEM here to be more safe. - */ - if ((mm->map_count + 2) >= sysctl_max_map_count - 3) - return -ENOMEM; - if (flags & MREMAP_FIXED) { /* * In mremap_to(). @@ -1035,6 +1016,218 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) return 1; } +/* Do the mremap() flags require that the new_addr parameter be specified? */ +static bool implies_new_addr(unsigned long flags) +{ + return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP); +} + +/* + * Are the parameters passed to mremap() valid? If so return 0, otherwise return + * error. + */ +static unsigned long check_mremap_params(unsigned long addr, + unsigned long flags, + unsigned long old_len, + unsigned long new_len, + unsigned long new_addr) +{ + /* Ensure no unexpected flag values. */ + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) + return -EINVAL; + + /* Start address must be page-aligned. */ + if (offset_in_page(addr)) + return -EINVAL; + + /* + * We allow a zero old-len as a special case + * for DOS-emu "duplicate shm area" thing. But + * a zero new-len is nonsensical. + */ + if (!PAGE_ALIGN(new_len)) + return -EINVAL; + + /* Remainder of checks are for cases with specific new_addr. */ + if (!implies_new_addr(flags)) + return 0; + + /* The new address must be page-aligned. */ + if (offset_in_page(new_addr)) + return -EINVAL; + + /* A fixed address implies a move. */ + if (!(flags & MREMAP_MAYMOVE)) + return -EINVAL; + + /* MREMAP_DONTUNMAP does not allow resizing in the process. */ + if (flags & MREMAP_DONTUNMAP && old_len != new_len) + return -EINVAL; + + /* + * move_vma() need us to stay 4 maps below the threshold, otherwise + * it will bail out at the very beginning. + * That is a problem if we have already unmaped the regions here + * (new_addr, and old_addr), because userspace will not know the + * state of the vma's after it gets -ENOMEM. + * So, to avoid such scenario we can pre-compute if the whole + * operation has high chances to success map-wise. + * Worst-scenario case is when both vma's (new_addr and old_addr) get + * split in 3 before unmapping it. + * That means 2 more maps (1 for each) to the ones we already hold. + * Check whether current map count plus 2 still leads us to 4 maps below + * the threshold, otherwise return -ENOMEM here to be more safe. + */ + if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3) + return -ENOMEM; + + return 0; +} + +/* + * We know we can expand the VMA in-place by delta pages, so do so. + * + * If we discover the VMA is locked, update mm_struct statistics accordingly and + * indicate so to the caller. + */ +static unsigned long expand_vma_inplace(struct vm_area_struct *vma, + unsigned long delta, bool *locked) +{ + struct mm_struct *mm = current->mm; + long pages = delta >> PAGE_SHIFT; + VMA_ITERATOR(vmi, mm, vma->vm_end); + long charged = 0; + + if (vma->vm_flags & VM_ACCOUNT) { + if (security_vm_enough_memory_mm(mm, pages)) + return -ENOMEM; + + charged = pages; + } + + /* + * Function vma_merge_extend() is called on the + * extension we are adding to the already existing vma, + * vma_merge_extend() will merge this extension with the + * already existing vma (expand operation itself) and + * possibly also with the next vma if it becomes + * adjacent to the expanded vma and otherwise + * compatible. + */ + vma = vma_merge_extend(&vmi, vma, delta); + if (!vma) { + vm_unacct_memory(charged); + return -ENOMEM; + } + + vm_stat_account(mm, vma->vm_flags, pages); + if (vma->vm_flags & VM_LOCKED) { + mm->locked_vm += pages; + *locked = true; + } + + return 0; +} + +static bool align_hugetlb(struct vm_area_struct *vma, + unsigned long addr, + unsigned long new_addr, + unsigned long *old_len_ptr, + unsigned long *new_len_ptr, + unsigned long *delta_ptr) +{ + unsigned long old_len = *old_len_ptr; + unsigned long new_len = *new_len_ptr; + struct hstate *h __maybe_unused = hstate_vma(vma); + + old_len = ALIGN(old_len, huge_page_size(h)); + new_len = ALIGN(new_len, huge_page_size(h)); + + /* addrs must be huge page aligned */ + if (addr & ~huge_page_mask(h)) + return false; + if (new_addr & ~huge_page_mask(h)) + return false; + + /* + * Don't allow remap expansion, because the underlying hugetlb + * reservation is not yet capable to handle split reservation. + */ + if (new_len > old_len) + return false; + + *old_len_ptr = old_len; + *new_len_ptr = new_len; + *delta_ptr = abs_diff(old_len, new_len); + return true; +} + +/* + * We are mremap()'ing without specifying a fixed address to move to, but are + * requesting that the VMA's size be increased. + * + * Try to do so in-place, if this fails, then move the VMA to a new location to + * action the change. + */ +static unsigned long expand_vma(struct vm_area_struct *vma, + unsigned long addr, unsigned long old_len, + unsigned long new_len, unsigned long flags, + bool *locked_ptr, unsigned long *new_addr_ptr, + struct vm_userfaultfd_ctx *uf_ptr, + struct list_head *uf_unmap_ptr) +{ + unsigned long err; + unsigned long map_flags; + unsigned long new_addr; /* We ignore any user-supplied one. */ + pgoff_t pgoff; + + err = resize_is_valid(vma, addr, old_len, new_len, flags); + if (err) + return err; + + /* + * [addr, old_len) spans precisely to the end of the VMA, so try to + * expand it in-place. + */ + if (old_len == vma->vm_end - addr && + vma_expandable(vma, new_len - old_len)) { + err = expand_vma_inplace(vma, new_len - old_len, locked_ptr); + if (IS_ERR_VALUE(err)) + return err; + + /* + * We want to populate the newly expanded portion of the VMA to + * satisfy the expectation that mlock()'ing a VMA maintains all + * of its pages in memory. + */ + if (*locked_ptr) + *new_addr_ptr = addr; + + /* OK we're done! */ + return addr; + } + + /* + * We weren't able to just expand or shrink the area, + * we need to create a new one and move it. + */ + + /* We're not allowed to move the VMA, so error out. */ + if (!(flags & MREMAP_MAYMOVE)) + return -ENOMEM; + + /* Find a new location to move the VMA to. */ + map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0; + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags); + if (IS_ERR_VALUE(new_addr)) + return new_addr; + *new_addr_ptr = new_addr; + + return move_vma(vma, addr, old_len, new_len, new_addr, + locked_ptr, flags, uf_ptr, uf_unmap_ptr); +} + /* * Expand (or shrink) an existing mapping, potentially moving it at the * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) @@ -1048,7 +1241,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long ret = -EINVAL; + unsigned long ret; + unsigned long delta; bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; LIST_HEAD(uf_unmap_early); @@ -1067,70 +1261,38 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, */ addr = untagged_addr(addr); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) - return ret; - - if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) - return ret; - - /* - * MREMAP_DONTUNMAP is always a move and it does not allow resizing - * in the process. - */ - if (flags & MREMAP_DONTUNMAP && - (!(flags & MREMAP_MAYMOVE) || old_len != new_len)) - return ret; - - - if (offset_in_page(addr)) + ret = check_mremap_params(addr, flags, old_len, new_len, new_addr); + if (ret) return ret; old_len = PAGE_ALIGN(old_len); new_len = PAGE_ALIGN(new_len); + delta = abs_diff(old_len, new_len); - /* - * We allow a zero old-len as a special case - * for DOS-emu "duplicate shm area" thing. But - * a zero new-len is nonsensical. - */ - if (!new_len) - return ret; - - if (mmap_write_lock_killable(current->mm)) + if (mmap_write_lock_killable(mm)) return -EINTR; + vma = vma_lookup(mm, addr); if (!vma) { ret = -EFAULT; goto out; } - /* Don't allow remapping vmas when they have already been sealed */ + /* If mseal()'d, mremap() is prohibited. */ if (!can_modify_vma(vma)) { ret = -EPERM; goto out; } - if (is_vm_hugetlb_page(vma)) { - struct hstate *h __maybe_unused = hstate_vma(vma); - - old_len = ALIGN(old_len, huge_page_size(h)); - new_len = ALIGN(new_len, huge_page_size(h)); - - /* addrs must be huge page aligned */ - if (addr & ~huge_page_mask(h)) - goto out; - if (new_addr & ~huge_page_mask(h)) - goto out; - - /* - * Don't allow remap expansion, because the underlying hugetlb - * reservation is not yet capable to handle split reservation. - */ - if (new_len > old_len) - goto out; + /* Align to hugetlb page size, if required. */ + if (is_vm_hugetlb_page(vma) && + !align_hugetlb(vma, addr, new_addr, &old_len, &new_len, &delta)) { + ret = -EINVAL; + goto out; } - if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { + /* Are we RELOCATING the VMA to a SPECIFIC address? */ + if (implies_new_addr(flags)) { ret = mremap_to(addr, old_len, new_addr, new_len, &locked, flags, &uf, &uf_unmap_early, &uf_unmap); @@ -1138,109 +1300,44 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } /* - * Always allow a shrinking remap: that just unmaps - * the unnecessary pages.. - * do_vmi_munmap does all the needed commit accounting, and - * unlocks the mmap_lock if so directed. + * From here on in we are only RESIZING the VMA, attempting to do so + * in-place, moving the VMA if we cannot. */ - if (old_len >= new_len) { + + /* NO-OP CASE - resizing to the same size. */ + if (new_len == old_len) { + ret = addr; + goto out; + } + + /* SHRINK CASE. Can always be done in-place. */ + if (new_len < old_len) { VMA_ITERATOR(vmi, mm, addr + new_len); - if (old_len == new_len) { - ret = addr; - goto out; - } - - ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len, + /* + * Simply unmap the shrunken portion of the VMA. This does all + * the needed commit accounting, unlocking the mmap lock. + */ + ret = do_vmi_munmap(&vmi, mm, addr + new_len, delta, &uf_unmap, true); if (ret) goto out; + /* We succeeded, mmap lock released for us. */ ret = addr; goto out_unlocked; } - /* - * Ok, we need to grow.. - */ - ret = resize_is_valid(vma, addr, old_len, new_len, flags); - if (ret) - goto out; + /* EXPAND case. We try to do in-place, if we can't, then we move it. */ + ret = expand_vma(vma, addr, old_len, new_len, flags, &locked, &new_addr, + &uf, &uf_unmap); - /* old_len exactly to the end of the area.. - */ - if (old_len == vma->vm_end - addr) { - unsigned long delta = new_len - old_len; - - /* can we just expand the current mapping? */ - if (vma_expandable(vma, delta)) { - long pages = delta >> PAGE_SHIFT; - VMA_ITERATOR(vmi, mm, vma->vm_end); - long charged = 0; - - if (vma->vm_flags & VM_ACCOUNT) { - if (security_vm_enough_memory_mm(mm, pages)) { - ret = -ENOMEM; - goto out; - } - charged = pages; - } - - /* - * Function vma_merge_extend() is called on the - * extension we are adding to the already existing vma, - * vma_merge_extend() will merge this extension with the - * already existing vma (expand operation itself) and - * possibly also with the next vma if it becomes - * adjacent to the expanded vma and otherwise - * compatible. - */ - vma = vma_merge_extend(&vmi, vma, delta); - if (!vma) { - vm_unacct_memory(charged); - ret = -ENOMEM; - goto out; - } - - vm_stat_account(mm, vma->vm_flags, pages); - if (vma->vm_flags & VM_LOCKED) { - mm->locked_vm += pages; - locked = true; - new_addr = addr; - } - ret = addr; - goto out; - } - } - - /* - * We weren't able to just expand or shrink the area, - * we need to create a new one and move it.. - */ - ret = -ENOMEM; - if (flags & MREMAP_MAYMOVE) { - unsigned long map_flags = 0; - if (vma->vm_flags & VM_MAYSHARE) - map_flags |= MAP_SHARED; - - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, - vma->vm_pgoff + - ((addr - vma->vm_start) >> PAGE_SHIFT), - map_flags); - if (IS_ERR_VALUE(new_addr)) { - ret = new_addr; - goto out; - } - - ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, flags, &uf, &uf_unmap); - } out: if (offset_in_page(ret)) locked = false; - mmap_write_unlock(current->mm); + mmap_write_unlock(mm); if (locked && new_len > old_len) - mm_populate(new_addr + old_len, new_len - old_len); + mm_populate(new_addr + old_len, delta); out_unlocked: userfaultfd_unmap_complete(mm, &uf_unmap_early); mremap_userfaultfd_complete(&uf, addr, ret, old_len); From 221bf5cac5c2f3b9d8a07f4b2738225a59962945 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:36 +0000 Subject: [PATCH 1218/2157] mm/mremap: introduce and use vma_remap_struct threaded state A number of mremap() calls both pass around and modify a large number of parameters, making the code less readable and often repeatedly having to determine things such as VMA, size delta, and more. Avoid this by using the common pattern of passing a state object through the operation, updating it as we go. We introduce the vma_remap_struct or 'VRM' for this purpose. This also gives us the ability to accumulate further state through the operation that would otherwise require awkward and error-prone pointer passing. We can also now trivially define helper functions that operate on a VRM object. This pattern has proven itself to be very powerful when implemented for VMA merge, VMA unmapping and memory mapping operations, so it is battle-tested and functional. We both introduce the data structure and use it, introducing helper functions as needed to make things readable, we move some state such as mmap lock and mlock() status to the VRM, we introduce a means of classifying the type of mremap() operation and de-duplicate the get_unmapped_area() lookup. We also neatly thread userfaultfd state throughout the operation. Note that there is further refactoring to be done, chiefly adjust move_vma() to accept a VRM parameter. We defer this as there is pre-requisite work required to be able to do so which we will do in a subsequent patch. Link: https://lkml.kernel.org/r/27951739dc83b2b1523b81fa9c009ba348388d40.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Reviewed-by: Harry Yoo Cc: Liam R. Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 592 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 374 insertions(+), 218 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index c4abda8dfc57..af022e3b89e2 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -32,6 +32,44 @@ #include "internal.h" +/* Classify the kind of remap operation being performed. */ +enum mremap_type { + MREMAP_INVALID, /* Initial state. */ + MREMAP_NO_RESIZE, /* old_len == new_len, if not moved, do nothing. */ + MREMAP_SHRINK, /* old_len > new_len. */ + MREMAP_EXPAND, /* old_len < new_len. */ +}; + +/* + * Describes a VMA mremap() operation and is threaded throughout it. + * + * Any of the fields may be mutated by the operation, however these values will + * always accurately reflect the remap (for instance, we may adjust lengths and + * delta to account for hugetlb alignment). + */ +struct vma_remap_struct { + /* User-provided state. */ + unsigned long addr; /* User-specified address from which we remap. */ + unsigned long old_len; /* Length of range being remapped. */ + unsigned long new_len; /* Desired new length of mapping. */ + unsigned long flags; /* user-specified MREMAP_* flags. */ + unsigned long new_addr; /* Optionally, desired new address. */ + + /* uffd state. */ + struct vm_userfaultfd_ctx *uf; + struct list_head *uf_unmap_early; + struct list_head *uf_unmap; + + /* VMA state, determined in do_mremap(). */ + struct vm_area_struct *vma; + + /* Internal state, determined in do_mremap(). */ + unsigned long delta; /* Absolute delta of old_len,new_len. */ + bool mlocked; /* Was the VMA mlock()'d? */ + enum mremap_type remap_type; /* expand, shrink, etc. */ + bool mmap_locked; /* Is mm currently write-locked? */ +}; + static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -693,10 +731,95 @@ unsigned long move_page_tables(struct vm_area_struct *vma, return len + old_addr - old_end; /* how much done */ } +/* Set vrm->delta to the difference in VMA size specified by user. */ +static void vrm_set_delta(struct vma_remap_struct *vrm) +{ + vrm->delta = abs_diff(vrm->old_len, vrm->new_len); +} + +/* Determine what kind of remap this is - shrink, expand or no resize at all. */ +static enum mremap_type vrm_remap_type(struct vma_remap_struct *vrm) +{ + if (vrm->delta == 0) + return MREMAP_NO_RESIZE; + + if (vrm->old_len > vrm->new_len) + return MREMAP_SHRINK; + + return MREMAP_EXPAND; +} + +/* + * When moving a VMA to vrm->new_adr, does this result in the new and old VMAs + * overlapping? + */ +static bool vrm_overlaps(struct vma_remap_struct *vrm) +{ + unsigned long start_old = vrm->addr; + unsigned long start_new = vrm->new_addr; + unsigned long end_old = vrm->addr + vrm->old_len; + unsigned long end_new = vrm->new_addr + vrm->new_len; + + /* + * start_old end_old + * |-----------| + * | | + * |-----------| + * |-------------| + * | | + * |-------------| + * start_new end_new + */ + if (end_old > start_new && end_new > start_old) + return true; + + return false; +} + +/* Do the mremap() flags require that the new_addr parameter be specified? */ +static bool vrm_implies_new_addr(struct vma_remap_struct *vrm) +{ + return vrm->flags & (MREMAP_FIXED | MREMAP_DONTUNMAP); +} + +/* + * Find an unmapped area for the requested vrm->new_addr. + * + * If MREMAP_FIXED then this is equivalent to a MAP_FIXED mmap() call. If only + * MREMAP_DONTUNMAP is set, then this is equivalent to providing a hint to + * mmap(), otherwise this is equivalent to mmap() specifying a NULL address. + * + * Returns 0 on success (with vrm->new_addr updated), or an error code upon + * failure. + */ +static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm) +{ + struct vm_area_struct *vma = vrm->vma; + unsigned long map_flags = 0; + /* Page Offset _into_ the VMA. */ + pgoff_t internal_pgoff = (vrm->addr - vma->vm_start) >> PAGE_SHIFT; + pgoff_t pgoff = vma->vm_pgoff + internal_pgoff; + unsigned long new_addr = vrm_implies_new_addr(vrm) ? vrm->new_addr : 0; + unsigned long res; + + if (vrm->flags & MREMAP_FIXED) + map_flags |= MAP_FIXED; + if (vma->vm_flags & VM_MAYSHARE) + map_flags |= MAP_SHARED; + + res = get_unmapped_area(vma->vm_file, new_addr, vrm->new_len, pgoff, + map_flags); + if (IS_ERR_VALUE(res)) + return res; + + vrm->new_addr = res; + return 0; +} + static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, - bool *locked, unsigned long flags, + bool *mlocked, unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { long to_account = new_len - old_len; @@ -837,7 +960,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (vm_flags & VM_LOCKED) { mm->locked_vm += new_len >> PAGE_SHIFT; - *locked = true; + *mlocked = true; } mm->hiwater_vm = hiwater_vm; @@ -860,18 +983,15 @@ static unsigned long move_vma(struct vm_area_struct *vma, * resize_is_valid() - Ensure the vma can be resized to the new length at the give * address. * - * @vma: The vma to resize - * @addr: The old address - * @old_len: The current size - * @new_len: The desired size - * @flags: The vma flags - * * Return 0 on success, error otherwise. */ -static int resize_is_valid(struct vm_area_struct *vma, unsigned long addr, - unsigned long old_len, unsigned long new_len, unsigned long flags) +static int resize_is_valid(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = vrm->vma; + unsigned long addr = vrm->addr; + unsigned long old_len = vrm->old_len; + unsigned long new_len = vrm->new_len; unsigned long pgoff; /* @@ -883,11 +1003,12 @@ static int resize_is_valid(struct vm_area_struct *vma, unsigned long addr, * behavior. As a result, fail such attempts. */ if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) { - pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid); + pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", + current->comm, current->pid); return -EINVAL; } - if ((flags & MREMAP_DONTUNMAP) && + if ((vrm->flags & MREMAP_DONTUNMAP) && (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))) return -EINVAL; @@ -907,99 +1028,122 @@ static int resize_is_valid(struct vm_area_struct *vma, unsigned long addr, if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) return -EFAULT; - if (!mlock_future_ok(mm, vma->vm_flags, new_len - old_len)) + if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta)) return -EAGAIN; - if (!may_expand_vm(mm, vma->vm_flags, - (new_len - old_len) >> PAGE_SHIFT)) + if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT)) return -ENOMEM; return 0; } /* - * mremap_to() - remap a vma to a new location - * @addr: The old address - * @old_len: The old size - * @new_addr: The target address - * @new_len: The new size - * @locked: If the returned vma is locked (VM_LOCKED) - * @flags: the mremap flags - * @uf: The mremap userfaultfd context - * @uf_unmap_early: The userfaultfd unmap early context - * @uf_unmap: The userfaultfd unmap context + * The user has requested that the VMA be shrunk (i.e., old_len > new_len), so + * execute this, optionally dropping the mmap lock when we do so. * - * Returns: The new address of the vma or an error. + * In both cases this invalidates the VMA, however if we don't drop the lock, + * then load the correct VMA into vrm->vma afterwards. */ -static unsigned long mremap_to(unsigned long addr, unsigned long old_len, - unsigned long new_addr, unsigned long new_len, bool *locked, - unsigned long flags, struct vm_userfaultfd_ctx *uf, - struct list_head *uf_unmap_early, - struct list_head *uf_unmap) +static unsigned long shrink_vma(struct vma_remap_struct *vrm, + bool drop_lock) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long ret; - unsigned long map_flags = 0; + unsigned long unmap_start = vrm->addr + vrm->new_len; + unsigned long unmap_bytes = vrm->delta; + unsigned long res; + VMA_ITERATOR(vmi, mm, unmap_start); + + VM_BUG_ON(vrm->remap_type != MREMAP_SHRINK); + + res = do_vmi_munmap(&vmi, mm, unmap_start, unmap_bytes, + vrm->uf_unmap, drop_lock); + vrm->vma = NULL; /* Invalidated. */ + if (res) + return res; + + /* + * If we've not dropped the lock, then we should reload the VMA to + * replace the invalidated VMA with the one that may have now been + * split. + */ + if (drop_lock) { + vrm->mmap_locked = false; + } else { + vrm->vma = vma_lookup(mm, vrm->addr); + if (!vrm->vma) + return -EFAULT; + } + + return 0; +} + +/* + * mremap_to() - remap a vma to a new location. + * Returns: The new address of the vma or an error. + */ +static unsigned long mremap_to(struct vma_remap_struct *vrm) +{ + struct mm_struct *mm = current->mm; + unsigned long err; /* Is the new length or address silly? */ - if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) + if (vrm->new_len > TASK_SIZE || + vrm->new_addr > TASK_SIZE - vrm->new_len) return -EINVAL; - /* Ensure the old/new locations do not overlap. */ - if (addr + old_len > new_addr && new_addr + new_len > addr) + if (vrm_overlaps(vrm)) return -EINVAL; - if (flags & MREMAP_FIXED) { + if (vrm->flags & MREMAP_FIXED) { /* * In mremap_to(). * VMA is moved to dst address, and munmap dst first. * do_munmap will check if dst is sealed. */ - ret = do_munmap(mm, new_addr, new_len, uf_unmap_early); - if (ret) - return ret; + err = do_munmap(mm, vrm->new_addr, vrm->new_len, + vrm->uf_unmap_early); + vrm->vma = NULL; /* Invalidated. */ + if (err) + return err; + + /* + * If we remap a portion of a VMA elsewhere in the same VMA, + * this can invalidate the old VMA. Reset. + */ + vrm->vma = vma_lookup(mm, vrm->addr); + if (!vrm->vma) + return -EFAULT; } - if (old_len > new_len) { - ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap); - if (ret) - return ret; - old_len = new_len; + if (vrm->remap_type == MREMAP_SHRINK) { + err = shrink_vma(vrm, /* drop_lock= */false); + if (err) + return err; + + /* Set up for the move now shrink has been executed. */ + vrm->old_len = vrm->new_len; } - vma = vma_lookup(mm, addr); - if (!vma) - return -EFAULT; - - ret = resize_is_valid(vma, addr, old_len, new_len, flags); - if (ret) - return ret; + err = resize_is_valid(vrm); + if (err) + return err; /* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */ - if (flags & MREMAP_DONTUNMAP && - !may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) { - return -ENOMEM; + if (vrm->flags & MREMAP_DONTUNMAP) { + vm_flags_t vm_flags = vrm->vma->vm_flags; + unsigned long pages = vrm->old_len >> PAGE_SHIFT; + + if (!may_expand_vm(mm, vm_flags, pages)) + return -ENOMEM; } - if (flags & MREMAP_FIXED) - map_flags |= MAP_FIXED; + err = vrm_set_new_addr(vrm); + if (err) + return err; - if (vma->vm_flags & VM_MAYSHARE) - map_flags |= MAP_SHARED; - - ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff + - ((addr - vma->vm_start) >> PAGE_SHIFT), - map_flags); - if (IS_ERR_VALUE(ret)) - return ret; - - /* We got a new mapping */ - if (!(flags & MREMAP_FIXED)) - new_addr = ret; - - return move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, - uf, uf_unmap); + return move_vma(vrm->vma, vrm->addr, vrm->old_len, vrm->new_len, + vrm->new_addr, &vrm->mlocked, vrm->flags, + vrm->uf, vrm->uf_unmap); } static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) @@ -1016,22 +1160,33 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) return 1; } -/* Do the mremap() flags require that the new_addr parameter be specified? */ -static bool implies_new_addr(unsigned long flags) +/* Determine whether we are actually able to execute an in-place expansion. */ +static bool vrm_can_expand_in_place(struct vma_remap_struct *vrm) { - return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP); + /* Number of bytes from vrm->addr to end of VMA. */ + unsigned long suffix_bytes = vrm->vma->vm_end - vrm->addr; + + /* If end of range aligns to end of VMA, we can just expand in-place. */ + if (suffix_bytes != vrm->old_len) + return false; + + /* Check whether this is feasible. */ + if (!vma_expandable(vrm->vma, vrm->delta)) + return false; + + return true; } /* * Are the parameters passed to mremap() valid? If so return 0, otherwise return * error. */ -static unsigned long check_mremap_params(unsigned long addr, - unsigned long flags, - unsigned long old_len, - unsigned long new_len, - unsigned long new_addr) +static unsigned long check_mremap_params(struct vma_remap_struct *vrm) + { + unsigned long addr = vrm->addr; + unsigned long flags = vrm->flags; + /* Ensure no unexpected flag values. */ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP)) return -EINVAL; @@ -1045,15 +1200,15 @@ static unsigned long check_mremap_params(unsigned long addr, * for DOS-emu "duplicate shm area" thing. But * a zero new-len is nonsensical. */ - if (!PAGE_ALIGN(new_len)) + if (!PAGE_ALIGN(vrm->new_len)) return -EINVAL; /* Remainder of checks are for cases with specific new_addr. */ - if (!implies_new_addr(flags)) + if (!vrm_implies_new_addr(vrm)) return 0; /* The new address must be page-aligned. */ - if (offset_in_page(new_addr)) + if (offset_in_page(vrm->new_addr)) return -EINVAL; /* A fixed address implies a move. */ @@ -1061,7 +1216,7 @@ static unsigned long check_mremap_params(unsigned long addr, return -EINVAL; /* MREMAP_DONTUNMAP does not allow resizing in the process. */ - if (flags & MREMAP_DONTUNMAP && old_len != new_len) + if (flags & MREMAP_DONTUNMAP && vrm->old_len != vrm->new_len) return -EINVAL; /* @@ -1090,11 +1245,11 @@ static unsigned long check_mremap_params(unsigned long addr, * If we discover the VMA is locked, update mm_struct statistics accordingly and * indicate so to the caller. */ -static unsigned long expand_vma_inplace(struct vm_area_struct *vma, - unsigned long delta, bool *locked) +static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; - long pages = delta >> PAGE_SHIFT; + long pages = vrm->delta >> PAGE_SHIFT; + struct vm_area_struct *vma = vrm->vma; VMA_ITERATOR(vmi, mm, vma->vm_end); long charged = 0; @@ -1114,7 +1269,7 @@ static unsigned long expand_vma_inplace(struct vm_area_struct *vma, * adjacent to the expanded vma and otherwise * compatible. */ - vma = vma_merge_extend(&vmi, vma, delta); + vma = vrm->vma = vma_merge_extend(&vmi, vma, vrm->delta); if (!vma) { vm_unacct_memory(charged); return -ENOMEM; @@ -1123,42 +1278,34 @@ static unsigned long expand_vma_inplace(struct vm_area_struct *vma, vm_stat_account(mm, vma->vm_flags, pages); if (vma->vm_flags & VM_LOCKED) { mm->locked_vm += pages; - *locked = true; + vrm->mlocked = true; } return 0; } -static bool align_hugetlb(struct vm_area_struct *vma, - unsigned long addr, - unsigned long new_addr, - unsigned long *old_len_ptr, - unsigned long *new_len_ptr, - unsigned long *delta_ptr) +static bool align_hugetlb(struct vma_remap_struct *vrm) { - unsigned long old_len = *old_len_ptr; - unsigned long new_len = *new_len_ptr; - struct hstate *h __maybe_unused = hstate_vma(vma); + struct hstate *h __maybe_unused = hstate_vma(vrm->vma); - old_len = ALIGN(old_len, huge_page_size(h)); - new_len = ALIGN(new_len, huge_page_size(h)); + vrm->old_len = ALIGN(vrm->old_len, huge_page_size(h)); + vrm->new_len = ALIGN(vrm->new_len, huge_page_size(h)); /* addrs must be huge page aligned */ - if (addr & ~huge_page_mask(h)) + if (vrm->addr & ~huge_page_mask(h)) return false; - if (new_addr & ~huge_page_mask(h)) + if (vrm->new_addr & ~huge_page_mask(h)) return false; /* * Don't allow remap expansion, because the underlying hugetlb * reservation is not yet capable to handle split reservation. */ - if (new_len > old_len) + if (vrm->new_len > vrm->old_len) return false; - *old_len_ptr = old_len; - *new_len_ptr = new_len; - *delta_ptr = abs_diff(old_len, new_len); + vrm_set_delta(vrm); + return true; } @@ -1169,19 +1316,16 @@ static bool align_hugetlb(struct vm_area_struct *vma, * Try to do so in-place, if this fails, then move the VMA to a new location to * action the change. */ -static unsigned long expand_vma(struct vm_area_struct *vma, - unsigned long addr, unsigned long old_len, - unsigned long new_len, unsigned long flags, - bool *locked_ptr, unsigned long *new_addr_ptr, - struct vm_userfaultfd_ctx *uf_ptr, - struct list_head *uf_unmap_ptr) +static unsigned long expand_vma(struct vma_remap_struct *vrm) { unsigned long err; - unsigned long map_flags; - unsigned long new_addr; /* We ignore any user-supplied one. */ - pgoff_t pgoff; + struct vm_area_struct *vma = vrm->vma; + unsigned long addr = vrm->addr; + unsigned long old_len = vrm->old_len; + unsigned long new_len = vrm->new_len; + unsigned long flags = vrm->flags; - err = resize_is_valid(vma, addr, old_len, new_len, flags); + err = resize_is_valid(vrm); if (err) return err; @@ -1189,10 +1333,9 @@ static unsigned long expand_vma(struct vm_area_struct *vma, * [addr, old_len) spans precisely to the end of the VMA, so try to * expand it in-place. */ - if (old_len == vma->vm_end - addr && - vma_expandable(vma, new_len - old_len)) { - err = expand_vma_inplace(vma, new_len - old_len, locked_ptr); - if (IS_ERR_VALUE(err)) + if (vrm_can_expand_in_place(vrm)) { + err = expand_vma_in_place(vrm); + if (err) return err; /* @@ -1200,8 +1343,8 @@ static unsigned long expand_vma(struct vm_area_struct *vma, * satisfy the expectation that mlock()'ing a VMA maintains all * of its pages in memory. */ - if (*locked_ptr) - *new_addr_ptr = addr; + if (vrm->mlocked) + vrm->new_addr = addr; /* OK we're done! */ return addr; @@ -1217,15 +1360,103 @@ static unsigned long expand_vma(struct vm_area_struct *vma, return -ENOMEM; /* Find a new location to move the VMA to. */ - map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0; - pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags); - if (IS_ERR_VALUE(new_addr)) - return new_addr; - *new_addr_ptr = new_addr; + err = vrm_set_new_addr(vrm); + if (err) + return err; - return move_vma(vma, addr, old_len, new_len, new_addr, - locked_ptr, flags, uf_ptr, uf_unmap_ptr); + return move_vma(vma, addr, old_len, new_len, vrm->new_addr, + &vrm->mlocked, flags, vrm->uf, vrm->uf_unmap); +} + +/* + * Attempt to resize the VMA in-place, if we cannot, then move the VMA to the + * first available address to perform the operation. + */ +static unsigned long mremap_at(struct vma_remap_struct *vrm) +{ + unsigned long res; + + switch (vrm->remap_type) { + case MREMAP_INVALID: + break; + case MREMAP_NO_RESIZE: + /* NO-OP CASE - resizing to the same size. */ + return vrm->addr; + case MREMAP_SHRINK: + /* + * SHRINK CASE. Can always be done in-place. + * + * Simply unmap the shrunken portion of the VMA. This does all + * the needed commit accounting, and we indicate that the mmap + * lock should be dropped. + */ + res = shrink_vma(vrm, /* drop_lock= */true); + if (res) + return res; + + return vrm->addr; + case MREMAP_EXPAND: + return expand_vma(vrm); + } + + BUG(); +} + +static unsigned long do_mremap(struct vma_remap_struct *vrm) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long ret; + + ret = check_mremap_params(vrm); + if (ret) + return ret; + + vrm->old_len = PAGE_ALIGN(vrm->old_len); + vrm->new_len = PAGE_ALIGN(vrm->new_len); + vrm_set_delta(vrm); + + if (mmap_write_lock_killable(mm)) + return -EINTR; + vrm->mmap_locked = true; + + vma = vrm->vma = vma_lookup(mm, vrm->addr); + if (!vma) { + ret = -EFAULT; + goto out; + } + + /* If mseal()'d, mremap() is prohibited. */ + if (!can_modify_vma(vma)) { + ret = -EPERM; + goto out; + } + + /* Align to hugetlb page size, if required. */ + if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm)) { + ret = -EINVAL; + goto out; + } + + vrm->remap_type = vrm_remap_type(vrm); + + /* Actually execute mremap. */ + ret = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm); + +out: + if (vrm->mmap_locked) { + mmap_write_unlock(mm); + vrm->mmap_locked = false; + + if (!offset_in_page(ret) && vrm->mlocked && vrm->new_len > vrm->old_len) + mm_populate(vrm->new_addr + vrm->old_len, vrm->delta); + } + + userfaultfd_unmap_complete(mm, vrm->uf_unmap_early); + mremap_userfaultfd_complete(vrm->uf, vrm->addr, ret, vrm->old_len); + userfaultfd_unmap_complete(mm, vrm->uf_unmap); + + return ret; } /* @@ -1239,15 +1470,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long ret; - unsigned long delta; - bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap); - /* * There is a deliberate asymmetry here: we strip the pointer tag * from the old address but leave the new address alone. This is @@ -1259,88 +1484,19 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, * See Documentation/arch/arm64/tagged-address-abi.rst for more * information. */ - addr = untagged_addr(addr); + struct vma_remap_struct vrm = { + .addr = untagged_addr(addr), + .old_len = old_len, + .new_len = new_len, + .flags = flags, + .new_addr = new_addr, - ret = check_mremap_params(addr, flags, old_len, new_len, new_addr); - if (ret) - return ret; + .uf = &uf, + .uf_unmap_early = &uf_unmap_early, + .uf_unmap = &uf_unmap, - old_len = PAGE_ALIGN(old_len); - new_len = PAGE_ALIGN(new_len); - delta = abs_diff(old_len, new_len); + .remap_type = MREMAP_INVALID, /* We set later. */ + }; - if (mmap_write_lock_killable(mm)) - return -EINTR; - - vma = vma_lookup(mm, addr); - if (!vma) { - ret = -EFAULT; - goto out; - } - - /* If mseal()'d, mremap() is prohibited. */ - if (!can_modify_vma(vma)) { - ret = -EPERM; - goto out; - } - - /* Align to hugetlb page size, if required. */ - if (is_vm_hugetlb_page(vma) && - !align_hugetlb(vma, addr, new_addr, &old_len, &new_len, &delta)) { - ret = -EINVAL; - goto out; - } - - /* Are we RELOCATING the VMA to a SPECIFIC address? */ - if (implies_new_addr(flags)) { - ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, flags, &uf, &uf_unmap_early, - &uf_unmap); - goto out; - } - - /* - * From here on in we are only RESIZING the VMA, attempting to do so - * in-place, moving the VMA if we cannot. - */ - - /* NO-OP CASE - resizing to the same size. */ - if (new_len == old_len) { - ret = addr; - goto out; - } - - /* SHRINK CASE. Can always be done in-place. */ - if (new_len < old_len) { - VMA_ITERATOR(vmi, mm, addr + new_len); - - /* - * Simply unmap the shrunken portion of the VMA. This does all - * the needed commit accounting, unlocking the mmap lock. - */ - ret = do_vmi_munmap(&vmi, mm, addr + new_len, delta, - &uf_unmap, true); - if (ret) - goto out; - - /* We succeeded, mmap lock released for us. */ - ret = addr; - goto out_unlocked; - } - - /* EXPAND case. We try to do in-place, if we can't, then we move it. */ - ret = expand_vma(vma, addr, old_len, new_len, flags, &locked, &new_addr, - &uf, &uf_unmap); - -out: - if (offset_in_page(ret)) - locked = false; - mmap_write_unlock(mm); - if (locked && new_len > old_len) - mm_populate(new_addr + old_len, delta); -out_unlocked: - userfaultfd_unmap_complete(mm, &uf_unmap_early); - mremap_userfaultfd_complete(&uf, addr, ret, old_len); - userfaultfd_unmap_complete(mm, &uf_unmap); - return ret; + return do_mremap(&vrm); } From d5c8aec0542e2d79b64de9089b88fabdebe05c1e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:37 +0000 Subject: [PATCH 1219/2157] mm/mremap: initial refactor of move_vma() Update move_vma() to use the threaded VRM object, de-duplicate code and separate into smaller functions to aid readability and debug-ability. This in turn allows further simplification of expand_vma() as we can simply thread VRM through the function. We also take the opportunity to abstract the account charging page count into the VRM in order that we can correctly thread this through the operation. We additionally do the same for tracking mm statistics - exec_vm, stack_vm, data_vm, and locked_vm. As part of this change, we slightly modify when locked pages statistics are counted for in mm_struct statistics. However this should cause no issues, as there is no chance of underflow, nor will any rlimit failures occur as a result. This is an intermediate step before a further refactoring of move_vma() in order to aid review. Link: https://lkml.kernel.org/r/ab611d6efae11bddab2db2b8bb3925b1d1954c7d.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Cc: Harry Yoo Signed-off-by: Andrew Morton --- mm/mremap.c | 186 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 122 insertions(+), 64 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index af022e3b89e2..6305cb9a86f6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -68,6 +68,7 @@ struct vma_remap_struct { bool mlocked; /* Was the VMA mlock()'d? */ enum mremap_type remap_type; /* expand, shrink, etc. */ bool mmap_locked; /* Is mm currently write-locked? */ + unsigned long charged; /* If VM_ACCOUNT, # pages to account. */ }; static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr) @@ -816,35 +817,88 @@ static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm) return 0; } -static unsigned long move_vma(struct vm_area_struct *vma, - unsigned long old_addr, unsigned long old_len, - unsigned long new_len, unsigned long new_addr, - bool *mlocked, unsigned long flags, - struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) +/* + * Keep track of pages which have been added to the memory mapping. If the VMA + * is accounted, also check to see if there is sufficient memory. + * + * Returns true on success, false if insufficient memory to charge. + */ +static bool vrm_charge(struct vma_remap_struct *vrm) { - long to_account = new_len - old_len; - struct mm_struct *mm = vma->vm_mm; - struct vm_area_struct *new_vma; - unsigned long vm_flags = vma->vm_flags; - unsigned long new_pgoff; - unsigned long moved_len; - bool account_start = false; - bool account_end = false; - unsigned long hiwater_vm; - int err = 0; - bool need_rmap_locks; - struct vma_iterator vmi; + unsigned long charged; + + if (!(vrm->vma->vm_flags & VM_ACCOUNT)) + return true; + + /* + * If we don't unmap the old mapping, then we account the entirety of + * the length of the new one. Otherwise it's just the delta in size. + */ + if (vrm->flags & MREMAP_DONTUNMAP) + charged = vrm->new_len >> PAGE_SHIFT; + else + charged = vrm->delta >> PAGE_SHIFT; + + + /* This accounts 'charged' pages of memory. */ + if (security_vm_enough_memory_mm(current->mm, charged)) + return false; + + vrm->charged = charged; + return true; +} + +/* + * an error has occurred so we will not be using vrm->charged memory. Unaccount + * this memory if the VMA is accounted. + */ +static void vrm_uncharge(struct vma_remap_struct *vrm) +{ + if (!(vrm->vma->vm_flags & VM_ACCOUNT)) + return; + + vm_unacct_memory(vrm->charged); + vrm->charged = 0; +} + +/* + * Update mm exec_vm, stack_vm, data_vm, and locked_vm fields as needed to + * account for 'bytes' memory used, and if locked, indicate this in the VRM so + * we can handle this correctly later. + */ +static void vrm_stat_account(struct vma_remap_struct *vrm, + unsigned long bytes) +{ + unsigned long pages = bytes >> PAGE_SHIFT; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = vrm->vma; + + vm_stat_account(mm, vma->vm_flags, pages); + if (vma->vm_flags & VM_LOCKED) { + mm->locked_vm += pages; + vrm->mlocked = true; + } +} + +/* + * Perform checks before attempting to write a VMA prior to it being + * moved. + */ +static unsigned long prep_move_vma(struct vma_remap_struct *vrm, + unsigned long *vm_flags_ptr) +{ + unsigned long err = 0; + struct vm_area_struct *vma = vrm->vma; + unsigned long old_addr = vrm->addr; + unsigned long old_len = vrm->old_len; /* * We'd prefer to avoid failure later on in do_munmap: * which may split one vma into three before unmapping. */ - if (mm->map_count >= sysctl_max_map_count - 3) + if (current->mm->map_count >= sysctl_max_map_count - 3) return -ENOMEM; - if (unlikely(flags & MREMAP_DONTUNMAP)) - to_account = new_len; - if (vma->vm_ops && vma->vm_ops->may_split) { if (vma->vm_start != old_addr) err = vma->vm_ops->may_split(vma, old_addr); @@ -862,22 +916,46 @@ static unsigned long move_vma(struct vm_area_struct *vma, * so KSM can come around to merge on vma and new_vma afterwards. */ err = ksm_madvise(vma, old_addr, old_addr + old_len, - MADV_UNMERGEABLE, &vm_flags); + MADV_UNMERGEABLE, vm_flags_ptr); if (err) return err; - if (vm_flags & VM_ACCOUNT) { - if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT)) - return -ENOMEM; - } + return 0; +} + +static unsigned long move_vma(struct vma_remap_struct *vrm) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = vrm->vma; + struct vm_area_struct *new_vma; + unsigned long vm_flags = vma->vm_flags; + unsigned long old_addr = vrm->addr, new_addr = vrm->new_addr; + unsigned long old_len = vrm->old_len, new_len = vrm->new_len; + unsigned long new_pgoff; + unsigned long moved_len; + unsigned long account_start = false; + unsigned long account_end = false; + unsigned long hiwater_vm; + int err; + bool need_rmap_locks; + struct vma_iterator vmi; + + err = prep_move_vma(vrm, &vm_flags); + if (err) + return err; + + /* If accounted, charge the number of bytes the operation will use. */ + if (!vrm_charge(vrm)) + return -ENOMEM; vma_start_write(vma); new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); - new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, + new_vma = copy_vma(&vrm->vma, new_addr, new_len, new_pgoff, &need_rmap_locks); + /* This may have been updated. */ + vma = vrm->vma; if (!new_vma) { - if (vm_flags & VM_ACCOUNT) - vm_unacct_memory(to_account >> PAGE_SHIFT); + vrm_uncharge(vrm); return -ENOMEM; } @@ -902,7 +980,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, old_addr = new_addr; new_addr = err; } else { - mremap_userfaultfd_prep(new_vma, uf); + mremap_userfaultfd_prep(new_vma, vrm->uf); } if (is_vm_hugetlb_page(vma)) { @@ -910,7 +988,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } /* Conceal VM_ACCOUNT so old reservation is not undone */ - if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) { + if (vm_flags & VM_ACCOUNT && !(vrm->flags & MREMAP_DONTUNMAP)) { vm_flags_clear(vma, VM_ACCOUNT); if (vma->vm_start < old_addr) account_start = true; @@ -928,13 +1006,12 @@ static unsigned long move_vma(struct vm_area_struct *vma, * If this were a serious issue, we'd add a flag to do_munmap(). */ hiwater_vm = mm->hiwater_vm; - vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT); /* Tell pfnmap has moved from this vma */ if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn_clear(vma); - if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { + if (unlikely(!err && (vrm->flags & MREMAP_DONTUNMAP))) { /* We always clear VM_LOCKED[ONFAULT] on the old vma */ vm_flags_clear(vma, VM_LOCKED_MASK); @@ -947,22 +1024,20 @@ static unsigned long move_vma(struct vm_area_struct *vma, unlink_anon_vmas(vma); /* Because we won't unmap we don't need to touch locked_vm */ + vrm_stat_account(vrm, new_len); return new_addr; } + vrm_stat_account(vrm, new_len); + vma_iter_init(&vmi, mm, old_addr); - if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) { + if (do_vmi_munmap(&vmi, mm, old_addr, old_len, vrm->uf_unmap, false) < 0) { /* OOM: unable to split vma, just get accounts right */ - if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) + if (vm_flags & VM_ACCOUNT && !(vrm->flags & MREMAP_DONTUNMAP)) vm_acct_memory(old_len >> PAGE_SHIFT); account_start = account_end = false; } - if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; - *mlocked = true; - } - mm->hiwater_vm = hiwater_vm; /* Restore VM_ACCOUNT if one or two pieces of vma left */ @@ -1141,9 +1216,7 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm) if (err) return err; - return move_vma(vrm->vma, vrm->addr, vrm->old_len, vrm->new_len, - vrm->new_addr, &vrm->mlocked, vrm->flags, - vrm->uf, vrm->uf_unmap); + return move_vma(vrm); } static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) @@ -1248,17 +1321,11 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; - long pages = vrm->delta >> PAGE_SHIFT; struct vm_area_struct *vma = vrm->vma; VMA_ITERATOR(vmi, mm, vma->vm_end); - long charged = 0; - if (vma->vm_flags & VM_ACCOUNT) { - if (security_vm_enough_memory_mm(mm, pages)) - return -ENOMEM; - - charged = pages; - } + if (!vrm_charge(vrm)) + return -ENOMEM; /* * Function vma_merge_extend() is called on the @@ -1271,15 +1338,11 @@ static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) */ vma = vrm->vma = vma_merge_extend(&vmi, vma, vrm->delta); if (!vma) { - vm_unacct_memory(charged); + vrm_uncharge(vrm); return -ENOMEM; } - vm_stat_account(mm, vma->vm_flags, pages); - if (vma->vm_flags & VM_LOCKED) { - mm->locked_vm += pages; - vrm->mlocked = true; - } + vrm_stat_account(vrm, vrm->delta); return 0; } @@ -1319,11 +1382,7 @@ static bool align_hugetlb(struct vma_remap_struct *vrm) static unsigned long expand_vma(struct vma_remap_struct *vrm) { unsigned long err; - struct vm_area_struct *vma = vrm->vma; unsigned long addr = vrm->addr; - unsigned long old_len = vrm->old_len; - unsigned long new_len = vrm->new_len; - unsigned long flags = vrm->flags; err = resize_is_valid(vrm); if (err) @@ -1356,7 +1415,7 @@ static unsigned long expand_vma(struct vma_remap_struct *vrm) */ /* We're not allowed to move the VMA, so error out. */ - if (!(flags & MREMAP_MAYMOVE)) + if (!(vrm->flags & MREMAP_MAYMOVE)) return -ENOMEM; /* Find a new location to move the VMA to. */ @@ -1364,8 +1423,7 @@ static unsigned long expand_vma(struct vma_remap_struct *vrm) if (err) return err; - return move_vma(vma, addr, old_len, new_len, vrm->new_addr, - &vrm->mlocked, flags, vrm->uf, vrm->uf_unmap); + return move_vma(vrm); } /* From b714ccb02a76e170f3e6475749ed0812ee25f777 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:38 +0000 Subject: [PATCH 1220/2157] mm/mremap: complete refactor of move_vma() We invoke ksm_madvise() with an intentionally dummy flags field, so no need to pass around. Additionally, the code tries to be 'clever' with account_start, account_end, using these to both check that vma->vm_start != 0 and that we ought to account the newly split portion of VMA post-move, either before or after it. We need to do this because we intentionally removed VM_ACCOUNT on the VMA prior to unmapping, so we don't erroneously unaccount memory (we have already calculated the correct amount to account and accounted it, any subsequent subtraction will be incorrect). This patch significantly expands the comment (from 2002!) about 'concealing' the flag to make it abundantly clear what's going on, as well as adding and expanding a number of other comments also. We can remove account_start, account_end by instead tracking when we account (i.e. vma->vm_flags has the VM_ACCOUNT flag set, and this is not an MREMAP_DONTUNMAP operation), and figuring out when to reinstate the VM_ACCOUNT flag on prior/subsequent VMAs separately. We additionally break the function into logical pieces and attack the very confusing error handling logic (where, for instance, new_addr is set to err). After this change the code is considerably more readable and easy to manipulate. Link: https://lkml.kernel.org/r/e7eaa307e444ba2b04d94fd985c907c8e896f893.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Harry Yoo Cc: Liam R. Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 295 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 205 insertions(+), 90 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 6305cb9a86f6..7dc058d5d5e2 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -884,13 +884,13 @@ static void vrm_stat_account(struct vma_remap_struct *vrm, * Perform checks before attempting to write a VMA prior to it being * moved. */ -static unsigned long prep_move_vma(struct vma_remap_struct *vrm, - unsigned long *vm_flags_ptr) +static unsigned long prep_move_vma(struct vma_remap_struct *vrm) { unsigned long err = 0; struct vm_area_struct *vma = vrm->vma; unsigned long old_addr = vrm->addr; unsigned long old_len = vrm->old_len; + unsigned long dummy = vma->vm_flags; /* * We'd prefer to avoid failure later on in do_munmap: @@ -916,56 +916,151 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm, * so KSM can come around to merge on vma and new_vma afterwards. */ err = ksm_madvise(vma, old_addr, old_addr + old_len, - MADV_UNMERGEABLE, vm_flags_ptr); + MADV_UNMERGEABLE, &dummy); if (err) return err; return 0; } -static unsigned long move_vma(struct vma_remap_struct *vrm) +/* + * Unmap source VMA for VMA move, turning it from a copy to a move, being + * careful to ensure we do not underflow memory account while doing so if an + * accountable move. + * + * This is best effort, if we fail to unmap then we simply try to correct + * accounting and exit. + */ +static void unmap_source_vma(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; + unsigned long addr = vrm->addr; + unsigned long len = vrm->old_len; struct vm_area_struct *vma = vrm->vma; - struct vm_area_struct *new_vma; - unsigned long vm_flags = vma->vm_flags; - unsigned long old_addr = vrm->addr, new_addr = vrm->new_addr; - unsigned long old_len = vrm->old_len, new_len = vrm->new_len; - unsigned long new_pgoff; - unsigned long moved_len; - unsigned long account_start = false; - unsigned long account_end = false; - unsigned long hiwater_vm; + VMA_ITERATOR(vmi, mm, addr); int err; + unsigned long vm_start; + unsigned long vm_end; + /* + * It might seem odd that we check for MREMAP_DONTUNMAP here, given this + * function implies that we unmap the original VMA, which seems + * contradictory. + * + * However, this occurs when this operation was attempted and an error + * arose, in which case we _do_ wish to unmap the _new_ VMA, which means + * we actually _do_ want it be unaccounted. + */ + bool accountable_move = (vma->vm_flags & VM_ACCOUNT) && + !(vrm->flags & MREMAP_DONTUNMAP); + + /* + * So we perform a trick here to prevent incorrect accounting. Any merge + * or new VMA allocation performed in copy_vma() does not adjust + * accounting, it is expected that callers handle this. + * + * And indeed we already have, accounting appropriately in the case of + * both in vrm_charge(). + * + * However, when we unmap the existing VMA (to effect the move), this + * code will, if the VMA has VM_ACCOUNT set, attempt to unaccount + * removed pages. + * + * To avoid this we temporarily clear this flag, reinstating on any + * portions of the original VMA that remain. + */ + if (accountable_move) { + vm_flags_clear(vma, VM_ACCOUNT); + /* We are about to split vma, so store the start/end. */ + vm_start = vma->vm_start; + vm_end = vma->vm_end; + } + + err = do_vmi_munmap(&vmi, mm, addr, len, vrm->uf_unmap, /* unlock= */false); + vrm->vma = NULL; /* Invalidated. */ + if (err) { + /* OOM: unable to split vma, just get accounts right */ + vm_acct_memory(len >> PAGE_SHIFT); + return; + } + + /* + * If we mremap() from a VMA like this: + * + * addr end + * | | + * v v + * |-------------| + * | | + * |-------------| + * + * Having cleared VM_ACCOUNT from the whole VMA, after we unmap above + * we'll end up with: + * + * addr end + * | | + * v v + * |---| |---| + * | A | | B | + * |---| |---| + * + * The VMI is still pointing at addr, so vma_prev() will give us A, and + * a subsequent or lone vma_next() will give as B. + * + * do_vmi_munmap() will have restored the VMI back to addr. + */ + if (accountable_move) { + unsigned long end = addr + len; + + if (vm_start < addr) { + struct vm_area_struct *prev = vma_prev(&vmi); + + vm_flags_set(prev, VM_ACCOUNT); /* Acquires VMA lock. */ + } + + if (vm_end > end) { + struct vm_area_struct *next = vma_next(&vmi); + + vm_flags_set(next, VM_ACCOUNT); /* Acquires VMA lock. */ + } + } +} + +/* + * Copy vrm->vma over to vrm->new_addr possibly adjusting size as part of the + * process. Additionally handle an error occurring on moving of page tables, + * where we reset vrm state to cause unmapping of the new VMA. + * + * Outputs the newly installed VMA to new_vma_ptr. Returns 0 on success or an + * error code. + */ +static int copy_vma_and_data(struct vma_remap_struct *vrm, + struct vm_area_struct **new_vma_ptr) +{ + unsigned long internal_offset = vrm->addr - vrm->vma->vm_start; + unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT; + unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff; + unsigned long moved_len; bool need_rmap_locks; - struct vma_iterator vmi; + struct vm_area_struct *vma; + struct vm_area_struct *new_vma; + int err = 0; - err = prep_move_vma(vrm, &vm_flags); - if (err) - return err; - - /* If accounted, charge the number of bytes the operation will use. */ - if (!vrm_charge(vrm)) - return -ENOMEM; - - vma_start_write(vma); - new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); - new_vma = copy_vma(&vrm->vma, new_addr, new_len, new_pgoff, + new_vma = copy_vma(&vrm->vma, vrm->new_addr, vrm->new_len, new_pgoff, &need_rmap_locks); - /* This may have been updated. */ - vma = vrm->vma; if (!new_vma) { vrm_uncharge(vrm); + *new_vma_ptr = NULL; return -ENOMEM; } + vma = vrm->vma; - moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len, - need_rmap_locks, false); - if (moved_len < old_len) { + moved_len = move_page_tables(vma, vrm->addr, new_vma, + vrm->new_addr, vrm->old_len, + need_rmap_locks, /* for_stack= */false); + if (moved_len < vrm->old_len) err = -ENOMEM; - } else if (vma->vm_ops && vma->vm_ops->mremap) { + else if (vma->vm_ops && vma->vm_ops->mremap) err = vma->vm_ops->mremap(new_vma); - } if (unlikely(err)) { /* @@ -973,28 +1068,84 @@ static unsigned long move_vma(struct vma_remap_struct *vrm) * which will succeed since page tables still there, * and then proceed to unmap new area instead of old. */ - move_page_tables(new_vma, new_addr, vma, old_addr, moved_len, - true, false); - vma = new_vma; - old_len = new_len; - old_addr = new_addr; - new_addr = err; + move_page_tables(new_vma, vrm->new_addr, vma, vrm->addr, + moved_len, /* need_rmap_locks = */true, + /* for_stack= */false); + vrm->vma = new_vma; + vrm->old_len = vrm->new_len; + vrm->addr = vrm->new_addr; } else { mremap_userfaultfd_prep(new_vma, vrm->uf); } - if (is_vm_hugetlb_page(vma)) { + if (is_vm_hugetlb_page(vma)) clear_vma_resv_huge_pages(vma); - } - /* Conceal VM_ACCOUNT so old reservation is not undone */ - if (vm_flags & VM_ACCOUNT && !(vrm->flags & MREMAP_DONTUNMAP)) { - vm_flags_clear(vma, VM_ACCOUNT); - if (vma->vm_start < old_addr) - account_start = true; - if (vma->vm_end > old_addr + old_len) - account_end = true; - } + /* Tell pfnmap has moved from this vma */ + if (unlikely(vma->vm_flags & VM_PFNMAP)) + untrack_pfn_clear(vma); + + *new_vma_ptr = new_vma; + return err; +} + +/* + * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and + * account flags on remaining VMA by convention (it cannot be mlock()'d any + * longer, as pages in range are no longer mapped), and removing anon_vma_chain + * links from it (if the entire VMA was copied over). + */ +static void dontunmap_complete(struct vma_remap_struct *vrm, + struct vm_area_struct *new_vma) +{ + unsigned long start = vrm->addr; + unsigned long end = vrm->addr + vrm->old_len; + unsigned long old_start = vrm->vma->vm_start; + unsigned long old_end = vrm->vma->vm_end; + + /* + * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old + * vma. + */ + vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); + + /* + * anon_vma links of the old vma is no longer needed after its page + * table has been moved. + */ + if (new_vma != vrm->vma && start == old_start && end == old_end) + unlink_anon_vmas(vrm->vma); + + /* Because we won't unmap we don't need to touch locked_vm. */ +} + +static unsigned long move_vma(struct vma_remap_struct *vrm) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *new_vma; + unsigned long hiwater_vm; + int err; + + err = prep_move_vma(vrm); + if (err) + return err; + + /* If accounted, charge the number of bytes the operation will use. */ + if (!vrm_charge(vrm)) + return -ENOMEM; + + /* We don't want racing faults. */ + vma_start_write(vrm->vma); + + /* Perform copy step. */ + err = copy_vma_and_data(vrm, &new_vma); + /* + * If we established the copied-to VMA, we attempt to recover from the + * error by setting the destination VMA to the source VMA and unmapping + * it below. + */ + if (err && !new_vma) + return err; /* * If we failed to move page tables we still do total_vm increment @@ -1007,51 +1158,15 @@ static unsigned long move_vma(struct vma_remap_struct *vrm) */ hiwater_vm = mm->hiwater_vm; - /* Tell pfnmap has moved from this vma */ - if (unlikely(vma->vm_flags & VM_PFNMAP)) - untrack_pfn_clear(vma); - - if (unlikely(!err && (vrm->flags & MREMAP_DONTUNMAP))) { - /* We always clear VM_LOCKED[ONFAULT] on the old vma */ - vm_flags_clear(vma, VM_LOCKED_MASK); - - /* - * anon_vma links of the old vma is no longer needed after its page - * table has been moved. - */ - if (new_vma != vma && vma->vm_start == old_addr && - vma->vm_end == (old_addr + old_len)) - unlink_anon_vmas(vma); - - /* Because we won't unmap we don't need to touch locked_vm */ - vrm_stat_account(vrm, new_len); - return new_addr; - } - - vrm_stat_account(vrm, new_len); - - vma_iter_init(&vmi, mm, old_addr); - if (do_vmi_munmap(&vmi, mm, old_addr, old_len, vrm->uf_unmap, false) < 0) { - /* OOM: unable to split vma, just get accounts right */ - if (vm_flags & VM_ACCOUNT && !(vrm->flags & MREMAP_DONTUNMAP)) - vm_acct_memory(old_len >> PAGE_SHIFT); - account_start = account_end = false; - } + vrm_stat_account(vrm, vrm->new_len); + if (unlikely(!err && (vrm->flags & MREMAP_DONTUNMAP))) + dontunmap_complete(vrm, new_vma); + else + unmap_source_vma(vrm); mm->hiwater_vm = hiwater_vm; - /* Restore VM_ACCOUNT if one or two pieces of vma left */ - if (account_start) { - vma = vma_prev(&vmi); - vm_flags_set(vma, VM_ACCOUNT); - } - - if (account_end) { - vma = vma_next(&vmi); - vm_flags_set(vma, VM_ACCOUNT); - } - - return new_addr; + return err ? (unsigned long)err : vrm->new_addr; } /* From 2a4077f49ccd6f904be6edb363714646e47292c9 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:39 +0000 Subject: [PATCH 1221/2157] mm/mremap: refactor move_page_tables(), abstracting state A lot of state is threaded throughout the page table moving logic within the mremap code, including boolean values which control behaviour specifically in regard to whether rmap locks need be held over the operation and whether the VMA belongs to a temporary stack being moved by move_arg_pages() (and consequently, relocate_vma_down()). As we already transmit state throughout this operation, it is neater and more readable to maintain a small state object. We do so in the form of pagetable_move_control. In addition, this allows us to update parameters within the state as we manipulate things, for instance with regard to the page table realignment logic. In future I want to add additional functionality to the page table logic, so this is an additional motivation for making it easier to do so. This patch changes move_page_tables() to accept a pointer to a pagetable_move_control struct, and performs changes at this level only. Further page table logic will be updated in a subsequent patch. We additionally also take the opportunity to add significant comments describing the address realignment logic to make it abundantly clear what is going on in this code. Link: https://lkml.kernel.org/r/e20180add9c8746184aa3f23a61fff69a06cdaa9.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Harry Yoo Cc: Liam R. Howlett Signed-off-by: Andrew Morton --- mm/internal.h | 43 +++++++++++-- mm/mmap.c | 5 +- mm/mremap.c | 174 ++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 169 insertions(+), 53 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index aa30282a774a..06f816cdb43b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,6 +24,44 @@ struct folio_batch; +/* + * Maintains state across a page table move. The operation assumes both source + * and destination VMAs already exist and are specified by the user. + * + * Partial moves are permitted, but the old and new ranges must both reside + * within a VMA. + * + * mmap lock must be held in write and VMA write locks must be held on any VMA + * that is visible. + * + * Use the PAGETABLE_MOVE() macro to initialise this struct. + * + * NOTE: The page table move is affected by reading from [old_addr, old_end), + * and old_addr may be updated for better page table alignment, so len_in + * represents the length of the range being copied as specified by the user. + */ +struct pagetable_move_control { + struct vm_area_struct *old; /* Source VMA. */ + struct vm_area_struct *new; /* Destination VMA. */ + unsigned long old_addr; /* Address from which the move begins. */ + unsigned long old_end; /* Exclusive address at which old range ends. */ + unsigned long new_addr; /* Address to move page tables to. */ + unsigned long len_in; /* Bytes to remap specified by user. */ + + bool need_rmap_locks; /* Do rmap locks need to be taken? */ + bool for_stack; /* Is this an early temp stack being moved? */ +}; + +#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ + struct pagetable_move_control name = { \ + .old = old_, \ + .new = new_, \ + .old_addr = old_addr_, \ + .old_end = (old_addr_) + (len_), \ + .new_addr = new_addr_, \ + .len_in = len_, \ + } + /* * The set of flags that only affect watermark checking and reclaim * behaviour. This is used by the MM to obey the caller constraints @@ -1527,10 +1565,7 @@ extern struct list_lru shadow_nodes; } while (0) /* mremap.c */ -unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack); +unsigned long move_page_tables(struct pagetable_move_control *pmc); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); diff --git a/mm/mmap.c b/mm/mmap.c index 15d6cd7cc845..efcc4ca7500d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1694,6 +1694,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff); struct vm_area_struct *next; struct mmu_gather tlb; + PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length); BUG_ON(new_start > new_end); @@ -1716,8 +1717,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) * move the page tables downwards, on failure we rely on * process cleanup to remove whatever mess we made. */ - if (length != move_page_tables(vma, old_start, - vma, new_start, length, false, true)) + pmc.for_stack = true; + if (length != move_page_tables(&pmc)) return -ENOMEM; tlb_gather_mmu(&tlb, mm); diff --git a/mm/mremap.c b/mm/mremap.c index 7dc058d5d5e2..3a2ac167e876 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -580,8 +580,9 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma, * the VMA that is created to span the source and destination of the move, * so we make an exception for it. */ -static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align, - unsigned long mask, bool for_stack) +static bool can_align_down(struct pagetable_move_control *pmc, + struct vm_area_struct *vma, unsigned long addr_to_align, + unsigned long mask) { unsigned long addr_masked = addr_to_align & mask; @@ -590,11 +591,11 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali * of the corresponding VMA, we can't align down or we will destroy part * of the current mapping. */ - if (!for_stack && vma->vm_start != addr_to_align) + if (!pmc->for_stack && vma->vm_start != addr_to_align) return false; /* In the stack case we explicitly permit in-VMA alignment. */ - if (for_stack && addr_masked >= vma->vm_start) + if (pmc->for_stack && addr_masked >= vma->vm_start) return true; /* @@ -604,54 +605,131 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL; } -/* Opportunistically realign to specified boundary for faster copy. */ -static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma, - unsigned long *new_addr, struct vm_area_struct *new_vma, - unsigned long mask, bool for_stack) +/* + * Determine if are in fact able to realign for efficiency to a higher page + * table boundary. + */ +static bool can_realign_addr(struct pagetable_move_control *pmc, + unsigned long pagetable_mask) { + unsigned long align_mask = ~pagetable_mask; + unsigned long old_align = pmc->old_addr & align_mask; + unsigned long new_align = pmc->new_addr & align_mask; + unsigned long pagetable_size = align_mask + 1; + unsigned long old_align_next = pagetable_size - old_align; + + /* + * We don't want to have to go hunting for VMAs from the end of the old + * VMA to the next page table boundary, also we want to make sure the + * operation is wortwhile. + * + * So ensure that we only perform this realignment if the end of the + * range being copied reaches or crosses the page table boundary. + * + * boundary boundary + * .<- old_align -> . + * . |----------------.-----------| + * . | vma . | + * . |----------------.-----------| + * . <----------------.-----------> + * . len_in + * <-------------------------------> + * . pagetable_size . + * . <----------------> + * . old_align_next . + */ + if (pmc->len_in < old_align_next) + return false; + /* Skip if the addresses are already aligned. */ - if ((*old_addr & ~mask) == 0) - return; + if (old_align == 0) + return false; /* Only realign if the new and old addresses are mutually aligned. */ - if ((*old_addr & ~mask) != (*new_addr & ~mask)) - return; + if (old_align != new_align) + return false; /* Ensure realignment doesn't cause overlap with existing mappings. */ - if (!can_align_down(old_vma, *old_addr, mask, for_stack) || - !can_align_down(new_vma, *new_addr, mask, for_stack)) - return; + if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) || + !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask)) + return false; - *old_addr = *old_addr & mask; - *new_addr = *new_addr & mask; + return true; } -unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack) +/* + * Opportunistically realign to specified boundary for faster copy. + * + * Consider an mremap() of a VMA with page table boundaries as below, and no + * preceding VMAs from the lower page table boundary to the start of the VMA, + * with the end of the range reaching or crossing the page table boundary. + * + * boundary boundary + * . |----------------.-----------| + * . | vma . | + * . |----------------.-----------| + * . pmc->old_addr . pmc->old_end + * . <----------------------------> + * . move these page tables + * + * If we proceed with moving page tables in this scenario, we will have a lot of + * work to do traversing old page tables and establishing new ones in the + * destination across multiple lower level page tables. + * + * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the + * page table boundary, so we can simply copy a single page table entry for the + * aligned portion of the VMA instead: + * + * boundary boundary + * . |----------------.-----------| + * . | vma . | + * . |----------------.-----------| + * pmc->old_addr . pmc->old_end + * <-------------------------------------------> + * . move these page tables + */ +static void try_realign_addr(struct pagetable_move_control *pmc, + unsigned long pagetable_mask) +{ + + if (!can_realign_addr(pmc, pagetable_mask)) + return; + + /* + * Simply align to page table boundaries. Note that we do NOT update the + * pmc->old_end value, and since the move_page_tables() operation spans + * from [old_addr, old_end) (offsetting new_addr as it is performed), + * this simply changes the start of the copy, not the end. + */ + pmc->old_addr &= pagetable_mask; + pmc->new_addr &= pagetable_mask; +} + +unsigned long move_page_tables(struct pagetable_move_control *pmc) { unsigned long extent, old_end; struct mmu_notifier_range range; pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; + unsigned long old_addr, new_addr; + struct vm_area_struct *vma = pmc->old; - if (!len) + if (!pmc->len_in) return 0; - old_end = old_addr + len; - if (is_vm_hugetlb_page(vma)) - return move_hugetlb_page_tables(vma, new_vma, old_addr, - new_addr, len); + return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr, + pmc->new_addr, pmc->len_in); + old_end = pmc->old_end; /* * If possible, realign addresses to PMD boundary for faster copy. * Only realign if the mremap copying hits a PMD boundary. */ - if (len >= PMD_SIZE - (old_addr & ~PMD_MASK)) - try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK, - for_stack); + try_realign_addr(pmc, PMD_MASK); + /* These may have been changed. */ + old_addr = pmc->old_addr; + new_addr = pmc->new_addr; flush_cache_range(vma, old_addr, old_end); mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, @@ -675,12 +753,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) { if (extent == HPAGE_PUD_SIZE) { move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr, - old_pud, new_pud, need_rmap_locks); + old_pud, new_pud, pmc->need_rmap_locks); /* We ignore and continue on error? */ continue; } } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) { - if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr, old_pud, new_pud, true)) continue; @@ -698,7 +775,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, pmd_devmap(*old_pmd)) { if (extent == HPAGE_PMD_SIZE && move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr, - old_pmd, new_pmd, need_rmap_locks)) + old_pmd, new_pmd, pmc->need_rmap_locks)) continue; split_huge_pmd(vma, old_pmd, old_addr); } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && @@ -713,10 +790,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, } if (pmd_none(*old_pmd)) continue; - if (pte_alloc(new_vma->vm_mm, new_pmd)) + if (pte_alloc(pmc->new->vm_mm, new_pmd)) break; if (move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr, need_rmap_locks) < 0) + pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0) goto again; } @@ -726,10 +803,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, * Prevent negative return values when {old,new}_addr was realigned * but we broke out of the above loop for the first PMD itself. */ - if (old_addr < old_end - len) + if (old_addr < old_end - pmc->len_in) return 0; - return len + old_addr - old_end; /* how much done */ + return pmc->len_in + old_addr - old_end; /* how much done */ } /* Set vrm->delta to the difference in VMA size specified by user. */ @@ -1040,37 +1117,40 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT; unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff; unsigned long moved_len; - bool need_rmap_locks; - struct vm_area_struct *vma; + struct vm_area_struct *vma = vrm->vma; struct vm_area_struct *new_vma; int err = 0; + PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len); - new_vma = copy_vma(&vrm->vma, vrm->new_addr, vrm->new_len, new_pgoff, - &need_rmap_locks); + new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff, + &pmc.need_rmap_locks); if (!new_vma) { vrm_uncharge(vrm); *new_vma_ptr = NULL; return -ENOMEM; } - vma = vrm->vma; + vrm->vma = vma; + pmc.old = vma; + pmc.new = new_vma; - moved_len = move_page_tables(vma, vrm->addr, new_vma, - vrm->new_addr, vrm->old_len, - need_rmap_locks, /* for_stack= */false); + moved_len = move_page_tables(&pmc); if (moved_len < vrm->old_len) err = -ENOMEM; else if (vma->vm_ops && vma->vm_ops->mremap) err = vma->vm_ops->mremap(new_vma); if (unlikely(err)) { + PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr, + vrm->addr, moved_len); + /* * On error, move entries back from new area to old, * which will succeed since page tables still there, * and then proceed to unmap new area instead of old. */ - move_page_tables(new_vma, vrm->new_addr, vma, vrm->addr, - moved_len, /* need_rmap_locks = */true, - /* for_stack= */false); + pmc_revert.need_rmap_locks = true; + move_page_tables(&pmc_revert); + vrm->vma = new_vma; vrm->old_len = vrm->new_len; vrm->addr = vrm->new_addr; From 664dc4da2694fa56e49dc3e4a041bcf7608a060b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Mar 2025 20:50:40 +0000 Subject: [PATCH 1222/2157] mm/mremap: thread state through move page table operation Finish refactoring the page table logic by threading the PMC state throughout the operation, allowing us to control the operation as we go. Additionally, update the old_addr, new_addr fields in move_page_tables() as we progress through the process making use of the fact we have this state object now to track this. With these changes made, not only is the code far more readable, but we can finally transmit state throughout the entire operation, which lays the groundwork for sensibly making changes in future to how the mremap() operation is performed. Additionally take the opportunity to refactor the means of determining the progress of the operation, abstracting this to pmc_progress() and simplifying the logic to make it clearer what's going on. Link: https://lkml.kernel.org/r/230dd7a2b7b01a6eef442678f284d575e800356e.1741639347.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Harry Yoo Cc: Liam R. Howlett Signed-off-by: Andrew Morton --- mm/internal.h | 3 + mm/mremap.c | 196 +++++++++++++++++++++++++++++--------------------- 2 files changed, 116 insertions(+), 83 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 06f816cdb43b..1ac433d2092b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -36,6 +36,9 @@ struct folio_batch; * * Use the PAGETABLE_MOVE() macro to initialise this struct. * + * The old_addr and new_addr fields are updated as the page table move is + * executed. + * * NOTE: The page table move is affected by reading from [old_addr, old_end), * and old_addr may be updated for better page table alignment, so len_in * represents the length of the range being copied as specified by the user. diff --git a/mm/mremap.c b/mm/mremap.c index 3a2ac167e876..0865387531ed 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -108,8 +108,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) return pmd; } -static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr) +static pud_t *alloc_new_pud(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; @@ -122,13 +121,12 @@ static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma, return pud_alloc(mm, p4d, addr); } -static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr) +static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) { pud_t *pud; pmd_t *pmd; - pud = alloc_new_pud(mm, vma, addr); + pud = alloc_new_pud(mm, addr); if (!pud) return NULL; @@ -172,17 +170,19 @@ static pte_t move_soft_dirty_pte(pte_t pte) return pte; } -static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, - unsigned long old_addr, unsigned long old_end, - struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks) +static int move_ptes(struct pagetable_move_control *pmc, + unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd) { + struct vm_area_struct *vma = pmc->old; bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma); struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; pmd_t dummy_pmdval; spinlock_t *old_ptl, *new_ptl; bool force_flush = false; + unsigned long old_addr = pmc->old_addr; + unsigned long new_addr = pmc->new_addr; + unsigned long old_end = old_addr + extent; unsigned long len = old_end - old_addr; int err = 0; @@ -204,7 +204,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, * serialize access to individual ptes, but only rmap traversal * order guarantees that we won't miss both the old and new ptes). */ - if (need_rmap_locks) + if (pmc->need_rmap_locks) take_rmap_locks(vma); /* @@ -278,7 +278,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); out: - if (need_rmap_locks) + if (pmc->need_rmap_locks) drop_rmap_locks(vma); return err; } @@ -293,10 +293,11 @@ static inline bool arch_supports_page_table_move(void) #endif #ifdef CONFIG_HAVE_MOVE_PMD -static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) +static bool move_normal_pmd(struct pagetable_move_control *pmc, + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; + struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; bool res = false; pmd_t pmd; @@ -342,7 +343,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ - old_ptl = pmd_lock(vma->vm_mm, old_pmd); + old_ptl = pmd_lock(mm, old_pmd); new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); @@ -359,7 +360,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, VM_BUG_ON(!pmd_none(*new_pmd)); pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); - flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PMD_SIZE); out_unlock: if (new_ptl != old_ptl) spin_unlock(new_ptl); @@ -368,19 +369,19 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, return res; } #else -static inline bool move_normal_pmd(struct vm_area_struct *vma, - unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, - pmd_t *new_pmd) +static inline bool move_normal_pmd(struct pagetable_move_control *pmc, + pmd_t *old_pmd, pmd_t *new_pmd) { return false; } #endif #if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) -static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) +static bool move_normal_pud(struct pagetable_move_control *pmc, + pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; + struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; pud_t pud; @@ -406,7 +407,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ - old_ptl = pud_lock(vma->vm_mm, old_pud); + old_ptl = pud_lock(mm, old_pud); new_ptl = pud_lockptr(mm, new_pud); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); @@ -418,7 +419,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, VM_BUG_ON(!pud_none(*new_pud)); pud_populate(mm, new_pud, pud_pgtable(pud)); - flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE); + flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PUD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); @@ -426,19 +427,19 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, return true; } #else -static inline bool move_normal_pud(struct vm_area_struct *vma, - unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, - pud_t *new_pud) +static inline bool move_normal_pud(struct pagetable_move_control *pmc, + pud_t *old_pud, pud_t *new_pud) { return false; } #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) -static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) +static bool move_huge_pud(struct pagetable_move_control *pmc, + pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; + struct vm_area_struct *vma = pmc->old; struct mm_struct *mm = vma->vm_mm; pud_t pud; @@ -453,7 +454,7 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. */ - old_ptl = pud_lock(vma->vm_mm, old_pud); + old_ptl = pud_lock(mm, old_pud); new_ptl = pud_lockptr(mm, new_pud); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); @@ -466,8 +467,8 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, /* Set the new pud */ /* mark soft_ditry when we add pud level soft dirty support */ - set_pud_at(mm, new_addr, new_pud, pud); - flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE); + set_pud_at(mm, pmc->new_addr, new_pud, pud); + flush_pud_tlb_range(vma, pmc->old_addr, pmc->old_addr + HPAGE_PUD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); @@ -475,8 +476,9 @@ static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, return true; } #else -static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) +static bool move_huge_pud(struct pagetable_move_control *pmc, + pud_t *old_pud, pud_t *new_pud) + { WARN_ON_ONCE(1); return false; @@ -497,10 +499,12 @@ enum pgt_entry { * destination pgt_entry. */ static __always_inline unsigned long get_extent(enum pgt_entry entry, - unsigned long old_addr, unsigned long old_end, - unsigned long new_addr) + struct pagetable_move_control *pmc) { unsigned long next, extent, mask, size; + unsigned long old_addr = pmc->old_addr; + unsigned long old_end = pmc->old_end; + unsigned long new_addr = pmc->new_addr; switch (entry) { case HPAGE_PMD: @@ -529,38 +533,51 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry, return extent; } +/* + * Should move_pgt_entry() acquire the rmap locks? This is either expressed in + * the PMC, or overridden in the case of normal, larger page tables. + */ +static bool should_take_rmap_locks(struct pagetable_move_control *pmc, + enum pgt_entry entry) +{ + switch (entry) { + case NORMAL_PMD: + case NORMAL_PUD: + return true; + default: + return pmc->need_rmap_locks; + } +} + /* * Attempts to speedup the move by moving entry at the level corresponding to * pgt_entry. Returns true if the move was successful, else false. */ -static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma, - unsigned long old_addr, unsigned long new_addr, - void *old_entry, void *new_entry, bool need_rmap_locks) +static bool move_pgt_entry(struct pagetable_move_control *pmc, + enum pgt_entry entry, void *old_entry, void *new_entry) { bool moved = false; + bool need_rmap_locks = should_take_rmap_locks(pmc, entry); /* See comment in move_ptes() */ if (need_rmap_locks) - take_rmap_locks(vma); + take_rmap_locks(pmc->old); switch (entry) { case NORMAL_PMD: - moved = move_normal_pmd(vma, old_addr, new_addr, old_entry, - new_entry); + moved = move_normal_pmd(pmc, old_entry, new_entry); break; case NORMAL_PUD: - moved = move_normal_pud(vma, old_addr, new_addr, old_entry, - new_entry); + moved = move_normal_pud(pmc, old_entry, new_entry); break; case HPAGE_PMD: moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - move_huge_pmd(vma, old_addr, new_addr, old_entry, + move_huge_pmd(pmc->old, pmc->old_addr, pmc->new_addr, old_entry, new_entry); break; case HPAGE_PUD: moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - move_huge_pud(vma, old_addr, new_addr, old_entry, - new_entry); + move_huge_pud(pmc, old_entry, new_entry); break; default: @@ -569,7 +586,7 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma, } if (need_rmap_locks) - drop_rmap_locks(vma); + drop_rmap_locks(pmc->old); return moved; } @@ -705,108 +722,121 @@ static void try_realign_addr(struct pagetable_move_control *pmc, pmc->new_addr &= pagetable_mask; } +/* Is the page table move operation done? */ +static bool pmc_done(struct pagetable_move_control *pmc) +{ + return pmc->old_addr >= pmc->old_end; +} + +/* Advance to the next page table, offset by extent bytes. */ +static void pmc_next(struct pagetable_move_control *pmc, unsigned long extent) +{ + pmc->old_addr += extent; + pmc->new_addr += extent; +} + +/* + * Determine how many bytes in the specified input range have had their page + * tables moved so far. + */ +static unsigned long pmc_progress(struct pagetable_move_control *pmc) +{ + unsigned long orig_old_addr = pmc->old_end - pmc->len_in; + unsigned long old_addr = pmc->old_addr; + + /* + * Prevent negative return values when {old,new}_addr was realigned but + * we broke out of the loop in move_page_tables() for the first PMD + * itself. + */ + return old_addr < orig_old_addr ? 0 : old_addr - orig_old_addr; +} + unsigned long move_page_tables(struct pagetable_move_control *pmc) { - unsigned long extent, old_end; + unsigned long extent; struct mmu_notifier_range range; pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; - unsigned long old_addr, new_addr; - struct vm_area_struct *vma = pmc->old; + struct mm_struct *mm = pmc->old->vm_mm; if (!pmc->len_in) return 0; - if (is_vm_hugetlb_page(vma)) + if (is_vm_hugetlb_page(pmc->old)) return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr, pmc->new_addr, pmc->len_in); - old_end = pmc->old_end; /* * If possible, realign addresses to PMD boundary for faster copy. * Only realign if the mremap copying hits a PMD boundary. */ try_realign_addr(pmc, PMD_MASK); - /* These may have been changed. */ - old_addr = pmc->old_addr; - new_addr = pmc->new_addr; - flush_cache_range(vma, old_addr, old_end); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, - old_addr, old_end); + flush_cache_range(pmc->old, pmc->old_addr, pmc->old_end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mm, + pmc->old_addr, pmc->old_end); mmu_notifier_invalidate_range_start(&range); - for (; old_addr < old_end; old_addr += extent, new_addr += extent) { + for (; !pmc_done(pmc); pmc_next(pmc, extent)) { cond_resched(); /* * If extent is PUD-sized try to speed up the move by moving at the * PUD level if possible. */ - extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr); + extent = get_extent(NORMAL_PUD, pmc); - old_pud = get_old_pud(vma->vm_mm, old_addr); + old_pud = get_old_pud(mm, pmc->old_addr); if (!old_pud) continue; - new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr); + new_pud = alloc_new_pud(mm, pmc->new_addr); if (!new_pud) break; if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) { if (extent == HPAGE_PUD_SIZE) { - move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr, - old_pud, new_pud, pmc->need_rmap_locks); + move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud); /* We ignore and continue on error? */ continue; } } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) { - if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr, - old_pud, new_pud, true)) + if (move_pgt_entry(pmc, NORMAL_PUD, old_pud, new_pud)) continue; } - extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr); - old_pmd = get_old_pmd(vma->vm_mm, old_addr); + extent = get_extent(NORMAL_PMD, pmc); + old_pmd = get_old_pmd(mm, pmc->old_addr); if (!old_pmd) continue; - new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); + new_pmd = alloc_new_pmd(mm, pmc->new_addr); if (!new_pmd) break; again: if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) { if (extent == HPAGE_PMD_SIZE && - move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr, - old_pmd, new_pmd, pmc->need_rmap_locks)) + move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd)) continue; - split_huge_pmd(vma, old_pmd, old_addr); + split_huge_pmd(pmc->old, old_pmd, pmc->old_addr); } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && extent == PMD_SIZE) { /* * If the extent is PMD-sized, try to speed the move by * moving at the PMD level if possible. */ - if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr, - old_pmd, new_pmd, true)) + if (move_pgt_entry(pmc, NORMAL_PMD, old_pmd, new_pmd)) continue; } if (pmd_none(*old_pmd)) continue; if (pte_alloc(pmc->new->vm_mm, new_pmd)) break; - if (move_ptes(vma, old_pmd, old_addr, old_addr + extent, - pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0) + if (move_ptes(pmc, extent, old_pmd, new_pmd) < 0) goto again; } mmu_notifier_invalidate_range_end(&range); - /* - * Prevent negative return values when {old,new}_addr was realigned - * but we broke out of the above loop for the first PMD itself. - */ - if (old_addr < old_end - pmc->len_in) - return 0; - - return pmc->len_in + old_addr - old_end; /* how much done */ + return pmc_progress(pmc); } /* Set vrm->delta to the difference in VMA size specified by user. */ From 6220ea5583e977e1eeea06c237cc440e5ac3de46 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:54 +0100 Subject: [PATCH 1223/2157] mm: factor out large folio handling from folio_order() into folio_large_order() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: MM owner tracking for large folios (!hugetlb) + CONFIG_NO_PAGE_MAPCOUNT", v3. Let's add an "easy" way to decide -- without false positives, without page-mapcounts and without page table/rmap scanning -- whether a large folio is "certainly mapped exclusively" into a single MM, or whether it "maybe mapped shared" into multiple MMs. Use that information to implement Copy-on-Write reuse, to convert folio_likely_mapped_shared() to folio_maybe_mapped_share(), and to introduce a kernel config option that lets us not use+maintain per-page mapcounts in large folios anymore. The bigger picture was presented at LSF/MM [1]. This series is effectively a follow-up on my early work [2], which implemented a more precise, but also more complicated, way to identify whether a large folio is "mapped shared" into multiple MMs or "mapped exclusively" into a single MM. 1 Patch Organization ==================== Patch #1 -> #6: make more room in order-1 folios, so we have two "unsigned long" available for our purposes Patch #7 -> #11: preparations Patch #12: MM owner tracking for large folios Patch #13: COW reuse for PTE-mapped anon THP Patch #14: folio_maybe_mapped_shared() Patch #15 -> #20: introduce and implement CONFIG_NO_PAGE_MAPCOUNT 2 MM owner tracking =================== We assign each MM a unique ID ("MM ID"), to be able to squeeze more information in our folios. On 32bit we use 15-bit IDs, on 64bit we use 31-bit IDs. For each large folios, we now store two MM-ID+mapcount ("slot") combinations: * mm0_id + mm0_mapcount * mm1_id + mm1_mapcount On 32bit, we use a 16-bit per-MM mapcount, on 64bit an ordinary 32bit mapcount. This way, we require 2x "unsigned long" on 32bit and 64bit for both slots. Paired with the large mapcount, we can reliably identify whether one of these MMs is the current owner (-> owns all mappings) or even holds all folio references (-> owns all mappings, and all references are from mappings). As long as only two MMs map folio pages at a time, we can reliably and precisely identify whether a large folio is "mapped shared" or "mapped exclusively". Any additional MM that starts mapping the folio while there are no free slots becomes an "untracked MM". If one such "untracked MM" is the last one mapping a folio exclusively, we will not detect the folio as "mapped exclusively" but instead as "maybe mapped shared". (exception: only a single mapping remains) So that's where the approach gets imprecise. For now, we use a bit-spinlock to sync the large mapcount + slots, and make sure we do keep the machinery fast, to not degrade (un)map performance drastically: for example, we make sure to only use a single atomic (when grabbing the bit-spinlock), like we would already perform when updating the large mapcount. 3 CONFIG_NO_PAGE_MAPCOUNT ========================= patch #15 -> #20 spell out and document what exactly is affected when not maintaining the per-page mapcounts in large folios anymore. Most importantly, as we cannot maintain folio->_nr_pages_mapped anymore when (un)mapping pages, we'll account a complete folio as mapped if a single page is mapped. In addition, we'll not detect partially mapped anonymous folios as such in all cases yet. Likely less relevant changes include that we might now under-estimate the USS (Unique Set Size) of a process, but never over-estimate it. The goal is to make CONFIG_NO_PAGE_MAPCOUNT the default at some point, to then slowly make it the only option, as we learn about real-life impacts and possible ways to mitigate them. 4 Performance ============= Detailed performance numbers were included in v1 [3], and not that much changed between v1 and v2. I did plenty of measurements on different systems in the meantime, that all revealed slightly different results. The pte-mapped-folio micro-benchmarks [4] are fairly sensitive to code layout changes on some systems. Especially the fork() benchmark started being more-shaky-than-before on recent kernels for some reason. In summary, with my micro-benchmarks: * Small folios are not impacted. * CoW performance seems to be mostly unchanged across all folios sizes. * CoW reuse performance of large folios now matches CoW reuse performance of small folios, because we now actually implement the CoW reuse optimization. On an Intel Xeon Silver 4210R I measured a ~65% reduction in runtime, on an arm64 system I measured ~54% reduction. * munmap() performance improves with CONFIG_NO_PAGE_MAPCOUNT. I saw double-digit % reduction (up to ~30% on an Intel Xeon Silver 4210R and up to ~70% on an AmpereOne A192-32X) with larger folios. The larger the folios, the larger the performance improvement. * munmao() performance very slightly (couple percent) degrades without CONFIG_NO_PAGE_MAPCOUNT for smaller folios. For larger folios, there seems to be no change at all. * fork() performance improves with CONFIG_NO_PAGE_MAPCOUNT. I saw double-digit % reduction (up to ~20% on an Intel Xeon Silver 4210R and up to ~10% on an AmpereOne A192-32X) with larger folios. The larger the folios, the larger the performance improvement. * While fork() performance without CONFIG_NO_PAGE_MAPCOUNT seems to be almost unchanged on some systems, I saw some degradation for smaller folios on the AmpereOne A192-32X. I did not investigate the details yet, but I suspect code layout changes or suboptimal code placement / inlining. I'm not to worried about the fork() micro-benchmarks for smaller folios given how shaky the results are lately and by how much we improved fork() performance recently. I also ran case-anon-cow-rand and case-anon-cow-seq part of vm-scalability, to assess the scalability and the impact of the bit-spinlock. My measurements on a two 2-socket 10-core Intel Xeon Silver 4210R CPU revealed no significant changes. Similarly, running these benchmarks with 2 MiB THPs enabled on the AmpereOne A192-32X with 192 cores, I got < 1% difference with < 1% stdev, which is nice. So far, I did not get my hands on a similarly large system with multiple sockets. I found no other fitting scalability benchmarks that seem to really hammer on concurrent mapping/unmapping of large folio pages like case-anon-cow-seq does. 5 Concerns ========== 5.1 Bit spinlock ---------------- I'm not quite happy about the bit-spinlock, but so far it does not seem to affect scalability in my measurements. If it ever becomes a problem we could either investigate improving the locking, or simply stopping the MM tracking once there are "too many mappings" and simply assume that the folio is "mapped shared" until it was freed. This would be similar (but slightly different) to the "0,1,2,stopped" counting idea Willy had at some point. Adding that logic to "stop tracking" adds more code to the hot path, so I avoided that for now. 5.2 folio_maybe_mapped_shared() ------------------------------- I documented the change from folio_likely_mapped_shared() to folio_maybe_mapped_shared() quite extensively. If we run into surprises, I have some ideas on how to resolve them. For now, I think we should be fine. 5.3 Added code to map/unmap hot path ------------------------------------ So far, it looks like the added code on the rmap hot path does not really seem to matter much in the bigger picture. I'd like to further reduce it (and possibly improve fork() performance further), but I don't easily see how right now. Well, and I am out of puff 🙂 Having that said, alternatives I considered (e.g., per-MM per-folio mapcount) would add a lot more overhead to these hot paths. 6 Future Work ============= 6.1 Large mapcount ------------------ It would be very handy if the large mapcount would count how often folio pages are actually mapped into page tables: a PMD on x86-64 would count 512 times. Calculating the average per-page mapcount will be easy, and remapping (PMD->PTE) folios would get even faster. That would also remove the need for the entire mapcount (except for PMD-sized folios for memory statistics reasons ...), and allow for mapping folios larger than PMDs (e.g., 4 MiB) easily. We likely would also have to take the same number of folio references to make our folio_mapcount() == folio_ref_count() work, and we'd want to be able to avoid mapcount+refcount overflows: this could already become an issue with pte-mapped PUD-sized folios (fsdax). One approach we discussed in the THP cabal meeting is (1) extending the mapcount for large folios to 64bit (at least on 64bit systems) and (2) keeping the refcount at 32bit, but (3) having exactly one reference if the the mapcount != 0. It should be doable, but there are some corner cases to consider on the unmap path; it is something that I will be looking into next. 6.2 hugetlb ----------- I'd love to make use of the same tracking also for hugetlb. The real problem is PMD table sharing: getting a page mapped by MM X and unmapped by MM Y will not work. With mshare, that problem should not exist (all mapping/unmapping will be routed through the mshare MM). [1] https://lwn.net/Articles/974223/ [2] https://lore.kernel.org/linux-mm/a9922f58-8129-4f15-b160-e0ace581bcbe@redhat.com/T/ [3] https://lkml.kernel.org/r/20240829165627.2256514-1-david@redhat.com [4] https://gitlab.com/davidhildenbrand/scratchspace/-/raw/main/pte-mapped-folio-benchmarks.c This patch (of 20): Let's factor it out into a simple helper function. This helper will also come in handy when working with code where we know that our folio is large. Maybe in the future we'll have the order readily available for small and large folios; in that case, folio_large_order() would simply translate to folio_order(). Link: https://lkml.kernel.org/r/20250303163014.1128035-1-david@redhat.com Link: https://lkml.kernel.org/r/20250303163014.1128035-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lance Yang Reviewed-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: David Hildenbrand Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index de008efd96aa..e5cdbb94ef81 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1194,6 +1194,11 @@ struct inode; extern void prep_compound_page(struct page *page, unsigned int order); +static inline unsigned int folio_large_order(const struct folio *folio) +{ + return folio->_flags_1 & 0xff; +} + /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be @@ -1207,7 +1212,7 @@ static inline unsigned int compound_order(struct page *page) if (!test_bit(PG_head, &folio->flags)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); } /** @@ -1223,7 +1228,7 @@ static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); } #include @@ -2145,7 +2150,7 @@ static inline long folio_nr_pages(const struct folio *folio) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << (folio->_flags_1 & 0xff); + return 1L << folio_large_order(folio); #endif } @@ -2170,7 +2175,7 @@ static inline unsigned long compound_nr(struct page *page) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << (folio->_flags_1 & 0xff); + return 1L << folio_large_order(folio); #endif } From 1ea5212aed0682ae1249cba9df7ad7ef539d0a7d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:55 +0100 Subject: [PATCH 1224/2157] mm: factor out large folio handling from folio_nr_pages() into folio_large_nr_pages() Let's factor it out into a simple helper function. This helper will also come in handy when working with code where we know that our folio is large. While at it, let's consistently return a "long" value from all these similar functions. Note that we cannot use "unsigned int" (even though _folio_nr_pages is of that type), because it would break some callers that do stuff like "-folio_nr_pages()". Both "int" or "unsigned long" would work as well. Maybe in the future we'll have the nr_pages readily available for all large folios, maybe even for small folios, or maybe for none. Link: https://lkml.kernel.org/r/20250303163014.1128035-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Kirill A. Shutemov Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e5cdbb94ef81..cf892c08a88c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1199,6 +1199,18 @@ static inline unsigned int folio_large_order(const struct folio *folio) return folio->_flags_1 & 0xff; } +#ifdef CONFIG_64BIT +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return folio->_folio_nr_pages; +} +#else +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return 1L << folio_large_order(folio); +} +#endif + /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be @@ -2147,11 +2159,7 @@ static inline long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << folio_large_order(folio); -#endif + return folio_large_nr_pages(folio); } /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ @@ -2166,24 +2174,20 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ -static inline unsigned long compound_nr(struct page *page) +static inline long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << folio_large_order(folio); -#endif + return folio_large_nr_pages(folio); } /** * thp_nr_pages - The number of regular pages in this huge page. * @page: The head page of a huge page. */ -static inline int thp_nr_pages(struct page *page) +static inline long thp_nr_pages(struct page *page) { return folio_nr_pages((struct folio *)page); } From 4996fc547f5b49f4a43c261dfadb02cf165cdb51 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:56 +0100 Subject: [PATCH 1225/2157] mm: let _folio_nr_pages overlay memcg_data in first tail page Let's free up some more of the "unconditionally available on 64BIT" space in order-1 folios by letting _folio_nr_pages overlay memcg_data in the first tail page (second folio page). Consequently, we have the optimization now whenever we have CONFIG_MEMCG, independent of 64BIT. We have to make sure that page->memcg on tail pages does not return "surprises". page_memcg_check() already properly refuses PageTail(). Let's do that earlier in print_page_owner_memcg() to avoid printing wrong "Slab cache page" information. No other code should touch that field on tail pages of compound pages. Reset the "_nr_pages" to 0 when splitting folios, or when freeing them back to the buddy (to avoid false page->memcg_data "bad page" reports). Note that in __split_huge_page(), folio_nr_pages() would stop working already as soon as we start messing with the subpages. Most kernel configs should have at least CONFIG_MEMCG enabled, even if disabled at runtime. 64byte "struct memmap" is what we usually have on 64BIT. While at it, rename "_folio_nr_pages" to "_nr_pages". Hopefully memdescs / dynamically allocating "strut folio" in the future will further clean this up, e.g., making _nr_pages available in all configs and maybe even in small folios. Doing that should be fairly easy on top of this change. [david@redhat.com: make "make htmldoc" happy] Link: https://lkml.kernel.org/r/a97f8a91-ec41-4796-81e3-7c9e0e491ba4@redhat.com Link: https://lkml.kernel.org/r/20250303163014.1128035-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Kirill A. Shutemov Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- include/linux/mm_types.h | 32 ++++++++++++++++++++++++-------- mm/huge_memory.c | 8 ++++++++ mm/internal.h | 4 ++-- mm/page_alloc.c | 6 +++++- mm/page_owner.c | 2 +- 6 files changed, 42 insertions(+), 14 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cf892c08a88c..c9c2ca345350 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1199,10 +1199,10 @@ static inline unsigned int folio_large_order(const struct folio *folio) return folio->_flags_1 & 0xff; } -#ifdef CONFIG_64BIT +#ifdef NR_PAGES_IN_LARGE_FOLIO static inline long folio_large_nr_pages(const struct folio *folio) { - return folio->_folio_nr_pages; + return folio->_nr_pages; } #else static inline long folio_large_nr_pages(const struct folio *folio) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b1827d78ff89..f3b519fbeca1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -287,6 +287,11 @@ typedef struct { unsigned long val; } swp_entry_t; +#if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT) +/* We have some extra room after the refcount in tail pages. */ +#define NR_PAGES_IN_LARGE_FOLIO +#endif + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -312,7 +317,7 @@ typedef struct { * @_large_mapcount: Do not use directly, call folio_mapcount(). * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). - * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_nr_pages: Do not use directly, call folio_nr_pages(). * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. @@ -376,14 +381,23 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + union { + struct { /* public: */ - atomic_t _large_mapcount; - atomic_t _entire_mapcount; - atomic_t _nr_pages_mapped; - atomic_t _pincount; -#ifdef CONFIG_64BIT - unsigned int _folio_nr_pages; -#endif + atomic_t _large_mapcount; + atomic_t _entire_mapcount; + atomic_t _nr_pages_mapped; + atomic_t _pincount; + /* private: the union with struct page is transitional */ + }; + unsigned long _usable_1[4]; + }; + atomic_t _mapcount_1; + atomic_t _refcount_1; + /* public: */ +#ifdef NR_PAGES_IN_LARGE_FOLIO + unsigned int _nr_pages; +#endif /* NR_PAGES_IN_LARGE_FOLIO */ /* private: the union with struct page is transitional */ }; struct page __page_1; @@ -435,6 +449,8 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); +FOLIO_MATCH(_mapcount, _mapcount_1); +FOLIO_MATCH(_refcount, _refcount_1); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9a15fd3453ff..2364990974ba 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3399,6 +3399,14 @@ static void __split_huge_page(struct page *page, struct list_head *list, int order = folio_order(folio); unsigned int nr = 1 << order; + /* + * Reset any memcg data overlay in the tail pages. folio_nr_pages() + * is unreliable after this point. + */ +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif + /* complete memcg works before add pages to LRU */ split_page_memcg(head, order, new_order); diff --git a/mm/internal.h b/mm/internal.h index 1ac433d2092b..1cd977413859 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -725,8 +725,8 @@ static inline void folio_set_order(struct folio *folio, unsigned int order) return; folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order; -#ifdef CONFIG_64BIT - folio->_folio_nr_pages = 1U << order; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 1U << order; #endif } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 57f959af79c5..e1135dff9a86 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1174,8 +1174,12 @@ __always_inline bool free_pages_prepare(struct page *page, if (unlikely(order)) { int i; - if (compound) + if (compound) { page[1].flags &= ~PAGE_FLAGS_SECOND; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif + } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_page_prepare(page, page + i); diff --git a/mm/page_owner.c b/mm/page_owner.c index 2d6360eaccbb..a409e2561a8f 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -507,7 +507,7 @@ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret, rcu_read_lock(); memcg_data = READ_ONCE(page->memcg_data); - if (!memcg_data) + if (!memcg_data || PageTail(page)) goto out_unlock; if (memcg_data & MEMCG_DATA_OBJEXTS) From 4eeec8c89a0c4a8c20fb13a4e7093cc8efce383d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:57 +0100 Subject: [PATCH 1226/2157] mm: move hugetlb specific things in folio to page[3] Let's just move the hugetlb specific stuff to a separate page, and stop letting it overlay other fields for now. This frees up some space in page[2], which we will use on 32bit to free up some space in page[1]. While we could move these things to page[3] instead, it's cleaner to just move the hugetlb specific things out of the way and pack the core-folio stuff as tight as possible. ... and we can minimize the work required in dump_folio. We can now avoid re-initializing &folio->_deferred_list in hugetlb code. Hopefully dynamically allocating "strut folio" in the future will further clean this up. Link: https://lkml.kernel.org/r/20250303163014.1128035-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 27 +++++++++++++++++---------- mm/hugetlb.c | 1 - mm/page_alloc.c | 5 +++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f3b519fbeca1..727322ecbfdd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -406,6 +406,16 @@ struct folio { struct { unsigned long _flags_2; unsigned long _head_2; + /* public: */ + struct list_head _deferred_list; + /* private: the union with struct page is transitional */ + }; + struct page __page_2; + }; + union { + struct { + unsigned long _flags_3; + unsigned long _head_3; /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; @@ -413,14 +423,7 @@ struct folio { void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; - struct { - unsigned long _flags_2a; - unsigned long _head_2a; - /* public: */ - struct list_head _deferred_list; - /* private: the union with struct page is transitional */ - }; - struct page __page_2; + struct page __page_3; }; }; @@ -457,8 +460,12 @@ FOLIO_MATCH(_refcount, _refcount_1); offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(flags, _flags_2a); -FOLIO_MATCH(compound_head, _head_2a); +#undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + 3 * sizeof(struct page)) +FOLIO_MATCH(flags, _flags_3); +FOLIO_MATCH(compound_head, _head_3); #undef FOLIO_MATCH /** diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a47a08e8526..438de55dd38d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1649,7 +1649,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, folio_ref_unfreeze(folio, 1); - INIT_LIST_HEAD(&folio->_deferred_list); hugetlb_free_folio(folio); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e1135dff9a86..735192222c36 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -971,6 +971,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) goto out; } break; + case 3: + /* the third tail page: hugetlb specifics overlap ->mappings */ + if (IS_ENABLED(CONFIG_HUGETLB_PAGE)) + break; + fallthrough; default: if (page->mapping != TAIL_MAPPING) { bad_page(page, "corrupted mapping in tail page"); From 31a31da8a6187f1e5448ec73222e01d7d3fed4aa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:58 +0100 Subject: [PATCH 1227/2157] mm: move _pincount in folio to page[2] on 32bit Let's free up some space on 32bit in page[1] by moving the _pincount to page[2]. For order-1 folios (never anon folios!) on 32bit, we will now also use the GUP_PIN_COUNTING_BIAS approach. A fully-mapped order-1 folio requires 2 references. With GUP_PIN_COUNTING_BIAS being 1024, we'd detect such folios as "maybe pinned" with 512 full mappings, instead of 1024 for order-0. As anon folios are out of the picture (which are the most relevant users of checking for pinnings on *mapped* pages) and we are talking about 32bit, this is not expected to cause any trouble. In __dump_page(), copy one additional folio page if we detect a folio with an order > 1, so we can dump the pincount on order > 1 folios reliably. Note that THPs on 32bit are not particularly common (and we don't care too much about performance), but we want to keep it working reliably, because likely we want to use large folios there as well in the future, independent of PMD leaf support. Once we dynamically allocate "struct folio", fortunately the 32bit specifics will likely go away again; even small folios could then have a pincount and folio_has_pincount() would essentially always return "true". Link: https://lkml.kernel.org/r/20250303163014.1128035-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 +++++++++-- include/linux/mm_types.h | 5 +++++ mm/debug.c | 10 +++++++++- mm/gup.c | 8 ++++---- mm/internal.h | 3 ++- mm/page_alloc.c | 14 +++++++++++--- 6 files changed, 40 insertions(+), 11 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index c9c2ca345350..860082ba8978 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2010,6 +2010,13 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } +static inline bool folio_has_pincount(const struct folio *folio) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return folio_test_large(folio); + return folio_order(folio) > 1; +} + /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -2026,7 +2033,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) * get that many refcounts, and b) all the callers of this routine are * expected to be able to deal gracefully with a false positive. * - * For large folios, the result will be exactly correct. That's because + * For most large folios, the result will be exactly correct. That's because * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * @@ -2037,7 +2044,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) */ static inline bool folio_maybe_dma_pinned(struct folio *folio) { - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) return atomic_read(&folio->_pincount) > 0; /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 727322ecbfdd..3ea2019a1aac 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -387,7 +387,9 @@ struct folio { atomic_t _large_mapcount; atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; +#ifdef CONFIG_64BIT atomic_t _pincount; +#endif /* CONFIG_64BIT */ /* private: the union with struct page is transitional */ }; unsigned long _usable_1[4]; @@ -408,6 +410,9 @@ struct folio { unsigned long _head_2; /* public: */ struct list_head _deferred_list; +#ifndef CONFIG_64BIT + atomic_t _pincount; +#endif /* !CONFIG_64BIT */ /* private: the union with struct page is transitional */ }; struct page __page_2; diff --git a/mm/debug.c b/mm/debug.c index 2d1bd67d957b..83ef3bd0ccd3 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -79,12 +79,17 @@ static void __dump_folio(struct folio *folio, struct page *page, folio_ref_count(folio), mapcount, mapping, folio->index + idx, pfn); if (folio_test_large(folio)) { + int pincount = 0; + + if (folio_has_pincount(folio)) + pincount = atomic_read(&folio->_pincount); + pr_warn("head: order:%u mapcount:%d entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n", folio_order(folio), folio_mapcount(folio), folio_entire_mapcount(folio), folio_nr_pages_mapped(folio), - atomic_read(&folio->_pincount)); + pincount); } #ifdef CONFIG_MEMCG @@ -146,6 +151,9 @@ static void __dump_page(const struct page *page) if (idx < MAX_FOLIO_NR_PAGES) { memcpy(&folio, foliop, 2 * sizeof(struct page)); nr_pages = folio_nr_pages(&folio); + if (nr_pages > 1) + memcpy(&folio.__page_2, &foliop->__page_2, + sizeof(struct page)); foliop = &folio; } diff --git a/mm/gup.c b/mm/gup.c index e5040657870e..2944fe8cf317 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -109,7 +109,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) if (is_zero_folio(folio)) return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; @@ -164,7 +164,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs, * Increment the normal page refcount field at least once, * so that the page really is pinned. */ - if (folio_test_large(folio)) { + if (folio_has_pincount(folio)) { folio_ref_add(folio, refs); atomic_add(refs, &folio->_pincount); } else { @@ -223,7 +223,7 @@ void folio_add_pin(struct folio *folio) * page refcount field at least once, so that the page really is * pinned. */ - if (folio_test_large(folio)) { + if (folio_has_pincount(folio)) { WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1); folio_ref_inc(folio); atomic_inc(&folio->_pincount); @@ -575,7 +575,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs, * is pinned. That's why the refcount from the earlier * try_get_folio() is left intact. */ - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) atomic_add(refs, &folio->_pincount); else folio_ref_add(folio, diff --git a/mm/internal.h b/mm/internal.h index 1cd977413859..2d44a4c9d282 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -764,7 +764,8 @@ static inline void prep_compound_head(struct page *page, unsigned int order) atomic_set(&folio->_large_mapcount, -1); atomic_set(&folio->_entire_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); - atomic_set(&folio->_pincount, 0); + if (IS_ENABLED(CONFIG_64BIT) || order > 1) + atomic_set(&folio->_pincount, 0); if (order > 1) INIT_LIST_HEAD(&folio->_deferred_list); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 735192222c36..2a9aa4439a66 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -959,9 +959,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) bad_page(page, "nonzero nr_pages_mapped"); goto out; } - if (unlikely(atomic_read(&folio->_pincount))) { - bad_page(page, "nonzero pincount"); - goto out; + if (IS_ENABLED(CONFIG_64BIT)) { + if (unlikely(atomic_read(&folio->_pincount))) { + bad_page(page, "nonzero pincount"); + goto out; + } } break; case 2: @@ -970,6 +972,12 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) bad_page(page, "on deferred list"); goto out; } + if (!IS_ENABLED(CONFIG_64BIT)) { + if (unlikely(atomic_read(&folio->_pincount))) { + bad_page(page, "nonzero pincount"); + goto out; + } + } break; case 3: /* the third tail page: hugetlb specifics overlap ->mappings */ From 845d2be6d41f016da670dcc4c8f5357c22172be8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:29:59 +0100 Subject: [PATCH 1228/2157] mm: move _entire_mapcount in folio to page[2] on 32bit Let's free up some space on 32bit in page[1] by moving the _pincount to page[2]. Ordinary folios only use the entire mapcount with PMD mappings, so order-1 folios don't apply. Similarly, hugetlb folios are always larger than order-1, turning the entire mapcount essentially unused for all order-1 folios. Moving it to order-1 folios will not change anything. On 32bit, simply check in folio_entire_mapcount() whether we have an order-1 folio, and return 0 in that case. Note that THPs on 32bit are not particularly common (and we don't care too much about performance), but we want to keep it working reliably, because likely we want to use large folios there as well in the future, independent of PMD leaf support. Once we dynamically allocate "struct folio", the 32bit specifics will go away again; even small folios could then have a pincount. Link: https://lkml.kernel.org/r/20250303163014.1128035-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ include/linux/mm_types.h | 3 ++- mm/internal.h | 5 +++-- mm/page_alloc.c | 12 ++++++++---- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 860082ba8978..f366c180f2b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1333,6 +1333,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline int folio_entire_mapcount(const struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1)) + return 0; return atomic_read(&folio->_entire_mapcount) + 1; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3ea2019a1aac..9499eb8e8e66 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -385,9 +385,9 @@ struct folio { struct { /* public: */ atomic_t _large_mapcount; - atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; #ifdef CONFIG_64BIT + atomic_t _entire_mapcount; atomic_t _pincount; #endif /* CONFIG_64BIT */ /* private: the union with struct page is transitional */ @@ -411,6 +411,7 @@ struct folio { /* public: */ struct list_head _deferred_list; #ifndef CONFIG_64BIT + atomic_t _entire_mapcount; atomic_t _pincount; #endif /* !CONFIG_64BIT */ /* private: the union with struct page is transitional */ diff --git a/mm/internal.h b/mm/internal.h index 2d44a4c9d282..fcf0aeae3934 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -762,10 +762,11 @@ static inline void prep_compound_head(struct page *page, unsigned int order) folio_set_order(folio, order); atomic_set(&folio->_large_mapcount, -1); - atomic_set(&folio->_entire_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); - if (IS_ENABLED(CONFIG_64BIT) || order > 1) + if (IS_ENABLED(CONFIG_64BIT) || order > 1) { atomic_set(&folio->_pincount, 0); + atomic_set(&folio->_entire_mapcount, -1); + } if (order > 1) INIT_LIST_HEAD(&folio->_deferred_list); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2a9aa4439a66..e456a43811fd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -947,10 +947,6 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) switch (page - head_page) { case 1: /* the first tail page: these may be in place of ->mapping */ - if (unlikely(folio_entire_mapcount(folio))) { - bad_page(page, "nonzero entire_mapcount"); - goto out; - } if (unlikely(folio_large_mapcount(folio))) { bad_page(page, "nonzero large_mapcount"); goto out; @@ -960,6 +956,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) goto out; } if (IS_ENABLED(CONFIG_64BIT)) { + if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) { + bad_page(page, "nonzero entire_mapcount"); + goto out; + } if (unlikely(atomic_read(&folio->_pincount))) { bad_page(page, "nonzero pincount"); goto out; @@ -973,6 +973,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) goto out; } if (!IS_ENABLED(CONFIG_64BIT)) { + if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) { + bad_page(page, "nonzero entire_mapcount"); + goto out; + } if (unlikely(atomic_read(&folio->_pincount))) { bad_page(page, "nonzero pincount"); goto out; From 405c4ef769c7c5e83e3556b48a190fe1afe82425 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:00 +0100 Subject: [PATCH 1229/2157] mm/rmap: pass dst_vma to folio_dup_file_rmap_pte() and friends We'll need access to the destination MM when modifying the large mapcount of a non-hugetlb large folios next. So pass in the destination VMA. Link: https://lkml.kernel.org/r/20250303163014.1128035-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/rmap.h | 42 +++++++++++++++++++++++++----------------- mm/huge_memory.c | 2 +- mm/memory.c | 10 +++++----- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6abf7960077a..e795610bade8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -335,7 +335,8 @@ static inline void hugetlb_remove_rmap(struct folio *folio) } static __always_inline void __folio_dup_file_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + enum rmap_level level) { const int orig_nr_pages = nr_pages; @@ -366,45 +367,47 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); } /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_pmd(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma, - enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, enum rmap_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -470,6 +473,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) @@ -488,16 +492,18 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, - RMAP_LEVEL_PTE); + return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, + src_vma, RMAP_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, 1, src_vma, + return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, RMAP_LEVEL_PTE); } @@ -506,6 +512,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mapping is duplicated * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) @@ -524,11 +531,12 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, - RMAP_LEVEL_PMD); + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, + src_vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2364990974ba..22e4e1194e9e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1782,7 +1782,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, src_folio = page_folio(src_page); folio_get(src_folio); - if (unlikely(folio_try_dup_anon_rmap_pmd(src_folio, src_page, src_vma))) { + if (unlikely(folio_try_dup_anon_rmap_pmd(src_folio, src_page, dst_vma, src_vma))) { /* Page maybe pinned: split and retry the fault on PTEs. */ folio_put(src_folio); pte_free(dst_mm, pgtable); diff --git a/mm/memory.c b/mm/memory.c index 8d1ea1dd6b52..f745f5e28f0c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -864,7 +864,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, folio_get(folio); rss[mm_counter(folio)]++; /* Cannot fail as these pages cannot get pinned. */ - folio_try_dup_anon_rmap_pte(folio, page, src_vma); + folio_try_dup_anon_rmap_pte(folio, page, dst_vma, src_vma); /* * We do not preserve soft-dirty information, because so @@ -1018,14 +1018,14 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma folio_ref_add(folio, nr); if (folio_test_anon(folio)) { if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page, - nr, src_vma))) { + nr, dst_vma, src_vma))) { folio_ref_sub(folio, nr); return -EAGAIN; } rss[MM_ANONPAGES] += nr; VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio); } else { - folio_dup_file_rmap_ptes(folio, page, nr); + folio_dup_file_rmap_ptes(folio, page, nr, dst_vma); rss[mm_counter_file(folio)] += nr; } if (any_writable) @@ -1043,7 +1043,7 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma * guarantee the pinned page won't be randomly replaced in the * future. */ - if (unlikely(folio_try_dup_anon_rmap_pte(folio, page, src_vma))) { + if (unlikely(folio_try_dup_anon_rmap_pte(folio, page, dst_vma, src_vma))) { /* Page may be pinned, we have to copy. */ folio_put(folio); err = copy_present_page(dst_vma, src_vma, dst_pte, src_pte, @@ -1053,7 +1053,7 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma rss[MM_ANONPAGES]++; VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio); } else { - folio_dup_file_rmap_pte(folio, page); + folio_dup_file_rmap_pte(folio, page, dst_vma); rss[mm_counter_file(folio)]++; } From 1862a4af107ec8fc090f291fa9273c3f91c406a0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:01 +0100 Subject: [PATCH 1230/2157] mm/rmap: pass vma to __folio_add_rmap() We'll need access to the destination MM when modifying the mapcount large folios next. So pass in the VMA. Link: https://lkml.kernel.org/r/20250303163014.1128035-9-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- mm/rmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index bcec8677f68d..8a7d023b02e0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1242,8 +1242,8 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, } static __always_inline unsigned int __folio_add_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level, - int *nr_pmdmapped) + struct page *page, int nr_pages, struct vm_area_struct *vma, + enum rmap_level level, int *nr_pmdmapped) { atomic_t *mapped = &folio->_nr_pages_mapped; const int orig_nr_pages = nr_pages; @@ -1411,7 +1411,7 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio, VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); - nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); + nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped); if (likely(!folio_test_ksm(folio))) __page_check_anon_rmap(folio, page, vma, address); @@ -1582,7 +1582,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio, VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); - nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); + nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped); __folio_mod_stat(folio, nr, nr_pmdmapped); /* See comments in folio_add_anon_rmap_*() */ From 932961c4b666937e27f7ec5358fb7aabf0f17d41 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:02 +0100 Subject: [PATCH 1231/2157] mm/rmap: abstract large mapcount operations for large folios (!hugetlb) Let's abstract the operations so we can extend these operations easily. Link: https://lkml.kernel.org/r/20250303163014.1128035-10-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/rmap.h | 32 ++++++++++++++++++++++++++++---- mm/rmap.c | 14 ++++++-------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e795610bade8..d1e888cc97a5 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -173,6 +173,30 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, struct anon_vma *folio_get_anon_vma(const struct folio *folio); +static inline void folio_set_large_mapcount(struct folio *folio, int mapcount, + struct vm_area_struct *vma) +{ + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); +} + +static inline void folio_add_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_add(diff, &folio->_large_mapcount); +} + +static inline void folio_sub_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_sub(diff, &folio->_large_mapcount); +} + +#define folio_inc_large_mapcount(folio, vma) \ + folio_add_large_mapcount(folio, 1, vma) +#define folio_dec_large_mapcount(folio, vma) \ + folio_sub_large_mapcount(folio, 1, vma) + /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -352,12 +376,12 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, do { atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } } @@ -451,7 +475,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, ClearPageAnonExclusive(page); atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: @@ -461,7 +485,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } return 0; diff --git a/mm/rmap.c b/mm/rmap.c index 8a7d023b02e0..08846b7eced6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1266,7 +1266,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED) nr = first; - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, vma); break; case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: @@ -1290,7 +1290,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, nr = 0; } } - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, vma); break; } return nr; @@ -1556,14 +1556,12 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, SetPageAnonExclusive(page); } - /* increment count (starts at -1) */ - atomic_set(&folio->_large_mapcount, nr - 1); + folio_set_large_mapcount(folio, nr, vma); atomic_set(&folio->_nr_pages_mapped, nr); } else { /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); - /* increment count (starts at -1) */ - atomic_set(&folio->_large_mapcount, 0); + folio_set_large_mapcount(folio, 1, vma); atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); if (exclusive) SetPageAnonExclusive(&folio->page); @@ -1665,7 +1663,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, break; } - atomic_sub(nr_pages, &folio->_large_mapcount); + folio_sub_large_mapcount(folio, nr_pages, vma); do { last += atomic_add_negative(-1, &page->_mapcount); } while (page++, --nr_pages > 0); @@ -1678,7 +1676,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, break; case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: - atomic_dec(&folio->_large_mapcount); + folio_dec_large_mapcount(folio, vma); last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); From b85aa9b11b67ed4b086bf5366392adec1b4a6a81 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:03 +0100 Subject: [PATCH 1232/2157] bit_spinlock: __always_inline (un)lock functions The compiler might decide that it is a smart idea to not inline bit_spin_lock(), primarily when a couple of functions in the same file end up calling it. Especially when used in RMAP map/unmap code next, the compiler sometimes decides to not inline, which is then observable in some micro-benchmarks. Let's simply flag all lock/unlock functions as __always_inline; arch_test_and_set_bit_lock() and friends are already tagged like that (but not test_and_set_bit_lock() for some reason). If ever a problem, we could split it into a fast and a slow path, and only force the fast path to be inlined. But there is nothing particularly "big" here. Link: https://lkml.kernel.org/r/20250303163014.1128035-11-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/bit_spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index bbc4730a6505..c0989b5b0407 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -13,7 +13,7 @@ * Don't use this unless you really need to: spin_lock() and spin_unlock() * are significantly faster. */ -static inline void bit_spin_lock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr) { /* * Assuming the lock is uncontended, this never enters @@ -38,7 +38,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) /* * Return true if it was acquired */ -static inline int bit_spin_trylock(int bitnum, unsigned long *addr) +static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) { preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -54,7 +54,7 @@ static inline int bit_spin_trylock(int bitnum, unsigned long *addr) /* * bit-based spin_unlock() */ -static inline void bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); @@ -71,7 +71,7 @@ static inline void bit_spin_unlock(int bitnum, unsigned long *addr) * non-atomic version, which can be used eg. if the bit lock itself is * protecting the rest of the flags in the word. */ -static inline void __bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); From 448854478ab2f4770f4e8b2bebff97c44e5bc54a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:04 +0100 Subject: [PATCH 1233/2157] mm/rmap: use folio_large_nr_pages() in add/remove functions Let's just use the "large" variant in code where we are sure that we have a large folio in our hands: this way we are sure that we don't perform any unnecessary "large" checks. While at it, convert the VM_BUG_ON_VMA to a VM_WARN_ON_ONCE. Maybe in the future there will not be a difference in that regard between large and small folios; in that case, unifying the handling again will be easy. E.g., folio_large_nr_pages() will simply translate to folio_nr_pages() until we replace all instances. Link: https://lkml.kernel.org/r/20250303163014.1128035-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Kirill A. Shutemov Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- mm/rmap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 08846b7eced6..c9922928616e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1274,7 +1274,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, if (first) { nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) { - nr_pages = folio_nr_pages(folio); + nr_pages = folio_large_nr_pages(folio); /* * We only track PMD mappings of PMD-sized * folios separately. @@ -1522,14 +1522,11 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page, void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { - const int nr = folio_nr_pages(folio); const bool exclusive = flags & RMAP_EXCLUSIVE; - int nr_pmdmapped = 0; + int nr = 1, nr_pmdmapped = 0; VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio); - VM_BUG_ON_VMA(address < vma->vm_start || - address + (nr << PAGE_SHIFT) > vma->vm_end, vma); /* * VM_DROPPABLE mappings don't swap; instead they're just dropped when @@ -1547,6 +1544,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, } else if (!folio_test_pmd_mappable(folio)) { int i; + nr = folio_large_nr_pages(folio); for (i = 0; i < nr; i++) { struct page *page = folio_page(folio, i); @@ -1559,6 +1557,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, folio_set_large_mapcount(folio, nr, vma); atomic_set(&folio->_nr_pages_mapped, nr); } else { + nr = folio_large_nr_pages(folio); /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); folio_set_large_mapcount(folio, 1, vma); @@ -1568,6 +1567,9 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, nr_pmdmapped = nr; } + VM_WARN_ON_ONCE(address < vma->vm_start || + address + (nr << PAGE_SHIFT) > vma->vm_end); + __folio_mod_stat(folio, nr, nr_pmdmapped); mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1); } @@ -1681,7 +1683,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, if (last) { nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED)) { - nr_pages = folio_nr_pages(folio); + nr_pages = folio_large_nr_pages(folio); if (level == RMAP_LEVEL_PMD) nr_pmdmapped = nr_pages; nr = nr_pages - (nr & FOLIO_PAGES_MAPPED); From 6af8cb80d3a9a6bbd521d8a7c949b4eafb7dba5d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:05 +0100 Subject: [PATCH 1234/2157] mm/rmap: basic MM owner tracking for large folios (!hugetlb) For small folios, we traditionally use the mapcount to decide whether it was "certainly mapped exclusively" by a single MM (mapcount == 1) or whether it "maybe mapped shared" by multiple MMs (mapcount > 1). For PMD-sized folios that were PMD-mapped, we were able to use a similar mechanism (single PMD mapping), but for PTE-mapped folios and in the future folios that span multiple PMDs, this does not work. So we need a different mechanism to handle large folios. Let's add a new mechanism to detect whether a large folio is "certainly mapped exclusively", or whether it is "maybe mapped shared". We'll use this information next to optimize CoW reuse for PTE-mapped anonymous THP, and to convert folio_likely_mapped_shared() to folio_maybe_mapped_shared(), independent of per-page mapcounts. For each large folio, we'll have two slots, whereby a slot stores: (1) an MM id: unique id assigned to each MM (2) a per-MM mapcount If a slot is unoccupied, it can be taken by the next MM that maps folio page. In addition, we'll remember the current state -- "mapped exclusively" vs. "maybe mapped shared" -- and use a bit spinlock to sync on updates and to reduce the total number of atomic accesses on updates. In the future, it might be possible to squeeze a proper spinlock into "struct folio". For now, keep it simple, as we require the whole thing with THP only, that is incompatible with RT. As we have to squeeze this information into the "struct folio" of even folios of order-1 (2 pages), and we generally want to reduce the required metadata, we'll assign each MM a unique ID that can fit into an int. In total, we can squeeze everything into 4x int (2x long) on 64bit. 32bit support is a bit challenging, because we only have 2x long == 2x int in order-1 folios. But we can make it work for now, because we neither expect many MMs nor very large folios on 32bit. We will reliably detect folios as "mapped exclusively" vs. "mapped shared" as long as only two MMs map pages of a folio at one point in time -- for example with fork() and short-lived child processes, or with apps that hand over state from one instance to another. As soon as three MMs are involved at the same time, we might detect "maybe mapped shared" although the folio is "mapped exclusively". Example 1: (1) App1 faults in a (shmem/file-backed) folio page -> Tracked as MM0 (2) App2 faults in a folio page -> Tracked as MM1 (4) App1 unmaps all folio pages -> We will detect "mapped exclusively". Example 2: (1) App1 faults in a (shmem/file-backed) folio page -> Tracked as MM0 (2) App2 faults in a folio page -> Tracked as MM1 (3) App3 faults in a folio page -> No slot available, tracked as "unknown" (4) App1 and App2 unmap all folio pages -> We will detect "maybe mapped shared". Make use of __always_inline to keep possible performance degradation when (un)mapping large folios to a minimum. Note: by squeezing the two flags into the "unsigned long" that stores the MM ids, we can use non-atomic __bit_spin_unlock() and non-atomic setting/clearing of the "maybe mapped shared" bit, effectively not adding any new atomics on the hot path when updating the large mapcount + new metadata, which further helps reduce the runtime overhead in micro-benchmarks. Link: https://lkml.kernel.org/r/20250303163014.1128035-13-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- Documentation/mm/transhuge.rst | 8 ++ include/linux/mm_types.h | 49 ++++++++++ include/linux/page-flags.h | 4 + include/linux/rmap.h | 165 +++++++++++++++++++++++++++++++++ kernel/fork.c | 36 +++++++ mm/Kconfig | 4 + mm/internal.h | 5 + mm/page_alloc.c | 10 ++ 8 files changed, 281 insertions(+) diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index a2cd8800d527..baa17d718a76 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -120,11 +120,19 @@ pages: and also increment/decrement folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount goes from -1 to 0 or 0 to -1. + We also maintain the two slots for tracking MM owners (MM ID and + corresponding mapcount), and the current status ("maybe mapped shared" vs. + "mapped exclusively"). + - map/unmap of individual pages with PTE entry increment/decrement page->_mapcount, increment/decrement folio->_large_mapcount and also increment/decrement folio->_nr_pages_mapped when page->_mapcount goes from -1 to 0 or 0 to -1 as this counts the number of pages mapped by PTE. + We also maintain the two slots for tracking MM owners (MM ID and + corresponding mapcount), and the current status ("maybe mapped shared" vs. + "mapped exclusively"). + split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page structures. It can be done easily for refcounts taken by page table diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9499eb8e8e66..aac7c87b04e1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -292,6 +292,44 @@ typedef struct { #define NR_PAGES_IN_LARGE_FOLIO #endif +/* + * On 32bit, we can cut the required metadata in half, because: + * (a) PID_MAX_LIMIT implicitly limits the number of MMs we could ever have, + * so we can limit MM IDs to 15 bit (32767). + * (b) We don't expect folios where even a single complete PTE mapping by + * one MM would exceed 15 bits (order-15). + */ +#ifdef CONFIG_64BIT +typedef int mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX INT_MAX +typedef unsigned int mm_id_t; +#else /* !CONFIG_64BIT */ +typedef short mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX SHRT_MAX +typedef unsigned short mm_id_t; +#endif /* CONFIG_64BIT */ + +/* We implicitly use the dummy ID for init-mm etc. where we never rmap pages. */ +#define MM_ID_DUMMY 0 +#define MM_ID_MIN (MM_ID_DUMMY + 1) + +/* + * We leave the highest bit of each MM id unused, so we can store a flag + * in the highest bit of each folio->_mm_id[]. + */ +#define MM_ID_BITS ((sizeof(mm_id_t) * BITS_PER_BYTE) - 1) +#define MM_ID_MASK ((1U << MM_ID_BITS) - 1) +#define MM_ID_MAX MM_ID_MASK + +/* + * In order to use bit_spin_lock(), which requires an unsigned long, we + * operate on folio->_mm_ids when working on flags. + */ +#define FOLIO_MM_IDS_LOCK_BITNUM MM_ID_BITS +#define FOLIO_MM_IDS_LOCK_BIT BIT(FOLIO_MM_IDS_LOCK_BITNUM) +#define FOLIO_MM_IDS_SHARED_BITNUM (2 * MM_ID_BITS + 1) +#define FOLIO_MM_IDS_SHARED_BIT BIT(FOLIO_MM_IDS_SHARED_BITNUM) + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -318,6 +356,9 @@ typedef struct { * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_nr_pages: Do not use directly, call folio_nr_pages(). + * @_mm_id: Do not use outside of rmap code. + * @_mm_ids: Do not use outside of rmap code. + * @_mm_id_mapcount: Do not use outside of rmap code. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. @@ -390,6 +431,11 @@ struct folio { atomic_t _entire_mapcount; atomic_t _pincount; #endif /* CONFIG_64BIT */ + mm_id_mapcount_t _mm_id_mapcount[2]; + union { + mm_id_t _mm_id[2]; + unsigned long _mm_ids; + }; /* private: the union with struct page is transitional */ }; unsigned long _usable_1[4]; @@ -1114,6 +1160,9 @@ struct mm_struct { #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ +#ifdef CONFIG_MM_ID + mm_id_t mm_id; +#endif /* CONFIG_MM_ID */ } __randomize_layout; /* diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index cab382bd965e..f26ce54c7aa4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1185,6 +1185,10 @@ static inline int folio_has_private(const struct folio *folio) return !!(folio->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio) +{ + return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); +} #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL diff --git a/include/linux/rmap.h b/include/linux/rmap.h index d1e888cc97a5..c131b0efff0f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -13,6 +13,7 @@ #include #include #include +#include /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -173,6 +174,169 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, struct anon_vma *folio_get_anon_vma(const struct folio *folio); +#ifdef CONFIG_MM_ID +static __always_inline void folio_lock_large_mapcount(struct folio *folio) +{ + bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static __always_inline void folio_unlock_large_mapcount(struct folio *folio) +{ + __bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static inline unsigned int folio_mm_id(const struct folio *folio, int idx) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + return folio->_mm_id[idx] & MM_ID_MASK; +} + +static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + folio->_mm_id[idx] &= ~MM_ID_MASK; + folio->_mm_id[idx] |= id; +} + +static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio, + int diff, mm_id_t mm_id) +{ + VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio)); + VM_WARN_ON_ONCE(diff <= 0); + VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX); + + /* + * Make sure we can detect at least one complete PTE mapping of the + * folio in a single MM as "exclusively mapped". This is primarily + * a check on 32bit, where we currently reduce the size of the per-MM + * mapcount to a short. + */ + VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio)); + VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY && + folio->_mm_id_mapcount[0] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY && + folio->_mm_id_mapcount[0] < 0); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY && + folio->_mm_id_mapcount[1] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY && + folio->_mm_id_mapcount[1] < 0); + VM_WARN_ON_ONCE(!folio_mapped(folio) && + folio_test_large_maybe_mapped_shared(folio)); +} + +static __always_inline void folio_set_large_mapcount(struct folio *folio, + int mapcount, struct vm_area_struct *vma) +{ + __folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY); + + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); + folio->_mm_id_mapcount[0] = mapcount - 1; + folio_set_mm_id(folio, 0, vma->vm_mm->mm_id); +} + +static __always_inline void folio_add_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * If a folio is mapped more than once into an MM on 32bit, we + * can in theory overflow the per-MM mapcount (although only for + * fairly large folios), turning it negative. In that case, just + * free up the slot and mark the folio "mapped shared", otherwise + * we might be in trouble when unmapping pages later. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) { + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) { + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 0, mm_id); + folio->_mm_id_mapcount[0] = diff - 1; + /* We might have other mappings already. */ + if (new_mapcount_val != diff - 1) + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 1, mm_id); + folio->_mm_id_mapcount[1] = diff - 1; + /* Slot 0 certainly has mappings as well. */ + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + folio_unlock_large_mapcount(folio); +} + +static __always_inline void folio_sub_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * There are valid corner cases where we might underflow a per-MM + * mapcount (some mappings added when no slot was free, some mappings + * added once a slot was free), so we always set it to -1 once we go + * negative. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] -= diff; + if (folio->_mm_id_mapcount[0] >= 0) + goto out; + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] -= diff; + if (folio->_mm_id_mapcount[1] >= 0) + goto out; + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + } + + /* + * If one MM slot owns all mappings, the folio is mapped exclusively. + * Note that if the folio is now unmapped (new_mapcount_val == -1), both + * slots must be free (mapcount == -1), and we'll also mark it as + * exclusive. + */ + if (folio->_mm_id_mapcount[0] == new_mapcount_val || + folio->_mm_id_mapcount[1] == new_mapcount_val) + folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT; +out: + folio_unlock_large_mapcount(folio); +} +#else /* !CONFIG_MM_ID */ +/* + * See __folio_rmap_sanity_checks(), we might map large folios even without + * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now. + */ static inline void folio_set_large_mapcount(struct folio *folio, int mapcount, struct vm_area_struct *vma) { @@ -191,6 +355,7 @@ static inline void folio_sub_large_mapcount(struct folio *folio, { atomic_sub(diff, &folio->_large_mapcount); } +#endif /* CONFIG_MM_ID */ #define folio_inc_large_mapcount(folio, vma) \ folio_add_large_mapcount(folio, 1, vma) diff --git a/kernel/fork.c b/kernel/fork.c index 364b2d4fd3ef..f9cf0f056eb6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -802,6 +802,36 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ +#ifdef CONFIG_MM_ID +static DEFINE_IDA(mm_ida); + +static inline int mm_alloc_id(struct mm_struct *mm) +{ + int ret; + + ret = ida_alloc_range(&mm_ida, MM_ID_MIN, MM_ID_MAX, GFP_KERNEL); + if (ret < 0) + return ret; + mm->mm_id = ret; + return 0; +} + +static inline void mm_free_id(struct mm_struct *mm) +{ + const mm_id_t id = mm->mm_id; + + mm->mm_id = MM_ID_DUMMY; + if (id == MM_ID_DUMMY) + return; + if (WARN_ON_ONCE(id < MM_ID_MIN || id > MM_ID_MAX)) + return; + ida_free(&mm_ida, id); +} +#else /* !CONFIG_MM_ID */ +static inline int mm_alloc_id(struct mm_struct *mm) { return 0; } +static inline void mm_free_id(struct mm_struct *mm) {} +#endif /* CONFIG_MM_ID */ + static void check_mm(struct mm_struct *mm) { int i; @@ -905,6 +935,7 @@ void __mmdrop(struct mm_struct *mm) WARN_ON_ONCE(mm == current->active_mm); mm_free_pgd(mm); + mm_free_id(mm); destroy_context(mm); mmu_notifier_subscriptions_destroy(mm); check_mm(mm); @@ -1289,6 +1320,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, if (mm_alloc_pgd(mm)) goto fail_nopgd; + if (mm_alloc_id(mm)) + goto fail_noid; + if (init_new_context(p, mm)) goto fail_nocontext; @@ -1308,6 +1342,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, fail_cid: destroy_context(mm); fail_nocontext: + mm_free_id(mm); +fail_noid: mm_free_pgd(mm); fail_nopgd: free_mm(mm); diff --git a/mm/Kconfig b/mm/Kconfig index 4c1640a197a0..a25c5476b3ad 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -815,11 +815,15 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_WANTS_THP_SWAP def_bool n +config MM_ID + def_bool n + menuconfig TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT select COMPACTION select XARRAY_MULTI + select MM_ID help Transparent Hugepages allows the kernel to use huge pages and huge tlb transparently to the applications whenever possible. diff --git a/mm/internal.h b/mm/internal.h index fcf0aeae3934..04724971379c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -763,6 +763,11 @@ static inline void prep_compound_head(struct page *page, unsigned int order) folio_set_order(folio, order); atomic_set(&folio->_large_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); + if (IS_ENABLED(CONFIG_MM_ID)) { + folio->_mm_ids = 0; + folio->_mm_id_mapcount[0] = -1; + folio->_mm_id_mapcount[1] = -1; + } if (IS_ENABLED(CONFIG_64BIT) || order > 1) { atomic_set(&folio->_pincount, 0); atomic_set(&folio->_entire_mapcount, -1); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e456a43811fd..c8daa3e64266 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -955,6 +955,16 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) bad_page(page, "nonzero nr_pages_mapped"); goto out; } + if (IS_ENABLED(CONFIG_MM_ID)) { + if (unlikely(folio->_mm_id_mapcount[0] != -1)) { + bad_page(page, "nonzero mm mapcount 0"); + goto out; + } + if (unlikely(folio->_mm_id_mapcount[1] != -1)) { + bad_page(page, "nonzero mm mapcount 1"); + goto out; + } + } if (IS_ENABLED(CONFIG_64BIT)) { if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) { bad_page(page, "nonzero entire_mapcount"); From 1da190f4d0a604ac919e63731441201bd08ef4ac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:06 +0100 Subject: [PATCH 1235/2157] mm: Copy-on-Write (COW) reuse support for PTE-mapped THP Currently, we never end up reusing PTE-mapped THPs after fork. This wasn't really a problem with PMD-sized THPs, because they would have to be PTE-mapped first, but it's getting a problem with smaller THP sizes that are effectively always PTE-mapped. With our new "mapped exclusively" vs "maybe mapped shared" logic for large folios, implementing CoW reuse for PTE-mapped THPs is straight forward: if exclusively mapped, make sure that all references are from these (our) mappings. Add some helpful comments to explain the details. CONFIG_TRANSPARENT_HUGEPAGE selects CONFIG_MM_ID. If we spot an anon large folio without CONFIG_TRANSPARENT_HUGEPAGE in that code, something is seriously messed up. There are plenty of things we can optimize in the future: For example, we could remember that the folio is fully exclusive so we could speedup the next fault further. Also, we could try "faulting around", turning surrounding PTEs that map the same folio writable. But especially the latter might increase COW latency, so it would need further investigation. Link: https://lkml.kernel.org/r/20250303163014.1128035-14-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- mm/memory.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 9 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index f745f5e28f0c..5d5a2d81f05a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3727,18 +3727,85 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio) return ret; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static bool __wp_can_reuse_large_anon_folio(struct folio *folio, + struct vm_area_struct *vma) +{ + bool exclusive = false; + + /* Let's just free up a large folio if only a single page is mapped. */ + if (folio_large_mapcount(folio) <= 1) + return false; + + /* + * The assumption for anonymous folios is that each page can only get + * mapped once into each MM. The only exception are KSM folios, which + * are always small. + * + * Each taken mapcount must be paired with exactly one taken reference, + * whereby the refcount must be incremented before the mapcount when + * mapping a page, and the refcount must be decremented after the + * mapcount when unmapping a page. + * + * If all folio references are from mappings, and all mappings are in + * the page tables of this MM, then this folio is exclusive to this MM. + */ + if (folio_test_large_maybe_mapped_shared(folio)) + return false; + + VM_WARN_ON_ONCE(folio_test_ksm(folio)); + VM_WARN_ON_ONCE(folio_mapcount(folio) > folio_nr_pages(folio)); + VM_WARN_ON_ONCE(folio_entire_mapcount(folio)); + + if (unlikely(folio_test_swapcache(folio))) { + /* + * Note: freeing up the swapcache will fail if some PTEs are + * still swap entries. + */ + if (!folio_trylock(folio)) + return false; + folio_free_swap(folio); + folio_unlock(folio); + } + + if (folio_large_mapcount(folio) != folio_ref_count(folio)) + return false; + + /* Stabilize the mapcount vs. refcount and recheck. */ + folio_lock_large_mapcount(folio); + VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio)); + + if (folio_test_large_maybe_mapped_shared(folio)) + goto unlock; + if (folio_large_mapcount(folio) != folio_ref_count(folio)) + goto unlock; + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id && + folio_mm_id(folio, 1) != vma->vm_mm->mm_id); + + /* + * Do we need the folio lock? Likely not. If there would have been + * references from page migration/swapout, we would have detected + * an additional folio reference and never ended up here. + */ + exclusive = true; +unlock: + folio_unlock_large_mapcount(folio); + return exclusive; +} +#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ +static bool __wp_can_reuse_large_anon_folio(struct folio *folio, + struct vm_area_struct *vma) +{ + BUILD_BUG(); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static bool wp_can_reuse_anon_folio(struct folio *folio, struct vm_area_struct *vma) { - /* - * We could currently only reuse a subpage of a large folio if no - * other subpages of the large folios are still mapped. However, - * let's just consistently not reuse subpages even if we could - * reuse in that scenario, and give back a large folio a bit - * sooner. - */ - if (folio_test_large(folio)) - return false; + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && folio_test_large(folio)) + return __wp_can_reuse_large_anon_folio(folio, vma); /* * We have to verify under folio lock: these early checks are From 003fde4492c88ac3a1fee3d97b3834a679780af3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:07 +0100 Subject: [PATCH 1236/2157] mm: convert folio_likely_mapped_shared() to folio_maybe_mapped_shared() Let's reuse our new MM ownership tracking infrastructure for large folios to make folio_likely_mapped_shared() never return false negatives -- never indicating "not mapped shared" although the folio *is* mapped shared. With that, we can rename it to folio_maybe_mapped_shared() and get rid of the dependency on the mapcount of the first folio page. The semantics are now arguably clearer: no mixture of "false negatives" and "false positives", only the remaining possibility for "false positives". Thoroughly document the new semantics. We might now detect that a large folio is "maybe mapped shared" although it *no longer* is -- but once was. Now, if more than two MMs mapped a folio at the same time, and the MM mapping the folio exclusively at the end is not one tracked in the two folio MM slots, we will detect the folio as "maybe mapped shared". For anonymous folios, usually (except weird corner cases) all PTEs that target a "maybe mapped shared" folio are R/O. As soon as a child process would write to them (iow, actively use them), we would CoW and effectively replace these PTEs. Most cases (below) are not expected to really matter with large anonymous folios for this reason. Most importantly, there will be no change at all for: * small folios * hugetlb folios * PMD-mapped PMD-sized THPs (single mapping) This change has the potential to affect existing callers of folio_likely_mapped_shared() -> folio_maybe_mapped_shared(): (1) fs/proc/task_mmu.c: no change (hugetlb) (2) khugepaged counts PTEs that target shared folios towards max_ptes_shared (default: HPAGE_PMD_NR / 2), meaning we could skip a collapse where we would have previously collapsed. This only applies to anonymous folios and is not expected to matter in practice. Worth noting that this change sorts out case (A) documented in commit 1bafe96e89f0 ("mm/khugepaged: replace page_mapcount() check by folio_likely_mapped_shared()") by removing the possibility for "false negatives". (3) MADV_COLD / MADV_PAGEOUT / MADV_FREE will not try splitting PTE-mapped THPs that are considered shared but not fully covered by the requested range, consequently not processing them. PMD-mapped PMD-sized THP are not affected, or when all PTEs are covered. These functions are usually only called on anon/file folios that are exclusively mapped most of the time (no other file mappings or no fork()), so the "false negatives" are not expected to matter in practice. (4) mbind() / migrate_pages() / move_pages() will refuse to migrate shared folios unless MPOL_MF_MOVE_ALL is effective (requires CAP_SYS_NICE). We will now reject some folios that could be migrated. Similar to (3), especially with MPOL_MF_MOVE_ALL, so this is not expected to matter in practice. Note that cpuset_migrate_mm_workfn() calls do_migrate_pages() with MPOL_MF_MOVE_ALL. (5) NUMA hinting mm/migrate.c:migrate_misplaced_folio_prepare() will skip file folios that are probably shared libraries (-> "mapped shared" and executable). This check would have detected it as a shared library at some point (at least 3 MMs mapping it), so detecting it afterwards does not sound wrong (still a shared library). Not expected to matter. mm/memory.c:numa_migrate_check() will indicate TNF_SHARED in MAP_SHARED file mappings when encountering a shared folio. Similar reasoning, not expected to matter. mm/mprotect.c:change_pte_range() will skip folios detected as shared in CoW mappings. Similarly, this is not expected to matter in practice, but if it would ever be a problem we could relax that check a bit (e.g., basing it on the average page-mapcount in a folio), because it was only an optimization when many (e.g., 288) processes were mapping the same folios -- see commit 859d4adc3415 ("mm: numa: do not trap faults on shared data section pages.") (6) mm/rmap.c:folio_referenced_one() will skip exclusive swapbacked folios in dying processes. Applies to anonymous folios only. Without "false negatives", we'll now skip all actually shared ones. Skipping ones that are actually exclusive won't really matter, it's a pure optimization, and is not expected to matter in practice. In theory, one can detect the problematic scenario: folio_mapcount() > 0 and no folio MM slot is occupied ("state unknown"). One could reset the MM slots while doing an rmap walk, which migration / folio split already do when setting everything up. Further, when batching PTEs we might naturally learn about a owner (e.g., folio_mapcount() == nr_ptes) and could update the owner. However, we'll defer that until the scenarios where it would really matter are clear. Link: https://lkml.kernel.org/r/20250303163014.1128035-15-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 4 ++-- include/linux/mm.h | 43 ++++++++++++++++++++++--------------------- mm/huge_memory.c | 2 +- mm/khugepaged.c | 8 +++----- mm/madvise.c | 6 +++--- mm/memory.c | 2 +- mm/mempolicy.c | 8 ++++---- mm/migrate.c | 7 +++---- mm/mprotect.c | 2 +- mm/rmap.c | 2 +- 10 files changed, 41 insertions(+), 43 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b0f189815512..d811b24db65b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1023,7 +1023,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (folio) { /* We treat non-present entries as "maybe shared". */ - if (!present || folio_likely_mapped_shared(folio) || + if (!present || folio_maybe_mapped_shared(folio) || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else @@ -1882,7 +1882,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, if (!folio_test_anon(folio)) flags |= PM_FILE; - if (!folio_likely_mapped_shared(folio) && + if (!folio_maybe_mapped_shared(folio) && !hugetlb_pmd_shared(ptep)) flags |= PM_MMAP_EXCLUSIVE; diff --git a/include/linux/mm.h b/include/linux/mm.h index f366c180f2b6..82776b409391 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2251,23 +2251,18 @@ static inline size_t folio_size(const struct folio *folio) } /** - * folio_likely_mapped_shared - Estimate if the folio is mapped into the page - * tables of more than one MM + * folio_maybe_mapped_shared - Whether the folio is mapped into the page + * tables of more than one MM * @folio: The folio. * - * This function checks if the folio is currently mapped into more than one - * MM ("mapped shared"), or if the folio is only mapped into a single MM - * ("mapped exclusively"). + * This function checks if the folio maybe currently mapped into more than one + * MM ("maybe mapped shared"), or if the folio is certainly mapped into a single + * MM ("mapped exclusively"). * * For KSM folios, this function also returns "mapped shared" when a folio is * mapped multiple times into the same MM, because the individual page mappings * are independent. * - * As precise information is not easily available for all folios, this function - * estimates the number of MMs ("sharers") that are currently mapping a folio - * using the number of times the first page of the folio is currently mapped - * into page tables. - * * For small anonymous folios and anonymous hugetlb folios, the return * value will be exactly correct: non-KSM folios can only be mapped at most once * into an MM, and they cannot be partially mapped. KSM folios are @@ -2275,8 +2270,8 @@ static inline size_t folio_size(const struct folio *folio) * * For other folios, the result can be fuzzy: * #. For partially-mappable large folios (THP), the return value can wrongly - * indicate "mapped exclusively" (false negative) when the folio is - * only partially mapped into at least one MM. + * indicate "mapped shared" (false positive) if a folio was mapped by + * more than two MMs at one point in time. * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. @@ -2293,7 +2288,7 @@ static inline size_t folio_size(const struct folio *folio) * * Return: Whether the folio is estimated to be mapped into more than one MM. */ -static inline bool folio_likely_mapped_shared(struct folio *folio) +static inline bool folio_maybe_mapped_shared(struct folio *folio) { int mapcount = folio_mapcount(folio); @@ -2301,16 +2296,22 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio))) return mapcount > 1; - /* A single mapping implies "mapped exclusively". */ - if (mapcount <= 1) - return false; - - /* If any page is mapped more than once we treat it "mapped shared". */ - if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio)) + /* + * vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ... + * simply assume "mapped shared", nobody should really care + * about this for arbitrary kernel allocations. + */ + if (!IS_ENABLED(CONFIG_MM_ID)) return true; - /* Let's guess based on the first subpage. */ - return atomic_read(&folio->_mapcount) > 0; + /* + * A single mapping implies "mapped exclusively", even if the + * folio flag says something different: it's easier to handle this + * case here instead of on the RMAP hot path. + */ + if (mapcount <= 1) + return false; + return folio_test_large_maybe_mapped_shared(folio); } #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 22e4e1194e9e..7433369d5d1f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2155,7 +2155,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * If other processes are mapping this folio, we couldn't discard * the folio unless they all do MADV_FREE so let's skip the folio. */ - if (folio_likely_mapped_shared(folio)) + if (folio_maybe_mapped_shared(folio)) goto out; if (!folio_trylock(folio)) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 5f0be134141e..cc945c6ab3bd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -607,7 +607,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, VM_BUG_ON_FOLIO(!folio_test_anon(folio), folio); /* See hpage_collapse_scan_pmd(). */ - if (folio_likely_mapped_shared(folio)) { + if (folio_maybe_mapped_shared(folio)) { ++shared; if (cc->is_khugepaged && shared > khugepaged_max_ptes_shared) { @@ -1359,11 +1359,9 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, /* * We treat a single page as shared if any part of the THP - * is shared. "False negatives" from - * folio_likely_mapped_shared() are not expected to matter - * much in practice. + * is shared. */ - if (folio_likely_mapped_shared(folio)) { + if (folio_maybe_mapped_shared(folio)) { ++shared; if (cc->is_khugepaged && shared > khugepaged_max_ptes_shared) { diff --git a/mm/madvise.c b/mm/madvise.c index e01e93e179a8..388dc289b5d1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -387,7 +387,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, folio = pmd_folio(orig_pmd); /* Do not interfere with other mappings of this folio */ - if (folio_likely_mapped_shared(folio)) + if (folio_maybe_mapped_shared(folio)) goto huge_unlock; if (pageout_anon_only_filter && !folio_test_anon(folio)) @@ -486,7 +486,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (nr < folio_nr_pages(folio)) { int err; - if (folio_likely_mapped_shared(folio)) + if (folio_maybe_mapped_shared(folio)) continue; if (pageout_anon_only_filter && !folio_test_anon(folio)) continue; @@ -721,7 +721,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (nr < folio_nr_pages(folio)) { int err; - if (folio_likely_mapped_shared(folio)) + if (folio_maybe_mapped_shared(folio)) continue; if (!folio_trylock(folio)) continue; diff --git a/mm/memory.c b/mm/memory.c index 5d5a2d81f05a..8873b7a4962c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5698,7 +5698,7 @@ int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, * Flag if the folio is shared between multiple address spaces. This * is later used when determining whether to group tasks together */ - if (folio_likely_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) + if (folio_maybe_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) *flags |= TNF_SHARED; /* * For memory tiering mode, cpupid of slow memory page is used diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bbaadbeeb291..530e71fe9147 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -642,11 +642,11 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * - * See folio_likely_mapped_shared() on possible imprecision when we + * See folio_maybe_mapped_shared() on possible imprecision when we * cannot easily detect if a folio is shared. */ if ((flags & MPOL_MF_MOVE_ALL) || - (!folio_likely_mapped_shared(folio) && !hugetlb_pmd_shared(pte))) + (!folio_maybe_mapped_shared(folio) && !hugetlb_pmd_shared(pte))) if (!folio_isolate_hugetlb(folio, qp->pagelist)) qp->nr_failed++; unlock: @@ -1033,10 +1033,10 @@ static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * - * See folio_likely_mapped_shared() on possible imprecision when we + * See folio_maybe_mapped_shared() on possible imprecision when we * cannot easily detect if a folio is shared. */ - if ((flags & MPOL_MF_MOVE_ALL) || !folio_likely_mapped_shared(folio)) { + if ((flags & MPOL_MF_MOVE_ALL) || !folio_maybe_mapped_shared(folio)) { if (folio_isolate_lru(folio)) { list_add_tail(&folio->lru, foliolist); node_stat_mod_folio(folio, diff --git a/mm/migrate.c b/mm/migrate.c index a991d3691bda..c0adea67cd62 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2226,7 +2226,7 @@ static int __add_folio_for_migration(struct folio *folio, int node, if (folio_nid(folio) == node) return 0; - if (folio_likely_mapped_shared(folio) && !migrate_all) + if (folio_maybe_mapped_shared(folio) && !migrate_all) return -EACCES; if (folio_test_hugetlb(folio)) { @@ -2651,11 +2651,10 @@ int migrate_misplaced_folio_prepare(struct folio *folio, * processes with execute permissions as they are probably * shared libraries. * - * See folio_likely_mapped_shared() on possible imprecision + * See folio_maybe_mapped_shared() on possible imprecision * when we cannot easily detect if a folio is shared. */ - if ((vma->vm_flags & VM_EXEC) && - folio_likely_mapped_shared(folio)) + if ((vma->vm_flags & VM_EXEC) && folio_maybe_mapped_shared(folio)) return -EACCES; /* diff --git a/mm/mprotect.c b/mm/mprotect.c index 1444878f7aeb..62c1f7945741 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -133,7 +133,7 @@ static long change_pte_range(struct mmu_gather *tlb, /* Also skip shared copy-on-write pages */ if (is_cow_mapping(vma->vm_flags) && (folio_maybe_dma_pinned(folio) || - folio_likely_mapped_shared(folio))) + folio_maybe_mapped_shared(folio))) continue; /* diff --git a/mm/rmap.c b/mm/rmap.c index c9922928616e..8de415157bc8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -889,7 +889,7 @@ static bool folio_referenced_one(struct folio *folio, if ((!atomic_read(&vma->vm_mm->mm_users) || check_stable_address_space(vma->vm_mm)) && folio_test_anon(folio) && folio_test_swapbacked(folio) && - !folio_likely_mapped_shared(folio)) { + !folio_maybe_mapped_shared(folio)) { pra->referenced = -1; page_vma_mapped_walk_done(&pvmw); return false; From e63ee43e3edaacfca04454b34aee8d5e303953df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:08 +0100 Subject: [PATCH 1237/2157] mm: CONFIG_NO_PAGE_MAPCOUNT to prepare for not maintain per-page mapcounts in large folios We're close to the finishing line: let's introduce a new CONFIG_NO_PAGE_MAPCOUNT config option where we will incrementally remove any dependencies on per-page mapcounts in large folios. Once that's done, we'll stop maintaining the per-page mapcounts with this config option enabled. CONFIG_NO_PAGE_MAPCOUNT will be EXPERIMENTAL for now, as we'll have to learn about some of the real world impact of some of the implications. As writing "!CONFIG_NO_PAGE_MAPCOUNT" is really nasty, let's introduce a helper config option "CONFIG_PAGE_MAPCOUNT" that expresses the negation. Link: https://lkml.kernel.org/r/20250303163014.1128035-16-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- mm/Kconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mm/Kconfig b/mm/Kconfig index a25c5476b3ad..4a4e7b63d30a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -887,8 +887,25 @@ config READ_ONLY_THP_FOR_FS support of file THPs will be developed in the next few release cycles. +config NO_PAGE_MAPCOUNT + bool "No per-page mapcount (EXPERIMENTAL)" + help + Do not maintain per-page mapcounts for pages part of larger + allocations, such as transparent huge pages. + + When this config option is enabled, some interfaces that relied on + this information will rely on less-precise per-allocation information + instead: for example, using the average per-page mapcount in such + a large allocation instead of the per-page mapcount. + + EXPERIMENTAL because the impact of some changes is still unclear. + endif # TRANSPARENT_HUGEPAGE +# simple helper to make the code a bit easier to read +config PAGE_MAPCOUNT + def_bool !NO_PAGE_MAPCOUNT + # # The architecture supports pgtable leaves that is larger than PAGE_SIZE # From ae4192b7691cbd18aa6e286f4ccaf5ab574fc9cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:09 +0100 Subject: [PATCH 1238/2157] fs/proc/page: remove per-page mapcount dependency for /proc/kpagecount (CONFIG_NO_PAGE_MAPCOUNT) Let's implement an alternative when per-page mapcounts in large folios are no longer maintained -- soon with CONFIG_NO_PAGE_MAPCOUNT. For large folios, we'll return the per-page average mapcount within the folio, whereby we round to the closest integer when calculating the average: however, we'll always return at least 1 if the folio is mapped. So assuming a folio with 512 pages, the average would be: * 0 if not pages are mapped * 1 if there are 1 .. 767 per-page mappings * 2 if there are 767 .. 1279 per-page mappings ... For hugetlb folios and for large folios that are fully mapped into all address spaces, there is no change. We'll make use of this helper in other context next. As an alternative, we could simply return 0 for non-hugetlb large folios, or disable this legacy interface with CONFIG_NO_PAGE_MAPCOUNT. But the information exposed by this interface can still be valuable, and frequently we deal with fully-mapped large folios where the average corresponds to the actual page mapcount. So we'll leave it like this for now and document the new behavior. Note: this interface is likely not very relevant for performance. If ever required, we could try doing a rather expensive rmap walk to collect precisely how often this folio page is mapped. Link: https://lkml.kernel.org/r/20250303163014.1128035-17-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/pagemap.rst | 7 ++++- fs/proc/internal.h | 35 ++++++++++++++++++++++++ fs/proc/page.c | 11 ++++++-- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index a297e824f990..d6647daca912 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -43,7 +43,12 @@ There are four components to pagemap: skip over unmapped regions. * ``/proc/kpagecount``. This file contains a 64-bit count of the number of - times each page is mapped, indexed by PFN. + times each page is mapped, indexed by PFN. Some kernel configurations do + not track the precise number of times a page part of a larger allocation + (e.g., THP) is mapped. In these configurations, the average number of + mappings per page in this larger allocation is returned instead. However, + if any page of the large allocation is mapped, the returned value will + be at least 1. The page-types tool in the tools/mm directory can be used to query the number of times a page is mapped. diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 77a517f91821..5f5271852b53 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -188,6 +188,41 @@ static inline int folio_precise_page_mapcount(struct folio *folio, return mapcount; } +/** + * folio_average_page_mapcount() - Average number of mappings per page in this + * folio + * @folio: The folio. + * + * The average number of user page table entries that reference each page in + * this folio as tracked via the RMAP: either referenced directly (PTE) or + * as part of a larger area that covers this page (e.g., PMD). + * + * The average is calculated by rounding to the nearest integer; however, + * to avoid duplicated code in current callers, the average is at least + * 1 if any page of the folio is mapped. + * + * Returns: The average number of mappings per page in this folio. + */ +static inline int folio_average_page_mapcount(struct folio *folio) +{ + int mapcount, entire_mapcount, avg; + + if (!folio_test_large(folio)) + return atomic_read(&folio->_mapcount) + 1; + + mapcount = folio_large_mapcount(folio); + if (unlikely(mapcount <= 0)) + return 0; + entire_mapcount = folio_entire_mapcount(folio); + if (mapcount <= entire_mapcount) + return entire_mapcount; + mapcount -= entire_mapcount; + + /* Round to closest integer ... */ + avg = ((unsigned int)mapcount + folio_large_nr_pages(folio) / 2) >> folio_large_order(folio); + /* ... but return at least 1. */ + return max_t(int, avg + entire_mapcount, 1); +} /* * array.c */ diff --git a/fs/proc/page.c b/fs/proc/page.c index a55f5acefa97..23fc771100ae 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,9 +67,14 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, * memmaps that were actually initialized. */ page = pfn_to_online_page(pfn); - if (page) - mapcount = folio_precise_page_mapcount(page_folio(page), - page); + if (page) { + struct folio *folio = page_folio(page); + + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + mapcount = folio_precise_page_mapcount(folio, page); + else + mapcount = folio_average_page_mapcount(folio); + } if (put_user(mapcount, out)) { ret = -EFAULT; From eb16876971ea8f26c5bf839120ff308246d9cc7b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:10 +0100 Subject: [PATCH 1239/2157] fs/proc/task_mmu: remove per-page mapcount dependency for PM_MMAP_EXCLUSIVE (CONFIG_NO_PAGE_MAPCOUNT) Let's implement an alternative when per-page mapcounts in large folios are no longer maintained -- soon with CONFIG_NO_PAGE_MAPCOUNT. PM_MMAP_EXCLUSIVE will now be set if folio_likely_mapped_shared() is true -- when the folio is considered "mapped shared", including when it once was "mapped shared" but no longer is, as documented. This might result in and under-indication of "exclusively mapped", which is considered better than over-indicating it: under-estimating the USS (Unique Set Size) is better than over-estimating it. As an alternative, we could simply remove that flag with CONFIG_NO_PAGE_MAPCOUNT completely, but there might be value to it. So, let's keep it like that and document the behavior. Link: https://lkml.kernel.org/r/20250303163014.1128035-18-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/pagemap.rst | 11 +++++++++++ fs/proc/task_mmu.c | 11 +++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index d6647daca912..afce291649dd 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -38,6 +38,17 @@ There are four components to pagemap: precisely which pages are mapped (or in swap) and comparing mapped pages between processes. + Traditionally, bit 56 indicates that a page is mapped exactly once and bit + 56 is clear when a page is mapped multiple times, even when mapped in the + same process multiple times. In some kernel configurations, the semantics + for pages part of a larger allocation (e.g., THP) can differ: bit 56 is set + if all pages part of the corresponding large allocation are *certainly* + mapped in the same process, even if the page is mapped multiple times in that + process. Bit 56 is clear when any page page of the larger allocation + is *maybe* mapped in a different process. In some cases, a large allocation + might be treated as "maybe mapped by multiple processes" even though this + is no longer the case. + Efficient users of this interface will use ``/proc/pid/maps`` to determine which areas of memory are actually mapped and llseek to skip over unmapped regions. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index d811b24db65b..8192cbe4f356 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1652,6 +1652,13 @@ static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm) return 0; } +static bool __folio_page_mapped_exclusively(struct folio *folio, struct page *page) +{ + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + return folio_precise_page_mapcount(folio, page) == 1; + return !folio_maybe_mapped_shared(folio); +} + static int pagemap_pte_hole(unsigned long start, unsigned long end, __always_unused int depth, struct mm_walk *walk) { @@ -1742,7 +1749,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (!folio_test_anon(folio)) flags |= PM_FILE; if ((flags & PM_PRESENT) && - folio_precise_page_mapcount(folio, page) == 1) + __folio_page_mapped_exclusively(folio, page)) flags |= PM_MMAP_EXCLUSIVE; } if (vma->vm_flags & VM_SOFTDIRTY) @@ -1817,7 +1824,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, pagemap_entry_t pme; if (folio && (flags & PM_PRESENT) && - folio_precise_page_mapcount(folio, page + idx) == 1) + __folio_page_mapped_exclusively(folio, page)) cur_flags |= PM_MMAP_EXCLUSIVE; pme = make_pme(frame, cur_flags); From 7a34ae14491e03b64c6002abd23a8920144ae7d8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:11 +0100 Subject: [PATCH 1240/2157] fs/proc/task_mmu: remove per-page mapcount dependency for "mapmax" (CONFIG_NO_PAGE_MAPCOUNT) Let's implement an alternative when per-page mapcounts in large folios are no longer maintained -- soon with CONFIG_NO_PAGE_MAPCOUNT. For calculating "mapmax", we now use the average per-page mapcount in a large folio instead of the per-page mapcount. For hugetlb folios and folios that are not partially mapped into MMs, there is no change. Likely, this change will not matter much in practice, and an alternative might be to simple remove this stat with CONFIG_NO_PAGE_MAPCOUNT. However, there might be value to it, so let's keep it like that and document the behavior. Link: https://lkml.kernel.org/r/20250303163014.1128035-19-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 5 +++++ fs/proc/task_mmu.c | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 09f0aed5a08b..1aa190017f79 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -686,6 +686,11 @@ Where: node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page size, in KB, that is backing the mapping up. +Note that some kernel configurations do not track the precise number of times +a page part of a larger allocation (e.g., THP) is mapped. In these +configurations, "mapmax" might corresponds to the average number of mappings +per page in such a larger allocation instead. + 1.2 Kernel data --------------- diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8192cbe4f356..dc4f819d1549 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2863,7 +2863,12 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, unsigned long nr_pages) { struct folio *folio = page_folio(page); - int count = folio_precise_page_mapcount(folio, page); + int count; + + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + count = folio_precise_page_mapcount(folio, page); + else + count = folio_average_page_mapcount(folio); md->pages += nr_pages; if (pte_dirty || folio_test_dirty(folio)) From 6dd55dd1c55553f42605ebf0bdc8def5f2fd9309 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:12 +0100 Subject: [PATCH 1241/2157] fs/proc/task_mmu: remove per-page mapcount dependency for smaps/smaps_rollup (CONFIG_NO_PAGE_MAPCOUNT) Let's implement an alternative when per-page mapcounts in large folios are no longer maintained -- soon with CONFIG_NO_PAGE_MAPCOUNT. When computing the output for smaps / smaps_rollups, in particular when calculating the USS (Unique Set Size) and the PSS (Proportional Set Size), we still rely on per-page mapcounts. To determine private vs. shared, we'll use folio_likely_mapped_shared(), similar to how we handle PM_MMAP_EXCLUSIVE. Similarly, we might now under-estimate the USS and count pages towards "shared" that are actually "private" ("exclusively mapped"). When calculating the PSS, we'll now also use the average per-page mapcount for large folios: this can result in both, an over-estimation and an under-estimation of the PSS. The difference is not expected to matter much in practice, but we'll have to learn as we go. We can now provide folio_precise_page_mapcount() only with CONFIG_PAGE_MAPCOUNT, and remove one of the last users of per-page mapcounts when CONFIG_NO_PAGE_MAPCOUNT is enabled. Document the new behavior. Link: https://lkml.kernel.org/r/20250303163014.1128035-20-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 22 +++++++++++++++++++--- fs/proc/internal.h | 8 ++++++++ fs/proc/task_mmu.c | 17 +++++++++++++++-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 1aa190017f79..c9e62e8e0685 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -502,9 +502,25 @@ process, its PSS will be 1500. "Pss_Dirty" is the portion of PSS which consists of dirty pages. ("Pss_Clean" is not included, but it can be calculated by subtracting "Pss_Dirty" from "Pss".) -Note that even a page which is part of a MAP_SHARED mapping, but has only -a single pte mapped, i.e. is currently used by only one process, is accounted -as private and not as shared. +Traditionally, a page is accounted as "private" if it is mapped exactly once, +and a page is accounted as "shared" when mapped multiple times, even when +mapped in the same process multiple times. Note that this accounting is +independent of MAP_SHARED. + +In some kernel configurations, the semantics of pages part of a larger +allocation (e.g., THP) can differ: a page is accounted as "private" if all +pages part of the corresponding large allocation are *certainly* mapped in the +same process, even if the page is mapped multiple times in that process. A +page is accounted as "shared" if any page page of the larger allocation +is *maybe* mapped in a different process. In some cases, a large allocation +might be treated as "maybe mapped by multiple processes" even though this +is no longer the case. + +Some kernel configurations do not track the precise number of times a page part +of a larger allocation is mapped. In this case, when calculating the PSS, the +average number of mappings per page in this larger allocation might be used +as an approximation for the number of mappings of a page. The PSS calculation +will be imprecise in this case. "Referenced" indicates the amount of memory currently marked as referenced or accessed. diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5f5271852b53..96122e91c645 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -157,6 +157,7 @@ unsigned name_to_int(const struct qstr *qstr); /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 +#ifdef CONFIG_PAGE_MAPCOUNT /** * folio_precise_page_mapcount() - Number of mappings of this folio page. * @folio: The folio. @@ -187,6 +188,13 @@ static inline int folio_precise_page_mapcount(struct folio *folio, return mapcount; } +#else /* !CONFIG_PAGE_MAPCOUNT */ +static inline int folio_precise_page_mapcount(struct folio *folio, + struct page *page) +{ + BUILD_BUG(); +} +#endif /* CONFIG_PAGE_MAPCOUNT */ /** * folio_average_page_mapcount() - Average number of mappings per page in this diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dc4f819d1549..994cde10e3f4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -707,6 +707,8 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, struct folio *folio = page_folio(page); int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; + bool exclusive; + int mapcount; /* * First accumulate quantities that depend only on |size| and the type @@ -747,18 +749,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, dirty, locked, present); return; } + + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { + mapcount = folio_average_page_mapcount(folio); + exclusive = !folio_maybe_mapped_shared(folio); + } + /* * We obtain a snapshot of the mapcount. Without holding the folio lock * this snapshot can be slightly wrong as we cannot always read the * mapcount atomically. */ for (i = 0; i < nr; i++, page++) { - int mapcount = folio_precise_page_mapcount(folio, page); unsigned long pss = PAGE_SIZE << PSS_SHIFT; + + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { + mapcount = folio_precise_page_mapcount(folio, page); + exclusive = mapcount < 2; + } + if (mapcount >= 2) pss /= mapcount; smaps_page_accumulate(mss, folio, PAGE_SIZE, pss, - dirty, locked, mapcount < 2); + dirty, locked, exclusive); } } From 749492229e3bd6222dda7267b8244135229d1fd8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 3 Mar 2025 17:30:13 +0100 Subject: [PATCH 1242/2157] mm: stop maintaining the per-page mapcount of large folios (CONFIG_NO_PAGE_MAPCOUNT) Everything is in place to stop using the per-page mapcounts in large folios: the mapcount of tail pages will always be logically 0 (-1 value), just like it currently is for hugetlb folios already, and the page mapcount of the head page is either 0 (-1 value) or contains a page type (e.g., hugetlb). Maintaining _nr_pages_mapped without per-page mapcounts is impossible, so that one also has to go with CONFIG_NO_PAGE_MAPCOUNT. There are two remaining implications: (1) Per-node, per-cgroup and per-lruvec stats of "NR_ANON_MAPPED" ("mapped anonymous memory") and "NR_FILE_MAPPED" ("mapped file memory"): As soon as any page of the folio is mapped -- folio_mapped() -- we now account the complete folio as mapped. Once the last page is unmapped -- !folio_mapped() -- we account the complete folio as unmapped. This implies that ... * "AnonPages" and "Mapped" in /proc/meminfo and /sys/devices/system/node/*/meminfo * cgroup v2: "anon" and "file_mapped" in "memory.stat" and "memory.numa_stat" * cgroup v1: "rss" and "mapped_file" in "memory.stat" and "memory.numa_stat ... can now appear higher than before. But note that these folios do consume that memory, simply not all pages are actually currently mapped. It's worth nothing that other accounting in the kernel (esp. cgroup charging on allocation) is not affected by this change. [why oh why is "anon" called "rss" in cgroup v1] (2) Detecting partial mappings Detecting whether anon THPs are partially mapped gets a bit more unreliable. As long as a single MM maps such a large folio ("exclusively mapped"), we can reliably detect it. Especially before fork() / after a short-lived child process quit, we will detect partial mappings reliably, which is the common case. In essence, if the average per-page mapcount in an anon THP is < 1, we know for sure that we have a partial mapping. However, as soon as multiple MMs are involved, we might miss detecting partial mappings: this might be relevant with long-lived child processes. If we have a fully-mapped anon folio before fork(), once our child processes and our parent all unmap (zap/COW) the same pages (but not the complete folio), we might not detect the partial mapping. However, once the child processes quit we would detect the partial mapping. How relevant this case is in practice remains to be seen. Swapout/migration will likely mitigate this. In the future, RMAP walkers could check for that for that case (e.g., when collecting access bits during reclaim) and simply flag them for deferred-splitting. Link: https://lkml.kernel.org/r/20250303163014.1128035-21-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andy Lutomirks^H^Hski Cc: Borislav Betkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcow (Oracle) Cc: Michal Koutn Cc: Muchun Song Cc: tejun heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zefan Li Signed-off-by: Andrew Morton --- .../admin-guide/cgroup-v1/memory.rst | 4 + Documentation/admin-guide/cgroup-v2.rst | 10 ++- Documentation/filesystems/proc.rst | 10 ++- Documentation/mm/transhuge.rst | 31 +++++-- include/linux/rmap.h | 35 ++++++-- mm/internal.h | 5 +- mm/page_alloc.c | 3 +- mm/rmap.c | 80 +++++++++++++++++-- 8 files changed, 150 insertions(+), 28 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 286d16fc22eb..53cf081b22e8 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -609,6 +609,10 @@ memory.stat file includes following statistics: 'rss + mapped_file" will give you resident set size of cgroup. + Note that some kernel configurations might account complete larger + allocations (e.g., THP) towards 'rss' and 'mapped_file', even if + only some, but not all that memory is mapped. + (Note: file and shmem may be shared among other cgroups. In that case, mapped_file is accounted only when the memory cgroup is owner of page cache.) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index cb1b4e759b7e..f8a894a16307 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1440,7 +1440,10 @@ The following nested keys are defined. anon Amount of memory used in anonymous mappings such as - brk(), sbrk(), and mmap(MAP_ANONYMOUS) + brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that + some kernel configurations might account complete larger + allocations (e.g., THP) if only some, but not all the + memory of such an allocation is mapped anymore. file Amount of memory used to cache filesystem data, @@ -1483,7 +1486,10 @@ The following nested keys are defined. Amount of application memory swapped out to zswap. file_mapped - Amount of cached filesystem data mapped with mmap() + Amount of cached filesystem data mapped with mmap(). Note + that some kernel configurations might account complete + larger allocations (e.g., THP) if only some, but not + not all the memory of such an allocation is mapped. file_dirty Amount of cached filesystem data that was modified but diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index c9e62e8e0685..3c37b248fc4f 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1153,9 +1153,15 @@ Dirty Writeback Memory which is actively being written back to the disk AnonPages - Non-file backed pages mapped into userspace page tables + Non-file backed pages mapped into userspace page tables. Note that + some kernel configurations might consider all pages part of a + larger allocation (e.g., THP) as "mapped", as soon as a single + page is mapped. Mapped - files which have been mmapped, such as libraries + files which have been mmapped, such as libraries. Note that some + kernel configurations might consider all pages part of a larger + allocation (e.g., THP) as "mapped", as soon as a single page is + mapped. Shmem Total memory used by shared memory (shmem) and tmpfs KReclaimable diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst index baa17d718a76..0e7f8e4cd2e3 100644 --- a/Documentation/mm/transhuge.rst +++ b/Documentation/mm/transhuge.rst @@ -116,23 +116,28 @@ pages: succeeds on tail pages. - map/unmap of a PMD entry for the whole THP increment/decrement - folio->_entire_mapcount, increment/decrement folio->_large_mapcount - and also increment/decrement folio->_nr_pages_mapped by ENTIRELY_MAPPED - when _entire_mapcount goes from -1 to 0 or 0 to -1. + folio->_entire_mapcount and folio->_large_mapcount. We also maintain the two slots for tracking MM owners (MM ID and corresponding mapcount), and the current status ("maybe mapped shared" vs. "mapped exclusively"). + With CONFIG_PAGE_MAPCOUNT, we also increment/decrement + folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount goes + from -1 to 0 or 0 to -1. + - map/unmap of individual pages with PTE entry increment/decrement - page->_mapcount, increment/decrement folio->_large_mapcount and also - increment/decrement folio->_nr_pages_mapped when page->_mapcount goes - from -1 to 0 or 0 to -1 as this counts the number of pages mapped by PTE. + folio->_large_mapcount. We also maintain the two slots for tracking MM owners (MM ID and corresponding mapcount), and the current status ("maybe mapped shared" vs. "mapped exclusively"). + With CONFIG_PAGE_MAPCOUNT, we also increment/decrement + page->_mapcount and increment/decrement folio->_nr_pages_mapped when + page->_mapcount goes from -1 to 0 or 0 to -1 as this counts the number + of pages mapped by PTE. + split_huge_page internally has to distribute the refcounts in the head page to the tail pages before clearing all PG_head/tail bits from the page structures. It can be done easily for refcounts taken by page table @@ -159,8 +164,8 @@ clear where references should go after split: it will stay on the head page. Note that split_huge_pmd() doesn't have any limitations on refcounting: pmd can be split at any point and never fails. -Partial unmap and deferred_split_folio() -======================================== +Partial unmap and deferred_split_folio() (anon THP only) +======================================================== Unmapping part of THP (with munmap() or other way) is not going to free memory immediately. Instead, we detect that a subpage of THP is not in use @@ -175,3 +180,13 @@ a THP crosses a VMA boundary. The function deferred_split_folio() is used to queue a folio for splitting. The splitting itself will happen when we get memory pressure via shrinker interface. + +With CONFIG_PAGE_MAPCOUNT, we reliably detect partial mappings based on +folio->_nr_pages_mapped. + +With CONFIG_NO_PAGE_MAPCOUNT, we detect partial mappings based on the +average per-page mapcount in a THP: if the average is < 1, an anon THP is +certainly partially mapped. As long as only a single process maps a THP, +this detection is reliable. With long-running child processes, there can +be scenarios where partial mappings can currently not be detected, and +might need asynchronous detection during memory reclaim in the future. diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c131b0efff0f..6b82b618846e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -240,7 +240,7 @@ static __always_inline void folio_set_large_mapcount(struct folio *folio, folio_set_mm_id(folio, 0, vma->vm_mm->mm_id); } -static __always_inline void folio_add_large_mapcount(struct folio *folio, +static __always_inline int folio_add_return_large_mapcount(struct folio *folio, int diff, struct vm_area_struct *vma) { const mm_id_t mm_id = vma->vm_mm->mm_id; @@ -286,9 +286,11 @@ static __always_inline void folio_add_large_mapcount(struct folio *folio, folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; } folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; } +#define folio_add_large_mapcount folio_add_return_large_mapcount -static __always_inline void folio_sub_large_mapcount(struct folio *folio, +static __always_inline int folio_sub_return_large_mapcount(struct folio *folio, int diff, struct vm_area_struct *vma) { const mm_id_t mm_id = vma->vm_mm->mm_id; @@ -331,7 +333,9 @@ static __always_inline void folio_sub_large_mapcount(struct folio *folio, folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT; out: folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; } +#define folio_sub_large_mapcount folio_sub_return_large_mapcount #else /* !CONFIG_MM_ID */ /* * See __folio_rmap_sanity_checks(), we might map large folios even without @@ -350,17 +354,33 @@ static inline void folio_add_large_mapcount(struct folio *folio, atomic_add(diff, &folio->_large_mapcount); } +static inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} + static inline void folio_sub_large_mapcount(struct folio *folio, int diff, struct vm_area_struct *vma) { atomic_sub(diff, &folio->_large_mapcount); } + +static inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} #endif /* CONFIG_MM_ID */ #define folio_inc_large_mapcount(folio, vma) \ folio_add_large_mapcount(folio, 1, vma) +#define folio_inc_return_large_mapcount(folio, vma) \ + folio_add_return_large_mapcount(folio, 1, vma) #define folio_dec_large_mapcount(folio, vma) \ folio_sub_large_mapcount(folio, 1, vma) +#define folio_dec_return_large_mapcount(folio, vma) \ + folio_sub_return_large_mapcount(folio, 1, vma) /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -538,9 +558,11 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, break; } - do { - atomic_inc(&page->_mapcount); - } while (page++, --nr_pages > 0); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + } folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: @@ -638,7 +660,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; diff --git a/mm/internal.h b/mm/internal.h index 04724971379c..558c8e2a3d94 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -125,6 +125,8 @@ void page_writeback_init(void); */ static inline int folio_nr_pages_mapped(const struct folio *folio) { + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) + return -1; return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; } @@ -762,7 +764,8 @@ static inline void prep_compound_head(struct page *page, unsigned int order) folio_set_order(folio, order); atomic_set(&folio->_large_mapcount, -1); - atomic_set(&folio->_nr_pages_mapped, 0); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_set(&folio->_nr_pages_mapped, 0); if (IS_ENABLED(CONFIG_MM_ID)) { folio->_mm_ids = 0; folio->_mm_id_mapcount[0] = -1; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c8daa3e64266..2c6ae7e5aaad 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -951,7 +951,8 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) bad_page(page, "nonzero large_mapcount"); goto out; } - if (unlikely(atomic_read(&folio->_nr_pages_mapped))) { + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT) && + unlikely(atomic_read(&folio->_nr_pages_mapped))) { bad_page(page, "nonzero nr_pages_mapped"); goto out; } diff --git a/mm/rmap.c b/mm/rmap.c index 8de415157bc8..67bb273dfb80 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1258,6 +1258,16 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, break; } + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { + nr = folio_add_return_large_mapcount(folio, orig_nr_pages, vma); + if (nr == orig_nr_pages) + /* Was completely unmapped. */ + nr = folio_large_nr_pages(folio); + else + nr = 0; + break; + } + do { first += atomic_inc_and_test(&page->_mapcount); } while (page++, --nr_pages > 0); @@ -1271,6 +1281,18 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: first = atomic_inc_and_test(&folio->_entire_mapcount); + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { + if (level == RMAP_LEVEL_PMD && first) + *nr_pmdmapped = folio_large_nr_pages(folio); + nr = folio_inc_return_large_mapcount(folio, vma); + if (nr == 1) + /* Was completely unmapped. */ + nr = folio_large_nr_pages(folio); + else + nr = 0; + break; + } + if (first) { nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) { @@ -1436,13 +1458,23 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio, break; } } + + VM_WARN_ON_FOLIO(!folio_test_large(folio) && PageAnonExclusive(page) && + atomic_read(&folio->_mapcount) > 0, folio); for (i = 0; i < nr_pages; i++) { struct page *cur_page = page + i; - /* While PTE-mapping a THP we have a PMD and a PTE mapping. */ - VM_WARN_ON_FOLIO((atomic_read(&cur_page->_mapcount) > 0 || - (folio_test_large(folio) && - folio_entire_mapcount(folio) > 1)) && + VM_WARN_ON_FOLIO(folio_test_large(folio) && + folio_entire_mapcount(folio) > 1 && + PageAnonExclusive(cur_page), folio); + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) + continue; + + /* + * While PTE-mapping a THP we have a PMD and a PTE + * mapping. + */ + VM_WARN_ON_FOLIO(atomic_read(&cur_page->_mapcount) > 0 && PageAnonExclusive(cur_page), folio); } @@ -1548,20 +1580,23 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, for (i = 0; i < nr; i++) { struct page *page = folio_page(folio, i); - /* increment count (starts at -1) */ - atomic_set(&page->_mapcount, 0); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + /* increment count (starts at -1) */ + atomic_set(&page->_mapcount, 0); if (exclusive) SetPageAnonExclusive(page); } folio_set_large_mapcount(folio, nr, vma); - atomic_set(&folio->_nr_pages_mapped, nr); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_set(&folio->_nr_pages_mapped, nr); } else { nr = folio_large_nr_pages(folio); /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); folio_set_large_mapcount(folio, 1, vma); - atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); if (exclusive) SetPageAnonExclusive(&folio->page); nr_pmdmapped = nr; @@ -1665,6 +1700,19 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, break; } + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { + nr = folio_sub_return_large_mapcount(folio, nr_pages, vma); + if (!nr) { + /* Now completely unmapped. */ + nr = folio_nr_pages(folio); + } else { + partially_mapped = nr < folio_large_nr_pages(folio) && + !folio_entire_mapcount(folio); + nr = 0; + } + break; + } + folio_sub_large_mapcount(folio, nr_pages, vma); do { last += atomic_add_negative(-1, &page->_mapcount); @@ -1678,6 +1726,22 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, break; case RMAP_LEVEL_PMD: case RMAP_LEVEL_PUD: + if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) { + last = atomic_add_negative(-1, &folio->_entire_mapcount); + if (level == RMAP_LEVEL_PMD && last) + nr_pmdmapped = folio_large_nr_pages(folio); + nr = folio_dec_return_large_mapcount(folio, vma); + if (!nr) { + /* Now completely unmapped. */ + nr = folio_large_nr_pages(folio); + } else { + partially_mapped = last && + nr < folio_large_nr_pages(folio); + nr = 0; + } + break; + } + folio_dec_large_mapcount(folio, vma); last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { From ab71d2d301211a45420b1fb085072c79ea6a8027 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:26 -0800 Subject: [PATCH 1243/2157] mm/damon/sysfs-schemes: let damon_sysfs_scheme_set_filters() be used for different named directories Patch series "mm/damon: add sysfs dirs for managing DAMOS filters based on handling layers". DAMOS filters are categorized into two groups based on their handling layers, namely core and operations layers. The categorization affects when each filter is evaluated. Core layer handled filters are evaluated first. The order meant nothing before, but introduction of allow filters changed that. DAMOS sysfs interface provides single directory for filters, namely 'filters'. Users can install any filters in any order there. DAMON will internally categorize those into core and operations layer handled ones, and apply the evaluation order rule. The ordering rule is clearly documented. But the interface could still confuse users since it is allowed to install filters on the directory in mixed ways. Add two sysfs directories for managing filters by handling layers, namely 'core_filters' and 'ops_filters' for filters that handled by core and operations layer, respectively. Those are avoided to be used for installing filters that not handled by the assumed layers. For backward compatibility, keep 'filters' directory with its curernt behavior. Filters installed in the directory will be added to DAMON after those of 'core_filters' and 'ops_filters' directories, with the automatic categorizations. Also recommend users to use the new directories while noticing 'filters' directory could be deprecated in future on the usage documents. Note that new directories provide all features that were provided with 'filters', but just in a more clear way. Deprecating 'filters' in future will hence not make an irreversal feature loss. This patch (of 8): damon_sysfs_scheme_set_filters() is using a hard-coded directory name, "filters". Refactor for general named directories of same files hierarchy, to use from upcoming changes for adding sibling directories having files same to those of "filters", and named as "core_filters" and "ops_filters". [arnd@arndb.deL avoid Wformat-security warning] Link: https://lkml.kernel.org/r/20250310135142.4176976-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20250305222733.59089-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250305222733.59089-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Arnd Bergmann Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 66a1c46cee84..d769b24f90a4 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1604,7 +1604,9 @@ static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) return err; } -static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme) +static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme, + const char *name, + struct damon_sysfs_scheme_filters **filters_ptr) { struct damon_sysfs_scheme_filters *filters = damon_sysfs_scheme_filters_alloc(); @@ -1614,11 +1616,11 @@ static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme) return -ENOMEM; err = kobject_init_and_add(&filters->kobj, &damon_sysfs_scheme_filters_ktype, &scheme->kobj, - "filters"); + "%s", name); if (err) kobject_put(&filters->kobj); else - scheme->filters = filters; + *filters_ptr = filters; return err; } @@ -1670,7 +1672,8 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) err = damon_sysfs_scheme_set_watermarks(scheme); if (err) goto put_quotas_access_pattern_out; - err = damon_sysfs_scheme_set_filters(scheme); + err = damon_sysfs_scheme_set_filters(scheme, "filters", + &scheme->filters); if (err) goto put_watermarks_quotas_access_pattern_out; err = damon_sysfs_scheme_set_stats(scheme); From db2e76ceb40b85f5d6302b05f40dd99955b938f4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:27 -0800 Subject: [PATCH 1244/2157] mm/damon/sysfs-schemes: implement core_filters and ops_filters directories Implement two DAMOS sysfs directories for managing core and operations layer handled filters separately. Those are named as 'core_filters' and 'ops_filters', and have files hierarchy same to 'filters'. This commit is only populating and cleaning up the directories, not really connecting the files with DAMON. Following changes will make the connections. Link: https://lkml.kernel.org/r/20250305222733.59089-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index d769b24f90a4..30a7f288bd4c 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1504,6 +1504,8 @@ struct damon_sysfs_scheme { unsigned long apply_interval_us; struct damon_sysfs_quotas *quotas; struct damon_sysfs_watermarks *watermarks; + struct damon_sysfs_scheme_filters *core_filters; + struct damon_sysfs_scheme_filters *ops_filters; struct damon_sysfs_scheme_filters *filters; struct damon_sysfs_stats *stats; struct damon_sysfs_scheme_regions *tried_regions; @@ -1624,6 +1626,33 @@ static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme, return err; } +static int damos_sysfs_set_filter_dirs(struct damon_sysfs_scheme *scheme) +{ + int err; + + err = damon_sysfs_scheme_set_filters(scheme, "filters", + &scheme->filters); + if (err) + return err; + err = damon_sysfs_scheme_set_filters(scheme, "core_filters", + &scheme->core_filters); + if (err) + goto put_filters_out; + err = damon_sysfs_scheme_set_filters(scheme, "ops_filters", + &scheme->ops_filters); + if (err) + goto put_core_filters_out; + return 0; + +put_core_filters_out: + kobject_put(&scheme->core_filters->kobj); + scheme->core_filters = NULL; +put_filters_out: + kobject_put(&scheme->filters->kobj); + scheme->filters = NULL; + return err; +} + static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) { struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); @@ -1672,8 +1701,7 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) err = damon_sysfs_scheme_set_watermarks(scheme); if (err) goto put_quotas_access_pattern_out; - err = damon_sysfs_scheme_set_filters(scheme, "filters", - &scheme->filters); + err = damos_sysfs_set_filter_dirs(scheme); if (err) goto put_watermarks_quotas_access_pattern_out; err = damon_sysfs_scheme_set_stats(scheme); @@ -1688,6 +1716,10 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) kobject_put(&scheme->tried_regions->kobj); scheme->tried_regions = NULL; put_filters_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->ops_filters->kobj); + scheme->ops_filters = NULL; + kobject_put(&scheme->core_filters->kobj); + scheme->core_filters = NULL; kobject_put(&scheme->filters->kobj); scheme->filters = NULL; put_watermarks_quotas_access_pattern_out: @@ -1711,6 +1743,10 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) kobject_put(&scheme->watermarks->kobj); damon_sysfs_scheme_filters_rm_dirs(scheme->filters); kobject_put(&scheme->filters->kobj); + damon_sysfs_scheme_filters_rm_dirs(scheme->core_filters); + kobject_put(&scheme->core_filters->kobj); + damon_sysfs_scheme_filters_rm_dirs(scheme->ops_filters); + kobject_put(&scheme->ops_filters->kobj); kobject_put(&scheme->stats->kobj); damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions); kobject_put(&scheme->tried_regions->kobj); From 968cbea1bb0e4f608f4c87a3c7d67ef9fd720c33 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:28 -0800 Subject: [PATCH 1245/2157] mm/damon/sysfs-schemes: commit filters in {core,ops}_filters directories Connect user inputs for files under core_filters and ops_filters with DAMON, so that the files can really function. Becasuse {core,ops}_filters are easier to be managed in terms of expecting filters evaluation order, add filters in {core,ops}_filters before 'filters' directory. Link: https://lkml.kernel.org/r/20250305222733.59089-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 30a7f288bd4c..65c6091cd879 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2143,8 +2143,6 @@ static struct damos *damon_sysfs_mk_scheme( struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; - struct damon_sysfs_scheme_filters *sysfs_filters = - sysfs_scheme->filters; struct damos *scheme; int err; @@ -2184,7 +2182,17 @@ static struct damos *damon_sysfs_mk_scheme( return NULL; } - err = damon_sysfs_add_scheme_filters(scheme, sysfs_filters); + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->core_filters); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->ops_filters); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->filters); if (err) { damon_destroy_scheme(scheme); return NULL; From f7f0d88b7d6da29d2b073740aa130685f0e18609 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:29 -0800 Subject: [PATCH 1246/2157] mm/damon/core: expose damos_filter_for_ops() to DAMON kernel API callers damos_filter_for_ops() can be useful to avoid putting wrong type of filters in wrong place. Make it be exposed to DAMON kernel API callers. Link: https://lkml.kernel.org/r/20250305222733.59089-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + mm/damon/core.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 52559475dbe7..eed008b64a23 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -894,6 +894,7 @@ void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching, bool allow); void damos_add_filter(struct damos *s, struct damos_filter *f); +bool damos_filter_for_ops(enum damos_filter_type type); void damos_destroy_filter(struct damos_filter *f); struct damos_quota_goal *damos_new_quota_goal( diff --git a/mm/damon/core.c b/mm/damon/core.c index 511c464adcc5..ebbb22840435 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -281,7 +281,14 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type, return filter; } -static bool damos_filter_for_ops(enum damos_filter_type type) +/** + * damos_filter_for_ops() - Return if the filter is ops-hndled one. + * @type: type of the filter. + * + * Return: true if the filter of @type needs to be handled by ops layer, false + * otherwise. + */ +bool damos_filter_for_ops(enum damos_filter_type type) { switch (type) { case DAMOS_FILTER_TYPE_ADDR: From 9f643a9854df682548bbcdf68cbd89e74cbfd6dc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:30 -0800 Subject: [PATCH 1247/2157] mm/damon/sysfs-schemes: record filters of which layer should be added to the given filters directory Unlike their name and assumed purposes, {core,ops}_filters DAMOS sysfs directories are allowing installing any type of filters. As a first step for preventing such wrong installments, add information about filters that handled by what layer should the installed to the given filters directory in the DAMOS sysfs internal data structures. Link: https://lkml.kernel.org/r/20250305222733.59089-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 46 +++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 65c6091cd879..e4e36c34ec0e 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -309,8 +309,18 @@ static const struct kobj_type damon_sysfs_stats_ktype = { * filter directory */ +/* + * enum damos_sysfs_filter_handle_layer - Layers handling filters of a dir. + */ +enum damos_sysfs_filter_handle_layer { + DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH, +}; + struct damon_sysfs_scheme_filter { struct kobject kobj; + enum damos_sysfs_filter_handle_layer handle_layer; enum damos_filter_type type; bool matching; bool allow; @@ -320,9 +330,15 @@ struct damon_sysfs_scheme_filter { int target_idx; }; -static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc(void) +static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc( + enum damos_sysfs_filter_handle_layer layer) { - return kzalloc(sizeof(struct damon_sysfs_scheme_filter), GFP_KERNEL); + struct damon_sysfs_scheme_filter *filter; + + filter = kzalloc(sizeof(struct damon_sysfs_scheme_filter), GFP_KERNEL); + if (filter) + filter->handle_layer = layer; + return filter; } /* Should match with enum damos_filter_type */ @@ -595,14 +611,20 @@ static const struct kobj_type damon_sysfs_scheme_filter_ktype = { struct damon_sysfs_scheme_filters { struct kobject kobj; + enum damos_sysfs_filter_handle_layer handle_layer; struct damon_sysfs_scheme_filter **filters_arr; int nr; }; static struct damon_sysfs_scheme_filters * -damon_sysfs_scheme_filters_alloc(void) +damon_sysfs_scheme_filters_alloc(enum damos_sysfs_filter_handle_layer layer) { - return kzalloc(sizeof(struct damon_sysfs_scheme_filters), GFP_KERNEL); + struct damon_sysfs_scheme_filters *filters; + + filters = kzalloc(sizeof(struct damon_sysfs_scheme_filters), GFP_KERNEL); + if (filters) + filters->handle_layer = layer; + return filters; } static void damon_sysfs_scheme_filters_rm_dirs( @@ -635,7 +657,8 @@ static int damon_sysfs_scheme_filters_add_dirs( filters->filters_arr = filters_arr; for (i = 0; i < nr_filters; i++) { - filter = damon_sysfs_scheme_filter_alloc(); + filter = damon_sysfs_scheme_filter_alloc( + filters->handle_layer); if (!filter) { damon_sysfs_scheme_filters_rm_dirs(filters); return -ENOMEM; @@ -1607,11 +1630,11 @@ static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) } static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme, - const char *name, + enum damos_sysfs_filter_handle_layer layer, const char *name, struct damon_sysfs_scheme_filters **filters_ptr) { struct damon_sysfs_scheme_filters *filters = - damon_sysfs_scheme_filters_alloc(); + damon_sysfs_scheme_filters_alloc(layer); int err; if (!filters) @@ -1630,15 +1653,18 @@ static int damos_sysfs_set_filter_dirs(struct damon_sysfs_scheme *scheme) { int err; - err = damon_sysfs_scheme_set_filters(scheme, "filters", + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH, "filters", &scheme->filters); if (err) return err; - err = damon_sysfs_scheme_set_filters(scheme, "core_filters", + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE, "core_filters", &scheme->core_filters); if (err) goto put_filters_out; - err = damon_sysfs_scheme_set_filters(scheme, "ops_filters", + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS, "ops_filters", &scheme->ops_filters); if (err) goto put_core_filters_out; From ae8fd5b6666b0d99f485748b2b2259de1a7458dc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:31 -0800 Subject: [PATCH 1248/2157] mm/damon/sysfs-schemes: return error when for attempts to install filters on wrong sysfs directory Return error if the user tries to install a DAMOS filter on DAMOS filters sysfs directory that assumed to be used for filters that handled by a DAMON layer that not same to that for the installing filter. Link: https://lkml.kernel.org/r/20250305222733.59089-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index e4e36c34ec0e..1895d2d2c295 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -362,6 +362,23 @@ static ssize_t type_show(struct kobject *kobj, damon_sysfs_scheme_filter_type_strs[filter->type]); } +static bool damos_sysfs_scheme_filter_valid_type( + enum damos_sysfs_filter_handle_layer layer, + enum damos_filter_type type) +{ + switch (layer) { + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH: + return true; + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE: + return !damos_filter_for_ops(type); + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS: + return damos_filter_for_ops(type); + default: + break; + } + return false; +} + static ssize_t type_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { @@ -373,6 +390,9 @@ static ssize_t type_store(struct kobject *kobj, for (type = 0; type < NR_DAMOS_FILTER_TYPES; type++) { if (sysfs_streq(buf, damon_sysfs_scheme_filter_type_strs[ type])) { + if (!damos_sysfs_scheme_filter_valid_type( + filter->handle_layer, type)) + break; filter->type = type; ret = count; break; From 899e4c14afa640b8f7374e162f2336b67f07123d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:32 -0800 Subject: [PATCH 1249/2157] Docs/ABI/damon: document {core,ops}_filters directories Document the new DAMOS filters sysfs directories on ABI doc. Link: https://lkml.kernel.org/r/20250305222733.59089-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/ABI/testing/sysfs-kernel-mm-damon | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index 76da77d7f7b6..293197f180ad 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -409,6 +409,22 @@ Description: Writing 'Y' or 'N' to this file sets whether to allow or reject applying the scheme's action to the memory that satisfies the 'type' and the 'matching' of the directory. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//core_filters +Date: Feb 2025 +Contact: SeongJae Park +Description: Directory for DAMON core layer-handled DAMOS filters. Files + under this directory works same to those of + /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters + directory. + +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//ops_filters +Date: Feb 2025 +Contact: SeongJae Park +Description: Directory for DAMON operations set layer-handled DAMOS filters. + Files under this directory works same to those of + /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters + directory. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//stats/nr_tried Date: Mar 2022 Contact: SeongJae Park From 114b480877698f7835a5ba95c6fbd97b63b119f6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 5 Mar 2025 14:27:33 -0800 Subject: [PATCH 1250/2157] Docs/admin-guide/mm/damon/usage: update for {core,ops}_filters directories Document {core,ops}_filters directories on usage document. Link: https://lkml.kernel.org/r/20250305222733.59089-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/damon/usage.rst | 31 ++++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index de549dd18107..ced2013db3df 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -83,7 +83,7 @@ comma (","). │ │ │ │ │ │ │ │ :ref:`goals `/nr_goals │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value │ │ │ │ │ │ │ :ref:`watermarks `/metric,interval_us,high,mid,low - │ │ │ │ │ │ │ :ref:`filters `/nr_filters + │ │ │ │ │ │ │ :ref:`{core_,ops_,}filters `/nr_filters │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max │ │ │ │ │ │ │ :ref:`stats `/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds │ │ │ │ │ │ │ :ref:`tried_regions `/total_bytes @@ -307,9 +307,10 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme. schemes// ------------ -In each scheme directory, five directories (``access_pattern``, ``quotas``, -``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and three files -(``action``, ``target_nid`` and ``apply_interval``) exist. +In each scheme directory, seven directories (``access_pattern``, ``quotas``, +``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``stats``, and +``tried_regions``) and three files (``action``, ``target_nid`` and +``apply_interval``) exist. The ``action`` file is for setting and getting the scheme's :ref:`action `. The keywords that can be written to and read @@ -420,13 +421,24 @@ The ``interval`` should written in microseconds unit. .. _sysfs_filters: -schemes//filters/ --------------------- +schemes//{core\_,ops\_,}filters/ +----------------------------------- -The directory for the :ref:`filters ` of the given +Directories for :ref:`filters ` of the given DAMON-based operation scheme. -In the beginning, this directory has only one file, ``nr_filters``. Writing a +``core_filters`` and ``ops_filters`` directories are for the filters handled by +the DAMON core layer and operations set layer, respectively. ``filters`` +directory can be used for installing filters regardless of their handled +layers. Filters that requested by ``core_filters`` and ``ops_filters`` will be +installed before those of ``filters``. All three directories have same files. + +Use of ``filters`` directory can make expecting evaluation orders of given +filters with the files under directory bit confusing. Users are hence +recommended to use ``core_filters`` and ``ops_filters`` directories. The +``filters`` directory could be deprecated in future. + +In the beginning, the directory has only one file, ``nr_filters``. Writing a number (``N``) to the file creates the number of child directories named ``0`` to ``N-1``. Each directory represents each filter. The filters are evaluated in the numeric order. @@ -435,7 +447,7 @@ Each filter directory contains nine files, namely ``type``, ``matching``, ``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max`` and ``target_idx``. To ``type`` file, you can write the type of the filter. Refer to :ref:`the design doc ` for available type -names and their meanings. +names, their meaning and on what layer those are handled. For ``memcg`` type, you can specify the memory cgroup of the interest by writing the path of the memory cgroup from the cgroups mount point to @@ -455,6 +467,7 @@ the ``type`` and ``matching`` should be allowed or not. For example, below restricts a DAMOS action to be applied to only non-anonymous pages of all memory cgroups except ``/having_care_already``.:: + # cd ops_filters/0/ # echo 2 > nr_filters # # disallow anonymous pages echo anon > 0/type From 2273dea6b1e1fcdd06d207048a2cd563ed80111a Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 5 Mar 2025 11:54:09 +0800 Subject: [PATCH 1251/2157] mm/hugetlb: update nr_huge_pages and surplus_huge_pages together In alloc_surplus_hugetlb_folio(), we increase nr_huge_pages and surplus_huge_pages separately. In the middle window, if we set nr_hugepages to smaller and satisfy count < persistent_huge_pages(h), the surplus_huge_pages will be increased by adjust_pool_surplus(). After adding delay in the middle window, we can reproduce the problem easily by following step: 1. echo 3 > /proc/sys/vm/nr_overcommit_hugepages 2. mmap two hugepages. When nr_huge_pages=2 and surplus_huge_pages=1, goto step 3. 3. echo 0 > /proc/sys/vm/nr_huge_pages Finally, nr_huge_pages is less than surplus_huge_pages. To fix the problem, call only_alloc_fresh_hugetlb_folio() instead and move down __prep_account_new_huge_page() into the hugetlb_lock. Link: https://lkml.kernel.org/r/20250305035409.2391344-1-liushixin2@huawei.com Fixes: 0c397daea1d4 ("mm, hugetlb: further simplify hugetlb allocation API") Signed-off-by: Liu Shixin Acked-by: Peter Xu Acked-by: Oscar Salvador Cc: David Hildenbrand Cc: Kefeng Wang Cc: Liu Shixin Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 438de55dd38d..af9b8c1fca67 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2259,11 +2259,20 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h, goto out_unlock; spin_unlock_irq(&hugetlb_lock); - folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask); + folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); if (!folio) return NULL; + hugetlb_vmemmap_optimize_folio(h, folio); + spin_lock_irq(&hugetlb_lock); + /* + * nr_huge_pages needs to be adjusted within the same lock cycle + * as surplus_pages, otherwise it might confuse + * persistent_huge_pages() momentarily. + */ + __prep_account_new_huge_page(h, nid); + /* * We could have raced with the pool size change. * Double check that and simply deallocate the new page From ff22f9299d7b2c7874b560993c21543708b7e1b6 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Thu, 6 Mar 2025 12:50:10 -0800 Subject: [PATCH 1252/2157] page_io: zswap: do not crash the kernel on decompression failure Currently, we crash the kernel when a decompression failure occurs in zswap (either because of memory corruption, or a bug in the compression algorithm). This is overkill. We should only SIGBUS the unfortunate process asking for the zswap entry on zswap load, and skip the corrupted entry in zswap writeback. See [1] for a recent upstream discussion about this. The zswap writeback case is relatively straightforward to fix. For the zswap_load() case, we change the return behavior: * Return 0 on success. * Return -ENOENT (with the folio locked) if zswap does not own the swapped out content. * Return -EIO if zswap owns the swapped out content, but encounters a decompression failure for some reasons. The folio will be unlocked, but not be marked up-to-date, which will eventually cause the process requesting the page to SIGBUS (see the handling of not-up-to-date folio in do_swap_page() in mm/memory.c), without crashing the kernel. * Return -EINVAL if we encounter a large folio, as large folio should not be swapped in while zswap is being used. Similar to the -EIO case, we also unlock the folio but do not mark it as up-to-date to SIGBUS the faulting process. As a side effect, we require one extra zswap tree traversal in the load and writeback paths. Quick benchmarking on a kernel build test shows no performance difference: With the new scheme: real: mean: 125.1s, stdev: 0.12s user: mean: 3265.23s, stdev: 9.62s sys: mean: 2156.41s, stdev: 13.98s The old scheme: real: mean: 125.78s, stdev: 0.45s user: mean: 3287.18s, stdev: 5.95s sys: mean: 2177.08s, stdev: 26.52s [nphamcs@gmail.com: fix documentation of zswap_load()] Link: https://lkml.kernel.org/r/20250306222453.1269456-1-nphamcs@gmail.com Link: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/ [1] Link: https://lkml.kernel.org/r/20250306205011.784787-1-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Matthew Wilcox Suggested-by: Yosry Ahmed Suggested-by: Johannes Weiner Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/zswap.h | 6 +-- mm/page_io.c | 6 +-- mm/zswap.c | 119 +++++++++++++++++++++++++++++------------- 3 files changed, 88 insertions(+), 43 deletions(-) diff --git a/include/linux/zswap.h b/include/linux/zswap.h index d961ead91bf1..30c193a1207e 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -26,7 +26,7 @@ struct zswap_lruvec_state { unsigned long zswap_total_pages(void); bool zswap_store(struct folio *folio); -bool zswap_load(struct folio *folio); +int zswap_load(struct folio *folio); void zswap_invalidate(swp_entry_t swp); int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); @@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio) return false; } -static inline bool zswap_load(struct folio *folio) +static inline int zswap_load(struct folio *folio) { - return false; + return -ENOENT; } static inline void zswap_invalidate(swp_entry_t swp) {} diff --git a/mm/page_io.c b/mm/page_io.c index 9b983de351f9..4bce19df557b 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -638,11 +638,11 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug) if (swap_read_folio_zeromap(folio)) { folio_unlock(folio); goto finish; - } else if (zswap_load(folio)) { - folio_unlock(folio); - goto finish; } + if (zswap_load(folio) != -ENOENT) + goto finish; + /* We have to read from slower devices. Increase zswap protection. */ zswap_folio_swapin(folio); diff --git a/mm/zswap.c b/mm/zswap.c index 5f0e62289444..0dcc54eab58b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -62,6 +62,8 @@ static u64 zswap_reject_reclaim_fail; static u64 zswap_reject_compress_fail; /* Compressed page was too big for the allocator to (optimally) store */ static u64 zswap_reject_compress_poor; +/* Load or writeback failed due to decompression failure */ +static u64 zswap_decompress_fail; /* Store failed because underlying allocator could not get memory */ static u64 zswap_reject_alloc_fail; /* Store failed because the entry metadata could not be allocated (rare) */ @@ -985,11 +987,12 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, return comp_ret == 0 && alloc_ret == 0; } -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) +static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) { struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; + int decomp_ret, dlen; u8 *src, *obj; acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); @@ -1012,11 +1015,21 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); - BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); - BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); + decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); + dlen = acomp_ctx->req->dlen; zpool_obj_read_end(zpool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); + + if (!decomp_ret && dlen == PAGE_SIZE) + return true; + + zswap_decompress_fail++; + pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n", + swp_type(entry->swpentry), + swp_offset(entry->swpentry), + entry->pool->tfm_name, entry->length, dlen); + return false; } /********************************* @@ -1046,6 +1059,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; + int ret = 0; /* try to allocate swap cache folio */ si = get_swap_device(swpentry); @@ -1067,8 +1081,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry, * and freed when invalidated by the concurrent shrinker anyway. */ if (!folio_was_allocated) { - folio_put(folio); - return -EEXIST; + ret = -EEXIST; + goto out; } /* @@ -1081,14 +1095,17 @@ static int zswap_writeback_entry(struct zswap_entry *entry, * be dereferenced. */ tree = swap_zswap_tree(swpentry); - if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) { - delete_from_swap_cache(folio); - folio_unlock(folio); - folio_put(folio); - return -ENOMEM; + if (entry != xa_load(tree, offset)) { + ret = -ENOMEM; + goto out; } - zswap_decompress(entry, folio); + if (!zswap_decompress(entry, folio)) { + ret = -EIO; + goto out; + } + + xa_erase(tree, offset); count_vm_event(ZSWPWB); if (entry->objcg) @@ -1104,9 +1121,14 @@ static int zswap_writeback_entry(struct zswap_entry *entry, /* start writeback */ __swap_writepage(folio, &wbc); - folio_put(folio); - return 0; +out: + if (ret && ret != -EEXIST) { + delete_from_swap_cache(folio); + folio_unlock(folio); + } + folio_put(folio); + return ret; } /********************************* @@ -1606,7 +1628,27 @@ bool zswap_store(struct folio *folio) return ret; } -bool zswap_load(struct folio *folio) +/** + * zswap_load() - load a folio from zswap + * @folio: folio to load + * + * Return: 0 on success, with the folio unlocked and marked up-to-date, or one + * of the following error codes: + * + * -EIO: if the swapped out content was in zswap, but could not be loaded + * into the page due to a decompression failure. The folio is unlocked, but + * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page() + * will SIGBUS). + * + * -EINVAL: if the swapped out content was in zswap, but the page belongs + * to a large folio, which is not supported by zswap. The folio is unlocked, + * but NOT marked up-to-date, so that an IO error is emitted (e.g. + * do_swap_page() will SIGBUS). + * + * -ENOENT: if the swapped out content was not in zswap. The folio remains + * locked on return. + */ +int zswap_load(struct folio *folio) { swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); @@ -1617,18 +1659,32 @@ bool zswap_load(struct folio *folio) VM_WARN_ON_ONCE(!folio_test_locked(folio)); if (zswap_never_enabled()) - return false; + return -ENOENT; /* * Large folios should not be swapped in while zswap is being used, as * they are not properly handled. Zswap does not properly load large * folios, and a large folio may only be partially in zswap. - * - * Return true without marking the folio uptodate so that an IO error is - * emitted (e.g. do_swap_page() will sigbus). */ - if (WARN_ON_ONCE(folio_test_large(folio))) - return true; + if (WARN_ON_ONCE(folio_test_large(folio))) { + folio_unlock(folio); + return -EINVAL; + } + + entry = xa_load(tree, offset); + if (!entry) + return -ENOENT; + + if (!zswap_decompress(entry, folio)) { + folio_unlock(folio); + return -EIO; + } + + folio_mark_uptodate(folio); + + count_vm_event(ZSWPIN); + if (entry->objcg) + count_objcg_events(entry->objcg, ZSWPIN, 1); /* * When reading into the swapcache, invalidate our entry. The @@ -1642,27 +1698,14 @@ bool zswap_load(struct folio *folio) * files, which reads into a private page and may free it if * the fault fails. We remain the primary owner of the entry.) */ - if (swapcache) - entry = xa_erase(tree, offset); - else - entry = xa_load(tree, offset); - - if (!entry) - return false; - - zswap_decompress(entry, folio); - - count_vm_event(ZSWPIN); - if (entry->objcg) - count_objcg_events(entry->objcg, ZSWPIN, 1); - if (swapcache) { - zswap_entry_free(entry); folio_mark_dirty(folio); + xa_erase(tree, offset); + zswap_entry_free(entry); } - folio_mark_uptodate(folio); - return true; + folio_unlock(folio); + return 0; } void zswap_invalidate(swp_entry_t swp) @@ -1757,6 +1800,8 @@ static int zswap_debugfs_init(void) zswap_debugfs_root, &zswap_reject_compress_fail); debugfs_create_u64("reject_compress_poor", 0444, zswap_debugfs_root, &zswap_reject_compress_poor); + debugfs_create_u64("decompress_fail", 0444, + zswap_debugfs_root, &zswap_decompress_fail); debugfs_create_u64("written_back_pages", 0444, zswap_debugfs_root, &zswap_written_back_pages); debugfs_create_file("pool_total_size", 0444, From e11079dd25c525aaf238d81287851ef16a521ef3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:51 +0200 Subject: [PATCH 1253/2157] arm: mem_init: use memblock_phys_free() to free DMA memory on SA1111 Patch series "arch, mm: reduce code duplication in mem_init()", v2. Every architecture has implementation of mem_init() function and some even more than one. All these release free memory to the buddy allocator, most of them set high_memory to the end of directly addressable memory and many of them set max_mapnr for FLATMEM case. These patches pull the commonalities into the generic code and refactor some of the mem_init() implementations so that many of them can be just dropped. This patch (of 13): This will help to pull out memblock_free_all() to generic code. Link: https://lkml.kernel.org/r/20250313135003.836600-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250313135003.836600-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: "Mike Rapoport (IBM)" Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 5345d218899a..9aec1cb2386f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -277,14 +277,14 @@ void __init mem_init(void) set_max_mapnr(pfn_to_page(max_pfn) - mem_map); - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); + memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); #endif + /* this will put all unused low memory onto the freelists */ + memblock_free_all(); + free_highpages(); /* From 2b1d532e106ee63acb61a8e11608fafd75e52c4d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:52 +0200 Subject: [PATCH 1254/2157] csky: move setup_initrd() to setup.c Memory used by initrd should be reserved as soon as possible before there any memblock allocations that might overwrite that memory. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Guo Ren (csky) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/csky/kernel/setup.c | 43 ++++++++++++++++++++++++++++++++++++++++ arch/csky/mm/init.c | 43 ---------------------------------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index fe715b707fd0..e0d6ca86ea8c 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -12,6 +12,45 @@ #include #include +#ifdef CONFIG_BLK_DEV_INITRD +static void __init setup_initrd(void) +{ + unsigned long size; + + if (initrd_start >= initrd_end) { + pr_err("initrd not found or empty"); + goto disable; + } + + if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + pr_err("initrd extends beyond end of memory"); + goto disable; + } + + size = initrd_end - initrd_start; + + if (memblock_is_region_reserved(__pa(initrd_start), size)) { + pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", + __pa(initrd_start), size); + goto disable; + } + + memblock_reserve(__pa(initrd_start), size); + + pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), size); + + initrd_below_start_ok = 1; + + return; + +disable: + initrd_start = initrd_end = 0; + + pr_err(" - disabling initrd\n"); +} +#endif + static void __init csky_memblock_init(void) { unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); @@ -40,6 +79,10 @@ static void __init csky_memblock_init(void) max_low_pfn = min_low_pfn + sseg_size; } +#ifdef CONFIG_BLK_DEV_INITRD + setup_initrd(); +#endif + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; mmu_init(min_low_pfn, max_low_pfn); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index bde7cabd23df..ab51acbc19b2 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -42,45 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -#ifdef CONFIG_BLK_DEV_INITRD -static void __init setup_initrd(void) -{ - unsigned long size; - - if (initrd_start >= initrd_end) { - pr_err("initrd not found or empty"); - goto disable; - } - - if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { - pr_err("initrd extends beyond end of memory"); - goto disable; - } - - size = initrd_end - initrd_start; - - if (memblock_is_region_reserved(__pa(initrd_start), size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", - __pa(initrd_start), size); - goto disable; - } - - memblock_reserve(__pa(initrd_start), size); - - pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", - (void *)(initrd_start), size); - - initrd_below_start_ok = 1; - - return; - -disable: - initrd_start = initrd_end = 0; - - pr_err(" - disabling initrd\n"); -} -#endif - void __init mem_init(void) { #ifdef CONFIG_HIGHMEM @@ -92,10 +53,6 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); -#ifdef CONFIG_BLK_DEV_INITRD - setup_initrd(); -#endif - memblock_free_all(); #ifdef CONFIG_HIGHMEM From 30686816214b6062246e4918f3eadd1f55382425 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:53 +0200 Subject: [PATCH 1255/2157] hexagon: move initialization of init_mm.context init to paging_init() This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/hexagon/mm/init.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 3458f39ca2ac..508bb6a8dcc9 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -59,14 +59,6 @@ void __init mem_init(void) * To-Do: someone somewhere should wipe out the bootmem map * after we're done? */ - - /* - * This can be moved to some more virtual-memory-specific - * initialization hook at some point. Set the init_mm - * descriptors "context" value to point to the initial - * kernel segment table's physical address. - */ - init_mm.context.ptbase = __pa(init_mm.pgd); } void sync_icache_dcache(pte_t pte) @@ -103,6 +95,12 @@ static void __init paging_init(void) free_area_init(max_zone_pfn); /* sets up the zonelists and mem_map */ + /* + * Set the init_mm descriptors "context" value to point to the + * initial kernel segment table's physical address. + */ + init_mm.context.ptbase = __pa(init_mm.pgd); + /* * Start of high memory area. Will probably need something more * fancy if we... get more fancy. From 67e7a600869ca557e259f1ce20f8b89bb95ca97d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:54 +0200 Subject: [PATCH 1256/2157] MIPS: consolidate mem_init() for NUMA machines Both MIPS systems that support numa (loongsoon3 and sgi-ip27) have identical mem_init() for NUMA case. Move that into arch/mips/mm/init.c and drop duplicate per-machine definitions. Link: https://lkml.kernel.org/r/20250313135003.836600-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/mips/loongson64/numa.c | 7 ------- arch/mips/mm/init.c | 7 +++++++ arch/mips/sgi-ip27/ip27-memory.c | 9 --------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8388400d052f..95d5f553ce19 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -164,13 +164,6 @@ void __init paging_init(void) free_area_init(zones_size); } -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} - /* All PCI device belongs to logical Node-0 */ int pcibus_to_node(struct pci_bus *bus) { diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4583d1a2a73e..3db6082c611e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -482,6 +482,13 @@ void __init mem_init(void) 0x80000000 - 4, KCORE_TEXT); #endif } +#else /* CONFIG_NUMA */ +void __init mem_init(void) +{ + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + memblock_free_all(); + setup_zero_pages(); /* This comes from node 0 */ +} #endif /* !CONFIG_NUMA */ void free_init_pages(const char *what, unsigned long begin, unsigned long end) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 1963313f55d8..2b3e46e2e607 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -406,8 +406,6 @@ void __init prom_meminit(void) } } -extern void setup_zero_pages(void); - void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; @@ -416,10 +414,3 @@ void __init paging_init(void) zones_size[ZONE_NORMAL] = max_low_pfn; free_area_init(zones_size); } - -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} From e74e2b8eb424c26dff35727d242437edb87684aa Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:55 +0200 Subject: [PATCH 1257/2157] MIPS: make setup_zero_pages() use memblock Allocating the zero pages from memblock is simpler because the memory is already reserved. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-6-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/mips/include/asm/mmzone.h | 2 -- arch/mips/mm/init.c | 18 +++++------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h index 14226ea42036..602a21aee9d4 100644 --- a/arch/mips/include/asm/mmzone.h +++ b/arch/mips/include/asm/mmzone.h @@ -20,6 +20,4 @@ #define nid_to_addrbase(nid) 0 #endif -extern void setup_zero_pages(void); - #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 3db6082c611e..820e35a59d4d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -59,24 +59,16 @@ EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code */ -void setup_zero_pages(void) +static void __init setup_zero_pages(void) { - unsigned int order, i; - struct page *page; + unsigned int order; if (cpu_has_vce) order = 3; else order = 0; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -470,9 +462,9 @@ void __init mem_init(void) BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); maar_init(); - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ mem_init_free_highmem(); + memblock_free_all(); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -486,8 +478,8 @@ void __init mem_init(void) void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ + memblock_free_all(); } #endif /* !CONFIG_NUMA */ From be971f957a80e2bbd7747a295886df1472803ff1 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:56 +0200 Subject: [PATCH 1258/2157] nios2: move pr_debug() about memory start and end to setup_arch() This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/nios2/kernel/setup.c | 2 ++ arch/nios2/mm/init.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index da122a5fa43b..a4cffbfc1399 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -149,6 +149,8 @@ void __init setup_arch(char **cmdline_p) memory_start = memblock_start_of_DRAM(); memory_end = memblock_end_of_DRAM(); + pr_debug("%s: start=%lx, end=%lx\n", __func__, memory_start, memory_end); + setup_initial_init_mm(_stext, _etext, _edata, _end); init_task.thread.kregs = &fake_regs; diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index a2278485de19..aa692ad30044 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -65,8 +65,6 @@ void __init mem_init(void) unsigned long end_mem = memory_end; /* this must not include kernel stack at top */ - pr_debug("mem_init: start=%lx, end=%lx\n", memory_start, memory_end); - end_mem &= PAGE_MASK; high_memory = __va(end_mem); From 54ccf66f99d6d2630895eb13156be19a033d4566 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:57 +0200 Subject: [PATCH 1259/2157] s390: make setup_zero_pages() use memblock Allocating the zero pages from memblock is simpler because the memory is already reserved. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Heiko Carstens Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/s390/mm/init.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index d88cb1c13f7d..2b41dc9b1fa3 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -73,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -83,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -176,9 +165,10 @@ void __init mem_init(void) pv_init(); kfence_split_mapping(); + setup_zero_pages(); /* Setup zeroed pages. */ + /* this will put all low memory onto the freelists */ memblock_free_all(); - setup_zero_pages(); /* Setup zeroed pages. */ } unsigned long memory_block_size_bytes(void) From d319c8b4918d24aea6fd90bd39cd5bc9fcf40859 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:58 +0200 Subject: [PATCH 1260/2157] xtensa: split out printing of virtual memory layout to a function This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Max Filippov Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/xtensa/mm/init.c | 107 ++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index b2587a1a7c46..01577d33e602 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -66,59 +66,8 @@ void __init bootmem_init(void) memblock_dump_all(); } - -void __init zones_init(void) +static void __init print_vm_layout(void) { - /* All pages are DMA-able, so we put them all in the DMA zone. */ - unsigned long max_zone_pfn[MAX_NR_ZONES] = { - [ZONE_NORMAL] = max_low_pfn, -#ifdef CONFIG_HIGHMEM - [ZONE_HIGHMEM] = max_pfn, -#endif - }; - free_area_init(max_zone_pfn); -} - -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - -/* - * Initialize memory pages. - */ - -void __init mem_init(void) -{ - free_highpages(); - - max_mapnr = max_pfn - ARCH_PFN_OFFSET; - high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); - - memblock_free_all(); - pr_info("virtual kernel memory layout:\n" #ifdef CONFIG_KASAN " kasan : 0x%08lx - 0x%08lx (%5lu MB)\n" @@ -167,6 +116,60 @@ void __init mem_init(void) (unsigned long)(__bss_stop - __bss_start) >> 10); } +void __init zones_init(void) +{ + /* All pages are DMA-able, so we put them all in the DMA zone. */ + unsigned long max_zone_pfn[MAX_NR_ZONES] = { + [ZONE_NORMAL] = max_low_pfn, +#ifdef CONFIG_HIGHMEM + [ZONE_HIGHMEM] = max_pfn, +#endif + }; + free_area_init(max_zone_pfn); + print_vm_layout(); +} + +static void __init free_highpages(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long max_low = max_low_pfn; + phys_addr_t range_start, range_end; + u64 i; + + /* set highmem page free */ + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &range_start, &range_end, NULL) { + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; + + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + for (; start < end; start++) + free_highmem_page(pfn_to_page(start)); + } +#endif +} + +/* + * Initialize memory pages. + */ + +void __init mem_init(void) +{ + free_highpages(); + + max_mapnr = max_pfn - ARCH_PFN_OFFSET; + high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); + + memblock_free_all(); +} + static void __init parse_memmap_one(char *p) { char *oldp; From 8268af309d07d1c6279080b4e6fd16ec75cc977c Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:59 +0200 Subject: [PATCH 1261/2157] arch, mm: set max_mapnr when allocating memory map for FLATMEM max_mapnr is essentially the size of the memory map for systems that use FLATMEM. There is no reason to calculate it in each and every architecture when it's anyway calculated in alloc_node_mem_map(). Drop setting of max_mapnr from architecture code and set it once in alloc_node_mem_map(). While on it, move definition of mem_map and max_mapnr to mm/mm_init.c so there won't be two copies for MMU and !MMU variants. Link: https://lkml.kernel.org/r/20250313135003.836600-10-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/mm/init.c | 1 - arch/arc/mm/init.c | 5 ----- arch/arm/mm/init.c | 2 -- arch/csky/mm/init.c | 4 ---- arch/loongarch/mm/init.c | 1 - arch/microblaze/mm/init.c | 4 ---- arch/mips/mm/init.c | 8 -------- arch/nios2/kernel/setup.c | 1 - arch/nios2/mm/init.c | 2 +- arch/openrisc/mm/init.c | 1 - arch/parisc/mm/init.c | 1 - arch/powerpc/kernel/setup-common.c | 2 -- arch/riscv/mm/init.c | 1 - arch/s390/mm/init.c | 1 - arch/sh/mm/init.c | 1 - arch/sparc/mm/init_32.c | 1 - arch/um/include/shared/mem_user.h | 1 - arch/um/kernel/physmem.c | 12 ------------ arch/um/kernel/um_arch.c | 1 - arch/x86/mm/init_32.c | 3 --- arch/xtensa/mm/init.c | 1 - include/asm-generic/memory_model.h | 5 +++-- include/linux/mm.h | 11 ----------- mm/memory.c | 8 -------- mm/mm_init.c | 25 +++++++++++++++++-------- mm/nommu.c | 4 ---- 26 files changed, 21 insertions(+), 86 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 61c2198b1359..ec0eeae9c653 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,7 +276,6 @@ srm_paging_stop (void) void __init mem_init(void) { - set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); memblock_free_all(); } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 6a71b23f1383..7ef883d58dc1 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -154,11 +154,6 @@ void __init setup_arch_memory(void) arch_pfn_offset = min(min_low_pfn, min_high_pfn); kmap_init(); - -#else /* CONFIG_HIGHMEM */ - /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */ - max_mapnr = max_low_pfn - min_low_pfn; - #endif /* CONFIG_HIGHMEM */ free_area_init(max_zone_pfn); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9aec1cb2386f..d4bcc745a044 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -275,8 +275,6 @@ void __init mem_init(void) swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); #endif - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); - #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index ab51acbc19b2..ba6694d6170a 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -46,10 +46,6 @@ void __init mem_init(void) { #ifdef CONFIG_HIGHMEM unsigned long tmp; - - set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET); -#else - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index ca5aa5f46a9f..00449df50db1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -78,7 +78,6 @@ void __init paging_init(void) void __init mem_init(void) { - max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 4520c5741579..857cd2b44bcf 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -104,17 +104,13 @@ void __init setup_memory(void) * * min_low_pfn - the first page (mm/bootmem.c - node_boot_start) * max_low_pfn - * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn) */ /* memory start is from the kernel end (aligned) to higher addr */ min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */ - /* RAM is assumed contiguous */ - max_mapnr = memory_size >> PAGE_SHIFT; max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT; max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT; - pr_info("%s: max_mapnr: %#lx\n", __func__, max_mapnr); pr_info("%s: min_low_pfn: %#lx\n", __func__, min_low_pfn); pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 820e35a59d4d..eb61a73520a0 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -415,15 +415,7 @@ void __init paging_init(void) " %ldk highmem ignored\n", (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; - - max_mapnr = max_low_pfn; - } else if (highend_pfn) { - max_mapnr = highend_pfn; - } else { - max_mapnr = max_low_pfn; } -#else - max_mapnr = max_low_pfn; #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a4cffbfc1399..2a40150142c3 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -158,7 +158,6 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; find_limits(&min_low_pfn, &max_low_pfn, &max_pfn); - max_mapnr = max_low_pfn; memblock_reserve(__pa_symbol(_stext), _end - _stext); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index aa692ad30044..3cafa87ead9e 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -51,7 +51,7 @@ void __init paging_init(void) pagetable_init(); pgd_current = swapper_pg_dir; - max_zone_pfn[ZONE_NORMAL] = max_mapnr; + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; /* pass the memory from the bootmem allocator to the main allocator */ free_area_init(max_zone_pfn); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d0cb1a0126f9..9093c336e158 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -193,7 +193,6 @@ void __init mem_init(void) { BUG_ON(!mem_map); - max_mapnr = max_low_pfn; high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* clear the zero-page */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 61c0a2477072..2cdfc0b1195c 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -563,7 +563,6 @@ void __init mem_init(void) #endif high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(max_low_pfn); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a08b0ede4e64..68d47c53876c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -957,8 +957,6 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); - /* Set max_mapnr before paging_init() */ - set_max_mapnr(max_pfn); high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..157c9ca51541 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -298,7 +298,6 @@ static void __init setup_bootmem(void) high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2b41dc9b1fa3..ad567e2100b7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -159,7 +159,6 @@ void __init mem_init(void) cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); pv_init(); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 289a2fecebef..72aea5cd1b85 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -290,7 +290,6 @@ void __init paging_init(void) */ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = __MEMORY_START >> PAGE_SHIFT; - set_max_mapnr(max_low_pfn - min_low_pfn); nodes_clear(node_online_map); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index d96a14ffceeb..6b58da14edc6 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -275,7 +275,6 @@ void __init mem_init(void) taint_real_pages(); - max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index adfa08062f88..d4727efcf23d 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,7 +47,6 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index a74f17b033c4..af02b5f9911d 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,18 +22,6 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -void __init mem_total_pages(unsigned long physmem, unsigned long iomem) -{ - unsigned long phys_pages, iomem_pages, total_pages; - - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - - total_pages = phys_pages + iomem_pages; - - max_mapnr = total_pages; -} - void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 79ea97d4797e..6414cbf00572 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -419,7 +419,6 @@ void __init setup_arch(char **cmdline_p) stack_protections((unsigned long) init_task.stack); setup_physmem(uml_physmem, uml_reserved, physmem_size); - mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac41b1e0940d..6d2f8cb9451e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -650,9 +650,6 @@ void __init initmem_init(void) memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); -#ifdef CONFIG_FLATMEM - max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn; -#endif __vmalloc_start_set = true; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 01577d33e602..9f1b0d5fccc7 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -164,7 +164,6 @@ void __init mem_init(void) { free_highpages(); - max_mapnr = max_pfn - ARCH_PFN_OFFSET; high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6d1fb6162ac1..a3b5029aebbd 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -19,11 +19,12 @@ #define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ ARCH_PFN_OFFSET) +/* avoid include hell */ +extern unsigned long max_mapnr; + #ifndef pfn_valid static inline int pfn_valid(unsigned long pfn) { - /* avoid include hell */ - extern unsigned long max_mapnr; unsigned long pfn_offset = ARCH_PFN_OFFSET; return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr; diff --git a/include/linux/mm.h b/include/linux/mm.h index 82776b409391..8c4cb8c28507 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -46,17 +46,6 @@ extern int sysctl_page_lock_unfairness; void mm_core_init(void); void init_mm_internals(void); -#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ -extern unsigned long max_mapnr; - -static inline void set_max_mapnr(unsigned long limit) -{ - max_mapnr = limit; -} -#else -static inline void set_max_mapnr(unsigned long limit) { } -#endif - extern atomic_long_t _totalram_pages; static inline unsigned long totalram_pages(void) { diff --git a/mm/memory.c b/mm/memory.c index 8873b7a4962c..a1d7664855f2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -95,14 +95,6 @@ #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif -#ifndef CONFIG_NUMA -unsigned long max_mapnr; -EXPORT_SYMBOL(max_mapnr); - -struct page *mem_map; -EXPORT_SYMBOL(mem_map); -#endif - static vm_fault_t do_fault(struct vm_fault *vmf); static vm_fault_t do_anonymous_page(struct vm_fault *vmf); static bool vmf_pte_changed(struct vm_fault *vmf); diff --git a/mm/mm_init.c b/mm/mm_init.c index 133640a93d1d..7fd48d2d5064 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -37,6 +37,14 @@ #include +#ifndef CONFIG_NUMA +unsigned long max_mapnr; +EXPORT_SYMBOL(max_mapnr); + +struct page *mem_map; +EXPORT_SYMBOL(mem_map); +#endif + #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; @@ -1639,7 +1647,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); offset = pgdat->node_start_pfn - start; /* - * The zone's endpoints aren't required to be MAX_PAGE_ORDER + * The zone's endpoints aren't required to be MAX_PAGE_ORDER * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. */ @@ -1655,14 +1663,15 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); -#ifndef CONFIG_NUMA + /* the global mem_map is just set as node 0's */ - if (pgdat == NODE_DATA(0)) { - mem_map = NODE_DATA(0)->node_mem_map; - if (page_to_pfn(mem_map) != pgdat->node_start_pfn) - mem_map -= offset; - } -#endif + WARN_ON(pgdat != NODE_DATA(0)); + + mem_map = pgdat->node_mem_map; + if (page_to_pfn(mem_map) != pgdat->node_start_pfn) + mem_map -= offset; + + max_mapnr = end - start; } #else static inline void alloc_node_mem_map(struct pglist_data *pgdat) { } diff --git a/mm/nommu.c b/mm/nommu.c index 8b31d8396297..43751726f977 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -44,16 +44,12 @@ void *high_memory; EXPORT_SYMBOL(high_memory); -struct page *mem_map; -unsigned long max_mapnr; -EXPORT_SYMBOL(max_mapnr); unsigned long highest_memmap_pfn; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; atomic_long_t mmap_pages_allocated; -EXPORT_SYMBOL(mem_map); /* list of mapped, potentially shareable regions */ static struct kmem_cache *vm_region_jar; From e120d1bc12da5c1bb871c346f741296610fd6fcb Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:00 +0200 Subject: [PATCH 1262/2157] arch, mm: set high_memory in free_area_init() high_memory defines upper bound on the directly mapped memory. This bound is defined by the beginning of ZONE_HIGHMEM when a system has high memory and by the end of memory otherwise. All this is known to generic memory management initialization code that can set high_memory while initializing core mm structures. Add a generic calculation of high_memory to free_area_init() and remove per-architecture calculation except for the architectures that set and use high_memory earlier than that. Link: https://lkml.kernel.org/r/20250313135003.836600-11-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/mm/init.c | 1 - arch/arc/mm/init.c | 2 -- arch/arm64/mm/init.c | 2 -- arch/csky/mm/init.c | 1 - arch/hexagon/mm/init.c | 6 ------ arch/loongarch/kernel/numa.c | 1 - arch/loongarch/mm/init.c | 2 -- arch/microblaze/mm/init.c | 2 -- arch/mips/mm/init.c | 2 -- arch/nios2/mm/init.c | 6 ------ arch/openrisc/mm/init.c | 2 -- arch/parisc/mm/init.c | 1 - arch/riscv/mm/init.c | 1 - arch/s390/mm/init.c | 2 -- arch/sh/mm/init.c | 7 ------- arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 2 -- arch/um/kernel/um_arch.c | 1 - arch/x86/kernel/setup.c | 2 -- arch/x86/mm/init_32.c | 3 --- arch/x86/mm/numa_32.c | 3 --- arch/xtensa/mm/init.c | 2 -- mm/memory.c | 8 -------- mm/mm_init.c | 30 ++++++++++++++++++++++++++++++ mm/nommu.c | 2 -- 25 files changed, 30 insertions(+), 62 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index ec0eeae9c653..3ab2d2f3c917 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,7 +276,6 @@ srm_paging_stop (void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); memblock_free_all(); } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 7ef883d58dc1..05025122e965 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -150,8 +150,6 @@ void __init setup_arch_memory(void) */ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; - high_memory = (void *)(min_high_pfn << PAGE_SHIFT); - arch_pfn_offset = min(min_low_pfn, min_high_pfn); kmap_init(); #endif /* CONFIG_HIGHMEM */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ccdef53872a0..53a0b105890b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -309,8 +309,6 @@ void __init arm64_memblock_init(void) } early_init_fdt_scan_reserved_mem(); - - high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } void __init bootmem_init(void) diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index ba6694d6170a..a22801aa503a 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -47,7 +47,6 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 508bb6a8dcc9..d412c2314509 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -100,12 +100,6 @@ static void __init paging_init(void) * initial kernel segment table's physical address. */ init_mm.context.ptbase = __pa(init_mm.pgd); - - /* - * Start of high memory area. Will probably need something more - * fancy if we... get more fancy. - */ - high_memory = (void *)((bootmem_lastpg + 1) << PAGE_SHIFT); } #ifndef DMA_RESERVE diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 84fe7f854820..8eb489725b1a 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -389,7 +389,6 @@ void __init paging_init(void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 00449df50db1..6affa3609188 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -78,8 +78,6 @@ void __init paging_init(void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); } #endif /* !CONFIG_NUMA */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 857cd2b44bcf..7e2e342e84c5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -120,8 +120,6 @@ void __init setup_memory(void) void __init mem_init(void) { - high_memory = (void *)__va(memory_start + lowmem_size - 1); - /* this will put all memory onto the freelists */ memblock_free_all(); #ifdef CONFIG_HIGHMEM diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index eb61a73520a0..ed9dde6a00f7 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -417,7 +417,6 @@ void __init paging_init(void) max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; } #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -469,7 +468,6 @@ void __init mem_init(void) #else /* CONFIG_NUMA */ void __init mem_init(void) { - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); setup_zero_pages(); /* This comes from node 0 */ memblock_free_all(); } diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 3cafa87ead9e..4ba8dfa0d238 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -62,12 +62,6 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long end_mem = memory_end; /* this must not include - kernel stack at top */ - - end_mem &= PAGE_MASK; - high_memory = __va(end_mem); - /* this will put all memory onto the freelists */ memblock_free_all(); } diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 9093c336e158..72c5952607ac 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -193,8 +193,6 @@ void __init mem_init(void) { BUG_ON(!mem_map); - high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 2cdfc0b1195c..4fbe354dc9b4 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -562,7 +562,6 @@ void __init mem_init(void) BUILD_BUG_ON(TMPALIAS_MAP_START >= 0x80000000); #endif - high_memory = __va((max_pfn << PAGE_SHIFT)); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 157c9ca51541..ac6d41e86243 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -295,7 +295,6 @@ static void __init setup_bootmem(void) phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ad567e2100b7..4bd6f316d71f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -159,8 +159,6 @@ void __init mem_init(void) cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); kfence_split_mapping(); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 72aea5cd1b85..6d459ffba4bc 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -330,13 +330,6 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - pg_data_t *pgdat; - - high_memory = NULL; - for_each_online_pgdat(pgdat) - high_memory = max_t(void *, high_memory, - __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - memblock_free_all(); /* Set this up early, so we can take care of the zero page */ diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 6b58da14edc6..81a468a9c223 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -275,7 +275,6 @@ void __init mem_init(void) taint_real_pages(); - high_memory = __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 05882bca5b73..34d46adb9571 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2505,8 +2505,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - high_memory = __va(last_valid_pfn << PAGE_SHIFT); - memblock_free_all(); /* diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 6414cbf00572..f24a3ce37ab7 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -385,7 +385,6 @@ int __init linux_main(int argc, char **argv, char **envp) high_physmem = uml_physmem + physmem_size; end_iomem = high_physmem + iomem_size; - high_memory = (void *) end_iomem; start_vm = VMALLOC_START; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ff8604007b08..74ac686d441a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -972,8 +972,6 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = e820__end_of_low_ram_pfn(); else max_low_pfn = max_pfn; - - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif /* Find and reserve MPTABLE area */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 6d2f8cb9451e..801b659ead0c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -643,9 +643,6 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 65fda406e6f2..442ef3facff0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -41,9 +41,6 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 9f1b0d5fccc7..9b662477b3d4 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -164,8 +164,6 @@ void __init mem_init(void) { free_highpages(); - high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); } diff --git a/mm/memory.c b/mm/memory.c index a1d7664855f2..3900225d99c5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -113,14 +113,6 @@ static __always_inline bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) return pte_marker_uffd_wp(vmf->orig_pte); } -/* - * A number of key systems in x86 including ioremap() rely on the assumption - * that high_memory defines the upper bound on direct map memory, then end - * of ZONE_NORMAL. - */ -void *high_memory; -EXPORT_SYMBOL(high_memory); - /* * Randomize the address space (stacks, mmaps, brk, etc.). * diff --git a/mm/mm_init.c b/mm/mm_init.c index 7fd48d2d5064..bd7071c32a44 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -45,6 +45,13 @@ struct page *mem_map; EXPORT_SYMBOL(mem_map); #endif +/* + * high_memory defines the upper bound on direct map memory, then end + * of ZONE_NORMAL. + */ +void *high_memory; +EXPORT_SYMBOL(high_memory); + #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; @@ -1778,6 +1785,27 @@ static bool arch_has_descending_max_zone_pfns(void) return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); } +static void set_high_memory(void) +{ + phys_addr_t highmem = memblock_end_of_DRAM(); + + /* + * Some architectures (e.g. ARM) set high_memory very early and + * use it in arch setup code. + * If an architecture already set high_memory don't overwrite it + */ + if (high_memory) + return; + +#ifdef CONFIG_HIGHMEM + if (arch_has_descending_max_zone_pfns() || + highmem > PFN_PHYS(arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])) + highmem = PFN_PHYS(arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]); +#endif + + high_memory = phys_to_virt(highmem - 1) + 1; +} + /** * free_area_init - Initialise all pg_data_t and zone data * @max_zone_pfn: an array of max PFNs for each zone @@ -1900,6 +1928,8 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* disable hash distribution for systems with a single node */ fixup_hashdist(); + + set_high_memory(); } /** diff --git a/mm/nommu.c b/mm/nommu.c index 43751726f977..15a396ce2553 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -42,8 +42,6 @@ #include #include "internal.h" -void *high_memory; -EXPORT_SYMBOL(high_memory); unsigned long highest_memmap_pfn; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; From 6faea3422e3b4e8de44a55aa3e6e843320da66d2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:01 +0200 Subject: [PATCH 1263/2157] arch, mm: streamline HIGHMEM freeing All architectures that support HIGHMEM have their code that frees high memory pages to the buddy allocator while __free_memory_core() is limited to freeing only low memory. There is no actual reason for that. The memory map is completely ready by the time memblock_free_all() is called and high pages can be released to the buddy allocator along with low memory. Remove low memory limit from __free_memory_core() and drop per-architecture code that frees high memory pages. Link: https://lkml.kernel.org/r/20250313135003.836600-12-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arc/mm/init.c | 6 +----- arch/arm/mm/init.c | 29 ----------------------------- arch/csky/mm/init.c | 14 -------------- arch/microblaze/mm/init.c | 16 ---------------- arch/mips/mm/init.c | 20 -------------------- arch/powerpc/mm/mem.c | 14 -------------- arch/sparc/mm/init_32.c | 25 ------------------------- arch/x86/include/asm/highmem.h | 3 --- arch/x86/include/asm/numa.h | 4 ---- arch/x86/include/asm/numa_32.h | 13 ------------- arch/x86/mm/Makefile | 2 -- arch/x86/mm/highmem_32.c | 34 ---------------------------------- arch/x86/mm/init_32.c | 28 ---------------------------- arch/xtensa/mm/init.c | 29 ----------------------------- include/linux/mm.h | 1 - mm/memblock.c | 3 +-- 16 files changed, 2 insertions(+), 239 deletions(-) delete mode 100644 arch/x86/include/asm/numa_32.h delete mode 100644 arch/x86/mm/highmem_32.c diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 05025122e965..11ce638731c9 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -160,11 +160,7 @@ void __init setup_arch_memory(void) static void __init highmem_init(void) { #ifdef CONFIG_HIGHMEM - unsigned long tmp; - memblock_phys_free(high_mem_start, high_mem_sz); - for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); #endif } @@ -176,8 +172,8 @@ static void __init highmem_init(void) */ void __init mem_init(void) { - memblock_free_all(); highmem_init(); + memblock_free_all(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d4bcc745a044..7bb5ce02b9b5 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,33 +237,6 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - /* * mem_init() marks the free areas in the mem_map and tells us how much * memory is free. This is done after various parts of the system have @@ -283,8 +256,6 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ memblock_free_all(); - free_highpages(); - /* * Check boundaries twice: Some fundamental inconsistencies can * be detected at build time already. diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index a22801aa503a..3914c2b873da 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -44,21 +44,7 @@ EXPORT_SYMBOL(empty_zero_page); void __init mem_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; -#endif - memblock_free_all(); - -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - /* FIXME not sure about */ - if (!memblock_is_reserved(tmp << PAGE_SHIFT)) - free_highmem_page(page); - } -#endif } void free_initmem(void) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 7e2e342e84c5..3e664e0efc33 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -52,19 +52,6 @@ static void __init highmem_init(void) map_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); } - -static void __meminit highmem_setup(void) -{ - unsigned long pfn; - - for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) { - struct page *page = pfn_to_page(pfn); - - /* FIXME not sure about */ - if (!memblock_is_reserved(pfn << PAGE_SHIFT)) - free_highmem_page(page); - } -} #endif /* CONFIG_HIGHMEM */ /* @@ -122,9 +109,6 @@ void __init mem_init(void) { /* this will put all memory onto the freelists */ memblock_free_all(); -#ifdef CONFIG_HIGHMEM - highmem_setup(); -#endif mem_init_done = 1; } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index ed9dde6a00f7..075177e817ac 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -425,25 +425,6 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -static inline void __init mem_init_free_highmem(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - if (cpu_has_dc_aliases) - return; - - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!memblock_is_memory(PFN_PHYS(tmp))) - SetPageReserved(page); - else - free_highmem_page(page); - } -#endif -} - void __init mem_init(void) { /* @@ -454,7 +435,6 @@ void __init mem_init(void) maar_init(); setup_zero_pages(); /* Setup zeroed pages. */ - mem_init_free_highmem(); memblock_free_all(); #ifdef CONFIG_64BIT diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c7708c8fad29..1bc94bca9944 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -297,20 +297,6 @@ void __init mem_init(void) memblock_free_all(); -#ifdef CONFIG_HIGHMEM - { - unsigned long pfn, highmem_mapnr; - - highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; - for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { - phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; - struct page *page = pfn_to_page(pfn); - if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr)) - free_highmem_page(page); - } - } -#endif /* CONFIG_HIGHMEM */ - #if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) /* * If smp is enabled, next_tlbcam_idx is initialized in the cpu up diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 81a468a9c223..043e9b6fadd0 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -232,18 +232,6 @@ static void __init taint_real_pages(void) } } -static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long tmp; - -#ifdef CONFIG_DEBUG_HIGHMEM - printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); -#endif - - for (tmp = start_pfn; tmp < end_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -} - void __init mem_init(void) { int i; @@ -276,19 +264,6 @@ void __init mem_init(void) taint_real_pages(); memblock_free_all(); - - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; - unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - - if (end_pfn <= highstart_pfn) - continue; - - if (start_pfn < highstart_pfn) - start_pfn = highstart_pfn; - - map_high_region(start_pfn, end_pfn); - } } void sparc_flush_page_to_ram(struct page *page) diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 731ee7cc40a5..585bdadba47d 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -69,9 +69,6 @@ extern unsigned long highstart_pfn, highend_pfn; arch_flush_lazy_mmu_mode(); \ } while (0) -extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 5469d7a7c40f..53ba39ce010c 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -41,10 +41,6 @@ static inline int numa_cpu_node(int cpu) } #endif /* CONFIG_NUMA */ -#ifdef CONFIG_X86_32 -# include -#endif - #ifdef CONFIG_NUMA extern void numa_set_node(int cpu, int node); extern void numa_clear_node(int cpu); diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h deleted file mode 100644 index 9c8e9e85be77..000000000000 --- a/arch/x86/include/asm/numa_32.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_NUMA_32_H -#define _ASM_X86_NUMA_32_H - -#ifdef CONFIG_HIGHMEM -extern void set_highmem_pages_init(void); -#else -static inline void set_highmem_pages_init(void) -{ -} -#endif - -#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index e0c99a8760ca..32035d5be5a0 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -42,8 +42,6 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o -obj-$(CONFIG_HIGHMEM) += highmem_32.o - KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c deleted file mode 100644 index d9efa35711ee..000000000000 --- a/arch/x86/mm/highmem_32.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include /* for totalram_pages */ -#include -#include - -void __init set_highmem_pages_init(void) -{ - struct zone *zone; - int nid; - - /* - * Explicitly reset zone->managed_pages because set_highmem_pages_init() - * is invoked before memblock_free_all() - */ - reset_all_zones_managed_pages(); - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } -} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 801b659ead0c..9ee8ec2bc5d1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -394,23 +394,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = virt_to_kpte(vaddr); } - -void __init add_highpages_with_active_regions(int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - phys_addr_t start, end; - u64 i; - - for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { - unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), - start_pfn, end_pfn); - unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), - start_pfn, end_pfn); - for ( ; pfn < e_pfn; pfn++) - if (pfn_valid(pfn)) - free_highmem_page(pfn_to_page(pfn)); - } -} #else static inline void permanent_kmaps_init(pgd_t *pgd_base) { @@ -715,17 +698,6 @@ void __init mem_init(void) #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - /* - * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before memblock_free_all(). Memblock use free low memory for - * temporary data (see find_range_array()) and for this purpose can use - * pages that was already passed to the buddy allocator, hence marked as - * not accessible in the page tables when compiled with - * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not - * important here. - */ - set_highmem_pages_init(); - /* this will put all low memory onto the freelists */ memblock_free_all(); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 9b662477b3d4..47ecbe28263e 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -129,41 +129,12 @@ void __init zones_init(void) print_vm_layout(); } -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - /* * Initialize memory pages. */ void __init mem_init(void) { - free_highpages(); - memblock_free_all(); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8c4cb8c28507..6c519a5098d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3275,7 +3275,6 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ void free_reserved_page(struct page *page); -#define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { diff --git a/mm/memblock.c b/mm/memblock.c index 95af35fd1389..64ae678cd1d1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2164,8 +2164,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start, phys_addr_t end) { unsigned long start_pfn = PFN_UP(start); - unsigned long end_pfn = min_t(unsigned long, - PFN_DOWN(end), max_low_pfn); + unsigned long end_pfn = PFN_DOWN(end); if (start_pfn >= end_pfn) return 0; From 0d98484ee3330c35d1dc0da0be0871b3596cbe0d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:02 +0200 Subject: [PATCH 1264/2157] arch, mm: introduce arch_mm_preinit Currently, implementation of mem_init() in every architecture consists of one or more of the following: * initializations that must run before page allocator is active, for instance swiotlb_init() * a call to memblock_free_all() to release all the memory to the buddy allocator * initializations that must run after page allocator is ready and there is no arch-specific hook other than mem_init() for that, like for example register_page_bootmem_info() in x86 and sparc64 or simple setting of mem_init_done = 1 in several architectures * a bunch of semi-related stuff that apparently had no better place to live, for example a ton of BUILD_BUG_ON()s in parisc. Introduce arch_mm_preinit() that will be the first thing called from mm_core_init(). On architectures that have initializations that must happen before the page allocator is ready, move those into arch_mm_preinit() along with the code that does not depend on ordering with page allocator setup. On several architectures this results in reduction of mem_init() to a single call to memblock_free_all() that allows its consolidation next. Link: https://lkml.kernel.org/r/20250313135003.836600-13-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arc/mm/init.c | 13 ++++++------- arch/arm/mm/init.c | 21 ++++++++++++--------- arch/arm64/mm/init.c | 21 ++++++++++++--------- arch/mips/mm/init.c | 11 +++++++---- arch/powerpc/mm/mem.c | 9 ++++++--- arch/riscv/mm/init.c | 8 ++++++-- arch/s390/mm/init.c | 5 ++++- arch/sparc/mm/init_32.c | 5 ++++- arch/um/kernel/mem.c | 7 +++++-- arch/x86/mm/init_32.c | 6 +++++- arch/x86/mm/init_64.c | 5 ++++- include/linux/mm.h | 1 + mm/mm_init.c | 5 +++++ 13 files changed, 77 insertions(+), 40 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 11ce638731c9..90715b4a0bfa 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,11 +157,16 @@ void __init setup_arch_memory(void) free_area_init(max_zone_pfn); } -static void __init highmem_init(void) +void __init arch_mm_preinit(void) { #ifdef CONFIG_HIGHMEM memblock_phys_free(high_mem_start, high_mem_sz); #endif + + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } /* @@ -172,13 +177,7 @@ static void __init highmem_init(void) */ void __init mem_init(void) { - highmem_init(); memblock_free_all(); - - BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); - BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); - BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); - BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } #ifdef CONFIG_HIGHMEM diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7bb5ce02b9b5..7222100b0631 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,12 +237,7 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -/* - * mem_init() marks the free areas in the mem_map and tells us how much - * memory is free. This is done after various parts of the system have - * claimed their memory after the kernel image. - */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { #ifdef CONFIG_ARM_LPAE swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); @@ -253,9 +248,6 @@ void __init mem_init(void) memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); #endif - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - /* * Check boundaries twice: Some fundamental inconsistencies can * be detected at build time already. @@ -271,6 +263,17 @@ void __init mem_init(void) #endif } +/* + * mem_init() marks the free areas in the mem_map and tells us how much + * memory is free. This is done after various parts of the system have + * claimed their memory after the kernel image. + */ +void __init mem_init(void) +{ + /* this will put all unused low memory onto the freelists */ + memblock_free_all(); +} + #ifdef CONFIG_STRICT_KERNEL_RWX struct section_perm { const char *name; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 53a0b105890b..2312e3812043 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -357,12 +357,7 @@ void __init bootmem_init(void) memblock_dump_all(); } -/* - * mem_init() marks the free areas in the mem_map and tells us how much memory - * is free. This is done after various parts of the system have claimed their - * memory after the kernel image. - */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { unsigned int flags = SWIOTLB_VERBOSE; bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); @@ -386,9 +381,6 @@ void __init mem_init(void) swiotlb_init(swiotlb, flags); swiotlb_update_mem_attributes(); - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - /* * Check boundaries twice: Some fundamental inconsistencies can be * detected at build time already. @@ -414,6 +406,17 @@ void __init mem_init(void) } } +/* + * mem_init() marks the free areas in the mem_map and tells us how much memory + * is free. This is done after various parts of the system have claimed their + * memory after the kernel image. + */ +void __init mem_init(void) +{ + /* this will put all unused low memory onto the freelists */ + memblock_free_all(); +} + void free_initmem(void) { void *lm_init_begin = lm_alias(__init_begin); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 075177e817ac..eec38e7735dd 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -425,7 +425,7 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE @@ -435,7 +435,6 @@ void __init mem_init(void) maar_init(); setup_zero_pages(); /* Setup zeroed pages. */ - memblock_free_all(); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -446,13 +445,17 @@ void __init mem_init(void) #endif } #else /* CONFIG_NUMA */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { setup_zero_pages(); /* This comes from node 0 */ - memblock_free_all(); } #endif /* !CONFIG_NUMA */ +void __init mem_init(void) +{ + memblock_free_all(); +} + void free_init_pages(const char *what, unsigned long begin, unsigned long end) { unsigned long pfn; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1bc94bca9944..68efdaf14e58 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -273,7 +273,7 @@ void __init paging_init(void) mark_nonram_nosave(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* * book3s is limited to 16 page sizes due to encoding this in @@ -295,8 +295,6 @@ void __init mem_init(void) kasan_late_init(); - memblock_free_all(); - #if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) /* * If smp is enabled, next_tlbcam_idx is initialized in the cpu up @@ -329,6 +327,11 @@ void __init mem_init(void) #endif /* CONFIG_PPC32 */ } +void __init mem_init(void) +{ + memblock_free_all(); +} + void free_initmem(void) { ppc_md.progress = ppc_printk_progress; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ac6d41e86243..9efadabf6be1 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -171,7 +171,7 @@ static void __init print_vm_layout(void) static void print_vm_layout(void) { } #endif /* CONFIG_DEBUG_VM */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit); #ifdef CONFIG_FLATMEM @@ -192,11 +192,15 @@ void __init mem_init(void) } swiotlb_init(swiotlb, SWIOTLB_VERBOSE); - memblock_free_all(); print_vm_layout(); } +void __init mem_init(void) +{ + memblock_free_all(); +} + /* Limit the memory size via mem. */ static phys_addr_t memory_limit; #ifdef CONFIG_XIP_KERNEL diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 4bd6f316d71f..e771b7458d8b 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -154,7 +154,7 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); @@ -163,7 +163,10 @@ void __init mem_init(void) kfence_split_mapping(); setup_zero_pages(); /* Setup zeroed pages. */ +} +void __init mem_init(void) +{ /* this will put all low memory onto the freelists */ memblock_free_all(); } diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 043e9b6fadd0..e16c32c5728f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -232,7 +232,7 @@ static void __init taint_real_pages(void) } } -void __init mem_init(void) +void __init arch_mm_preinit(void) { int i; @@ -262,7 +262,10 @@ void __init mem_init(void) memset(sparc_valid_addr_bitmap, 0, i << 2); taint_real_pages(); +} +void __init mem_init(void) +{ memblock_free_all(); } diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index befed230aac2..cce387438e60 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -54,7 +54,7 @@ int kmalloc_ok = 0; /* Used during early boot */ static unsigned long brk_end; -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); @@ -66,10 +66,13 @@ void __init mem_init(void) map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; + max_pfn = max_low_pfn; +} +void __init mem_init(void) +{ /* this will put all low memory onto the freelists */ memblock_free_all(); - max_pfn = max_low_pfn; kmalloc_ok = 1; } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ee8ec2bc5d1..16664c5464b5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -691,13 +691,17 @@ static void __init test_wp_bit(void) panic("Linux doesn't support CPUs with broken WP."); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { pci_iommu_alloc(); #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif +} + +void __init mem_init(void) +{ /* this will put all low memory onto the freelists */ memblock_free_all(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6e8e4ef5312a..a88f7db8089e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1348,10 +1348,13 @@ static void __init preallocate_vmalloc_pages(void) panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { pci_iommu_alloc(); +} +void __init mem_init(void) +{ /* clear_bss() already clear the empty_zero_page */ /* this will put all memory onto the freelists */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c519a5098d4..c417e5634a58 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -43,6 +43,7 @@ struct folio_batch; extern int sysctl_page_lock_unfairness; +void arch_mm_preinit(void); void mm_core_init(void); void init_mm_internals(void); diff --git a/mm/mm_init.c b/mm/mm_init.c index bd7071c32a44..6844de516a50 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2734,11 +2734,16 @@ static void __init mem_init_print_info(void) ); } +void __init __weak arch_mm_preinit(void) +{ +} + /* * Set up kernel memory allocators */ void __init mm_core_init(void) { + arch_mm_preinit(); hugetlb_bootmem_alloc(); /* Initializations relying on SMP setup */ From 8afa901c147a41f92e83943cddf154bbb7995ee6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:03 +0200 Subject: [PATCH 1265/2157] arch, mm: make releasing of memory to page allocator more explicit The point where the memory is released from memblock to the buddy allocator is hidden inside arch-specific mem_init()s and the call to memblock_free_all() is needlessly duplicated in every artiste cure and after introduction of arch_mm_preinit() hook, mem_init() implementation on many architecture only contains the call to memblock_free_all(). Pull memblock_free_all() call into mm_core_init() and drop mem_init() on relevant architectures to make it more explicit where the free memory is released from memblock to the buddy allocator and to reduce code duplication in architecture specific code. Link: https://lkml.kernel.org/r/20250313135003.836600-14-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Acked-by: Geert Uytterhoeven [m68k] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. Miller Cc: Dinh Nguyen Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/mm/init.c | 6 ------ arch/arc/mm/init.c | 11 ----------- arch/arm/mm/init.c | 11 ----------- arch/arm64/mm/init.c | 11 ----------- arch/csky/mm/init.c | 5 ----- arch/hexagon/mm/init.c | 18 ------------------ arch/loongarch/kernel/numa.c | 5 ----- arch/loongarch/mm/init.c | 5 ----- arch/m68k/mm/init.c | 2 -- arch/microblaze/mm/init.c | 3 --- arch/mips/mm/init.c | 5 ----- arch/nios2/mm/init.c | 6 ------ arch/openrisc/mm/init.c | 3 --- arch/parisc/mm/init.c | 2 -- arch/powerpc/mm/mem.c | 5 ----- arch/riscv/mm/init.c | 5 ----- arch/s390/mm/init.c | 6 ------ arch/sh/mm/init.c | 2 -- arch/sparc/mm/init_32.c | 5 ----- arch/sparc/mm/init_64.c | 2 -- arch/um/kernel/mem.c | 2 -- arch/x86/mm/init_32.c | 3 --- arch/x86/mm/init_64.c | 2 -- arch/xtensa/mm/init.c | 9 --------- include/linux/memblock.h | 1 - mm/internal.h | 3 ++- mm/mm_init.c | 5 +++++ 27 files changed, 7 insertions(+), 136 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 3ab2d2f3c917..2d491b8cdab9 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -273,12 +273,6 @@ srm_paging_stop (void) } #endif -void __init -mem_init(void) -{ - memblock_free_all(); -} - static const pgprot_t protection_map[16] = { [VM_NONE] = _PAGE_P(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR), diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 90715b4a0bfa..a73cc94f806e 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -169,17 +169,6 @@ void __init arch_mm_preinit(void) BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Calculates and displays memory available/used - */ -void __init mem_init(void) -{ - memblock_free_all(); -} - #ifdef CONFIG_HIGHMEM int pfn_valid(unsigned long pfn) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7222100b0631..54bdca025c9f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -263,17 +263,6 @@ void __init arch_mm_preinit(void) #endif } -/* - * mem_init() marks the free areas in the mem_map and tells us how much - * memory is free. This is done after various parts of the system have - * claimed their memory after the kernel image. - */ -void __init mem_init(void) -{ - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); -} - #ifdef CONFIG_STRICT_KERNEL_RWX struct section_perm { const char *name; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2312e3812043..4b966d5709d2 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -406,17 +406,6 @@ void __init arch_mm_preinit(void) } } -/* - * mem_init() marks the free areas in the mem_map and tells us how much memory - * is free. This is done after various parts of the system have claimed their - * memory after the kernel image. - */ -void __init mem_init(void) -{ - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); -} - void free_initmem(void) { void *lm_init_begin = lm_alias(__init_begin); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index 3914c2b873da..573da66b2543 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -42,11 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -void __init mem_init(void) -{ - memblock_free_all(); -} - void free_initmem(void) { free_initmem_default(-1); diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index d412c2314509..34eb9d424b96 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -43,24 +43,6 @@ DEFINE_SPINLOCK(kmap_gen_lock); /* checkpatch says don't init this to 0. */ unsigned long long kmap_generation; -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Fixes up more stuff for HIGHMEM - * Calculates and displays memory available/used - */ -void __init mem_init(void) -{ - /* No idea where this is actually declared. Seems to evade LXR. */ - memblock_free_all(); - - /* - * To-Do: someone somewhere should wipe out the bootmem map - * after we're done? - */ -} - void sync_icache_dcache(pte_t pte) { unsigned long addr; diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 8eb489725b1a..30a72fd528c0 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -387,11 +387,6 @@ void __init paging_init(void) free_area_init(zones_size); } -void __init mem_init(void) -{ - memblock_free_all(); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 6affa3609188..fdb7f73ad160 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -75,11 +75,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } - -void __init mem_init(void) -{ - memblock_free_all(); -} #endif /* !CONFIG_NUMA */ void __ref free_initmem(void) diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 8b11d0d545aa..488411af1b3f 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -121,7 +121,5 @@ static inline void init_pointer_tables(void) void __init mem_init(void) { - /* this will put all memory onto the freelists */ - memblock_free_all(); init_pointer_tables(); } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 3e664e0efc33..65f0d1fb8a2a 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -107,9 +107,6 @@ void __init setup_memory(void) void __init mem_init(void) { - /* this will put all memory onto the freelists */ - memblock_free_all(); - mem_init_done = 1; } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index eec38e7735dd..a673d3d68254 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -451,11 +451,6 @@ void __init arch_mm_preinit(void) } #endif /* !CONFIG_NUMA */ -void __init mem_init(void) -{ - memblock_free_all(); -} - void free_init_pages(const char *what, unsigned long begin, unsigned long end) { unsigned long pfn; diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 4ba8dfa0d238..94efa3de3933 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -60,12 +60,6 @@ void __init paging_init(void) (unsigned long)empty_zero_page + PAGE_SIZE); } -void __init mem_init(void) -{ - /* this will put all memory onto the freelists */ - memblock_free_all(); -} - void __init mmu_init(void) { flush_tlb_all(); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 72c5952607ac..be1c2eb8bb94 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -196,9 +196,6 @@ void __init mem_init(void) /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); - /* this will put all low memory onto the freelists */ - memblock_free_all(); - printk("mem_init_done ...........................................\n"); mem_init_done = 1; return; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 4fbe354dc9b4..14270715d754 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -562,8 +562,6 @@ void __init mem_init(void) BUILD_BUG_ON(TMPALIAS_MAP_START >= 0x80000000); #endif - memblock_free_all(); - #ifdef CONFIG_PA11 if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) { pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 68efdaf14e58..d8fe11b64259 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -327,11 +327,6 @@ void __init arch_mm_preinit(void) #endif /* CONFIG_PPC32 */ } -void __init mem_init(void) -{ - memblock_free_all(); -} - void free_initmem(void) { ppc_md.progress = ppc_printk_progress; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9efadabf6be1..79b649f6de72 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -196,11 +196,6 @@ void __init arch_mm_preinit(void) print_vm_layout(); } -void __init mem_init(void) -{ - memblock_free_all(); -} - /* Limit the memory size via mem. */ static phys_addr_t memory_limit; #ifdef CONFIG_XIP_KERNEL diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e771b7458d8b..5b7b7b281334 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -165,12 +165,6 @@ void __init arch_mm_preinit(void) setup_zero_pages(); /* Setup zeroed pages. */ } -void __init mem_init(void) -{ - /* this will put all low memory onto the freelists */ - memblock_free_all(); -} - unsigned long memory_block_size_bytes(void) { /* diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 6d459ffba4bc..99e302eeeec1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -330,8 +330,6 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - memblock_free_all(); - /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index e16c32c5728f..fdc93dd12c3e 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -264,11 +264,6 @@ void __init arch_mm_preinit(void) taint_real_pages(); } -void __init mem_init(void) -{ - memblock_free_all(); -} - void sparc_flush_page_to_ram(struct page *page) { unsigned long vaddr = (unsigned long)page_address(page); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 34d46adb9571..760818950464 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2505,8 +2505,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - memblock_free_all(); - /* * Must be done after boot memory is put on freelist, because here we * might set fields in deferred struct pages that have not yet been diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index cce387438e60..379f33a1babf 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -71,8 +71,6 @@ void __init arch_mm_preinit(void) void __init mem_init(void) { - /* this will put all low memory onto the freelists */ - memblock_free_all(); kmalloc_ok = 1; } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 16664c5464b5..95b2758b4e4d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -702,9 +702,6 @@ void __init arch_mm_preinit(void) void __init mem_init(void) { - /* this will put all low memory onto the freelists */ - memblock_free_all(); - after_bootmem = 1; x86_init.hyper.init_after_bootmem(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a88f7db8089e..d67f15386ea2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1357,8 +1357,6 @@ void __init mem_init(void) { /* clear_bss() already clear the empty_zero_page */ - /* this will put all memory onto the freelists */ - memblock_free_all(); after_bootmem = 1; x86_init.hyper.init_after_bootmem(); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 47ecbe28263e..cc52733a0649 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -129,15 +129,6 @@ void __init zones_init(void) print_vm_layout(); } -/* - * Initialize memory pages. - */ - -void __init mem_init(void) -{ - memblock_free_all(); -} - static void __init parse_memmap_one(char *p) { char *oldp; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e79eb6ac516f..ef5a1ecc6e59 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -133,7 +133,6 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); -void memblock_free_all(void); void memblock_free(void *ptr, size_t size); void reset_all_zones_managed_pages(void); diff --git a/mm/internal.h b/mm/internal.h index 558c8e2a3d94..2f52a65272c1 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1475,7 +1475,8 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma, } extern bool mirrored_kernelcore; -extern bool memblock_has_mirror(void); +bool memblock_has_mirror(void); +void memblock_free_all(void); static __always_inline void vma_set_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, diff --git a/mm/mm_init.c b/mm/mm_init.c index 6844de516a50..c82b0162f1cb 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2738,6 +2738,10 @@ void __init __weak arch_mm_preinit(void) { } +void __init __weak mem_init(void) +{ +} + /* * Set up kernel memory allocators */ @@ -2761,6 +2765,7 @@ void __init mm_core_init(void) report_meminit(); kmsan_init_shadow(); stack_depot_early_init(); + memblock_free_all(); mem_init(); kmem_cache_init(); /* From 4c9ea539ad59ec60676930dacee02b7adde2e0c0 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:58:56 -0800 Subject: [PATCH 1266/2157] mm/damon/sysfs: validate user inputs from damon_sysfs_commit_input() Patch series "mm/damon/sysfs: commit parameters online via damon_call()". Due to the lack of ways to synchronously access DAMON internal data, DAMON sysfs interface is using damon_callback hooks with its own synchronization mechanism. The mechanism is built on top of damon_callback hooks in an ineifficient and complicated way. Patch series "mm/damon: replace most damon_callback usages in sysfs with new core functions", which starts with commit e035320fd38e ("mm/damon/sysfs-schemes: remove unnecessary schemes existence check in damon_sysfs_schemes_clear_regions()") introduced two new DAMON kernel API functions that providing the synchronous access, replaced most damon_callback hooks usage in DAMON sysfs interface, and cleaned up unnecessary code. Continue the replacement and cleanup works. Update the last DAMON sysfs' usage of its own synchronization mechanism, namely online DAMON parameters commit, to use damon_call() instead of the damon_callback hooks and the hard-to-maintain core-external synchronization mechanism. Then remove the no more be used code due to the change, and more unused code that just not yet cleaned up. The first four patches (patches 1-4) of this series makes DAMON sysfs interface's online parameters commit to use damon_call(). Then, following three patches (patches 5-7) remove the DAMON sysfs interface's own synchronization mechanism and its usages, which is no more be used by anyone due to the first four patches. Finally, six patches (8-13) do more cleanup of outdated comment and unused code. This patch (of 13): Online DAMON parameters commit via DAMON sysfs interface can make kdamond stop. This behavior was made because it can make the implementation simpler. The implementation tries committing the parameter without validation. If it finds something wrong in the middle of the parameters update, it returns error without reverting the partially committed parameters back. It is safe though, since it immediately breaks kdamond main loop in the case of the error return. Users can make the wrong parameters by mistake, though. Stopping kdamond in the case is not very useful behavior. Also this makes it difficult to utilize damon_call() instead of damon_callback hook for online parameters update, since damon_call() cannot immediately break kdamond main loop in the middle. Validate the input parameters and return error when it fails before starting parameters updates. In case of mistakenly wrong parameters, kdamond can continue running with the old and valid parameters. Link: https://lkml.kernel.org/r/20250306175908.66300-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250306175908.66300-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index ccd435d234b9..87e4c6e3614e 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1449,11 +1449,11 @@ static struct damon_ctx *damon_sysfs_build_ctx( * damon_sysfs_commit_input() - Commit user inputs to a running kdamond. * @kdamond: The kobject wrapper for the associated kdamond. * - * If the sysfs input is wrong, the kdamond will be terminated. + * Returns error if the sysfs input is wrong. */ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) { - struct damon_ctx *param_ctx; + struct damon_ctx *param_ctx, *test_ctx; int err; if (!damon_sysfs_kdamond_running(kdamond)) @@ -1465,7 +1465,15 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) param_ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]); if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); + test_ctx = damon_new_ctx(); + err = damon_commit_ctx(test_ctx, param_ctx); + if (err) { + damon_sysfs_destroy_targets(test_ctx); + damon_destroy_ctx(test_ctx); + goto out; + } err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); +out: damon_sysfs_destroy_targets(param_ctx); damon_destroy_ctx(param_ctx); return err; From bf74bdfd2edb60ab44b48a6ef705207dc889dc3d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:58:57 -0800 Subject: [PATCH 1267/2157] mm/damon/core: invoke kdamond_call() after merging is done if possible kdamond_call() callers may iterate the regions, so better to call it when the number of regions is as small as possible. It is when kdamond_merge_regions() is finished. Invoke it on the point. This change is also aimed to make future changes for carrying online parameters commit with damon_call() easier. The commit operation should be able to make sequence between other aggregation interval based operations including regioins merging and aggregation reset. Placing damon_call() invocation after the regions merging makes the sequence handling simpler. Link: https://lkml.kernel.org/r/20250306175908.66300-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index ebbb22840435..b9a9db1a90b4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2421,7 +2421,6 @@ static int kdamond_fn(void *data) if (ctx->callback.after_sampling && ctx->callback.after_sampling(ctx)) break; - kdamond_call(ctx, false); kdamond_usleep(sample_interval); ctx->passed_sample_intervals++; @@ -2439,9 +2438,10 @@ static int kdamond_fn(void *data) } /* - * do kdamond_apply_schemes() after kdamond_merge_regions() if - * possible, to reduce overhead + * do kdamond_call() and kdamond_apply_schemes() after + * kdamond_merge_regions() if possible, to reduce overhead */ + kdamond_call(ctx, false); if (!list_empty(&ctx->schemes)) kdamond_apply_schemes(ctx); else From 258d941e5877f5fd40d5e636540c0a00458b8825 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:58:58 -0800 Subject: [PATCH 1268/2157] mm/damon/core: make damon_set_attrs() be safe to be called from damon_call() Currently all DAMON kernel API callers do online DAMON parameters commit from damon_callback->after_aggregation because only those are safe place to call the DAMON monitoring attributes update function, namely damon_set_attrs(). Because damon_callback hooks provide no synchronization, the callers work in asynchronous ways or implement their own inefficient and complicated synchronization mechanisms. It also means online DAMON parameters commit can take up to one aggregation interval. On large systems having long aggregation intervals, that can be too slow. The synchronization can be done in more efficient and simple way while removing the latency constraint if it can be done using damon_call(). The fact that damon_call() can be executed in the middle of the aggregation makes damon_set_attrs() unsafe to be called from it, though. Two real problems can occur in the case. First, converting the not yet completely aggregated nr_accesses for new user-set intervals can arguably degrade the accuracy or at least make the logic complicated. Second, kdamond_reset_aggregated() will not be called after the monitoring results update, so next aggregation starts from unclean state. This can result in inconsistent and unexpected nr_accesses_bp. Make it safe as follows. Catch the middle-of-the-aggregation case from damon_set_attrs() by checking the passed_sample_intervals and next_aggregationsis of the context. And pass the information to nr_accesses conversion logic. The logic works as before if it is not the case (called after the current aggregation is completed). If it is the case (committing parameters in the middle of the aggregation), it drops the nr_accesses information that so far aggregated, and make the status same to the beginning of this aggregation, but as if the last aggregation was started with the updated sampling/aggregation intervals. The middle-of-aggregastion check introduce yet another edge case, though. This happens because kdamond_tune_intervals() can also call damon_set_attrs() with the middle-of-aggregation check. Consider damon_call() for parameters commit and kdamond_tune_intervals() are called in same iteration of kdamond main loop. Because kdamond_tune_interval() is called for aggregation intervals, it should be the end of the aggregation. The first damon_set_attrs() call from kdamond_call() understands it is the end of the aggregation and correctly handle it. But, because the damon_set_attrs() updated next_aggregation_sis of the context. Hence, the second damon_set_attrs() invocation from kdamond_tune_interval() believes it is called in the middle of the aggregation. It therefore resets aggregated information so far. After that, kdamond_reset_interval() is called and double-reset the aggregated information. Avoid this case, too, by setting the next_aggregation_sis before kdamond_tune_intervals() is invoked. Link: https://lkml.kernel.org/r/20250306175908.66300-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 56 +++++++++++++++++++++++++++++-------- mm/damon/tests/core-kunit.h | 6 ++-- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index b9a9db1a90b4..2be4099e0666 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -603,11 +603,25 @@ static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses, } static void damon_update_monitoring_result(struct damon_region *r, - struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs, + bool aggregating) { - r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses, - old_attrs, new_attrs); - r->nr_accesses_bp = r->nr_accesses * 10000; + if (!aggregating) { + r->nr_accesses = damon_nr_accesses_for_new_attrs( + r->nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->nr_accesses * 10000; + } else { + /* + * if this is called in the middle of the aggregation, reset + * the aggregations we made so far for this aggregation + * interval. In other words, make the status like + * kdamond_reset_aggregated() is called. + */ + r->last_nr_accesses = damon_nr_accesses_for_new_attrs( + r->last_nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->last_nr_accesses * 10000; + r->nr_accesses = 0; + } r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs); } @@ -620,7 +634,7 @@ static void damon_update_monitoring_result(struct damon_region *r, * ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs. */ static void damon_update_monitoring_results(struct damon_ctx *ctx, - struct damon_attrs *new_attrs) + struct damon_attrs *new_attrs, bool aggregating) { struct damon_attrs *old_attrs = &ctx->attrs; struct damon_target *t; @@ -635,7 +649,7 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx, damon_for_each_target(t, ctx) damon_for_each_region(r, t) damon_update_monitoring_result( - r, old_attrs, new_attrs); + r, old_attrs, new_attrs, aggregating); } /* @@ -662,10 +676,10 @@ static bool damon_valid_intervals_goal(struct damon_attrs *attrs) * @ctx: monitoring context * @attrs: monitoring attributes * - * This function should be called while the kdamond is not running, or an - * access check results aggregation is not ongoing (e.g., from - * &struct damon_callback->after_aggregation or - * &struct damon_callback->after_wmarks_check callbacks). + * This function should be called while the kdamond is not running, an access + * check results aggregation is not ongoing (e.g., from &struct + * damon_callback->after_aggregation or &struct + * damon_callback->after_wmarks_check callbacks), or from damon_call(). * * Every time interval is in micro-seconds. * @@ -676,6 +690,8 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) unsigned long sample_interval = attrs->sample_interval ? attrs->sample_interval : 1; struct damos *s; + bool aggregating = ctx->passed_sample_intervals < + ctx->next_aggregation_sis; if (!damon_valid_intervals_goal(attrs)) return -EINVAL; @@ -696,7 +712,7 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) ctx->next_ops_update_sis = ctx->passed_sample_intervals + attrs->ops_update_interval / sample_interval; - damon_update_monitoring_results(ctx, attrs); + damon_update_monitoring_results(ctx, attrs, aggregating); ctx->attrs = *attrs; damon_for_each_scheme(s, ctx) @@ -2453,6 +2469,24 @@ static int kdamond_fn(void *data) if (ctx->attrs.intervals_goal.aggrs && ctx->passed_sample_intervals >= ctx->next_intervals_tune_sis) { + /* + * ctx->next_aggregation_sis might be updated + * from kdamond_call(). In the case, + * damon_set_attrs() which will be called from + * kdamond_tune_interval() may wrongly think + * this is in the middle of the current + * aggregation, and make aggregation + * information reset for all regions. Then, + * following kdamond_reset_aggregated() call + * will make the region information invalid, + * particularly for ->nr_accesses_bp. + * + * Reset ->next_aggregation_sis to avoid that. + * It will anyway correctly updated after this + * if caluse. + */ + ctx->next_aggregation_sis = + next_aggregation_sis; ctx->next_intervals_tune_sis += ctx->attrs.aggr_samples * ctx->attrs.intervals_goal.aggrs; diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h index 532c6a6f21f9..be0fea9ee5fc 100644 --- a/mm/damon/tests/core-kunit.h +++ b/mm/damon/tests/core-kunit.h @@ -348,19 +348,19 @@ static void damon_test_update_monitoring_result(struct kunit *test) new_attrs = (struct damon_attrs){ .sample_interval = 100, .aggr_interval = 10000,}; - damon_update_monitoring_result(r, &old_attrs, &new_attrs); + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); KUNIT_EXPECT_EQ(test, r->nr_accesses, 15); KUNIT_EXPECT_EQ(test, r->age, 2); new_attrs = (struct damon_attrs){ .sample_interval = 1, .aggr_interval = 1000}; - damon_update_monitoring_result(r, &old_attrs, &new_attrs); + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); KUNIT_EXPECT_EQ(test, r->nr_accesses, 150); KUNIT_EXPECT_EQ(test, r->age, 2); new_attrs = (struct damon_attrs){ .sample_interval = 1, .aggr_interval = 100}; - damon_update_monitoring_result(r, &old_attrs, &new_attrs); + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); KUNIT_EXPECT_EQ(test, r->nr_accesses, 150); KUNIT_EXPECT_EQ(test, r->age, 20); From 3301f1861d34f53911a30a8f5f41b9141bd8ed39 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:58:59 -0800 Subject: [PATCH 1269/2157] mm/damon/sysfs: handle commit command using damon_call() DAMON sysfs interface is using damon_callback->after_aggregation hook with its self-implemented synchronization mechanism for the hook. It is inefficient, complicated, and take up to one aggregation interval to complete, which can be long on some configs. Use damon_call() instead. It provides a synchronization mechanism that built inside DAMON's core layer, so more efficient than DAMON sysfs interface's own one. Also it isolates the implementation inside the core layer, and hence it makes the code easier to maintain. Finally, it takes up to one sampling interval, which is much shorter than the aggregation interval in common setups. Link: https://lkml.kernel.org/r/20250306175908.66300-5-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 87e4c6e3614e..c55a2cee4b74 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1451,8 +1451,9 @@ static struct damon_ctx *damon_sysfs_build_ctx( * * Returns error if the sysfs input is wrong. */ -static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond) +static int damon_sysfs_commit_input(void *data) { + struct damon_sysfs_kdamond *kdamond = data; struct damon_ctx *param_ctx, *test_ctx; int err; @@ -1550,11 +1551,6 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active, if (!kdamond || kdamond->damon_ctx != c) goto out; switch (damon_sysfs_cmd_request.cmd) { - case DAMON_SYSFS_CMD_COMMIT: - if (!after_aggregation) - goto out; - err = damon_sysfs_commit_input(kdamond); - break; default: break; } @@ -1712,11 +1708,7 @@ static int damon_sysfs_update_schemes_tried_regions( * @cmd: The command to handle. * @kdamond: The kobject wrapper for the associated kdamond. * - * This function handles a DAMON sysfs command for a kdamond. For commands - * that need to access running DAMON context-internal data, it requests - * handling of the command to the DAMON callback - * (@damon_sysfs_cmd_request_callback()) and wait until it is properly handled, - * or the context is completed. + * This function handles a DAMON sysfs command for a kdamond. * * Return: 0 on success, negative error code otherwise. */ @@ -1730,6 +1722,9 @@ static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, return damon_sysfs_turn_damon_on(kdamond); case DAMON_SYSFS_CMD_OFF: return damon_sysfs_turn_damon_off(kdamond); + case DAMON_SYSFS_CMD_COMMIT: + return damon_sysfs_damon_call( + damon_sysfs_commit_input, kdamond); case DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS: return damon_sysfs_damon_call( damon_sysfs_commit_schemes_quota_goals, From 8b40db0edf3c4e02369509ace9c29f558f7b5cba Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:00 -0800 Subject: [PATCH 1270/2157] mm/damon/sysfs: remove damon_sysfs_cmd_request code from damon_sysfs_handle_cmd() damon_sysfs_handle_cmd() handles user requests that it can directly handle on its own. For requests that need to be handled from damon_callback hooks, it uses DAMON sysfs interface's own synchronous damon_callback hooks management mechanism, namely damon_sysfs_cmd_request. Now all user requests are handled without damon_callback hooks, so damon_sysfs_cmd_request client code in damon_sysfs_andle_cmd() does nothing in real. Remove the unnecessary code. Link: https://lkml.kernel.org/r/20250306175908.66300-6-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index c55a2cee4b74..166161f12c26 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1715,8 +1715,6 @@ static int damon_sysfs_update_schemes_tried_regions( static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, struct damon_sysfs_kdamond *kdamond) { - bool need_wait = true; - switch (cmd) { case DAMON_SYSFS_CMD_ON: return damon_sysfs_turn_damon_on(kdamond); @@ -1747,38 +1745,8 @@ static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, return damon_sysfs_damon_call( damon_sysfs_upd_tuned_intervals, kdamond); default: - break; - } - - /* Pass the command to DAMON callback for safe DAMON context access */ - if (damon_sysfs_cmd_request.kdamond) - return -EBUSY; - if (!damon_sysfs_kdamond_running(kdamond)) return -EINVAL; - damon_sysfs_cmd_request.cmd = cmd; - damon_sysfs_cmd_request.kdamond = kdamond; - - /* - * wait until damon_sysfs_cmd_request_callback() handles the request - * from kdamond context - */ - mutex_unlock(&damon_sysfs_lock); - while (need_wait) { - schedule_timeout_idle(msecs_to_jiffies(100)); - if (!mutex_trylock(&damon_sysfs_lock)) - continue; - if (!damon_sysfs_cmd_request.kdamond) { - /* damon_sysfs_cmd_request_callback() handled */ - need_wait = false; - } else if (!damon_sysfs_kdamond_running(kdamond)) { - /* kdamond has already finished */ - need_wait = false; - damon_sysfs_cmd_request.kdamond = NULL; - } - mutex_unlock(&damon_sysfs_lock); } - mutex_lock(&damon_sysfs_lock); - return 0; } static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, From 311f34ff85d2a0fb36b3566ef45dc21ee5089476 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:01 -0800 Subject: [PATCH 1271/2157] mm/damon/sysfs: remove damon_sysfs_cmd_request_callback() and its callers damon_sysfs_cmd_request_callback() is the damon_callback hook functions that were used to handle user requests that need to read and/or write DAMON internal data. All the usages are now updated to use damon_call() or damos_walk(), though. Remove it and its callers. Link: https://lkml.kernel.org/r/20250306175908.66300-7-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 62 ------------------------------------------------ 1 file changed, 62 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 166161f12c26..e5bcf019086f 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1529,65 +1529,6 @@ static int damon_sysfs_upd_tuned_intervals(void *data) return 0; } -/* - * damon_sysfs_cmd_request_callback() - DAMON callback for handling requests. - * @c: The DAMON context of the callback. - * @active: Whether @c is not deactivated due to watermarks. - * @after_aggr: Whether this is called from after_aggregation() callback. - * - * This function is periodically called back from the kdamond thread for @c. - * Then, it checks if there is a waiting DAMON sysfs request and handles it. - */ -static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active, - bool after_aggregation) -{ - struct damon_sysfs_kdamond *kdamond; - int err = 0; - - /* avoid deadlock due to concurrent state_store('off') */ - if (!mutex_trylock(&damon_sysfs_lock)) - return 0; - kdamond = damon_sysfs_cmd_request.kdamond; - if (!kdamond || kdamond->damon_ctx != c) - goto out; - switch (damon_sysfs_cmd_request.cmd) { - default: - break; - } - /* Mark the request as invalid now. */ - damon_sysfs_cmd_request.kdamond = NULL; -out: - mutex_unlock(&damon_sysfs_lock); - return err; -} - -static int damon_sysfs_after_wmarks_check(struct damon_ctx *c) -{ - /* - * after_wmarks_check() is called back while the context is deactivated - * by watermarks. - */ - return damon_sysfs_cmd_request_callback(c, false, false); -} - -static int damon_sysfs_after_sampling(struct damon_ctx *c) -{ - /* - * after_sampling() is called back only while the context is not - * deactivated by watermarks. - */ - return damon_sysfs_cmd_request_callback(c, true, false); -} - -static int damon_sysfs_after_aggregation(struct damon_ctx *c) -{ - /* - * after_aggregation() is called back only while the context is not - * deactivated by watermarks. - */ - return damon_sysfs_cmd_request_callback(c, true, true); -} - static struct damon_ctx *damon_sysfs_build_ctx( struct damon_sysfs_context *sys_ctx) { @@ -1603,9 +1544,6 @@ static struct damon_ctx *damon_sysfs_build_ctx( return ERR_PTR(err); } - ctx->callback.after_wmarks_check = damon_sysfs_after_wmarks_check; - ctx->callback.after_sampling = damon_sysfs_after_sampling; - ctx->callback.after_aggregation = damon_sysfs_after_aggregation; ctx->callback.before_terminate = damon_sysfs_before_terminate; return ctx; } From d682f5f643420aa5b06d34c679f3fb3fd60fbe14 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:02 -0800 Subject: [PATCH 1272/2157] mm/damon/sysfs: remove damon_sysfs_cmd_request and its readers damon_sysfs_cmd_request is DAMON sysfs interface's own synchronization mechanism for accessing DAMON internal data via damon_callback hooks. All the users are now migrated to damon_call() and damos_walk(), so nobody really uses it. No one writes to the data structure but reading code is still remained. Remove the reading code and the entire data structure. Link: https://lkml.kernel.org/r/20250306175908.66300-8-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index e5bcf019086f..1af6aff35d84 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1238,25 +1238,6 @@ static const char * const damon_sysfs_cmd_strs[] = { "update_tuned_intervals", }; -/* - * struct damon_sysfs_cmd_request - A request to the DAMON callback. - * @cmd: The command that needs to be handled by the callback. - * @kdamond: The kobject wrapper that associated to the kdamond thread. - * - * This structure represents a sysfs command request that need to access some - * DAMON context-internal data. Because DAMON context-internal data can be - * safely accessed from DAMON callbacks without additional synchronization, the - * request will be handled by the DAMON callback. None-``NULL`` @kdamond means - * the request is valid. - */ -struct damon_sysfs_cmd_request { - enum damon_sysfs_cmd cmd; - struct damon_sysfs_kdamond *kdamond; -}; - -/* Current DAMON callback request. Protected by damon_sysfs_lock. */ -static struct damon_sysfs_cmd_request damon_sysfs_cmd_request; - static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1555,8 +1536,6 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) if (damon_sysfs_kdamond_running(kdamond)) return -EBUSY; - if (damon_sysfs_cmd_request.kdamond == kdamond) - return -EBUSY; /* TODO: support multiple contexts per kdamond */ if (kdamond->contexts->nr != 1) return -EINVAL; @@ -1796,8 +1775,7 @@ static bool damon_sysfs_kdamonds_busy(struct damon_sysfs_kdamond **kdamonds, int i; for (i = 0; i < nr_kdamonds; i++) { - if (damon_sysfs_kdamond_running(kdamonds[i]) || - damon_sysfs_cmd_request.kdamond == kdamonds[i]) + if (damon_sysfs_kdamond_running(kdamonds[i])) return true; } From 52f7c351fc3e0bc372b5e3da21244f7e068ba9ec Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:03 -0800 Subject: [PATCH 1273/2157] mm/damon/sysfs-schemes: remove obsolete comment for damon_sysfs_schemes_clear_regions() The comment on damon_sysfs_schemes_clear_regions() function is obsolete, since it has updated to directly called from DAMON sysfs interface code. Remove the outdated comment. Link: https://lkml.kernel.org/r/20250306175908.66300-9-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 1895d2d2c295..985cfc750a90 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2341,7 +2341,6 @@ void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes, } } -/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ int damon_sysfs_schemes_clear_regions( struct damon_sysfs_schemes *sysfs_schemes) { From 53058c762afff714ceaec14b87cf8fdce3b6d33e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:04 -0800 Subject: [PATCH 1274/2157] mm/damon: remove damon_callback->private The field was added to let users keep their personal data to use inside of the callbacks. However, no one is actively using that now. Remove it. Link: https://lkml.kernel.org/r/20250306175908.66300-10-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index eed008b64a23..dab4bb0fe39d 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -609,12 +609,10 @@ struct damon_operations { * @after_aggregation: Called after each aggregation. * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. - * @private: User private data. * * The monitoring thread (&damon_ctx.kdamond) calls @before_start and * @before_terminate just before starting and finishing the monitoring, - * respectively. Therefore, those are good places for installing and cleaning - * @private. + * respectively. * * The monitoring thread calls @after_wmarks_check after each DAMON-based * operation schemes' watermarks check. If users need to make changes to the @@ -630,8 +628,6 @@ struct damon_operations { * If any callback returns non-zero, monitoring stops. */ struct damon_callback { - void *private; - int (*before_start)(struct damon_ctx *context); int (*after_wmarks_check)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); From 07da21855b270c17b2a2d20e644c1419fcaafdd1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:05 -0800 Subject: [PATCH 1275/2157] mm/damon: remove ->before_start of damon_callback The function pointer field was added to be used as a place to do some initialization works just before DAMON starts working. However, nobody is using it now. Remove it. Link: https://lkml.kernel.org/r/20250306175908.66300-11-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 7 ++----- mm/damon/core.c | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index dab4bb0fe39d..043de2408c65 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -603,16 +603,14 @@ struct damon_operations { /** * struct damon_callback - Monitoring events notification callbacks. * - * @before_start: Called before starting the monitoring. * @after_wmarks_check: Called after each schemes' watermarks check. * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. * - * The monitoring thread (&damon_ctx.kdamond) calls @before_start and - * @before_terminate just before starting and finishing the monitoring, - * respectively. + * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just + * before finishing the monitoring. * * The monitoring thread calls @after_wmarks_check after each DAMON-based * operation schemes' watermarks check. If users need to make changes to the @@ -628,7 +626,6 @@ struct damon_operations { * If any callback returns non-zero, monitoring stops. */ struct damon_callback { - int (*before_start)(struct damon_ctx *context); int (*after_wmarks_check)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); diff --git a/mm/damon/core.c b/mm/damon/core.c index 2be4099e0666..9175a49985d5 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2408,8 +2408,6 @@ static int kdamond_fn(void *data) if (ctx->ops.init) ctx->ops.init(ctx); - if (ctx->callback.before_start && ctx->callback.before_start(ctx)) - goto done; ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1, sizeof(*ctx->regions_score_histogram), GFP_KERNEL); if (!ctx->regions_score_histogram) From cedee98f68875605dad644a95a63eae04de250b2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:06 -0800 Subject: [PATCH 1276/2157] mm/damon: remove damon_callback->after_sampling The callback was used by DAMON sysfs interface for reading DAMON internal data. But it is no more being used, and damon_call() can do similar works in a better way. Remove it. Link: https://lkml.kernel.org/r/20250306175908.66300-12-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 11 ++++------- mm/damon/core.c | 3 --- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 043de2408c65..5aa277f4c948 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -604,7 +604,6 @@ struct damon_operations { * struct damon_callback - Monitoring events notification callbacks. * * @after_wmarks_check: Called after each schemes' watermarks check. - * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. @@ -617,17 +616,15 @@ struct damon_operations { * attributes of the monitoring context while it's deactivated due to the * watermarks, this is the good place to do. * - * The monitoring thread calls @after_sampling and @after_aggregation for each - * of the sampling intervals and aggregation intervals, respectively. - * Therefore, users can safely access the monitoring results without additional - * protection. For the reason, users are recommended to use these callback for - * the accesses to the results. + * The monitoring thread calls @after_aggregation for each of the aggregation + * intervals. Therefore, users can safely access the monitoring results + * without additional protection. For the reason, users are recommended to use + * these callback for the accesses to the results. * * If any callback returns non-zero, monitoring stops. */ struct damon_callback { int (*after_wmarks_check)(struct damon_ctx *context); - int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); int (*before_damos_apply)(struct damon_ctx *context, struct damon_target *target, diff --git a/mm/damon/core.c b/mm/damon/core.c index 9175a49985d5..812b1c70c723 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2432,9 +2432,6 @@ static int kdamond_fn(void *data) if (ctx->ops.prepare_access_checks) ctx->ops.prepare_access_checks(ctx); - if (ctx->callback.after_sampling && - ctx->callback.after_sampling(ctx)) - break; kdamond_usleep(sample_interval); ctx->passed_sample_intervals++; From 99ce7c9c6d855716356f2f839c53905592fd780b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:07 -0800 Subject: [PATCH 1277/2157] mm/damon: remove damon_callback->before_damos_apply The hook was introduced to let DAMON kernel API users access DAMOS schemes-eligible regions in a safe way. Now it is no more used by anyone, and the functionality is provided in a better way by damos_walk(). Remove it. Link: https://lkml.kernel.org/r/20250306175908.66300-13-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 ----- mm/damon/core.c | 13 ++++--------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 5aa277f4c948..be7b281fb922 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -605,7 +605,6 @@ struct damon_operations { * * @after_wmarks_check: Called after each schemes' watermarks check. * @after_aggregation: Called after each aggregation. - * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. * * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just @@ -626,10 +625,6 @@ struct damon_operations { struct damon_callback { int (*after_wmarks_check)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); - int (*before_damos_apply)(struct damon_ctx *context, - struct damon_target *target, - struct damon_region *region, - struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; diff --git a/mm/damon/core.c b/mm/damon/core.c index 812b1c70c723..177716847f4e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1731,7 +1731,6 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, struct timespec64 begin, end; unsigned long sz_applied = 0; unsigned long sz_ops_filter_passed = 0; - int err = 0; /* * We plan to support multiple context per kdamond, as DAMON sysfs * implies with 'nr_contexts' file. Nevertheless, only single context @@ -1771,14 +1770,10 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, if (damos_filter_out(c, t, r, s)) return; ktime_get_coarse_ts64(&begin); - if (c->callback.before_damos_apply) - err = c->callback.before_damos_apply(c, t, r, s); - if (!err) { - trace_damos_before_apply(cidx, sidx, tidx, r, - damon_nr_regions(t), do_trace); - sz_applied = c->ops.apply_scheme(c, t, r, s, - &sz_ops_filter_passed); - } + trace_damos_before_apply(cidx, sidx, tidx, r, + damon_nr_regions(t), do_trace); + sz_applied = c->ops.apply_scheme(c, t, r, s, + &sz_ops_filter_passed); damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed); ktime_get_coarse_ts64(&end); quota->total_charged_ns += timespec64_to_ns(&end) - From 105f830fa35c49ada7db785a7f9b70386f193529 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 6 Mar 2025 09:59:08 -0800 Subject: [PATCH 1278/2157] mm/damon: remove damon_operations->reset_aggregated The operations layer hook was introduced to let operations set do any aggregation data reset if needed. But it is not really be used now. Remove it. Link: https://lkml.kernel.org/r/20250306175908.66300-14-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 7 +------ mm/damon/core.c | 2 -- mm/damon/paddr.c | 1 - mm/damon/vaddr.c | 1 - 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index be7b281fb922..3db4f77261f5 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -542,7 +542,6 @@ enum damon_ops_id { * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. - * @reset_aggregated: Reset aggregated accesses monitoring results. * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. @@ -554,8 +553,7 @@ enum damon_ops_id { * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting * the monitoring, @update after each &damon_attrs.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each - * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after - * each &damon_attrs.aggr_interval. + * &damon_attrs.sample_interval. * * Each &struct damon_operations instance having valid @id can be registered * via damon_register_ops() and selected by damon_select_ops() later. @@ -570,8 +568,6 @@ enum damon_ops_id { * last preparation and update the number of observed accesses of each region. * It should also return max number of observed accesses that made as a result * of its update. The value will be used for regions adjustment threshold. - * @reset_aggregated should reset the access monitoring results that aggregated - * by @check_accesses. * @get_scheme_score should return the priority score of a region for a scheme * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided @@ -589,7 +585,6 @@ struct damon_operations { void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); - void (*reset_aggregated)(struct damon_ctx *context); int (*get_scheme_score)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); diff --git a/mm/damon/core.c b/mm/damon/core.c index 177716847f4e..fc1eba3da419 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2490,8 +2490,6 @@ static int kdamond_fn(void *data) kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); - if (ctx->ops.reset_aggregated) - ctx->ops.reset_aggregated(ctx); } if (ctx->passed_sample_intervals >= next_ops_update_sis) { diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index fba8b3c8ba30..b08847ef9b81 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -621,7 +621,6 @@ static int __init damon_pa_initcall(void) .update = NULL, .prepare_access_checks = damon_pa_prepare_access_checks, .check_accesses = damon_pa_check_accesses, - .reset_aggregated = NULL, .target_valid = NULL, .cleanup = NULL, .apply_scheme = damon_pa_apply_scheme, diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index a6174f725bd7..e6d99106a7f9 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -710,7 +710,6 @@ static int __init damon_va_initcall(void) .update = damon_va_update, .prepare_access_checks = damon_va_prepare_access_checks, .check_accesses = damon_va_check_accesses, - .reset_aggregated = NULL, .target_valid = damon_va_target_valid, .cleanup = NULL, .apply_scheme = damon_va_apply_scheme, From 11e88e9265ec192cff33fc2e43e36c211851b32c Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 6 Mar 2025 20:13:15 +0530 Subject: [PATCH 1279/2157] mm: remove redundant return in set_huge_zero_folio() It is the responsibility of the caller to check pmd_none(); in any case, we are not achieving anything by returning since there is no return value to tell the caller that we succeeded or not. So remove this check. Link: https://lkml.kernel.org/r/20250306144315.21907-1-dev.jain@arm.com Signed-off-by: Dev Jain Reviewed-by: David Hildenbrand Cc: Matthew Wilcow (Oracle) Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/huge_memory.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7433369d5d1f..80cf15116ce7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1309,8 +1309,6 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm, struct folio *zero_folio) { pmd_t entry; - if (!pmd_none(*pmd)) - return; entry = mk_pmd(&zero_folio->page, vma->vm_page_prot); entry = pmd_mkhuge(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); From 9039b9096ea27a20f0349d1537537663c935c8ed Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Thu, 6 Mar 2025 17:44:50 -0500 Subject: [PATCH 1280/2157] mm: page_ext: add an iteration API for page extensions Patch series "mm: page_ext: Introduce new iteration API", v3. Introduction ============ [ Thanks to David Hildenbrand for identifying the root cause of this issue and proving guidance on how to fix it. The new API idea, bugs and misconceptions are all mine though ] Currently, trying to reserve 1G pages with page_owner=on and sparsemem causes a crash. The reproducer is very simple: 1. Build the kernel with CONFIG_SPARSEMEM=y and the table extensions 2. Pass 'default_hugepagesz=1 page_owner=on' in the kernel command-line 3. Reserve one 1G page at run-time, this should crash (see patch 1 for backtrace) [ A crash with page_table_check is also possible, but harder to trigger ] Apparently, starting with commit cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") we now pass the full allocation order to page extension clients and the page extension implementation assumes that all PFNs of an allocation range will be stored in the same memory section (which is not true for 1G pages). To fix this, this series introduces a new iteration API for page extension objects. The API checks if the next page extension object can be retrieved from the current section or if it needs to look up for it in another section. Please, find all details in patch 1. I tested this series on arm64 and x86 by reserving 1G pages at run-time and doing kernel builds (always with page_owner=on and page_table_check=on). This patch (of 3): The page extension implementation assumes that all page extensions of a given page order are stored in the same memory section. The function page_ext_next() relies on this assumption by adding an offset to the current object to return the next adjacent page extension. This behavior works as expected for flatmem but fails for sparsemem when using 1G pages. The commit cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") exposes this issue, making it possible for a crash when using page_owner or page_table_check page extensions. The problem is that for 1G pages, the page extensions may span memory section boundaries and be stored in different memory sections. This issue was not visible before commit cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") because alloc_contig_pages() never passed more than MAX_PAGE_ORDER to post_alloc_hook(). However, the series introducing mentioned commit changed this behavior allowing the full 1G page order to be passed. Reproducer: 1. Build the kernel with CONFIG_SPARSEMEM=y and table extensions support 2. Pass 'default_hugepagesz=1 page_owner=on' in the kernel command-line 3. Reserve one 1G page at run-time, this should crash (backtrace below) To address this issue, this commit introduces a new API for iterating through page extensions. The main iteration macro is for_each_page_ext() and it must be called with the RCU read lock taken. Here's an usage example: """ struct page_ext_iter iter; struct page_ext *page_ext; ... rcu_read_lock(); for_each_page_ext(page, 1 << order, page_ext, iter) { struct my_page_ext *obj = get_my_page_ext_obj(page_ext); ... } rcu_read_unlock(); """ The loop construct uses page_ext_iter_next() which checks to see if we have crossed sections in the iteration. In this case, page_ext_iter_next() retrieves the next page_ext object from another section. Thanks to David Hildenbrand for helping identify the root cause and providing suggestions on how to fix and optmize the solution (final implementation and bugs are all mine through). Lastly, here's the backtrace, without kasan you can get random crashes: [ 76.052526] BUG: KASAN: slab-out-of-bounds in __update_page_owner_handle+0x238/0x298 [ 76.060283] Write of size 4 at addr ffff07ff96240038 by task tee/3598 [ 76.066714] [ 76.068203] CPU: 88 UID: 0 PID: 3598 Comm: tee Kdump: loaded Not tainted 6.13.0-rep1 #3 [ 76.076202] Hardware name: WIWYNN Mt.Jade Server System B81.030Z1.0007/Mt.Jade Motherboard, BIOS 2.10.20220810 (SCP: 2.10.20220810) 2022/08/10 [ 76.088972] Call trace: [ 76.091411] show_stack+0x20/0x38 (C) [ 76.095073] dump_stack_lvl+0x80/0xf8 [ 76.098733] print_address_description.constprop.0+0x88/0x398 [ 76.104476] print_report+0xa8/0x278 [ 76.108041] kasan_report+0xa8/0xf8 [ 76.111520] __asan_report_store4_noabort+0x20/0x30 [ 76.116391] __update_page_owner_handle+0x238/0x298 [ 76.121259] __set_page_owner+0xdc/0x140 [ 76.125173] post_alloc_hook+0x190/0x1d8 [ 76.129090] alloc_contig_range_noprof+0x54c/0x890 [ 76.133874] alloc_contig_pages_noprof+0x35c/0x4a8 [ 76.138656] alloc_gigantic_folio.isra.0+0x2c0/0x368 [ 76.143616] only_alloc_fresh_hugetlb_folio.isra.0+0x24/0x150 [ 76.149353] alloc_pool_huge_folio+0x11c/0x1f8 [ 76.153787] set_max_huge_pages+0x364/0xca8 [ 76.157961] __nr_hugepages_store_common+0xb0/0x1a0 [ 76.162829] nr_hugepages_store+0x108/0x118 [ 76.167003] kobj_attr_store+0x3c/0x70 [ 76.170745] sysfs_kf_write+0xfc/0x188 [ 76.174492] kernfs_fop_write_iter+0x274/0x3e0 [ 76.178927] vfs_write+0x64c/0x8e0 [ 76.182323] ksys_write+0xf8/0x1f0 [ 76.185716] __arm64_sys_write+0x74/0xb0 [ 76.189630] invoke_syscall.constprop.0+0xd8/0x1e0 [ 76.194412] do_el0_svc+0x164/0x1e0 [ 76.197891] el0_svc+0x40/0xe0 [ 76.200939] el0t_64_sync_handler+0x144/0x168 [ 76.205287] el0t_64_sync+0x1ac/0x1b0 Link: https://lkml.kernel.org/r/cover.1741301089.git.luizcap@redhat.com Link: https://lkml.kernel.org/r/a45893880b7e1601082d39d2c5c8b50bcc096305.1741301089.git.luizcap@redhat.com Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") Signed-off-by: Luiz Capitulino Acked-by: David Hildenbrand Cc: Johannes Weiner Cc: Luiz Capitulino Cc: Muchun Song Cc: Pasha Tatashin Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 93 ++++++++++++++++++++++++++++++++++++++++ mm/page_ext.c | 13 ++++++ 2 files changed, 106 insertions(+) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index e4b48a0dda24..76c817162d2f 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -3,6 +3,7 @@ #define __LINUX_PAGE_EXT_H #include +#include #include struct pglist_data; @@ -69,16 +70,31 @@ extern void page_ext_init(void); static inline void page_ext_init_flatmem_late(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + /* + * page_ext is allocated per memory section. Once we cross a + * memory section, we have to fetch the new pointer. + */ + return next_pfn % PAGES_PER_SECTION; +} #else extern void page_ext_init_flatmem(void); extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + return true; +} #endif extern struct page_ext *page_ext_get(const struct page *page); extern void page_ext_put(struct page_ext *page_ext); +extern struct page_ext *page_ext_lookup(unsigned long pfn); static inline void *page_ext_data(struct page_ext *page_ext, struct page_ext_operations *ops) @@ -93,6 +109,83 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr) return next; } +struct page_ext_iter { + unsigned long index; + unsigned long start_pfn; + struct page_ext *page_ext; +}; + +/** + * page_ext_iter_begin() - Prepare for iterating through page extensions. + * @iter: page extension iterator. + * @pfn: PFN of the page we're interested in. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no page_ext exists for this page. + */ +static inline struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter, + unsigned long pfn) +{ + iter->index = 0; + iter->start_pfn = pfn; + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_next() - Get next page extension + * @iter: page extension iterator. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no next page_ext exists. + */ +static inline struct page_ext *page_ext_iter_next(struct page_ext_iter *iter) +{ + unsigned long pfn; + + if (WARN_ON_ONCE(!iter->page_ext)) + return NULL; + + iter->index++; + pfn = iter->start_pfn + iter->index; + + if (page_ext_iter_next_fast_possible(pfn)) + iter->page_ext = page_ext_next(iter->page_ext); + else + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_get() - Get current page extension + * @iter: page extension iterator. + * + * Return: NULL if no page_ext exists for this iterator. + */ +static inline struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter) +{ + return iter->page_ext; +} + +/** + * for_each_page_ext(): iterate through page_ext objects. + * @__page: the page we're interested in + * @__pgcount: how many pages to iterate through + * @__page_ext: struct page_ext pointer where the current page_ext + * object is returned + * @__iter: struct page_ext_iter object (defined in the stack) + * + * IMPORTANT: must be called with RCU read lock taken. + */ +#define for_each_page_ext(__page, __pgcount, __page_ext, __iter) \ + for (__page_ext = page_ext_iter_begin(&__iter, page_to_pfn(__page));\ + __page_ext && __iter.index < __pgcount; \ + __page_ext = page_ext_iter_next(&__iter)) + #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; diff --git a/mm/page_ext.c b/mm/page_ext.c index 641d93f6af4c..c351fdfe9e9a 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -507,6 +507,19 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) #endif +/** + * page_ext_lookup() - Lookup a page extension for a PFN. + * @pfn: PFN of the page we're interested in. + * + * Must be called with RCU read lock taken and @pfn must be valid. + * + * Return: NULL if no page_ext exists for this page. + */ +struct page_ext *page_ext_lookup(unsigned long pfn) +{ + return lookup_page_ext(pfn_to_page(pfn)); +} + /** * page_ext_get() - Get the extended information for a page. * @page: The page we're interested in. From 4e30b94cdad659d8ec5d32b61159138ce8d4ad1b Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Thu, 6 Mar 2025 17:44:51 -0500 Subject: [PATCH 1281/2157] mm: page_table_check: use new iteration API The page_ext_next() function assumes that page extension objects for a page order allocation always reside in the same memory section, which may not be true and could lead to crashes. Use the new page_ext iteration API instead. Link: https://lkml.kernel.org/r/ca2d53a020fe1cd65c442627ff6c0c40d591cbd8.1741301089.git.luizcap@redhat.com Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") Signed-off-by: Luiz Capitulino Acked-by: David Hildenbrand Cc: Johannes Weiner Cc: Muchun Song Cc: Pasha Tatashin Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/page_table_check.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/mm/page_table_check.c b/mm/page_table_check.c index c2b3600429a0..68109ee93841 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -62,24 +62,20 @@ static struct page_table_check *get_page_table_check(struct page_ext *page_ext) */ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) { + struct page_ext_iter iter; struct page_ext *page_ext; struct page *page; - unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); - page_ext = page_ext_get(page); - - if (!page_ext) - return; - BUG_ON(PageSlab(page)); anon = PageAnon(page); - for (i = 0; i < pgcnt; i++) { + rcu_read_lock(); + for_each_page_ext(page, pgcnt, page_ext, iter) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { @@ -89,9 +85,8 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0); } - page_ext = page_ext_next(page_ext); } - page_ext_put(page_ext); + rcu_read_unlock(); } /* @@ -102,24 +97,20 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, bool rw) { + struct page_ext_iter iter; struct page_ext *page_ext; struct page *page; - unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); - page_ext = page_ext_get(page); - - if (!page_ext) - return; - BUG_ON(PageSlab(page)); anon = PageAnon(page); - for (i = 0; i < pgcnt; i++) { + rcu_read_lock(); + for_each_page_ext(page, pgcnt, page_ext, iter) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { @@ -129,9 +120,8 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0); } - page_ext = page_ext_next(page_ext); } - page_ext_put(page_ext); + rcu_read_unlock(); } /* @@ -140,24 +130,19 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, */ void __page_table_check_zero(struct page *page, unsigned int order) { + struct page_ext_iter iter; struct page_ext *page_ext; - unsigned long i; BUG_ON(PageSlab(page)); - page_ext = page_ext_get(page); - - if (!page_ext) - return; - - for (i = 0; i < (1ul << order); i++) { + rcu_read_lock(); + for_each_page_ext(page, 1 << order, page_ext, iter) { struct page_table_check *ptc = get_page_table_check(page_ext); BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_read(&ptc->file_map_count)); - page_ext = page_ext_next(page_ext); } - page_ext_put(page_ext); + rcu_read_unlock(); } void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) From 3a812bed3d32ae8c7443d1dd82f20b9a7e503ed2 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Thu, 6 Mar 2025 17:44:52 -0500 Subject: [PATCH 1282/2157] mm: page_owner: use new iteration API The page_ext_next() function assumes that page extension objects for a page order allocation always reside in the same memory section, which may not be true and could lead to crashes. Use the new page_ext iteration API instead. Link: https://lkml.kernel.org/r/93c80b040960fa2ebab4a9729073f77a30649862.1741301089.git.luizcap@redhat.com Fixes: cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios") Signed-off-by: Luiz Capitulino Acked-by: David Hildenbrand Cc: Johannes Weiner Cc: Muchun Song Cc: Pasha Tatashin Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/page_owner.c | 86 +++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index a409e2561a8f..849d4a471b6c 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -229,17 +229,19 @@ static void dec_stack_record_count(depot_stack_handle_t handle, handle); } -static inline void __update_page_owner_handle(struct page_ext *page_ext, +static inline void __update_page_owner_handle(struct page *page, depot_stack_handle_t handle, unsigned short order, gfp_t gfp_mask, short last_migrate_reason, u64 ts_nsec, pid_t pid, pid_t tgid, char *comm) { - int i; + struct page_ext_iter iter; + struct page_ext *page_ext; struct page_owner *page_owner; - for (i = 0; i < (1 << order); i++) { + rcu_read_lock(); + for_each_page_ext(page, 1 << order, page_ext, iter) { page_owner = get_page_owner(page_ext); page_owner->handle = handle; page_owner->order = order; @@ -252,20 +254,22 @@ static inline void __update_page_owner_handle(struct page_ext *page_ext, sizeof(page_owner->comm)); __set_bit(PAGE_EXT_OWNER, &page_ext->flags); __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); - page_ext = page_ext_next(page_ext); } + rcu_read_unlock(); } -static inline void __update_page_owner_free_handle(struct page_ext *page_ext, +static inline void __update_page_owner_free_handle(struct page *page, depot_stack_handle_t handle, unsigned short order, pid_t pid, pid_t tgid, u64 free_ts_nsec) { - int i; + struct page_ext_iter iter; + struct page_ext *page_ext; struct page_owner *page_owner; - for (i = 0; i < (1 << order); i++) { + rcu_read_lock(); + for_each_page_ext(page, 1 << order, page_ext, iter) { page_owner = get_page_owner(page_ext); /* Only __reset_page_owner() wants to clear the bit */ if (handle) { @@ -275,8 +279,8 @@ static inline void __update_page_owner_free_handle(struct page_ext *page_ext, page_owner->free_ts_nsec = free_ts_nsec; page_owner->free_pid = current->pid; page_owner->free_tgid = current->tgid; - page_ext = page_ext_next(page_ext); } + rcu_read_unlock(); } void __reset_page_owner(struct page *page, unsigned short order) @@ -293,11 +297,11 @@ void __reset_page_owner(struct page *page, unsigned short order) page_owner = get_page_owner(page_ext); alloc_handle = page_owner->handle; + page_ext_put(page_ext); handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); - __update_page_owner_free_handle(page_ext, handle, order, current->pid, + __update_page_owner_free_handle(page, handle, order, current->pid, current->tgid, free_ts_nsec); - page_ext_put(page_ext); if (alloc_handle != early_handle) /* @@ -313,19 +317,13 @@ void __reset_page_owner(struct page *page, unsigned short order) noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { - struct page_ext *page_ext; u64 ts_nsec = local_clock(); depot_stack_handle_t handle; handle = save_stack(gfp_mask); - - page_ext = page_ext_get(page); - if (unlikely(!page_ext)) - return; - __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1, + __update_page_owner_handle(page, handle, order, gfp_mask, -1, ts_nsec, current->pid, current->tgid, current->comm); - page_ext_put(page_ext); inc_stack_record_count(handle, gfp_mask, 1 << order); } @@ -344,44 +342,42 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) void __split_page_owner(struct page *page, int old_order, int new_order) { - int i; - struct page_ext *page_ext = page_ext_get(page); + struct page_ext_iter iter; + struct page_ext *page_ext; struct page_owner *page_owner; - if (unlikely(!page_ext)) - return; - - for (i = 0; i < (1 << old_order); i++) { + rcu_read_lock(); + for_each_page_ext(page, 1 << old_order, page_ext, iter) { page_owner = get_page_owner(page_ext); page_owner->order = new_order; - page_ext = page_ext_next(page_ext); } - page_ext_put(page_ext); + rcu_read_unlock(); } void __folio_copy_owner(struct folio *newfolio, struct folio *old) { - int i; - struct page_ext *old_ext; - struct page_ext *new_ext; + struct page_ext *page_ext; + struct page_ext_iter iter; struct page_owner *old_page_owner; struct page_owner *new_page_owner; depot_stack_handle_t migrate_handle; - old_ext = page_ext_get(&old->page); - if (unlikely(!old_ext)) + page_ext = page_ext_get(&old->page); + if (unlikely(!page_ext)) return; - new_ext = page_ext_get(&newfolio->page); - if (unlikely(!new_ext)) { - page_ext_put(old_ext); - return; - } + old_page_owner = get_page_owner(page_ext); + page_ext_put(page_ext); + + page_ext = page_ext_get(&newfolio->page); + if (unlikely(!page_ext)) + return; + + new_page_owner = get_page_owner(page_ext); + page_ext_put(page_ext); - old_page_owner = get_page_owner(old_ext); - new_page_owner = get_page_owner(new_ext); migrate_handle = new_page_owner->handle; - __update_page_owner_handle(new_ext, old_page_owner->handle, + __update_page_owner_handle(&newfolio->page, old_page_owner->handle, old_page_owner->order, old_page_owner->gfp_mask, old_page_owner->last_migrate_reason, old_page_owner->ts_nsec, old_page_owner->pid, @@ -391,7 +387,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old) * will be freed after migration. Keep them until then as they may be * useful. */ - __update_page_owner_free_handle(new_ext, 0, old_page_owner->order, + __update_page_owner_free_handle(&newfolio->page, 0, old_page_owner->order, old_page_owner->free_pid, old_page_owner->free_tgid, old_page_owner->free_ts_nsec); @@ -400,14 +396,12 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old) * for the new one and the old folio otherwise there will be an imbalance * when subtracting those pages from the stack. */ - for (i = 0; i < (1 << new_page_owner->order); i++) { + rcu_read_lock(); + for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) { + old_page_owner = get_page_owner(page_ext); old_page_owner->handle = migrate_handle; - old_ext = page_ext_next(old_ext); - old_page_owner = get_page_owner(old_ext); } - - page_ext_put(new_ext); - page_ext_put(old_ext); + rcu_read_unlock(); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -813,7 +807,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) goto ext_put_continue; /* Found early allocated page */ - __update_page_owner_handle(page_ext, early_handle, 0, 0, + __update_page_owner_handle(page, early_handle, 0, 0, -1, local_clock(), current->pid, current->tgid, current->comm); count++; From f0e11a997ab438ce91a7dc9a6dd64c0c4a6af112 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Thu, 6 Mar 2025 15:21:31 +0800 Subject: [PATCH 1283/2157] mm/vmalloc: refactor __vmalloc_node_range_noprof() According to the code logic, the first parameter of the sub-function __get_vm_area_node() should be size instead of real_size. Then in __get_vm_area_node(), the size will be aligned, so the redundant alignment operation is deleted. The use of the real_size variable causes code redundancy, so it is removed to simplify the code. The real prefix is generally used to indicate the adjusted value of a parameter, but according to the code logic, it should indicate the original value, so it is recommended to rename it to original_align. Link: https://lkml.kernel.org/r/20250306072131.800499-1-liuye@kylinos.cn Signed-off-by: Liu Ye Reviewed-by: "Uladzislau Rezki (Sony)" Cc: Christop Hellwig Signed-off-by: Andrew Morton --- mm/vmalloc.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 61981ee1c9d2..3ed720a787ec 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3771,8 +3771,7 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, struct vm_struct *area; void *ret; kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE; - unsigned long real_size = size; - unsigned long real_align = align; + unsigned long original_align = align; unsigned int shift = PAGE_SHIFT; if (WARN_ON_ONCE(!size)) @@ -3781,7 +3780,7 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, if ((size >> PAGE_SHIFT) > totalram_pages()) { warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, exceeds total pages", - real_size); + size); return NULL; } @@ -3798,19 +3797,18 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, else shift = arch_vmap_pte_supported_shift(size); - align = max(real_align, 1UL << shift); - size = ALIGN(real_size, 1UL << shift); + align = max(original_align, 1UL << shift); } again: - area = __get_vm_area_node(real_size, align, shift, VM_ALLOC | + area = __get_vm_area_node(size, align, shift, VM_ALLOC | VM_UNINITIALIZED | vm_flags, start, end, node, gfp_mask, caller); if (!area) { bool nofail = gfp_mask & __GFP_NOFAIL; warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, vm_struct allocation failed%s", - real_size, (nofail) ? ". Retrying." : ""); + size, (nofail) ? ". Retrying." : ""); if (nofail) { schedule_timeout_uninterruptible(1); goto again; @@ -3860,7 +3858,7 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, (gfp_mask & __GFP_SKIP_ZERO)) kasan_flags |= KASAN_VMALLOC_INIT; /* KASAN_VMALLOC_PROT_NORMAL already set if required. */ - area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); + area->addr = kasan_unpoison_vmalloc(area->addr, size, kasan_flags); /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED @@ -3869,17 +3867,15 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, */ clear_vm_uninitialized_flag(area); - size = PAGE_ALIGN(size); if (!(vm_flags & VM_DEFER_KMEMLEAK)) - kmemleak_vmalloc(area, size, gfp_mask); + kmemleak_vmalloc(area, PAGE_ALIGN(size), gfp_mask); return area->addr; fail: if (shift > PAGE_SHIFT) { shift = PAGE_SHIFT; - align = real_align; - size = real_size; + align = original_align; goto again; } From d9a04a2615c0b8767a42dc26a8c26383e8513cdc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Mar 2025 09:31:42 -0500 Subject: [PATCH 1284/2157] mm: swap_cgroup: remove double initialization of locals Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") Signed-off-by: Johannes Weiner Reviewed-by: Muchun Song Cc: Chris Li Cc: Kairui Song Cc: Michal Hocko Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- mm/swap_cgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index 1007c30f12e2..de779fed8c21 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -92,8 +92,7 @@ void swap_cgroup_record(struct folio *folio, unsigned short id, */ unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) { - pgoff_t offset = swp_offset(ent); - pgoff_t end = offset + nr_ents; + pgoff_t offset, end; struct swap_cgroup *map; unsigned short old, iter = 0; From fa17ad58f8328e5c089377fed55ca8ed62f7cd1d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 26 Feb 2025 16:31:29 +0000 Subject: [PATCH 1285/2157] hugetlb: convert hugetlb_vma_maps_page() to hugetlb_vma_maps_pfn() pte_page() is more expensive than pte_pfn() (often it's defined as pfn_to_page(pte_pfn())), so it makes sense to do the conversion to pfn once (by calling folio_pfn()) rather than convert the pfn to a page each time. While this is a very small advantage, the main motivation is removing a reference to folio->page. Link: https://lkml.kernel.org/r/20250226163131.3795869-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0fc179a59830..a427d41fbca0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -338,8 +338,8 @@ static void hugetlb_delete_from_page_cache(struct folio *folio) * mutex for the page in the mapping. So, we can not race with page being * faulted into the vma. */ -static bool hugetlb_vma_maps_page(struct vm_area_struct *vma, - unsigned long addr, struct page *page) +static bool hugetlb_vma_maps_pfn(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) { pte_t *ptep, pte; @@ -351,7 +351,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma, if (huge_pte_none(pte) || !pte_present(pte)) return false; - if (pte_page(pte) == page) + if (pte_pfn(pte) == pfn) return true; return false; @@ -396,7 +396,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h, { struct rb_root_cached *root = &mapping->i_mmap; struct hugetlb_vma_lock *vma_lock; - struct page *page = &folio->page; + unsigned long pfn = folio_pfn(folio); struct vm_area_struct *vma; unsigned long v_start; unsigned long v_end; @@ -412,7 +412,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h, v_start = vma_offset_start(vma, start); v_end = vma_offset_end(vma, end); - if (!hugetlb_vma_maps_page(vma, v_start, page)) + if (!hugetlb_vma_maps_pfn(vma, v_start, pfn)) continue; if (!hugetlb_vma_trylock_write(vma)) { @@ -462,7 +462,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h, */ v_start = vma_offset_start(vma, start); v_end = vma_offset_end(vma, end); - if (hugetlb_vma_maps_page(vma, v_start, page)) + if (hugetlb_vma_maps_pfn(vma, v_start, pfn)) unmap_hugepage_range(vma, v_start, v_end, NULL, ZAP_FLAG_DROP_MARKER); From fcc09f5b56601e618c3dafc9fdd74882924d9143 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 26 Feb 2025 16:31:30 +0000 Subject: [PATCH 1286/2157] hugetlb: convert adjust_range_hwpoison() to take a folio Remove a use of folio->page by passing the folio into adjust_range_hwpoison(). We need to convert to a page eventually, but that can happen inside adjust_range_hwpoison(). Link: https://lkml.kernel.org/r/20250226163131.3795869-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a427d41fbca0..a6fcaf8317a0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -193,19 +193,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } /* - * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset. + * Someone wants to read @bytes from a HWPOISON hugetlb @folio from @offset. * Returns the maximum number of bytes one can read without touching the 1st raw - * HWPOISON subpage. + * HWPOISON page. * * The implementation borrows the iteration logic from copy_page_to_iter*. */ -static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes) +static size_t adjust_range_hwpoison(struct folio *folio, size_t offset, + size_t bytes) { + struct page *page; size_t n = 0; size_t res = 0; - /* First subpage to start the loop. */ - page = nth_page(page, offset / PAGE_SIZE); + /* First page to start the loop. */ + page = folio_page(folio, offset / PAGE_SIZE); offset %= PAGE_SIZE; while (1) { if (is_raw_hwpoison_page_in_hugepage(page)) @@ -278,10 +280,10 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) else { /* * Adjust how many bytes safe to read without - * touching the 1st raw HWPOISON subpage after + * touching the 1st raw HWPOISON page after * offset. */ - want = adjust_range_hwpoison(&folio->page, offset, nr); + want = adjust_range_hwpoison(folio, offset, nr); if (want == 0) { folio_put(folio); retval = -EIO; From 3fec86f8aa8c7ae84567a0d1396e84ada96141d8 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:54 -0500 Subject: [PATCH 1287/2157] xarray: add xas_try_split() to split a multi-index entry Patch series "Buddy allocator like (or non-uniform) folio split", v10. This patchset adds a new buddy allocator like (or non-uniform) large folio split from a order-n folio to order-m with m < n. It reduces 1. the total number of after-split folios from 2^(n-m) to n-m+1; 2. the amount of memory needed for multi-index xarray split from 2^(n/6-m/6) to n/6-m/6, assuming XA_CHUNK_SHIFT=6; 3. keep more large folios after a split from all order-m folios to order-(n-1) to order-m folios. For example, to split an order-9 to order-0, folio split generates 10 (or 11 for anonymous memory) folios instead of 512, allocates 1 xa_node instead of 8, and leaves 1 order-8, 1 order-7, ..., 1 order-1 and 2 order-0 folios (or 4 order-0 for anonymous memory) instead of 512 order-0 folios. Instead of duplicating existing split_huge_page*() code, __folio_split() is introduced as the shared backend code for both split_huge_page_to_list_to_order() and folio_split(). __folio_split() can support both uniform split and buddy allocator like (or non-uniform) split. All existing split_huge_page*() users can be gradually converted to use folio_split() if possible. In this patchset, I converted truncate_inode_partial_folio() to use folio_split(). xfstests quick group passed for both tmpfs and xfs. I also semi-replicated Hugh's test[12] and ran it without any issue for almost 24 hours. This patch (of 8): A preparation patch for non-uniform folio split, which always split a folio into half iteratively, and minimal xarray entry split. Currently, xas_split_alloc() and xas_split() always split all slots from a multi-index entry. They cost the same number of xa_node as the to-be-split slots. For example, to split an order-9 entry, which takes 2^(9-6)=8 slots, assuming XA_CHUNK_SHIFT is 6 (!CONFIG_BASE_SMALL), 8 xa_node are needed. Instead xas_try_split() is intended to be used iteratively to split the order-9 entry into 2 order-8 entries, then split one order-8 entry, based on the given index, to 2 order-7 entries, ..., and split one order-1 entry to 2 order-0 entries. When splitting the order-6 entry and a new xa_node is needed, xas_try_split() will try to allocate one if possible. As a result, xas_try_split() would only need 1 xa_node instead of 8. When a new xa_node is needed during the split, xas_try_split() can try to allocate one but no more. -ENOMEM will be return if a node cannot be allocated. -EINVAL will be return if a sibling node is split or cascade split happens, where two or more new nodes are needed, and these are not supported by xas_try_split(). xas_split_alloc() and xas_split() split an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | | | ------- --- --- ------- | | ... | | V V V V ----------- ----------- ----------- ----------- | xa_node | | xa_node | ... | xa_node | | xa_node | ----------- ----------- ----------- ----------- xas_try_split() splits an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | V ----------- | xa_node | ----------- Link: https://lkml.kernel.org/r/20250307174001.242794-1-ziy@nvidia.com Link: https://lkml.kernel.org/r/20250307174001.242794-2-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Miaohe Lin Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Zi Yan Cc: Kairui Song Signed-off-by: Andrew Morton --- Documentation/core-api/xarray.rst | 14 +++- include/linux/xarray.h | 6 ++ lib/test_xarray.c | 52 ++++++++++++ lib/xarray.c | 132 +++++++++++++++++++++++++++--- tools/testing/radix-tree/Makefile | 1 + 5 files changed, 192 insertions(+), 13 deletions(-) diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index f6a3eef4fe7f..c6c91cbd0c3c 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -489,7 +489,19 @@ Storing ``NULL`` into any index of a multi-index entry will set the entry at every index to ``NULL`` and dissolve the tie. A multi-index entry can be split into entries occupying smaller ranges by calling xas_split_alloc() without the xa_lock held, followed by taking the lock -and calling xas_split(). +and calling xas_split() or calling xas_try_split() with xa_lock. The +difference between xas_split_alloc()+xas_split() and xas_try_alloc() is +that xas_split_alloc() + xas_split() split the entry from the original +order to the new order in one shot uniformly, whereas xas_try_split() +iteratively splits the entry containing the index non-uniformly. +For example, to split an order-9 entry, which takes 2^(9-6)=8 slots, +assuming ``XA_CHUNK_SHIFT`` is 6, xas_split_alloc() + xas_split() need +8 xa_node. xas_try_split() splits the order-9 entry into +2 order-8 entries, then split one order-8 entry, based on the given index, +to 2 order-7 entries, ..., and split one order-1 entry to 2 order-0 entries. +When splitting the order-6 entry and a new xa_node is needed, xas_try_split() +will try to allocate one if possible. As a result, xas_try_split() would only +need 1 xa_node instead of 8. Functions and structures ======================== diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0b618ec04115..4010195201c9 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1555,6 +1555,7 @@ int xa_get_order(struct xarray *, unsigned long index); int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); +void xas_try_split(struct xa_state *xas, void *entry, unsigned int order); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { @@ -1576,6 +1577,11 @@ static inline void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { } + +static inline void xas_try_split(struct xa_state *xas, void *entry, + unsigned int order) +{ +} #endif /** diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 0e865bab4a10..080a39d22e73 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1858,6 +1858,54 @@ static void check_split_1(struct xarray *xa, unsigned long index, xa_destroy(xa); } +static void check_split_2(struct xarray *xa, unsigned long index, + unsigned int order, unsigned int new_order) +{ + XA_STATE_ORDER(xas, xa, index, new_order); + unsigned int i, found; + void *entry; + + xa_store_order(xa, index, order, xa, GFP_KERNEL); + xa_set_mark(xa, index, XA_MARK_1); + + /* allocate a node for xas_try_split() */ + xas_set_err(&xas, -ENOMEM); + XA_BUG_ON(xa, !xas_nomem(&xas, GFP_KERNEL)); + + xas_lock(&xas); + xas_try_split(&xas, xa, order); + if (((new_order / XA_CHUNK_SHIFT) < (order / XA_CHUNK_SHIFT)) && + new_order < order - 1) { + XA_BUG_ON(xa, !xas_error(&xas) || xas_error(&xas) != -EINVAL); + xas_unlock(&xas); + goto out; + } + for (i = 0; i < (1 << order); i += (1 << new_order)) + __xa_store(xa, index + i, xa_mk_index(index + i), 0); + xas_unlock(&xas); + + for (i = 0; i < (1 << order); i++) { + unsigned int val = index + (i & ~((1 << new_order) - 1)); + XA_BUG_ON(xa, xa_load(xa, index + i) != xa_mk_index(val)); + } + + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + xas_set_order(&xas, index, 0); + found = 0; + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_1) { + found++; + XA_BUG_ON(xa, xa_is_internal(entry)); + } + rcu_read_unlock(); + XA_BUG_ON(xa, found != 1 << (order - new_order)); +out: + xas_destroy(&xas); + xa_destroy(xa); +} + static noinline void check_split(struct xarray *xa) { unsigned int order, new_order; @@ -1869,6 +1917,10 @@ static noinline void check_split(struct xarray *xa) check_split_1(xa, 0, order, new_order); check_split_1(xa, 1UL << order, order, new_order); check_split_1(xa, 3UL << order, order, new_order); + + check_split_2(xa, 0, order, new_order); + check_split_2(xa, 1UL << order, order, new_order); + check_split_2(xa, 3UL << order, order, new_order); } } } diff --git a/lib/xarray.c b/lib/xarray.c index 116e9286c64e..3bae48558e21 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -278,6 +278,7 @@ void xas_destroy(struct xa_state *xas) xas->xa_alloc = node = next; } } +EXPORT_SYMBOL_GPL(xas_destroy); /** * xas_nomem() - Allocate memory if needed. @@ -1007,6 +1008,26 @@ static void node_set_marks(struct xa_node *node, unsigned int offset, } } +static void __xas_init_node_for_split(struct xa_state *xas, + struct xa_node *node, void *entry) +{ + unsigned int i; + void *sibling = NULL; + unsigned int mask = xas->xa_sibs; + + if (!node) + return; + node->array = xas->xa; + for (i = 0; i < XA_CHUNK_SIZE; i++) { + if ((i & mask) == 0) { + RCU_INIT_POINTER(node->slots[i], entry); + sibling = xa_mk_sibling(i); + } else { + RCU_INIT_POINTER(node->slots[i], sibling); + } + } +} + /** * xas_split_alloc() - Allocate memory for splitting an entry. * @xas: XArray operation state. @@ -1025,7 +1046,6 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; - unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT <= order)) @@ -1034,22 +1054,13 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, return; do { - unsigned int i; - void *sibling = NULL; struct xa_node *node; node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) goto nomem; - node->array = xas->xa; - for (i = 0; i < XA_CHUNK_SIZE; i++) { - if ((i & mask) == 0) { - RCU_INIT_POINTER(node->slots[i], entry); - sibling = xa_mk_sibling(i); - } else { - RCU_INIT_POINTER(node->slots[i], sibling); - } - } + + __xas_init_node_for_split(xas, node, entry); RCU_INIT_POINTER(node->parent, xas->xa_alloc); xas->xa_alloc = node; } while (sibs-- > 0); @@ -1122,6 +1133,103 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); + +/** + * xas_try_split() - Try to split a multi-index entry. + * @xas: XArray operation state. + * @entry: New entry to store in the array. + * @order: Current entry order. + * + * The size of the new entries is set in @xas. The value in @entry is + * copied to all the replacement entries. If and only if one new xa_node is + * needed, the function will use GFP_NOWAIT to get one if xas->xa_alloc is + * NULL. If more new xa_node are needed, the function gives EINVAL error. + * + * Context: Any context. The caller should hold the xa_lock. + */ +void xas_try_split(struct xa_state *xas, void *entry, unsigned int order) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int offset, marks; + struct xa_node *node; + void *curr = xas_load(xas); + int values = 0; + gfp_t gfp = GFP_NOWAIT; + + node = xas->xa_node; + if (xas_top(node)) + return; + + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; + + marks = node_get_marks(node, xas->xa_offset); + + offset = xas->xa_offset + sibs; + + if (xas->xa_shift < node->shift) { + struct xa_node *child = xas->xa_alloc; + unsigned int expected_sibs = + (1 << ((order - 1) % XA_CHUNK_SHIFT)) - 1; + + /* + * No support for splitting sibling entries + * (horizontally) or cascade split (vertically), which + * requires two or more new xa_nodes. + * Since if one xa_node allocation fails, + * it is hard to free the prior allocations. + */ + if (sibs || xas->xa_sibs != expected_sibs) { + xas_destroy(xas); + xas_set_err(xas, -EINVAL); + return; + } + + if (!child) { + child = kmem_cache_alloc_lru(radix_tree_node_cachep, + xas->xa_lru, gfp); + if (!child) { + xas_destroy(xas); + xas_set_err(xas, -ENOMEM); + return; + } + RCU_INIT_POINTER(child->parent, xas->xa_alloc); + } + __xas_init_node_for_split(xas, child, entry); + + xas->xa_alloc = rcu_dereference_raw(child->parent); + child->shift = node->shift - XA_CHUNK_SHIFT; + child->offset = offset; + child->count = XA_CHUNK_SIZE; + child->nr_values = xa_is_value(entry) ? + XA_CHUNK_SIZE : 0; + RCU_INIT_POINTER(child->parent, node); + node_set_marks(node, offset, child, xas->xa_sibs, + marks); + rcu_assign_pointer(node->slots[offset], + xa_mk_node(child)); + if (xa_is_value(curr)) + values--; + xas_update(xas, child); + + } else { + do { + unsigned int canon = offset - xas->xa_sibs; + + node_set_marks(node, canon, NULL, 0, marks); + rcu_assign_pointer(node->slots[canon], entry); + while (offset > canon) + rcu_assign_pointer(node->slots[offset--], + xa_mk_sibling(canon)); + values += (xa_is_value(entry) - xa_is_value(curr)) * + (xas->xa_sibs + 1); + } while (offset-- > xas->xa_offset); + } + + node->nr_values += values; + xas_update(xas, node); +} +EXPORT_SYMBOL_GPL(xas_try_split); #endif /** diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 8b3591a51e1f..b2a6660bbd92 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -14,6 +14,7 @@ include ../shared/shared.mk main: $(OFILES) +xarray.o: ../../../lib/test_xarray.c idr-test.o: ../../../lib/test_ida.c idr-test: idr-test.o $(CORE_OFILES) From 00527733d0dc806a72bb9a56cfbd6c44d5f74872 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:55 -0500 Subject: [PATCH 1288/2157] mm/huge_memory: add two new (not yet used) functions for folio_split() This is a preparation patch, both added functions are not used yet. The added __split_unmapped_folio() is able to split a folio with its mapping removed in two manners: 1) uniform split (the existing way), and 2) buddy allocator like (or non-uniform) split. The added __split_folio_to_order() can split a folio into any lower order. For uniform split, __split_unmapped_folio() calls it once to split the given folio to the new order. For buddy allocator like (non-uniform) split, __split_unmapped_folio() calls it (folio_order - new_order) times and each time splits the folio containing the given page to one lower order. [ziy@nvidia.com: unfreeze head folio after page cache entries are updated] Link: https://lkml.kernel.org/r/0F15DA7F-1977-412F-9A3E-F06B515D4BD2@nvidia.com [ziy@nvidia.com: use NULL instead of 0 for folio->private assignment] Link: https://lkml.kernel.org/r/1E11B9DD-3A87-4C9C-8FB4-E1324FB6A21A@nvidia.com Link: https://lkml.kernel.org/r/20250307174001.242794-3-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/huge_memory.c | 354 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 353 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 80cf15116ce7..c5661104ecca 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3265,7 +3265,6 @@ static void remap_page(struct folio *folio, unsigned long nr, int flags) static void lru_add_page_tail(struct folio *folio, struct page *tail, struct lruvec *lruvec, struct list_head *list) { - VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); VM_BUG_ON_FOLIO(PageLRU(tail), folio); lockdep_assert_held(&lruvec->lru_lock); @@ -3517,6 +3516,359 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) caller_pins; } +/* + * It splits @folio into @new_order folios and copies the @folio metadata to + * all the resulting folios. + */ +static void __split_folio_to_order(struct folio *folio, int old_order, + int new_order) +{ + long new_nr_pages = 1 << new_order; + long nr_pages = 1 << old_order; + long i; + + /* + * Skip the first new_nr_pages, since the new folio from them have all + * the flags from the original folio. + */ + for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) { + struct page *new_head = &folio->page + i; + + /* + * Careful: new_folio is not a "real" folio before we cleared PageTail. + * Don't pass it around before clear_compound_head(). + */ + struct folio *new_folio = (struct folio *)new_head; + + VM_BUG_ON_PAGE(atomic_read(&new_folio->_mapcount) != -1, new_head); + + /* + * Clone page flags before unfreezing refcount. + * + * After successful get_page_unless_zero() might follow flags change, + * for example lock_page() which set PG_waiters. + * + * Note that for mapped sub-pages of an anonymous THP, + * PG_anon_exclusive has been cleared in unmap_folio() and is stored in + * the migration entry instead from where remap_page() will restore it. + * We can still have PG_anon_exclusive set on effectively unmapped and + * unreferenced sub-pages of an anonymous THP: we can simply drop + * PG_anon_exclusive (-> PG_mappedtodisk) for these here. + */ + new_folio->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; + new_folio->flags |= (folio->flags & + ((1L << PG_referenced) | + (1L << PG_swapbacked) | + (1L << PG_swapcache) | + (1L << PG_mlocked) | + (1L << PG_uptodate) | + (1L << PG_active) | + (1L << PG_workingset) | + (1L << PG_locked) | + (1L << PG_unevictable) | +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 + (1L << PG_arch_2) | +#endif +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 + (1L << PG_arch_3) | +#endif + (1L << PG_dirty) | + LRU_GEN_MASK | LRU_REFS_MASK)); + + new_folio->mapping = folio->mapping; + new_folio->index = folio->index + i; + + /* + * page->private should not be set in tail pages. Fix up and warn once + * if private is unexpectedly set. + */ + if (unlikely(new_folio->private)) { + VM_WARN_ON_ONCE_PAGE(true, new_head); + new_folio->private = NULL; + } + + if (folio_test_swapcache(folio)) + new_folio->swap.val = folio->swap.val + i; + + /* Page flags must be visible before we make the page non-compound. */ + smp_wmb(); + + /* + * Clear PageTail before unfreezing page refcount. + * + * After successful get_page_unless_zero() might follow put_page() + * which needs correct compound_head(). + */ + clear_compound_head(new_head); + if (new_order) { + prep_compound_page(new_head, new_order); + folio_set_large_rmappable(new_folio); + } + + if (folio_test_young(folio)) + folio_set_young(new_folio); + if (folio_test_idle(folio)) + folio_set_idle(new_folio); + + folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio)); + } + + if (new_order) + folio_set_order(folio, new_order); + else + ClearPageCompound(&folio->page); +} + +/* + * It splits an unmapped @folio to lower order smaller folios in two ways. + * @folio: the to-be-split folio + * @new_order: the smallest order of the after split folios (since buddy + * allocator like split generates folios with orders from @folio's + * order - 1 to new_order). + * @split_at: in buddy allocator like split, the folio containing @split_at + * will be split until its order becomes @new_order. + * @lock_at: the folio containing @lock_at is left locked for caller. + * @list: the after split folios will be added to @list if it is not NULL, + * otherwise to LRU lists. + * @end: the end of the file @folio maps to. -1 if @folio is anonymous memory. + * @xas: xa_state pointing to folio->mapping->i_pages and locked by caller + * @mapping: @folio->mapping + * @uniform_split: if the split is uniform or not (buddy allocator like split) + * + * + * 1. uniform split: the given @folio into multiple @new_order small folios, + * where all small folios have the same order. This is done when + * uniform_split is true. + * 2. buddy allocator like (non-uniform) split: the given @folio is split into + * half and one of the half (containing the given page) is split into half + * until the given @page's order becomes @new_order. This is done when + * uniform_split is false. + * + * The high level flow for these two methods are: + * 1. uniform split: a single __split_folio_to_order() is called to split the + * @folio into @new_order, then we traverse all the resulting folios one by + * one in PFN ascending order and perform stats, unfreeze, adding to list, + * and file mapping index operations. + * 2. non-uniform split: in general, folio_order - @new_order calls to + * __split_folio_to_order() are made in a for loop to split the @folio + * to one lower order at a time. The resulting small folios are processed + * like what is done during the traversal in 1, except the one containing + * @page, which is split in next for loop. + * + * After splitting, the caller's folio reference will be transferred to the + * folio containing @page. The other folios may be freed if they are not mapped. + * + * In terms of locking, after splitting, + * 1. uniform split leaves @page (or the folio contains it) locked; + * 2. buddy allocator like (non-uniform) split leaves @folio locked. + * + * + * For !uniform_split, when -ENOMEM is returned, the original folio might be + * split. The caller needs to check the input folio. + */ +static int __split_unmapped_folio(struct folio *folio, int new_order, + struct page *split_at, struct page *lock_at, + struct list_head *list, pgoff_t end, + struct xa_state *xas, struct address_space *mapping, + bool uniform_split) +{ + struct lruvec *lruvec; + struct address_space *swap_cache = NULL; + struct folio *origin_folio = folio; + struct folio *next_folio = folio_next(folio); + struct folio *new_folio; + struct folio *next; + int order = folio_order(folio); + int split_order; + int start_order = uniform_split ? new_order : order - 1; + int nr_dropped = 0; + int ret = 0; + bool stop_split = false; + + if (folio_test_swapcache(folio)) { + VM_BUG_ON(mapping); + + /* a swapcache folio can only be uniformly split to order-0 */ + if (!uniform_split || new_order != 0) + return -EINVAL; + + swap_cache = swap_address_space(folio->swap); + xa_lock(&swap_cache->i_pages); + } + + if (folio_test_anon(folio)) + mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); + + /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ + lruvec = folio_lruvec_lock(folio); + + folio_clear_has_hwpoisoned(folio); + + /* + * split to new_order one order at a time. For uniform split, + * folio is split to new_order directly. + */ + for (split_order = start_order; + split_order >= new_order && !stop_split; + split_order--) { + int old_order = folio_order(folio); + struct folio *release; + struct folio *end_folio = folio_next(folio); + + /* order-1 anonymous folio is not supported */ + if (folio_test_anon(folio) && split_order == 1) + continue; + if (uniform_split && split_order != new_order) + continue; + + if (mapping) { + /* + * uniform split has xas_split_alloc() called before + * irq is disabled to allocate enough memory, whereas + * non-uniform split can handle ENOMEM. + */ + if (uniform_split) + xas_split(xas, folio, old_order); + else { + xas_set_order(xas, folio->index, split_order); + xas_try_split(xas, folio, old_order); + if (xas_error(xas)) { + ret = xas_error(xas); + stop_split = true; + goto after_split; + } + } + } + + /* + * Reset any memcg data overlay in the tail pages. + * folio_nr_pages() is unreliable until prep_compound_page() + * was called again. + */ +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif + + + /* complete memcg works before add pages to LRU */ + split_page_memcg(&folio->page, old_order, split_order); + split_page_owner(&folio->page, old_order, split_order); + pgalloc_tag_split(folio, old_order, split_order); + + __split_folio_to_order(folio, old_order, split_order); + +after_split: + /* + * Iterate through after-split folios and perform related + * operations. But in buddy allocator like split, the folio + * containing the specified page is skipped until its order + * is new_order, since the folio will be worked on in next + * iteration. + */ + for (release = folio; release != end_folio; release = next) { + next = folio_next(release); + /* + * for buddy allocator like split, the folio containing + * page will be split next and should not be released, + * until the folio's order is new_order or stop_split + * is set to true by the above xas_split() failure. + */ + if (release == page_folio(split_at)) { + folio = release; + if (split_order != new_order && !stop_split) + continue; + } + if (folio_test_anon(release)) { + mod_mthp_stat(folio_order(release), + MTHP_STAT_NR_ANON, 1); + } + + /* + * origin_folio should be kept frozon until page cache + * entries are updated with all the other after-split + * folios to prevent others seeing stale page cache + * entries. + */ + if (release == origin_folio) + continue; + + folio_ref_unfreeze(release, 1 + + ((mapping || swap_cache) ? + folio_nr_pages(release) : 0)); + + lru_add_page_tail(origin_folio, &release->page, + lruvec, list); + + /* Some pages can be beyond EOF: drop them from cache */ + if (release->index >= end) { + if (shmem_mapping(mapping)) + nr_dropped += folio_nr_pages(release); + else if (folio_test_clear_dirty(release)) + folio_account_cleaned(release, + inode_to_wb(mapping->host)); + __filemap_remove_folio(release, NULL); + folio_put_refs(release, folio_nr_pages(release)); + } else if (mapping) { + __xa_store(&mapping->i_pages, + release->index, release, 0); + } else if (swap_cache) { + __xa_store(&swap_cache->i_pages, + swap_cache_index(release->swap), + release, 0); + } + } + } + + /* + * Unfreeze origin_folio only after all page cache entries, which used + * to point to it, have been updated with new folios. Otherwise, + * a parallel folio_try_get() can grab origin_folio and its caller can + * see stale page cache entries. + */ + folio_ref_unfreeze(origin_folio, 1 + + ((mapping || swap_cache) ? folio_nr_pages(origin_folio) : 0)); + + unlock_page_lruvec(lruvec); + + if (swap_cache) + xa_unlock(&swap_cache->i_pages); + if (mapping) + xa_unlock(&mapping->i_pages); + + /* Caller disabled irqs, so they are still disabled here */ + local_irq_enable(); + + if (nr_dropped) + shmem_uncharge(mapping->host, nr_dropped); + + remap_page(origin_folio, 1 << order, + folio_test_anon(origin_folio) ? + RMP_USE_SHARED_ZEROPAGE : 0); + + /* + * At this point, folio should contain the specified page. + * For uniform split, it is left for caller to unlock. + * For buddy allocator like split, the first after-split folio is left + * for caller to unlock. + */ + for (new_folio = origin_folio; new_folio != next_folio; new_folio = next) { + next = folio_next(new_folio); + if (new_folio == page_folio(lock_at)) + continue; + + folio_unlock(new_folio); + /* + * Subpages may be freed if there wasn't any mapping + * like if add_to_swap() is running on a lru page that + * had its mapping zapped. And freeing these pages + * requires taking the lru_lock so we do the put_page + * of the tail pages after the split is complete. + */ + free_page_and_swap_cache(&new_folio->page); + } + return ret; +} + /* * This function splits a large folio into smaller folios of order @new_order. * @page can point to any page of the large folio to split. The split operation From 6384dd1d18de7b84bc346981419e63a5fa72ced4 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:56 -0500 Subject: [PATCH 1289/2157] mm/huge_memory: move folio split common code to __folio_split() This is a preparation patch for folio_split(). In the upcoming patch folio_split() will share folio unmapping and remapping code with split_huge_page_to_list_to_order(), so move the code to a common function __folio_split() first. Add a TODO for splitting large shmem folio in swap cache. Link: https://lkml.kernel.org/r/20250307174001.242794-4-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/huge_memory.c | 112 ++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c5661104ecca..768c5a6662ae 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3869,57 +3869,9 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, return ret; } -/* - * This function splits a large folio into smaller folios of order @new_order. - * @page can point to any page of the large folio to split. The split operation - * does not change the position of @page. - * - * Prerequisites: - * - * 1) The caller must hold a reference on the @page's owning folio, also known - * as the large folio. - * - * 2) The large folio must be locked. - * - * 3) The folio must not be pinned. Any unexpected folio references, including - * GUP pins, will result in the folio not getting split; instead, the caller - * will receive an -EAGAIN. - * - * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not - * supported for non-file-backed folios, because folio->_deferred_list, which - * is used by partially mapped folios, is stored in subpage 2, but an order-1 - * folio only has subpages 0 and 1. File-backed order-1 folios are supported, - * since they do not use _deferred_list. - * - * After splitting, the caller's folio reference will be transferred to @page, - * resulting in a raised refcount of @page after this call. The other pages may - * be freed if they are not mapped. - * - * If @list is null, tail pages will be added to LRU list, otherwise, to @list. - * - * Pages in @new_order will inherit the mapping, flags, and so on from the - * huge page. - * - * Returns 0 if the huge page was split successfully. - * - * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP) or if - * the folio was concurrently removed from the page cache. - * - * Returns -EBUSY when trying to split the huge zeropage, if the folio is - * under writeback, if fs-specific folio metadata cannot currently be - * released, or if some unexpected race happened (e.g., anon VMA disappeared, - * truncation). - * - * Callers should ensure that the order respects the address space mapping - * min-order if one is set for non-anonymous folios. - * - * Returns -EINVAL when trying to split to an order that is incompatible - * with the folio. Splitting to order 0 is compatible with all folios. - */ -int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, - unsigned int new_order) +static int __folio_split(struct folio *folio, unsigned int new_order, + struct page *page, struct list_head *list) { - struct folio *folio = page_folio(page); struct deferred_split *ds_queue = get_deferred_split_queue(folio); /* reset xarray order to new order after split */ XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order); @@ -3995,6 +3947,11 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, mapping = folio->mapping; /* Truncated ? */ + /* + * TODO: add support for large shmem folio in swap cache. + * When shmem is in swap cache, mapping is NULL and + * folio_test_swapcache() is true. + */ if (!mapping) { ret = -EBUSY; goto out; @@ -4129,6 +4086,61 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, return ret; } +/* + * This function splits a large folio into smaller folios of order @new_order. + * @page can point to any page of the large folio to split. The split operation + * does not change the position of @page. + * + * Prerequisites: + * + * 1) The caller must hold a reference on the @page's owning folio, also known + * as the large folio. + * + * 2) The large folio must be locked. + * + * 3) The folio must not be pinned. Any unexpected folio references, including + * GUP pins, will result in the folio not getting split; instead, the caller + * will receive an -EAGAIN. + * + * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not + * supported for non-file-backed folios, because folio->_deferred_list, which + * is used by partially mapped folios, is stored in subpage 2, but an order-1 + * folio only has subpages 0 and 1. File-backed order-1 folios are supported, + * since they do not use _deferred_list. + * + * After splitting, the caller's folio reference will be transferred to @page, + * resulting in a raised refcount of @page after this call. The other pages may + * be freed if they are not mapped. + * + * If @list is null, tail pages will be added to LRU list, otherwise, to @list. + * + * Pages in @new_order will inherit the mapping, flags, and so on from the + * huge page. + * + * Returns 0 if the huge page was split successfully. + * + * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP) or if + * the folio was concurrently removed from the page cache. + * + * Returns -EBUSY when trying to split the huge zeropage, if the folio is + * under writeback, if fs-specific folio metadata cannot currently be + * released, or if some unexpected race happened (e.g., anon VMA disappeared, + * truncation). + * + * Callers should ensure that the order respects the address space mapping + * min-order if one is set for non-anonymous folios. + * + * Returns -EINVAL when trying to split to an order that is incompatible + * with the folio. Splitting to order 0 is compatible with all folios. + */ +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) +{ + struct folio *folio = page_folio(page); + + return __folio_split(folio, new_order, page, list); +} + int min_order_for_split(struct folio *folio) { if (folio_test_anon(folio)) From 58729c04cf1092b87aeef0bf0998c9e2e4771133 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:57 -0500 Subject: [PATCH 1290/2157] mm/huge_memory: add buddy allocator like (non-uniform) folio_split() folio_split() splits a large folio in the same way as buddy allocator splits a large free page for allocation. The purpose is to minimize the number of folios after the split. For example, if user wants to free the 3rd subpage in a order-9 folio, folio_split() will split the order-9 folio as: O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon, since anon folio does not support order-1 yet. ----------------------------------------------------------------- | | | | | | | | | |O-0|O-0|O-0|O-0| O-2 |...| O-7 | O-8 | | | | | | | | | | ----------------------------------------------------------------- O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-9 if it is pagecache --------------------------------------------------------------- | | | | | | | | | O-1 |O-0|O-0| O-2 |...| O-7 | O-8 | | | | | | | | | --------------------------------------------------------------- It generates fewer folios (i.e., 11 or 10) than existing page split approach, which splits the order-9 to 512 order-0 folios. It also reduces the number of new xa_node needed during a pagecache folio split from 8 to 1, potentially decreasing the folio split failure rate due to memory constraints. folio_split() and existing split_huge_page_to_list_to_order() share the folio unmapping and remapping code in __folio_split() and the common backend split code in __split_unmapped_folio() using uniform_split variable to distinguish their operations. uniform_split_supported() and non_uniform_split_supported() are added to factor out check code and will be used outside __folio_split() in the following commit. Link: https://lkml.kernel.org/r/20250307174001.242794-5-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/huge_memory.c | 170 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 42 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 768c5a6662ae..6322fc138d92 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3869,12 +3869,85 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, return ret; } +static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns) +{ + if (folio_test_anon(folio)) { + /* order-1 is not supported for anonymous THP. */ + VM_WARN_ONCE(warns && new_order == 1, + "Cannot split to order-1 folio"); + return new_order != 1; + } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && + !mapping_large_folio_support(folio->mapping)) { + /* + * No split if the file system does not support large folio. + * Note that we might still have THPs in such mappings due to + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping + * does not actually support large folios properly. + */ + VM_WARN_ONCE(warns, + "Cannot split file folio to non-0 order"); + return false; + } + + /* Only swapping a whole PMD-mapped folio is supported */ + if (folio_test_swapcache(folio)) { + VM_WARN_ONCE(warns, + "Cannot split swapcache folio to non-0 order"); + return false; + } + + return true; +} + +/* See comments in non_uniform_split_supported() */ +static bool uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns) +{ + if (folio_test_anon(folio)) { + VM_WARN_ONCE(warns && new_order == 1, + "Cannot split to order-1 folio"); + return new_order != 1; + } else if (new_order) { + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && + !mapping_large_folio_support(folio->mapping)) { + VM_WARN_ONCE(warns, + "Cannot split file folio to non-0 order"); + return false; + } + } + + if (new_order && folio_test_swapcache(folio)) { + VM_WARN_ONCE(warns, + "Cannot split swapcache folio to non-0 order"); + return false; + } + + return true; +} + +/* + * __folio_split: split a folio at @split_at to a @new_order folio + * @folio: folio to split + * @new_order: the order of the new folio + * @split_at: a page within the new folio + * @lock_at: a page within @folio to be left locked to caller + * @list: after-split folios will be put on it if non NULL + * @uniform_split: perform uniform split or not (non-uniform split) + * + * It calls __split_unmapped_folio() to perform uniform and non-uniform split. + * It is in charge of checking whether the split is supported or not and + * preparing @folio for __split_unmapped_folio(). + * + * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be + * split but not to @new_order, the caller needs to check) + */ static int __folio_split(struct folio *folio, unsigned int new_order, - struct page *page, struct list_head *list) + struct page *split_at, struct page *lock_at, + struct list_head *list, bool uniform_split) { struct deferred_split *ds_queue = get_deferred_split_queue(folio); - /* reset xarray order to new order after split */ - XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order); + XA_STATE(xas, &folio->mapping->i_pages, folio->index); bool is_anon = folio_test_anon(folio); struct address_space *mapping = NULL; struct anon_vma *anon_vma = NULL; @@ -3886,32 +3959,17 @@ static int __folio_split(struct folio *folio, unsigned int new_order, VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + if (folio != page_folio(split_at) || folio != page_folio(lock_at)) + return -EINVAL; + if (new_order >= folio_order(folio)) return -EINVAL; - if (is_anon) { - /* order-1 is not supported for anonymous THP. */ - if (new_order == 1) { - VM_WARN_ONCE(1, "Cannot split to order-1 folio"); - return -EINVAL; - } - } else if (new_order) { - /* - * No split if the file system does not support large folio. - * Note that we might still have THPs in such mappings due to - * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping - * does not actually support large folios properly. - */ - if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && - !mapping_large_folio_support(folio->mapping)) { - VM_WARN_ONCE(1, - "Cannot split file folio to non-0 order"); - return -EINVAL; - } - } + if (uniform_split && !uniform_split_supported(folio, new_order, true)) + return -EINVAL; - /* Only swapping a whole PMD-mapped folio is supported */ - if (folio_test_swapcache(folio) && new_order) + if (!uniform_split && + !non_uniform_split_supported(folio, new_order, true)) return -EINVAL; is_hzp = is_huge_zero_folio(folio); @@ -3973,21 +4031,24 @@ static int __folio_split(struct folio *folio, unsigned int new_order, goto out; } - xas_split_alloc(&xas, folio, folio_order(folio), gfp); - if (xas_error(&xas)) { - ret = xas_error(&xas); - goto out; + if (uniform_split) { + xas_set_order(&xas, folio->index, new_order); + xas_split_alloc(&xas, folio, folio_order(folio), gfp); + if (xas_error(&xas)) { + ret = xas_error(&xas); + goto out; + } } anon_vma = NULL; i_mmap_lock_read(mapping); /* - *__split_huge_page() may need to trim off pages beyond EOF: - * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, - * which cannot be nested inside the page tree lock. So note - * end now: i_size itself may be changed at any moment, but - * folio lock is good enough to serialize the trimming. + *__split_unmapped_folio() may need to trim off pages beyond + * EOF: but on 32-bit, i_size_read() takes an irq-unsafe + * seqlock, which cannot be nested inside the page tree lock. + * So note end now: i_size itself may be changed at any moment, + * but folio lock is good enough to serialize the trimming. */ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); if (shmem_mapping(mapping)) @@ -4041,7 +4102,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order, if (mapping) { int nr = folio_nr_pages(folio); - xas_split(&xas, folio, folio_order(folio)); if (folio_test_pmd_mappable(folio) && new_order < HPAGE_PMD_ORDER) { if (folio_test_swapbacked(folio)) { @@ -4055,12 +4115,9 @@ static int __folio_split(struct folio *folio, unsigned int new_order, } } - if (is_anon) { - mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); - mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order)); - } - __split_huge_page(page, list, end, new_order); - ret = 0; + ret = __split_unmapped_folio(folio, new_order, + split_at, lock_at, list, end, &xas, mapping, + uniform_split); } else { spin_unlock(&ds_queue->split_queue_lock); fail: @@ -4138,7 +4195,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, { struct folio *folio = page_folio(page); - return __folio_split(folio, new_order, page, list); + return __folio_split(folio, new_order, &folio->page, page, list, true); +} + +/* + * folio_split: split a folio at @split_at to a @new_order folio + * @folio: folio to split + * @new_order: the order of the new folio + * @split_at: a page within the new folio + * + * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be + * split but not to @new_order, the caller needs to check) + * + * It has the same prerequisites and returns as + * split_huge_page_to_list_to_order(). + * + * Split a folio at @split_at to a new_order folio, leave the + * remaining subpages of the original folio as large as possible. For example, + * in the case of splitting an order-9 folio at its third order-3 subpages to + * an order-3 folio, there are 2^(9-3)=64 order-3 subpages in the order-9 folio. + * After the split, there will be a group of folios with different orders and + * the new folio containing @split_at is marked in bracket: + * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8]. + * + * After split, folio is left locked for caller. + */ +static int folio_split(struct folio *folio, unsigned int new_order, + struct page *split_at, struct list_head *list) +{ + return __folio_split(folio, new_order, split_at, &folio->page, list, + false); } int min_order_for_split(struct folio *folio) From 1f43d5aa24b2741d9bf68b0dc2c5b10e87dc60a9 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:58 -0500 Subject: [PATCH 1291/2157] mm/huge_memory: remove the old, unused __split_huge_page() Now split_huge_page_to_list_to_order() uses the new backend split code in __split_unmapped_folio(), the old __split_huge_page() and __split_huge_page_tail() can be removed. Link: https://lkml.kernel.org/r/20250307174001.242794-6-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/huge_memory.c | 215 ----------------------------------------------- 1 file changed, 215 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6322fc138d92..995ed685c241 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3284,221 +3284,6 @@ static void lru_add_page_tail(struct folio *folio, struct page *tail, } } -static void __split_huge_page_tail(struct folio *folio, int tail, - struct lruvec *lruvec, struct list_head *list, - unsigned int new_order) -{ - struct page *head = &folio->page; - struct page *page_tail = head + tail; - /* - * Careful: new_folio is not a "real" folio before we cleared PageTail. - * Don't pass it around before clear_compound_head(). - */ - struct folio *new_folio = (struct folio *)page_tail; - - VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - - /* - * Clone page flags before unfreezing refcount. - * - * After successful get_page_unless_zero() might follow flags change, - * for example lock_page() which set PG_waiters. - * - * Note that for mapped sub-pages of an anonymous THP, - * PG_anon_exclusive has been cleared in unmap_folio() and is stored in - * the migration entry instead from where remap_page() will restore it. - * We can still have PG_anon_exclusive set on effectively unmapped and - * unreferenced sub-pages of an anonymous THP: we can simply drop - * PG_anon_exclusive (-> PG_mappedtodisk) for these here. - */ - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; - page_tail->flags |= (head->flags & - ((1L << PG_referenced) | - (1L << PG_swapbacked) | - (1L << PG_swapcache) | - (1L << PG_mlocked) | - (1L << PG_uptodate) | - (1L << PG_active) | - (1L << PG_workingset) | - (1L << PG_locked) | - (1L << PG_unevictable) | -#ifdef CONFIG_ARCH_USES_PG_ARCH_2 - (1L << PG_arch_2) | -#endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_3 - (1L << PG_arch_3) | -#endif - (1L << PG_dirty) | - LRU_GEN_MASK | LRU_REFS_MASK)); - - /* ->mapping in first and second tail page is replaced by other uses */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, - page_tail); - new_folio->mapping = folio->mapping; - new_folio->index = folio->index + tail; - - /* - * page->private should not be set in tail pages. Fix up and warn once - * if private is unexpectedly set. - */ - if (unlikely(page_tail->private)) { - VM_WARN_ON_ONCE_PAGE(true, page_tail); - page_tail->private = 0; - } - if (folio_test_swapcache(folio)) - new_folio->swap.val = folio->swap.val + tail; - - /* Page flags must be visible before we make the page non-compound. */ - smp_wmb(); - - /* - * Clear PageTail before unfreezing page refcount. - * - * After successful get_page_unless_zero() might follow put_page() - * which needs correct compound_head(). - */ - clear_compound_head(page_tail); - if (new_order) { - prep_compound_page(page_tail, new_order); - folio_set_large_rmappable(new_folio); - } - - /* Finally unfreeze refcount. Additional reference from page cache. */ - page_ref_unfreeze(page_tail, - 1 + ((!folio_test_anon(folio) || folio_test_swapcache(folio)) ? - folio_nr_pages(new_folio) : 0)); - - if (folio_test_young(folio)) - folio_set_young(new_folio); - if (folio_test_idle(folio)) - folio_set_idle(new_folio); - - folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio)); - - /* - * always add to the tail because some iterators expect new - * pages to show after the currently processed elements - e.g. - * migrate_pages - */ - lru_add_page_tail(folio, page_tail, lruvec, list); -} - -static void __split_huge_page(struct page *page, struct list_head *list, - pgoff_t end, unsigned int new_order) -{ - struct folio *folio = page_folio(page); - struct page *head = &folio->page; - struct lruvec *lruvec; - struct address_space *swap_cache = NULL; - unsigned long offset = 0; - int i, nr_dropped = 0; - unsigned int new_nr = 1 << new_order; - int order = folio_order(folio); - unsigned int nr = 1 << order; - - /* - * Reset any memcg data overlay in the tail pages. folio_nr_pages() - * is unreliable after this point. - */ -#ifdef NR_PAGES_IN_LARGE_FOLIO - folio->_nr_pages = 0; -#endif - - /* complete memcg works before add pages to LRU */ - split_page_memcg(head, order, new_order); - - if (folio_test_anon(folio) && folio_test_swapcache(folio)) { - offset = swap_cache_index(folio->swap); - swap_cache = swap_address_space(folio->swap); - xa_lock(&swap_cache->i_pages); - } - - /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ - lruvec = folio_lruvec_lock(folio); - - folio_clear_has_hwpoisoned(folio); - - for (i = nr - new_nr; i >= new_nr; i -= new_nr) { - struct folio *tail; - __split_huge_page_tail(folio, i, lruvec, list, new_order); - tail = page_folio(head + i); - /* Some pages can be beyond EOF: drop them from page cache */ - if (tail->index >= end) { - if (shmem_mapping(folio->mapping)) - nr_dropped += new_nr; - else if (folio_test_clear_dirty(tail)) - folio_account_cleaned(tail, - inode_to_wb(folio->mapping->host)); - __filemap_remove_folio(tail, NULL); - folio_put_refs(tail, folio_nr_pages(tail)); - } else if (!folio_test_anon(folio)) { - __xa_store(&folio->mapping->i_pages, tail->index, - tail, 0); - } else if (swap_cache) { - __xa_store(&swap_cache->i_pages, offset + i, - tail, 0); - } - } - - if (!new_order) - ClearPageCompound(head); - else { - struct folio *new_folio = (struct folio *)head; - - folio_set_order(new_folio, new_order); - } - unlock_page_lruvec(lruvec); - /* Caller disabled irqs, so they are still disabled here */ - - split_page_owner(head, order, new_order); - pgalloc_tag_split(folio, order, new_order); - - /* See comment in __split_huge_page_tail() */ - if (folio_test_anon(folio)) { - /* Additional pin to swap cache */ - if (folio_test_swapcache(folio)) { - folio_ref_add(folio, 1 + new_nr); - xa_unlock(&swap_cache->i_pages); - } else { - folio_ref_inc(folio); - } - } else { - /* Additional pin to page cache */ - folio_ref_add(folio, 1 + new_nr); - xa_unlock(&folio->mapping->i_pages); - } - local_irq_enable(); - - if (nr_dropped) - shmem_uncharge(folio->mapping->host, nr_dropped); - remap_page(folio, nr, PageAnon(head) ? RMP_USE_SHARED_ZEROPAGE : 0); - - /* - * set page to its compound_head when split to non order-0 pages, so - * we can skip unlocking it below, since PG_locked is transferred to - * the compound_head of the page and the caller will unlock it. - */ - if (new_order) - page = compound_head(page); - - for (i = 0; i < nr; i += new_nr) { - struct page *subpage = head + i; - struct folio *new_folio = page_folio(subpage); - if (subpage == page) - continue; - folio_unlock(new_folio); - - /* - * Subpages may be freed if there wasn't any mapping - * like if add_to_swap() is running on a lru page that - * had its mapping zapped. And freeing these pages - * requires taking the lru_lock so we do the put_page - * of the tail pages after the split is complete. - */ - free_page_and_swap_cache(subpage); - } -} - /* Racy check whether the huge page can be split */ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) { From 4b94c18d15199658f1a86231663e97d3cc12d8de Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:39:59 -0500 Subject: [PATCH 1292/2157] mm/huge_memory: add folio_split() to debugfs testing interface This allows to test folio_split() by specifying an additional in folio page offset parameter to split_huge_page debugfs interface. Link: https://lkml.kernel.org/r/20250307174001.242794-7-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/huge_memory.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 995ed685c241..c6ad2e4053d8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -4331,7 +4331,8 @@ static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma) } static int split_huge_pages_pid(int pid, unsigned long vaddr_start, - unsigned long vaddr_end, unsigned int new_order) + unsigned long vaddr_end, unsigned int new_order, + long in_folio_offset) { int ret = 0; struct task_struct *task; @@ -4415,8 +4416,16 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, if (!folio_test_anon(folio) && folio->mapping != mapping) goto unlock; - if (!split_folio_to_order(folio, target_order)) - split++; + if (in_folio_offset < 0 || + in_folio_offset >= folio_nr_pages(folio)) { + if (!split_folio_to_order(folio, target_order)) + split++; + } else { + struct page *split_at = folio_page(folio, + in_folio_offset); + if (!folio_split(folio, target_order, split_at, NULL)) + split++; + } unlock: @@ -4439,7 +4448,8 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, } static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start, - pgoff_t off_end, unsigned int new_order) + pgoff_t off_end, unsigned int new_order, + long in_folio_offset) { struct filename *file; struct file *candidate; @@ -4488,8 +4498,15 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start, if (folio->mapping != mapping) goto unlock; - if (!split_folio_to_order(folio, target_order)) - split++; + if (in_folio_offset < 0 || in_folio_offset >= nr_pages) { + if (!split_folio_to_order(folio, target_order)) + split++; + } else { + struct page *split_at = folio_page(folio, + in_folio_offset); + if (!folio_split(folio, target_order, split_at, NULL)) + split++; + } unlock: folio_unlock(folio); @@ -4522,6 +4539,7 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, int pid; unsigned long vaddr_start, vaddr_end; unsigned int new_order = 0; + long in_folio_offset = -1; ret = mutex_lock_interruptible(&split_debug_mutex); if (ret) @@ -4550,30 +4568,33 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, goto out; } - ret = sscanf(tok_buf, "0x%lx,0x%lx,%d", &off_start, - &off_end, &new_order); - if (ret != 2 && ret != 3) { + ret = sscanf(tok_buf, "0x%lx,0x%lx,%d,%ld", &off_start, &off_end, + &new_order, &in_folio_offset); + if (ret != 2 && ret != 3 && ret != 4) { ret = -EINVAL; goto out; } - ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order); + ret = split_huge_pages_in_file(file_path, off_start, off_end, + new_order, in_folio_offset); if (!ret) ret = input_len; goto out; } - ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order); + ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d,%ld", &pid, &vaddr_start, + &vaddr_end, &new_order, &in_folio_offset); if (ret == 1 && pid == 1) { split_huge_pages_all(); ret = strlen(input_buf); goto out; - } else if (ret != 3 && ret != 4) { + } else if (ret != 3 && ret != 4 && ret != 5) { ret = -EINVAL; goto out; } - ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order); + ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order, + in_folio_offset); if (!ret) ret = strlen(input_buf); out: From 7460b470a131f985a70302a322617121efdd7caa Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:40:00 -0500 Subject: [PATCH 1293/2157] mm/truncate: use folio_split() in truncate operation Instead of splitting the large folio uniformly during truncation, try to use buddy allocator like folio_split() at the start and the end of a truncation range to minimize the number of resulting folios if it is supported. try_folio_split() is introduced to use folio_split() if supported and it falls back to uniform split otherwise. For example, to truncate a order-4 folio [0, 1, 2, 3, 4, 5, ..., 15] between [3, 10] (inclusive), folio_split() splits the folio at 3 to [0,1], [2], [3], [4..7], [8..15] and [3], [4..7] can be dropped and [8..15] is kept with zeros in [8..10], then another folio_split() is done at 10, so [8..10] can be dropped. One possible optimization is to make folio_split() to split a folio based on a given range, like [3..10] above. But that complicates folio_split(), so it will be investigated when necessary. Link: https://lkml.kernel.org/r/20250226210032.2044041-8-ziy@nvidia.com Link: https://lkml.kernel.org/r/20250307174001.242794-8-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 36 ++++++++++++++++++++++++++++++++++++ mm/huge_memory.c | 6 +++--- mm/truncate.c | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e57e811cfd3c..e893d546a49f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -345,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); int min_order_for_split(struct folio *folio); int split_folio_to_list(struct folio *folio, struct list_head *list); +bool uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +int folio_split(struct folio *folio, unsigned int new_order, struct page *page, + struct list_head *list); +/* + * try_folio_split - try to split a @folio at @page using non uniform split. + * @folio: folio to be split + * @page: split to order-0 at the given page + * @list: store the after-split folios + * + * Try to split a @folio at @page using non uniform split to order-0, if + * non uniform split is not supported, fall back to uniform split. + * + * Return: 0: split is successful, otherwise split failed. + */ +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + if (!non_uniform_split_supported(folio, 0, false)) + return split_huge_page_to_list_to_order(&folio->page, list, + ret); + return folio_split(folio, ret, page, list); +} static inline int split_huge_page(struct page *page) { struct folio *folio = page_folio(page); @@ -537,6 +567,12 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return 0; } +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c6ad2e4053d8..e3ed8e9523f5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3654,7 +3654,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, return ret; } -static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, bool warns) { if (folio_test_anon(folio)) { @@ -3686,7 +3686,7 @@ static bool non_uniform_split_supported(struct folio *folio, unsigned int new_or } /* See comments in non_uniform_split_supported() */ -static bool uniform_split_supported(struct folio *folio, unsigned int new_order, +bool uniform_split_supported(struct folio *folio, unsigned int new_order, bool warns) { if (folio_test_anon(folio)) { @@ -4005,7 +4005,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, * * After split, folio is left locked for caller. */ -static int folio_split(struct folio *folio, unsigned int new_order, +int folio_split(struct folio *folio, unsigned int new_order, struct page *split_at, struct list_head *list) { return __folio_split(folio, new_order, split_at, &folio->page, list, diff --git a/mm/truncate.c b/mm/truncate.c index 79570045071c..5d98054094d1 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -192,6 +192,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); unsigned int offset, length; + struct page *split_at, *split_at2; if (pos < start) offset = start - pos; @@ -221,8 +222,42 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; - if (split_folio(folio) == 0) + + split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); + split_at2 = folio_page(folio, + PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); + + if (!try_folio_split(folio, split_at, NULL)) { + /* + * try to split at offset + length to make sure folios within + * the range can be dropped, especially to avoid memory waste + * for shmem truncate + */ + struct folio *folio2 = page_folio(split_at2); + + if (!folio_try_get(folio2)) + goto no_split; + + if (!folio_test_large(folio2)) + goto out; + + if (!folio_trylock(folio2)) + goto out; + + /* + * make sure folio2 is large and does not change its mapping. + * Its split result does not matter here. + */ + if (folio_test_large(folio2) && + folio2->mapping == folio->mapping) + try_folio_split(folio2, split_at2, NULL); + + folio_unlock(folio2); +out: + folio_put(folio2); +no_split: return true; + } if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); From 80a5c494c89f73907ed659a9233a70253774cdae Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 7 Mar 2025 12:40:01 -0500 Subject: [PATCH 1294/2157] selftests/mm: add tests for folio_split(), buddy allocator like split It splits page cache folios to orders from 0 to 8 at different in-folio offset. Link: https://lkml.kernel.org/r/20250307174001.242794-9-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Cc: Kairui Song Signed-off-by: Andrew Morton --- .../selftests/mm/split_huge_page_test.c | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index e0304046b1a0..719c5e2a6624 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -456,7 +457,8 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, return -1; } -void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) +void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, + int order, int offset) { int fd; char *addr; @@ -474,7 +476,12 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l return; err = 0; - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); + if (offset == -1) + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order); + else + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -493,9 +500,15 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l munmap(addr, fd_size); close(fd); unlink(testfile); - if (err) - ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); - ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + if (offset == -1) { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + } else { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); + } } int main(int argc, char **argv) @@ -506,6 +519,7 @@ int main(int argc, char **argv) char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; const char *fs_loc; bool created_tmp; + int offset; ksft_print_header(); @@ -517,7 +531,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+1+9+9); + ksft_set_plan(1+8+1+9+9+8*4+2); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -540,7 +554,13 @@ int main(int argc, char **argv) created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); for (i = 8; i >= 0; i--) - split_thp_in_pagecache_to_order(fd_size, i, fs_loc); + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); + + for (i = 0; i < 9; i++) + for (offset = 0; + offset < pmd_pagesize / pagesize; + offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); ksft_finished(); From 200a89c159a7a416115e6e309183c82183bf98aa Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 14 Mar 2025 18:21:12 -0400 Subject: [PATCH 1295/2157] mm/filemap: use xas_try_split() in __filemap_add_folio() Patch series "Minimize xa_node allocation during xarry split", v3. When splitting a multi-index entry in XArray from order-n to order-m, existing xas_split_alloc()+xas_split() approach requires 2^(n % XA_CHUNK_SHIFT) xa_node allocations. But its callers, __filemap_add_folio() and shmem_split_large_entry(), use at most 1 xa_node. To minimize xa_node allocation and remove the limitation of no split from order-12 (or above) to order-0 (or anything between 0 and 5)[1], xas_try_split() was added[2], which allocates (n / XA_CHUNK_SHIFT - m / XA_CHUNK_SHIFT) xa_node. It is used for non-uniform folio split, but can be used by __filemap_add_folio() and shmem_split_large_entry(). xas_split_alloc() and xas_split() split an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | | | ------- --- --- ------- | | ... | | V V V V ----------- ----------- ----------- ----------- | xa_node | | xa_node | ... | xa_node | | xa_node | ----------- ----------- ----------- ----------- xas_try_split() splits an order-9 to order-0: --------------------------------- | | | | | | | | | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | | | | | | | | | --------------------------------- | | V ----------- | xa_node | ----------- xas_try_split() is designed to be called iteratively with n = m + 1. xas_try_split_mini_order() is added to minmize the number of calls to xas_try_split() by telling the caller the next minimal order to split to instead of n - 1. Splitting order-n to order-m when m= l * XA_CHUNK_SHIFT does not require xa_node allocation and requires 1 xa_node when n=l * XA_CHUNK_SHIFT and m = n - 1, so it is OK to use xas_try_split() with n > m + 1 when no new xa_node is needed. xfstests quick group test passed on xfs and tmpfs. [1] https://lore.kernel.org/linux-mm/Z6YX3RznGLUD07Ao@casper.infradead.org/ [2] https://lore.kernel.org/linux-mm/20250226210032.2044041-1-ziy@nvidia.com/ This patch (of 2): During __filemap_add_folio(), a shadow entry is covering n slots and a folio covers m slots with m < n is to be added. Instead of splitting all n slots, only the m slots covered by the folio need to be split and the remaining n-m shadow entries can be retained with orders ranging from m to n-1. This method only requires (n/XA_CHUNK_SHIFT) - (m/XA_CHUNK_SHIFT) new xa_nodes instead of (n % XA_CHUNK_SHIFT) * ((n/XA_CHUNK_SHIFT) - (m/XA_CHUNK_SHIFT)) new xa_nodes, compared to the original xas_split_alloc() + xas_split() one. For example, to insert an order-0 folio when an order-9 shadow entry is present (assuming XA_CHUNK_SHIFT is 6), 1 xa_node is needed instead of 8. xas_try_split_min_order() is introduced to reduce the number of calls to xas_try_split() during split. Link: https://lkml.kernel.org/r/20250314222113.711703-1-ziy@nvidia.com Link: https://lkml.kernel.org/r/20250314222113.711703-2-ziy@nvidia.com Signed-off-by: Zi Yan Cc: Baolin Wang Cc: Hugh Dickins Cc: Kairui Song Cc: Miaohe Lin Cc: Mattew Wilcox Cc: David Hildenbrand Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/xarray.h | 7 +++++++ lib/xarray.c | 25 +++++++++++++++++++++++ mm/filemap.c | 45 +++++++++++++++++------------------------- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 4010195201c9..78eede109b1a 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1556,6 +1556,7 @@ int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); void xas_try_split(struct xa_state *xas, void *entry, unsigned int order); +unsigned int xas_try_split_min_order(unsigned int order); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { @@ -1582,6 +1583,12 @@ static inline void xas_try_split(struct xa_state *xas, void *entry, unsigned int order) { } + +static inline unsigned int xas_try_split_min_order(unsigned int order) +{ + return 0; +} + #endif /** diff --git a/lib/xarray.c b/lib/xarray.c index 3bae48558e21..9644b18af18d 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1134,6 +1134,28 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) } EXPORT_SYMBOL_GPL(xas_split); +/** + * xas_try_split_min_order() - Minimal split order xas_try_split() can accept + * @order: Current entry order. + * + * xas_try_split() can split a multi-index entry to smaller than @order - 1 if + * no new xa_node is needed. This function provides the minimal order + * xas_try_split() supports. + * + * Return: the minimal order xas_try_split() supports + * + * Context: Any context. + * + */ +unsigned int xas_try_split_min_order(unsigned int order) +{ + if (order % XA_CHUNK_SHIFT == 0) + return order == 0 ? 0 : order - 1; + + return order - (order % XA_CHUNK_SHIFT); +} +EXPORT_SYMBOL_GPL(xas_try_split_min_order); + /** * xas_try_split() - Try to split a multi-index entry. * @xas: XArray operation state. @@ -1145,6 +1167,9 @@ EXPORT_SYMBOL_GPL(xas_split); * needed, the function will use GFP_NOWAIT to get one if xas->xa_alloc is * NULL. If more new xa_node are needed, the function gives EINVAL error. * + * NOTE: use xas_try_split_min_order() to get next split order instead of + * @order - 1 if you want to minmize xas_try_split() calls. + * * Context: Any context. The caller should hold the xa_lock. */ void xas_try_split(struct xa_state *xas, void *entry, unsigned int order) diff --git a/mm/filemap.c b/mm/filemap.c index 152993a86de3..cc69f174f76b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -857,11 +857,10 @@ EXPORT_SYMBOL_GPL(replace_page_cache_folio); noinline int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { - XA_STATE(xas, &mapping->i_pages, index); - void *alloced_shadow = NULL; - int alloced_order = 0; + XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); bool huge; long nr; + unsigned int forder = folio_order(folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); @@ -870,7 +869,6 @@ noinline int __filemap_add_folio(struct address_space *mapping, mapping_set_update(&xas, mapping); VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); - xas_set_order(&xas, index, folio_order(folio)); huge = folio_test_hugetlb(folio); nr = folio_nr_pages(folio); @@ -880,7 +878,7 @@ noinline int __filemap_add_folio(struct address_space *mapping, folio->index = xas.xa_index; for (;;) { - int order = -1, split_order = 0; + int order = -1; void *entry, *old = NULL; xas_lock_irq(&xas); @@ -898,21 +896,25 @@ noinline int __filemap_add_folio(struct address_space *mapping, order = xas_get_order(&xas); } - /* entry may have changed before we re-acquire the lock */ - if (alloced_order && (old != alloced_shadow || order != alloced_order)) { - xas_destroy(&xas); - alloced_order = 0; - } - if (old) { - if (order > 0 && order > folio_order(folio)) { + if (order > 0 && order > forder) { + unsigned int split_order = max(forder, + xas_try_split_min_order(order)); + /* How to handle large swap entries? */ BUG_ON(shmem_mapping(mapping)); - if (!alloced_order) { - split_order = order; - goto unlock; + + while (order > forder) { + xas_set_order(&xas, index, split_order); + xas_try_split(&xas, old, order); + if (xas_error(&xas)) + goto unlock; + order = split_order; + split_order = + max(xas_try_split_min_order( + split_order), + forder); } - xas_split(&xas, old, order); xas_reset(&xas); } if (shadowp) @@ -936,17 +938,6 @@ noinline int __filemap_add_folio(struct address_space *mapping, unlock: xas_unlock_irq(&xas); - /* split needed, alloc here and retry. */ - if (split_order) { - xas_split_alloc(&xas, old, split_order, gfp); - if (xas_error(&xas)) - goto error; - alloced_shadow = old; - alloced_order = split_order; - xas_reset(&xas); - continue; - } - if (!xas_nomem(&xas, gfp)) break; } From d53c78fffe7ad364397c693522ceb4d152c2aacd Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 14 Mar 2025 18:21:13 -0400 Subject: [PATCH 1296/2157] mm/shmem: use xas_try_split() in shmem_split_large_entry() During shmem_split_large_entry(), large swap entries are covering n slots and an order-0 folio needs to be inserted. Instead of splitting all n slots, only the 1 slot covered by the folio need to be split and the remaining n-1 shadow entries can be retained with orders ranging from 0 to n-1. This method only requires (n/XA_CHUNK_SHIFT) new xa_nodes instead of (n % XA_CHUNK_SHIFT) * (n/XA_CHUNK_SHIFT) new xa_nodes, compared to the original xas_split_alloc() + xas_split() one. For example, to split an order-9 large swap entry (assuming XA_CHUNK_SHIFT is 6), 1 xa_node is needed instead of 8. xas_try_split_min_order() is used to reduce the number of calls to xas_try_split() during split. Link: https://lkml.kernel.org/r/20250314222113.711703-3-ziy@nvidia.com Signed-off-by: Zi Yan Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Baolin Wang Cc: Hugh Dickins Cc: Kairui Song Cc: Mattew Wilcox Cc: Miaohe Lin Cc: David Hildenbrand Cc: John Hubbard Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Ryan Roberts Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/shmem.c | 59 ++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 8de9b4a07a8a..405c898266d4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2153,15 +2153,16 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index, { struct address_space *mapping = inode->i_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, index, 0); - void *alloced_shadow = NULL; - int alloced_order = 0, i; + int split_order = 0, entry_order; + int i; /* Convert user data gfp flags to xarray node gfp flags */ gfp &= GFP_RECLAIM_MASK; for (;;) { - int order = -1, split_order = 0; void *old = NULL; + int cur_order; + pgoff_t swap_index; xas_lock_irq(&xas); old = xas_load(&xas); @@ -2170,60 +2171,56 @@ static int shmem_split_large_entry(struct inode *inode, pgoff_t index, goto unlock; } - order = xas_get_order(&xas); + entry_order = xas_get_order(&xas); - /* Swap entry may have changed before we re-acquire the lock */ - if (alloced_order && - (old != alloced_shadow || order != alloced_order)) { - xas_destroy(&xas); - alloced_order = 0; - } + if (!entry_order) + goto unlock; /* Try to split large swap entry in pagecache */ - if (order > 0) { - if (!alloced_order) { - split_order = order; + cur_order = entry_order; + swap_index = round_down(index, 1 << entry_order); + + split_order = xas_try_split_min_order(cur_order); + + while (cur_order > 0) { + pgoff_t aligned_index = + round_down(index, 1 << cur_order); + pgoff_t swap_offset = aligned_index - swap_index; + + xas_set_order(&xas, index, split_order); + xas_try_split(&xas, old, cur_order); + if (xas_error(&xas)) goto unlock; - } - xas_split(&xas, old, order); /* * Re-set the swap entry after splitting, and the swap * offset of the original large entry must be continuous. */ - for (i = 0; i < 1 << order; i++) { - pgoff_t aligned_index = round_down(index, 1 << order); + for (i = 0; i < 1 << cur_order; + i += (1 << split_order)) { swp_entry_t tmp; - tmp = swp_entry(swp_type(swap), swp_offset(swap) + i); + tmp = swp_entry(swp_type(swap), + swp_offset(swap) + swap_offset + + i); __xa_store(&mapping->i_pages, aligned_index + i, swp_to_radix_entry(tmp), 0); } + cur_order = split_order; + split_order = xas_try_split_min_order(split_order); } unlock: xas_unlock_irq(&xas); - /* split needed, alloc here and retry. */ - if (split_order) { - xas_split_alloc(&xas, old, split_order, gfp); - if (xas_error(&xas)) - goto error; - alloced_shadow = old; - alloced_order = split_order; - xas_reset(&xas); - continue; - } - if (!xas_nomem(&xas, gfp)) break; } -error: if (xas_error(&xas)) return xas_error(&xas); - return alloced_order; + return entry_order; } /* From c637c61c9ed0203d9a1f2ba21fb7a49ddca3ef8f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 10 Mar 2025 09:50:09 -0700 Subject: [PATCH 1297/2157] mm/damon/sysfs-schemes: avoid Wformat-security warning on damon_sysfs_access_pattern_add_range_dir() When -Wformat-security is given, compiler warns as a potential security issue on damon_sysfs_access_pattern_add_range_dir() as below: mm/damon/sysfs-schemes.c: In function `damon_sysfs_access_pattern_add_range_dir': mm/damon/sysfs-schemes.c:1503:25: warning: format not a string literal and no format arguments [-Wformat-security] 1503 | &access_pattern->kobj, name); | ^ Fix it by using "%s" as the format and the name as the argument. Link: https://lkml.kernel.org/r/20250310165009.652491-1-sj@kernel.org Fixes: 7e84b1f8212a ("mm/damon/sysfs: support DAMON-based Operation Schemes") Signed-off-by: SeongJae Park Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 985cfc750a90..5023f2b690d6 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1471,7 +1471,7 @@ static int damon_sysfs_access_pattern_add_range_dir( if (!range) return -ENOMEM; err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, - &access_pattern->kobj, name); + &access_pattern->kobj, "%s", name); if (err) kobject_put(&range->kobj); else From 61659efdb35ce6c6ac7639342098f3c4548b794b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Mar 2025 09:30:43 +1000 Subject: [PATCH 1298/2157] drivers/base/memory: improve add_boot_memory_block() Patch series "drivers/base/memory: Two cleanups", v3. Two cleanups to drivers/base/memory. This patch (of 2)L It's unnecessary to count the present sections for the specified block since the block will be added if any section in the block is present. Besides, for_each_present_section_nr() can be reused as Andrew Morton suggested. Improve by using for_each_present_section_nr() and dropping the unnecessary @section_count. No functional changes intended. Link: https://lkml.kernel.org/r/20250311233045.148943-1-gshan@redhat.com Link: https://lkml.kernel.org/r/20250311233045.148943-2-gshan@redhat.com Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Acked-by: Oscar Salvador Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- drivers/base/memory.c | 17 ++++++++--------- include/linux/mmzone.h | 5 +++++ mm/sparse.c | 5 ----- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 4765f2928725..8f3a41d9bfaa 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -818,18 +818,17 @@ static int add_memory_block(unsigned long block_id, unsigned long state, static int __init add_boot_memory_block(unsigned long base_section_nr) { - int section_count = 0; unsigned long nr; - for (nr = base_section_nr; nr < base_section_nr + sections_per_block; - nr++) - if (present_section_nr(nr)) - section_count++; + for_each_present_section_nr(base_section_nr, nr) { + if (nr >= (base_section_nr + sections_per_block)) + break; - if (section_count == 0) - return 0; - return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); + return add_memory_block(memory_block_id(base_section_nr), + MEM_ONLINE, NULL, NULL); + } + + return 0; } static int add_hotplug_memory_block(unsigned long block_id, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 550dbba92521..dbb0ad69e17f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2140,6 +2140,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) return -1; } +#define for_each_present_section_nr(start, section_nr) \ + for (section_nr = next_present_section_nr(start - 1); \ + section_nr != -1; \ + section_nr = next_present_section_nr(section_nr)) + /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate diff --git a/mm/sparse.c b/mm/sparse.c index ee0234a77c7f..3c012cf83cc2 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -170,11 +170,6 @@ static void __section_mark_present(struct mem_section *ms, ms->section_mem_map |= SECTION_MARKED_PRESENT; } -#define for_each_present_section_nr(start, section_nr) \ - for (section_nr = next_present_section_nr(start-1); \ - section_nr != -1; \ - section_nr = next_present_section_nr(section_nr)) - static inline unsigned long first_present_section_nr(void) { return next_present_section_nr(-1); From 1a24776fca39ed79d7f5e0b0592b5addd784981e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 12 Mar 2025 09:30:44 +1000 Subject: [PATCH 1299/2157] drivers/base/memory: correct the field name in the header Replace @blocks with @memory_blocks to match with the definition of struct memory_group. Link: https://lkml.kernel.org/r/20250311233045.148943-3-gshan@redhat.com Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Acked-by: Oscar Salvador Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memory.h b/include/linux/memory.h index c0afee5d126e..12daa6ec7d09 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -25,7 +25,7 @@ /** * struct memory_group - a logical group of memory blocks * @nid: The node id for all memory blocks inside the memory group. - * @blocks: List of all memory blocks belonging to this memory group. + * @memory_blocks: List of all memory blocks belonging to this memory group. * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this * memory group. * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this From 5d89666bd99831cee14abcf201b3867d9f15abae Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 10 Mar 2025 14:04:17 +0000 Subject: [PATCH 1300/2157] mm: use ptep_get() instead of directly dereferencing pte_t* It is best practice for all pte accesses to go via the arch helpers, to ensure non-torn values and to allow the arch to intervene where needed (contpte for arm64 for example). While in this case it was probably safe to directly dereference, let's tidy it up for consistency. Link: https://lkml.kernel.org/r/20250310140418.1737409-1-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Lorenzo Stoakes Reviewed-by: Qi Zheng Reviewed-by: Anshuman Khandual Reviewed-by: Dev Jain Signed-off-by: Andrew Morton --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index c0adea67cd62..f3ee6d8d5e2e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -202,7 +202,7 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, return false; VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page); + VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page); if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) || mm_forbids_zeropage(pvmw->vma->vm_mm)) From 116eb468956b8532eae4d008803d4957c2220447 Mon Sep 17 00:00:00 2001 From: Enrico Bravi Date: Mon, 10 Mar 2025 12:25:37 +0100 Subject: [PATCH 1301/2157] mm/shmem: fix functions documentation Add missing parenthesis in @name parameter description. Link: https://lkml.kernel.org/r/20250310112535.84754-1-enrico.bravi@polito.it Signed-off-by: Enrico Bravi Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 405c898266d4..7b738d8d6581 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -5841,7 +5841,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, * underlying inode. So users of this interface must do LSM checks at a * higher layer. The users are the big_key and shm implementations. LSM * checks are provided at the key or shm level rather than the inode. - * @name: name for dentry (to be seen in /proc//maps + * @name: name for dentry (to be seen in /proc//maps) * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ @@ -5853,7 +5853,7 @@ EXPORT_SYMBOL_GPL(shmem_kernel_file_setup); /** * shmem_file_setup - get an unlinked file living in tmpfs - * @name: name for dentry (to be seen in /proc//maps + * @name: name for dentry (to be seen in /proc//maps) * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ @@ -5866,7 +5866,7 @@ EXPORT_SYMBOL_GPL(shmem_file_setup); /** * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs * @mnt: the tmpfs mount where the file will be created - * @name: name for dentry (to be seen in /proc//maps + * @name: name for dentry (to be seen in /proc//maps) * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ From 8c02048d1c6126527f15752a5e0849dc49cefeeb Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Sat, 8 Mar 2025 03:46:00 +0000 Subject: [PATCH 1302/2157] mm/page_alloc: add trace event for per-zone watermark setup Patch series "Add tracepoints for lowmem reserves, watermarks and totalreserve_pages", v2. This patchset introduces tracepoints to track changes in the lowmem reserves, watermarks and totalreserve_pages. This helps to track the exact timing of such changes and understand their relation to reclaim activities. The tracepoints added are: mm_setup_per_zone_lowmem_reserve mm_setup_per_zone_wmarks mm_calculate_totalreserve_pagesi This patch (of 3): This commit introduces the `mm_setup_per_zone_wmarks` trace event, which provides detailed insights into the kernel's per-zone watermark configuration, offering precise timing and the ability to correlate watermark changes with specific kernel events. While `/proc/zoneinfo` provides some information about zone watermarks, this trace event offers: 1. The ability to link watermark changes to specific kernel events and logic. 2. The ability to capture rapid or short-lived changes in watermarks that may be missed by user-space polling 3. Diagnosing unexpected kswapd activity or excessive direct reclaim triggered by rapidly changing watermarks. Link: https://lkml.kernel.org/r/20250308034606.2036033-1-liumartin@google.com Link: https://lkml.kernel.org/r/20250308034606.2036033-2-liumartin@google.com Signed-off-by: Martin Liu Acked-by: David Rientjes Cc: Steven Rostedt Cc: Martin Liu Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 33 +++++++++++++++++++++++++++++++++ mm/page_alloc.c | 1 + 2 files changed, 34 insertions(+) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index b37eb0a7060f..5fd392dae503 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -342,6 +342,39 @@ TRACE_EVENT(mm_alloc_contig_migrate_range_info, __entry->nr_mapped) ); +TRACE_EVENT(mm_setup_per_zone_wmarks, + + TP_PROTO(struct zone *zone), + + TP_ARGS(zone), + + TP_STRUCT__entry( + __field(int, node_id) + __string(name, zone->name) + __field(unsigned long, watermark_min) + __field(unsigned long, watermark_low) + __field(unsigned long, watermark_high) + __field(unsigned long, watermark_promo) + ), + + TP_fast_assign( + __entry->node_id = zone->zone_pgdat->node_id; + __assign_str(name); + __entry->watermark_min = zone->_watermark[WMARK_MIN]; + __entry->watermark_low = zone->_watermark[WMARK_LOW]; + __entry->watermark_high = zone->_watermark[WMARK_HIGH]; + __entry->watermark_promo = zone->_watermark[WMARK_PROMO]; + ), + + TP_printk("node_id=%d zone name=%s watermark min=%lu low=%lu high=%lu promo=%lu", + __entry->node_id, + __get_str(name), + __entry->watermark_min, + __entry->watermark_low, + __entry->watermark_high, + __entry->watermark_promo) +); + /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c6ae7e5aaad..b739367434ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6006,6 +6006,7 @@ static void __setup_per_zone_wmarks(void) zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp; zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp; + trace_mm_setup_per_zone_wmarks(zone); spin_unlock_irqrestore(&zone->lock, flags); } From a293aba4a584709889f77a0ad0c45746aecf1b9f Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Sat, 8 Mar 2025 03:46:01 +0000 Subject: [PATCH 1303/2157] mm/page_alloc: add trace event for per-zone lowmem reserve setup This commit introduces the `mm_setup_per_zone_lowmem_reserve` trace event,which provides detailed insights into the kernel's per-zone lowmem reserve configuration. The trace event provides precise timestamps, allowing developers to 1. Correlate lowmem reserve changes with specific kernel events and able to diagnose unexpected kswapd or direct reclaim behavior triggered by dynamic changes in lowmem reserve. 2. Know memory allocation failures that occur due to insufficient lowmem reserve, by precisely correlating allocation attempts with reserve adjustments. Link: https://lkml.kernel.org/r/20250308034606.2036033-3-liumartin@google.com Signed-off-by: Martin Liu Acked-by: David Rientjes Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 27 +++++++++++++++++++++++++++ mm/page_alloc.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 5fd392dae503..9623e68d4d26 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -375,6 +375,33 @@ TRACE_EVENT(mm_setup_per_zone_wmarks, __entry->watermark_promo) ); +TRACE_EVENT(mm_setup_per_zone_lowmem_reserve, + + TP_PROTO(struct zone *zone, struct zone *upper_zone, long lowmem_reserve), + + TP_ARGS(zone, upper_zone, lowmem_reserve), + + TP_STRUCT__entry( + __field(int, node_id) + __string(name, zone->name) + __string(upper_name, upper_zone->name) + __field(long, lowmem_reserve) + ), + + TP_fast_assign( + __entry->node_id = zone->zone_pgdat->node_id; + __assign_str(name); + __assign_str(upper_name); + __entry->lowmem_reserve = lowmem_reserve; + ), + + TP_printk("node_id=%d zone name=%s upper_zone name=%s lowmem_reserve_pages=%ld", + __entry->node_id, + __get_str(name), + __get_str(upper_name), + __entry->lowmem_reserve) +); + /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b739367434ce..a82d96cb3044 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5943,6 +5943,8 @@ static void setup_per_zone_lowmem_reserve(void) zone->lowmem_reserve[j] = 0; else zone->lowmem_reserve[j] = managed_pages / ratio; + trace_mm_setup_per_zone_lowmem_reserve(zone, upper_zone, + zone->lowmem_reserve[j]); } } } From 15766485e4a51bec2dcce304c089a95550720033 Mon Sep 17 00:00:00 2001 From: Martin Liu Date: Sat, 8 Mar 2025 03:46:02 +0000 Subject: [PATCH 1304/2157] mm/page_alloc: add trace event for totalreserve_pages calculation This commit introduces a new trace event, `mm_calculate_totalreserve_pages`, which reports the new reserve value at the exact time when it takes effect. The `totalreserve_pages` value represents the total amount of memory reserved across all zones and nodes in the system. This reserved memory is crucial for ensuring that critical kernel operations have access to sufficient memory, even under memory pressure. By tracing the `totalreserve_pages` value, developers can gain insights that how the total reserved memory changes over time. Link: https://lkml.kernel.org/r/20250308034606.2036033-4-liumartin@google.com Signed-off-by: Martin Liu Acked-by: David Rientjes Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 18 ++++++++++++++++++ mm/page_alloc.c | 1 + 2 files changed, 19 insertions(+) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 9623e68d4d26..f74925a6cf69 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -402,6 +402,24 @@ TRACE_EVENT(mm_setup_per_zone_lowmem_reserve, __entry->lowmem_reserve) ); +TRACE_EVENT(mm_calculate_totalreserve_pages, + + TP_PROTO(unsigned long totalreserve_pages), + + TP_ARGS(totalreserve_pages), + + TP_STRUCT__entry( + __field(unsigned long, totalreserve_pages) + ), + + TP_fast_assign( + __entry->totalreserve_pages = totalreserve_pages; + ), + + TP_printk("totalreserve_pages=%lu", __entry->totalreserve_pages) +); + + /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a82d96cb3044..0be1fedd1201 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5914,6 +5914,7 @@ static void calculate_totalreserve_pages(void) } } totalreserve_pages = reserve_pages; + trace_mm_calculate_totalreserve_pages(totalreserve_pages); } /* From 9ecd2f839b2596aaa510f20e18d496c2e3e0fa56 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 12 Mar 2025 09:47:47 -0700 Subject: [PATCH 1305/2157] mm/madvise: use is_memory_failure() from madvise_do_behavior() Patch series "mm/madvise: cleanup requests validations and classifications". Cleanup madvise entry level code for cleaner request validations and classifications. This patch (of 4): To reduce redundant open-coded checks of CONFIG_MEMORY_FAILURE and MADV_{HWPOISON,SOFT_OFFLINE} in madvise_[un]lock(), is_memory_failure() is introduced. madvise_do_behavior() is still doing the same open-coded check, though. Use is_memory_failure() instead. To avoid build failure on !CONFIG_MEMORY_FAILURE case, implement an empty madvise_inject_error() under the config. Also move the definition of is_memory_failure() inside #ifdef CONFIG_MEMORY_FAILURE clause for madvise_inject_error() definition, to reduce duplicated ifdef clauses. Link: https://lkml.kernel.org/r/20250312164750.59215-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250312164750.59215-2-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes Reviewed-by: Shakeel Butt Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 388dc289b5d1..c3ab1f283b18 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1392,7 +1392,32 @@ static int madvise_inject_error(int behavior, return 0; } -#endif + +static bool is_memory_failure(int behavior) +{ + switch (behavior) { + case MADV_HWPOISON: + case MADV_SOFT_OFFLINE: + return true; + default: + return false; + } +} + +#else + +static int madvise_inject_error(int behavior, + unsigned long start, unsigned long end) +{ + return 0; +} + +static bool is_memory_failure(int behavior) +{ + return false; +} + +#endif /* CONFIG_MEMORY_FAILURE */ static bool madvise_behavior_valid(int behavior) @@ -1569,24 +1594,6 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif /* CONFIG_ANON_VMA_NAME */ -#ifdef CONFIG_MEMORY_FAILURE -static bool is_memory_failure(int behavior) -{ - switch (behavior) { - case MADV_HWPOISON: - case MADV_SOFT_OFFLINE: - return true; - default: - return false; - } -} -#else -static bool is_memory_failure(int behavior) -{ - return false; -} -#endif - static int madvise_lock(struct mm_struct *mm, int behavior) { if (is_memory_failure(behavior)) @@ -1640,10 +1647,8 @@ static int madvise_do_behavior(struct mm_struct *mm, unsigned long end; int error; -#ifdef CONFIG_MEMORY_FAILURE - if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) + if (is_memory_failure(behavior)) return madvise_inject_error(behavior, start, start + len_in); -#endif start = untagged_addr_remote(mm, start); end = start + len; From f4a578d34590db42b4870ac694193413421610c1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 12 Mar 2025 09:47:48 -0700 Subject: [PATCH 1306/2157] mm/madvise: split out populate behavior check logic madvise_do_behavior() has a long open-coded 'behavior' check for MADV_POPULATE_{READ,WRITE}. It adds multiple layers[1] and make the code arguably take longer time to read. Like is_memory_failure(), split out the check to a separate function. This is not technically removing the additional layer but discourage further extending the switch-case. Also it makes madvise_do_behavior() code shorter and therefore easier to read. [1] https://lore.kernel.org/bd6d0bf1-c79e-46bd-a810-9791efb9ad73@lucifer.local Link: https://lkml.kernel.org/r/20250312164750.59215-3-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes Reviewed-by: Shakeel Butt Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index c3ab1f283b18..611db868ae38 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1640,6 +1640,17 @@ static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior) return true; } +static bool is_madvise_populate(int behavior) +{ + switch (behavior) { + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: + return true; + default: + return false; + } +} + static int madvise_do_behavior(struct mm_struct *mm, unsigned long start, size_t len_in, size_t len, int behavior) { @@ -1653,16 +1664,11 @@ static int madvise_do_behavior(struct mm_struct *mm, end = start + len; blk_start_plug(&plug); - switch (behavior) { - case MADV_POPULATE_READ: - case MADV_POPULATE_WRITE: + if (is_madvise_populate(behavior)) error = madvise_populate(mm, start, end, behavior); - break; - default: + else error = madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); - break; - } blk_finish_plug(&plug); return error; } From 0a6ffacb3b42233fe44e0faedfcc8e83f9ad99c6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 12 Mar 2025 09:47:49 -0700 Subject: [PATCH 1307/2157] mm/madvise: deduplicate madvise_do_behavior() skip case handlings The logic for checking if a given madvise() request for a single memory range can skip real work, namely madvise_do_behavior(), is duplicated in do_madvise() and vector_madvise(). Split out the logic to a function and reuse it. Link: https://lkml.kernel.org/r/20250312164750.59215-4-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes Reviewed-by: Shakeel Butt Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 57 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 611db868ae38..ba006d05c7ea 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1640,6 +1640,31 @@ static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior) return true; } +/* + * madvise_should_skip() - Return if the request is invalid or nothing. + * @start: Start address of madvise-requested address range. + * @len_in: Length of madvise-requested address range. + * @behavior: Requested madvise behavor. + * @err: Pointer to store an error code from the check. + * + * If the specified behaviour is invalid or nothing would occur, we skip the + * operation. This function returns true in the cases, otherwise false. In + * the former case we store an error on @err. + */ +static bool madvise_should_skip(unsigned long start, size_t len_in, + int behavior, int *err) +{ + if (!is_valid_madvise(start, len_in, behavior)) { + *err = -EINVAL; + return true; + } + if (start + PAGE_ALIGN(len_in) == start) { + *err = 0; + return true; + } + return false; +} + static bool is_madvise_populate(int behavior) { switch (behavior) { @@ -1747,23 +1772,15 @@ static int madvise_do_behavior(struct mm_struct *mm, */ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { - unsigned long end; int error; - size_t len; - - if (!is_valid_madvise(start, len_in, behavior)) - return -EINVAL; - - len = PAGE_ALIGN(len_in); - end = start + len; - - if (end == start) - return 0; + if (madvise_should_skip(start, len_in, behavior, &error)) + return error; error = madvise_lock(mm, behavior); if (error) return error; - error = madvise_do_behavior(mm, start, len_in, len, behavior); + error = madvise_do_behavior(mm, start, len_in, PAGE_ALIGN(len_in), + behavior); madvise_unlock(mm, behavior); return error; @@ -1790,19 +1807,13 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, while (iov_iter_count(iter)) { unsigned long start = (unsigned long)iter_iov_addr(iter); size_t len_in = iter_iov_len(iter); - size_t len; + int error; - if (!is_valid_madvise(start, len_in, behavior)) { - ret = -EINVAL; - break; - } - - len = PAGE_ALIGN(len_in); - if (start + len == start) - ret = 0; + if (madvise_should_skip(start, len_in, behavior, &error)) + ret = error; else - ret = madvise_do_behavior(mm, start, len_in, len, - behavior); + ret = madvise_do_behavior(mm, start, len_in, + PAGE_ALIGN(len_in), behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat From be9258a6bf26d2272856214406721762a21aab1b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 12 Mar 2025 09:47:50 -0700 Subject: [PATCH 1308/2157] mm/madvise: remove len parameter of madvise_do_behavior() Because madise_should_skip() logic is factored out, making madvise_do_behavior() calculates 'len' on its own rather then receiving it as a parameter makes code simpler. Remove the parameter. Link: https://lkml.kernel.org/r/20250312164750.59215-5-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes Reviewed-by: Shakeel Butt Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/madvise.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index ba006d05c7ea..b17f684322ad 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1677,7 +1677,7 @@ static bool is_madvise_populate(int behavior) } static int madvise_do_behavior(struct mm_struct *mm, - unsigned long start, size_t len_in, size_t len, int behavior) + unsigned long start, size_t len_in, int behavior) { struct blk_plug plug; unsigned long end; @@ -1686,7 +1686,7 @@ static int madvise_do_behavior(struct mm_struct *mm, if (is_memory_failure(behavior)) return madvise_inject_error(behavior, start, start + len_in); start = untagged_addr_remote(mm, start); - end = start + len; + end = start + PAGE_ALIGN(len_in); blk_start_plug(&plug); if (is_madvise_populate(behavior)) @@ -1779,8 +1779,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh error = madvise_lock(mm, behavior); if (error) return error; - error = madvise_do_behavior(mm, start, len_in, PAGE_ALIGN(len_in), - behavior); + error = madvise_do_behavior(mm, start, len_in, behavior); madvise_unlock(mm, behavior); return error; @@ -1812,8 +1811,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, if (madvise_should_skip(start, len_in, behavior, &error)) ret = error; else - ret = madvise_do_behavior(mm, start, len_in, - PAGE_ALIGN(len_in), behavior); + ret = madvise_do_behavior(mm, start, len_in, behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat From c0ebbb3841e07c4493e6fe351698806b09a87a37 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 12 Mar 2025 10:10:13 -0400 Subject: [PATCH 1309/2157] mm: add missing release barrier on PGDAT_RECLAIM_LOCKED unlock The PGDAT_RECLAIM_LOCKED bit is used to provide mutual exclusion of node reclaim for struct pglist_data using a single bit. It is "locked" with a test_and_set_bit (similarly to a try lock) which provides full ordering with respect to loads and stores done within __node_reclaim(). It is "unlocked" with clear_bit(), which does not provide any ordering with respect to loads and stores done before clearing the bit. The lack of clear_bit() memory ordering with respect to stores within __node_reclaim() can cause a subsequent CPU to fail to observe stores from a prior node reclaim. This is not an issue in practice on TSO (e.g. x86), but it is an issue on weakly-ordered architectures (e.g. arm64). Fix this by using clear_bit_unlock rather than clear_bit to clear PGDAT_RECLAIM_LOCKED with a release memory ordering semantic. This provides stronger memory ordering (release rather than relaxed). Link: https://lkml.kernel.org/r/20250312141014.129725-1-mathieu.desnoyers@efficios.com Fixes: d773ed6b856a ("mm: test and set zone reclaim lock before starting reclaim") Signed-off-by: Mathieu Desnoyers Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Alan Stern Cc: Andrea Parri Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index be00af3763b5..bbd3913e3887 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7581,7 +7581,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) return NODE_RECLAIM_NOSCAN; ret = __node_reclaim(pgdat, gfp_mask, order); - clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); + clear_bit_unlock(PGDAT_RECLAIM_LOCKED, &pgdat->flags); if (ret) count_vm_event(PGSCAN_ZONE_RECLAIM_SUCCESS); From ca868cd77063ee670ade6d5d1554e3f5f223afd7 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 12 Mar 2025 10:10:14 -0400 Subject: [PATCH 1310/2157] mm: lock PGDAT_RECLAIM_LOCKED with acquire memory ordering The PGDAT_RECLAIM_LOCKED bit is used to provide mutual exclusion of node reclaim for struct pglist_data using a single bit. Use test_and_set_bit_lock rather than test_and_set_bit to test-and-set PGDAT_RECLAIM_LOCKED with an acquire memory ordering semantic. This changes the "lock" acquisition from a full barrier to an acquire memory ordering, which is weaker. The acquire semi-permeable barrier paired with the release on unlock is sufficient for this mutual exclusion use-case. No behavior change intended other than to reduce overhead by using the appropriate barrier. Link: https://lkml.kernel.org/r/20250312141014.129725-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Alan Stern Cc: Andrea Parri Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index bbd3913e3887..2bc740637a6c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7577,7 +7577,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) return NODE_RECLAIM_NOSCAN; - if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) + if (test_and_set_bit_lock(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) return NODE_RECLAIM_NOSCAN; ret = __node_reclaim(pgdat, gfp_mask, order); From 1a15bb8303b6b104e78028b6c68f76a0d4562134 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 12 Mar 2025 19:28:50 +0800 Subject: [PATCH 1311/2157] x86/mce: use is_copy_from_user() to determine copy-from-user context Patch series "mm/hwpoison: Fix regressions in memory failure handling", v4. ## 1. What am I trying to do: This patchset resolves two critical regressions related to memory failure handling that have appeared in the upstream kernel since version 5.17, as compared to 5.10 LTS. - copyin case: poison found in user page while kernel copying from user space - instr case: poison found while instruction fetching in user space ## 2. What is the expected outcome and why - For copyin case: Kernel can recover from poison found where kernel is doing get_user() or copy_from_user() if those places get an error return and the kernel return -EFAULT to the process instead of crashing. More specifily, MCE handler checks the fixup handler type to decide whether an in kernel #MC can be recovered. When EX_TYPE_UACCESS is found, the PC jumps to recovery code specified in _ASM_EXTABLE_FAULT() and return a -EFAULT to user space. - For instr case: If a poison found while instruction fetching in user space, full recovery is possible. User process takes #PF, Linux allocates a new page and fills by reading from storage. ## 3. What actually happens and why - For copyin case: kernel panic since v5.17 Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the extable fixup type for copy-from-user operations, changing it from EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. It breaks previous EX_TYPE_UACCESS handling when posion found in get_user() or copy_from_user(). - For instr case: user process is killed by a SIGBUS signal due to #CMCI and #MCE race When an uncorrected memory error is consumed there is a race between the CMCI from the memory controller reporting an uncorrected error with a UCNA signature, and the core reporting and SRAR signature machine check when the data is about to be consumed. ### Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1] Prior to Icelake memory controllers reported patrol scrub events that detected a previously unseen uncorrected error in memory by signaling a broadcast machine check with an SRAO (Software Recoverable Action Optional) signature in the machine check bank. This was overkill because it's not an urgent problem that no core is on the verge of consuming that bad data. It's also found that multi SRAO UCE may cause nested MCE interrupts and finally become an IERR. Hence, Intel downgrades the machine check bank signature of patrol scrub from SRAO to UCNA (Uncorrected, No Action required), and signal changed to #CMCI. Just to add to the confusion, Linux does take an action (in uc_decode_notifier()) to try to offline the page despite the UC*NA* signature name. ### Background: why #CMCI and #MCE race when poison is consuming in Intel platform [1] Having decided that CMCI/UCNA is the best action for patrol scrub errors, the memory controller uses it for reads too. But the memory controller is executing asynchronously from the core, and can't tell the difference between a "real" read and a speculative read. So it will do CMCI/UCNA if an error is found in any read. Thus: 1) Core is clever and thinks address A is needed soon, issues a speculative read. 2) Core finds it is going to use address A soon after sending the read request 3) The CMCI from the memory controller is in a race with MCE from the core that will soon try to retire the load from address A. Quite often (because speculation has got better) the CMCI from the memory controller is delivered before the core is committed to the instruction reading address A, so the interrupt is taken, and Linux offlines the page (marking it as poison). ## Why user process is killed for instr case Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported "not recovered"") tries to fix noise message "Memory error not recovered" and skips duplicate SIGBUSs due to the race. But it also introduced a bug that kill_accessing_process() return -EHWPOISON for instr case, as result, kill_me_maybe() send a SIGBUS to user process. # 4. The fix, in my opinion, should be: - For copyin case: The key point is whether the error context is in a read from user memory. We do not care about the ex-type if we know its a MOV reading from userspace. is_copy_from_user() return true when both of the following two checks are true: - the current instruction is copy - source address is user memory If copy_user is true, we set m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; Then do_machine_check() will try fixup_exception() first. - For instr case: let kill_accessing_process() return 0 to prevent a SIGBUS. - For patch 3: The return value of memory_failure() is quite important while discussed instr case regression with Tony and Miaohe for patch 2, so add comment about the return value. This patch (of 3): Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new extable fixup type, EX_TYPE_EFAULT_REG, and commit 4c132d1d844a ("x86/futex: Remove .fixup usage") updated the extable fixup type for copy-from-user operations, changing it from EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. The error context for copy-from-user operations no longer functions as an in-kernel recovery context. Consequently, the error context for copy-from-user operations no longer functions as an in-kernel recovery context, resulting in kernel panics with the message: "Machine check: Data load in unrecoverable area of kernel." To address this, it is crucial to identify if an error context involves a read operation from user memory. The function is_copy_from_user() can be utilized to determine: - the current operation is copy - when reading user memory When these conditions are met, is_copy_from_user() will return true, confirming that it is indeed a direct copy from user memory. This check is essential for correctly handling the context of errors in these operations without relying on the extable fixup types that previously allowed for in-kernel recovery. So, use is_copy_from_user() to determine if a context is copy user directly. Link: https://lkml.kernel.org/r/20250312112852.82415-1-xueshuai@linux.alibaba.com Link: https://lkml.kernel.org/r/20250312112852.82415-2-xueshuai@linux.alibaba.com Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage") Signed-off-by: Shuai Xue Suggested-by: Peter Zijlstra Acked-by: Borislav Petkov (AMD) Tested-by: Tony Luck Cc: Baolin Wang Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Ruidong Tian Cc: Thomas Gleinxer Cc: Yazen Ghannam Cc: Jane Chu Cc: Jarkko Sakkinen Cc: Jonathan Cameron Cc: Signed-off-by: Andrew Morton --- arch/x86/kernel/cpu/mce/severity.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..2235a7477436 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) copy_user = is_copy_from_user(regs); instrumentation_end(); - switch (fixup_type) { - case EX_TYPE_UACCESS: - if (!copy_user) - return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; + if (copy_user) { + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; + } + switch (fixup_type) { case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; From aaf99ac2ceb7c974f758a635723eeaf48596388e Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 12 Mar 2025 19:28:51 +0800 Subject: [PATCH 1312/2157] mm/hwpoison: do not send SIGBUS to processes with recovered clean pages When an uncorrected memory error is consumed there is a race between the CMCI from the memory controller reporting an uncorrected error with a UCNA signature, and the core reporting and SRAR signature machine check when the data is about to be consumed. - Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1] Prior to Icelake memory controllers reported patrol scrub events that detected a previously unseen uncorrected error in memory by signaling a broadcast machine check with an SRAO (Software Recoverable Action Optional) signature in the machine check bank. This was overkill because it's not an urgent problem that no core is on the verge of consuming that bad data. It's also found that multi SRAO UCE may cause nested MCE interrupts and finally become an IERR. Hence, Intel downgrades the machine check bank signature of patrol scrub from SRAO to UCNA (Uncorrected, No Action required), and signal changed to #CMCI. Just to add to the confusion, Linux does take an action (in uc_decode_notifier()) to try to offline the page despite the UC*NA* signature name. - Background: why #CMCI and #MCE race when poison is consuming in Intel platform [1] Having decided that CMCI/UCNA is the best action for patrol scrub errors, the memory controller uses it for reads too. But the memory controller is executing asynchronously from the core, and can't tell the difference between a "real" read and a speculative read. So it will do CMCI/UCNA if an error is found in any read. Thus: 1) Core is clever and thinks address A is needed soon, issues a speculative read. 2) Core finds it is going to use address A soon after sending the read request 3) The CMCI from the memory controller is in a race with MCE from the core that will soon try to retire the load from address A. Quite often (because speculation has got better) the CMCI from the memory controller is delivered before the core is committed to the instruction reading address A, so the interrupt is taken, and Linux offlines the page (marking it as poison). - Why user process is killed for instr case Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported "not recovered"") tries to fix noise message "Memory error not recovered" and skips duplicate SIGBUSs due to the race. But it also introduced a bug that kill_accessing_process() return -EHWPOISON for instr case, as result, kill_me_maybe() send a SIGBUS to user process. If the CMCI wins that race, the page is marked poisoned when uc_decode_notifier() calls memory_failure(). For dirty pages, memory_failure() invokes try_to_unmap() with the TTU_HWPOISON flag, converting the PTE to a hwpoison entry. As a result, kill_accessing_process(): - call walk_page_range() and return 1 regardless of whether try_to_unmap() succeeds or fails, - call kill_proc() to make sure a SIGBUS is sent - return -EHWPOISON to indicate that SIGBUS is already sent to the process and kill_me_maybe() doesn't have to send it again. However, for clean pages, the TTU_HWPOISON flag is cleared, leaving the PTE unchanged and not converted to a hwpoison entry. Conversely, for clean pages where PTE entries are not marked as hwpoison, kill_accessing_process() returns -EFAULT, causing kill_me_maybe() to send a SIGBUS. Console log looks like this: Memory failure: 0x827ca68: corrupted page was clean: dropped without side effects Memory failure: 0x827ca68: recovery action for clean LRU page: Recovered Memory failure: 0x827ca68: already hardware poisoned mce: Memory error not recovered To fix it, return 0 for "corrupted page was clean", preventing an unnecessary SIGBUS to user process. [1] https://lore.kernel.org/lkml/20250217063335.22257-1-xueshuai@linux.alibaba.com/T/#mba94f1305b3009dd340ce4114d3221fe810d1871 Link: https://lkml.kernel.org/r/20250312112852.82415-3-xueshuai@linux.alibaba.com Fixes: 046545a661af ("mm/hwpoison: fix error page recovered but reported "not recovered"") Signed-off-by: Shuai Xue Tested-by: Tony Luck Acked-by: Miaohe Lin Cc: Baolin Wang Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jane Chu Cc: Jarkko Sakkinen Cc: Jonathan Cameron Cc: Josh Poimboeuf Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Ruidong Tian Cc: Thomas Gleinxer Cc: Yazen Ghannam Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6257c7f5e941..00d6da57ab06 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -881,12 +881,17 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, mmap_read_lock(p->mm); ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops, (void *)&priv); + /* + * ret = 1 when CMCI wins, regardless of whether try_to_unmap() + * succeeds or fails, then kill the process with SIGBUS. + * ret = 0 when poison page is a clean page and it's dropped, no + * SIGBUS is needed. + */ if (ret == 1 && priv.tk.addr) kill_proc(&priv.tk, pfn, flags); - else - ret = 0; mmap_read_unlock(p->mm); - return ret > 0 ? -EHWPOISON : -EFAULT; + + return ret > 0 ? -EHWPOISON : 0; } /* From d2734f044f84833b2c9ec1b71b542d299d35202b Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 12 Mar 2025 19:28:52 +0800 Subject: [PATCH 1313/2157] mm: memory-failure: enhance comments for return value of memory_failure() The comments for the return value of memory_failure are not complete, supplement the comments. Link: https://lkml.kernel.org/r/20250312112852.82415-4-xueshuai@linux.alibaba.com Signed-off-by: Shuai Xue Reviewed-by: Jarkko Sakkinen Reviewed-by: Jonathan Cameron Reviewed-by: Yazen Ghannam Reviewed-by: Jane Chu Acked-by: Miaohe Lin Tested-by: Tony Luck Cc: Baolin Wang Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Ruidong Tian Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- mm/memory-failure.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 00d6da57ab06..b91a33fb6c69 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2215,9 +2215,13 @@ static void kill_procs_now(struct page *p, unsigned long pfn, int flags, * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks held. * - * Return: 0 for successfully handled the memory error, - * -EOPNOTSUPP for hwpoison_filter() filtered the error event, - * < 0(except -EOPNOTSUPP) on failure. + * Return: + * 0 - success, + * -ENXIO - memory not managed by the kernel + * -EOPNOTSUPP - hwpoison_filter() filtered the error event, + * -EHWPOISON - the page was already poisoned, potentially + * kill process, + * other negative values - failure. */ int memory_failure(unsigned long pfn, int flags) { From ce50f4bc42af4d3811d3863ed14b0c33ab5cef81 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 12 Mar 2025 11:52:45 +0100 Subject: [PATCH 1314/2157] MAINTAINERS: adjust file entry in MAPLE TREE Commit 0f3b602e1bad ("tools: separate out shared radix-tree components") moves files from radix-tree/linux to shared/linux in the ./tools/testing/ directory, but misses to adjust a file entry in MAPLE TREE. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken reference. Adjust the file entry in MAPLE TREE. Link: https://lkml.kernel.org/r/20250312105245.216302-1-lukas.bulwahn@redhat.com Signed-off-by: Lukas Bulwahn Acked-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fb408698086e..e714ea3a7c9f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13922,8 +13922,8 @@ F: include/linux/maple_tree.h F: include/trace/events/maple_tree.h F: lib/maple_tree.c F: lib/test_maple_tree.c -F: tools/testing/radix-tree/linux/maple_tree.h F: tools/testing/radix-tree/maple.c +F: tools/testing/shared/linux/maple_tree.h MARDUK (CREATOR CI40) DEVICE TREE SUPPORT M: Rahul Bedarkar From 456620c5cb238744f3e08a04af935fce25ca2df1 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Wed, 12 Mar 2025 17:37:17 +0800 Subject: [PATCH 1315/2157] mm/debug: add line breaks Missing a newline character at the end of the format string. Link: https://lkml.kernel.org/r/20250312093717.364031-1-liuye@kylinos.cn Signed-off-by: Liu Ye Signed-off-by: Andrew Morton --- mm/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/debug.c b/mm/debug.c index 83ef3bd0ccd3..db83e381a8ae 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -173,7 +173,7 @@ static void __dump_page(const struct page *page) void dump_page(const struct page *page, const char *reason) { if (PagePoisoned(page)) - pr_warn("page:%p is uninitialized and poisoned", page); + pr_warn("page:%p is uninitialized and poisoned\n", page); else __dump_page(page); if (reason) From f841ad9ca5007167c02de143980c9dc703f90b3d Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Wed, 12 Mar 2025 12:38:40 +0800 Subject: [PATCH 1316/2157] selftests/mm/cow: fix the incorrect error handling Error handling doesn't check the correct return value. This patch will fix it. Link: https://lkml.kernel.org/r/20250312043840.71799-1-cyan.yang@sifive.com Fixes: f4b5fd6946e2 ("selftests/vm: anon_cow: THP tests") Signed-off-by: Cyan Yang Reviewed-by: Dev Jain Reviewed-by: Muhammad Usama Anjum Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 9446673645eb..f0cb14ea8608 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -876,7 +876,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { + if (mremap_mem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } From 8defffa4c7b5d19a9a480aec675003f9c9e7daf6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 13 Mar 2025 15:14:56 +0000 Subject: [PATCH 1317/2157] mm: convert lru_add_page_tail() to lru_add_split_folio() Remove three hidden calls to compound_head() and accesses to page->lru. Link: https://lkml.kernel.org/r/20250313151458.4145978-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Signed-off-by: Andrew Morton --- mm/huge_memory.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e3ed8e9523f5..10a86b681cf1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3262,25 +3262,25 @@ static void remap_page(struct folio *folio, unsigned long nr, int flags) } } -static void lru_add_page_tail(struct folio *folio, struct page *tail, +static void lru_add_split_folio(struct folio *folio, struct folio *new_folio, struct lruvec *lruvec, struct list_head *list) { - VM_BUG_ON_FOLIO(PageLRU(tail), folio); + VM_BUG_ON_FOLIO(folio_test_lru(new_folio), folio); lockdep_assert_held(&lruvec->lru_lock); if (list) { /* page reclaim is reclaiming a huge page */ VM_WARN_ON(folio_test_lru(folio)); - get_page(tail); - list_add_tail(&tail->lru, list); + folio_get(new_folio); + list_add_tail(&new_folio->lru, list); } else { /* head is still on lru (and we have it frozen) */ VM_WARN_ON(!folio_test_lru(folio)); if (folio_test_unevictable(folio)) - tail->mlock_count = 0; + new_folio->mlock_count = 0; else - list_add_tail(&tail->lru, &folio->lru); - SetPageLRU(tail); + list_add_tail(&new_folio->lru, &folio->lru); + folio_set_lru(new_folio); } } @@ -3581,8 +3581,8 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, ((mapping || swap_cache) ? folio_nr_pages(release) : 0)); - lru_add_page_tail(origin_folio, &release->page, - lruvec, list); + lru_add_split_folio(origin_folio, release, lruvec, + list); /* Some pages can be beyond EOF: drop them from cache */ if (release->index >= end) { From 67914ac08604345f620566ccf5bac87b40d5881d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Mar 2025 17:05:32 -0400 Subject: [PATCH 1318/2157] mm: compaction: push watermark into compaction_suitable() callers Patch series "mm: reliable huge page allocator". This series makes changes to the allocator and reclaim/compaction code to try harder to avoid fragmentation. As a result, this makes huge page allocations cheaper, more reliable and more sustainable. It's a subset of the huge page allocator RFC initially proposed here: https://lore.kernel.org/lkml/20230418191313.268131-1-hannes@cmpxchg.org/ The following results are from a kernel build test, with additional concurrent bursts of THP allocations on a memory-constrained system. Comparing before and after the changes over 15 runs: before after Hugealloc Time mean 52739.45 ( +0.00%) 28904.00 ( -45.19%) Hugealloc Time stddev 56541.26 ( +0.00%) 33464.37 ( -40.81%) Kbuild Real time 197.47 ( +0.00%) 196.59 ( -0.44%) Kbuild User time 1240.49 ( +0.00%) 1231.67 ( -0.71%) Kbuild System time 70.08 ( +0.00%) 59.10 ( -15.45%) THP fault alloc 46727.07 ( +0.00%) 63223.67 ( +35.30%) THP fault fallback 21910.60 ( +0.00%) 5412.47 ( -75.29%) Direct compact fail 195.80 ( +0.00%) 59.07 ( -69.48%) Direct compact success 7.93 ( +0.00%) 2.80 ( -57.46%) Direct compact success rate % 3.51 ( +0.00%) 3.99 ( +10.49%) Compact daemon scanned migrate 3369601.27 ( +0.00%) 2267500.33 ( -32.71%) Compact daemon scanned free 5075474.47 ( +0.00%) 2339773.00 ( -53.90%) Compact direct scanned migrate 161787.27 ( +0.00%) 47659.93 ( -70.54%) Compact direct scanned free 163467.53 ( +0.00%) 40729.67 ( -75.08%) Compact total migrate scanned 3531388.53 ( +0.00%) 2315160.27 ( -34.44%) Compact total free scanned 5238942.00 ( +0.00%) 2380502.67 ( -54.56%) Alloc stall 2371.07 ( +0.00%) 638.87 ( -73.02%) Pages kswapd scanned 2160926.73 ( +0.00%) 4002186.33 ( +85.21%) Pages kswapd reclaimed 533191.07 ( +0.00%) 718577.80 ( +34.77%) Pages direct scanned 400450.33 ( +0.00%) 355172.73 ( -11.31%) Pages direct reclaimed 94441.73 ( +0.00%) 31162.80 ( -67.00%) Pages total scanned 2561377.07 ( +0.00%) 4357359.07 ( +70.12%) Pages total reclaimed 627632.80 ( +0.00%) 749740.60 ( +19.46%) Swap out 47959.53 ( +0.00%) 110084.33 ( +129.53%) Swap in 7276.00 ( +0.00%) 24457.00 ( +236.10%) File refaults 138043.00 ( +0.00%) 188226.93 ( +36.35%) THP latencies are cut in half, and failure rates are cut by 75%. These metrics also hold up over time, while the vanilla kernel sees a steady downward trend in success rates with each subsequent run, owed to the cumulative effects of fragmentation. A more detailed discussion of results is in the patch changelogs. The patches first introduce a vm.defrag_mode sysctl, which enforces the existing ALLOC_NOFRAGMENT alloc flag until after reclaim and compaction have run. They then change kswapd and kcompactd to target pageblocks, which boosts success in the ALLOC_NOFRAGMENT hotpaths. Patches #1 and #2 are somewhat unrelated cleanups, but touch the same code and so are included here to avoid conflicts from re-ordering. This patch (of 5): compaction_suitable() hardcodes the min watermark, with a boost to the low watermark for costly orders. However, compaction_ready() requires order-0 at the high watermark. It currently checks the marks twice. Make the watermark a parameter to compaction_suitable() and have the callers pass in what they require: - compaction_zonelist_suitable() is used by the direct reclaim path, so use the min watermark. - compact_suit_allocation_order() has a watermark in context derived from cc->alloc_flags. The only quirk is that kcompactd doesn't initialize cc->alloc_flags explicitly. There is a direct check in kcompactd_do_work() that passes ALLOC_WMARK_MIN, but there is another check downstack in compact_zone() that ends up passing the unset alloc_flags. Since they default to 0, and that coincides with ALLOC_WMARK_MIN, it is correct. But it's subtle. Set cc->alloc_flags explicitly. - should_continue_reclaim() is direct reclaim, use the min watermark. - Finally, consolidate the two checks in compaction_ready() to a single compaction_suitable() call passing the high watermark. There is a tiny change in behavior: before, compaction_suitable() would check order-0 against min or low, depending on costly order. Then there'd be another high watermark check. Now, the high watermark is passed to compaction_suitable(), and the costly order-boost (low - min) is added on top. This means compaction_ready() sets a marginally higher target for free pages. In a kernelbuild + THP pressure test, though, this didn't show any measurable negative effects on memory pressure or reclaim rates. As the comment above the check says, reclaim is usually stopped short on should_continue_reclaim(), and this just defines the worst-case reclaim cutoff in case compaction is not making any headway. [hughd@google.com: stop oops on out-of-range highest_zoneidx] Link: https://lkml.kernel.org/r/005ace8b-07fa-01d4-b54b-394a3e029c07@google.com Link: https://lkml.kernel.org/r/20250313210647.1314586-1-hannes@cmpxchg.org Link: https://lkml.kernel.org/r/20250313210647.1314586-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Hugh Dickins Acked-by: Zi Yan Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/compaction.h | 5 ++-- mm/compaction.c | 52 ++++++++++++++++++++------------------ mm/vmscan.c | 26 ++++++++++--------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 7bf0c521db63..173d9c07a895 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -95,7 +95,7 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx); + unsigned long watermark, int highest_zoneidx); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); @@ -113,7 +113,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) } static inline bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx) + unsigned long watermark, + int highest_zoneidx) { return false; } diff --git a/mm/compaction.c b/mm/compaction.c index 2e2d4db33e68..cf32e8053edb 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2381,40 +2381,42 @@ static enum compact_result compact_finished(struct compact_control *cc) } static bool __compaction_suitable(struct zone *zone, int order, - int highest_zoneidx, - unsigned long wmark_target) + unsigned long watermark, int highest_zoneidx, + unsigned long free_pages) { - unsigned long watermark; /* * Watermarks for order-0 must be met for compaction to be able to * isolate free pages for migration targets. This means that the - * watermark and alloc_flags have to match, or be more pessimistic than - * the check in __isolate_free_page(). We don't use the direct - * compactor's alloc_flags, as they are not relevant for freepage - * isolation. We however do use the direct compactor's highest_zoneidx - * to skip over zones where lowmem reserves would prevent allocation - * even if compaction succeeds. - * For costly orders, we require low watermark instead of min for - * compaction to proceed to increase its chances. + * watermark have to match, or be more pessimistic than the check in + * __isolate_free_page(). + * + * For costly orders, we require a higher watermark for compaction to + * proceed to increase its chances. + * + * We use the direct compactor's highest_zoneidx to skip over zones + * where lowmem reserves would prevent allocation even if compaction + * succeeds. + * * ALLOC_CMA is used, as pages in CMA pageblocks are considered - * suitable migration targets + * suitable migration targets. */ - watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? - low_wmark_pages(zone) : min_wmark_pages(zone); watermark += compact_gap(order); + if (order > PAGE_ALLOC_COSTLY_ORDER) + watermark += low_wmark_pages(zone) - min_wmark_pages(zone); return __zone_watermark_ok(zone, 0, watermark, highest_zoneidx, - ALLOC_CMA, wmark_target); + ALLOC_CMA, free_pages); } /* * compaction_suitable: Is this suitable to run compaction on this zone now? */ -bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx) +bool compaction_suitable(struct zone *zone, int order, unsigned long watermark, + int highest_zoneidx) { enum compact_result compact_result; bool suitable; - suitable = __compaction_suitable(zone, order, highest_zoneidx, + suitable = __compaction_suitable(zone, order, watermark, highest_zoneidx, zone_page_state(zone, NR_FREE_PAGES)); /* * fragmentation index determines if allocation failures are due to @@ -2452,6 +2454,7 @@ bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx) return suitable; } +/* Used by direct reclaimers */ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, int alloc_flags) { @@ -2474,8 +2477,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, */ available = zone_reclaimable_pages(zone) / order; available += zone_page_state_snapshot(zone, NR_FREE_PAGES); - if (__compaction_suitable(zone, order, ac->highest_zoneidx, - available)) + if (__compaction_suitable(zone, order, min_wmark_pages(zone), + ac->highest_zoneidx, available)) return true; } @@ -2512,13 +2515,13 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order, */ if (order > PAGE_ALLOC_COSTLY_ORDER && async && !(alloc_flags & ALLOC_CMA)) { - watermark = low_wmark_pages(zone) + compact_gap(order); - if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx, - 0, zone_page_state(zone, NR_FREE_PAGES))) + if (!__zone_watermark_ok(zone, 0, watermark + compact_gap(order), + highest_zoneidx, 0, + zone_page_state(zone, NR_FREE_PAGES))) return COMPACT_SKIPPED; } - if (!compaction_suitable(zone, order, highest_zoneidx)) + if (!compaction_suitable(zone, order, watermark, highest_zoneidx)) return COMPACT_SKIPPED; return COMPACT_CONTINUE; @@ -3081,6 +3084,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) .mode = MIGRATE_SYNC_LIGHT, .ignore_skip_hint = false, .gfp_mask = GFP_KERNEL, + .alloc_flags = ALLOC_WMARK_MIN, }; enum compact_result ret; @@ -3099,7 +3103,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) continue; ret = compaction_suit_allocation_order(zone, - cc.order, zoneid, ALLOC_WMARK_MIN, + cc.order, zoneid, cc.alloc_flags, false); if (ret != COMPACT_CONTINUE) continue; diff --git a/mm/vmscan.c b/mm/vmscan.c index 2bc740637a6c..3370bdca6868 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5890,12 +5890,15 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, /* If compaction would go ahead or the allocation would succeed, stop */ for_each_managed_zone_pgdat(zone, pgdat, z, sc->reclaim_idx) { + unsigned long watermark = min_wmark_pages(zone); + /* Allocation can already succeed, nothing to do */ - if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone), + if (zone_watermark_ok(zone, sc->order, watermark, sc->reclaim_idx, 0)) return false; - if (compaction_suitable(zone, sc->order, sc->reclaim_idx)) + if (compaction_suitable(zone, sc->order, watermark, + sc->reclaim_idx)) return false; } @@ -6122,22 +6125,21 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) sc->reclaim_idx, 0)) return true; - /* Compaction cannot yet proceed. Do reclaim. */ - if (!compaction_suitable(zone, sc->order, sc->reclaim_idx)) - return false; - /* - * Compaction is already possible, but it takes time to run and there - * are potentially other callers using the pages just freed. So proceed - * with reclaim to make a buffer of free pages available to give - * compaction a reasonable chance of completing and allocating the page. + * Direct reclaim usually targets the min watermark, but compaction + * takes time to run and there are potentially other callers using the + * pages just freed. So target a higher buffer to give compaction a + * reasonable chance of completing and allocating the pages. + * * Note that we won't actually reclaim the whole buffer in one attempt * as the target watermark in should_continue_reclaim() is lower. But if * we are already above the high+gap watermark, don't reclaim at all. */ - watermark = high_wmark_pages(zone) + compact_gap(sc->order); + watermark = high_wmark_pages(zone); + if (compaction_suitable(zone, sc->order, watermark, sc->reclaim_idx)) + return true; - return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); + return false; } static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) From f46012c0ec9f544998b81b2e3c6c702b9277f596 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Mar 2025 17:05:33 -0400 Subject: [PATCH 1319/2157] mm: page_alloc: trace type pollution from compaction capturing When the page allocator places pages of a certain migratetype into blocks of another type, it has lasting effects on the ability to compact and defragment down the line. For improving placement and compaction, visibility into such events is crucial. The most common case, allocator fallbacks, is already annotated, but compaction capturing is also allowed to grab pages of a different type. Extend the tracepoint to cover this case. Link: https://lkml.kernel.org/r/20250313210647.1314586-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Zi Yan Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0be1fedd1201..5b92b1acda0e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -614,6 +614,10 @@ compaction_capture(struct capture_control *capc, struct page *page, capc->cc->migratetype != MIGRATE_MOVABLE) return false; + if (migratetype != capc->cc->migratetype) + trace_mm_page_alloc_extfrag(page, capc->cc->order, order, + capc->cc->migratetype, migratetype); + capc->page = page; return true; } From e3aa7df331bca08742a212764348246e8e8a874e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Mar 2025 17:05:34 -0400 Subject: [PATCH 1320/2157] mm: page_alloc: defrag_mode The page allocator groups requests by migratetype to stave off fragmentation. However, in practice this is routinely defeated by the fact that it gives up *before* invoking reclaim and compaction - which may well produce suitable pages. As a result, fragmentation of physical memory is a common ongoing process in many load scenarios. Fragmentation deteriorates compaction's ability to produce huge pages. Depending on the lifetime of the fragmenting allocations, those effects can be long-lasting or even permanent, requiring drastic measures like forcible idle states or even reboots as the only reliable ways to recover the address space for THP production. In a kernel build test with supplemental THP pressure, the THP allocation rate steadily declines over 15 runs: thp_fault_alloc 61988 56474 57258 50187 52388 55409 52925 47648 43669 40621 36077 41721 36685 34641 33215 This is a hurdle in adopting THP in any environment where hosts are shared between multiple overlapping workloads (cloud environments), and rarely experience true idle periods. To make THP a reliable and predictable optimization, there needs to be a stronger guarantee to avoid such fragmentation. Introduce defrag_mode. When enabled, reclaim/compaction is invoked to its full extent *before* falling back. Specifically, ALLOC_NOFRAGMENT is enforced on the allocator fastpath and the reclaiming slowpath. For now, fallbacks are permitted to avert OOMs. There is a plan to add defrag_mode=2 to prefer OOMs over fragmentation, but this requires additional prep work in compaction and the reserve management to make it ready for all possible allocation contexts. The following test results are from a kernel build with periodic bursts of THP allocations, over 15 runs: vanilla defrag_mode=1 @claimer[unmovable]: 189 103 @claimer[movable]: 92 103 @claimer[reclaimable]: 207 61 @pollute[unmovable from movable]: 25 0 @pollute[unmovable from reclaimable]: 28 0 @pollute[movable from unmovable]: 38835 0 @pollute[movable from reclaimable]: 147136 0 @pollute[reclaimable from unmovable]: 178 0 @pollute[reclaimable from movable]: 33 0 @steal[unmovable from movable]: 11 0 @steal[unmovable from reclaimable]: 5 0 @steal[reclaimable from unmovable]: 107 0 @steal[reclaimable from movable]: 90 0 @steal[movable from reclaimable]: 354 0 @steal[movable from unmovable]: 130 0 Both types of polluting fallbacks are eliminated in this workload. Interestingly, whole block conversions are reduced as well. This is because once a block is claimed for a type, its empty space remains available for future allocations, instead of being padded with fallbacks; this allows the native type to group up instead of spreading out to new blocks. The assumption in the allocator has been that pollution from movable allocations is less harmful than from other types, since they can be reclaimed or migrated out should the space be needed. However, since fallbacks occur *before* reclaim/compaction is invoked, movable pollution will still cause non-movable allocations to spread out and claim more blocks. Without fragmentation, THP rates hold steady with defrag_mode=1: thp_fault_alloc 32478 20725 45045 32130 14018 21711 40791 29134 34458 45381 28305 17265 22584 28454 30850 While the downward trend is eliminated, the keen reader will of course notice that the baseline rate is much smaller than the vanilla kernel's to begin with. This is due to deficiencies in how reclaim and compaction are currently driven: ALLOC_NOFRAGMENT increases the extent to which smaller allocations are competing with THPs for pageblocks, while making no effort themselves to reclaim or compact beyond their own request size. This effect already exists with the current usage of ALLOC_NOFRAGMENT, but is amplified by defrag_mode insisting on whole block stealing much more strongly. Subsequent patches will address defrag_mode reclaim strategy to raise the THP success baseline above the vanilla kernel. Link: https://lkml.kernel.org/r/20250313210647.1314586-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/vm.rst | 9 +++++++++ mm/page_alloc.c | 27 +++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index f48eaa98d22d..8290177b4f75 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm: - compact_memory - compaction_proactiveness - compact_unevictable_allowed +- defrag_mode - dirty_background_bytes - dirty_background_ratio - dirty_bytes @@ -145,6 +146,14 @@ On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due to compaction, which would block the task from becoming active until the fault is resolved. +defrag_mode +=========== + +When set to 1, the page allocator tries harder to avoid fragmentation +and maintain the ability to produce huge pages / higher-order pages. + +It is recommended to enable this right after boot, as fragmentation, +once it occurred, can be long-lasting or even permanent. dirty_background_bytes ====================== diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5b92b1acda0e..f849eb7146b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -273,6 +273,7 @@ int min_free_kbytes = 1024; int user_min_free_kbytes = -1; static int watermark_boost_factor __read_mostly = 15000; static int watermark_scale_factor = 10; +static int defrag_mode; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ int movable_zone; @@ -3389,6 +3390,11 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) */ alloc_flags = (__force int) (gfp_mask & __GFP_KSWAPD_RECLAIM); + if (defrag_mode) { + alloc_flags |= ALLOC_NOFRAGMENT; + return alloc_flags; + } + #ifdef CONFIG_ZONE_DMA32 if (!zone) return alloc_flags; @@ -3480,7 +3486,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, continue; } - if (no_fallback && nr_online_nodes > 1 && + if (no_fallback && !defrag_mode && nr_online_nodes > 1 && zone != zonelist_zone(ac->preferred_zoneref)) { int local_nid; @@ -3591,7 +3597,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, * It's possible on a UMA machine to get through all zones that are * fragmented. If avoiding fragmentation, reset and try again. */ - if (no_fallback) { + if (no_fallback && !defrag_mode) { alloc_flags &= ~ALLOC_NOFRAGMENT; goto retry; } @@ -4128,6 +4134,9 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags); + if (defrag_mode) + alloc_flags |= ALLOC_NOFRAGMENT; + return alloc_flags; } @@ -4510,6 +4519,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, &compaction_retries)) goto retry; + /* Reclaim/compaction failed to prevent the fallback */ + if (defrag_mode) { + alloc_flags &= ALLOC_NOFRAGMENT; + goto retry; + } /* * Deal with possible cpuset update races or zonelist updates to avoid @@ -6286,6 +6300,15 @@ static const struct ctl_table page_alloc_sysctl_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_THREE_THOUSAND, }, + { + .procname = "defrag_mode", + .data = &defrag_mode, + .maxlen = sizeof(defrag_mode), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { .procname = "percpu_pagelist_high_fraction", .data = &percpu_pagelist_high_fraction, From 101f9d666e4d730e80caabe02446e8592ac44592 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Mar 2025 17:05:35 -0400 Subject: [PATCH 1321/2157] mm: page_alloc: defrag_mode kswapd/kcompactd assistance When defrag_mode is enabled, allocation fallbacks strongly prefer whole block conversions instead of polluting or stealing partially used blocks. This means there is a demand for pageblocks even from sub-block requests. Let kswapd/kcompactd help produce them. By the time kswapd gets woken up, normal rmqueue and block conversion fallbacks have been attempted and failed. So always wake kswapd with the block order; it will take care of producing a suitable compaction gap and then chain-wake kcompactd with the block order when its done. VANILLA DEFRAGMODE-ASYNC Hugealloc Time mean 52739.45 ( +0.00%) 34300.36 ( -34.96%) Hugealloc Time stddev 56541.26 ( +0.00%) 36390.42 ( -35.64%) Kbuild Real time 197.47 ( +0.00%) 196.13 ( -0.67%) Kbuild User time 1240.49 ( +0.00%) 1234.74 ( -0.46%) Kbuild System time 70.08 ( +0.00%) 62.62 ( -10.50%) THP fault alloc 46727.07 ( +0.00%) 57054.53 ( +22.10%) THP fault fallback 21910.60 ( +0.00%) 11581.40 ( -47.14%) Direct compact fail 195.80 ( +0.00%) 107.80 ( -44.72%) Direct compact success 7.93 ( +0.00%) 4.53 ( -38.06%) Direct compact success rate % 3.51 ( +0.00%) 3.20 ( -6.89%) Compact daemon scanned migrate 3369601.27 ( +0.00%) 5461033.93 ( +62.07%) Compact daemon scanned free 5075474.47 ( +0.00%) 5824897.93 ( +14.77%) Compact direct scanned migrate 161787.27 ( +0.00%) 58336.93 ( -63.94%) Compact direct scanned free 163467.53 ( +0.00%) 32791.87 ( -79.94%) Compact total migrate scanned 3531388.53 ( +0.00%) 5519370.87 ( +56.29%) Compact total free scanned 5238942.00 ( +0.00%) 5857689.80 ( +11.81%) Alloc stall 2371.07 ( +0.00%) 2424.60 ( +2.26%) Pages kswapd scanned 2160926.73 ( +0.00%) 2657018.33 ( +22.96%) Pages kswapd reclaimed 533191.07 ( +0.00%) 559583.07 ( +4.95%) Pages direct scanned 400450.33 ( +0.00%) 722094.07 ( +80.32%) Pages direct reclaimed 94441.73 ( +0.00%) 107257.80 ( +13.57%) Pages total scanned 2561377.07 ( +0.00%) 3379112.40 ( +31.93%) Pages total reclaimed 627632.80 ( +0.00%) 666840.87 ( +6.25%) Swap out 47959.53 ( +0.00%) 77238.20 ( +61.05%) Swap in 7276.00 ( +0.00%) 11712.80 ( +60.97%) File refaults 138043.00 ( +0.00%) 143438.80 ( +3.91%) With this patch, defrag_mode=1 beats the vanilla kernel in THP success rates and allocation latencies. The trend holds over time: thp_fault_alloc VANILLA DEFRAGMODE-ASYNC 61988 52066 56474 58844 57258 58233 50187 58476 52388 54516 55409 59938 52925 57204 47648 60238 43669 55733 40621 56211 36077 59861 41721 57771 36685 58579 34641 51868 33215 56280 DEFRAGMODE-ASYNC also wins on %sys as ~3/4 of the direct compaction work is shifted to kcompactd. Reclaim activity is higher. Part of that is simply due to the increased memory footprint from higher THP use. The other aspect is that *direct* reclaim/compaction are still going for requested orders rather than targeting the page blocks required for fallbacks, which is less efficient than it could be. However, this is already a useful tradeoff to make, as in many environments peak periods are short and retaining the ability to produce THP through them is more important. Link: https://lkml.kernel.org/r/20250313210647.1314586-5-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/page_alloc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f849eb7146b9..5a2ee82f723e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4076,15 +4076,21 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask, struct zone *zone; pg_data_t *last_pgdat = NULL; enum zone_type highest_zoneidx = ac->highest_zoneidx; + unsigned int reclaim_order; + + if (defrag_mode) + reclaim_order = max(order, pageblock_order); + else + reclaim_order = order; for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx, ac->nodemask) { if (!managed_zone(zone)) continue; - if (last_pgdat != zone->zone_pgdat) { - wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx); - last_pgdat = zone->zone_pgdat; - } + if (last_pgdat == zone->zone_pgdat) + continue; + wakeup_kswapd(zone, gfp_mask, reclaim_order, highest_zoneidx); + last_pgdat = zone->zone_pgdat; } } From a211c6550efcc87aa2459ca347bda10721c7a46a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 13 Mar 2025 17:05:36 -0400 Subject: [PATCH 1322/2157] mm: page_alloc: defrag_mode kswapd/kcompactd watermarks The previous patch added pageblock_order reclaim to kswapd/kcompactd, which helps, but produces only one block at a time. Allocation stalls and THP failure rates are still higher than they could be. To adequately reflect ALLOC_NOFRAGMENT demand for pageblocks, change the watermarking for kswapd & kcompactd: instead of targeting the high watermark in order-0 pages and checking for one suitable block, simply require that the high watermark is entirely met in pageblocks. To this end, track the number of free pages within contiguous pageblocks, then change pgdat_balanced() and compact_finished() to check watermarks against this new value. This further reduces THP latencies and allocation stalls, and improves THP success rates against the previous patch: DEFRAGMODE-ASYNC DEFRAGMODE-ASYNC-WMARKS Hugealloc Time mean 34300.36 ( +0.00%) 28904.00 ( -15.73%) Hugealloc Time stddev 36390.42 ( +0.00%) 33464.37 ( -8.04%) Kbuild Real time 196.13 ( +0.00%) 196.59 ( +0.23%) Kbuild User time 1234.74 ( +0.00%) 1231.67 ( -0.25%) Kbuild System time 62.62 ( +0.00%) 59.10 ( -5.54%) THP fault alloc 57054.53 ( +0.00%) 63223.67 ( +10.81%) THP fault fallback 11581.40 ( +0.00%) 5412.47 ( -53.26%) Direct compact fail 107.80 ( +0.00%) 59.07 ( -44.79%) Direct compact success 4.53 ( +0.00%) 2.80 ( -31.33%) Direct compact success rate % 3.20 ( +0.00%) 3.99 ( +18.66%) Compact daemon scanned migrate 5461033.93 ( +0.00%) 2267500.33 ( -58.48%) Compact daemon scanned free 5824897.93 ( +0.00%) 2339773.00 ( -59.83%) Compact direct scanned migrate 58336.93 ( +0.00%) 47659.93 ( -18.30%) Compact direct scanned free 32791.87 ( +0.00%) 40729.67 ( +24.21%) Compact total migrate scanned 5519370.87 ( +0.00%) 2315160.27 ( -58.05%) Compact total free scanned 5857689.80 ( +0.00%) 2380502.67 ( -59.36%) Alloc stall 2424.60 ( +0.00%) 638.87 ( -73.62%) Pages kswapd scanned 2657018.33 ( +0.00%) 4002186.33 ( +50.63%) Pages kswapd reclaimed 559583.07 ( +0.00%) 718577.80 ( +28.41%) Pages direct scanned 722094.07 ( +0.00%) 355172.73 ( -50.81%) Pages direct reclaimed 107257.80 ( +0.00%) 31162.80 ( -70.95%) Pages total scanned 3379112.40 ( +0.00%) 4357359.07 ( +28.95%) Pages total reclaimed 666840.87 ( +0.00%) 749740.60 ( +12.43%) Swap out 77238.20 ( +0.00%) 110084.33 ( +42.53%) Swap in 11712.80 ( +0.00%) 24457.00 ( +108.80%) File refaults 143438.80 ( +0.00%) 188226.93 ( +31.22%) Also of note is that compaction work overall is reduced. The reason for this is that when free pageblocks are more readily available, allocations are also much more likely to get physically placed in LRU order, instead of being forced to scavenge free space here and there. This means that reclaim by itself has better chances of freeing up whole blocks, and the system relies less on compaction. Comparing all changes to the vanilla kernel: VANILLA DEFRAGMODE-ASYNC-WMARKS Hugealloc Time mean 52739.45 ( +0.00%) 28904.00 ( -45.19%) Hugealloc Time stddev 56541.26 ( +0.00%) 33464.37 ( -40.81%) Kbuild Real time 197.47 ( +0.00%) 196.59 ( -0.44%) Kbuild User time 1240.49 ( +0.00%) 1231.67 ( -0.71%) Kbuild System time 70.08 ( +0.00%) 59.10 ( -15.45%) THP fault alloc 46727.07 ( +0.00%) 63223.67 ( +35.30%) THP fault fallback 21910.60 ( +0.00%) 5412.47 ( -75.29%) Direct compact fail 195.80 ( +0.00%) 59.07 ( -69.48%) Direct compact success 7.93 ( +0.00%) 2.80 ( -57.46%) Direct compact success rate % 3.51 ( +0.00%) 3.99 ( +10.49%) Compact daemon scanned migrate 3369601.27 ( +0.00%) 2267500.33 ( -32.71%) Compact daemon scanned free 5075474.47 ( +0.00%) 2339773.00 ( -53.90%) Compact direct scanned migrate 161787.27 ( +0.00%) 47659.93 ( -70.54%) Compact direct scanned free 163467.53 ( +0.00%) 40729.67 ( -75.08%) Compact total migrate scanned 3531388.53 ( +0.00%) 2315160.27 ( -34.44%) Compact total free scanned 5238942.00 ( +0.00%) 2380502.67 ( -54.56%) Alloc stall 2371.07 ( +0.00%) 638.87 ( -73.02%) Pages kswapd scanned 2160926.73 ( +0.00%) 4002186.33 ( +85.21%) Pages kswapd reclaimed 533191.07 ( +0.00%) 718577.80 ( +34.77%) Pages direct scanned 400450.33 ( +0.00%) 355172.73 ( -11.31%) Pages direct reclaimed 94441.73 ( +0.00%) 31162.80 ( -67.00%) Pages total scanned 2561377.07 ( +0.00%) 4357359.07 ( +70.12%) Pages total reclaimed 627632.80 ( +0.00%) 749740.60 ( +19.46%) Swap out 47959.53 ( +0.00%) 110084.33 ( +129.53%) Swap in 7276.00 ( +0.00%) 24457.00 ( +236.10%) File refaults 138043.00 ( +0.00%) 188226.93 ( +36.35%) THP allocation latencies and %sys time are down dramatically. THP allocation failures are down from nearly 50% to 8.5%. And to recall previous data points, the success rates are steady and reliable without the cumulative deterioration of fragmentation events. Compaction work is down overall. Direct compaction work especially is drastically reduced. As an aside, its success rate of 4% indicates there is room for improvement. For now it's good to rely on it less. Reclaim work is up overall, however direct reclaim work is down. Part of the increase can be attributed to a higher use of THPs, which due to internal fragmentation increase the memory footprint. This is not necessarily an unexpected side-effect for users of THP. However, taken both points together, there may well be some opportunities for fine tuning in the reclaim/compaction coordination. [hannes@cmpxchg.org: fix squawks from rebasing] Link: https://lkml.kernel.org/r/20250314210558.GD1316033@cmpxchg.org Link: https://lkml.kernel.org/r/20250313210647.1314586-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + mm/compaction.c | 41 +++++++++++++++++++++++++++++++++-------- mm/internal.h | 1 + mm/page_alloc.c | 29 +++++++++++++++++++++++------ mm/vmscan.c | 15 ++++++++++++++- mm/vmstat.c | 1 + 6 files changed, 73 insertions(+), 15 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dbb0ad69e17f..37c29f3fbca8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -138,6 +138,7 @@ enum numa_stat_item { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, + NR_FREE_PAGES_BLOCKS, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, diff --git a/mm/compaction.c b/mm/compaction.c index cf32e8053edb..139f00c0308a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2328,6 +2328,22 @@ static enum compact_result __compact_finished(struct compact_control *cc) if (!pageblock_aligned(cc->migrate_pfn)) return COMPACT_CONTINUE; + /* + * When defrag_mode is enabled, make kcompactd target + * watermarks in whole pageblocks. Because they can be stolen + * without polluting, no further fallback checks are needed. + */ + if (defrag_mode && !cc->direct_compaction) { + if (__zone_watermark_ok(cc->zone, cc->order, + high_wmark_pages(cc->zone), + cc->highest_zoneidx, cc->alloc_flags, + zone_page_state(cc->zone, + NR_FREE_PAGES_BLOCKS))) + return COMPACT_SUCCESS; + + return COMPACT_CONTINUE; + } + /* Direct compactor: Is a suitable page free? */ ret = COMPACT_NO_SUITABLE_PAGE; for (order = cc->order; order < NR_PAGE_ORDERS; order++) { @@ -2495,13 +2511,19 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, static enum compact_result compaction_suit_allocation_order(struct zone *zone, unsigned int order, int highest_zoneidx, unsigned int alloc_flags, - bool async) + bool async, bool kcompactd) { + unsigned long free_pages; unsigned long watermark; + if (kcompactd && defrag_mode) + free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS); + else + free_pages = zone_page_state(zone, NR_FREE_PAGES); + watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK); - if (zone_watermark_ok(zone, order, watermark, highest_zoneidx, - alloc_flags)) + if (__zone_watermark_ok(zone, order, watermark, highest_zoneidx, + alloc_flags, free_pages)) return COMPACT_SUCCESS; /* @@ -2557,7 +2579,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) ret = compaction_suit_allocation_order(cc->zone, cc->order, cc->highest_zoneidx, cc->alloc_flags, - cc->mode == MIGRATE_ASYNC); + cc->mode == MIGRATE_ASYNC, + !cc->direct_compaction); if (ret != COMPACT_CONTINUE) return ret; } @@ -3051,6 +3074,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) struct zone *zone; enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx; enum compact_result ret; + unsigned int alloc_flags = defrag_mode ? + ALLOC_WMARK_HIGH : ALLOC_WMARK_MIN; for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) { zone = &pgdat->node_zones[zoneid]; @@ -3060,8 +3085,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, pgdat->kcompactd_max_order, - highest_zoneidx, ALLOC_WMARK_MIN, - false); + highest_zoneidx, alloc_flags, + false, true); if (ret == COMPACT_CONTINUE) return true; } @@ -3084,7 +3109,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) .mode = MIGRATE_SYNC_LIGHT, .ignore_skip_hint = false, .gfp_mask = GFP_KERNEL, - .alloc_flags = ALLOC_WMARK_MIN, + .alloc_flags = defrag_mode ? ALLOC_WMARK_HIGH : ALLOC_WMARK_MIN, }; enum compact_result ret; @@ -3104,7 +3129,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, cc.order, zoneid, cc.alloc_flags, - false); + false, true); if (ret != COMPACT_CONTINUE) continue; diff --git a/mm/internal.h b/mm/internal.h index 2f52a65272c1..286520a424fe 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -536,6 +536,7 @@ extern char * const zone_names[MAX_NR_ZONES]; DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); extern int min_free_kbytes; +extern int defrag_mode; void setup_per_zone_wmarks(void); void calculate_min_free_kbytes(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5a2ee82f723e..4337467eaf5a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -273,7 +273,7 @@ int min_free_kbytes = 1024; int user_min_free_kbytes = -1; static int watermark_boost_factor __read_mostly = 15000; static int watermark_scale_factor = 10; -static int defrag_mode; +int defrag_mode; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ int movable_zone; @@ -660,16 +660,20 @@ static inline void __add_to_free_list(struct page *page, struct zone *zone, bool tail) { struct free_area *area = &zone->free_area[order]; + int nr_pages = 1 << order; VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, "page type is %lu, passed migratetype is %d (nr=%d)\n", - get_pageblock_migratetype(page), migratetype, 1 << order); + get_pageblock_migratetype(page), migratetype, nr_pages); if (tail) list_add_tail(&page->buddy_list, &area->free_list[migratetype]); else list_add(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; + + if (order >= pageblock_order && !is_migrate_isolate(migratetype)) + __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages); } /* @@ -681,24 +685,34 @@ static inline void move_to_free_list(struct page *page, struct zone *zone, unsigned int order, int old_mt, int new_mt) { struct free_area *area = &zone->free_area[order]; + int nr_pages = 1 << order; /* Free page moving can fail, so it happens before the type update */ VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt, "page type is %lu, passed migratetype is %d (nr=%d)\n", - get_pageblock_migratetype(page), old_mt, 1 << order); + get_pageblock_migratetype(page), old_mt, nr_pages); list_move_tail(&page->buddy_list, &area->free_list[new_mt]); - account_freepages(zone, -(1 << order), old_mt); - account_freepages(zone, 1 << order, new_mt); + account_freepages(zone, -nr_pages, old_mt); + account_freepages(zone, nr_pages, new_mt); + + if (order >= pageblock_order && + is_migrate_isolate(old_mt) != is_migrate_isolate(new_mt)) { + if (!is_migrate_isolate(old_mt)) + nr_pages = -nr_pages; + __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages); + } } static inline void __del_page_from_free_list(struct page *page, struct zone *zone, unsigned int order, int migratetype) { + int nr_pages = 1 << order; + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, "page type is %lu, passed migratetype is %d (nr=%d)\n", - get_pageblock_migratetype(page), migratetype, 1 << order); + get_pageblock_migratetype(page), migratetype, nr_pages); /* clear reported state and update reported page count */ if (page_reported(page)) @@ -708,6 +722,9 @@ static inline void __del_page_from_free_list(struct page *page, struct zone *zon __ClearPageBuddy(page); set_page_private(page, 0); zone->free_area[order].nr_free--; + + if (order >= pageblock_order && !is_migrate_isolate(migratetype)) + __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, -nr_pages); } static inline void del_page_from_free_list(struct page *page, struct zone *zone, diff --git a/mm/vmscan.c b/mm/vmscan.c index 3370bdca6868..b5c7dfc2b189 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6724,11 +6724,24 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * meet watermarks. */ for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { + unsigned long free_pages; + if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) mark = promo_wmark_pages(zone); else mark = high_wmark_pages(zone); - if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx)) + + /* + * In defrag_mode, watermarks must be met in whole + * blocks to avoid polluting allocator fallbacks. + */ + if (defrag_mode) + free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS); + else + free_pages = zone_page_state(zone, NR_FREE_PAGES); + + if (__zone_watermark_ok(zone, order, mark, highest_zoneidx, + 0, free_pages)) return true; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 16bfe1c694dd..ed49a86348f7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1190,6 +1190,7 @@ int fragmentation_index(struct zone *zone, unsigned int order) const char * const vmstat_text[] = { /* enum zone_stat_item counters */ "nr_free_pages", + "nr_free_pages_blocks", "nr_zone_inactive_anon", "nr_zone_active_anon", "nr_zone_inactive_file", From 6216182fb776ae546c5566f21976d116c8650cee Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 24 Oct 2024 11:19:40 +0100 Subject: [PATCH 1323/2157] RISC-V: clarify what some RISCV_ISA* config options do During some discussion on IRC yesterday and on Pu's bpf patch [1] I noticed that these RISCV_ISA* Kconfig options are not really clear about their implications. Many of these options have no impact on what userspace is allowed to do, for example an application can use Zbb regardless of whether or not the kernel does. Change the help text to try and clarify whether or not an option affects just the kernel, or also userspace. None of these options actually control whether or not an extension is detected dynamically as that's done regardless of Kconfig options, so drop any text that implies the option is required for dynamic detection, rewording them as "do x when y is detected". Link: https://lore.kernel.org/linux-riscv/20240328-ferocity-repose-c554f75a676c@spud/ [1] Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Charlie Jenkins Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20241024-overdue-slogan-0b0f69d3da91@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62545946ecf4..278a38c94c5a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -527,7 +527,8 @@ config RISCV_ISA_C help Adds "C" to the ISA subsets that the toolchain is allowed to emit when building Linux, which results in compressed instructions in the - Linux binary. + Linux binary. This option produces a kernel that will not run on + systems that do not support compressed instructions. If you don't know what to do here, say Y. @@ -537,8 +538,8 @@ config RISCV_ISA_SVNAPOT depends on RISCV_ALTERNATIVE default y help - Allow kernel to detect the Svnapot ISA-extension dynamically at boot - time and enable its usage. + Enable support for the Svnapot ISA-extension when it is detected + at boot. The Svnapot extension is used to mark contiguous PTEs as a range of contiguous virtual-to-physical translations for a naturally @@ -556,9 +557,8 @@ config RISCV_ISA_SVPBMT depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the Svpbmt - ISA-extension (Supervisor-mode: page-based memory types) and - enable its usage. + Add support for the Svpbmt ISA-extension (Supervisor-mode: + page-based memory types) in the kernel when it is detected at boot. The memory type for a page contains a combination of attributes that indicate the cacheability, idempotency, and ordering @@ -577,14 +577,15 @@ config TOOLCHAIN_HAS_V depends on AS_HAS_OPTION_ARCH config RISCV_ISA_V - bool "VECTOR extension support" + bool "Vector extension support" depends on TOOLCHAIN_HAS_V depends on FPU select DYNAMIC_SIGFRAME default y help - Say N here if you want to disable all vector related procedure - in the kernel. + Add support for the Vector extension when it is detected at boot. + When this option is disabled, neither the kernel nor userspace may + use vector procedures. If you don't know what to do here, say Y. @@ -667,8 +668,8 @@ config RISCV_ISA_ZBB depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the ZBB - extension (basic bit manipulation) and enable its usage. + Add support for enabling optimisations in the kernel when the + Zbb extension is detected at boot. The Zbb extension provides instructions to accelerate a number of bit-specific operations (count bit population, sign extending, @@ -707,9 +708,9 @@ config RISCV_ISA_ZICBOM select RISCV_DMA_NONCOHERENT select DMA_DIRECT_REMAP help - Adds support to dynamically detect the presence of the ZICBOM - extension (Cache Block Management Operations) and enable its - usage. + Add support for the Zicbom extension (Cache Block Management + Operations) and enable its use in the kernel when it is detected + at boot. The Zicbom extension can be used to handle for example non-coherent DMA support on devices that need it. @@ -722,7 +723,7 @@ config RISCV_ISA_ZICBOZ default y help Enable the use of the Zicboz extension (cbo.zero instruction) - when available. + in the kernel when it is detected at boot. The Zicboz extension is used for faster zeroing of memory. @@ -760,8 +761,9 @@ config FPU bool "FPU support" default y help - Say N here if you want to disable all floating-point related procedure - in the kernel. + Add support for floating point operations when an FPU is detected at + boot. When this option is disabled, neither the kernel nor userspace + may use the floating point unit. If you don't know what to do here, say Y. From 9343aaba1f256ff42730db5a61efc32a86149776 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 24 Oct 2024 11:19:41 +0100 Subject: [PATCH 1324/2157] RISC-V: separate Zbb optimisations requiring and not requiring toolchain support It seems a bit ridiculous to require toolchain support for BPF to assemble Zbb instructions, so move the dependency on toolchain support for Zbb optimisations out of the Kconfig option and to the callsites. Zbb support has always depended on alternatives, so while adjusting the config options guarding optimisations, remove any checks for whether or not alternatives are enabled. Reviewed-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Charlie Jenkins Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20241024-chump-freebase-d26b6d81af33@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 4 ++-- arch/riscv/include/asm/arch_hweight.h | 6 +++--- arch/riscv/include/asm/bitops.h | 4 ++-- arch/riscv/include/asm/checksum.h | 3 +-- arch/riscv/lib/csum.c | 21 +++------------------ arch/riscv/lib/strcmp.S | 5 +++-- arch/riscv/lib/strlen.S | 5 +++-- arch/riscv/lib/strncmp.S | 5 +++-- 8 files changed, 20 insertions(+), 33 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 278a38c94c5a..6ec7a500a25f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -664,12 +664,12 @@ config RISCV_ISA_ZBA config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" - depends on TOOLCHAIN_HAS_ZBB depends on RISCV_ALTERNATIVE default y help Add support for enabling optimisations in the kernel when the - Zbb extension is detected at boot. + Zbb extension is detected at boot. Some optimisations may + additionally depend on toolchain support for Zbb. The Zbb extension provides instructions to accelerate a number of bit-specific operations (count bit population, sign extending, diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index 613769b9cdc9..0e7cdbbec8ef 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -19,7 +19,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -50,7 +50,7 @@ static inline unsigned int __arch_hweight8(unsigned int w) #if BITS_PER_LONG == 64 static __always_inline unsigned long __arch_hweight64(__u64 w) { -# ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -64,7 +64,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) return w; legacy: -# endif +#endif return __sw_hweight64(w); } #else /* BITS_PER_LONG == 64 */ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index fae152ea0508..410e2235d658 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,7 +15,7 @@ #include #include -#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) +#if !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) #include #include #include @@ -175,7 +175,7 @@ static __always_inline int variable_fls(unsigned int x) variable_fls(x_); \ }) -#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ +#endif /* !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) */ #include #include diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index 88e6f1499e88..da378856f1d5 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -49,8 +49,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * ZBB only saves three instructions on 32-bit and five on 64-bit so not * worth checking if supported without Alternatives. */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index 7fb12c59e571..9408f50ca59a 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -40,12 +40,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, uproto = (__force unsigned int)htonl(proto); sum += uproto; - /* - * Zbb support saves 4 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ -157,12 +152,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) csum = do_csum_common(ptr, end, data); #ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ -244,12 +234,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) end = (const unsigned long *)(buff + len); csum = do_csum_common(ptr, end, data); - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index 57a5c0066231..65027e742af1 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -8,7 +8,8 @@ /* int strcmp(const char *cs, const char *ct) */ SYM_FUNC_START(strcmp) - ALTERNATIVE("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -43,7 +44,7 @@ SYM_FUNC_START(strcmp) * The code was published as part of the bitmanip manual * in Appendix A. */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strcmp_zbb: .option push diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 962983b73251..eb4d2b7ed22b 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -8,7 +8,8 @@ /* int strlen(const char *s) */ SYM_FUNC_START(strlen) - ALTERNATIVE("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -33,7 +34,7 @@ SYM_FUNC_START(strlen) /* * Variant of strlen using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strlen_zbb: #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index 7b2d0ff9ed6c..062000c468c8 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -8,7 +8,8 @@ /* int strncmp(const char *cs, const char *ct, size_t count) */ SYM_FUNC_START(strncmp) - ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -46,7 +47,7 @@ SYM_FUNC_START(strncmp) /* * Variant of strncmp using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strncmp_zbb: .option push From 03dc00a2b67854604c0aa8cbd32105f8b633451e Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Wed, 8 Jan 2025 05:57:00 -0800 Subject: [PATCH 1325/2157] riscv: Support huge pfnmaps Use RSW0 as the special bit for pmds and puds, just like for ptes. Also define the {pte,pmd,pud}_pgprot helpers which were previously missing and are needed for the follow_pfnmap APIs. Signed-off-by: Andrew Bresticker Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250108135700.2614848-1-abrestic@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8b86ce1965dc..ff35eb16853c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -62,6 +62,7 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU + select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 050fdc49b5ad..b6697dc21daf 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -341,6 +341,14 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) +#define pte_pgprot pte_pgprot +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + static inline int pte_present(pte_t pte) { return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -674,6 +682,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd; @@ -699,6 +712,18 @@ static inline unsigned long pud_pfn(pud_t pud) return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT); } +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) +{ + return pte_pgprot(pmd_pte(pmd)); +} + +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) +{ + return pte_pgprot(pud_pte(pud)); +} + static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); @@ -768,6 +793,30 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) return pte_pmd(pte_mkdevmap(pmd_pte(pmd))); } +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return pte_special(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pte_pmd(pte_mkspecial(pmd_pte(pmd))); +} +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return pte_special(pud_pte(pud)); +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pte_pud(pte_mkspecial(pud_pte(pud))); +} +#endif + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { From d3817d091fe6480de5bf3faba0fc2ce25f8d023e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 30 Aug 2024 10:49:32 +0200 Subject: [PATCH 1326/2157] riscv: remove useless pc check in stacktrace handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking for pc to be a kernel text address at this location is useless since pc == handle_exception. Remove this check. [ alex: Fix merge conflict ] Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240830084934.3690037-1-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index d4355c770c36..3fe9e6edef8f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -74,7 +74,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, &frame->ra); if (pc >= (unsigned long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!fn(arg, pc))) break; pc = ((struct pt_regs *)sp)->epc; From 4458b8f68dc7ab8309291f1667157d0250938291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Fri, 13 Sep 2024 07:13:24 +0200 Subject: [PATCH 1327/2157] riscv: hwprobe: export Zicntr and Zihpm extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export Zicntr and Zihpm ISA extensions through the hwprobe syscall. [ alex: Fix hwprobe numbering ] Signed-off-by: Miquel Sabaté Solà Acked-by: Jesse Taube Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240913051324.8176-1-mikisabate@gmail.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 6 ++++++ arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index f273ea15a8e8..35a979dd164a 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -183,6 +183,9 @@ The following keys are defined: defined in the Atomic Compare-and-Swap (CAS) instructions manual starting from commit 5059e0ca641c ("update to ratified"). + * :c:macro:`RISCV_HWPROBE_EXT_ZICNTR`: The Zicntr extension version 2.0 + is supported as defined in the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZICOND`: The Zicond extension is supported as defined in the RISC-V Integer Conditional (Zicond) operations extension manual starting from commit 95cf1f9 ("Add changes requested by Ved @@ -192,6 +195,9 @@ The following keys are defined: supported as defined in the RISC-V ISA manual starting from commit d8ab5c78c207 ("Zihintpause is ratified"). + * :c:macro:`RISCV_HWPROBE_EXT_ZIHPM`: The Zihpm extension version 2.0 + is supported as defined in the RISC-V ISA manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZVE32X`: The Vector sub-extension Zve32x is supported, as defined by version 1.0 of the RISC-V Vector extension manual. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index c3c1cc951cb9..8cac35cb19d8 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,8 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#define RISCV_HWPROBE_EXT_ZICNTR (1ULL << 50) +#define RISCV_HWPROBE_EXT_ZIHPM (1ULL << 51) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bcd3b816306c..b35cce0b57ae 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -108,9 +108,11 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCB); EXT_KEY(ZCMOP); EXT_KEY(ZICBOZ); + EXT_KEY(ZICNTR); EXT_KEY(ZICOND); EXT_KEY(ZIHINTNTL); EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIHPM); EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); From d9be2b9b60497a82aeceec3a98d8b37fdd2960f2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 13 Jan 2025 15:24:24 +0100 Subject: [PATCH 1328/2157] riscv: Call secondary mmu notifier when flushing the tlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to allow the IOMMU driver to correctly flush its own TLB. Reviewed-by: Clément Léger Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20250113142424.30487-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/tlbflush.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 9b6e86ce3867..bb77607c87aa 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -78,10 +79,17 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return mm ? cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; +} + +static void __flush_tlb_range(struct mm_struct *mm, + const struct cpumask *cmask, unsigned long start, unsigned long size, unsigned long stride) { + unsigned long asid = get_mm_asid(mm); unsigned int cpu; if (cpumask_empty(cmask)) @@ -105,30 +113,26 @@ static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, } put_cpu(); -} -static inline unsigned long get_mm_asid(struct mm_struct *mm) -{ - return cntx2asid(atomic_long_read(&mm->context.id)); + if (mm) + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, start + size); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(mm, mm_cpumask(mm), 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int page_size) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - start, end - start, page_size); + __flush_tlb_range(mm, mm_cpumask(mm), start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, PAGE_SIZE); } @@ -161,13 +165,13 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } } - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, stride_size); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(NULL, cpu_online_mask, start, end - start, PAGE_SIZE); } @@ -175,7 +179,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE); } #endif @@ -189,7 +193,10 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm, unsigned long uaddr) { + unsigned long start = uaddr & PAGE_MASK; + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, start + PAGE_SIZE); } void arch_flush_tlb_batched_pending(struct mm_struct *mm) @@ -199,7 +206,7 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(NULL, &batch->cpumask, + 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); cpumask_clear(&batch->cpumask); } From d9708b1931fc0ebb21cd94a56283d09222847749 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 16 Dec 2024 20:39:10 -0500 Subject: [PATCH 1329/2157] riscv: Implement smp_cond_load8/16() with Zawrs RISC-V code uses the queued spinlock implementation, which calls the macros smp_cond_load_acquire for one byte. So, complement the implementation of byte and halfword versions. Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Andrew Jones Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20241217013910.1039923-1-guoren@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 427c41dde643..2ec119eb147b 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr, { unsigned long tmp; + u32 *__ptr32b; + ulong __s, __val, __mask; + asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop", 0, RISCV_ISA_EXT_ZAWRS, 1) : : : : no_zawrs); switch (size) { case 1: - fallthrough; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 2: - /* RISC-V doesn't have lr instructions on byte and half-word. */ - goto no_zawrs; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xffff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 4: asm volatile( " lr.w %0, %1\n" From 5550187c4c21740942c32a9ae56f9f472a104cb4 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 10 Feb 2025 18:53:51 +0800 Subject: [PATCH 1330/2157] um: Pass the correct Rust target and options with gcc In order to work around some issues with disabling SSE on older versions of gcc (compilation would fail upon seeing a function declaration containing a float, even if it was never called or defined), the corresponding CFLAGS and RUSTFLAGS were only set when using clang. However, this led to two problems: - Newer gcc versions also wouldn't get the correct flags, despite not having the bug. - The RUSTFLAGS for setting the rust target definition were not set, despite being unrelated. This works by chance for x86_64, as the built-in default target is close enough, but not for 32-bit x86. Move the target definition outside the conditional block, and update the condition to take into account the gcc version. Fixes: a3046a618a28 ("um: Only disable SSE on clang to work around old GCC bugs") Signed-off-by: David Gow Link: https://patch.msgid.link/20250210105353.2238769-2-davidgow@google.com Signed-off-by: Johannes Berg --- arch/x86/Makefile.um | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index a46b1397ad01..c86cbd9cbba3 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -7,12 +7,13 @@ core-y += arch/x86/crypto/ # GCC versions < 11. See: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 # -ifeq ($(CONFIG_CC_IS_CLANG),y) -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json +ifeq ($(call gcc-min-version, 110000)$(CONFIG_CC_IS_CLANG),y) +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 endif +KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json + ifeq ($(CONFIG_X86_32),y) START := 0x8048000 From d1d7f01f7cd35e16c6bcef5a0e31988b5c9980f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 10 Feb 2025 17:09:25 +0100 Subject: [PATCH 1331/2157] um: mark rodata read-only and implement _nofault accesses Mark read-only data actually read-only (simple mprotect), and to be able to test it also implement _nofault accesses. This works by setting up a new "segv_continue" pointer in current, and then when we hit a segfault we change the signal return context so that we continue at that address. The code using this sets it up so that it jumps to a label and then aborts the access that way, returning -EFAULT. It's possible to optimize the ___backtrack_faulted() thing by using asm goto (compiler version dependent) and/or gcc's (not sure if clang has it) &&label extension, but at least in one attempt I made the && caused the compiler to not load -EFAULT into the register in case of jumping to the &&label from the fault handler. So leave it like this for now. Signed-off-by: Johannes Berg Co-developed-by: Benjamin Berg Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250210160926.420133-2-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- arch/um/Kconfig | 1 + arch/um/include/asm/processor-generic.h | 2 ++ arch/um/include/asm/uaccess.h | 20 ++++++++++++----- arch/um/include/shared/arch.h | 2 ++ arch/um/include/shared/as-layout.h | 2 +- arch/um/include/shared/irq_user.h | 3 ++- arch/um/include/shared/kern_util.h | 12 ++++++---- arch/um/kernel/irq.c | 3 ++- arch/um/kernel/mem.c | 10 +++++++++ arch/um/kernel/trap.c | 28 +++++++++++++++++++----- arch/um/os-Linux/signal.c | 4 ++-- arch/um/os-Linux/skas/process.c | 8 +++---- arch/x86/um/os-Linux/mcontext.c | 12 ++++++++++ arch/x86/um/shared/sysdep/faultinfo_32.h | 12 ++++++++++ arch/x86/um/shared/sysdep/faultinfo_64.h | 12 ++++++++++ 15 files changed, 108 insertions(+), 23 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 18051b1cfce0..79509c7f39de 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -12,6 +12,7 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER + select ARCH_HAS_STRICT_KERNEL_RWX select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 5d6356eafffe..8a789c17acd8 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -31,6 +31,8 @@ struct thread_struct { } thread; } request; + void *segv_continue; + /* Contains variable sized FP registers */ struct pt_regs regs; }; diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 1d4b6bbc1b65..3a08f9029a3f 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include #include +#include #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ @@ -44,19 +45,28 @@ static inline int __access_ok(const void __user *ptr, unsigned long size) __access_ok_vsyscall(addr, size)); } -/* no pagefaults for kernel addresses in um */ #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - *((type *)dst) = get_unaligned((type *)(src)); \ - if (0) /* make sure the label looks used to the compiler */ \ + int __faulted; \ + \ + ___backtrack_faulted(__faulted); \ + if (__faulted) { \ + *((type *)dst) = (type) 0; \ goto err_label; \ + } \ + *((type *)dst) = get_unaligned((type *)(src)); \ + current->thread.segv_continue = NULL; \ } while (0) #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - put_unaligned(*((type *)src), (type *)(dst)); \ - if (0) /* make sure the label looks used to the compiler */ \ + int __faulted; \ + \ + ___backtrack_faulted(__faulted); \ + if (__faulted) \ goto err_label; \ + put_unaligned(*((type *)src), (type *)(dst)); \ + current->thread.segv_continue = NULL; \ } while (0) #endif diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h index 880ee42a3329..cc398a21ad96 100644 --- a/arch/um/include/shared/arch.h +++ b/arch/um/include/shared/arch.h @@ -12,4 +12,6 @@ extern void arch_check_bugs(void); extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs); extern void arch_examine_signal(int sig, struct uml_pt_regs *regs); +void mc_set_rip(void *_mc, void *target); + #endif diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index ea65f151bf48..4f44dcce8a7c 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -50,7 +50,7 @@ extern int linux_main(int argc, char **argv, char **envp); extern void uml_finishsetup(void); struct siginfo; -extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *); +extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *, void *); #endif diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index da0f6eea30d0..88835b52ae2b 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -15,7 +15,8 @@ enum um_irq_type { }; struct siginfo; -extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void sigio_handler(int sig, struct siginfo *unused_si, + struct uml_pt_regs *regs, void *mc); void sigio_run_timetravel_handlers(void); extern void free_irq_by_fd(int fd); extern void deactivate_fd(int fd, int irqnum); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index f21dc8517538..00ca3e12fd9a 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -24,10 +24,12 @@ extern void free_stack(unsigned long stack, int order); struct pt_regs; extern void do_signal(struct pt_regs *regs); extern void interrupt_end(void); -extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); +extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs, + void *mc); extern unsigned long segv(struct faultinfo fi, unsigned long ip, - int is_user, struct uml_pt_regs *regs); + int is_user, struct uml_pt_regs *regs, + void *mc); extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); @@ -59,8 +61,10 @@ extern unsigned long from_irq_stack(int nested); extern int singlestepping(void); -extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); -extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc); +extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); void um_idle_sleep(void); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index a4991746f5ea..abe8f30a521c 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -236,7 +236,8 @@ static void _sigio_handler(struct uml_pt_regs *regs, free_irqs(); } -void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { preempt_disable(); _sigio_handler(regs, irqs_suspended); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index befed230aac2..05ffceb555b4 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include @@ -241,3 +243,11 @@ static const pgprot_t protection_map[16] = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED }; DECLARE_VM_GET_PAGE_PROT + +void mark_rodata_ro(void) +{ + unsigned long rodata_start = PFN_ALIGN(__start_rodata); + unsigned long rodata_end = PFN_ALIGN(__end_rodata); + + os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0); +} diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index cdaee3e94273..ce073150dc20 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by @@ -175,12 +176,14 @@ void fatal_sigsegv(void) * @sig: the signal number * @unused_si: the signal info struct; unused in this handler * @regs: the ptrace register information + * @mc: the mcontext of the signal * * The handler first extracts the faultinfo from the UML ptrace regs struct. * If the userfault did not happen in an UML userspace process, bad_segv is called. * Otherwise the signal did happen in a cloned userspace process, handle it. */ -void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { struct faultinfo * fi = UPT_FAULTINFO(regs); @@ -189,7 +192,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) bad_segv(*fi, UPT_IP(regs)); return; } - segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc); } /* @@ -199,7 +202,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) * give us bad data! */ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, - struct uml_pt_regs *regs) + struct uml_pt_regs *regs, void *mc) { int si_code; int err; @@ -223,6 +226,19 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, goto out; } else if (current->mm == NULL) { + if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); + } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } @@ -274,7 +290,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, return 0; } -void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) +void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs, + void *mc) { int code, err; if (!UPT_IS_USER(regs)) { @@ -302,7 +319,8 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } } -void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { do_IRQ(WINCH_IRQ, regs); } diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 9ea7269ffb77..e71e5b4878d1 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -21,7 +21,7 @@ #include #include -void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = { [SIGTRAP] = relay_signal, [SIGFPE] = relay_signal, [SIGILL] = relay_signal, @@ -47,7 +47,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) if ((sig != SIGIO) && (sig != SIGWINCH)) unblock_signals_trace(); - (*sig_info[sig])(sig, si, &r); + (*sig_info[sig])(sig, si, &r, mc); errno = save_errno; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index e2f8f156402f..ae2aea062f06 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -166,7 +166,7 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi) static void handle_segv(int pid, struct uml_pt_regs *regs) { get_skas_faultinfo(pid, ®s->faultinfo); - segv(regs->faultinfo, 0, 1, NULL); + segv(regs->faultinfo, 0, 1, NULL, NULL); } static void handle_trap(int pid, struct uml_pt_regs *regs) @@ -525,7 +525,7 @@ void userspace(struct uml_pt_regs *regs) get_skas_faultinfo(pid, ®s->faultinfo); (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, - regs); + regs, NULL); } else handle_segv(pid, regs); break; @@ -533,7 +533,7 @@ void userspace(struct uml_pt_regs *regs) handle_trap(pid, regs); break; case SIGTRAP: - relay_signal(SIGTRAP, (struct siginfo *)&si, regs); + relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); break; case SIGALRM: break; @@ -543,7 +543,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals_trace(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); unblock_signals_trace(); break; default: diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c index e80ab7d28117..d2f3a595b4ef 100644 --- a/arch/x86/um/os-Linux/mcontext.c +++ b/arch/x86/um/os-Linux/mcontext.c @@ -4,6 +4,7 @@ #include #include #include +#include void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) { @@ -31,3 +32,14 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) regs->gp[CS / sizeof(unsigned long)] |= 3; #endif } + +void mc_set_rip(void *_mc, void *target) +{ + mcontext_t *mc = _mc; + +#ifdef __i386__ + mc->gregs[REG_EIP] = (unsigned long)target; +#else + mc->gregs[REG_RIP] = (unsigned long)target; +#endif +} diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index b6f2437ec29c..ab5c8e47049c 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -29,4 +29,16 @@ struct faultinfo { #define PTRACE_FULL_FAULTINFO 0 +#define ___backtrack_faulted(_faulted) \ + asm volatile ( \ + "mov $0, %0\n" \ + "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "jmp _end_%=\n" \ + "__get_kernel_nofault_faulted_%=:\n" \ + "mov $1, %0;" \ + "_end_%=:" \ + : "=r" (_faulted), \ + "=m" (current->thread.segv_continue) :: \ + ) + #endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index ee88f88974ea..26fb4835d3e9 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -29,4 +29,16 @@ struct faultinfo { #define PTRACE_FULL_FAULTINFO 1 +#define ___backtrack_faulted(_faulted) \ + asm volatile ( \ + "mov $0, %0\n" \ + "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "jmp _end_%=\n" \ + "__get_kernel_nofault_faulted_%=:\n" \ + "mov $1, %0;" \ + "_end_%=:" \ + : "=r" (_faulted), \ + "=m" (current->thread.segv_continue) :: \ + ) + #endif From 84a6fc378471fbeaf48f8604566a5a33a3d63c18 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 10 Feb 2025 17:09:26 +0100 Subject: [PATCH 1332/2157] um: remove copy_from_kernel_nofault_allowed There is no need to override the default version of this function anymore as UML now has proper _nofault memory access functions. Doing this also fixes the fact that the implementation was incorrect as using mincore() will incorrectly flag pages as inaccessible if they were swapped out by the host. Fixes: f75b1b1bedfb ("um: Implement probe_kernel_read()") Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250210160926.420133-3-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- arch/um/include/shared/os.h | 1 - arch/um/kernel/Makefile | 2 +- arch/um/kernel/maccess.c | 19 -------------- arch/um/os-Linux/process.c | 51 ------------------------------------- 4 files changed, 1 insertion(+), 72 deletions(-) delete mode 100644 arch/um/kernel/maccess.c diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 5babad8c5f75..bc02767f0639 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -213,7 +213,6 @@ extern int os_protect_memory(void *addr, unsigned long len, extern int os_unmap_memory(void *addr, int len); extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); -extern int os_mincore(void *addr, unsigned long len); void os_set_pdeathsig(void); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index f8567b933ffa..4df1cd0d2017 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -17,7 +17,7 @@ extra-y := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ signal.o sysrq.o time.o tlb.o trap.o \ - um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ + um_arch.o umid.o kmsg_dump.o capflags.o skas/ obj-y += load_file.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c deleted file mode 100644 index 8ccd56813f68..000000000000 --- a/arch/um/kernel/maccess.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2013 Richard Weinberger - */ - -#include -#include -#include - -bool copy_from_kernel_nofault_allowed(const void *src, size_t size) -{ - void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); - - if ((unsigned long)src < PAGE_SIZE || size <= 0) - return false; - if (os_mincore(psrc, size + src - psrc) <= 0) - return false; - return true; -} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 9f086f939420..184566edeee9 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -142,57 +142,6 @@ int __init can_drop_memory(void) return ok; } -static int os_page_mincore(void *addr) -{ - char vec[2]; - int ret; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - if (errno == ENOMEM || errno == EINVAL) - return 0; - else - return -errno; - } - - return vec[0] & 1; -} - -int os_mincore(void *addr, unsigned long len) -{ - char *vec; - int ret, i; - - if (len <= UM_KERN_PAGE_SIZE) - return os_page_mincore(addr); - - vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); - if (!vec) - return -ENOMEM; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - if (errno == ENOMEM || errno == EINVAL) - ret = 0; - else - ret = -errno; - - goto out; - } - - for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { - if (!(vec[i] & 1)) { - ret = 0; - goto out; - } - } - - ret = 1; -out: - free(vec); - return ret; -} - void init_new_thread_signals(void) { set_handler(SIGSEGV); From 1fc350eed627762f4f6db3f35776d481e7f02c5c Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 12 Feb 2025 12:57:56 +0800 Subject: [PATCH 1333/2157] um: Allocate vdso page pointer statically Instead of dynamically allocating the pointer to the vdso page during boot, we can just allocate it statically. Doing so will reduce error handling and make the code slightly more readable. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250212045756.164977-1-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/x86/um/vdso/vma.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index f238f7b33cdd..dc8dfb2abd80 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -12,33 +12,22 @@ static unsigned int __read_mostly vdso_enabled = 1; unsigned long um_vdso_addr; +static struct page *um_vdso; extern unsigned long task_size; extern char vdso_start[], vdso_end[]; -static struct page **vdsop; - static int __init init_vdso(void) { - struct page *um_vdso; - BUG_ON(vdso_end - vdso_start > PAGE_SIZE); um_vdso_addr = task_size - PAGE_SIZE; - vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); - if (!vdsop) - goto oom; - um_vdso = alloc_page(GFP_KERNEL); - if (!um_vdso) { - kfree(vdsop); - + if (!um_vdso) goto oom; - } copy_page(page_address(um_vdso), vdso_start); - *vdsop = um_vdso; return 0; @@ -56,6 +45,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) struct mm_struct *mm = current->mm; static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", + .pages = &um_vdso, }; if (!vdso_enabled) @@ -64,7 +54,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (mmap_write_lock_killable(mm)) return -EINTR; - vdso_mapping.pages = vdsop; vma = _install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, From 0bc754d1e31f40f4a343b692096d9e092ccc0370 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 14 Feb 2025 10:28:22 +0100 Subject: [PATCH 1334/2157] um: hostfs: avoid issues on inode number reuse by host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some file systems (e.g. ext4) may reuse inode numbers once the inode is not in use anymore. Usually hostfs will keep an FD open for each inode, but this is not always the case. In the case of sockets, this cannot even be done properly. As such, the following sequence of events was possible: * application creates and deletes a socket * hostfs creates/deletes the socket on the host * inode is still in the hostfs cache * hostfs creates a new file * ext4 on the outside reuses the inode number * hostfs finds the socket inode for the newly created file * application receives -ENXIO when opening the file As mentioned, this can only happen if the deleted file is a special file that is never opened on the host (i.e. no .open fop). As such, to prevent issues, it is sufficient to check that the inode has the expected type. That said, also add a check for the inode birth time, just to be on the safe side. Fixes: 74ce793bcbde ("hostfs: Fix ephemeral inodes") Signed-off-by: Benjamin Berg Reviewed-by: Mickaël Salaün Tested-by: Mickaël Salaün Link: https://patch.msgid.link/20250214092822.1241575-1-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- fs/hostfs/hostfs.h | 2 +- fs/hostfs/hostfs_kern.c | 7 ++++- fs/hostfs/hostfs_user.c | 59 ++++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 8b39c15c408c..15b2f094d36e 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -60,7 +60,7 @@ struct hostfs_stat { unsigned int uid; unsigned int gid; unsigned long long size; - struct hostfs_timespec atime, mtime, ctime; + struct hostfs_timespec atime, mtime, ctime, btime; unsigned int blksize; unsigned long long blocks; struct { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e0741e468956..e6e247235728 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,6 +33,7 @@ struct hostfs_inode_info { struct inode vfs_inode; struct mutex open_mutex; dev_t dev; + struct hostfs_timespec btime; }; static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) @@ -547,6 +548,7 @@ static int hostfs_inode_set(struct inode *ino, void *data) } HOSTFS_I(ino)->dev = dev; + HOSTFS_I(ino)->btime = st->btime; ino->i_ino = st->ino; ino->i_mode = st->mode; return hostfs_inode_update(ino, st); @@ -557,7 +559,10 @@ static int hostfs_inode_test(struct inode *inode, void *data) const struct hostfs_stat *st = data; dev_t dev = MKDEV(st->dev.maj, st->dev.min); - return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev; + return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev && + (inode->i_mode & S_IFMT) == (st->mode & S_IFMT) && + HOSTFS_I(inode)->btime.tv_sec == st->btime.tv_sec && + HOSTFS_I(inode)->btime.tv_nsec == st->btime.tv_nsec; } static struct inode *hostfs_iget(struct super_block *sb, char *name) diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 97e9c40a9448..3bcd9f35e70b 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -18,39 +18,48 @@ #include "hostfs.h" #include -static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) +static void statx_to_hostfs(const struct statx *buf, struct hostfs_stat *p) { - p->ino = buf->st_ino; - p->mode = buf->st_mode; - p->nlink = buf->st_nlink; - p->uid = buf->st_uid; - p->gid = buf->st_gid; - p->size = buf->st_size; - p->atime.tv_sec = buf->st_atime; - p->atime.tv_nsec = 0; - p->ctime.tv_sec = buf->st_ctime; - p->ctime.tv_nsec = 0; - p->mtime.tv_sec = buf->st_mtime; - p->mtime.tv_nsec = 0; - p->blksize = buf->st_blksize; - p->blocks = buf->st_blocks; - p->rdev.maj = os_major(buf->st_rdev); - p->rdev.min = os_minor(buf->st_rdev); - p->dev.maj = os_major(buf->st_dev); - p->dev.min = os_minor(buf->st_dev); + p->ino = buf->stx_ino; + p->mode = buf->stx_mode; + p->nlink = buf->stx_nlink; + p->uid = buf->stx_uid; + p->gid = buf->stx_gid; + p->size = buf->stx_size; + p->atime.tv_sec = buf->stx_atime.tv_sec; + p->atime.tv_nsec = buf->stx_atime.tv_nsec; + p->ctime.tv_sec = buf->stx_ctime.tv_sec; + p->ctime.tv_nsec = buf->stx_ctime.tv_nsec; + p->mtime.tv_sec = buf->stx_mtime.tv_sec; + p->mtime.tv_nsec = buf->stx_mtime.tv_nsec; + if (buf->stx_mask & STATX_BTIME) { + p->btime.tv_sec = buf->stx_btime.tv_sec; + p->btime.tv_nsec = buf->stx_btime.tv_nsec; + } else { + memset(&p->btime, 0, sizeof(p->btime)); + } + p->blksize = buf->stx_blksize; + p->blocks = buf->stx_blocks; + p->rdev.maj = buf->stx_rdev_major; + p->rdev.min = buf->stx_rdev_minor; + p->dev.maj = buf->stx_dev_major; + p->dev.min = buf->stx_dev_minor; } int stat_file(const char *path, struct hostfs_stat *p, int fd) { - struct stat64 buf; + struct statx buf; + int flags = AT_SYMLINK_NOFOLLOW; if (fd >= 0) { - if (fstat64(fd, &buf) < 0) - return -errno; - } else if (lstat64(path, &buf) < 0) { - return -errno; + flags |= AT_EMPTY_PATH; + path = ""; } - stat64_to_hostfs(&buf, p); + + if ((statx(fd, path, flags, STATX_BASIC_STATS | STATX_BTIME, &buf)) < 0) + return -errno; + + statx_to_hostfs(&buf, p); return 0; } From f664a1399633c63706f763d4bb58c96110355330 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Thu, 20 Feb 2025 17:39:40 -0500 Subject: [PATCH 1335/2157] um: use str_yes_no() to remove hardcoded "yes" and "no" Remove hard-coded strings by using the str_yes_no() helper function provided by . Signed-off-by: Ethan Carter Edwards Link: https://patch.msgid.link/20250220-um_yes_no-v1-1-2a355ed2d225@ethancedwards.com Signed-off-by: Johannes Berg --- arch/um/kernel/um_arch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 79ea97d4797e..7f050783885a 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "model name\t: UML\n"); seq_printf(m, "mode\t\t: skas\n"); seq_printf(m, "host\t\t: %s\n", host_info); - seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no"); + seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU))); seq_printf(m, "flags\t\t:"); for (i = 0; i < 32*NCAPINTS; i++) if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL)) From e82cf3051e6193f61e03898f8dba035199064d36 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Fri, 21 Feb 2025 12:18:55 +0800 Subject: [PATCH 1336/2157] um: Update min_low_pfn to match changes in uml_reserved When uml_reserved is updated, min_low_pfn must also be updated accordingly. Otherwise, min_low_pfn will not accurately reflect the lowest available PFN. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250221041855.1156109-1-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/kernel/mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 05ffceb555b4..61b5a5ede01c 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -68,6 +68,7 @@ void __init mem_init(void) map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; + min_low_pfn = PFN_UP(__pa(uml_reserved)); /* this will put all low memory onto the freelists */ memblock_free_all(); From 089db01ea7eb4f366be45b9390a04f1c601c0071 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 28 Feb 2025 10:00:08 +0100 Subject: [PATCH 1337/2157] um/locking: Remove semicolon from "lock" prefix Minimum version of binutils required to compile the kernel is 2.25. This version correctly handles the "lock" prefix, so it is possible to remove the semicolon, which was used to support ancient versions of GNU as. Due to the semicolon, the compiler considers "lock; insn" as two separate instructions. Removing the semicolon makes asm length calculations more accurate, consequently making scheduling and inlining decisions of the compiler more accurate. Removing the semicolon also enables assembler checks involving lock prefix. Trying to assemble e.g. "lock andl %eax, %ebx" results in: Error: expecting lockable instruction after `lock' Signed-off-by: Uros Bizjak Cc: Richard Weinberger Cc: Anton Ivanov Cc: Johannes Berg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Link: https://patch.msgid.link/20250228090058.2499163-1-ubizjak@gmail.com Signed-off-by: Johannes Berg --- arch/x86/um/asm/barrier.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 4da336965698..b51aefd6ec2b 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -12,9 +12,9 @@ */ #ifdef CONFIG_X86_32 -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#define mb() alternative("lock addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) #else /* CONFIG_X86_32 */ From 887c5c12e80c8424bd471122d2e8b6b462e12874 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 14 Mar 2025 14:08:15 +0100 Subject: [PATCH 1338/2157] um: work around sched_yield not yielding in time-travel mode sched_yield by a userspace may not actually cause scheduling in time-travel mode as no time has passed. In the case seen it appears to be a badly implemented userspace spinlock in ASAN. Unfortunately, with time-travel it causes an extreme slowdown or even deadlock depending on the kernel configuration (CONFIG_UML_MAX_USERSPACE_ITERATIONS). Work around it by accounting time to the process whenever it executes a sched_yield syscall. Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250314130815.226872-1-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- arch/um/include/linux/time-internal.h | 2 ++ arch/um/kernel/skas/syscall.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index b22226634ff6..138908b999d7 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -83,6 +83,8 @@ extern void time_travel_not_configured(void); #define time_travel_del_event(...) time_travel_not_configured() #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ +extern unsigned long tt_extra_sched_jiffies; + /* * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, * which is intentional since we really shouldn't link it in that case. diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index b09e85279d2b..a5beaea2967e 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -31,6 +31,17 @@ void handle_syscall(struct uml_pt_regs *r) goto out; syscall = UPT_SYSCALL_NR(r); + + /* + * If no time passes, then sched_yield may not actually yield, causing + * broken spinlock implementations in userspace (ASAN) to hang for long + * periods of time. + */ + if ((time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) && + syscall == __NR_sched_yield) + tt_extra_sched_jiffies += 1; + if (syscall >= 0 && syscall < __NR_syscalls) { unsigned long ret = EXECUTE_SYSCALL(syscall, regs); From 24ffa71b0f15fe4827d3672d5918f97564b2fba1 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Sun, 16 Mar 2025 00:19:09 +0800 Subject: [PATCH 1339/2157] um: virt-pci: Refactor virtio_pcidev into its own module Decouple virt-pci and virtio_pcidev, refactoring virtio_pcidev into its own module. Define a set of APIs for virt-pci. This allows for future addition of more PCI emulation implementations. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250315161910.4082396-3-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/drivers/Kconfig | 12 +- arch/um/drivers/Makefile | 3 +- arch/um/drivers/virt-pci.c | 743 ++++++-------------------------- arch/um/drivers/virt-pci.h | 41 ++ arch/um/drivers/virtio_pcidev.c | 628 +++++++++++++++++++++++++++ 5 files changed, 816 insertions(+), 611 deletions(-) create mode 100644 arch/um/drivers/virt-pci.h create mode 100644 arch/um/drivers/virtio_pcidev.c diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index ede40a160c5e..9cb196070614 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -345,16 +345,20 @@ config UML_RTC by providing a fake RTC clock that causes a wakeup at the right time. -config UML_PCI_OVER_VIRTIO - bool "Enable PCI over VIRTIO device simulation" - # in theory, just VIRTIO is enough, but that causes recursion - depends on VIRTIO_UML +config UML_PCI + bool select FORCE_PCI select UML_IOMEM_EMULATION select UML_DMA_EMULATION select PCI_MSI select PCI_LOCKLESS_CONFIG +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select UML_PCI + config UML_PCI_OVER_VIRTIO_DEVICE_ID int "set the virtio device ID for PCI emulation" default -1 diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 57882e6bc215..0a5820343ad3 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o obj-$(CONFIG_UML_RTC) += rtc.o -obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o +obj-$(CONFIG_UML_PCI) += virt-pci.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o # pcap_user.o must be added explicitly. USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index dd5580f975cc..b83b5a765d4e 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -5,52 +5,19 @@ */ #include #include -#include -#include #include #include #include -#include -#include -#include #include #include #include +#include "virt-pci.h" + #define MAX_DEVICES 8 #define MAX_MSI_VECTORS 32 #define CFG_SPACE_SIZE 4096 -/* for MSI-X we have a 32-bit payload */ -#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) -#define NUM_IRQ_MSGS 10 - -struct um_pci_message_buffer { - struct virtio_pcidev_msg hdr; - u8 data[8]; -}; - -struct um_pci_device { - struct virtio_device *vdev; - - /* for now just standard BARs */ - u8 resptr[PCI_STD_NUM_BARS]; - - struct virtqueue *cmd_vq, *irq_vq; - -#define UM_PCI_WRITE_BUFS 20 - struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1]; - void *extra_ptrs[UM_PCI_WRITE_BUFS + 1]; - DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS); - -#define UM_PCI_STAT_WAITING 0 - unsigned long status; - - int irq; - - bool platform; -}; - struct um_pci_device_reg { struct um_pci_device *dev; void __iomem *iomem; @@ -65,179 +32,15 @@ static struct irq_domain *um_pci_inner_domain; static struct irq_domain *um_pci_msi_domain; static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; -static unsigned int um_pci_max_delay_us = 40000; -module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); - -static int um_pci_get_buf(struct um_pci_device *dev, bool *posted) -{ - int i; - - for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { - if (!test_and_set_bit(i, dev->used_bufs)) - return i; - } - - *posted = false; - return UM_PCI_WRITE_BUFS; -} - -static void um_pci_free_buf(struct um_pci_device *dev, void *buf) -{ - int i; - - if (buf == &dev->bufs[UM_PCI_WRITE_BUFS]) { - kfree(dev->extra_ptrs[UM_PCI_WRITE_BUFS]); - dev->extra_ptrs[UM_PCI_WRITE_BUFS] = NULL; - return; - } - - for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { - if (buf == &dev->bufs[i]) { - kfree(dev->extra_ptrs[i]); - dev->extra_ptrs[i] = NULL; - WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); - return; - } - } - - WARN_ON(1); -} - -static int um_pci_send_cmd(struct um_pci_device *dev, - struct virtio_pcidev_msg *cmd, - unsigned int cmd_size, - const void *extra, unsigned int extra_size, - void *out, unsigned int out_size) -{ - struct scatterlist out_sg, extra_sg, in_sg; - struct scatterlist *sgs_list[] = { - [0] = &out_sg, - [1] = extra ? &extra_sg : &in_sg, - [2] = extra ? &in_sg : NULL, - }; - struct um_pci_message_buffer *buf; - int delay_count = 0; - bool bounce_out; - int ret, len; - int buf_idx; - bool posted; - - if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) - return -EINVAL; - - switch (cmd->op) { - case VIRTIO_PCIDEV_OP_CFG_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_MEMSET: - /* in PCI, writes are posted, so don't wait */ - posted = !out; - WARN_ON(!posted); - break; - default: - posted = false; - break; - } - - bounce_out = !posted && cmd_size <= sizeof(*cmd) && - out && out_size <= sizeof(buf->data); - - buf_idx = um_pci_get_buf(dev, &posted); - buf = &dev->bufs[buf_idx]; - memcpy(buf, cmd, cmd_size); - - if (posted && extra && extra_size > sizeof(buf) - cmd_size) { - dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, - GFP_ATOMIC); - - if (!dev->extra_ptrs[buf_idx]) { - um_pci_free_buf(dev, buf); - return -ENOMEM; - } - extra = dev->extra_ptrs[buf_idx]; - } else if (extra && extra_size <= sizeof(buf) - cmd_size) { - memcpy((u8 *)buf + cmd_size, extra, extra_size); - cmd_size += extra_size; - extra_size = 0; - extra = NULL; - cmd = (void *)buf; - } else { - cmd = (void *)buf; - } - - sg_init_one(&out_sg, cmd, cmd_size); - if (extra) - sg_init_one(&extra_sg, extra, extra_size); - /* allow stack for small buffers */ - if (bounce_out) - sg_init_one(&in_sg, buf->data, out_size); - else if (out) - sg_init_one(&in_sg, out, out_size); - - /* add to internal virtio queue */ - ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, - extra ? 2 : 1, - out ? 1 : 0, - cmd, GFP_ATOMIC); - if (ret) { - um_pci_free_buf(dev, buf); - return ret; - } - - if (posted) { - virtqueue_kick(dev->cmd_vq); - return 0; - } - - /* kick and poll for getting a response on the queue */ - set_bit(UM_PCI_STAT_WAITING, &dev->status); - virtqueue_kick(dev->cmd_vq); - ret = 0; - - while (1) { - void *completed = virtqueue_get_buf(dev->cmd_vq, &len); - - if (completed == buf) - break; - - if (completed) - um_pci_free_buf(dev, completed); - - if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || - ++delay_count > um_pci_max_delay_us, - "um virt-pci delay: %d", delay_count)) { - ret = -EIO; - break; - } - udelay(1); - } - clear_bit(UM_PCI_STAT_WAITING, &dev->status); - - if (bounce_out) - memcpy(out, buf->data, out_size); - - um_pci_free_buf(dev, buf); - - return ret; -} - static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, int size) { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_READ, - .size = size, - .addr = offset, - }; - /* max 8, we might not use it all */ - u8 data[8]; if (!dev) return ULONG_MAX; - memset(data, 0xff, sizeof(data)); - switch (size) { case 1: case 2: @@ -251,23 +54,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, return ULONG_MAX; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) - return ULONG_MAX; - - switch (size) { - case 1: - return data[0]; - case 2: - return le16_to_cpup((void *)data); - case 4: - return le32_to_cpup((void *)data); -#ifdef CONFIG_64BIT - case 8: - return le64_to_cpup((void *)data); -#endif - default: - return ULONG_MAX; - } + return dev->ops->cfgspace_read(dev, offset, size); } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -275,42 +62,24 @@ static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct { - struct virtio_pcidev_msg hdr; - /* maximum size - we may only use parts of it */ - u8 data[8]; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .size = size, - .addr = offset, - }, - }; if (!dev) return; switch (size) { case 1: - msg.data[0] = (u8)val; - break; case 2: - put_unaligned_le16(val, (void *)msg.data); - break; case 4: - put_unaligned_le32(val, (void *)msg.data); - break; #ifdef CONFIG_64BIT case 8: - put_unaligned_le64(val, (void *)msg.data); - break; #endif + break; default: WARN(1, "invalid config space write size %d\n", size); return; } - WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); + dev->ops->cfgspace_write(dev, offset, size, val); } static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { @@ -318,6 +87,56 @@ static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { .write = um_pci_cfgspace_write, }; +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid bar read size %d\n", size); + return ULONG_MAX; + } + + return dev->ops->bar_read(dev, bar, offset, size); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid bar write size %d\n", size); + return; + } + + dev->ops->bar_write(dev, bar, offset, size, val); +} + static void um_pci_bar_copy_from(void *priv, void *buffer, unsigned int offset, int size) { @@ -325,53 +144,9 @@ static void um_pci_bar_copy_from(void *priv, void *buffer, struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_READ, - .bar = *resptr, - .size = size, - .addr = offset, - }; + u8 bar = *resptr; - memset(buffer, 0xff, size); - - um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); -} - -static unsigned long um_pci_bar_read(void *priv, unsigned int offset, - int size) -{ - /* 8 is maximum size - we may only use parts of it */ - u8 data[8]; - - switch (size) { - case 1: - case 2: - case 4: -#ifdef CONFIG_64BIT - case 8: -#endif - break; - default: - WARN(1, "invalid config space read size %d\n", size); - return ULONG_MAX; - } - - um_pci_bar_copy_from(priv, data, offset, size); - - switch (size) { - case 1: - return data[0]; - case 2: - return le16_to_cpup((void *)data); - case 4: - return le32_to_cpup((void *)data); -#ifdef CONFIG_64BIT - case 8: - return le64_to_cpup((void *)data); -#endif - default: - return ULONG_MAX; - } + dev->ops->bar_copy_from(dev, bar, buffer, offset, size); } static void um_pci_bar_copy_to(void *priv, unsigned int offset, @@ -381,43 +156,9 @@ static void um_pci_bar_copy_to(void *priv, unsigned int offset, struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); -} - -static void um_pci_bar_write(void *priv, unsigned int offset, int size, - unsigned long val) -{ - /* maximum size - we may only use parts of it */ - u8 data[8]; - - switch (size) { - case 1: - data[0] = (u8)val; - break; - case 2: - put_unaligned_le16(val, (void *)data); - break; - case 4: - put_unaligned_le32(val, (void *)data); - break; -#ifdef CONFIG_64BIT - case 8: - put_unaligned_le64(val, (void *)data); - break; -#endif - default: - WARN(1, "invalid config space write size %d\n", size); - return; - } - - um_pci_bar_copy_to(priv, offset, data, size); + dev->ops->bar_copy_to(dev, bar, offset, buffer, size); } static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) @@ -426,20 +167,9 @@ static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct { - struct virtio_pcidev_msg hdr; - u8 data; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }, - .data = value, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); + dev->ops->bar_set(dev, bar, offset, value, size); } static const struct logic_iomem_ops um_pci_device_bar_ops = { @@ -486,76 +216,6 @@ static void um_pci_rescan(void) pci_unlock_rescan_remove(); } -static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) -{ - struct scatterlist sg[1]; - - sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); - if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) - kfree(buf); - else if (kick) - virtqueue_kick(vq); -} - -static void um_pci_handle_irq_message(struct virtqueue *vq, - struct virtio_pcidev_msg *msg) -{ - struct virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - - if (!dev->irq) - return; - - /* we should properly chain interrupts, but on ARCH=um we don't care */ - - switch (msg->op) { - case VIRTIO_PCIDEV_OP_INT: - generic_handle_irq(dev->irq); - break; - case VIRTIO_PCIDEV_OP_MSI: - /* our MSI message is just the interrupt number */ - if (msg->size == sizeof(u32)) - generic_handle_irq(le32_to_cpup((void *)msg->data)); - else - generic_handle_irq(le16_to_cpup((void *)msg->data)); - break; - case VIRTIO_PCIDEV_OP_PME: - /* nothing to do - we already woke up due to the message */ - break; - default: - dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); - break; - } -} - -static void um_pci_cmd_vq_cb(struct virtqueue *vq) -{ - struct virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - void *cmd; - int len; - - if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) - return; - - while ((cmd = virtqueue_get_buf(vq, &len))) - um_pci_free_buf(dev, cmd); -} - -static void um_pci_irq_vq_cb(struct virtqueue *vq) -{ - struct virtio_pcidev_msg *msg; - int len; - - while ((msg = virtqueue_get_buf(vq, &len))) { - if (len >= sizeof(*msg)) - um_pci_handle_irq_message(vq, msg); - - /* recycle the message buffer */ - um_pci_irq_vq_addbuf(vq, msg, true); - } -} - #ifdef CONFIG_OF /* Copied from arch/x86/kernel/devicetree.c */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) @@ -577,200 +237,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) } #endif -static int um_pci_init_vqs(struct um_pci_device *dev) -{ - struct virtqueue_info vqs_info[] = { - { "cmd", um_pci_cmd_vq_cb }, - { "irq", um_pci_irq_vq_cb }, - }; - struct virtqueue *vqs[2]; - int err, i; - - err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); - if (err) - return err; - - dev->cmd_vq = vqs[0]; - dev->irq_vq = vqs[1]; - - virtio_device_ready(dev->vdev); - - for (i = 0; i < NUM_IRQ_MSGS; i++) { - void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); - - if (msg) - um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); - } - - virtqueue_kick(dev->irq_vq); - - return 0; -} - -static void __um_pci_virtio_platform_remove(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); - - mutex_lock(&um_pci_mtx); - um_pci_platform_device = NULL; - mutex_unlock(&um_pci_mtx); - - kfree(dev); -} - -static int um_pci_virtio_platform_probe(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - int ret; - - dev->platform = true; - - mutex_lock(&um_pci_mtx); - - if (um_pci_platform_device) { - mutex_unlock(&um_pci_mtx); - ret = -EBUSY; - goto out_free; - } - - ret = um_pci_init_vqs(dev); - if (ret) { - mutex_unlock(&um_pci_mtx); - goto out_free; - } - - um_pci_platform_device = dev; - - mutex_unlock(&um_pci_mtx); - - ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); - if (ret) - __um_pci_virtio_platform_remove(vdev, dev); - - return ret; - -out_free: - kfree(dev); - return ret; -} - -static int um_pci_virtio_probe(struct virtio_device *vdev) -{ - struct um_pci_device *dev; - int i, free = -1; - int err = -ENOSPC; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - dev->vdev = vdev; - vdev->priv = dev; - - if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) - return um_pci_virtio_platform_probe(vdev, dev); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev) - continue; - free = i; - break; - } - - if (free < 0) - goto error; - - err = um_pci_init_vqs(dev); - if (err) - goto error; - - dev->irq = irq_alloc_desc(numa_node_id()); - if (dev->irq < 0) { - err = dev->irq; - goto err_reset; - } - um_pci_devices[free].dev = dev; - vdev->priv = dev; - - mutex_unlock(&um_pci_mtx); - - device_set_wakeup_enable(&vdev->dev, true); - - /* - * In order to do suspend-resume properly, don't allow VQs - * to be suspended. - */ - virtio_uml_set_no_vq_suspend(vdev, true); - - um_pci_rescan(); - return 0; -err_reset: - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); -error: - mutex_unlock(&um_pci_mtx); - kfree(dev); - return err; -} - -static void um_pci_virtio_remove(struct virtio_device *vdev) -{ - struct um_pci_device *dev = vdev->priv; - int i; - - if (dev->platform) { - of_platform_depopulate(&vdev->dev); - __um_pci_virtio_platform_remove(vdev, dev); - return; - } - - device_set_wakeup_enable(&vdev->dev, false); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev != dev) - continue; - - um_pci_devices[i].dev = NULL; - irq_free_desc(dev->irq); - - break; - } - mutex_unlock(&um_pci_mtx); - - if (i < MAX_DEVICES) { - struct pci_dev *pci_dev; - - pci_dev = pci_get_slot(bridge->bus, i); - if (pci_dev) - pci_stop_and_remove_bus_device_locked(pci_dev); - } - - /* Stop all virtqueues */ - virtio_reset_device(vdev); - dev->cmd_vq = NULL; - dev->irq_vq = NULL; - vdev->config->del_vqs(vdev); - - kfree(dev); -} - -static struct virtio_device_id id_table[] = { - { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; -MODULE_DEVICE_TABLE(virtio, id_table); - -static struct virtio_driver um_pci_virtio_driver = { - .driver.name = "virtio-pci", - .id_table = id_table, - .probe = um_pci_virtio_probe, - .remove = um_pci_virtio_remove, -}; - static struct resource virt_cfgspace_resource = { .name = "PCI config space", .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, @@ -889,7 +355,7 @@ static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) } static struct irq_chip um_pci_msi_bottom_irq_chip = { - .name = "UM virtio MSI", + .name = "UM virtual MSI", .irq_compose_msi_msg = um_pci_compose_msi_msg, }; @@ -939,7 +405,7 @@ static const struct irq_domain_ops um_pci_inner_domain_ops = { }; static struct irq_chip um_pci_msi_irq_chip = { - .name = "UM virtio PCIe MSI", + .name = "UM virtual PCIe MSI", .irq_mask = pci_msi_mask_irq, .irq_unmask = pci_msi_unmask_irq, }; @@ -998,6 +464,78 @@ static struct resource virt_platform_resource = { .flags = IORESOURCE_MEM, }; +int um_pci_device_register(struct um_pci_device *dev) +{ + int i, free = -1; + int err = 0; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) { + err = -ENOSPC; + goto out; + } + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto out; + } + + um_pci_devices[free].dev = dev; + +out: + mutex_unlock(&um_pci_mtx); + if (!err) + um_pci_rescan(); + return err; +} + +void um_pci_device_unregister(struct um_pci_device *dev) +{ + int i; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + break; + } + mutex_unlock(&um_pci_mtx); + + if (i < MAX_DEVICES) { + struct pci_dev *pci_dev; + + pci_dev = pci_get_slot(bridge->bus, i); + if (pci_dev) + pci_stop_and_remove_bus_device_locked(pci_dev); + } +} + +int um_pci_platform_device_register(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device) + return -EBUSY; + um_pci_platform_device = dev; + return 0; +} + +void um_pci_platform_device_unregister(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device == dev) + um_pci_platform_device = NULL; +} + static int __init um_pci_init(void) { struct irq_domain_info inner_domain_info = { @@ -1014,10 +552,6 @@ static int __init um_pci_init(void) WARN_ON(logic_iomem_add_region(&virt_platform_resource, &um_pci_platform_ops)); - if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, - "No virtio device ID configured for PCI - no PCI support\n")) - return 0; - bridge = pci_alloc_host_bridge(0); if (!bridge) { err = -ENOMEM; @@ -1065,10 +599,8 @@ static int __init um_pci_init(void) if (err) goto free; - err = register_virtio_driver(&um_pci_virtio_driver); - if (err) - goto free; return 0; + free: if (!IS_ERR_OR_NULL(um_pci_inner_domain)) irq_domain_remove(um_pci_inner_domain); @@ -1080,11 +612,10 @@ static int __init um_pci_init(void) } return err; } -module_init(um_pci_init); +device_initcall(um_pci_init); static void __exit um_pci_exit(void) { - unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); diff --git a/arch/um/drivers/virt-pci.h b/arch/um/drivers/virt-pci.h new file mode 100644 index 000000000000..b20d1475d1eb --- /dev/null +++ b/arch/um/drivers/virt-pci.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VIRT_PCI_H +#define __UM_VIRT_PCI_H + +#include + +struct um_pci_device { + const struct um_pci_ops *ops; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + int irq; +}; + +struct um_pci_ops { + unsigned long (*cfgspace_read)(struct um_pci_device *dev, + unsigned int offset, int size); + void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset, + int size, unsigned long val); + + unsigned long (*bar_read)(struct um_pci_device *dev, int bar, + unsigned int offset, int size); + void (*bar_write)(struct um_pci_device *dev, int bar, + unsigned int offset, int size, unsigned long val); + + void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer, + unsigned int offset, int size); + void (*bar_copy_to)(struct um_pci_device *dev, int bar, + unsigned int offset, const void *buffer, int size); + void (*bar_set)(struct um_pci_device *dev, int bar, + unsigned int offset, u8 value, int size); +}; + +int um_pci_device_register(struct um_pci_device *dev); +void um_pci_device_unregister(struct um_pci_device *dev); + +int um_pci_platform_device_register(struct um_pci_device *dev); +void um_pci_platform_device_unregister(struct um_pci_device *dev); + +#endif /* __UM_VIRT_PCI_H */ diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c new file mode 100644 index 000000000000..3c4c4c928fdd --- /dev/null +++ b/arch/um/drivers/virtio_pcidev.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-pci.h" + +#define to_virtio_pcidev(_pdev) \ + container_of(_pdev, struct virtio_pcidev_device, pdev) + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +struct virtio_pcidev_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +struct virtio_pcidev_device { + struct um_pci_device pdev; + struct virtio_device *vdev; + + struct virtqueue *cmd_vq, *irq_vq; + +#define VIRTIO_PCIDEV_WRITE_BUFS 20 + struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS); + +#define UM_PCI_STAT_WAITING 0 + unsigned long status; + + bool platform; +}; + +static unsigned int virtio_pcidev_max_delay_us = 40000; +module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644); + +static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } + + *posted = false; + return VIRTIO_PCIDEV_WRITE_BUFS; +} + +static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) { + kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]); + dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} + +static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? &in_sg : NULL, + }; + struct virtio_pcidev_message_buffer *buf; + int delay_count = 0; + bool bounce_out; + int ret, len; + int buf_idx; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); + + buf_idx = virtio_pcidev_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + if (posted && extra && extra_size > sizeof(buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + virtio_pcidev_free_buf(dev, buf); + return -ENOMEM; + } + extra = dev->extra_ptrs[buf_idx]; + } else if (extra && extra_size <= sizeof(buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; + } else { + cmd = (void *)buf; + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 1 : 0, + cmd, GFP_ATOMIC); + if (ret) { + virtio_pcidev_free_buf(dev, buf); + return ret; + } + + if (posted) { + virtqueue_kick(dev->cmd_vq); + return 0; + } + + /* kick and poll for getting a response on the queue */ + set_bit(UM_PCI_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + ret = 0; + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == buf) + break; + + if (completed) + virtio_pcidev_free_buf(dev, completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > virtio_pcidev_max_delay_us, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(UM_PCI_STAT_WAITING, &dev->status); + + if (bounce_out) + memcpy(out, buf->data, out_size); + + virtio_pcidev_free_buf(dev, buf); + + return ret; +} + +static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* max 8, we might not use it all */ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + /* size has been checked in um_pci_cfgspace_read() */ + if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = size, + .addr = offset, + }, + }; + + /* size has been checked in um_pci_cfgspace_write() */ + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + } + + WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev, + int bar, void *buffer, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = bar, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_read() */ + virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev, + int bar, unsigned int offset, + const void *buffer, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }; + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_write() */ + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size); +} + +static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }, + .data = value, + }; + + virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct um_pci_ops virtio_pcidev_um_pci_ops = { + .cfgspace_read = virtio_pcidev_cfgspace_read, + .cfgspace_write = virtio_pcidev_cfgspace_write, + .bar_read = virtio_pcidev_bar_read, + .bar_write = virtio_pcidev_bar_write, + .bar_copy_from = virtio_pcidev_bar_copy_from, + .bar_copy_to = virtio_pcidev_bar_copy_to, + .bar_set = virtio_pcidev_bar_set, +}; + +static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void virtio_pcidev_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + + if (!dev->pdev.irq) + return; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->pdev.irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) + virtio_pcidev_free_buf(dev, cmd); +} + +static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + virtio_pcidev_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + virtio_pcidev_irq_vq_addbuf(vq, msg, true); + } +} + +static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev) +{ + struct virtqueue_info vqs_info[] = { + { "cmd", virtio_pcidev_cmd_vq_cb }, + { "irq", virtio_pcidev_irq_vq_cb }, + }; + struct virtqueue *vqs[2]; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + virtio_device_ready(dev->vdev); + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + um_pci_platform_device_unregister(&dev->pdev); + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + int err; + + dev->platform = true; + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_platform_device_register(&dev->pdev); + if (err) + goto err_reset; + + err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); + if (err) + goto err_unregister; + + return 0; + +err_unregister: + um_pci_platform_device_unregister(&dev->pdev); +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static int virtio_pcidev_virtio_probe(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + dev->pdev.ops = &virtio_pcidev_um_pci_ops; + + if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) + return virtio_pcidev_virtio_platform_probe(vdev, dev); + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_device_register(&dev->pdev); + if (err) + goto err_reset; + + device_set_wakeup_enable(&vdev->dev, true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. + */ + virtio_uml_set_no_vq_suspend(vdev, true); + + return 0; + +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev = vdev->priv; + + if (dev->platform) { + of_platform_depopulate(&vdev->dev); + __virtio_pcidev_virtio_platform_remove(vdev, dev); + return; + } + + device_set_wakeup_enable(&vdev->dev, false); + + um_pci_device_unregister(&dev->pdev); + + /* Stop all virtqueues */ + virtio_reset_device(vdev); + dev->cmd_vq = NULL; + dev->irq_vq = NULL; + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver virtio_pcidev_virtio_driver = { + .driver.name = "virtio-pci", + .id_table = id_table, + .probe = virtio_pcidev_virtio_probe, + .remove = virtio_pcidev_virtio_remove, +}; + +static int __init virtio_pcidev_init(void) +{ + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + return register_virtio_driver(&virtio_pcidev_virtio_driver); +} +late_initcall(virtio_pcidev_init); + +static void __exit virtio_pcidev_exit(void) +{ + unregister_virtio_driver(&virtio_pcidev_virtio_driver); +} +module_exit(virtio_pcidev_exit); From cef721e0d53d2b64f2ba177c63a0dfdd7c0daf17 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 24 Feb 2025 19:18:19 +0100 Subject: [PATCH 1340/2157] um: Store full CSGSFS and SS register from mcontext Doing this allows using registers as retrieved from an mcontext to be pushed to a process using PTRACE_SETREGS. It is not entirely clear to me why CSGSFS was masked. Doing so creates issues when using the mcontext as process state in seccomp and simply copying the register appears to work perfectly fine for ptrace. Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20250224181827.647129-2-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- arch/x86/um/os-Linux/mcontext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c index d2f3a595b4ef..37decaa74761 100644 --- a/arch/x86/um/os-Linux/mcontext.c +++ b/arch/x86/um/os-Linux/mcontext.c @@ -28,8 +28,7 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) COPY(RIP); COPY2(EFLAGS, EFL); COPY2(CS, CSGSFS); - regs->gp[CS / sizeof(unsigned long)] &= 0xffff; - regs->gp[CS / sizeof(unsigned long)] |= 3; + regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48; #endif } From 35bc1883733c81ff307c9c73bd00191313e651af Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:45 +0800 Subject: [PATCH 1341/2157] dt-bindings: riscv: add bfloat16 ISA extension description Add description for the BFloat16 precision Floating-Point ISA extension, (Zfbfmin, Zvfbfmin, Zvfbfwma). which was ratified in commit 4dc23d62 ("Added Chapter title to BF16") of the riscv-isa-manual. Signed-off-by: Inochi Amaoto Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250213003849.147358-2-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 9c7dd7e75e0c..0a1f1a76d129 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -329,6 +329,12 @@ properties: instructions, as ratified in commit 056b6ff ("Zfa is ratified") of riscv-isa-manual. + - const: zfbfmin + description: + The standard Zfbfmin extension which provides minimal support for + 16-bit half-precision brain floating-point instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + - const: zfh description: The standard Zfh extension for 16-bit half-precision binary @@ -525,6 +531,18 @@ properties: in commit 6f702a2 ("Vector extensions are now ratified") of riscv-v-spec. + - const: zvfbfmin + description: + The standard Zvfbfmin extension for minimal support for vectored + 16-bit half-precision brain floating-point instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + + - const: zvfbfwma + description: + The standard Zvfbfwma extension for vectored half-precision brain + floating-point widening multiply-accumulate instructions, as ratified + in commit 4dc23d62 ("Added Chapter title to BF16") of riscv-isa-manual. + - const: zvfh description: The standard Zvfh extension for vectored half-precision @@ -663,6 +681,33 @@ properties: then: contains: const: zca + # Zfbfmin depends on F + - if: + contains: + const: zfbfmin + then: + contains: + const: f + # Zvfbfmin depends on V or Zve32f + - if: + contains: + const: zvfbfmin + then: + oneOf: + - contains: + const: v + - contains: + const: zve32f + # Zvfbfwma depends on Zfbfmin and Zvfbfmin + - if: + contains: + const: zvfbfwma + then: + allOf: + - contains: + const: zfbfmin + - contains: + const: zvfbfmin allOf: # Zcf extension does not exist on rv64 From e186c28dda11e8d6d829b08b9da2ec7c342edd78 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:46 +0800 Subject: [PATCH 1342/2157] riscv: add ISA extension parsing for bfloat16 ISA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add parsing for Zfbmin, Zvfbfmin, Zvfbfwma ISA extension which were ratified in 4dc23d62 ("Added Chapter title to BF16") of the riscv-isa-manual. Signed-off-by: Inochi Amaoto Reviewed-by: Clément Léger Link: https://lore.kernel.org/r/20250213003849.147358-3-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/hwcap.h | 3 +++ arch/riscv/kernel/cpufeature.c | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 869da082252a..14cc29f2a723 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -100,6 +100,9 @@ #define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_SVADE 92 #define RISCV_ISA_EXT_SVADU 93 +#define RISCV_ISA_EXT_ZFBFMIN 94 +#define RISCV_ISA_EXT_ZVFBFMIN 95 +#define RISCV_ISA_EXT_ZVFBFWMA 96 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c2..40021459a257 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -76,6 +76,15 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static int riscv_ext_f_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -136,6 +145,28 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32F)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zvfbfwma_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZFBFMIN) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVFBFMIN)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -341,6 +372,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), + __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), @@ -373,6 +405,9 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfmin, RISCV_ISA_EXT_ZVFBFMIN, riscv_vector_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfwma, RISCV_ISA_EXT_ZVFBFWMA, + riscv_ext_zvfbfwma_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), From a4863e002cf0dd6fb2f06796f16d7bc0974e9845 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 13 Feb 2025 08:38:47 +0800 Subject: [PATCH 1343/2157] riscv: hwprobe: export bfloat16 ISA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export Zfbmin, Zvfbfmin, Zvfbfwma ISA extension through hwprobe. Signed-off-by: Inochi Amaoto Reviewed-by: Clément Léger Link: https://lore.kernel.org/r/20250213003849.147358-4-inochiama@gmail.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 12 ++++++++++++ arch/riscv/include/uapi/asm/hwprobe.h | 3 +++ arch/riscv/kernel/sys_hwprobe.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 955fbcd19ce9..a9cb40e407a4 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -242,6 +242,18 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as defined in version 1.0 of the RISC-V Pointer Masking extensions. + * :c:macro:`RISCV_HWPROBE_EXT_ZFBFMIN`: The Zfbfmin extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFMIN`: The Zvfbfmin extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFWMA`: The Zvfbfwma extension is supported as + defined in the RISC-V ISA manual starting from commit 4dc23d6229de + ("Added Chapter title to BF16"). + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f77..aecc1c800d54 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,9 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#define RISCV_HWPROBE_EXT_ZFBFMIN (1ULL << 50) +#define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 51) +#define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 52) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc48..bd215f58bd1b 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -131,6 +131,8 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVE64D); EXT_KEY(ZVE64F); EXT_KEY(ZVE64X); + EXT_KEY(ZVFBFMIN); + EXT_KEY(ZVFBFWMA); EXT_KEY(ZVFH); EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); @@ -147,6 +149,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCD); EXT_KEY(ZCF); EXT_KEY(ZFA); + EXT_KEY(ZFBFMIN); EXT_KEY(ZFH); EXT_KEY(ZFHMIN); } From de70b532f91bbcfa9f4100f1a2cd62810c799239 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:04 +0800 Subject: [PATCH 1344/2157] RISC-V: Enable cbo.clean/flush in usermode Enabling cbo.clean and cbo.flush in user mode makes it more convenient to manage the cache state and achieve better performance. Reviewed-by: Andrew Jones Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-2-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 0ff97cf4dc2a..ef34622902bf 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -32,6 +32,7 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -100,6 +101,8 @@ static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); return -EINVAL; } + + any_cpu_has_zicbom = true; return 0; } @@ -1036,6 +1039,11 @@ void __init riscv_user_isa_enable(void) current->thread.envcfg |= ENVCFG_CBZE; else if (any_cpu_has_zicboz) pr_warn("Zicboz disabled as it is unavailable on some harts\n"); + + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOM)) + current->thread.envcfg |= ENVCFG_CBCFE; + else if (any_cpu_has_zicbom) + pr_warn("Zicbom disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE From eb10039709402cc1fab1533a1ecc1a54c2152e68 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:05 +0800 Subject: [PATCH 1345/2157] RISC-V: hwprobe: Expose Zicbom extension and its block size Expose Zicbom through hwprobe and also provide a key to extract its respective block size. [ alex: Fix merge conflicts and hwprobe numbering ] Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-3-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 6 ++++++ arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 8 +++++++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 67ef406cdb67..2792f12e90ba 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -260,6 +260,9 @@ The following keys are defined: defined in the RISC-V ISA manual starting from commit 4dc23d6229de ("Added Chapter title to BF16"). + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. @@ -321,3 +324,6 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor extension is supported in the T-Head ISA extensions spec starting from commit a18c801634 ("Add T-Head VECTOR vendor extension. "). + +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicbom block in bytes. diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index dd624523981c..1f690fea0e03 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 11 +#define RISCV_HWPROBE_MAX_KEY 12 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 08ab52fe8004..056decf65b1b 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -78,6 +78,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZFBFMIN (1ULL << 52) #define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 53) #define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 54) +#define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -100,6 +101,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 #define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 +#define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 32509111fe0a..cfe6ceaf3069 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -107,6 +107,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCA); EXT_KEY(ZCB); EXT_KEY(ZCMOP); + EXT_KEY(ZICBOM); EXT_KEY(ZICBOZ); EXT_KEY(ZICNTR); EXT_KEY(ZICOND); @@ -166,7 +167,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; @@ -284,6 +285,11 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) pair->value = riscv_cboz_block_size; break; + case RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM)) + pair->value = riscv_cbom_block_size; + break; case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: pair->value = user_max_virt_addr(); break; From 36dec9e44805f80d32277be76d16c88373bdc20d Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 26 Feb 2025 14:32:06 +0800 Subject: [PATCH 1346/2157] RISC-V: selftests: Add TEST_ZICBOM into CBO tests Add test for Zicbom and its block size into CBO tests, when Zicbom is present, test that cbo.clean/flush may be issued and works. As the software can't verify the clean/flush functions, we just judged that cbo.clean/flush isn't executed illegally. Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Signed-off-by: Yunhui Cui Link: https://lore.kernel.org/r/20250226063206.71216-4-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- tools/testing/selftests/riscv/hwprobe/cbo.c | 66 +++++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index a40541bb7c7d..5e96ef785d0d 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -50,6 +50,14 @@ static void cbo_clean(char *base) { cbo_insn(base, 1); } static void cbo_flush(char *base) { cbo_insn(base, 2); } static void cbo_zero(char *base) { cbo_insn(base, 4); } +static void test_no_cbo_inval(void *arg) +{ + ksft_print_msg("Testing cbo.inval instruction remain privileged\n"); + illegal_insn = false; + cbo_inval(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.inval\n"); +} + static void test_no_zicbom(void *arg) { ksft_print_msg("Testing Zicbom instructions remain privileged\n"); @@ -61,10 +69,6 @@ static void test_no_zicbom(void *arg) illegal_insn = false; cbo_flush(&mem[0]); ksft_test_result(illegal_insn, "No cbo.flush\n"); - - illegal_insn = false; - cbo_inval(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.inval\n"); } static void test_no_zicboz(void *arg) @@ -81,6 +85,30 @@ static bool is_power_of_2(__u64 n) return n != 0 && (n & (n - 1)) == 0; } +static void test_zicbom(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE, + }; + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + long rc; + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE && + is_power_of_2(block_size), "Zicbom block size\n"); + ksft_print_msg("Zicbom block size: %llu\n", block_size); + + illegal_insn = false; + cbo_clean(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.clean\n"); + + illegal_insn = false; + cbo_flush(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.flush\n"); +} + static void test_zicboz(void *arg) { struct riscv_hwprobe pair = { @@ -129,7 +157,7 @@ static void test_zicboz(void *arg) ksft_test_result_pass("cbo.zero check\n"); } -static void check_no_zicboz_cpus(cpu_set_t *cpus) +static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo) { struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0, @@ -137,6 +165,7 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) cpu_set_t one_cpu; int i = 0, c = 0; long rc; + char *cbostr; while (i++ < CPU_COUNT(cpus)) { while (!CPU_ISSET(c, cpus)) @@ -148,10 +177,13 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); - if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) - ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n" - "Use taskset to select a set of harts where Zicboz\n" - "presence (present or not) is consistent for each hart\n"); + cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom"; + + if (pair.value & cbo) + ksft_exit_fail_msg("%s is only present on a subset of harts.\n" + "Use taskset to select a set of harts where %s\n" + "presence (present or not) is consistent for each hart\n", + cbostr, cbostr); ++c; } } @@ -159,7 +191,9 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) enum { TEST_ZICBOZ, TEST_NO_ZICBOZ, + TEST_ZICBOM, TEST_NO_ZICBOM, + TEST_NO_CBO_INVAL, }; static struct test_info { @@ -169,7 +203,9 @@ static struct test_info { } tests[] = { [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz }, [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz }, - [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom }, + [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom }, + [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom }, + [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval }, }; int main(int argc, char **argv) @@ -189,6 +225,7 @@ int main(int argc, char **argv) assert(rc == 0); tests[TEST_NO_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOM].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; } rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus); @@ -206,7 +243,14 @@ int main(int argc, char **argv) tests[TEST_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOZ].enabled = false; } else { - check_no_zicboz_cpus(&cpus); + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOZ); + } + + if (pair.value & RISCV_HWPROBE_EXT_ZICBOM) { + tests[TEST_ZICBOM].enabled = true; + tests[TEST_NO_ZICBOM].enabled = false; + } else { + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM); } for (i = 0; i < ARRAY_SIZE(tests); ++i) From a4a58f510bd8c28f6191eed5698a5291b420c2d6 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Sat, 9 Nov 2024 09:46:05 +0800 Subject: [PATCH 1347/2157] riscv: Remove unused TASK_TI_FLAGS Since commit f0bddf50586d ("riscv: entry: Convert to generic entry"), TASK_TI_FLAGS is not used any more, so remove it. Fixes: f0bddf50586d ("riscv: entry: Convert to generic entry") Signed-off-by: Jinjie Ruan Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20241109014605.2801492-1-ruanjinjie@huawei.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e89455a6a0e5..16490755304e 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -36,7 +36,6 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); - OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); From 7f238b12660e53d7905b0d9989866b95a32c2467 Mon Sep 17 00:00:00 2001 From: Chin Yik Ming Date: Thu, 30 Jan 2025 04:38:43 +0800 Subject: [PATCH 1348/2157] riscv: Simplify base extension checks and direct boolean return Reduce three lines checking to single line using a ternary conditional expression for getting the base extension word. In addition, the test_bit macro function already return a boolean which matches the return type of the caller, so directly return the result of the test_bit macro function. Signed-off-by: Chin Yik Ming Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250129203843.1136838-1-yikming2222@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 6 ++---- arch/riscv/kernel/vendor_extensions.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index ef34622902bf..6ec9499e2cf2 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -54,9 +54,7 @@ u32 thead_vlenb_of; */ unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) { - if (!isa_bitmap) - return riscv_isa[0]; - return isa_bitmap[0]; + return !isa_bitmap ? riscv_isa[0] : isa_bitmap[0]; } EXPORT_SYMBOL_GPL(riscv_isa_extension_base); @@ -77,7 +75,7 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i if (bit >= RISCV_ISA_EXT_MAX) return false; - return test_bit(bit, bmap) ? true : false; + return test_bit(bit, bmap); } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a31ff84740eb..9feb7f67a0a3 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -61,6 +61,6 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig if (bit >= RISCV_ISA_VENDOR_EXT_MAX) return false; - return test_bit(bit, bmap->isa) ? true : false; + return test_bit(bit, bmap->isa); } EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); From ffef54ad41101f98ea6dd1dcd71c60bb6b7c8ee9 Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 19 Feb 2025 19:41:34 +0800 Subject: [PATCH 1349/2157] riscv: Add stimecmp save and restore If the HW support the SSTC extension, we should save and restore the stimecmp register while cpu non retention suspend. Signed-off-by: Nick Hu Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250219114135.27764-2-nick.hu@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/suspend.h | 4 ++++ arch/riscv/kernel/suspend.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 4ffb022b097f..dc5782b5fbad 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -18,6 +18,10 @@ struct suspend_context { unsigned long ie; #ifdef CONFIG_MMU unsigned long satp; + unsigned long stimecmp; +#if __riscv_xlen < 64 + unsigned long stimecmph; +#endif #endif }; diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 9a8a0dc035b2..24b3f57d467f 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -30,6 +30,13 @@ void suspend_save_csrs(struct suspend_context *context) */ #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + context->stimecmp = csr_read(CSR_STIMECMP); +#if __riscv_xlen < 64 + context->stimecmph = csr_read(CSR_STIMECMPH); +#endif + } + context->satp = csr_read(CSR_SATP); #endif } @@ -43,6 +50,13 @@ void suspend_restore_csrs(struct suspend_context *context) csr_write(CSR_IE, context->ie); #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + csr_write(CSR_STIMECMP, context->stimecmp); +#if __riscv_xlen < 64 + csr_write(CSR_STIMECMPH, context->stimecmph); +#endif + } + csr_write(CSR_SATP, context->satp); #endif } From 70c93b026ed07078e933583591aa9ca6701cd9da Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 19 Feb 2025 19:41:35 +0800 Subject: [PATCH 1350/2157] clocksource/drivers/timer-riscv: Stop stimecmp when cpu hotplug Stop the timer when the cpu is going to be offline otherwise the timer interrupt may be pending while performing power-down. Suggested-by: Anup Patel Link: https://lore.kernel.org/lkml/20240829033904.477200-3-nick.hu@sifive.com/T/#u Signed-off-by: Nick Hu Reviewed-by: Anup Patel Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/20250219114135.27764-3-nick.hu@sifive.com Signed-off-by: Alexandre Ghiti --- drivers/clocksource/timer-riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 48ce50c5f5e6..4d7cf338824a 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -126,7 +126,13 @@ static int riscv_timer_starting_cpu(unsigned int cpu) static int riscv_timer_dying_cpu(unsigned int cpu) { + /* + * Stop the timer when the cpu is going to be offline otherwise + * the timer interrupt may be pending while performing power-down. + */ + riscv_clock_event_stop(); disable_percpu_irq(riscv_clock_event_irq); + return 0; } From 418af0eafb48bbd972040703e5ff5ba94526b5b5 Mon Sep 17 00:00:00 2001 From: Chin Yik Ming Date: Fri, 15 Nov 2024 05:27:25 +0800 Subject: [PATCH 1351/2157] riscv: Fix a comment typo in set_mm_asid() s/verion/version Signed-off-by: Chin Yik Ming Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20241114212725.4172401-1-yikming2222@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 4abe3de23225..55c20ad1f744 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -158,7 +158,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) * * - We get a zero back from the cmpxchg and end up waiting on the * lock. Taking the lock synchronises with the rollover and so - * we are forced to see the updated verion. + * we are forced to see the updated version. * * - We get a valid context back from the cmpxchg then we continue * using old ASID because __flush_context() would have marked ASID From c2db83fe10331861d7feb60615c18a72eaf9d444 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:07:17 +0100 Subject: [PATCH 1352/2157] riscv: defconfig: Disable Renesas SoC support Follow-up to commit e36ddf3226864e09 ("riscv: defconfig: Disable RZ/Five peripheral support") in v6.12-rc1: - Disable ARCH_RENESAS, too, as currently RZ/Five is the sole Renesas RISC-V SoC, - Drop no longer needed explicit disable of USB_XHCI_RCAR, which depends on ARCH_RENESAS. Signed-off-by: Geert Uytterhoeven Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/e8a2fb273c8c68bd6d526b924b4212f397195b28.1738764211.git.geert+renesas@glider.be Signed-off-by: Alexandre Ghiti --- arch/riscv/configs/defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 0f7dcbe3c45b..3c8e16d71e17 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -26,7 +26,6 @@ CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_MICROCHIP=y -CONFIG_ARCH_RENESAS=y CONFIG_ARCH_SIFIVE=y CONFIG_ARCH_SOPHGO=y CONFIG_ARCH_SPACEMIT=y @@ -202,7 +201,6 @@ CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y -# CONFIG_USB_XHCI_RCAR is not set CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y From 72770690e02c082efbbbd78d768028cf8dd18b9c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 15:09:03 +0100 Subject: [PATCH 1353/2157] riscv: Remove duplicate CLINT_TIMER selections Since commit f862bbf4cdca696e ("riscv: Allow NOMMU kernels to run in S-mode") in v6.10, CLINT_TIMER is selected by the main RISCV symbol when RISCV_M_MODE is enabled. Signed-off-by: Geert Uytterhoeven Reviewed-by: Conor Dooley Acked-by: Conor Dooley Link: https://lore.kernel.org/r/ce55529a42fa232cacd580e38866c60701f91095.1738764474.git.geert+renesas@glider.be Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig.socs | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 17606940bb52..8b503e54fa1b 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -59,7 +59,6 @@ config ARCH_THEAD config ARCH_VIRT bool "QEMU Virt Machine" - select CLINT_TIMER if RISCV_M_MODE select POWER_RESET select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF @@ -79,7 +78,6 @@ config ARCH_CANAAN config SOC_CANAAN_K210 bool "Canaan Kendryte K210 SoC" depends on !MMU && ARCH_CANAAN - select CLINT_TIMER if RISCV_M_MODE select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK From bba547810c66434475d8800b3411c59ef71eafe9 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 24 Feb 2025 18:42:21 -0800 Subject: [PATCH 1354/2157] riscv: tracing: Fix __write_overflow_field in ftrace_partial_regs() The size of ®s->a0 is unknown, causing the error: ../include/linux/fortify-string.h:571:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] Fix this by wrapping the required registers in pt_regs with struct_group() and reference the group when doing the offending memcpy(). Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250224-fix_ftrace_partial_regs-v1-1-54b906417e86@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 2 +- arch/riscv/include/asm/ptrace.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c4721ce44ca4..ec6db1162021 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -207,7 +207,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) { struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); - memcpy(®s->a0, afregs->args, sizeof(afregs->args)); + memcpy(®s->a_regs, afregs->args, sizeof(afregs->args)); regs->epc = afregs->epc; regs->ra = afregs->ra; regs->sp = afregs->sp; diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index b5b0adcc85c1..2910231977cb 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -23,14 +23,16 @@ struct pt_regs { unsigned long t2; unsigned long s0; unsigned long s1; - unsigned long a0; - unsigned long a1; - unsigned long a2; - unsigned long a3; - unsigned long a4; - unsigned long a5; - unsigned long a6; - unsigned long a7; + struct_group(a_regs, + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + ); unsigned long s2; unsigned long s3; unsigned long s4; From 82e81b89501a9f19a3a0b0d7d9641d86c1956284 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Dec 2024 09:08:25 +0900 Subject: [PATCH 1355/2157] riscv: migrate to the generic rule for built-in DTB Commit 654102df2ac2 ("kbuild: add generic support for built-in boot DTBs") introduced generic support for built-in DTBs. Select GENERIC_BUILTIN_DTB when built-in DTB support is enabled. To keep consistency across architectures, this commit also renames CONFIG_BUILTIN_DTB_SOURCE to CONFIG_BUILTIN_DTB_NAME. Signed-off-by: Masahiro Yamada Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20241222000836.2578171-1-masahiroy@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/Kbuild | 1 - arch/riscv/Kconfig | 3 ++- arch/riscv/boot/dts/Makefile | 2 -- arch/riscv/configs/nommu_k210_defconfig | 2 +- arch/riscv/configs/nommu_k210_sdcard_defconfig | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 2c585f7a0b6e..126fb738fc44 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ net/ -obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ obj-$(CONFIG_CRYPTO) += crypto/ obj-y += errata/ obj-$(CONFIG_KVM) += kvm/ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ff35eb16853c..bae5fd79d690 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1272,13 +1272,14 @@ config RISCV_ISA_FALLBACK config BUILTIN_DTB bool "Built-in device tree" depends on OF && NONPORTABLE + select GENERIC_BUILTIN_DTB help Build a device tree into the Linux image. This option should be selected if no bootloader is being used. If unsure, say N. -config BUILTIN_DTB_SOURCE +config BUILTIN_DTB_NAME string "Built-in device tree source" depends on BUILTIN_DTB help diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index bff887d38abe..64a898da9aee 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -8,5 +8,3 @@ subdir-y += sophgo subdir-y += spacemit subdir-y += starfive subdir-y += thead - -obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE)) diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 87ff5a1233af..ee18d1e333f2 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -35,7 +35,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 95cbd574f291..e770d81b738e 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -27,7 +27,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set From 5f1a58ed91a040d4625d854f9bb3dd4995919202 Mon Sep 17 00:00:00 2001 From: Juhan Jin Date: Thu, 6 Feb 2025 13:28:36 -0600 Subject: [PATCH 1356/2157] riscv: ftrace: Add parentheses in macro definitions of make_call_t0 and make_call_ra This patch adds parentheses to parameters caller and callee of macros make_call_t0 and make_call_ra. Every existing invocation of these two macros uses a single variable for each argument, so the absence of the parentheses seems okay. However, future invocations might use more complex expressions as arguments. For example, a future invocation might look like this: make_call_t0(a - b, c, call). Without parentheses in the macro definition, the macro invocation expands to: ... unsigned int offset = (unsigned long) c - (unsigned long) a - b; ... which is clearly wrong. The use of parentheses ensures arguments are correctly evaluated and potentially saves future users of make_call_t0 and make_call_ra debugging trouble. Fixes: 6724a76cff85 ("riscv: ftrace: Reduce the detour code size to half") Signed-off-by: Juhan Jin Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/tencent_AE90AA59903A628E87E9F80E563DA5BA5508@qq.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index ec6db1162021..9704a73e515a 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -92,7 +92,7 @@ struct dyn_arch_ftrace { #define make_call_t0(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_t0(offset); \ call[1] = to_jalr_t0(offset); \ } while (0) @@ -108,7 +108,7 @@ do { \ #define make_call_ra(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_ra(offset); \ call[1] = to_jalr_ra(offset); \ } while (0) From eac5b138814a69435dae04c41591023477b3a18d Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Tue, 14 Jan 2025 00:30:20 +0800 Subject: [PATCH 1357/2157] riscv: remove redundant CMDLINE_FORCE check Drop redundant CMDLINE_FORCE check as it's already done in function early_init_dt_scan_chosen(). Signed-off-by: Zixian Zeng Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250114-rebund-v1-1-5632b2d54d6c@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4fe45daa6281..c174544eefc8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -235,11 +235,6 @@ static void __init parse_dtb(void) } else { pr_err("No DTB passed to the kernel\n"); } - -#ifdef CONFIG_CMDLINE_FORCE - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); - pr_info("Forcing kernel command line to: %s\n", boot_command_line); -#endif } #if defined(CONFIG_RISCV_COMBO_SPINLOCKS) From eb8db421ce83f4acf3ca51e642120f42adea3a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 08:37:58 +0100 Subject: [PATCH 1358/2157] riscv: mm: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are not meant to be used through printk(). It can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250217-restricted-pointers-riscv-v1-1-72a078076a76@linutronix.de Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/physaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 18706f457da7..559d291fac5c 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -12,7 +12,7 @@ phys_addr_t __virt_to_phys(unsigned long x) * Boundary checking aginst the kernel linear mapping space. */ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x), - "virt_to_phys used for non-linear address: %pK (%pS)\n", + "virt_to_phys used for non-linear address: %p (%pS)\n", (void *)x, (void *)x); return __va_to_pa_nodebug(x); From 475afa39b123699e910c61ad9a51cedce4a0d310 Mon Sep 17 00:00:00 2001 From: Tingbo Liao Date: Fri, 28 Feb 2025 01:08:01 -0800 Subject: [PATCH 1359/2157] riscv: Fix the __riscv_copy_vec_words_unaligned implementation Correct the VEC_S macro definition to fix the implementation of vector words copy in the case of unalignment in RISC-V. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Tingbo Liao Link: https://lore.kernel.org/r/20250228090801.8334-1-tingbo.liao@starfivetech.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/vec-copy-unaligned.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S index d16f19f1b3b6..7ce4de6f6e69 100644 --- a/arch/riscv/kernel/vec-copy-unaligned.S +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -11,7 +11,7 @@ #define WORD_SEW CONCATENATE(e, WORD_EEW) #define VEC_L CONCATENATE(vle, WORD_EEW).v -#define VEC_S CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v /* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ From 33981b1c4e499021421686dcfa7b3d23a430d00e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Feb 2025 10:06:13 +0100 Subject: [PATCH 1360/2157] riscv: Fix missing __free_pages() in check_vector_unaligned_access() The locally allocated pages are never freed up, so add the corresponding __free_pages(). Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Link: https://lore.kernel.org/r/20250228090613.345309-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 91f189cf1611..074ac4abd023 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -349,7 +349,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", cpu); - return; + goto free; } if (word_cycles < byte_cycles) @@ -363,6 +363,9 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); per_cpu(vector_misaligned_access, cpu) = speed; + +free: + __free_pages(page, MISALIGNED_BUFFER_ORDER); } static int riscv_online_cpu_vec(unsigned int cpu) From 16a0ca5e4e79ca52fb1c4a9194dc6bdadacae4fb Mon Sep 17 00:00:00 2001 From: Hajime Tazaki Date: Mon, 20 Jan 2025 15:00:03 +0900 Subject: [PATCH 1361/2157] um: x86: clean up elf specific definitions The file arch/x86/um/asm/module.h is equivalent to the definition of asm-generic. Thus this commit cleans up to use it. Signed-off-by: Hajime Tazaki Link: https://patch.msgid.link/2d70a0ed79ee0a0bef80ad4790063f4833dd9bed.1737348399.git.thehajime@gmail.com Signed-off-by: Johannes Berg --- arch/um/include/asm/Kbuild | 1 + arch/x86/um/asm/module.h | 24 ------------------------ 2 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 arch/x86/um/asm/module.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 428f2c5158c2..04ab3b653a48 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mmiowb.h +generic-y += module.h generic-y += module.lds.h generic-y += param.h generic-y += parport.h diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h deleted file mode 100644 index a3b061d66082..000000000000 --- a/arch/x86/um/asm/module.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_MODULE_H -#define __UM_MODULE_H - -/* UML is simple */ -struct mod_arch_specific -{ -}; - -#ifdef CONFIG_X86_32 - -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Ehdr Elf32_Ehdr - -#else - -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Ehdr Elf64_Ehdr - -#endif - -#endif From e8eb8e1bdae94b9e003f5909519fd311d0936890 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 17 Mar 2025 03:12:13 +0000 Subject: [PATCH 1362/2157] riscv: fgraph: Select HAVE_FUNCTION_GRAPH_TRACER depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, fgraph on riscv relies on the infrastructure of DYNAMIC_FTRACE_WITH_ARGS. However, DYNAMIC_FTRACE_WITH_ARGS may be turned off on riscv, which will cause the enabled fgraph to be abnormal. Therefore, let's select HAVE_FUNCTION_GRAPH_TRACER depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS. Fixes: a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503160820.dvqMpH0g-lkp@intel.com/ Signed-off-by: Pu Lehui Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250317031214.4138436-1-pulehui@huaweicloud.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bae5fd79d690..e2c80e88b5cd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -150,7 +150,7 @@ config RISCV select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU From de203da734fae00e75be50220ba5391e7beecdf9 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Wed, 16 Aug 2023 16:33:05 +0800 Subject: [PATCH 1363/2157] ntb_hw_switchtec: Fix shift-out-of-bounds in switchtec_ntb_mw_set_trans There is a kernel API ntb_mw_clear_trans() would pass 0 to both addr and size. This would make xlate_pos negative. [ 23.734156] switchtec switchtec0: MW 0: part 0 addr 0x0000000000000000 size 0x0000000000000000 [ 23.734158] ================================================================================ [ 23.734172] UBSAN: shift-out-of-bounds in drivers/ntb/hw/mscc/ntb_hw_switchtec.c:293:7 [ 23.734418] shift exponent -1 is negative Ensuring xlate_pos is a positive or zero before BIT. Fixes: 1e2fd202f859 ("ntb_hw_switchtec: Check for alignment of the buffer in mw_set_trans()") Signed-off-by: Yajun Deng Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index ad1786be2554..f851397b65d6 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -288,7 +288,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (size != 0 && xlate_pos < 12) return -EINVAL; - if (!IS_ALIGNED(addr, BIT_ULL(xlate_pos))) { + if (xlate_pos >= 0 && !IS_ALIGNED(addr, BIT_ULL(xlate_pos))) { /* * In certain circumstances we can get a buffer that is * not aligned to its size. (Most of the time From 8144e9c8f30fb23bb736a5d24d5c9d46965563c4 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Thu, 6 Jun 2024 11:15:19 +0300 Subject: [PATCH 1364/2157] ntb: intel: Fix using link status DB's Make sure we are not using DB's which were remapped for link status. Fixes: f6e51c354b60 ("ntb: intel: split out the gen3 code") Signed-off-by: Nikita Shubin Reviewed-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_gen3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c index ffcfc3e02c35..a5aa96a31f4a 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c @@ -215,6 +215,9 @@ static int gen3_init_ntb(struct intel_ntb_dev *ndev) } ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + /* Make sure we are not using DB's used for link status */ + if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) + ndev->db_valid_mask &= ~ndev->db_link_mask; ndev->reg->db_iowrite(ndev->db_valid_mask, ndev->self_mmio + From 4279e72cab31dd3eb8c89591eb9d2affa90ab6aa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 23 Sep 2024 10:38:11 +0200 Subject: [PATCH 1365/2157] ntb_perf: Delete duplicate dmaengine_unmap_put() call in perf_copy_chunk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function call “dmaengine_unmap_put(unmap)” was used in an if branch. The same call was immediately triggered by a subsequent goto statement. Thus avoid such a call repetition. This issue was detected by using the Coccinelle software. Fixes: 5648e56d03fa ("NTB: ntb_perf: Add full multi-port NTB API support") Cc: stable@vger.kernel.org Signed-off-by: Markus Elfring Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 72bc1d017a46..dfd175f79e8f 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -839,10 +839,8 @@ static int perf_copy_chunk(struct perf_thread *pthr, dma_set_unmap(tx, unmap); ret = dma_submit_error(dmaengine_submit(tx)); - if (ret) { - dmaengine_unmap_put(unmap); + if (ret) goto err_free_resource; - } dmaengine_unmap_put(unmap); From 1991934ce5fbaa6e9f4b879461f9bd06bfdbd409 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 10 Oct 2024 10:26:54 +0530 Subject: [PATCH 1366/2157] MAINTAINERS: Update AMD NTB maintainers Sanju is no longer with AMD. Update the MAINTAINERS file record. Signed-off-by: Shyam Sundar S K Signed-off-by: Jon Mason --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..348c2ac432c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16820,7 +16820,6 @@ F: Documentation/core-api/symbol-namespaces.rst F: scripts/nsdeps NTB AMD DRIVER -M: Sanjay R Mehta M: Shyam Sundar S K L: ntb@lists.linux.dev S: Supported From fd5625fc86922f36bedee5846fefd647b7e72751 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 15 Jan 2025 21:28:17 +0300 Subject: [PATCH 1367/2157] ntb: use 64-bit arithmetic for the MSI doorbell mask msi_db_mask is of type 'u64', still the standard 'int' arithmetic is performed to compute its value. While most of the ntb_hw drivers actually don't utilize the higher 32 bits of the doorbell mask now, this may be the case for Switchtec - see switchtec_ntb_init_db(). Found by Linux Verification Center (linuxtesting.org) with SVACE static analysis tool. Fixes: 2b0569b3b7e6 ("NTB: Add MSI interrupt support to ntb_transport") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Reviewed-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index a22ea4a4b202..4f775c3e218f 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1353,7 +1353,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_count = ilog2(qp_bitmap); if (nt->use_msi) { qp_count -= 1; - nt->msi_db_mask = 1 << qp_count; + nt->msi_db_mask = BIT_ULL(qp_count); ntb_db_clear_mask(ndev, nt->msi_db_mask); } From 9cecad134d84d14dc72a0eea7a107691c3e5a837 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 18 Mar 2025 13:36:04 +0800 Subject: [PATCH 1368/2157] i3c: master: svc: Fix missing the IBI rules The code does not add IBI rules for devices with controller capability. However, the secondary controller has the controller capability and works at target mode when the device is probed. Therefore, add IBI rules for such devices. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Signed-off-by: Stanley Chu Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250318053606.3087121-2-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 1d1f351b9a85..a72ba5a7edd4 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1106,7 +1106,7 @@ static int svc_i3c_update_ibirules(struct svc_i3c_master *master) /* Create the IBIRULES register for both cases */ i3c_bus_for_each_i3cdev(&master->base.bus, dev) { - if (I3C_BCR_DEVICE_ROLE(dev->info.bcr) == I3C_BCR_I3C_MASTER) + if (!(dev->info.bcr & I3C_BCR_IBI_REQ_CAP)) continue; if (dev->info.bcr & I3C_BCR_IBI_PAYLOAD) { From c06acf7143bddaa3c0f7bedd8b99e48f6acb85c3 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 18 Mar 2025 13:36:05 +0800 Subject: [PATCH 1369/2157] i3c: master: svc: Use readsb helper for reading MDB The target can send the MDB byte followed by additional data bytes. The readl on MRDATAB reads one actual byte, but the readsl advances the destination pointer by 4 bytes. This causes the subsequent payload to be copied to wrong position in the destination buffer. Cc: stable@kernel.org Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Signed-off-by: Stanley Chu Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250318053606.3087121-3-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index a72ba5a7edd4..57b9dec6b5a8 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -425,7 +425,7 @@ static int svc_i3c_master_handle_ibi(struct svc_i3c_master *master, slot->len < SVC_I3C_FIFO_SIZE) { mdatactrl = readl(master->regs + SVC_I3C_MDATACTRL); count = SVC_I3C_MDATACTRL_RXCOUNT(mdatactrl); - readsl(master->regs + SVC_I3C_MRDATAB, buf, count); + readsb(master->regs + SVC_I3C_MRDATAB, buf, count); slot->len += count; buf += count; } From 0430bf9bc1ac068c8b8c540eb93e5751872efc51 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 18 Mar 2025 13:36:06 +0800 Subject: [PATCH 1370/2157] i3c: master: svc: Fix missing STOP for master request The controller driver nacked the master request but didn't emit a STOP to end the transaction. The driver shall refuse the unsupported requests and return the controller state to IDLE by emitting a STOP. Signed-off-by: Stanley Chu Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250318053606.3087121-4-yschu@nuvoton.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 57b9dec6b5a8..e0cd3ce28b7f 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -592,6 +592,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) queue_work(master->base.wq, &master->hj_work); break; case SVC_I3C_MSTATUS_IBITYPE_MASTER_REQUEST: + svc_i3c_master_emit_stop(master); default: break; } From e36ba5ab808ef6237c3148d469c8238674230e2b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:23 -0700 Subject: [PATCH 1371/2157] iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC Introduce a new IOMMUFD_OBJ_VEVENTQ object for vIOMMU Event Queue that provides user space (VMM) another FD to read the vIOMMU Events. Allow a vIOMMU object to allocate vEVENTQs, with a condition that each vIOMMU can only have one single vEVENTQ per type. Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new ioctl. Link: https://patch.msgid.link/r/21acf0751dd5c93846935ee06f93b9c65eff5e04.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/eventq.c | 209 +++++++++++++++++++++++- drivers/iommu/iommufd/iommufd_private.h | 82 ++++++++++ drivers/iommu/iommufd/main.c | 7 + drivers/iommu/iommufd/viommu.c | 2 + include/linux/iommufd.h | 3 + include/uapi/linux/iommufd.h | 82 ++++++++++ 6 files changed, 384 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index f8e60e5879d1..4c43ace8c725 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -262,13 +262,148 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b return done == 0 ? rc : done; } +/* IOMMUFD_OBJ_VEVENTQ Functions */ + +void iommufd_veventq_abort(struct iommufd_object *obj) +{ + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_viommu *viommu = veventq->viommu; + struct iommufd_vevent *cur, *next; + + lockdep_assert_held_write(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(cur, next, &eventq->deliver, node) { + list_del(&cur->node); + if (cur != &veventq->lost_events_header) + kfree(cur); + } + + refcount_dec(&viommu->obj.users); + list_del(&veventq->node); +} + +void iommufd_veventq_destroy(struct iommufd_object *obj) +{ + struct iommufd_veventq *veventq = eventq_to_veventq( + container_of(obj, struct iommufd_eventq, obj)); + + down_write(&veventq->viommu->veventqs_rwsem); + iommufd_veventq_abort(obj); + up_write(&veventq->viommu->veventqs_rwsem); +} + +static struct iommufd_vevent * +iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + struct iommufd_vevent *vevent = NULL; + + spin_lock(&eventq->lock); + if (!list_empty(list)) { + struct iommufd_vevent *next; + + next = list_first_entry(list, struct iommufd_vevent, node); + /* Make a copy of the lost_events_header for copy_to_user */ + if (next == &veventq->lost_events_header) { + vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC); + if (!vevent) + goto out_unlock; + } + list_del(&next->node); + if (vevent) + memcpy(vevent, next, sizeof(*vevent)); + else + vevent = next; + } +out_unlock: + spin_unlock(&eventq->lock); + return vevent; +} + +static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + + spin_lock(&eventq->lock); + if (vevent_for_lost_events_header(vevent)) { + /* Remove the copy of the lost_events_header */ + kfree(vevent); + vevent = NULL; + /* An empty list needs the lost_events_header back */ + if (list_empty(list)) + vevent = &veventq->lost_events_header; + } + if (vevent) + list_add(&vevent->node, list); + spin_unlock(&eventq->lock); +} + +static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_vevent_header *hdr; + struct iommufd_vevent *cur; + size_t done = 0; + int rc = 0; + + if (*ppos) + return -ESPIPE; + + while ((cur = iommufd_veventq_deliver_fetch(veventq))) { + /* Validate the remaining bytes against the header size */ + if (done >= count || sizeof(*hdr) > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + hdr = &cur->header; + + /* If being a normal vEVENT, validate against the full size */ + if (!vevent_for_lost_events_header(cur) && + sizeof(hdr) + cur->data_len > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + + if (copy_to_user(buf + done, hdr, sizeof(*hdr))) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + done += sizeof(*hdr); + + if (cur->data_len && + copy_to_user(buf + done, cur->event_data, cur->data_len)) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + spin_lock(&eventq->lock); + veventq->num_events--; + spin_unlock(&eventq->lock); + done += cur->data_len; + kfree(cur); + } + + return done == 0 ? rc : done; +} + /* Common Event Queue Functions */ static __poll_t iommufd_eventq_fops_poll(struct file *filep, struct poll_table_struct *wait) { struct iommufd_eventq *eventq = filep->private_data; - __poll_t pollflags = EPOLLOUT; + __poll_t pollflags = 0; + + if (eventq->obj.type == IOMMUFD_OBJ_FAULT) + pollflags |= EPOLLOUT; poll_wait(filep, &eventq->wait_queue, wait); spin_lock(&eventq->lock); @@ -388,3 +523,75 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) return 0; } + +static const struct file_operations iommufd_veventq_fops = + INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL); + +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_veventq_alloc *cmd = ucmd->cmd; + struct iommufd_veventq *veventq; + struct iommufd_viommu *viommu; + int fdno; + int rc; + + if (cmd->flags || cmd->__reserved || + cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->veventq_depth) + return -EINVAL; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + down_write(&viommu->veventqs_rwsem); + + if (iommufd_viommu_find_veventq(viommu, cmd->type)) { + rc = -EEXIST; + goto out_unlock_veventqs; + } + + veventq = __iommufd_object_alloc(ucmd->ictx, veventq, + IOMMUFD_OBJ_VEVENTQ, common.obj); + if (IS_ERR(veventq)) { + rc = PTR_ERR(veventq); + goto out_unlock_veventqs; + } + + veventq->type = cmd->type; + veventq->viommu = viommu; + refcount_inc(&viommu->obj.users); + veventq->depth = cmd->veventq_depth; + list_add_tail(&veventq->node, &viommu->veventqs); + veventq->lost_events_header.header.flags = + IOMMU_VEVENTQ_FLAG_LOST_EVENTS; + + fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]", + ucmd->ictx, &iommufd_veventq_fops); + if (fdno < 0) { + rc = fdno; + goto out_abort; + } + + cmd->out_veventq_id = veventq->common.obj.id; + cmd->out_veventq_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + + iommufd_object_finalize(ucmd->ictx, &veventq->common.obj); + fd_install(fdno, veventq->common.filep); + goto out_unlock_veventqs; + +out_put_fdno: + put_unused_fd(fdno); + fput(veventq->common.filep); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj); +out_unlock_veventqs: + up_write(&viommu->veventqs_rwsem); + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 44fb30af10b0..8cda9c4672eb 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -507,6 +507,74 @@ void iommufd_fault_iopf_disable(struct iommufd_device *idev); void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, struct iommufd_attach_handle *handle); +/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */ +struct iommufd_vevent { + struct iommufd_vevent_header header; + struct list_head node; /* for iommufd_eventq::deliver */ + ssize_t data_len; + u64 event_data[] __counted_by(data_len); +}; + +#define vevent_for_lost_events_header(vevent) \ + (vevent->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) + +/* + * An iommufd_veventq object represents an interface to deliver vIOMMU events to + * the user space. It is created/destroyed by the user space and associated with + * a vIOMMU object during the allocations. + */ +struct iommufd_veventq { + struct iommufd_eventq common; + struct iommufd_viommu *viommu; + struct list_head node; /* for iommufd_viommu::veventqs */ + struct iommufd_vevent lost_events_header; + + unsigned int type; + unsigned int depth; + + /* Use common.lock for protection */ + u32 num_events; + u32 sequence; +}; + +static inline struct iommufd_veventq * +eventq_to_veventq(struct iommufd_eventq *eventq) +{ + return container_of(eventq, struct iommufd_veventq, common); +} + +static inline struct iommufd_veventq * +iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_VEVENTQ), + struct iommufd_veventq, common.obj); +} + +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd); +void iommufd_veventq_destroy(struct iommufd_object *obj); +void iommufd_veventq_abort(struct iommufd_object *obj); + +static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) +{ + struct iommufd_eventq *eventq = &veventq->common; + + lockdep_assert_held(&eventq->lock); + + /* + * Remove the lost_events_header and add the new node at the same time. + * Note the new node can be lost_events_header, for a sequence update. + */ + if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver)) + list_del(&veventq->lost_events_header.node); + list_add_tail(&vevent->node, &eventq->deliver); + vevent->header.sequence = veventq->sequence; + veventq->sequence = (veventq->sequence + 1) & INT_MAX; + + wake_up_interruptible(&eventq->wait_queue); +} + static inline struct iommufd_viommu * iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) { @@ -515,6 +583,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_viommu, obj); } +static inline struct iommufd_veventq * +iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type) +{ + struct iommufd_veventq *veventq, *next; + + lockdep_assert_held(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) { + if (veventq->type == type) + return veventq; + } + return NULL; +} + int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index b6fa9fd11bc1..3df468f64e7d 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -317,6 +317,7 @@ union ucmd_buffer { struct iommu_ioas_unmap unmap; struct iommu_option option; struct iommu_vdevice_alloc vdev; + struct iommu_veventq_alloc veventq; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; #ifdef CONFIG_IOMMUFD_TEST @@ -372,6 +373,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, struct iommu_vdevice_alloc, virt_id), + IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc, + struct iommu_veventq_alloc, out_veventq_fd), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, @@ -514,6 +517,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, }, + [IOMMUFD_OBJ_VEVENTQ] = { + .destroy = iommufd_veventq_destroy, + .abort = iommufd_veventq_abort, + }, [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy, }, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 69b88e8c7c26..01df2b985f02 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging; refcount_inc(&viommu->hwpt->common.obj.users); + INIT_LIST_HEAD(&viommu->veventqs); + init_rwsem(&viommu->veventqs_rwsem); /* * It is the most likely case that a physical IOMMU is unpluggable. A * pluggable IOMMU instance (if exists) is responsible for refcounting diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 11110c749200..8948b1836940 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -34,6 +34,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, + IOMMUFD_OBJ_VEVENTQ, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -93,6 +94,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; struct xarray vdevs; + struct list_head veventqs; + struct rw_semaphore veventqs_rwsem; unsigned int type; }; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 78747b24bd0f..dbb8787d9c63 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -55,6 +55,7 @@ enum { IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, + IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, }; /** @@ -1014,4 +1015,85 @@ struct iommu_ioas_change_process { #define IOMMU_IOAS_CHANGE_PROCESS \ _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) +/** + * enum iommu_veventq_flag - flag for struct iommufd_vevent_header + * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs + */ +enum iommu_veventq_flag { + IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0), +}; + +/** + * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status + * @flags: Combination of enum iommu_veventq_flag + * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of + * [0, INT_MAX] where the following index of INT_MAX is 0 + * + * Each iommufd_vevent_header reports a sequence index of the following vEVENT: + * ------------------------------------------------------------------------- + * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | + * ------------------------------------------------------------------------- + * And this sequence index is expected to be monotonic to the sequence index of + * the previous vEVENT. If two adjacent sequence indexes has a delta larger than + * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: + * ------------------------------------------------------------------------- + * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | + * ------------------------------------------------------------------------- + * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT + * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header + * would be added to the tail, and no data would follow this header: + * --------------------------------------------------------------------------- + * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | + * --------------------------------------------------------------------------- + */ +struct iommufd_vevent_header { + __u32 flags; + __u32 sequence; +}; + +/** + * enum iommu_veventq_type - Virtual Event Queue Type + * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_veventq_type { + IOMMU_VEVENTQ_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) + * @size: sizeof(struct iommu_veventq_alloc) + * @flags: Must be 0 + * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with + * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type + * @veventq_depth: Maximum number of events in the vEVENTQ + * @out_veventq_id: The ID of the new vEVENTQ + * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the + * successfully returned fd after using it + * @__reserved: Must be 0 + * + * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU + * can have multiple FDs for different types, but is confined to one per @type. + * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ, + * if there are vEVENTs available. A vEVENTQ will lose events due to overflow, + * if the number of the vEVENTs hits @veventq_depth. + * + * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by + * a type-specific data structure, in a normal case: + * ------------------------------------------------------------- + * || header0 | data0 | header1 | data1 | ... | headerN | dataN || + * ------------------------------------------------------------- + * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to + * struct iommufd_vevent_header). + */ +struct iommu_veventq_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 veventq_depth; + __u32 out_veventq_id; + __u32 out_veventq_fd; + __u32 __reserved; +}; +#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) #endif From ea94b211c5483080b749c142090f4c4de4926e51 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:24 -0700 Subject: [PATCH 1372/2157] iommufd/viommu: Add iommufd_viommu_get_vdev_id helper This is a reverse search v.s. iommufd_viommu_find_dev, as drivers may want to convert a struct device pointer (physical) to its virtual device ID for an event injection to the user space VM. Again, this avoids exposing more core structures to the drivers, than the iommufd_viommu alone. Link: https://patch.msgid.link/r/18b8e8bc1b8104d43b205d21602c036fd0804e56.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 24 ++++++++++++++++++++++++ include/linux/iommufd.h | 9 +++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 2d98b04ff1cb..f132b98fb899 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -49,5 +49,29 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); +/* Return -ENOENT if device is not associated to the vIOMMU */ +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id) +{ + struct iommufd_vdevice *vdev; + unsigned long index; + int rc = -ENOENT; + + if (WARN_ON_ONCE(!vdev_id)) + return -EINVAL; + + xa_lock(&viommu->vdevs); + xa_for_each(&viommu->vdevs, index, vdev) { + if (vdev->dev == dev) { + *vdev_id = vdev->id; + rc = 0; + break; + } + } + xa_unlock(&viommu->vdevs); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 8948b1836940..05cb393aff0a 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -190,6 +190,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -203,6 +205,13 @@ iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { return NULL; } + +static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, + unsigned long *vdev_id) +{ + return -ENOENT; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* From e8e1ef9b77a7a09b7809890a52229f24d3c8b532 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:25 -0700 Subject: [PATCH 1373/2157] iommufd/viommu: Add iommufd_viommu_report_event helper Similar to iommu_report_device_fault, this allows IOMMU drivers to report vIOMMU events from threaded IRQ handlers to user space hypervisors. Link: https://patch.msgid.link/r/44be825042c8255e75d0151b338ffd8ba0e4920b.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 48 ++++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 11 ++++++++ 2 files changed, 59 insertions(+) diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index f132b98fb899..75b365561c16 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -73,5 +73,53 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); +/* + * Typically called in driver's threaded IRQ handler. + * The @type and @event_data must be defined in include/uapi/linux/iommufd.h + */ +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len) +{ + struct iommufd_veventq *veventq; + struct iommufd_vevent *vevent; + int rc = 0; + + if (WARN_ON_ONCE(!data_len || !event_data)) + return -EINVAL; + + down_read(&viommu->veventqs_rwsem); + + veventq = iommufd_viommu_find_veventq(viommu, type); + if (!veventq) { + rc = -EOPNOTSUPP; + goto out_unlock_veventqs; + } + + spin_lock(&veventq->common.lock); + if (veventq->num_events == veventq->depth) { + vevent = &veventq->lost_events_header; + goto out_set_header; + } + + vevent = kmalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); + if (!vevent) { + rc = -ENOMEM; + vevent = &veventq->lost_events_header; + goto out_set_header; + } + memcpy(vevent->event_data, event_data, data_len); + vevent->data_len = data_len; + veventq->num_events++; + +out_set_header: + iommufd_vevent_handler(veventq, vevent); + spin_unlock(&veventq->common.lock); +out_unlock_veventqs: + up_read(&viommu->veventqs_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 05cb393aff0a..60eff9272551 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,6 +11,7 @@ #include #include #include +#include struct device; struct file; @@ -192,6 +193,9 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, struct device *dev, unsigned long *vdev_id); +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -212,6 +216,13 @@ static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, { return -ENOENT; } + +static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, + void *event_data, size_t data_len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* From 941d0719aa66adb40b96f049635d86b447577970 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:26 -0700 Subject: [PATCH 1374/2157] iommufd/selftest: Require vdev_id when attaching to a nested domain When attaching a device to a vIOMMU-based nested domain, vdev_id must be present. Add a piece of code hard-requesting it, preparing for a vEVENTQ support in the following patch. Then, update the TEST_F. A HWPT-based nested domain will return a NULL new_viommu, thus no such a vDEVICE requirement. Link: https://patch.msgid.link/r/4051ca8a819e51cb30de6b4fe9e4d94d956afe3d.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 24 ++++++++++++++++++++++++ tools/testing/selftests/iommu/iommufd.c | 5 +++++ 2 files changed, 29 insertions(+) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d40deb0a4f06..ba84bacbce2e 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -161,7 +161,10 @@ enum selftest_obj_type { struct mock_dev { struct device dev; + struct mock_viommu *viommu; + struct rw_semaphore viommu_rwsem; unsigned long flags; + unsigned long vdev_id; int id; u32 cache[MOCK_DEV_CACHE_NUM]; }; @@ -193,10 +196,30 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { struct mock_dev *mdev = to_mock_dev(dev); + struct mock_viommu *new_viommu = NULL; + unsigned long vdev_id = 0; + int rc; if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) return -EINVAL; + iommu_group_mutex_assert(dev); + if (domain->type == IOMMU_DOMAIN_NESTED) { + new_viommu = to_mock_nested(domain)->mock_viommu; + if (new_viommu) { + rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev, + &vdev_id); + if (rc) + return rc; + } + } + if (new_viommu != mdev->viommu) { + down_write(&mdev->viommu_rwsem); + mdev->viommu = new_viommu; + mdev->vdev_id = vdev_id; + up_write(&mdev->viommu_rwsem); + } + return 0; } @@ -850,6 +873,7 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) if (!mdev) return ERR_PTR(-ENOMEM); + init_rwsem(&mdev->viommu_rwsem); device_initialize(&mdev->dev); mdev->flags = dev_flags; mdev->dev.release = mock_dev_release; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 618c03bb6509..6a050a1d64ed 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2740,6 +2740,7 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) uint32_t iopf_hwpt_id; uint32_t fault_id; uint32_t fault_fd; + uint32_t vdev_id; if (self->device_id) { test_ioctl_fault_alloc(&fault_id, &fault_fd); @@ -2756,6 +2757,10 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, + iopf_hwpt_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id)); From b3cc0b7599ccc128831fdc0fb71606a246d2a58a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:27 -0700 Subject: [PATCH 1375/2157] iommufd/selftest: Add IOMMU_TEST_OP_TRIGGER_VEVENT for vEVENTQ coverage The handler will get vDEVICE object from the given mdev and convert it to its per-vIOMMU virtual ID to mimic a real IOMMU driver. Link: https://patch.msgid.link/r/1ea874d20e56d65e7cfd6e0e8e01bd3dbd038761.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 10 ++++++++++ drivers/iommu/iommufd/selftest.c | 30 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index a6b7a163f636..87e9165cea27 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -24,6 +24,7 @@ enum { IOMMU_TEST_OP_MD_CHECK_IOTLB, IOMMU_TEST_OP_TRIGGER_IOPF, IOMMU_TEST_OP_DEV_CHECK_CACHE, + IOMMU_TEST_OP_TRIGGER_VEVENT, }; enum { @@ -145,6 +146,9 @@ struct iommu_test_cmd { __u32 id; __u32 cache; } check_dev_cache; + struct { + __u32 dev_id; + } trigger_vevent; }; __u32 last; }; @@ -212,4 +216,10 @@ struct iommu_viommu_invalidate_selftest { __u32 cache_id; }; +#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef + +struct iommu_viommu_event_selftest { + __u32 virt_id; +}; + #endif diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index ba84bacbce2e..d55dde28e9bc 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -1621,6 +1621,34 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, return 0; } +static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct iommu_viommu_event_selftest test = {}; + struct iommufd_device *idev; + struct mock_dev *mdev; + int rc = -ENOENT; + + idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + mdev = to_mock_dev(idev->dev); + + down_read(&mdev->viommu_rwsem); + if (!mdev->viommu || !mdev->vdev_id) + goto out_unlock; + + test.virt_id = mdev->vdev_id; + rc = iommufd_viommu_report_event(&mdev->viommu->core, + IOMMU_VEVENTQ_TYPE_SELFTEST, &test, + sizeof(test)); +out_unlock: + up_read(&mdev->viommu_rwsem); + iommufd_put_object(ucmd->ictx, &idev->obj); + + return rc; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = to_selftest_obj(obj); @@ -1702,6 +1730,8 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->dirty.flags); case IOMMU_TEST_OP_TRIGGER_IOPF: return iommufd_test_trigger_iopf(ucmd, cmd); + case IOMMU_TEST_OP_TRIGGER_VEVENT: + return iommufd_test_trigger_vevent(ucmd, cmd); default: return -EOPNOTSUPP; } From 97717a1f283fee4e886bbe96c6a0ca460f71a4ab Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:28 -0700 Subject: [PATCH 1376/2157] iommufd/selftest: Add IOMMU_VEVENTQ_ALLOC test coverage Trigger vEVENTs by feeding an idev ID and validating the returned output virt_ids whether they equal to the value that was set to the vDEVICE. Link: https://patch.msgid.link/r/e829532ec0a3927d61161b7674b20e731ecd495b.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 31 +++++ .../selftests/iommu/iommufd_fail_nth.c | 7 ++ tools/testing/selftests/iommu/iommufd_utils.h | 115 ++++++++++++++++++ 3 files changed, 153 insertions(+) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6a050a1d64ed..156c74da53cd 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2778,15 +2778,46 @@ TEST_F(iommufd_viommu, vdevice_alloc) uint32_t viommu_id = self->viommu_id; uint32_t dev_id = self->device_id; uint32_t vdev_id = 0; + uint32_t veventq_id; + uint32_t veventq_fd; + int prev_seq = -1; if (dev_id) { + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, + self->nested_hwpt_id); + + /* Allocate a vEVENTQ with veventq_depth=2 */ + test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST, + &veventq_id, &veventq_fd); + test_err_veventq_alloc(EEXIST, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL); /* Set vdev_id to 0x99, unset it, and set to 0x88 */ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, + self->nested_hwpt_id); + test_cmd_trigger_vevents(dev_id, 1); + test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq); test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(vdev_id); + + /* Try again with 0x88 */ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, + self->nested_hwpt_id); + /* Trigger an overflow with three events */ + test_cmd_trigger_vevents(dev_id, 3); + test_err_read_vevents(EOVERFLOW, veventq_fd, 3, 0x88, + &prev_seq); + /* Overflow must be gone after the previous reads */ + test_cmd_trigger_vevents(dev_id, 1); + test_cmd_read_vevents(veventq_fd, 1, 0x88, &prev_seq); + close(veventq_fd); + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(vdev_id); + test_ioctl_destroy(veventq_id); } else { test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL); } diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 64b1f8e1b0cf..99a7f7897bb2 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -620,6 +620,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) }; struct iommu_test_hw_info info; uint32_t fault_id, fault_fd; + uint32_t veventq_id, veventq_fd; uint32_t fault_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; @@ -692,6 +693,12 @@ TEST_FAIL_NTH(basic_fail_nth, device) IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data))) return -1; + if (_test_cmd_veventq_alloc(self->fd, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id, + &veventq_fd)) + return -1; + close(veventq_fd); + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index d979f5b0efe8..6f2ba2fa8f76 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "../kselftest_harness.h" #include "../../../../drivers/iommu/iommufd/iommufd_test.h" @@ -936,3 +937,117 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, EXPECT_ERRNO(_errno, \ _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ virt_id, vdev_id)) + +static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, + __u32 *veventq_id, __u32 *veventq_fd) +{ + struct iommu_veventq_alloc cmd = { + .size = sizeof(cmd), + .type = type, + .veventq_depth = 2, + .viommu_id = viommu_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VEVENTQ_ALLOC, &cmd); + if (ret) + return ret; + if (veventq_id) + *veventq_id = cmd.out_veventq_id; + if (veventq_fd) + *veventq_fd = cmd.out_veventq_fd; + return 0; +} + +#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \ + ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ + veventq_id, veventq_fd)) +#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \ + veventq_fd) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ + veventq_id, veventq_fd)) + +static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) +{ + struct iommu_test_cmd trigger_vevent_cmd = { + .size = sizeof(trigger_vevent_cmd), + .op = IOMMU_TEST_OP_TRIGGER_VEVENT, + .trigger_vevent = { + .dev_id = dev_id, + }, + }; + int ret; + + while (nvevents--) { + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd); + if (ret < 0) + return -1; + } + return ret; +} + +#define test_cmd_trigger_vevents(dev_id, nvevents) \ + ASSERT_EQ(0, _test_cmd_trigger_vevents(self->fd, dev_id, nvevents)) + +static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents, + __u32 virt_id, int *prev_seq) +{ + struct pollfd pollfd = { .fd = event_fd, .events = POLLIN }; + struct iommu_viommu_event_selftest *event; + struct iommufd_vevent_header *hdr; + ssize_t bytes; + void *data; + int ret, i; + + ret = poll(&pollfd, 1, 1000); + if (ret < 0) + return -1; + + data = calloc(nvevents, sizeof(*hdr) + sizeof(*event)); + if (!data) { + errno = ENOMEM; + return -1; + } + + bytes = read(event_fd, data, + nvevents * (sizeof(*hdr) + sizeof(*event))); + if (bytes <= 0) { + errno = EFAULT; + ret = -1; + goto out_free; + } + + for (i = 0; i < nvevents; i++) { + hdr = data + i * (sizeof(*hdr) + sizeof(*event)); + + if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS || + hdr->sequence - *prev_seq > 1) { + *prev_seq = hdr->sequence; + errno = EOVERFLOW; + ret = -1; + goto out_free; + } + *prev_seq = hdr->sequence; + event = data + sizeof(*hdr); + if (event->virt_id != virt_id) { + errno = EINVAL; + ret = -1; + goto out_free; + } + } + + ret = 0; +out_free: + free(data); + return ret; +} + +#define test_cmd_read_vevents(event_fd, nvevents, virt_id, prev_seq) \ + ASSERT_EQ(0, _test_cmd_read_vevents(self->fd, event_fd, nvevents, \ + virt_id, prev_seq)) +#define test_err_read_vevents(_errno, event_fd, nvevents, virt_id, prev_seq) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_read_vevents(self->fd, event_fd, nvevents, \ + virt_id, prev_seq)) From 2ec0458eb0e5a84d80f82667e358c4f2c187d2e4 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:29 -0700 Subject: [PATCH 1377/2157] Documentation: userspace-api: iommufd: Update FAULT and VEVENTQ With the introduction of the new objects, update the doc to reflect that. Link: https://patch.msgid.link/r/09829fbc218872d242323d8834da4bec187ce6f4.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Bagas Sanjaya Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- Documentation/userspace-api/iommufd.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst index 70289d6815d2..b0df15865dec 100644 --- a/Documentation/userspace-api/iommufd.rst +++ b/Documentation/userspace-api/iommufd.rst @@ -63,6 +63,13 @@ Following IOMMUFD objects are exposed to userspace: space usually has mappings from guest-level I/O virtual addresses to guest- level physical addresses. +- IOMMUFD_FAULT, representing a software queue for an HWPT reporting IO page + faults using the IOMMU HW's PRI (Page Request Interface). This queue object + provides user space an FD to poll the page fault events and also to respond + to those events. A FAULT object must be created first to get a fault_id that + could be then used to allocate a fault-enabled HWPT via the IOMMU_HWPT_ALLOC + command by setting the IOMMU_HWPT_FAULT_ID_VALID bit in its flags field. + - IOMMUFD_OBJ_VIOMMU, representing a slice of the physical IOMMU instance, passed to or shared with a VM. It may be some HW-accelerated virtualization features and some SW resources used by the VM. For examples: @@ -109,6 +116,14 @@ Following IOMMUFD objects are exposed to userspace: vIOMMU, which is a separate ioctl call from attaching the same device to an HWPT_PAGING that the vIOMMU holds. +- IOMMUFD_OBJ_VEVENTQ, representing a software queue for a vIOMMU to report its + events such as translation faults occurred to a nested stage-1 (excluding I/O + page faults that should go through IOMMUFD_OBJ_FAULT) and HW-specific events. + This queue object provides user space an FD to poll/read the vIOMMU events. A + vIOMMU object must be created first to get its viommu_id, which could be then + used to allocate a vEVENTQ. Each vIOMMU can support multiple types of vEVENTS, + but is confined to one vEVENTQ per vEVENTQ type. + All user-visible objects are destroyed via the IOMMU_DESTROY uAPI. The diagrams below show relationships between user-visible objects and kernel @@ -251,8 +266,10 @@ User visible objects are backed by following datastructures: - iommufd_device for IOMMUFD_OBJ_DEVICE. - iommufd_hwpt_paging for IOMMUFD_OBJ_HWPT_PAGING. - iommufd_hwpt_nested for IOMMUFD_OBJ_HWPT_NESTED. +- iommufd_fault for IOMMUFD_OBJ_FAULT. - iommufd_viommu for IOMMUFD_OBJ_VIOMMU. - iommufd_vdevice for IOMMUFD_OBJ_VDEVICE. +- iommufd_veventq for IOMMUFD_OBJ_VEVENTQ. Several terminologies when looking at these datastructures: From f0ea207ed7813bdf17e65aa042d023d1c59e49e3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:30 -0700 Subject: [PATCH 1378/2157] iommu/arm-smmu-v3: Introduce struct arm_smmu_vmaster Use it to store all vSMMU-related data. The vsid (Virtual Stream ID) will be the first use case. Since the vsid reader will be the eventq handler that already holds a streams_mutex, reuse that to fence the vmaster too. Also add a pair of arm_smmu_attach_prepare/commit_vmaster helpers to set or unset the master->vmaster pointer. Put the helpers inside the existing arm_smmu_attach_prepare/commit(). For identity/blocked ops that don't call arm_smmu_attach_prepare/commit(), add a simpler arm_smmu_master_clear_vmaster helper to unset the vmaster. Link: https://patch.msgid.link/r/a7f282e1a531279e25f06c651e95d56f6b120886.1741719725.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Acked-by: Will Deacon Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 41 +++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 +++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 28 +++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 5aa2e7af58b4..dfc80e1a8e2e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -85,6 +85,47 @@ static void arm_smmu_make_nested_domain_ste( } } +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + struct arm_smmu_vmaster *vmaster; + unsigned long vsid; + int ret; + + iommu_group_mutex_assert(state->master->dev); + + ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core, + state->master->dev, &vsid); + if (ret) + return ret; + + vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); + if (!vmaster) + return -ENOMEM; + vmaster->vsmmu = nested_domain->vsmmu; + vmaster->vsid = vsid; + state->vmaster = vmaster; + + return 0; +} + +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + mutex_lock(&master->smmu->streams_mutex); + kfree(master->vmaster); + master->vmaster = state->vmaster; + mutex_unlock(&master->smmu->streams_mutex); +} + +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ + struct arm_smmu_attach_state state = { .master = master }; + + arm_smmu_attach_commit_vmaster(&state); +} + static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, struct device *dev) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 358072b4e293..964d2cf27d3d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2803,6 +2803,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(new_domain); unsigned long flags; + int ret; /* * arm_smmu_share_asid() must not see two domains pointing to the same @@ -2832,9 +2833,18 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, } if (smmu_domain) { + if (new_domain->type == IOMMU_DOMAIN_NESTED) { + ret = arm_smmu_attach_prepare_vmaster( + state, to_smmu_nested_domain(new_domain)); + if (ret) + return ret; + } + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); - if (!master_domain) + if (!master_domain) { + kfree(state->vmaster); return -ENOMEM; + } master_domain->master = master; master_domain->ssid = state->ssid; if (new_domain->type == IOMMU_DOMAIN_NESTED) @@ -2861,6 +2871,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); kfree(master_domain); + kfree(state->vmaster); return -EINVAL; } @@ -2893,6 +2904,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) lockdep_assert_held(&arm_smmu_asid_lock); + arm_smmu_attach_commit_vmaster(state); + if (state->ats_enabled && !master->ats_enabled) { arm_smmu_enable_ats(master); } else if (state->ats_enabled && master->ats_enabled) { @@ -3162,6 +3175,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_ste ste; struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_bypass_ste(master->smmu, &ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); return 0; @@ -3180,7 +3194,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_ste ste; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_abort_ste(&ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_TERMINATE); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index bd9d7c85576a..557b300a3a0f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -799,6 +799,11 @@ struct arm_smmu_stream { struct rb_node node; }; +struct arm_smmu_vmaster { + struct arm_vsmmu *vsmmu; + unsigned long vsid; +}; + struct arm_smmu_event { u8 stall : 1, ssv : 1, @@ -824,6 +829,7 @@ struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; struct arm_smmu_stream *streams; + struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */ /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; @@ -972,6 +978,7 @@ struct arm_smmu_attach_state { bool disable_ats; ioasid_t ssid; /* Resulting state */ + struct arm_smmu_vmaster *vmaster; bool ats_enabled; }; @@ -1055,9 +1062,30 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, struct iommu_domain *parent, struct iommufd_ctx *ictx, unsigned int viommu_type); +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain); +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); #else #define arm_smmu_hw_info NULL #define arm_vsmmu_alloc NULL + +static inline int +arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + return 0; +} + +static inline void +arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ +} + +static inline void +arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ +} #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ From e7d3fa3d29d5b2ed12d247cf57a0a34fffe89eb8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:31 -0700 Subject: [PATCH 1379/2157] iommu/arm-smmu-v3: Report events that belong to devices attached to vIOMMU Aside from the IOPF framework, iommufd provides an additional pathway to report hardware events, via the vEVENTQ of vIOMMU infrastructure. Define an iommu_vevent_arm_smmuv3 uAPI structure, and report stage-1 events in the threaded IRQ handler. Also, add another four event record types that can be forwarded to a VM. Link: https://patch.msgid.link/r/5cf6719682fdfdabffdb08374cdf31ad2466d75a.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Acked-by: Will Deacon Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 17 ++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 58 +++++++++++-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++ include/uapi/linux/iommufd.h | 23 ++++++++ 4 files changed, 80 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index dfc80e1a8e2e..65adfed56969 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -433,4 +433,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, return &vsmmu->core; } +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt) +{ + struct iommu_vevent_arm_smmuv3 vevt; + int i; + + lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex); + + vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) | + FIELD_PREP(EVTQ_0_SID, vmaster->vsid)); + for (i = 1; i < EVTQ_ENT_DWORDS; i++) + vevt.evt[i] = cpu_to_le64(evt[i]); + + return iommufd_viommu_report_event(&vmaster->vsmmu->core, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt, + sizeof(vevt)); +} + MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 964d2cf27d3d..5fa817a8f5f1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1813,8 +1813,8 @@ static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, mutex_unlock(&smmu->streams_mutex); } -static int arm_smmu_handle_event(struct arm_smmu_device *smmu, - struct arm_smmu_event *event) +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; @@ -1823,6 +1823,10 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, struct iommu_fault *flt = &fault_evt.fault; switch (event->id) { + case EVT_ID_BAD_STE_CONFIG: + case EVT_ID_STREAM_DISABLED_FAULT: + case EVT_ID_BAD_SUBSTREAMID_CONFIG: + case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ -1832,31 +1836,30 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, return -EOPNOTSUPP; } - if (!event->stall) - return -EOPNOTSUPP; + if (event->stall) { + if (event->read) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; - if (event->read) - perm |= IOMMU_FAULT_PERM_READ; - else - perm |= IOMMU_FAULT_PERM_WRITE; + if (event->instruction) + perm |= IOMMU_FAULT_PERM_EXEC; - if (event->instruction) - perm |= IOMMU_FAULT_PERM_EXEC; + if (event->privileged) + perm |= IOMMU_FAULT_PERM_PRIV; - if (event->privileged) - perm |= IOMMU_FAULT_PERM_PRIV; + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request){ + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = event->stag, + .perm = perm, + .addr = event->iova, + }; - flt->type = IOMMU_FAULT_PAGE_REQ; - flt->prm = (struct iommu_fault_page_request) { - .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = event->stag, - .perm = perm, - .addr = event->iova, - }; - - if (event->ssv) { - flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = event->ssid; + if (event->ssv) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = event->ssid; + } } mutex_lock(&smmu->streams_mutex); @@ -1866,7 +1869,12 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, goto out_unlock; } - ret = iommu_report_device_fault(master->dev, &fault_evt); + if (event->stall) + ret = iommu_report_device_fault(master->dev, &fault_evt); + else if (master->vmaster && !event->s2) + ret = arm_vmaster_report_event(master->vmaster, evt); + else + ret = -EOPNOTSUPP; /* Unhandled events should be pinned */ out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; @@ -1944,7 +1952,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) do { while (!queue_remove_raw(q, evt)) { arm_smmu_decode_event(smmu, evt, &event); - if (arm_smmu_handle_event(smmu, &event)) + if (arm_smmu_handle_event(smmu, evt, &event)) arm_smmu_dump_event(smmu, evt, &event, &rs); put_device(event.dev); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 557b300a3a0f..df06076a1698 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -1066,6 +1066,7 @@ int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, struct arm_smmu_nested_domain *nested_domain); void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt); #else #define arm_smmu_hw_info NULL #define arm_vsmmu_alloc NULL @@ -1086,6 +1087,12 @@ static inline void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) { } + +static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, + u64 *evt) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index dbb8787d9c63..8719d4f5d618 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1054,9 +1054,32 @@ struct iommufd_vevent_header { /** * enum iommu_veventq_type - Virtual Event Queue Type * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue */ enum iommu_veventq_type { IOMMU_VEVENTQ_TYPE_DEFAULT = 0, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, +}; + +/** + * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event + * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3) + * @evt: 256-bit ARM SMMUv3 Event record, little-endian. + * Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec) + * - 0x04 C_BAD_STE + * - 0x06 F_STREAM_DISABLED + * - 0x08 C_BAD_SUBSTREAMID + * - 0x0a C_BAD_CD + * - 0x10 F_TRANSLATION + * - 0x11 F_ADDR_SIZE + * - 0x12 F_ACCESS + * - 0x13 F_PERMISSION + * + * StreamID field reports a virtual device ID. To receive a virtual event for a + * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC. + */ +struct iommu_vevent_arm_smmuv3 { + __aligned_le64 evt[4]; }; /** From da0c56520e880441d0503d0cf0d6853dcfb5f1a4 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:32 -0700 Subject: [PATCH 1380/2157] iommu/arm-smmu-v3: Set MEV bit in nested STE for DoS mitigations There is a DoS concern on the shared hardware event queue among devices passed through to VMs, that too many translation failures that belong to VMs could overflow the shared hardware event queue if those VMs or their VMMs don't handle/recover the devices properly. The MEV bit in the STE allows to configure the SMMU HW to merge similar event records, though there is no guarantee. Set it in a nested STE for DoS mitigations. In the future, we might want to enable the MEV for non-nested cases too such as domain->type == IOMMU_DOMAIN_UNMANAGED or even IOMMU_DOMAIN_DMA. Link: https://patch.msgid.link/r/8ed12feef67fc65273d0f5925f401a81f56acebe.1741719725.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Acked-by: Will Deacon Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 2 ++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 ++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 65adfed56969..e4fd8d522af8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -43,6 +43,8 @@ static void arm_smmu_make_nested_cd_table_ste( target->data[0] |= nested_domain->ste[0] & ~cpu_to_le64(STRTAB_STE_0_CFG); target->data[1] |= nested_domain->ste[1]; + /* Merge events for DoS mitigations on eventq */ + target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV); } /* diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5fa817a8f5f1..b4c21aaed126 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1052,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | - STRTAB_STE_1_EATS); + STRTAB_STE_1_EATS | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); /* @@ -1068,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) if (cfg & BIT(1)) { used_bits[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | - STRTAB_STE_1_SHCFG); + STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index df06076a1698..dd1ad56ce863 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -266,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_MEV (1UL << 19) #define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) From aff12700b8dd7422bfe2277696e192af4df9de8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Feb 2025 09:57:25 +0100 Subject: [PATCH 1381/2157] ntb: reduce stack usage in idt_scan_mws idt_scan_mws() puts a large fixed-size array on the stack and copies it into a smaller dynamically allocated array at the end. On 32-bit targets, the fixed size can easily exceed the warning limit for possible stack overflow: drivers/ntb/hw/idt/ntb_hw_idt.c:1041:27: error: stack frame size (1032) exceeds limit (1024) in 'idt_scan_mws' [-Werror,-Wframe-larger-than] Change it to instead just always use dynamic allocation for the array from the start. It's too big for the stack, but not actually all that much for a permanent allocation. Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202205111109.PiKTruEj-lkp@intel.com/ Signed-off-by: Arnd Bergmann Reviewed-by: Dave Jiang Reviewed-by: Damien Le Moal Signed-off-by: Jon Mason --- drivers/ntb/hw/idt/ntb_hw_idt.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 544d8a4d2af5..f27df8d7f3b9 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -1041,7 +1041,7 @@ static inline char *idt_get_mw_name(enum idt_mw_type mw_type) static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, unsigned char *mw_cnt) { - struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws; + struct idt_mw_cfg *mws; const struct idt_ntb_bar *bars; enum idt_mw_type mw_type; unsigned char widx, bidx, en_cnt; @@ -1049,6 +1049,11 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, int aprt_size; u32 data; + mws = devm_kcalloc(&ndev->ntb.pdev->dev, IDT_MAX_NR_MWS, + sizeof(*mws), GFP_KERNEL); + if (!mws) + return ERR_PTR(-ENOMEM); + /* Retrieve the array of the BARs registers */ bars = portdata_tbl[port].bars; @@ -1103,16 +1108,7 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, } } - /* Allocate memory for memory window descriptors */ - ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws), - GFP_KERNEL); - if (!ret_mws) - return ERR_PTR(-ENOMEM); - - /* Copy the info of detected memory windows */ - memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws)); - - return ret_mws; + return mws; } /* From bf8a7ce7e4c7267a6f5f2b2023cfc459b330b25e Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 12 Mar 2025 20:02:16 +0530 Subject: [PATCH 1382/2157] ntb_hw_amd: Add NTB PCI ID for new gen CPU Add NTB support for new generation of processor. Signed-off-by: Basavaraj Natikar Signed-off-by: Jon Mason --- drivers/ntb/hw/amd/ntb_hw_amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index d687e8c2cc78..63ceed89b62e 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1318,6 +1318,7 @@ static const struct pci_device_id amd_ntb_pci_tbl[] = { { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] }, + { PCI_VDEVICE(AMD, 0x155a), (kernel_ulong_t)&dev_data[1] }, { PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] }, { 0, } }; From 517ec053eeb48f4fe96271b32b3df9c8e269a29c Mon Sep 17 00:00:00 2001 From: Aryan Srivastava Date: Thu, 10 Oct 2024 15:53:15 +1300 Subject: [PATCH 1383/2157] i2c: octeon: refactor common i2c operations Refactor the current implementation of the high-level composite read and write operations in preparation of the addition of block-mode read/write operations. The sending of the i2c command is generic and will apply for both the block-mode and non-block-mode ops. Extract this from the current hlc ops, and place into a generic function, octeon_i2c_hlc_cmd_send. The considerations made for extended addresses in the command construction are almost common for all cases, extract these into octeon_i2c_hlc_ext. There is one difference between the extended read and write cases. When performing extended read or writes the SW_TWSI_EXT must be written with an extended internal address, but the data field is only filled in the write case (read back in read case). This results in the original code block for the read case immediately writing this register, while the write case fills in any data and then writes the register. To create a common block of code for both processes remove the SW_TWSI_EXT write from within the code block and instead in it's place a variable is set, set_ext, which is returned and used as a condition to do the register write, in the read command case. There are parts of the commands construction which are common (only in the read case), extract this and place into generic function octeon_i2c_hlc_read_cmd. This function also reads the return from octeon_i2c_hlc_ext and completes the write to SW_TWSI_EXT if required. The write commands cannot be made entirely into common code as there are distinct differences in the block mode and non-block-mode process. Particularly the writing of data into the buffer. Signed-off-by: Aryan Srivastava Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20241010025317.2040470-2-aryan.srivastava@alliedtelesis.co.nz --- drivers/i2c/busses/i2c-octeon-core.c | 86 ++++++++++++++++------------ 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index 16cc34a0526e..3fbc828508ab 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -498,6 +498,50 @@ static int octeon_i2c_hlc_write(struct octeon_i2c *i2c, struct i2c_msg *msgs) return ret; } +/* Process hlc transaction */ +static int octeon_i2c_hlc_cmd_send(struct octeon_i2c *i2c, u64 cmd) +{ + octeon_i2c_hlc_int_clear(i2c); + octeon_i2c_writeq_flush(cmd, i2c->twsi_base + OCTEON_REG_SW_TWSI(i2c)); + + return octeon_i2c_hlc_wait(i2c); +} + +/* Generic consideration for extended internal addresses in i2c hlc r/w ops */ +static bool octeon_i2c_hlc_ext(struct octeon_i2c *i2c, struct i2c_msg msg, u64 *cmd_in, u64 *ext) +{ + bool set_ext = false; + u64 cmd = 0; + + if (msg.flags & I2C_M_TEN) + cmd |= SW_TWSI_OP_10_IA; + else + cmd |= SW_TWSI_OP_7_IA; + + if (msg.len == 2) { + cmd |= SW_TWSI_EIA; + *ext = (u64)msg.buf[0] << SW_TWSI_IA_SHIFT; + cmd |= (u64)msg.buf[1] << SW_TWSI_IA_SHIFT; + set_ext = true; + } else { + cmd |= (u64)msg.buf[0] << SW_TWSI_IA_SHIFT; + } + + *cmd_in |= cmd; + return set_ext; +} + +/* Construct and send i2c transaction core cmd for read ops */ +static int octeon_i2c_hlc_read_cmd(struct octeon_i2c *i2c, struct i2c_msg msg, u64 cmd) +{ + u64 ext = 0; + + if (octeon_i2c_hlc_ext(i2c, msg, &cmd, &ext)) + octeon_i2c_writeq_flush(ext, i2c->twsi_base + OCTEON_REG_SW_TWSI_EXT(i2c)); + + return octeon_i2c_hlc_cmd_send(i2c, cmd); +} + /* high-level-controller composite write+read, msg0=addr, msg1=data */ static int octeon_i2c_hlc_comp_read(struct octeon_i2c *i2c, struct i2c_msg *msgs) { @@ -512,26 +556,8 @@ static int octeon_i2c_hlc_comp_read(struct octeon_i2c *i2c, struct i2c_msg *msgs /* A */ cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT; - if (msgs[0].flags & I2C_M_TEN) - cmd |= SW_TWSI_OP_10_IA; - else - cmd |= SW_TWSI_OP_7_IA; - - if (msgs[0].len == 2) { - u64 ext = 0; - - cmd |= SW_TWSI_EIA; - ext = (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT; - cmd |= (u64)msgs[0].buf[1] << SW_TWSI_IA_SHIFT; - octeon_i2c_writeq_flush(ext, i2c->twsi_base + OCTEON_REG_SW_TWSI_EXT(i2c)); - } else { - cmd |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT; - } - - octeon_i2c_hlc_int_clear(i2c); - octeon_i2c_writeq_flush(cmd, i2c->twsi_base + OCTEON_REG_SW_TWSI(i2c)); - - ret = octeon_i2c_hlc_wait(i2c); + /* Send core command */ + ret = octeon_i2c_hlc_read_cmd(i2c, msgs[0], cmd); if (ret) goto err; @@ -567,19 +593,8 @@ static int octeon_i2c_hlc_comp_write(struct octeon_i2c *i2c, struct i2c_msg *msg /* A */ cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT; - if (msgs[0].flags & I2C_M_TEN) - cmd |= SW_TWSI_OP_10_IA; - else - cmd |= SW_TWSI_OP_7_IA; - - if (msgs[0].len == 2) { - cmd |= SW_TWSI_EIA; - ext |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT; - set_ext = true; - cmd |= (u64)msgs[0].buf[1] << SW_TWSI_IA_SHIFT; - } else { - cmd |= (u64)msgs[0].buf[0] << SW_TWSI_IA_SHIFT; - } + /* Set parameters for extended message (if required) */ + set_ext = octeon_i2c_hlc_ext(i2c, msgs[0], &cmd, &ext); for (i = 0, j = msgs[1].len - 1; i < msgs[1].len && i < 4; i++, j--) cmd |= (u64)msgs[1].buf[j] << (8 * i); @@ -592,10 +607,7 @@ static int octeon_i2c_hlc_comp_write(struct octeon_i2c *i2c, struct i2c_msg *msg if (set_ext) octeon_i2c_writeq_flush(ext, i2c->twsi_base + OCTEON_REG_SW_TWSI_EXT(i2c)); - octeon_i2c_hlc_int_clear(i2c); - octeon_i2c_writeq_flush(cmd, i2c->twsi_base + OCTEON_REG_SW_TWSI(i2c)); - - ret = octeon_i2c_hlc_wait(i2c); + ret = octeon_i2c_hlc_cmd_send(i2c, cmd); if (ret) goto err; From c6d859cf287ed78f09b112815dbad94c850e39af Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Tue, 4 Feb 2025 02:03:32 +0530 Subject: [PATCH 1384/2157] dt-bindings: i2c: samsung,s3c2410: add exynos7870-i2c compatible Exynos7870's (non-HS) I2C controllers are entirely compatible with samsung,s3c2440-i2c. Document Exynos7870's compatible string appropriately. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250204-exynos7870-i2c-v1-1-63d67871ab7e@disroot.org --- Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml b/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml index bbc568485627..6ba7d793504c 100644 --- a/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml @@ -22,6 +22,7 @@ properties: - samsung,exynos5-sata-phy-i2c - items: - enum: + - samsung,exynos7870-i2c - samsung,exynos7885-i2c - samsung,exynos850-i2c - const: samsung,s3c2440-i2c From fad3d2e3014957d99e4616bc1a698b8d719614b3 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Tue, 4 Feb 2025 02:03:33 +0530 Subject: [PATCH 1385/2157] dt-bindings: i2c: exynos5: add exynos7870-hsi2c compatible Exynos7870's HS-I2C controllers are entirely compatible with samsung,exynos7-hsi2c. Document Exynos7870's HS-I2C compatible string appropriately. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250204-exynos7870-i2c-v1-2-63d67871ab7e@disroot.org --- Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml index 70cc2ee9ee27..8d47b290b4ed 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml @@ -30,6 +30,7 @@ properties: - items: - enum: - samsung,exynos5433-hsi2c + - samsung,exynos7870-hsi2c - tesla,fsd-hsi2c - const: samsung,exynos7-hsi2c - items: From be7113d2e2a6f20cbee99c98d261a1fd6fd7b549 Mon Sep 17 00:00:00 2001 From: Vitalii Mordan Date: Wed, 12 Feb 2025 20:28:03 +0300 Subject: [PATCH 1386/2157] i2c: pxa: fix call balance of i2c->clk handling routines If the clock i2c->clk was not enabled in i2c_pxa_probe(), it should not be disabled in any path. Found by Linux Verification Center (linuxtesting.org) with Klever. Signed-off-by: Vitalii Mordan Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250212172803.1422136-1-mordan@ispras.ru --- drivers/i2c/busses/i2c-pxa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index cb6988482673..4415a29f749b 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -1503,7 +1503,10 @@ static int i2c_pxa_probe(struct platform_device *dev) i2c->adap.name); } - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return dev_err_probe(&dev->dev, ret, + "failed to enable clock\n"); if (i2c->use_pio) { i2c->adap.algo = &i2c_pxa_pio_algorithm; From 8f95d1da03e9cc3797038538369518ad685a7748 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 17 Feb 2025 18:17:08 +0530 Subject: [PATCH 1387/2157] i2c: amd: Switch to guard(mutex) Instead of using the 'goto label; mutex_unlock()' pattern use 'guard(mutex)' which will release the mutex when it goes out of scope. Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250217124709.3121848-1-Shyam-sundar.S-k@amd.com --- drivers/i2c/busses/i2c-designware-amdpsp.c | 26 ++++++++-------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-amdpsp.c b/drivers/i2c/busses/i2c-designware-amdpsp.c index 8fbd2a10c31a..404571ad61a8 100644 --- a/drivers/i2c/busses/i2c-designware-amdpsp.c +++ b/drivers/i2c/busses/i2c-designware-amdpsp.c @@ -151,19 +151,16 @@ static void release_bus(void) static void psp_release_i2c_bus_deferred(struct work_struct *work) { - mutex_lock(&psp_i2c_access_mutex); + guard(mutex)(&psp_i2c_access_mutex); /* * If there is any pending transaction, cannot release the bus here. * psp_release_i2c_bus() will take care of this later. */ if (psp_i2c_access_count) - goto cleanup; + return; release_bus(); - -cleanup: - mutex_unlock(&psp_i2c_access_mutex); } static DECLARE_DELAYED_WORK(release_queue, psp_release_i2c_bus_deferred); @@ -171,11 +168,11 @@ static int psp_acquire_i2c_bus(void) { int status; - mutex_lock(&psp_i2c_access_mutex); + guard(mutex)(&psp_i2c_access_mutex); /* Return early if mailbox malfunctioned */ if (psp_i2c_mbox_fail) - goto cleanup; + return 0; psp_i2c_access_count++; @@ -184,11 +181,11 @@ static int psp_acquire_i2c_bus(void) * reservation period. */ if (psp_i2c_sem_acquired) - goto cleanup; + return 0; status = psp_send_i2c_req(PSP_I2C_REQ_ACQUIRE); if (status) - goto cleanup; + return 0; psp_i2c_sem_acquired = jiffies; @@ -201,18 +198,16 @@ static int psp_acquire_i2c_bus(void) * communication with PSP. At any case i2c bus is granted to the caller, * thus always return success. */ -cleanup: - mutex_unlock(&psp_i2c_access_mutex); return 0; } static void psp_release_i2c_bus(void) { - mutex_lock(&psp_i2c_access_mutex); + guard(mutex)(&psp_i2c_access_mutex); /* Return early if mailbox was malfunctioned */ if (psp_i2c_mbox_fail) - goto cleanup; + return; /* * If we are last owner of PSP semaphore, need to release arbitration @@ -220,7 +215,7 @@ static void psp_release_i2c_bus(void) */ psp_i2c_access_count--; if (psp_i2c_access_count) - goto cleanup; + return; /* * Send a release command to PSP if the semaphore reservation timeout @@ -228,9 +223,6 @@ static void psp_release_i2c_bus(void) */ if (!delayed_work_pending(&release_queue)) release_bus(); - -cleanup: - mutex_unlock(&psp_i2c_access_mutex); } /* From a71248d96662da5f5d0e276776d1679864ad1bf2 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 17 Feb 2025 18:17:09 +0530 Subject: [PATCH 1388/2157] i2c: dw: Update the master_xfer callback name In light of the recent updates to the i2c subsystem, ensure to use the correct callback names. Specifically, replace '.master_xfer' with '.xfer'. Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250217124709.3121848-2-Shyam-sundar.S-k@amd.com --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 2569bf1a72e0..c5394229b77f 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -907,7 +907,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } static const struct i2c_algorithm i2c_dw_algo = { - .master_xfer = i2c_dw_xfer, + .xfer = i2c_dw_xfer, .functionality = i2c_dw_func, }; From 1505986abf18c40003ebc6d2357454e05b927a7e Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 17 Feb 2025 14:32:56 +0530 Subject: [PATCH 1389/2157] i2c: amd-asf: Modify callbacks of i2c_algorithm to align with the latest revision Adjust the i2c_algorithm callbacks to be consistent with the most recent revision by updating the callback names from master_xfer, reg_slave, and unreg_slave to the current naming convention: xfer, reg_target, and unreg_target. Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250217090258.398540-1-Shyam-sundar.S-k@amd.com --- drivers/i2c/busses/i2c-amd-asf-plat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd-asf-plat.c b/drivers/i2c/busses/i2c-amd-asf-plat.c index 93ebec162c6d..ca0fb46b73bd 100644 --- a/drivers/i2c/busses/i2c-amd-asf-plat.c +++ b/drivers/i2c/busses/i2c-amd-asf-plat.c @@ -272,9 +272,9 @@ static u32 amd_asf_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm amd_asf_smbus_algorithm = { - .master_xfer = amd_asf_xfer, - .reg_slave = amd_asf_reg_target, - .unreg_slave = amd_asf_unreg_target, + .xfer = amd_asf_xfer, + .reg_target = amd_asf_reg_target, + .unreg_target = amd_asf_unreg_target, .functionality = amd_asf_func, }; From b719afaa1e5d88a1b51d76adf344ff4a48efdb45 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 17 Feb 2025 14:32:57 +0530 Subject: [PATCH 1390/2157] i2c: amd-asf: Set cmd variable when encountering an error In the event of ASF error during the transfer, update the cmd and exit the process, as data processing is not performed when a command fails. Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250217090258.398540-2-Shyam-sundar.S-k@amd.com --- drivers/i2c/busses/i2c-amd-asf-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-amd-asf-plat.c b/drivers/i2c/busses/i2c-amd-asf-plat.c index ca0fb46b73bd..ca45f0f23321 100644 --- a/drivers/i2c/busses/i2c-amd-asf-plat.c +++ b/drivers/i2c/busses/i2c-amd-asf-plat.c @@ -69,7 +69,7 @@ static void amd_asf_process_target(struct work_struct *work) /* Check if no error bits are set in target status register */ if (reg & ASF_ERROR_STATUS) { /* Set bank as full */ - cmd = 0; + cmd = 1; reg |= GENMASK(3, 2); outb_p(reg, ASFDATABNKSEL); } else { From 1a64b21282ddc4f7ec8aeb8195c20cf15bd32525 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 17:36:56 +0100 Subject: [PATCH 1391/2157] i2c: mux: remove incorrect of_match_ptr annotations Building with W=1 shows a warning about ltc4306_of_match and i2c_mux_reg_of_match being unused when CONFIG_OF is disabled: drivers/i2c/muxes/i2c-mux-ltc4306.c:200:34: error: unused variable 'ltc4306_of_match' [-Werror,-Wunused-const-variable] drivers/i2c/muxes/i2c-mux-reg.c:242:34: error: unused variable 'i2c_mux_reg_of_match' [-Werror,-Wunused-const-variable] Acked-by: Peter Rosin Signed-off-by: Arnd Bergmann Acked-by: Michael Hennerich Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250225163700.4169480-1-arnd@kernel.org --- drivers/i2c/muxes/i2c-mux-ltc4306.c | 2 +- drivers/i2c/muxes/i2c-mux-reg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-ltc4306.c b/drivers/i2c/muxes/i2c-mux-ltc4306.c index 19a7c370946d..8a87f19bf5d5 100644 --- a/drivers/i2c/muxes/i2c-mux-ltc4306.c +++ b/drivers/i2c/muxes/i2c-mux-ltc4306.c @@ -303,7 +303,7 @@ static void ltc4306_remove(struct i2c_client *client) static struct i2c_driver ltc4306_driver = { .driver = { .name = "ltc4306", - .of_match_table = of_match_ptr(ltc4306_of_match), + .of_match_table = ltc4306_of_match, }, .probe = ltc4306_probe, .remove = ltc4306_remove, diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index dfa472d514cc..1e566ea92bc9 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -250,7 +250,7 @@ static struct platform_driver i2c_mux_reg_driver = { .remove = i2c_mux_reg_remove, .driver = { .name = "i2c-mux-reg", - .of_match_table = of_match_ptr(i2c_mux_reg_of_match), + .of_match_table = i2c_mux_reg_of_match, }, }; From 48277423e533e9fc3343cf192f05045fc09bdedc Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 28 Nov 2023 10:48:35 +0100 Subject: [PATCH 1392/2157] dt-bindings: i2c: qcom,i2c-qup: Document power-domains Similar to qcom,geni-i2c, for i2c-qup we need to vote for performance states on the VDDCX power domain to ensure that required clock rates can be generated correctly. I2C is typically used with a fixed clock rate, so a single required-opp is sufficient without a full OPP table (unlike spi-qup for example). Signed-off-by: Stephan Gerhold Reviewed-by: Rob Herring Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20231128-i2c-qup-dvfs-v1-1-59a0e3039111@kernkonzept.com --- Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml index f43947514d48..fc3077a7af0d 100644 --- a/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml +++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml @@ -52,9 +52,15 @@ properties: - const: default - const: sleep + power-domains: + maxItems: 1 + reg: maxItems: 1 + required-opps: + maxItems: 1 + required: - compatible - clock-names @@ -68,6 +74,7 @@ examples: - | #include #include + #include i2c@c175000 { compatible = "qcom,i2c-qup-v2.2.1"; @@ -82,6 +89,8 @@ examples: pinctrl-names = "default", "sleep"; pinctrl-0 = <&blsp1_i2c1_default>; pinctrl-1 = <&blsp1_i2c1_sleep>; + power-domains = <&rpmpd MSM8909_VDDCX>; + required-opps = <&rpmpd_opp_svs_krait>; clock-frequency = <400000>; #address-cells = <1>; From d15971447f1a503bd30be618e4f650724874e2ad Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 28 Nov 2023 10:48:36 +0100 Subject: [PATCH 1393/2157] dt-bindings: i2c: qup: Document interconnects When the I2C QUP controller is used together with a DMA engine it needs to vote for the interconnect path to the DRAM. Otherwise it may be unable to access the memory quickly enough. Signed-off-by: Stephan Gerhold Reviewed-by: Rob Herring Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20231128-i2c-qup-dvfs-v1-2-59a0e3039111@kernkonzept.com --- Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml index fc3077a7af0d..758d8f6321e1 100644 --- a/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml +++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml @@ -40,6 +40,9 @@ properties: - const: tx - const: rx + interconnects: + maxItems: 1 + interrupts: maxItems: 1 @@ -73,6 +76,7 @@ unevaluatedProperties: false examples: - | #include + #include #include #include @@ -91,6 +95,7 @@ examples: pinctrl-1 = <&blsp1_i2c1_sleep>; power-domains = <&rpmpd MSM8909_VDDCX>; required-opps = <&rpmpd_opp_svs_krait>; + interconnects = <&pnoc MASTER_BLSP_1 &bimc SLAVE_EBI_CH0>; clock-frequency = <400000>; #address-cells = <1>; From d4f35233a6345f62637463ef6e0708f44ffaa583 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 28 Nov 2023 10:48:37 +0100 Subject: [PATCH 1394/2157] i2c: qup: Vote for interconnect bandwidth to DRAM When the I2C QUP controller is used together with a DMA engine it needs to vote for the interconnect path to the DRAM. Otherwise it may be unable to access the memory quickly enough. The requested peak bandwidth is dependent on the I2C core clock. To avoid sending votes too often the bandwidth is always requested when a DMA transfer starts, but dropped only on runtime suspend. Runtime suspend should only happen if no transfer is active. After resumption we can defer the next vote until the first DMA transfer actually happens. The implementation is largely identical to the one introduced for spi-qup in commit ecdaa9473019 ("spi: qup: Vote for interconnect bandwidth to DRAM") since both drivers represent the same hardware block. Signed-off-by: Stephan Gerhold Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20231128-i2c-qup-dvfs-v1-3-59a0e3039111@kernkonzept.com --- drivers/i2c/busses/i2c-qup.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index da20b4487c9a..3a36d682ed57 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,8 @@ /* TAG length for DATA READ in RX FIFO */ #define READ_RX_TAGS_LEN 2 +#define QUP_BUS_WIDTH 8 + static unsigned int scl_freq; module_param_named(scl_freq, scl_freq, uint, 0444); MODULE_PARM_DESC(scl_freq, "SCL frequency override"); @@ -227,6 +230,7 @@ struct qup_i2c_dev { int irq; struct clk *clk; struct clk *pclk; + struct icc_path *icc_path; struct i2c_adapter adap; int clk_ctl; @@ -255,6 +259,10 @@ struct qup_i2c_dev { /* To configure when bus is in run state */ u32 config_run; + /* bandwidth votes */ + u32 src_clk_freq; + u32 cur_bw_clk_freq; + /* dma parameters */ bool is_dma; /* To check if the current transfer is using DMA */ @@ -453,6 +461,23 @@ static int qup_i2c_bus_active(struct qup_i2c_dev *qup, int len) return ret; } +static int qup_i2c_vote_bw(struct qup_i2c_dev *qup, u32 clk_freq) +{ + u32 needed_peak_bw; + int ret; + + if (qup->cur_bw_clk_freq == clk_freq) + return 0; + + needed_peak_bw = Bps_to_icc(clk_freq * QUP_BUS_WIDTH); + ret = icc_set_bw(qup->icc_path, 0, needed_peak_bw); + if (ret) + return ret; + + qup->cur_bw_clk_freq = clk_freq; + return 0; +} + static void qup_i2c_write_tx_fifo_v1(struct qup_i2c_dev *qup) { struct qup_i2c_block *blk = &qup->blk; @@ -838,6 +863,10 @@ static int qup_i2c_bam_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int ret = 0; int idx = 0; + ret = qup_i2c_vote_bw(qup, qup->src_clk_freq); + if (ret) + return ret; + enable_irq(qup->irq); ret = qup_i2c_req_dma(qup); @@ -1643,6 +1672,7 @@ static void qup_i2c_disable_clocks(struct qup_i2c_dev *qup) config = readl(qup->base + QUP_CONFIG); config |= QUP_CLOCK_AUTO_GATE; writel(config, qup->base + QUP_CONFIG); + qup_i2c_vote_bw(qup, 0); clk_disable_unprepare(qup->pclk); } @@ -1743,6 +1773,11 @@ static int qup_i2c_probe(struct platform_device *pdev) goto fail_dma; } qup->is_dma = true; + + qup->icc_path = devm_of_icc_get(&pdev->dev, NULL); + if (IS_ERR(qup->icc_path)) + return dev_err_probe(&pdev->dev, PTR_ERR(qup->icc_path), + "failed to get interconnect path\n"); } nodma: @@ -1791,6 +1826,7 @@ static int qup_i2c_probe(struct platform_device *pdev) qup_i2c_enable_clocks(qup); src_clk_freq = clk_get_rate(qup->clk); } + qup->src_clk_freq = src_clk_freq; /* * Bootloaders might leave a pending interrupt on certain QUP's, From e794dc30be8b69e44646a162db09b43f719525b7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:15 +0200 Subject: [PATCH 1395/2157] i2c: Introduce i2c_10bit_addr_*_from_msg() helpers There are already a lot of drivers that have been using i2c_8bit_addr_from_msg() for 7-bit addresses, now it's time to have the similar for 10-bit addresses. Signed-off-by: Andy Shevchenko Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250213141045.2716943-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- include/linux/i2c.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 2b2af24d2a43..1e35b1e23165 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -952,6 +952,21 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) return (msg->addr << 1) | (msg->flags & I2C_M_RD); } +/* + * 10-bit address + * addr_1: 5'b11110 | addr[9:8] | (R/nW) + * addr_2: addr[7:0] + */ +static inline u8 i2c_10bit_addr_hi_from_msg(const struct i2c_msg *msg) +{ + return 0xf0 | ((msg->addr & GENMASK(9, 8)) >> 7) | (msg->flags & I2C_M_RD); +} + +static inline u8 i2c_10bit_addr_lo_from_msg(const struct i2c_msg *msg) +{ + return msg->addr & GENMASK(7, 0); +} + u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); From 6af58d3ec736173a677ba56e579f9b01e38ef2f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:16 +0200 Subject: [PATCH 1396/2157] i2c: axxia: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-axxia.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 48916cf45ff7..50030256cd85 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -255,11 +255,6 @@ static int i2c_m_rd(const struct i2c_msg *msg) return (msg->flags & I2C_M_RD) != 0; } -static int i2c_m_ten(const struct i2c_msg *msg) -{ - return (msg->flags & I2C_M_TEN) != 0; -} - static int i2c_m_recv_len(const struct i2c_msg *msg) { return (msg->flags & I2C_M_RECV_LEN) != 0; @@ -439,20 +434,10 @@ static void axxia_i2c_set_addr(struct axxia_i2c_dev *idev, struct i2c_msg *msg) { u32 addr_1, addr_2; - if (i2c_m_ten(msg)) { - /* 10-bit address - * addr_1: 5'b11110 | addr[9:8] | (R/nW) - * addr_2: addr[7:0] - */ - addr_1 = 0xF0 | ((msg->addr >> 7) & 0x06); - if (i2c_m_rd(msg)) - addr_1 |= 1; /* Set the R/nW bit of the address */ - addr_2 = msg->addr & 0xFF; + if (msg->flags & I2C_M_TEN) { + addr_1 = i2c_10bit_addr_hi_from_msg(msg); + addr_2 = i2c_10bit_addr_lo_from_msg(msg); } else { - /* 7-bit address - * addr_1: addr[6:0] | (R/nW) - * addr_2: dont care - */ addr_1 = i2c_8bit_addr_from_msg(msg); addr_2 = 0; } From eaa0df0de963852ffa19dc6ddb765b9a9e169b63 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:17 +0200 Subject: [PATCH 1397/2157] i2c: bcm-kona: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-bcm-kona.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c index 340fe1305dd9..9d8838bbd938 100644 --- a/drivers/i2c/busses/i2c-bcm-kona.c +++ b/drivers/i2c/busses/i2c-bcm-kona.c @@ -471,12 +471,12 @@ static int bcm_kona_i2c_do_addr(struct bcm_kona_i2c_dev *dev, if (msg->flags & I2C_M_TEN) { /* First byte is 11110XX0 where XX is upper 2 bits */ - addr = 0xF0 | ((msg->addr & 0x300) >> 7); + addr = i2c_10bit_addr_hi_from_msg(msg) & ~I2C_M_RD; if (bcm_kona_i2c_write_byte(dev, addr, 0) < 0) return -EREMOTEIO; /* Second byte is the remaining 8 bits */ - addr = msg->addr & 0xFF; + addr = i2c_10bit_addr_lo_from_msg(msg); if (bcm_kona_i2c_write_byte(dev, addr, 0) < 0) return -EREMOTEIO; @@ -486,7 +486,7 @@ static int bcm_kona_i2c_do_addr(struct bcm_kona_i2c_dev *dev, return -EREMOTEIO; /* Then re-send the first byte with the read bit set */ - addr = 0xF0 | ((msg->addr & 0x300) >> 7) | 0x01; + addr = i2c_10bit_addr_hi_from_msg(msg); if (bcm_kona_i2c_write_byte(dev, addr, 0) < 0) return -EREMOTEIO; } From 3bf45fb5707818cff81997dd03bfbd9c1b3428f8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:18 +0200 Subject: [PATCH 1398/2157] i2c: brcmstb: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-5-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-brcmstb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 00f1a046e985..5fa30e8926c5 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -414,23 +414,22 @@ static int brcmstb_i2c_do_addr(struct brcmstb_i2c_dev *dev, if (msg->flags & I2C_M_TEN) { /* First byte is 11110XX0 where XX is upper 2 bits */ - addr = 0xF0 | ((msg->addr & 0x300) >> 7); + addr = i2c_10bit_addr_hi_from_msg(msg) & ~I2C_M_RD; bsc_writel(dev, addr, chip_address); /* Second byte is the remaining 8 bits */ - addr = msg->addr & 0xFF; + addr = i2c_10bit_addr_lo_from_msg(msg); if (brcmstb_i2c_write_data_byte(dev, &addr, 0) < 0) return -EREMOTEIO; if (msg->flags & I2C_M_RD) { /* For read, send restart without stop condition */ - brcmstb_set_i2c_start_stop(dev, COND_RESTART - | COND_NOSTOP); + brcmstb_set_i2c_start_stop(dev, COND_RESTART | COND_NOSTOP); + /* Then re-send the first byte with the read bit set */ - addr = 0xF0 | ((msg->addr & 0x300) >> 7) | 0x01; + addr = i2c_10bit_addr_hi_from_msg(msg); if (brcmstb_i2c_write_data_byte(dev, &addr, 0) < 0) return -EREMOTEIO; - } } else { addr = i2c_8bit_addr_from_msg(msg); From 3bf28fab4ec5b60970183e99428b6c497a75df21 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:19 +0200 Subject: [PATCH 1399/2157] i2c: eg20t: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-6-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-eg20t.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 4914bfbee2a9..efdaddf99f9e 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -48,8 +48,6 @@ #define BUS_IDLE_TIMEOUT 20 #define PCH_I2CCTL_I2CMEN 0x0080 -#define TEN_BIT_ADDR_DEFAULT 0xF000 -#define TEN_BIT_ADDR_MASK 0xF0 #define PCH_START 0x0020 #define PCH_RESTART 0x0004 #define PCH_ESR_START 0x0001 @@ -58,7 +56,6 @@ #define PCH_ACK 0x0008 #define PCH_GETACK 0x0001 #define CLR_REG 0x0 -#define I2C_RD 0x1 #define I2CMCF_BIT 0x0080 #define I2CMIF_BIT 0x0002 #define I2CMAL_BIT 0x0010 @@ -76,8 +73,6 @@ #define I2CMBB_BIT 0x0020 #define BUFFER_MODE_MASK (I2CBMFI_BIT | I2CBMAL_BIT | I2CBMNA_BIT | \ I2CBMTO_BIT | I2CBMIS_BIT) -#define I2C_ADDR_MSK 0xFF -#define I2C_MSB_2B_MSK 0x300 #define FAST_MODE_CLK 400 #define FAST_MODE_EN 0x0001 #define SUB_ADDR_LEN_MAX 4 @@ -371,16 +366,12 @@ static s32 pch_i2c_writebytes(struct i2c_adapter *i2c_adap, struct i2c_algo_pch_data *adap = i2c_adap->algo_data; u8 *buf; u32 length; - u32 addr; - u32 addr_2_msb; - u32 addr_8_lsb; s32 wrcount; s32 rtn; void __iomem *p = adap->pch_base_address; length = msgs->len; buf = msgs->buf; - addr = msgs->addr; /* enable master tx */ pch_setbit(adap->pch_base_address, PCH_I2CCTL, I2C_TX_MODE); @@ -394,8 +385,7 @@ static s32 pch_i2c_writebytes(struct i2c_adapter *i2c_adap, } if (msgs->flags & I2C_M_TEN) { - addr_2_msb = ((addr & I2C_MSB_2B_MSK) >> 7) & 0x06; - iowrite32(addr_2_msb | TEN_BIT_ADDR_MASK, p + PCH_I2CDR); + iowrite32(i2c_10bit_addr_hi_from_msg(msgs), p + PCH_I2CDR); if (first) pch_i2c_start(adap); @@ -403,8 +393,7 @@ static s32 pch_i2c_writebytes(struct i2c_adapter *i2c_adap, if (rtn) return rtn; - addr_8_lsb = (addr & I2C_ADDR_MSK); - iowrite32(addr_8_lsb, p + PCH_I2CDR); + iowrite32(i2c_10bit_addr_lo_from_msg(msgs), p + PCH_I2CDR); } else { /* set 7 bit slave address and R/W bit as 0 */ iowrite32(i2c_8bit_addr_from_msg(msgs), p + PCH_I2CDR); @@ -490,15 +479,11 @@ static s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, u8 *buf; u32 count; u32 length; - u32 addr; - u32 addr_2_msb; - u32 addr_8_lsb; void __iomem *p = adap->pch_base_address; s32 rtn; length = msgs->len; buf = msgs->buf; - addr = msgs->addr; /* enable master reception */ pch_clrbit(adap->pch_base_address, PCH_I2CCTL, I2C_TX_MODE); @@ -509,8 +494,7 @@ static s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, } if (msgs->flags & I2C_M_TEN) { - addr_2_msb = ((addr & I2C_MSB_2B_MSK) >> 7); - iowrite32(addr_2_msb | TEN_BIT_ADDR_MASK, p + PCH_I2CDR); + iowrite32(i2c_10bit_addr_hi_from_msg(msgs) & ~I2C_M_RD, p + PCH_I2CDR); if (first) pch_i2c_start(adap); @@ -518,8 +502,7 @@ static s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, if (rtn) return rtn; - addr_8_lsb = (addr & I2C_ADDR_MSK); - iowrite32(addr_8_lsb, p + PCH_I2CDR); + iowrite32(i2c_10bit_addr_lo_from_msg(msgs), p + PCH_I2CDR); pch_i2c_restart(adap); @@ -527,8 +510,7 @@ static s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, if (rtn) return rtn; - addr_2_msb |= I2C_RD; - iowrite32(addr_2_msb | TEN_BIT_ADDR_MASK, p + PCH_I2CDR); + iowrite32(i2c_10bit_addr_hi_from_msg(msgs), p + PCH_I2CDR); } else { /* 7 address bits + R/W bit */ iowrite32(i2c_8bit_addr_from_msg(msgs), p + PCH_I2CDR); From ed7f48d3efa0943c0563c1657ae3b24e9ebbf568 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:20 +0200 Subject: [PATCH 1400/2157] i2c: kempld: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-7-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-kempld.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-kempld.c b/drivers/i2c/busses/i2c-kempld.c index 212196af68ba..9b4c7cba62b6 100644 --- a/drivers/i2c/busses/i2c-kempld.c +++ b/drivers/i2c/busses/i2c-kempld.c @@ -115,9 +115,7 @@ static int kempld_i2c_process(struct kempld_i2c_data *i2c) if (i2c->state == STATE_ADDR) { /* 10 bit address? */ if (i2c->msg->flags & I2C_M_TEN) { - addr = 0xf0 | ((i2c->msg->addr >> 7) & 0x6); - /* Set read bit if necessary */ - addr |= (i2c->msg->flags & I2C_M_RD) ? 1 : 0; + addr = i2c_10bit_addr_hi_from_msg(msg); i2c->state = STATE_ADDR10; } else { addr = i2c_8bit_addr_from_msg(i2c->msg); @@ -132,10 +130,12 @@ static int kempld_i2c_process(struct kempld_i2c_data *i2c) /* Second part of 10 bit addressing */ if (i2c->state == STATE_ADDR10) { - kempld_write8(pld, KEMPLD_I2C_DATA, i2c->msg->addr & 0xff); + addr = i2c_10bit_addr_lo_from_msg(msg); + i2c->state = STATE_START; + + kempld_write8(pld, KEMPLD_I2C_DATA, addr); kempld_write8(pld, KEMPLD_I2C_CMD, I2C_CMD_WRITE); - i2c->state = STATE_START; return 0; } From 6cdc8fe0ba423b002029b00daee8ea296abb7494 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:21 +0200 Subject: [PATCH 1401/2157] i2c: mt7621: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-8-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-mt7621.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 2103f21f9ddd..0a288c998419 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -164,22 +164,18 @@ static int mtk_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, /* write address */ if (pmsg->flags & I2C_M_TEN) { /* 10 bits address */ - addr = 0xf0 | ((pmsg->addr >> 7) & 0x06); - addr |= (pmsg->addr & 0xff) << 8; - if (pmsg->flags & I2C_M_RD) - addr |= 1; - iowrite32(addr, i2c->base + REG_SM0D0_REG); - ret = mtk_i2c_cmd(i2c, SM0CTL1_WRITE, 2); - if (ret) - goto err_timeout; + addr = i2c_10bit_addr_hi_from_msg(pmsg); + addr |= i2c_10bit_addr_lo_from_msg(pmsg) << 8; + len = 2; } else { /* 7 bits address */ addr = i2c_8bit_addr_from_msg(pmsg); - iowrite32(addr, i2c->base + REG_SM0D0_REG); - ret = mtk_i2c_cmd(i2c, SM0CTL1_WRITE, 1); - if (ret) - goto err_timeout; + len = 1; } + iowrite32(addr, i2c->base + REG_SM0D0_REG); + ret = mtk_i2c_cmd(i2c, SM0CTL1_WRITE, len); + if (ret) + goto err_timeout; /* check address ACK */ if (!(pmsg->flags & I2C_M_IGNORE_NAK)) { From dbb1c2edb5eab197f9d65fbc1ea0a500643cf1ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:22 +0200 Subject: [PATCH 1402/2157] i2c: rzv2m: Use i2c_10bit_addr_*_from_msg() helpers Use i2c_10bit_addr_*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-9-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-rzv2m.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-rzv2m.c b/drivers/i2c/busses/i2c-rzv2m.c index 02b76e24a476..53762cc56d28 100644 --- a/drivers/i2c/busses/i2c-rzv2m.c +++ b/drivers/i2c/busses/i2c-rzv2m.c @@ -287,20 +287,15 @@ static int rzv2m_i2c_send_address(struct rzv2m_i2c_priv *priv, int ret; if (msg->flags & I2C_M_TEN) { - /* - * 10-bit address - * addr_1: 5'b11110 | addr[9:8] | (R/nW) - * addr_2: addr[7:0] - */ - addr = 0xf0 | ((msg->addr & GENMASK(9, 8)) >> 7); - addr |= !!(msg->flags & I2C_M_RD); - /* Send 1st address(extend code) */ + /* 10-bit address: Send 1st address(extend code) */ + addr = i2c_10bit_addr_hi_from_msg(msg); ret = rzv2m_i2c_write_with_ack(priv, addr); if (ret) return ret; - /* Send 2nd address */ - ret = rzv2m_i2c_write_with_ack(priv, msg->addr & 0xff); + /* 10-bit address: Send 2nd address */ + addr = i2c_10bit_addr_lo_from_msg(msg); + ret = rzv2m_i2c_write_with_ack(priv, addr); } else { /* 7-bit address */ addr = i2c_8bit_addr_from_msg(msg); From 2ee4415274fadbb24ce7d6a0bd88e1d752b2b553 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:23 +0200 Subject: [PATCH 1403/2157] i2c: ibm_iic: Use i2c_*bit_addr*_from_msg() helpers Use i2c_*bit_addr*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-10-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-ibm_iic.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index c76c4116ddc7..6bf45d752ff9 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -512,19 +512,17 @@ static int iic_xfer_bytes(struct ibm_iic_private* dev, struct i2c_msg* pm, static inline void iic_address(struct ibm_iic_private* dev, struct i2c_msg* msg) { volatile struct iic_regs __iomem *iic = dev->vaddr; - u16 addr = msg->addr; DBG2("%d: iic_address, 0x%03x (%d-bit)\n", dev->idx, - addr, msg->flags & I2C_M_TEN ? 10 : 7); + msg->addr, msg->flags & I2C_M_TEN ? 10 : 7); - if (msg->flags & I2C_M_TEN){ + if (msg->flags & I2C_M_TEN) { out_8(&iic->cntl, CNTL_AMD); - out_8(&iic->lmadr, addr); - out_8(&iic->hmadr, 0xf0 | ((addr >> 7) & 0x06)); - } - else { + out_8(&iic->lmadr, i2c_10bit_addr_lo_from_msg(msg)); + out_8(&iic->hmadr, i2c_10bit_addr_hi_from_msg(msg) & ~I2C_M_RD); + } else { out_8(&iic->cntl, 0); - out_8(&iic->lmadr, addr << 1); + out_8(&iic->lmadr, i2c_8bit_addr_from_msg(msg) & ~I2C_M_RD); } } From f2157d1f7aaad18b95b4f73cc8225e8e8de1f737 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Feb 2025 16:07:24 +0200 Subject: [PATCH 1404/2157] i2c: mv64xxx: Use i2c_*bit_addr*_from_msg() helpers Use i2c_*bit_addr*_from_msg() helpers instead of local copy. No functional change intended. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250213141045.2716943-11-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-mv64xxx.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 874309580c33..8fc26a511320 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -27,7 +27,6 @@ #include #include -#define MV64XXX_I2C_ADDR_ADDR(val) ((val & 0x7f) << 1) #define MV64XXX_I2C_BAUD_DIV_N(val) (val & 0x7) #define MV64XXX_I2C_BAUD_DIV_M(val) ((val & 0xf) << 3) @@ -176,22 +175,17 @@ static void mv64xxx_i2c_prepare_for_io(struct mv64xxx_i2c_data *drv_data, struct i2c_msg *msg) { - u32 dir = 0; - drv_data->cntl_bits = MV64XXX_I2C_REG_CONTROL_ACK | MV64XXX_I2C_REG_CONTROL_TWSIEN; if (!drv_data->atomic) drv_data->cntl_bits |= MV64XXX_I2C_REG_CONTROL_INTEN; - if (msg->flags & I2C_M_RD) - dir = 1; - if (msg->flags & I2C_M_TEN) { - drv_data->addr1 = 0xf0 | (((u32)msg->addr & 0x300) >> 7) | dir; - drv_data->addr2 = (u32)msg->addr & 0xff; + drv_data->addr1 = i2c_10bit_addr_hi_from_msg(msg); + drv_data->addr2 = i2c_10bit_addr_lo_from_msg(msg); } else { - drv_data->addr1 = MV64XXX_I2C_ADDR_ADDR((u32)msg->addr) | dir; + drv_data->addr1 = i2c_8bit_addr_from_msg(msg); drv_data->addr2 = 0; } } From 0d967f12314110013b977cc658816e7038af1f1d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 6 Mar 2025 10:58:15 -0500 Subject: [PATCH 1405/2157] dt-bindings: i2c: imx-lpi2c: add i.MX94 LPI2C Add compatible string "fsl,imx94-lpi2c" for the i.MX94 chip, which is backward compatible with i.MX7ULP. Set it to fall back to "fsl,imx7ulp-lpi2c". Signed-off-by: Frank Li Acked-by: Conor Dooley Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250306155815.110514-1-Frank.Li@nxp.com --- Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml index 1dcb9c78de3b..969030a6f82a 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml @@ -26,6 +26,7 @@ properties: - fsl,imx8qm-lpi2c - fsl,imx8ulp-lpi2c - fsl,imx93-lpi2c + - fsl,imx94-lpi2c - fsl,imx95-lpi2c - const: fsl,imx7ulp-lpi2c From 9e2fd53073cb52f57d40713dbecd649dee4f3c0a Mon Sep 17 00:00:00 2001 From: Anindya Sundar Gayen Date: Fri, 28 Feb 2025 19:07:45 +0530 Subject: [PATCH 1406/2157] i2c: i2c-exynos5: fixed a spelling error Corrected a spelling mistake in the i2c-exynos5 driver to improve code readability. No functional changes were made. Signed-off-by: Anindya Sundar Gayen Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250228133745.35053-1-anindya.sg@samsung.com --- drivers/i2c/busses/i2c-exynos5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 6cdd957ea7e4..02f24479aa07 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -814,7 +814,7 @@ static int exynos5_i2c_xfer_msg(struct exynos5_i2c *i2c, ret = i2c->state; /* - * If this is the last message to be transfered (stop == 1) + * If this is the last message to be transferred (stop == 1) * Then check if the bus can be brought back to idle. */ if (ret == 0 && stop) From ff885b6fd5dc1f90a5ee8d23b114a6c777437d56 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 27 Feb 2025 19:19:02 +0800 Subject: [PATCH 1407/2157] dt-bindings: i2c: i2c-rk3x: Add rk3562 support rk3562 i2c compatible to the existing rk3399 binding. Signed-off-by: Kever Yang Reviewed-by: Heiko Stuebner Acked-by: Rob Herring (Arm) Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250227111913.2344207-5-kever.yang@rock-chips.com --- Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml b/Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml index a9dae5b52f28..8101afa6f146 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml @@ -37,6 +37,7 @@ properties: - rockchip,px30-i2c - rockchip,rk3308-i2c - rockchip,rk3328-i2c + - rockchip,rk3562-i2c - rockchip,rk3568-i2c - rockchip,rk3576-i2c - rockchip,rk3588-i2c From 3d36dd1161ca3158f600d6dbeab8b645e56d1d92 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 6 Feb 2025 17:27:07 +0530 Subject: [PATCH 1408/2157] i2c: cadence: Simplify using devm_clk_get_enabled() Clock handling can be very simplified with using devm_clk_get_enabled() as was done by commit 8d2aaf4382b7 ("gpio: zynq: Simplify using devm_clk_get_enabled()"). And also fix issue in connection to incorrect sequence when err_clk_dis label is called. When reset_control_deassert() fails it jumps to err_clk_dis label which disables clock and also disable pm_runtime setup but nothing has been setup at this time of failure because initialization is done below reset_control_deassert() call. Signed-off-by: Michal Simek Signed-off-by: Manikanta Guntupalli Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250206115708.1085523-2-manikanta.guntupalli@amd.com --- drivers/i2c/busses/i2c-cadence.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index b64026fbca66..51dc7728d133 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1541,7 +1541,7 @@ static int cdns_i2c_probe(struct platform_device *pdev) snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); - id->clk = devm_clk_get(&pdev->dev, NULL); + id->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(id->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(id->clk), "input clock not found.\n"); @@ -1551,16 +1551,10 @@ static int cdns_i2c_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(id->reset), "Failed to request reset.\n"); - ret = clk_prepare_enable(id->clk); - if (ret) - dev_err(&pdev->dev, "Unable to enable clock.\n"); - ret = reset_control_deassert(id->reset); - if (ret) { - dev_err_probe(&pdev->dev, ret, - "Failed to de-assert reset.\n"); - goto err_clk_dis; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to de-assert reset.\n"); pm_runtime_set_autosuspend_delay(id->dev, CNDS_I2C_PM_TIMEOUT); pm_runtime_use_autosuspend(id->dev); @@ -1616,8 +1610,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) err_clk_notifier_unregister: clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); reset_control_assert(id->reset); -err_clk_dis: - clk_disable_unprepare(id->clk); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); return ret; @@ -1642,7 +1634,6 @@ static void cdns_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&id->adap); clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); reset_control_assert(id->reset); - clk_disable_unprepare(id->clk); } static struct platform_driver cdns_i2c_drv = { From 61b804548e17447b2a777650a8ff0708707f0cb9 Mon Sep 17 00:00:00 2001 From: Manikanta Guntupalli Date: Thu, 6 Feb 2025 17:27:08 +0530 Subject: [PATCH 1409/2157] i2c: cadence: Move reset_control_assert after pm_runtime_set_suspended in probe error path Ensure reset_control_assert() is called after pm_runtime_set_suspended() in the cdns_i2c_probe exit path to maintain proper power management sequence in error cases. Signed-off-by: Manikanta Guntupalli Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250206115708.1085523-3-manikanta.guntupalli@amd.com --- drivers/i2c/busses/i2c-cadence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 51dc7728d133..8df63aaf2a80 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1609,9 +1609,9 @@ static int cdns_i2c_probe(struct platform_device *pdev) err_clk_notifier_unregister: clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); - reset_control_assert(id->reset); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); + reset_control_assert(id->reset); return ret; } From 9d515bf71ea2de9fefd74bb792f997857dd2f6b3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 21 Feb 2025 21:28:21 +0100 Subject: [PATCH 1410/2157] i2c: i801: Cosmetic improvements - Use pci_err et al instead of dev_err to simplify the code - use format %pr to print resource Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/6676001a-a584-46e2-a98e-17163d82c218@gmail.com --- drivers/i2c/busses/i2c-i801.c | 49 ++++++++++++++--------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 171d29d2770e..a3d79a5ab570 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -412,16 +412,14 @@ static int i801_check_post(struct i801_priv *priv, int status) /* Check if it worked */ status = inb_p(SMBHSTSTS(priv)); - if ((status & SMBHSTSTS_HOST_BUSY) || - !(status & SMBHSTSTS_FAILED)) - dev_dbg(&priv->pci_dev->dev, - "Failed terminating the transaction\n"); + if ((status & SMBHSTSTS_HOST_BUSY) || !(status & SMBHSTSTS_FAILED)) + pci_dbg(priv->pci_dev, "Failed terminating the transaction\n"); return -ETIMEDOUT; } if (status & SMBHSTSTS_FAILED) { result = -EIO; - dev_err(&priv->pci_dev->dev, "Transaction failed\n"); + pci_err(priv->pci_dev, "Transaction failed\n"); } if (status & SMBHSTSTS_DEV_ERR) { /* @@ -449,7 +447,7 @@ static int i801_check_post(struct i801_priv *priv, int status) } if (status & SMBHSTSTS_BUS_ERR) { result = -EAGAIN; - dev_dbg(&priv->pci_dev->dev, "Lost arbitration\n"); + pci_dbg(priv->pci_dev, "Lost arbitration\n"); } return result; @@ -578,8 +576,7 @@ static void i801_isr_byte_done(struct i801_priv *priv) if (priv->count < priv->len) priv->data[priv->count++] = inb(SMBBLKDAT(priv)); else - dev_dbg(&priv->pci_dev->dev, - "Discarding extra byte on block read\n"); + pci_dbg(priv->pci_dev, "Discarding extra byte on block read\n"); /* Set LAST_BYTE for last byte of read transaction */ if (priv->count == priv->len - 1) @@ -1438,7 +1435,7 @@ static void i801_add_tco(struct i801_priv *priv) priv->tco_pdev = i801_add_tco_spt(pci_dev, tco_res); if (IS_ERR(priv->tco_pdev)) - dev_warn(&pci_dev->dev, "failed to create iTCO device\n"); + pci_warn(pci_dev, "failed to create iTCO device\n"); } #ifdef CONFIG_ACPI @@ -1467,8 +1464,8 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; - dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); - dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n"); + pci_warn(pdev, "BIOS is accessing SMBus registers\n"); + pci_warn(pdev, "Driver SMBus register access inhibited\n"); /* * BIOS is accessing the host controller so prevent it from @@ -1549,8 +1546,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Disable features on user request */ for (i = 0; i < ARRAY_SIZE(i801_feature_names); i++) { if (priv->features & disable_features & (1 << i)) - dev_notice(&dev->dev, "%s disabled by user\n", - i801_feature_names[i]); + pci_notice(dev, "%s disabled by user\n", i801_feature_names[i]); } priv->features &= ~disable_features; @@ -1564,16 +1560,14 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) */ err = pci_enable_device(dev); if (err) { - dev_err(&dev->dev, "Failed to enable SMBus PCI device (%d)\n", - err); + pci_err(dev, "Failed to enable SMBus PCI device (%d)\n", err); return err; } /* Determine the address of the SMBus area */ priv->smba = pci_resource_start(dev, SMBBAR); if (!priv->smba) { - dev_err(&dev->dev, - "SMBus base address uninitialized, upgrade BIOS\n"); + pci_err(dev, "SMBus base address uninitialized, upgrade BIOS\n"); return -ENODEV; } @@ -1582,26 +1576,24 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) err = pcim_iomap_regions(dev, 1 << SMBBAR, DRV_NAME); if (err) { - dev_err(&dev->dev, - "Failed to request SMBus region 0x%lx-0x%Lx\n", - priv->smba, - (unsigned long long)pci_resource_end(dev, SMBBAR)); + pci_err(dev, "Failed to request SMBus region %pr\n", + pci_resource_n(dev, SMBBAR)); i801_acpi_remove(priv); return err; } - pci_read_config_byte(priv->pci_dev, SMBHSTCFG, &priv->original_hstcfg); + pci_read_config_byte(dev, SMBHSTCFG, &priv->original_hstcfg); i801_setup_hstcfg(priv); if (!(priv->original_hstcfg & SMBHSTCFG_HST_EN)) - dev_info(&dev->dev, "Enabling SMBus device\n"); + pci_info(dev, "Enabling SMBus device\n"); if (priv->original_hstcfg & SMBHSTCFG_SMB_SMI_EN) { - dev_dbg(&dev->dev, "SMBus using interrupt SMI#\n"); + pci_dbg(dev, "SMBus using interrupt SMI#\n"); /* Disable SMBus interrupt feature if SMBus using SMI# */ priv->features &= ~FEATURE_IRQ; } if (priv->original_hstcfg & SMBHSTCFG_SPD_WD) - dev_info(&dev->dev, "SPD Write Disable is set\n"); + pci_info(dev, "SPD Write Disable is set\n"); /* Clear special mode bits */ if (priv->features & (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER)) @@ -1620,7 +1612,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Complain if an interrupt is already pending */ pci_read_config_word(priv->pci_dev, PCI_STATUS, &pcists); if (pcists & PCI_STATUS_INTERRUPT) - dev_warn(&dev->dev, "An interrupt is pending!\n"); + pci_warn(dev, "An interrupt is pending!\n"); } if (priv->features & FEATURE_IRQ) { @@ -1629,12 +1621,11 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) err = devm_request_irq(&dev->dev, dev->irq, i801_isr, IRQF_SHARED, DRV_NAME, priv); if (err) { - dev_err(&dev->dev, "Failed to allocate irq %d: %d\n", - dev->irq, err); + pci_err(dev, "Failed to allocate irq %d: %d\n", dev->irq, err); priv->features &= ~FEATURE_IRQ; } } - dev_info(&dev->dev, "SMBus using %s\n", + pci_info(dev, "SMBus using %s\n", priv->features & FEATURE_IRQ ? "PCI interrupt" : "polling"); /* Host notification uses an interrupt */ From e5befb5b01bc6b570c47a421284ca5193d36280d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 21 Feb 2025 21:29:23 +0100 Subject: [PATCH 1411/2157] i2c: i801: Move i801_wait_intr and i801_wait_byte_done in the code Move both functions to avoid forward declarations in a subsequent patch. Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/a60ee54b-c5e8-4bdb-9f1f-8889f4dcd114@gmail.com --- drivers/i2c/busses/i2c-i801.c | 68 +++++++++++++++++------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a3d79a5ab570..9097bb9cdb9e 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -337,6 +337,40 @@ MODULE_PARM_DESC(disable_features, "Disable selected driver features:\n" "\t\t 0x10 don't use interrupts\n" "\t\t 0x20 disable SMBus Host Notify "); +/* Wait for BUSY being cleared and either INTR or an error flag being set */ +static int i801_wait_intr(struct i801_priv *priv) +{ + unsigned long timeout = jiffies + priv->adapter.timeout; + int status, busy; + + do { + usleep_range(250, 500); + status = inb_p(SMBHSTSTS(priv)); + busy = status & SMBHSTSTS_HOST_BUSY; + status &= STATUS_ERROR_FLAGS | SMBHSTSTS_INTR; + if (!busy && status) + return status & STATUS_ERROR_FLAGS; + } while (time_is_after_eq_jiffies(timeout)); + + return -ETIMEDOUT; +} + +/* Wait for either BYTE_DONE or an error flag being set */ +static int i801_wait_byte_done(struct i801_priv *priv) +{ + unsigned long timeout = jiffies + priv->adapter.timeout; + int status; + + do { + usleep_range(250, 500); + status = inb_p(SMBHSTSTS(priv)); + if (status & (STATUS_ERROR_FLAGS | SMBHSTSTS_BYTE_DONE)) + return status & STATUS_ERROR_FLAGS; + } while (time_is_after_eq_jiffies(timeout)); + + return -ETIMEDOUT; +} + static int i801_get_block_len(struct i801_priv *priv) { u8 len = inb_p(SMBHSTDAT0(priv)); @@ -453,40 +487,6 @@ static int i801_check_post(struct i801_priv *priv, int status) return result; } -/* Wait for BUSY being cleared and either INTR or an error flag being set */ -static int i801_wait_intr(struct i801_priv *priv) -{ - unsigned long timeout = jiffies + priv->adapter.timeout; - int status, busy; - - do { - usleep_range(250, 500); - status = inb_p(SMBHSTSTS(priv)); - busy = status & SMBHSTSTS_HOST_BUSY; - status &= STATUS_ERROR_FLAGS | SMBHSTSTS_INTR; - if (!busy && status) - return status & STATUS_ERROR_FLAGS; - } while (time_is_after_eq_jiffies(timeout)); - - return -ETIMEDOUT; -} - -/* Wait for either BYTE_DONE or an error flag being set */ -static int i801_wait_byte_done(struct i801_priv *priv) -{ - unsigned long timeout = jiffies + priv->adapter.timeout; - int status; - - do { - usleep_range(250, 500); - status = inb_p(SMBHSTSTS(priv)); - if (status & (STATUS_ERROR_FLAGS | SMBHSTSTS_BYTE_DONE)) - return status & STATUS_ERROR_FLAGS; - } while (time_is_after_eq_jiffies(timeout)); - - return -ETIMEDOUT; -} - static int i801_transaction(struct i801_priv *priv, int xact) { unsigned long result; From 3a3c6b7b0387d12b067052e1027cd967fae8378a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 21 Feb 2025 21:30:46 +0100 Subject: [PATCH 1412/2157] i2c: i801: Improve too small kill wait time in i801_check_post In my tests terminating a transaction took about 25ms, what is in line with the chip-internal timeout as described in 5.21.3.2 "Bus Time Out" in [0]. Therefore the 2ms delay is too low. Instead of a fixed delay let's use i801_wait_intr() here, this also facilitates the status handling. This potential issue seems to have been existing forever, but as no related problem is known, treat it as an improvement. [0] Intel document #326776-003, 7 Series PCH datasheet Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/ad4ef645-5d03-4833-a0b6-f31f8fd06483@gmail.com --- drivers/i2c/busses/i2c-i801.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9097bb9cdb9e..6a4147054b49 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -441,12 +441,11 @@ static int i801_check_post(struct i801_priv *priv, int status) if (unlikely(status < 0)) { /* try to stop the current command */ outb_p(SMBHSTCNT_KILL, SMBHSTCNT(priv)); - usleep_range(1000, 2000); + status = i801_wait_intr(priv); outb_p(0, SMBHSTCNT(priv)); /* Check if it worked */ - status = inb_p(SMBHSTSTS(priv)); - if ((status & SMBHSTSTS_HOST_BUSY) || !(status & SMBHSTSTS_FAILED)) + if (status < 0 || !(status & SMBHSTSTS_FAILED)) pci_dbg(priv->pci_dev, "Failed terminating the transaction\n"); return -ETIMEDOUT; } From e2942bb4e62938fcef1481c9c1470b661087cdb7 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 18 Mar 2025 22:29:21 +0100 Subject: [PATCH 1413/2157] rust: pci: impl Send + Sync for pci::Device Commit 7b948a2af6b5 ("rust: pci: fix unrestricted &mut pci::Device") changed the definition of pci::Device and discarded the implicitly derived Send and Sync traits. This isn't required by upstream code yet, and hence did not cause any issues. However, it is relied on by upcoming drivers, hence add it back in. Signed-off-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250318212940.137577-1-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/pci.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 0ac6cef74f81..0d09ae34a64d 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -465,3 +465,10 @@ fn as_ref(&self) -> &device::Device { unsafe { device::Device::as_ref(dev) } } } + +// SAFETY: A `Device` is always reference-counted and can be released from any thread. +unsafe impl Send for Device {} + +// SAFETY: `Device` can be shared among threads because all methods of `Device` +// (i.e. `Device) are thread safe. +unsafe impl Sync for Device {} From 455943aa187fd93358ccd00c894934061153ec0c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 18 Mar 2025 22:29:22 +0100 Subject: [PATCH 1414/2157] rust: platform: impl Send + Sync for platform::Device Commit 4d320e30ee04 ("rust: platform: fix unrestricted &mut platform::Device") changed the definition of platform::Device and discarded the implicitly derived Send and Sync traits. This isn't required by upstream code yet, and hence did not cause any issues. However, it is relied on by upcoming drivers, hence add it back in. Signed-off-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250318212940.137577-2-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/platform.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index c77c9f2e9aea..2811ca53d8b6 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -233,3 +233,10 @@ fn as_ref(&self) -> &device::Device { unsafe { device::Device::as_ref(dev) } } } + +// SAFETY: A `Device` is always reference-counted and can be released from any thread. +unsafe impl Send for Device {} + +// SAFETY: `Device` can be shared among threads because all methods of `Device` +// (i.e. `Device) are thread safe. +unsafe impl Sync for Device {} From 67a5ba8f742f247bc83e46dd2313c142b1383276 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 17 Mar 2025 03:12:14 +0000 Subject: [PATCH 1415/2157] riscv: fgraph: Fix stack layout to match __arch_ftrace_regs argument of ftrace_return_to_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Naresh Kamboju reported a "Bad frame pointer" kernel warning while running LTP trace ftrace_stress_test.sh in riscv. We can reproduce the same issue with the following command: ``` $ cd /sys/kernel/debug/tracing $ echo 'f:myprobe do_nanosleep%return args1=$retval' > dynamic_events $ echo 1 > events/fprobes/enable $ echo 1 > tracing_on $ sleep 1 ``` And we can get the following kernel warning: [ 127.692888] ------------[ cut here ]------------ [ 127.693755] Bad frame pointer: expected ff2000000065be50, received ba34c141e9594000 [ 127.693755] from func do_nanosleep return to ffffffff800ccb16 [ 127.698699] WARNING: CPU: 1 PID: 129 at kernel/trace/fgraph.c:755 ftrace_return_to_handler+0x1b2/0x1be [ 127.699894] Modules linked in: [ 127.700908] CPU: 1 UID: 0 PID: 129 Comm: sleep Not tainted 6.14.0-rc3-g0ab191c74642 #32 [ 127.701453] Hardware name: riscv-virtio,qemu (DT) [ 127.701859] epc : ftrace_return_to_handler+0x1b2/0x1be [ 127.702032] ra : ftrace_return_to_handler+0x1b2/0x1be [ 127.702151] epc : ffffffff8013b5e0 ra : ffffffff8013b5e0 sp : ff2000000065bd10 [ 127.702221] gp : ffffffff819c12f8 tp : ff60000080853100 t0 : 6e00000000000000 [ 127.702284] t1 : 0000000000000020 t2 : 6e7566206d6f7266 s0 : ff2000000065bd80 [ 127.702346] s1 : ff60000081262000 a0 : 000000000000007b a1 : ffffffff81894f20 [ 127.702408] a2 : 0000000000000010 a3 : fffffffffffffffe a4 : 0000000000000000 [ 127.702470] a5 : 0000000000000000 a6 : 0000000000000008 a7 : 0000000000000038 [ 127.702530] s2 : ba34c141e9594000 s3 : 0000000000000000 s4 : ff2000000065bdd0 [ 127.702591] s5 : 00007fff8adcf400 s6 : 000055556dc1d8c0 s7 : 0000000000000068 [ 127.702651] s8 : 00007fff8adf5d10 s9 : 000000000000006d s10: 0000000000000001 [ 127.702710] s11: 00005555737377c8 t3 : ffffffff819d899e t4 : ffffffff819d899e [ 127.702769] t5 : ffffffff819d89a0 t6 : ff2000000065bb18 [ 127.702826] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 127.703292] [] ftrace_return_to_handler+0x1b2/0x1be [ 127.703760] [] return_to_handler+0x16/0x26 [ 127.704009] [] return_to_handler+0x0/0x26 [ 127.704057] [] common_nsleep+0x42/0x54 [ 127.704117] [] __riscv_sys_clock_nanosleep+0xba/0x10a [ 127.704176] [] do_trap_ecall_u+0x188/0x218 [ 127.704295] [] handle_exception+0x14a/0x156 [ 127.705436] ---[ end trace 0000000000000000 ]--- The reason is that the stack layout for constructing argument for the ftrace_return_to_handler in the return_to_handler does not match the __arch_ftrace_regs structure of riscv, leading to unexpected results. Fixes: a3ed4157b7d8 ("fgraph: Replace fgraph_ret_regs with ftrace_regs") Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/all/CA+G9fYvp_oAxeDFj88Tk2rfEZ7jtYKAKSwfYS66=57Db9TBdyA@mail.gmail.com Signed-off-by: Pu Lehui Reviewed-by: Alexandre Ghiti Tested-by: Björn Töpel Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250317031214.4138436-2-pulehui@huaweicloud.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/mcount.S | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 068168046e0e..da4a4000e57e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -12,8 +12,6 @@ #include #include -#define ABI_SIZE_ON_STACK 80 - .text .macro SAVE_ABI_STATE @@ -28,12 +26,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -ABI_SIZE_ON_STACK - REG_S ra, 1*SZREG(sp) - REG_S s0, 8*SZREG(sp) - REG_S a0, 10*SZREG(sp) - REG_S a1, 11*SZREG(sp) - addi s0, sp, ABI_SIZE_ON_STACK + addi sp, sp, -FREGS_SIZE_ON_STACK + REG_S ra, FREGS_RA(sp) + REG_S s0, FREGS_S0(sp) + REG_S a0, FREGS_A0(sp) + REG_S a1, FREGS_A1(sp) + addi s0, sp, FREGS_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -43,11 +41,11 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 1*SZREG(sp) - REG_L s0, 8*SZREG(sp) - REG_L a0, 10*SZREG(sp) - REG_L a1, 11*SZREG(sp) - addi sp, sp, ABI_SIZE_ON_STACK + REG_L ra, FREGS_RA(sp) + REG_L s0, FREGS_S0(sp) + REG_L a0, FREGS_A0(sp) + REG_L a1, FREGS_A1(sp) + addi sp, sp, FREGS_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) From a65e0f67da24d1bff0dc679a018ced26d1952683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:08 +0200 Subject: [PATCH 1416/2157] dt-bindings: riscv: add Zaamo and Zalrsc ISA extension description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add description for the Zaamo and Zalrsc ISA extension[1]. Link: https://github.com/riscv/riscv-zaamo-zalrsc [1] Signed-off-by: Clément Léger Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240619153913.867263-2-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index f26997c3d34d..73d8fec29ec2 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -224,6 +224,12 @@ properties: as ratified at commit 4a69197e5617 ("Update to ratified state") of riscv-svvptc. + - const: zaamo + description: | + The standard Zaamo extension for atomic memory operations as + ratified at commit e87412e621f1 ("integrate Zaamo and Zalrsc text + (#1304)") of the unprivileged ISA specification. + - const: zabha description: | The Zabha extension for Byte and Halfword Atomic Memory Operations @@ -236,6 +242,12 @@ properties: is supported as ratified at commit 5059e0ca641c ("update to ratified") of the riscv-zacas. + - const: zalrsc + description: | + The standard Zalrsc extension for load-reserved/store-conditional as + ratified at commit e87412e621f1 ("integrate Zaamo and Zalrsc text + (#1304)") of the unprivileged ISA specification. + - const: zawrs description: | The Zawrs extension for entering a low-power state or for trapping @@ -718,6 +730,13 @@ properties: const: zfbfmin - contains: const: zvfbfmin + # Zacas depends on Zaamo + - if: + contains: + const: zacas + then: + contains: + const: zaamo allOf: # Zcf extension does not exist on rv64 From 35173b666a16ce631da9d70cd1b376162d9dea53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:09 +0200 Subject: [PATCH 1417/2157] riscv: add parsing for Zaamo and Zalrsc extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These 2 new extensions are actually a subset of the A extension which provides atomic memory operations and load-reserved/store-conditional instructions. Signed-off-by: Clément Léger Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240619153913.867263-3-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/kernel/cpufeature.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 14cc29f2a723..e3cbf203cdde 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -103,6 +103,8 @@ #define RISCV_ISA_EXT_ZFBFMIN 94 #define RISCV_ISA_EXT_ZVFBFMIN 95 #define RISCV_ISA_EXT_ZVFBFWMA 96 +#define RISCV_ISA_EXT_ZAAMO 97 +#define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 6ec9499e2cf2..af9c346da96e 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -182,6 +182,11 @@ static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, return 0; } +static const unsigned int riscv_a_exts[] = { + RISCV_ISA_EXT_ZAAMO, + RISCV_ISA_EXT_ZALRSC, +}; + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -353,7 +358,7 @@ static const unsigned int riscv_c_exts[] = { const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i), __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m), - __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a), + __RISCV_ISA_EXT_SUPERSET(a, RISCV_ISA_EXT_a, riscv_a_exts), __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), @@ -373,8 +378,10 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO), __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), From 9d45d1ff90a6888f6138eb7e1f2619ef427831d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:10 +0200 Subject: [PATCH 1418/2157] riscv: hwprobe: export Zaamo and Zalrsc extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the Zaamo and Zalrsc extensions to userspace using hwprobe. Signed-off-by: Clément Léger Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240619153913.867263-4-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 8 ++++++++ arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 2 ++ 3 files changed, 12 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 2792f12e90ba..53607d962653 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -245,6 +245,14 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. + * :c:macro:`RISCV_HWPROBE_EXT_ZAAMO`: The Zaamo extension is supported as + defined in the in the RISC-V ISA manual starting from commit e87412e621f1 + ("integrate Zaamo and Zalrsc text (#1304)"). + + * :c:macro:`RISCV_HWPROBE_EXT_ZALRSC`: The Zalrsc extension is supported as + defined in the in the RISC-V ISA manual starting from commit e87412e621f1 + ("integrate Zaamo and Zalrsc text (#1304)"). + * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as defined in version 1.0 of the RISC-V Pointer Masking extensions. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 056decf65b1b..3c2fce939673 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -79,6 +79,8 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 53) #define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 54) #define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) +#define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) +#define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cfe6ceaf3069..f7380ff9ed17 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -95,7 +95,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZAAMO); EXT_KEY(ZACAS); + EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); From 2d79608cf6112e787e6f90cf909bdadceed30dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:11 +0200 Subject: [PATCH 1419/2157] RISC-V: KVM: Allow Zaamo/Zalrsc extensions for Guest/VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zaamo/Zalrsc extensions for Guest/VM. Signed-off-by: Clément Léger Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240619153913.867263-5-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu_onereg.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index f06bc5efcd79..5f59fd226cc5 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVVPTC, KVM_RISCV_ISA_EXT_ZABHA, KVM_RISCV_ISA_EXT_ZICCRSE, + KVM_RISCV_ISA_EXT_ZAAMO, + KVM_RISCV_ISA_EXT_ZALRSC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f6d27b59c641..f05f12f80920 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -47,8 +47,10 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -149,8 +151,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALRSC: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: From 7a9827e777da311672cdba2f4400362b1eebb2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 19 Jun 2024 17:39:12 +0200 Subject: [PATCH 1420/2157] KVM: riscv: selftests: Add Zaamo/Zalrsc extensions to get-reg-list test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KVM RISC-V allows Zaamo/Zalrsc extensions for Guest/VM so add these extensions to get-reg-list test. Signed-off-by: Clément Léger Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240619153913.867263-6-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8515921dfdbf..569f2d67c9b8 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -53,8 +53,10 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: @@ -434,8 +436,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -974,8 +978,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); @@ -1045,8 +1051,10 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_svvptc, + &config_zaamo, &config_zabha, &config_zacas, + &config_zalrsc, &config_zawrs, &config_zba, &config_zbb, From d0259a856afca31d699b706ed5e2adf11086c73b Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 19 Mar 2025 20:20:15 +0900 Subject: [PATCH 1421/2157] 9p/net: fix improper handling of bogus negative read/write replies In p9_client_write() and p9_client_read_once(), if the server incorrectly replies with success but a negative write/read count then we would consider written (negative) <= rsize (positive) because both variables were signed. Make variables unsigned to avoid this problem. The reproducer linked below now fails with the following error instead of a null pointer deref: 9pnet: bogus RWRITE count (4294967295 > 3) Reported-by: Robert Morris Closes: https://lore.kernel.org/16271.1734448631@26-5-164.dynamic.csail.mit.edu Message-ID: <20250319-9p_unsigned_rw-v3-1-71327f1503d0@codewreck.org> Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet --- net/9p/client.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 09f8ced9f8bb..52a5497cfca7 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1548,7 +1548,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int count = iov_iter_count(to); - int rsize, received, non_zc = 0; + u32 rsize, received; + bool non_zc = false; char *dataptr; *err = 0; @@ -1571,7 +1572,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, 0, 11, "dqd", fid->fid, offset, rsize); } else { - non_zc = 1; + non_zc = true; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); } @@ -1592,11 +1593,11 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, return 0; } if (rsize < received) { - pr_err("bogus RREAD count (%d > %d)\n", received, rsize); + pr_err("bogus RREAD count (%u > %u)\n", received, rsize); received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1623,9 +1624,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = 0; while (iov_iter_count(from)) { - int count = iov_iter_count(from); - int rsize = fid->iounit; - int written; + size_t count = iov_iter_count(from); + u32 rsize = fid->iounit; + u32 written; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; @@ -1633,7 +1634,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %u (/%zu)\n", fid->fid, offset, rsize, count); /* Don't bother zerocopy for small IO (< 1024) */ @@ -1659,11 +1660,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) break; } if (rsize < written) { - pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); + pr_err("bogus RWRITE count (%u > %u)\n", written, rsize); written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); @@ -2098,7 +2099,8 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize, non_zc = 0; + int err, non_zc = 0; + u32 rsize; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -2107,7 +2109,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) iov_iter_kvec(&to, ITER_DEST, &kv, 1, count); - p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %u\n", fid->fid, offset, count); clnt = fid->clnt; @@ -2142,11 +2144,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) goto free_and_error; } if (rsize < count) { - pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize); + pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize); count = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count); if (non_zc) memmove(data, dataptr, count); From ad6d4558a7112af9e5f6727ac24fd8cd17469739 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 17 Mar 2025 06:51:06 +0900 Subject: [PATCH 1422/2157] 9p/net: return error on bogus (longer than requested) replies Up until now we've been considering longer than requested replies as acceptable, printing a message and just truncating the data, but it makes more sense to consider these an error. Make these fail with EIO instead. Suggested-by: Christian Schoenebeck Message-ID: <20250317-p9_bogus_io_error-v1-1-9639f6d1561f@codewreck.org> Signed-off-by: Dominique Martinet --- net/9p/client.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 52a5497cfca7..61461b9fa134 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1594,7 +1594,9 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, } if (rsize < received) { pr_err("bogus RREAD count (%u > %u)\n", received, rsize); - received = rsize; + *err = -EIO; + p9_req_put(clnt, req); + return 0; } p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received); @@ -1661,7 +1663,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) } if (rsize < written) { pr_err("bogus RWRITE count (%u > %u)\n", written, rsize); - written = rsize; + *err = -EIO; + iov_iter_revert(from, count - iov_iter_count(from)); + p9_req_put(clnt, req); + break; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written); @@ -1713,7 +1718,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (written > len) { pr_err("bogus RWRITE count (%d > %u)\n", written, len); - written = len; + written = -EIO; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); @@ -2145,7 +2150,8 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) } if (rsize < count) { pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize); - count = rsize; + err = -EIO; + goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count); From fbc0283fbeae27b88448c90305e738991457fee2 Mon Sep 17 00:00:00 2001 From: Ignacio Encinas Date: Tue, 18 Mar 2025 22:39:02 +0100 Subject: [PATCH 1423/2157] 9p/trans_fd: mark concurrent read and writes to p9_conn->err Writes for the error value of a connection are spinlock-protected inside p9_conn_cancel, but lockless reads are present elsewhere to avoid performing unnecessary work after an error has been met. Mark the write and lockless reads to make KCSAN happy. Mark the write as exclusive following the recommendation in "Lock-Protected Writes with Lockless Reads" in tools/memory-model/Documentation/access-marking.txt while we are at it. Mark p9_fd_request and p9_conn_cancel m->err reads despite the fact that they do not race with concurrent writes for stylistic reasons. Reported-by: syzbot+d69a7cc8c683c2cb7506@syzkaller.appspotmail.com Reported-by: syzbot+483d6c9b9231ea7e1851@syzkaller.appspotmail.com Signed-off-by: Ignacio Encinas Message-ID: <20250318-p9_conn_err_benign_data_race-v3-1-290bb18335cc@iencinas.com> Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 2fea50bf047a..339ec4e54778 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -192,12 +192,13 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_lock(&m->req_lock); - if (m->err) { + if (READ_ONCE(m->err)) { spin_unlock(&m->req_lock); return; } - m->err = err; + WRITE_ONCE(m->err, err); + ASSERT_EXCLUSIVE_WRITER(m->err); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); @@ -284,7 +285,7 @@ static void p9_read_work(struct work_struct *work) m = container_of(work, struct p9_conn, rq); - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); @@ -451,7 +452,7 @@ static void p9_write_work(struct work_struct *work) m = container_of(work, struct p9_conn, wq); - if (m->err < 0) { + if (READ_ONCE(m->err) < 0) { clear_bit(Wworksched, &m->wsched); return; } @@ -623,7 +624,7 @@ static void p9_poll_mux(struct p9_conn *m) __poll_t n; int err = -ECONNRESET; - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; n = p9_fd_poll(m->client, NULL, &err); @@ -666,6 +667,7 @@ static void p9_poll_mux(struct p9_conn *m) static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; + int err; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; @@ -674,9 +676,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) spin_lock(&m->req_lock); - if (m->err < 0) { + err = READ_ONCE(m->err); + if (err < 0) { spin_unlock(&m->req_lock); - return m->err; + return err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); From a00e022be5315c5a1f47521a1cc6d3b71c8e9c44 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:16 +0100 Subject: [PATCH 1424/2157] riscv: Annotate unaligned access init functions Several functions used in unaligned access probing are only run at init time. Annotate them appropriately. Fixes: f413aae96cda ("riscv: Set unaligned access speed at compile time") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-11-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/cpufeature.h | 4 ++-- arch/riscv/kernel/traps_misaligned.c | 8 ++++---- arch/riscv/kernel/unaligned_access_speed.c | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 569140d6e639..19defdc2002d 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -63,7 +63,7 @@ void __init riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) -bool check_unaligned_access_emulated_all_cpus(void); +bool __init check_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); @@ -76,7 +76,7 @@ static inline bool unaligned_ctl_available(void) } #endif -bool check_vector_unaligned_access_emulated_all_cpus(void); +bool __init check_vector_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_VECTOR_MISALIGNED) void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); DECLARE_PER_CPU(long, vector_misaligned_access); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 7cc108aed74e..aacbd9d7196e 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -605,7 +605,7 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_un kernel_vector_end(); } -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -625,7 +625,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void) return true; } #else -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { return false; } @@ -659,7 +659,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) } } -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -684,7 +684,7 @@ bool unaligned_ctl_available(void) return unaligned_ctl; } #else -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { return false; } diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 91f189cf1611..b7a8ff7ba6df 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -121,7 +121,7 @@ static int check_unaligned_access(void *param) return 0; } -static void check_unaligned_access_nonboot_cpu(void *param) +static void __init check_unaligned_access_nonboot_cpu(void *param) { unsigned int cpu = smp_processor_id(); struct page **pages = param; @@ -175,7 +175,7 @@ static void set_unaligned_access_static_branches(void) modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); } -static int lock_and_set_unaligned_access_static_branch(void) +static int __init lock_and_set_unaligned_access_static_branch(void) { cpus_read_lock(); set_unaligned_access_static_branches(); @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int check_unaligned_access_speed_all_cpus(void) +static int __init check_unaligned_access_speed_all_cpus(void) { unsigned int cpu; unsigned int cpu_count = num_possible_cpus(); @@ -264,7 +264,7 @@ static int check_unaligned_access_speed_all_cpus(void) return 0; } #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_speed_all_cpus(void) +static int __init check_unaligned_access_speed_all_cpus(void) { return 0; } @@ -379,7 +379,7 @@ static int riscv_online_cpu_vec(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); @@ -393,13 +393,13 @@ static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unuse return 0; } #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { return 0; } #endif -static int check_unaligned_access_all_cpus(void) +static int __init check_unaligned_access_all_cpus(void) { bool all_cpus_emulated, all_cpus_vec_unsupported; From 5af72a818612332a11171b16f27a62ec0a0f91d7 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:17 +0100 Subject: [PATCH 1425/2157] riscv: Fix riscv_online_cpu_vec We shouldn't probe when we already know vector is unsupported and we should probe when we see we don't yet know whether it's supported. Furthermore, we should ensure we've set the access type to unsupported when we don't have vector at all. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-12-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index b7a8ff7ba6df..161964cf2abc 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -367,10 +367,12 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus static int riscv_online_cpu_vec(unsigned int cpu) { - if (!has_vector()) + if (!has_vector()) { + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; return 0; + } - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) return 0; check_vector_unaligned_access_emulated(NULL); From e6d0adf2eb5bb3244cb21a7a15899aa058bd384f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:18 +0100 Subject: [PATCH 1426/2157] riscv: Fix check_unaligned_access_all_cpus check_vector_unaligned_access_emulated_all_cpus(), like its name suggests, will return true when all cpus emulate unaligned vector accesses. If the function returned false it may have been because vector isn't supported at all (!has_vector()) or because at least one cpu doesn't emulate unaligned vector accesses. Since false may be returned for two cases, checking for it isn't sufficient when attempting to determine if we should proceed with the vector speed check. Move the !has_vector() functionality to check_unaligned_access_all_cpus() in order for check_vector_unaligned_access_emulated_all_cpus() to return false for a single case. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-13-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 6 ------ arch/riscv/kernel/unaligned_access_speed.c | 11 +++++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index aacbd9d7196e..4354c87c0376 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -609,12 +609,6 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; - if (!has_vector()) { - for_each_online_cpu(cpu) - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - return false; - } - schedule_on_each_cpu(check_vector_unaligned_access_emulated); for_each_online_cpu(cpu) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 161964cf2abc..02b485dc4bc4 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -403,13 +403,16 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int __init check_unaligned_access_all_cpus(void) { - bool all_cpus_emulated, all_cpus_vec_unsupported; + bool all_cpus_emulated; + int cpu; all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); - all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); - if (!all_cpus_vec_unsupported && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + } else if (!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { kthread_run(vec_check_unaligned_access_speed_all_cpus, NULL, "vec_check_unaligned_access_speed_all_cpus"); } From 813d39baee3229d31420af61460b97f4fafdd352 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:19 +0100 Subject: [PATCH 1427/2157] riscv: Change check_unaligned_access_speed_all_cpus to void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return value of check_unaligned_access_speed_all_cpus() is always zero, so make the function void so we don't need to concern ourselves with it. The change also allows us to tidy up check_unaligned_access_all_cpus() a bit. Reviewed-by: Clément Léger Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-14-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 02b485dc4bc4..780f1c5f512a 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -218,7 +218,7 @@ static int riscv_offline_cpu(unsigned int cpu) } /* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int __init check_unaligned_access_speed_all_cpus(void) +static void __init check_unaligned_access_speed_all_cpus(void) { unsigned int cpu; unsigned int cpu_count = num_possible_cpus(); @@ -226,7 +226,7 @@ static int __init check_unaligned_access_speed_all_cpus(void) if (!bufs) { pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; + return; } /* @@ -261,12 +261,10 @@ static int __init check_unaligned_access_speed_all_cpus(void) } kfree(bufs); - return 0; } #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int __init check_unaligned_access_speed_all_cpus(void) +static void __init check_unaligned_access_speed_all_cpus(void) { - return 0; } #endif @@ -403,10 +401,10 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int __init check_unaligned_access_all_cpus(void) { - bool all_cpus_emulated; int cpu; - all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + if (!check_unaligned_access_emulated_all_cpus()) + check_unaligned_access_speed_all_cpus(); if (!has_vector()) { for_each_online_cpu(cpu) @@ -417,9 +415,6 @@ static int __init check_unaligned_access_all_cpus(void) NULL, "vec_check_unaligned_access_speed_all_cpus"); } - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); - return 0; } From 05ee21f0fcb8ca29bf42bd6cbce109f2e49c167f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:20 +0100 Subject: [PATCH 1428/2157] riscv: Fix set up of cpu hotplug callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU hotplug callbacks should be set up even if we detected all current cpus emulate misaligned accesses, since we want to ensure our expectations of all cpus emulating is maintained. Fixes: 6e5ce7f2eae3 ("riscv: Decouple emulated unaligned accesses from access speed") Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Clément Léger Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-15-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 780f1c5f512a..c9d3237649bb 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -247,13 +247,6 @@ static void __init check_unaligned_access_speed_all_cpus(void) /* Check core 0. */ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - out: for_each_cpu(cpu, cpu_online_mask) { if (bufs[cpu]) @@ -383,13 +376,6 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway { schedule_on_each_cpu(check_vector_unaligned_access); - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu_vec, NULL); - return 0; } #else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ @@ -415,6 +401,19 @@ static int __init check_unaligned_access_all_cpus(void) NULL, "vec_check_unaligned_access_speed_all_cpus"); } + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); +#endif +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); +#endif + return 0; } From 2744ec472de31141ad354907ff98843dd6040917 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:21 +0100 Subject: [PATCH 1429/2157] riscv: Fix set up of vector cpu hotplug callback Whether or not we have RISCV_PROBE_VECTOR_UNALIGNED_ACCESS we need to set up a cpu hotplug callback to check if we have vector at all, since, when we don't have vector, we need to set vector_misaligned_access to unsupported rather than leave it the default of unknown. Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Reviewed-by: Alexandre Ghiti Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-16-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 35 +++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index c9d3237649bb..d9d4ca1fadc7 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -356,21 +356,6 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus per_cpu(vector_misaligned_access, cpu) = speed; } -static int riscv_online_cpu_vec(unsigned int cpu) -{ - if (!has_vector()) { - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - return 0; - } - - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) - return 0; - - check_vector_unaligned_access_emulated(NULL); - check_vector_unaligned_access(NULL); - return 0; -} - /* Measure unaligned access speed on all CPUs present at boot in parallel. */ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { @@ -385,6 +370,24 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway } #endif +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) { + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + return 0; + } + +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); +#endif + + return 0; +} + static int __init check_unaligned_access_all_cpus(void) { int cpu; @@ -409,10 +412,8 @@ static int __init check_unaligned_access_all_cpus(void) cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu, riscv_offline_cpu); #endif -#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu_vec, NULL); -#endif return 0; } From aecb09e091dc143345a9e4b282d0444554445f4b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:22 +0100 Subject: [PATCH 1430/2157] riscv: Add parameter for skipping access speed tests Allow skipping scalar and vector unaligned access speed tests. This is useful for testing alternative code paths and to skip the tests in environments where they run too slowly. All CPUs must have the same unaligned access speed. The code movement is because we now need the scalar cpu hotplug callback to always run, so we need to bring it and its supporting functions out of CONFIG_RISCV_PROBE_UNALIGNED_ACCESS. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-17-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 187 +++++++++++++-------- 1 file changed, 121 insertions(+), 66 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index d9d4ca1fadc7..18e334549544 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -24,8 +24,12 @@ DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + static cpumask_t fast_misaligned_access; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static int check_unaligned_access(void *param) { int cpu = smp_processor_id(); @@ -130,6 +134,50 @@ static void __init check_unaligned_access_nonboot_cpu(void *param) check_unaligned_access(pages[cpu]); } +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ +} +#endif + DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); static void modify_unaligned_access_branches(cpumask_t *mask, int weight) @@ -188,21 +236,29 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch); static int riscv_online_cpu(unsigned int cpu) { - static struct page *buf; - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + goto exit; + } else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; goto exit; - - check_unaligned_access_emulated(NULL); - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; } - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + { + static struct page *buf; + + check_unaligned_access_emulated(NULL); + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + } +#endif exit: set_unaligned_access_static_branches(); @@ -217,50 +273,6 @@ static int riscv_offline_cpu(unsigned int cpu) return 0; } -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static void __init check_unaligned_access_speed_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - -out: - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); -} -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static void __init check_unaligned_access_speed_all_cpus(void) -{ -} -#endif - #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS static void check_vector_unaligned_access(struct work_struct *work __always_unused) { @@ -372,8 +384,8 @@ static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __alway static int riscv_online_cpu_vec(unsigned int cpu) { - if (!has_vector()) { - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; return 0; } @@ -388,30 +400,73 @@ static int riscv_online_cpu_vec(unsigned int cpu) return 0; } +static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" }; + +static int __init set_unaligned_scalar_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param); + +static int __init set_unaligned_vector_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param); + static int __init check_unaligned_access_all_cpus(void) { int cpu; - if (!check_unaligned_access_emulated_all_cpus()) + if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN && + !check_unaligned_access_emulated_all_cpus()) { check_unaligned_access_speed_all_cpus(); - - if (!has_vector()) { + } else { + pr_info("scalar unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_scalar_speed_param]); for_each_online_cpu(cpu) - per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - } else if (!check_vector_unaligned_access_emulated_all_cpus() && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } + + if (!has_vector()) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + + if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN && + !check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { kthread_run(vec_check_unaligned_access_speed_all_cpus, NULL, "vec_check_unaligned_access_speed_all_cpus"); + } else { + pr_info("vector unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_vector_speed_param]); + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } /* * Setup hotplug callbacks for any new CPUs that come online or go * offline. */ -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu, riscv_offline_cpu); -#endif cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", riscv_online_cpu_vec, NULL); From 9fe58530a8cd1402aaa35cad19143777a6f65393 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Mar 2025 13:00:23 +0100 Subject: [PATCH 1431/2157] Documentation/kernel-parameters: Add riscv unaligned speed parameters Document riscv parameters used to select scalar and vector unaligned access speeds. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20250304120014.143628-18-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8..9e3c5fecfa52 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7477,6 +7477,22 @@ Note that genuine overcurrent events won't be reported either. + unaligned_scalar_speed= + [RISCV] + Format: {slow | fast | unsupported} + Allow skipping scalar unaligned access speed tests. This + is useful for testing alternative code paths and to skip + the tests in environments where they run too slowly. All + CPUs must have the same scalar unaligned access speed. + + unaligned_vector_speed= + [RISCV] + Format: {slow | fast | unsupported} + Allow skipping vector unaligned access speed tests. This + is useful for testing alternative code paths and to skip + the tests in environments where they run too slowly. All + CPUs must have the same vector unaligned access speed. + unknown_nmi_panic [X86] Cause panic on unknown NMI. From 4f087eafdcef24b7160b097ddb9704084767b77a Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 19 Mar 2025 21:55:20 +0800 Subject: [PATCH 1432/2157] um: Add pthread-based helper support Introduce a new set of utility functions that can be used to create pthread-based helpers. Helper threads created in this way will ensure thread safety for errno while sharing the same memory space. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250319135523.97050-2-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/include/shared/os.h | 5 +++ arch/um/os-Linux/helper.c | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index bc02767f0639..d0ae42911cb5 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -224,6 +224,11 @@ extern int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long *stack_out); extern int helper_wait(int pid); +struct os_helper_thread; +int os_run_helper_thread(struct os_helper_thread **td_out, + void *(*routine)(void *), void *arg); +void os_kill_helper_thread(struct os_helper_thread *td); +void os_fix_helper_thread_signals(void); /* umid.c */ extern int umid_file_name(char *name, char *buf, int len); diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 3cb8ac63be6e..df22cba24d82 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -167,3 +168,65 @@ int helper_wait(int pid) } else return 0; } + +struct os_helper_thread { + pthread_t handle; +}; + +int os_run_helper_thread(struct os_helper_thread **td_out, + void *(*routine)(void *), void *arg) +{ + struct os_helper_thread *td; + sigset_t sigset, oset; + int err, flags; + + flags = __uml_cant_sleep() ? UM_GFP_ATOMIC : UM_GFP_KERNEL; + td = uml_kmalloc(sizeof(*td), flags); + if (!td) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = -errno; + kfree(td); + return err; + } + + err = pthread_create(&td->handle, NULL, routine, arg); + + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask: %d", errno); + + if (err != 0) + kfree(td); + else + *td_out = td; + + return -err; +} + +void os_kill_helper_thread(struct os_helper_thread *td) +{ + pthread_cancel(td->handle); + pthread_join(td->handle, NULL); + kfree(td); +} + +void os_fix_helper_thread_signals(void) +{ + sigset_t sigset; + + sigemptyset(&sigset); + + sigaddset(&sigset, SIGWINCH); + sigaddset(&sigset, SIGPIPE); + sigaddset(&sigset, SIGPROF); + sigaddset(&sigset, SIGINT); + sigaddset(&sigset, SIGTERM); + sigaddset(&sigset, SIGCHLD); + sigaddset(&sigset, SIGALRM); + sigaddset(&sigset, SIGIO); + sigaddset(&sigset, SIGUSR1); + + pthread_sigmask(SIG_SETMASK, &sigset, NULL); +} From d7f89a9da4328eee450d2e72631d1ec7e52976f0 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 19 Mar 2025 21:55:21 +0800 Subject: [PATCH 1433/2157] um: ubd: Switch to the pthread-based helper The ubd io thread and UML kernel thread share the same errno, which can lead to conflicts when both call syscalls concurrently. Switch to the pthread-based helper to address this issue. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250319135523.97050-3-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/drivers/ubd.h | 6 ++++-- arch/um/drivers/ubd_kern.c | 25 +++++++++++-------------- arch/um/drivers/ubd_user.c | 14 +++++++------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h index f016fe15499f..2985c14661f4 100644 --- a/arch/um/drivers/ubd.h +++ b/arch/um/drivers/ubd.h @@ -7,8 +7,10 @@ #ifndef __UM_UBD_USER_H #define __UM_UBD_USER_H -extern int start_io_thread(unsigned long sp, int *fds_out); -extern int io_thread(void *arg); +#include + +int start_io_thread(struct os_helper_thread **td_out, int *fd_out); +void *io_thread(void *arg); extern int kernel_fd; extern int ubd_read_poll(int timeout); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 0b1e61f72fb3..4de6613e7468 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -474,12 +474,12 @@ static irqreturn_t ubd_intr(int irq, void *dev) } /* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; +static struct os_helper_thread *io_td; static void kill_io_thread(void) { - if(io_pid != -1) - os_kill_process(io_pid, 1); + if (io_td) + os_kill_helper_thread(io_td); } __uml_exitcall(kill_io_thread); @@ -1104,8 +1104,8 @@ static int __init ubd_init(void) late_initcall(ubd_init); -static int __init ubd_driver_init(void){ - unsigned long stack; +static int __init ubd_driver_init(void) +{ int err; /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ @@ -1114,13 +1114,11 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); - if(io_pid < 0){ + err = start_io_thread(&io_td, &thread_fd); + if (err < 0) { printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; + "falling back to synchronous I/O\n", -err); return 0; } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, @@ -1496,12 +1494,11 @@ int kernel_fd = -1; /* Only changed by the io thread. XXX: currently unused. */ static int io_count; -int io_thread(void *arg) +void *io_thread(void *arg) { int n, count, written, res; - os_set_pdeathsig(); - os_fix_helper_signals(); + os_fix_helper_thread_signals(); while(1){ n = bulk_req_safe_read( @@ -1543,5 +1540,5 @@ int io_thread(void *arg) } while (written < n); } - return 0; + return NULL; } diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index b4f8b8e60564..c5e6545f6fcf 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -25,9 +25,9 @@ static struct pollfd kernel_pollfd; -int start_io_thread(unsigned long sp, int *fd_out) +int start_io_thread(struct os_helper_thread **td_out, int *fd_out) { - int pid, fds[2], err; + int fds[2], err; err = os_pipe(fds, 1, 1); if(err < 0){ @@ -47,14 +47,14 @@ int start_io_thread(unsigned long sp, int *fd_out) goto out_close; } - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); - if(pid < 0){ - err = -errno; - printk("start_io_thread - clone failed : errno = %d\n", errno); + err = os_run_helper_thread(td_out, io_thread, NULL); + if (err < 0) { + printk("%s - failed to run helper thread, err = %d\n", + __func__, -err); goto out_close; } - return(pid); + return 0; out_close: os_close_file(fds[0]); From d295beeed2552a987796d627ba7d0985b1e2d72f Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 19 Mar 2025 21:55:22 +0800 Subject: [PATCH 1434/2157] um: Switch to the pthread-based helper in sigio workaround The write_sigio thread and UML kernel thread share the same errno, which can lead to conflicts when both call syscalls concurrently. Switch to the pthread-based helper to address this issue. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250319135523.97050-4-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/os-Linux/sigio.c | 44 +++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9aac8def4d63..61b348a2ea97 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -21,8 +21,7 @@ * Protected by sigio_lock(), also used by sigio_cleanup, which is an * exitcall. */ -static int write_sigio_pid = -1; -static unsigned long write_sigio_stack; +static struct os_helper_thread *write_sigio_td; /* * These arrays are initialized before the sigio thread is started, and @@ -48,15 +47,15 @@ static struct pollfds current_poll; static struct pollfds next_poll; static struct pollfds all_sigio_fds; -static int write_sigio_thread(void *unused) +static void *write_sigio_thread(void *unused) { struct pollfds *fds, tmp; struct pollfd *p; int i, n, respond_fd; char c; - os_set_pdeathsig(); - os_fix_helper_signals(); + os_fix_helper_thread_signals(); + fds = ¤t_poll; while (1) { n = poll(fds->poll, fds->used, -1); @@ -98,7 +97,7 @@ static int write_sigio_thread(void *unused) } } - return 0; + return NULL; } static int need_poll(struct pollfds *polls, int n) @@ -152,11 +151,10 @@ static void update_thread(void) return; fail: /* Critical section start */ - if (write_sigio_pid != -1) { - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); + if (write_sigio_td) { + os_kill_helper_thread(write_sigio_td); + write_sigio_td = NULL; } - write_sigio_pid = -1; close(sigio_private[0]); close(sigio_private[1]); close(write_sigio_fds[0]); @@ -220,7 +218,7 @@ int __ignore_sigio_fd(int fd) * sigio_cleanup has already run, then update_thread will hang * or fail because the thread is no longer running. */ - if (write_sigio_pid == -1) + if (!write_sigio_td) return -EIO; for (i = 0; i < current_poll.used; i++) { @@ -279,14 +277,14 @@ static void write_sigio_workaround(void) int err; int l_write_sigio_fds[2]; int l_sigio_private[2]; - int l_write_sigio_pid; + struct os_helper_thread *l_write_sigio_td; /* We call this *tons* of times - and most ones we must just fail. */ sigio_lock(); - l_write_sigio_pid = write_sigio_pid; + l_write_sigio_td = write_sigio_td; sigio_unlock(); - if (l_write_sigio_pid != -1) + if (l_write_sigio_td) return; err = os_pipe(l_write_sigio_fds, 1, 1); @@ -312,7 +310,7 @@ static void write_sigio_workaround(void) * Did we race? Don't try to optimize this, please, it's not so likely * to happen, and no more than once at the boot. */ - if (write_sigio_pid != -1) + if (write_sigio_td) goto out_free; current_poll = ((struct pollfds) { .poll = p, @@ -325,18 +323,15 @@ static void write_sigio_workaround(void) memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, - CLONE_FILES | CLONE_VM, - &write_sigio_stack); - - if (write_sigio_pid < 0) + err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); + if (err < 0) goto out_clear; sigio_unlock(); return; out_clear: - write_sigio_pid = -1; + write_sigio_td = NULL; write_sigio_fds[0] = -1; write_sigio_fds[1] = -1; sigio_private[0] = -1; @@ -394,12 +389,11 @@ void maybe_sigio_broken(int fd) static void sigio_cleanup(void) { - if (write_sigio_pid == -1) + if (!write_sigio_td) return; - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); - write_sigio_pid = -1; + os_kill_helper_thread(write_sigio_td); + write_sigio_td = NULL; } __uml_exitcall(sigio_cleanup); From 69f52573c24de9d2919f83e3b3b396a09118b7c4 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 19 Mar 2025 21:55:23 +0800 Subject: [PATCH 1435/2157] um: Prohibit the VM_CLONE flag in run_helper_thread() Directly creating helper threads with VM_CLONE using clone can compromise the thread safety of errno. Since all these helper threads have been converted to use os_run_helper_thread(), let's prevent using this flag in run_helper_thread(). Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250319135523.97050-5-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/os-Linux/helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index df22cba24d82..89c2ad2a4e3a 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -122,6 +122,10 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; + /* To share memory space, use os_run_helper_thread() instead. */ + if (flags & CLONE_VM) + return -EINVAL; + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; From 33c9da5dfb18c2ff5a88d01aca2cf253cd0ac3bc Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Sun, 16 Mar 2025 00:19:08 +0800 Subject: [PATCH 1436/2157] um: Rewrite the sigio workaround based on epoll and tgkill The existing sigio workaround implementation removes FDs from the poll when events are triggered, requiring users to re-add them via add_sigio_fd() after processing. This introduces a potential race condition between FD removal in write_sigio_thread() and next_poll update in __add_sigio_fd(), and is inefficient due to frequent FD removal and re-addition. Rewrite the implementation based on epoll and tgkill for improved efficiency and reliability. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20250315161910.4082396-2-tiwei.btw@antgroup.com Signed-off-by: Johannes Berg --- arch/um/drivers/random.c | 2 +- arch/um/drivers/rtc_user.c | 2 +- arch/um/include/shared/os.h | 2 +- arch/um/include/shared/sigio.h | 1 - arch/um/kernel/sigio.c | 26 --- arch/um/os-Linux/sigio.c | 332 +++++---------------------------- 6 files changed, 48 insertions(+), 317 deletions(-) diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index da985e0dc69a..ca08c91f47a3 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -79,7 +79,7 @@ static int __init rng_init (void) if (err < 0) goto err_out_cleanup_hw; - sigio_broken(random_fd); + sigio_broken(); hwrng.name = RNG_MODULE_NAME; hwrng.read = rng_dev_read; diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c index 7c3cec4c68cf..51e79f3148cd 100644 --- a/arch/um/drivers/rtc_user.c +++ b/arch/um/drivers/rtc_user.c @@ -39,7 +39,7 @@ int uml_rtc_start(bool timetravel) } /* apparently timerfd won't send SIGIO, use workaround */ - sigio_broken(uml_rtc_irq_fds[0]); + sigio_broken(); err = add_sigio_fd(uml_rtc_irq_fds[0]); if (err < 0) { close(uml_rtc_irq_fds[0]); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index d0ae42911cb5..152a60080d5b 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -314,7 +314,7 @@ extern void um_irqs_resume(void); extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd); -extern void sigio_broken(int fd); +extern void sigio_broken(void); /* * unlocked versions for IRQ controller code. * diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h index e60c8b227844..c6c2edce1f6d 100644 --- a/arch/um/include/shared/sigio.h +++ b/arch/um/include/shared/sigio.h @@ -6,7 +6,6 @@ #ifndef __SIGIO_H__ #define __SIGIO_H__ -extern int write_sigio_irq(int fd); extern void sigio_lock(void); extern void sigio_unlock(void); diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 5085a50c3b8c..4fc04742048a 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -8,32 +8,6 @@ #include #include -/* Protected by sigio_lock() called from write_sigio_workaround */ -static int sigio_irq_fd = -1; - -static irqreturn_t sigio_interrupt(int irq, void *data) -{ - char c; - - os_read_file(sigio_irq_fd, &c, sizeof(c)); - return IRQ_HANDLED; -} - -int write_sigio_irq(int fd) -{ - int err; - - err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, - 0, "write sigio", NULL); - if (err < 0) { - printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " - "err = %d\n", err); - return -1; - } - sigio_irq_fd = fd; - return 0; -} - /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ static DEFINE_MUTEX(sigio_mutex); diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 61b348a2ea97..a05a6ecee756 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -23,180 +24,49 @@ */ static struct os_helper_thread *write_sigio_td; -/* - * These arrays are initialized before the sigio thread is started, and - * the descriptors closed after it is killed. So, it can't see them change. - * On the UML side, they are changed under the sigio_lock. - */ -#define SIGIO_FDS_INIT {-1, -1} +static int epollfd = -1; -static int write_sigio_fds[2] = SIGIO_FDS_INIT; -static int sigio_private[2] = SIGIO_FDS_INIT; +#define MAX_EPOLL_EVENTS 64 -struct pollfds { - struct pollfd *poll; - int size; - int used; -}; - -/* - * Protected by sigio_lock(). Used by the sigio thread, but the UML thread - * synchronizes with it. - */ -static struct pollfds current_poll; -static struct pollfds next_poll; -static struct pollfds all_sigio_fds; +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; static void *write_sigio_thread(void *unused) { - struct pollfds *fds, tmp; - struct pollfd *p; - int i, n, respond_fd; - char c; + int pid = getpid(); + int r; os_fix_helper_thread_signals(); - fds = ¤t_poll; while (1) { - n = poll(fds->poll, fds->used, -1); - if (n < 0) { + r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1); + if (r < 0) { if (errno == EINTR) continue; - printk(UM_KERN_ERR "write_sigio_thread : poll returned " - "%d, errno = %d\n", n, errno); + printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n", + __func__, errno); } - for (i = 0; i < fds->used; i++) { - p = &fds->poll[i]; - if (p->revents == 0) - continue; - if (p->fd == sigio_private[1]) { - CATCH_EINTR(n = read(sigio_private[1], &c, - sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR - "write_sigio_thread : " - "read on socket failed, " - "err = %d\n", errno); - tmp = current_poll; - current_poll = next_poll; - next_poll = tmp; - respond_fd = sigio_private[1]; - } - else { - respond_fd = write_sigio_fds[1]; - fds->used--; - memmove(&fds->poll[i], &fds->poll[i + 1], - (fds->used - i) * sizeof(*fds->poll)); - } - CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR "write_sigio_thread : " - "write on socket failed, err = %d\n", - errno); - } + CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + if (r < 0) + printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", + __func__, errno); } return NULL; } -static int need_poll(struct pollfds *polls, int n) -{ - struct pollfd *new; - - if (n <= polls->size) - return 0; - - new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); - if (new == NULL) { - printk(UM_KERN_ERR "need_poll : failed to allocate new " - "pollfds\n"); - return -ENOMEM; - } - - memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); - kfree(polls->poll); - - polls->poll = new; - polls->size = n; - return 0; -} - -/* - * Must be called with sigio_lock held, because it's needed by the marked - * critical section. - */ -static void update_thread(void) -{ - unsigned long flags; - int n; - char c; - - flags = um_set_signals_trace(0); - CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", - errno); - goto fail; - } - - CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", - errno); - goto fail; - } - - um_set_signals_trace(flags); - return; - fail: - /* Critical section start */ - if (write_sigio_td) { - os_kill_helper_thread(write_sigio_td); - write_sigio_td = NULL; - } - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); - /* Critical section end */ - um_set_signals_trace(flags); -} - int __add_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n; + struct epoll_event event = { + .data.fd = fd, + .events = EPOLLIN | EPOLLET, + }; + int r; - for (i = 0; i < all_sigio_fds.used; i++) { - if (all_sigio_fds.poll[i].fd == fd) - break; - } - if (i == all_sigio_fds.used) - return -ENOSPC; - - p = &all_sigio_fds.poll[i]; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - return 0; - } - - n = current_poll.used; - err = need_poll(&next_poll, n + 1); - if (err) - return err; - - memcpy(next_poll.poll, current_poll.poll, - current_poll.used * sizeof(struct pollfd)); - next_poll.poll[n] = *p; - next_poll.used = n + 1; - update_thread(); - - return 0; + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event)); + return r < 0 ? -errno : 0; } - int add_sigio_fd(int fd) { int err; @@ -210,38 +80,11 @@ int add_sigio_fd(int fd) int __ignore_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n = 0; + struct epoll_event event; + int r; - /* - * This is called from exitcalls elsewhere in UML - if - * sigio_cleanup has already run, then update_thread will hang - * or fail because the thread is no longer running. - */ - if (!write_sigio_td) - return -EIO; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - break; - } - if (i == current_poll.used) - return -ENOENT; - - err = need_poll(&next_poll, current_poll.used - 1); - if (err) - return err; - - for (i = 0; i < current_poll.used; i++) { - p = ¤t_poll.poll[i]; - if (p->fd != fd) - next_poll.poll[n++] = *p; - } - next_poll.used = current_poll.used - 1; - - update_thread(); - - return 0; + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event)); + return r < 0 ? -errno : 0; } int ignore_sigio_fd(int fd) @@ -255,124 +98,39 @@ int ignore_sigio_fd(int fd) return err; } -static struct pollfd *setup_initial_poll(int fd) -{ - struct pollfd *p; - - p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); - if (p == NULL) { - printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " - "poll\n"); - return NULL; - } - *p = ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); - return p; -} - static void write_sigio_workaround(void) { - struct pollfd *p; int err; - int l_write_sigio_fds[2]; - int l_sigio_private[2]; - struct os_helper_thread *l_write_sigio_td; - - /* We call this *tons* of times - and most ones we must just fail. */ - sigio_lock(); - l_write_sigio_td = write_sigio_td; - sigio_unlock(); - - if (l_write_sigio_td) - return; - - err = os_pipe(l_write_sigio_fds, 1, 1); - if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " - "err = %d\n", -err); - return; - } - err = os_pipe(l_sigio_private, 1, 1); - if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " - "err = %d\n", -err); - goto out_close1; - } - - p = setup_initial_poll(l_sigio_private[1]); - if (!p) - goto out_close2; sigio_lock(); - - /* - * Did we race? Don't try to optimize this, please, it's not so likely - * to happen, and no more than once at the boot. - */ if (write_sigio_td) - goto out_free; + goto out; - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - - if (write_sigio_irq(l_write_sigio_fds[0])) - goto out_clear_poll; - - memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); - memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - - err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); - if (err < 0) - goto out_clear; - - sigio_unlock(); - return; - -out_clear: - write_sigio_td = NULL; - write_sigio_fds[0] = -1; - write_sigio_fds[1] = -1; - sigio_private[0] = -1; - sigio_private[1] = -1; -out_clear_poll: - current_poll = ((struct pollfds) { .poll = NULL, - .size = 0, - .used = 0 }); -out_free: - sigio_unlock(); - kfree(p); -out_close2: - close(l_sigio_private[0]); - close(l_sigio_private[1]); -out_close1: - close(l_write_sigio_fds[0]); - close(l_write_sigio_fds[1]); -} - -void sigio_broken(int fd) -{ - int err; - - write_sigio_workaround(); - - sigio_lock(); - err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); - if (err) { - printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " - "for descriptor %d\n", fd); + epollfd = epoll_create(MAX_EPOLL_EVENTS); + if (epollfd < 0) { + printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n", + __func__, errno); + goto out; + } + + err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); + if (err < 0) { + printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n", + __func__, -err); + close(epollfd); + epollfd = -1; goto out; } - all_sigio_fds.poll[all_sigio_fds.used++] = - ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); out: sigio_unlock(); } +void sigio_broken(void) +{ + write_sigio_workaround(); +} + /* Changed during early boot */ static int pty_output_sigio; @@ -384,7 +142,7 @@ void maybe_sigio_broken(int fd) if (pty_output_sigio) return; - sigio_broken(fd); + sigio_broken(); } static void sigio_cleanup(void) From afa8a93932aa63b107d81bd438454760d8c7c8a3 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 19 Mar 2025 11:35:19 -0700 Subject: [PATCH 1437/2157] riscv: Move nop definition to insn-def.h We have duplicated the definition of the nop instruction in ftrace.h and in jump_label.c. Move this definition into the generic file insn-def.h so that they can share the definition with each other and with future files. Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250319-runtime_const_riscv-v10-1-745b31a11d65@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/ftrace.h | 1 - arch/riscv/include/asm/insn-def.h | 3 +++ arch/riscv/kernel/ftrace.c | 6 +++--- arch/riscv/kernel/jump_label.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c4721ce44ca4..b7f361a50f64 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -79,7 +79,6 @@ struct dyn_arch_ftrace { #define AUIPC_RA (0x00000097) #define JALR_T0 (0x000282e7) #define AUIPC_T0 (0x00000297) -#define NOP4 (0x00000013) #define to_jalr_t0(offset) \ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 9a913010cdd9..71060a2f838e 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -199,5 +199,8 @@ #define RISCV_PAUSE ".4byte 0x100000f" #define ZAWRS_WRS_NTO ".4byte 0x00d00073" #define ZAWRS_WRS_STO ".4byte 0x01d00073" +#define RISCV_NOP4 ".4byte 0x00000013" + +#define RISCV_INSN_NOP4 _AC(0x00000013, U) #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 3524db5e4fa0..674dcdfae7a1 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -36,7 +36,7 @@ static int ftrace_check_current_call(unsigned long hook_pos, unsigned int *expected) { unsigned int replaced[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; /* we expect nops at the hook position */ if (!expected) @@ -68,7 +68,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, bool enable, bool ra) { unsigned int call[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (ra) make_call_ra(hook_pos, target, call); @@ -97,7 +97,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) return -EPERM; diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 654ed159c830..b4c1a6a3fbd2 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -11,8 +11,8 @@ #include #include #include +#include -#define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU bool arch_jump_label_transform_queue(struct jump_entry *entry, @@ -33,7 +33,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | (((u32)offset & GENMASK(20, 20)) << (31 - 20)); } else { - insn = RISCV_INSN_NOP; + insn = RISCV_INSN_NOP4; } if (early_boot_irqs_disabled) { From a44fb5722199de8338d991db5ad3d509192179bb Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 19 Mar 2025 11:35:20 -0700 Subject: [PATCH 1438/2157] riscv: Add runtime constant support Implement the runtime constant infrastructure for riscv. Use this infrastructure to generate constants to be used by the d_hash() function. This is the riscv variant of commit 94a2bc0f611c ("arm64: add 'runtime constant' support") and commit e3c92e81711d ("runtime constants: add x86 architecture support"). [ alex: Remove trailing whitespace ] Signed-off-by: Charlie Jenkins Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250319-runtime_const_riscv-v10-2-745b31a11d65@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 22 ++ arch/riscv/include/asm/asm.h | 1 + arch/riscv/include/asm/runtime-const.h | 265 +++++++++++++++++++++++++ arch/riscv/kernel/vmlinux.lds.S | 3 + 4 files changed, 291 insertions(+) create mode 100644 arch/riscv/include/asm/runtime-const.h diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..c123f7c0579c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -783,6 +783,28 @@ config RISCV_ISA_ZBC If you don't know what to do here, say Y. +config TOOLCHAIN_HAS_ZBKB + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbkb) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbkb) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZBKB + bool "Zbkb extension support for bit manipulation instructions" + depends on TOOLCHAIN_HAS_ZBKB + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZBKB + extension (bit manipulation for cryptography) and enable its usage. + + The Zbkb extension provides instructions to accelerate a number + of common cryptography operations (pack, zip, etc). + + If you don't know what to do here, say Y. + config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" depends on MMU diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 776354895b81..a8a2af6dfe9d 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -27,6 +27,7 @@ #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) +#define SRLI __REG_SEL(srliw, srli) #if __SIZEOF_POINTER__ == 8 #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h new file mode 100644 index 000000000000..ea2e49c7149c --- /dev/null +++ b/arch/riscv/include/asm/runtime-const.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_RUNTIME_CONST_H +#define _ASM_RISCV_RUNTIME_CONST_H + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_32BIT +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "addi %[__ret],%[__ret],-0x211\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret)); \ + __ret; \ +}) +#else +/* + * Loading 64-bit constants into a register from immediates is a non-trivial + * task on riscv64. To get it somewhat performant, load 32 bits into two + * different registers and then combine the results. + * + * If the processor supports the Zbkb extension, we can combine the final + * "slli,slli,srli,add" into the single "pack" instruction. If the processor + * doesn't support Zbkb but does support the Zbb extension, we can + * combine the final "slli,srli,add" into one instruction "add.uw". + */ +#define RISCV_RUNTIME_CONST_64_PREAMBLE \ + ".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "lui %[__tmp],0x1234\n\t" \ + "addiw %[__ret],%[__ret],-0x211\n\t" \ + "addiw %[__tmp],%[__tmp],0x567\n\t" \ + +#define RISCV_RUNTIME_CONST_64_BASE \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "slli %[__ret],%[__ret],32\n\t" \ + "srli %[__ret],%[__ret],32\n\t" \ + "add %[__ret],%[__ret],%[__tmp]\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBA \ + ".option push\n\t" \ + ".option arch,+zba\n\t" \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBKB \ + ".option push\n\t" \ + ".option arch,+zbkb\n\t" \ + "pack %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + +#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE_2( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBA) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#else +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + RISCV_RUNTIME_CONST_64_BASE \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#endif +#endif + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + u32 __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + SRLI " %[__ret],%[__val],12\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret) \ + : [__val] "r" (val)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + /* On riscv there are currently only cache-wide flushes so va is ignored. */ + __always_unused uintptr_t va = (uintptr_t)where; + + flush_icache_range(va, va + 4 * insns); +} + +/* + * The 32-bit immediate is stored in a lui+addi pairing. + * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction. + * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction. + */ +static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val) +{ + unsigned int lower_immediate, upper_immediate; + u32 lui_insn, addi_insn, addi_insn_mask; + __le32 lui_res, addi_res; + + /* Mask out upper 12 bit of addi */ + addi_insn_mask = 0x000fffff; + + lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16; + addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16; + + lower_immediate = sign_extend32(val, 11); + upper_immediate = (val - lower_immediate); + + if (upper_immediate & 0xfffff000) { + /* replace upper 20 bits of lui with upper immediate */ + lui_insn &= 0x00000fff; + lui_insn |= upper_immediate & 0xfffff000; + } else { + /* replace lui with nop if immediate is small enough to fit in addi */ + lui_insn = RISCV_INSN_NOP4; + /* + * lui is being skipped, so do a load instead of an add. A load + * is performed by adding with the x0 register. Setting rs to + * zero with the following mask will accomplish this goal. + */ + addi_insn_mask &= 0x07fff; + } + + if (lower_immediate & 0x00000fff) { + /* replace upper 12 bits of addi with lower 12 bits of val */ + addi_insn &= addi_insn_mask; + addi_insn |= (lower_immediate & 0x00000fff) << 20; + } else { + /* replace addi with nop if lower_immediate is empty */ + addi_insn = RISCV_INSN_NOP4; + } + + addi_res = cpu_to_le32(addi_insn); + lui_res = cpu_to_le32(lui_insn); + mutex_lock(&text_mutex); + patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res)); + patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res)); + mutex_unlock(&text_mutex); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ +#ifdef CONFIG_32BIT + __runtime_fixup_32(where, where + 4, val); + __runtime_fixup_caches(where, 2); +#else + __runtime_fixup_32(where, where + 8, val); + __runtime_fixup_32(where + 4, where + 12, val >> 32); + __runtime_fixup_caches(where, 4); +#endif +} + +/* + * Replace the least significant 5 bits of the srli/srliw immediate that is + * located at bits 20-24 + */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le16 *parcel = where; + __le32 res; + u32 insn; + + insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= 0xfe0fffff; + insn |= (val & 0b11111) << 20; + + res = cpu_to_le32(insn); + mutex_lock(&text_mutex); + patch_text_nosync(where, &res, sizeof(insn)); + mutex_unlock(&text_mutex); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_RISCV_RUNTIME_CONST_H */ diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 002ca58dd998..61bd5ba6680a 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -97,6 +97,9 @@ SECTIONS { EXIT_DATA } + + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(L1_CACHE_BYTES) .rel.dyn : { From a239c6e91b665f1837cf57b97fe638ef1baf2e78 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sat, 22 Feb 2025 17:58:17 +0100 Subject: [PATCH 1439/2157] staging: gpib: Fix Oops after disconnect in ni_usb If the usb dongle is disconnected subsequent calls to the driver cause a NULL dereference Oops as the bus_interface is set to NULL on disconnect. This problem was introduced by setting usb_dev from the bus_interface for dev_xxx messages. Previously bus_interface was checked for NULL only in the the functions directly calling usb_fill_bulk_urb or usb_control_msg. Check for valid bus_interface on all interface entry points and return -ENODEV if it is NULL. Fixes: 4934b98bb243 ("staging: gpib: Update messaging and usb_device refs in ni_usb") Cc: stable Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250222165817.12856-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 93 ++++++++++++++++++----- 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 61b15b19e134..62fbc78204ce 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -591,7 +591,7 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, { int retval, parse_retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x20; int in_data_length; @@ -604,8 +604,11 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, struct ni_usb_register reg; *bytes_read = 0; + if (!ni_priv->bus_interface) + return -ENODEV; if (length > max_read_length) return -EINVAL; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -718,7 +721,7 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; int out_data_length; static const int in_data_length = 0x10; @@ -728,9 +731,11 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, struct ni_usb_status_block status; static const int max_write_length = 0xffff; - *bytes_written = 0; + if (!ni_priv->bus_interface) + return -ENODEV; if (length > max_write_length) return -EINVAL; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data_length = length + 0x10; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -819,7 +824,7 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; int out_data_length; static const int in_data_length = 0x10; @@ -831,8 +836,11 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len static const int max_command_length = 0x10; *command_bytes_written = 0; + if (!ni_priv->bus_interface) + return -ENODEV; if (length > max_command_length) length = max_command_length; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data_length = length + 0x10; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -925,7 +933,7 @@ static int ni_usb_take_control(gpib_board_t *board, int synchronous) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x10; static const int in_data_length = 0x10; @@ -933,6 +941,9 @@ static int ni_usb_take_control(gpib_board_t *board, int synchronous) int i = 0; struct ni_usb_status_block status; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -983,7 +994,7 @@ static int ni_usb_go_to_standby(gpib_board_t *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x10; static const int in_data_length = 0x20; @@ -991,6 +1002,9 @@ static int ni_usb_go_to_standby(gpib_board_t *board) int i = 0; struct ni_usb_status_block status; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -1039,11 +1053,14 @@ static void ni_usb_request_system_control(gpib_board_t *board, int request_contr { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[4]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); if (request_control) { writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = CMDR; @@ -1087,7 +1104,7 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x10; static const int in_data_length = 0x10; @@ -1095,7 +1112,10 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) int i = 0; struct ni_usb_status_block status; - // FIXME: we are going to pulse when assert is true, and ignore otherwise + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); +// FIXME: we are going to pulse when assert is true, and ignore otherwise if (assert == 0) return; out_data = kmalloc(out_data_length, GFP_KERNEL); @@ -1133,10 +1153,13 @@ static void ni_usb_remote_enable(gpib_board_t *board, int enable) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; struct ni_usb_register reg; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); reg.device = NIUSB_SUBDEV_TNT4882; reg.address = nec7210_to_tnt4882_offset(AUXMR); if (enable) @@ -1180,11 +1203,14 @@ static unsigned int ni_usb_update_status(gpib_board_t *board, unsigned int clear { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; static const int buffer_length = 8; u8 *buffer; struct ni_usb_status_block status; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); buffer = kmalloc(buffer_length, GFP_KERNEL); if (!buffer) return board->status; @@ -1232,11 +1258,14 @@ static int ni_usb_primary_address(gpib_board_t *board, unsigned int address) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[2]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = nec7210_to_tnt4882_offset(ADR); writes[i].value = address; @@ -1287,11 +1316,14 @@ static int ni_usb_secondary_address(gpib_board_t *board, unsigned int address, i { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[3]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); i += ni_usb_write_sad(writes, address, enable); retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { @@ -1306,7 +1338,7 @@ static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x10; static const int in_data_length = 0x20; @@ -1315,6 +1347,9 @@ static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) int j = 0; struct ni_usb_status_block status; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -1358,11 +1393,14 @@ static void ni_usb_parallel_poll_configure(gpib_board_t *board, uint8_t config) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[1]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = nec7210_to_tnt4882_offset(AUXMR); writes[i].value = PPR | config; @@ -1380,11 +1418,14 @@ static void ni_usb_parallel_poll_response(gpib_board_t *board, int ist) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[1]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = nec7210_to_tnt4882_offset(AUXMR); if (ist) @@ -1405,11 +1446,14 @@ static void ni_usb_serial_poll_response(gpib_board_t *board, u8 status) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[1]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = nec7210_to_tnt4882_offset(SPMR); writes[i].value = status; @@ -1432,11 +1476,14 @@ static void ni_usb_return_to_local(gpib_board_t *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; int i = 0; struct ni_usb_register writes[1]; unsigned int ibsta; + if (!ni_priv->bus_interface) + return; // -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); writes[i].device = NIUSB_SUBDEV_TNT4882; writes[i].address = nec7210_to_tnt4882_offset(AUXMR); writes[i].value = AUX_RTL; @@ -1454,7 +1501,7 @@ static int ni_usb_line_status(const gpib_board_t *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; static const int out_data_length = 0x20; static const int in_data_length = 0x20; @@ -1464,6 +1511,9 @@ static int ni_usb_line_status(const gpib_board_t *board) int line_status = ValidALL; // NI windows driver reads 0xd(HSSEL), 0xc (ARD0), 0x1f (BSR) + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) return -ENOMEM; @@ -1570,12 +1620,15 @@ static unsigned int ni_usb_t1_delay(gpib_board_t *board, unsigned int nano_sec) { int retval; struct ni_usb_priv *ni_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); + struct usb_device *usb_dev; struct ni_usb_register writes[3]; unsigned int ibsta; unsigned int actual_ns; int i; + if (!ni_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(ni_priv->bus_interface); i = ni_usb_setup_t1_delay(writes, nano_sec, &actual_ns); retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { From 8491e73a5223acb0a4b4d78c3f8b96aa9c5e774d Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sat, 22 Feb 2025 21:45:15 +0100 Subject: [PATCH 1440/2157] staging: gpib: Fix Oops after disconnect in agilent usb If the agilent usb dongle is disconnected subsequent calls to the driver cause a NULL dereference Oops as the bus_interface is set to NULL on disconnect. This problem was introduced by setting usb_dev from the bus_interface for dev_xxx messages. Previously bus_interface was checked for NULL only in the functions directly calling usb_fill_bulk_urb or usb_control_msg. Check for valid bus_interface on all interface entry points and return -ENODEV if it is NULL. Fixes: fbae7090f30c ("staging: gpib: Update messaging and usb_device refs in agilent_usb") Cc: stable Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250222204515.5104-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82357a/agilent_82357a.c | 65 ++++++++++++++++--- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 7ebebe00dc48..e0d36f0dff25 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -427,7 +427,7 @@ static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t leng { int retval; struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data, *in_data; int out_data_length, in_data_length; int bytes_written, bytes_read; @@ -438,6 +438,10 @@ static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t leng *nbytes = 0; *end = 0; + + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); out_data_length = 0x9; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -534,7 +538,7 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer { int retval; struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; u8 *out_data = NULL; u8 *status_data = NULL; int out_data_length; @@ -545,6 +549,10 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer struct agilent_82357a_register_pairlet read_reg; *bytes_written = 0; + if (!a_priv->bus_interface) + return -ENODEV; + + usb_dev = interface_to_usbdev(a_priv->bus_interface); out_data_length = length + 0x8; out_data = kmalloc(out_data_length, GFP_KERNEL); if (!out_data) @@ -697,9 +705,13 @@ int agilent_82357a_take_control_internal(gpib_board_t *board, int synchronous) static int agilent_82357a_take_control(gpib_board_t *board, int synchronous) { + struct agilent_82357a_priv *a_priv = board->private_data; const int timeout = 10; int i; + if (!a_priv->bus_interface) + return -ENODEV; + /* It looks like the 9914 does not handle tcs properly. * See comment above tms9914_take_control_workaround() in * drivers/gpib/tms9914/tms9914_aux.c @@ -723,10 +735,14 @@ static int agilent_82357a_take_control(gpib_board_t *board, int synchronous) static int agilent_82357a_go_to_standby(gpib_board_t *board) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet write; int retval; + if (!a_priv->bus_interface) + return -ENODEV; + + usb_dev = interface_to_usbdev(a_priv->bus_interface); write.address = AUXCR; write.value = AUX_GTS; retval = agilent_82357a_write_registers(a_priv, &write, 1); @@ -739,11 +755,15 @@ static int agilent_82357a_go_to_standby(gpib_board_t *board) static void agilent_82357a_request_system_control(gpib_board_t *board, int request_control) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet writes[2]; int retval; int i = 0; + if (!a_priv->bus_interface) + return; // -ENODEV; + + usb_dev = interface_to_usbdev(a_priv->bus_interface); /* 82357B needs bit to be set in 9914 AUXCR register */ writes[i].address = AUXCR; if (request_control) { @@ -767,10 +787,14 @@ static void agilent_82357a_request_system_control(gpib_board_t *board, int reque static void agilent_82357a_interface_clear(gpib_board_t *board, int assert) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet write; int retval; + if (!a_priv->bus_interface) + return; // -ENODEV; + + usb_dev = interface_to_usbdev(a_priv->bus_interface); write.address = AUXCR; write.value = AUX_SIC; if (assert) { @@ -785,10 +809,14 @@ static void agilent_82357a_interface_clear(gpib_board_t *board, int assert) static void agilent_82357a_remote_enable(gpib_board_t *board, int enable) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet write; int retval; + if (!a_priv->bus_interface) + return; //-ENODEV; + + usb_dev = interface_to_usbdev(a_priv->bus_interface); write.address = AUXCR; write.value = AUX_SRE; if (enable) @@ -804,6 +832,8 @@ static int agilent_82357a_enable_eos(gpib_board_t *board, uint8_t eos_byte, int { struct agilent_82357a_priv *a_priv = board->private_data; + if (!a_priv->bus_interface) + return -ENODEV; if (compare_8_bits == 0) return -EOPNOTSUPP; @@ -822,10 +852,13 @@ static void agilent_82357a_disable_eos(gpib_board_t *board) static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned int clear_mask) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet address_status, bus_status; int retval; + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); board->status &= ~clear_mask; if (a_priv->is_cic) set_bit(CIC_NUM, &board->status); @@ -885,6 +918,9 @@ static int agilent_82357a_primary_address(gpib_board_t *board, unsigned int addr struct agilent_82357a_register_pairlet write; int retval; + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); // put primary address in address0 write.address = ADR; write.value = address & ADDRESS_MASK; @@ -906,11 +942,14 @@ static int agilent_82357a_secondary_address(gpib_board_t *board, unsigned int ad static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet writes[2]; struct agilent_82357a_register_pairlet read; int retval; + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); // execute parallel poll writes[0].address = AUXCR; writes[0].value = AUX_CS | AUX_RPP; @@ -975,11 +1014,14 @@ static void agilent_82357a_return_to_local(gpib_board_t *board) static int agilent_82357a_line_status(const gpib_board_t *board) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet bus_status; int retval; int status = ValidALL; + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); bus_status.address = BSR; retval = agilent_82357a_read_registers(a_priv, &bus_status, 1, 0); if (retval) { @@ -1025,10 +1067,13 @@ static unsigned short nanosec_to_fast_talker_bits(unsigned int *nanosec) static unsigned int agilent_82357a_t1_delay(gpib_board_t *board, unsigned int nanosec) { struct agilent_82357a_priv *a_priv = board->private_data; - struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); + struct usb_device *usb_dev; struct agilent_82357a_register_pairlet write; int retval; + if (!a_priv->bus_interface) + return -ENODEV; + usb_dev = interface_to_usbdev(a_priv->bus_interface); write.address = FAST_TALKER_T1; write.value = nanosec_to_fast_talker_bits(&nanosec); retval = agilent_82357a_write_registers(a_priv, &write, 1); From 82184cc11723f1a0c45d0829faed7723678ba0f4 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:18:26 +0000 Subject: [PATCH 1441/2157] staging: gpib: Correct CamelCase for BUS constants Adhere to Linux kernel coding style and remove duplicate enums. Reported by checkpatch CHECK: Avoid CamelCase Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319211827.9854-2-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- .../staging/gpib/agilent_82357a/agilent_82357a.c | 16 ++++++++-------- drivers/staging/gpib/cb7210/cb7210.c | 16 ++++++++-------- drivers/staging/gpib/common/iblib.c | 4 ++-- drivers/staging/gpib/eastwood/fluke_gpib.c | 16 ++++++++-------- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 16 ++++++++-------- drivers/staging/gpib/gpio/gpib_bitbang.c | 16 ++++++++-------- drivers/staging/gpib/hp_82341/hp_82341.c | 2 +- drivers/staging/gpib/ines/ines_gpib.c | 16 ++++++++-------- .../staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 16 ++++++++-------- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 16 ++++++++-------- drivers/staging/gpib/tms9914/tms9914.c | 16 ++++++++-------- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 16 ++++++++-------- drivers/staging/gpib/uapi/gpib_user.h | 11 ----------- 13 files changed, 83 insertions(+), 94 deletions(-) diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index e0d36f0dff25..17f41817587b 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -1030,21 +1030,21 @@ static int agilent_82357a_line_status(const gpib_board_t *board) return retval; } if (bus_status.value & BSR_REN_BIT) - status |= BusREN; + status |= BUS_REN; if (bus_status.value & BSR_IFC_BIT) - status |= BusIFC; + status |= BUS_IFC; if (bus_status.value & BSR_SRQ_BIT) - status |= BusSRQ; + status |= BUS_SRQ; if (bus_status.value & BSR_EOI_BIT) - status |= BusEOI; + status |= BUS_EOI; if (bus_status.value & BSR_NRFD_BIT) - status |= BusNRFD; + status |= BUS_NRFD; if (bus_status.value & BSR_NDAC_BIT) - status |= BusNDAC; + status |= BUS_NDAC; if (bus_status.value & BSR_DAV_BIT) - status |= BusDAV; + status |= BUS_DAV; if (bus_status.value & BSR_ATN_BIT) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 19dfd8b4a8e7..ba02dce62dd9 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -388,21 +388,21 @@ static int cb7210_line_status(const gpib_board_t *board) bsr_bits = cb7210_paged_read_byte(cb_priv, BUS_STATUS, BUS_STATUS_PAGE); if ((bsr_bits & BSR_REN_BIT) == 0) - status |= BusREN; + status |= BUS_REN; if ((bsr_bits & BSR_IFC_BIT) == 0) - status |= BusIFC; + status |= BUS_IFC; if ((bsr_bits & BSR_SRQ_BIT) == 0) - status |= BusSRQ; + status |= BUS_SRQ; if ((bsr_bits & BSR_EOI_BIT) == 0) - status |= BusEOI; + status |= BUS_EOI; if ((bsr_bits & BSR_NRFD_BIT) == 0) - status |= BusNRFD; + status |= BUS_NRFD; if ((bsr_bits & BSR_NDAC_BIT) == 0) - status |= BusNDAC; + status |= BUS_NDAC; if ((bsr_bits & BSR_DAV_BIT) == 0) - status |= BusDAV; + status |= BUS_DAV; if ((bsr_bits & BSR_ATN_BIT) == 0) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c index fd2874c2fff4..1caa57b41596 100644 --- a/drivers/staging/gpib/common/iblib.c +++ b/drivers/staging/gpib/common/iblib.c @@ -77,7 +77,7 @@ static int check_for_command_acceptors(gpib_board_t *board) return lines; if ((lines & ValidNRFD) && (lines & ValidNDAC)) { - if ((lines & BusNRFD) == 0 && (lines & BusNDAC) == 0) + if ((lines & BUS_NRFD) == 0 && (lines & BUS_NDAC) == 0) return -ENOTCONN; } @@ -521,7 +521,7 @@ int general_ibstatus(gpib_board_t *board, const gpib_status_queue_t *device, /* get real SRQI status if we can */ if (iblines(board, &line_status) == 0) { if ((line_status & ValidSRQ)) { - if ((line_status & BusSRQ)) + if ((line_status & BUS_SRQ)) status |= SRQI; else status &= ~SRQI; diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index 731732bd8301..4e04acfdb48d 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -204,21 +204,21 @@ static int fluke_line_status(const gpib_board_t *board) bsr_bits = fluke_paged_read_byte(e_priv, BUS_STATUS, BUS_STATUS_PAGE); if ((bsr_bits & BSR_REN_BIT) == 0) - status |= BusREN; + status |= BUS_REN; if ((bsr_bits & BSR_IFC_BIT) == 0) - status |= BusIFC; + status |= BUS_IFC; if ((bsr_bits & BSR_SRQ_BIT) == 0) - status |= BusSRQ; + status |= BUS_SRQ; if ((bsr_bits & BSR_EOI_BIT) == 0) - status |= BusEOI; + status |= BUS_EOI; if ((bsr_bits & BSR_NRFD_BIT) == 0) - status |= BusNRFD; + status |= BUS_NRFD; if ((bsr_bits & BSR_NDAC_BIT) == 0) - status |= BusNDAC; + status |= BUS_NDAC; if ((bsr_bits & BSR_DAV_BIT) == 0) - status |= BusDAV; + status |= BUS_DAV; if ((bsr_bits & BSR_ATN_BIT) == 0) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index d62c83368518..8fbfa7e285af 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -240,21 +240,21 @@ static int fmh_gpib_line_status(const gpib_board_t *board) bsr_bits = read_byte(nec_priv, BUS_STATUS_REG); if ((bsr_bits & BSR_REN_BIT) == 0) - status |= BusREN; + status |= BUS_REN; if ((bsr_bits & BSR_IFC_BIT) == 0) - status |= BusIFC; + status |= BUS_IFC; if ((bsr_bits & BSR_SRQ_BIT) == 0) - status |= BusSRQ; + status |= BUS_SRQ; if ((bsr_bits & BSR_EOI_BIT) == 0) - status |= BusEOI; + status |= BUS_EOI; if ((bsr_bits & BSR_NRFD_BIT) == 0) - status |= BusNRFD; + status |= BUS_NRFD; if ((bsr_bits & BSR_NDAC_BIT) == 0) - status |= BusNDAC; + status |= BUS_NDAC; if ((bsr_bits & BSR_DAV_BIT) == 0) - status |= BusDAV; + status |= BUS_DAV; if ((bsr_bits & BSR_ATN_BIT) == 0) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/gpio/gpib_bitbang.c b/drivers/staging/gpib/gpio/gpib_bitbang.c index 2012db188f58..004bf0e9bc88 100644 --- a/drivers/staging/gpib/gpio/gpib_bitbang.c +++ b/drivers/staging/gpib/gpio/gpib_bitbang.c @@ -1034,21 +1034,21 @@ static int bb_line_status(const gpib_board_t *board) int line_status = ValidALL; if (gpiod_get_value(REN) == 0) - line_status |= BusREN; + line_status |= BUS_REN; if (gpiod_get_value(IFC) == 0) - line_status |= BusIFC; + line_status |= BUS_IFC; if (gpiod_get_value(NDAC) == 0) - line_status |= BusNDAC; + line_status |= BUS_NDAC; if (gpiod_get_value(NRFD) == 0) - line_status |= BusNRFD; + line_status |= BUS_NRFD; if (gpiod_get_value(DAV) == 0) - line_status |= BusDAV; + line_status |= BUS_DAV; if (gpiod_get_value(EOI) == 0) - line_status |= BusEOI; + line_status |= BUS_EOI; if (gpiod_get_value(_ATN) == 0) - line_status |= BusATN; + line_status |= BUS_ATN; if (gpiod_get_value(SRQ) == 0) - line_status |= BusSRQ; + line_status |= BUS_SRQ; dbg_printk(2, "status lines: %4x\n", line_status); diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 91fbaf953bcd..b5d79ae519e7 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -158,7 +158,7 @@ static int restart_write_fifo(gpib_board_t *board, struct hp_82341_priv *hp_priv //restart doesn't work if data holdoff is in effect status = tms9914_line_status(board, tms_priv); - if ((status & BusNRFD) == 0) { + if ((status & BUS_NRFD) == 0) { outb(RESTART_STREAM_BIT, hp_priv->iobase[0] + STREAM_STATUS_REG); return 0; } diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index 56da6cd91188..c5c34845fc38 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -36,21 +36,21 @@ int ines_line_status(const gpib_board_t *board) bcm_bits = ines_inb(ines_priv, BUS_CONTROL_MONITOR); if (bcm_bits & BCM_REN_BIT) - status |= BusREN; + status |= BUS_REN; if (bcm_bits & BCM_IFC_BIT) - status |= BusIFC; + status |= BUS_IFC; if (bcm_bits & BCM_SRQ_BIT) - status |= BusSRQ; + status |= BUS_SRQ; if (bcm_bits & BCM_EOI_BIT) - status |= BusEOI; + status |= BUS_EOI; if (bcm_bits & BCM_NRFD_BIT) - status |= BusNRFD; + status |= BUS_NRFD; if (bcm_bits & BCM_NDAC_BIT) - status |= BusNDAC; + status |= BUS_NDAC; if (bcm_bits & BCM_DAV_BIT) - status |= BusDAV; + status |= BUS_DAV; if (bcm_bits & BCM_ATN_BIT) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 50faa0c17617..2775dc83305e 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -694,21 +694,21 @@ static int usb_gpib_line_status(const gpib_board_t *board) } if ((buffer & 0x01) == 0) - line_status |= BusREN; + line_status |= BUS_REN; if ((buffer & 0x02) == 0) - line_status |= BusIFC; + line_status |= BUS_IFC; if ((buffer & 0x04) == 0) - line_status |= BusNDAC; + line_status |= BUS_NDAC; if ((buffer & 0x08) == 0) - line_status |= BusNRFD; + line_status |= BUS_NRFD; if ((buffer & 0x10) == 0) - line_status |= BusDAV; + line_status |= BUS_DAV; if ((buffer & 0x20) == 0) - line_status |= BusEOI; + line_status |= BUS_EOI; if ((buffer & 0x40) == 0) - line_status |= BusATN; + line_status |= BUS_ATN; if ((buffer & 0x80) == 0) - line_status |= BusSRQ; + line_status |= BUS_SRQ; DIA_LOG(1, "done with %x %x\n", buffer, line_status); diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 62fbc78204ce..a788af70fa64 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -1561,21 +1561,21 @@ static int ni_usb_line_status(const gpib_board_t *board) ni_usb_parse_register_read_block(in_data, &bsr_bits, 1); kfree(in_data); if (bsr_bits & BCSR_REN_BIT) - line_status |= BusREN; + line_status |= BUS_REN; if (bsr_bits & BCSR_IFC_BIT) - line_status |= BusIFC; + line_status |= BUS_IFC; if (bsr_bits & BCSR_SRQ_BIT) - line_status |= BusSRQ; + line_status |= BUS_SRQ; if (bsr_bits & BCSR_EOI_BIT) - line_status |= BusEOI; + line_status |= BUS_EOI; if (bsr_bits & BCSR_NRFD_BIT) - line_status |= BusNRFD; + line_status |= BUS_NRFD; if (bsr_bits & BCSR_NDAC_BIT) - line_status |= BusNDAC; + line_status |= BUS_NDAC; if (bsr_bits & BCSR_DAV_BIT) - line_status |= BusDAV; + line_status |= BUS_DAV; if (bsr_bits & BCSR_ATN_BIT) - line_status |= BusATN; + line_status |= BUS_ATN; return line_status; } diff --git a/drivers/staging/gpib/tms9914/tms9914.c b/drivers/staging/gpib/tms9914/tms9914.c index 1f2bb163cfb5..17adeccf448b 100644 --- a/drivers/staging/gpib/tms9914/tms9914.c +++ b/drivers/staging/gpib/tms9914/tms9914.c @@ -396,21 +396,21 @@ int tms9914_line_status(const gpib_board_t *board, struct tms9914_priv *priv) bsr_bits = read_byte(priv, BSR); if (bsr_bits & BSR_REN_BIT) - status |= BusREN; + status |= BUS_REN; if (bsr_bits & BSR_IFC_BIT) - status |= BusIFC; + status |= BUS_IFC; if (bsr_bits & BSR_SRQ_BIT) - status |= BusSRQ; + status |= BUS_SRQ; if (bsr_bits & BSR_EOI_BIT) - status |= BusEOI; + status |= BUS_EOI; if (bsr_bits & BSR_NRFD_BIT) - status |= BusNRFD; + status |= BUS_NRFD; if (bsr_bits & BSR_NDAC_BIT) - status |= BusNDAC; + status |= BUS_NDAC; if (bsr_bits & BSR_DAV_BIT) - status |= BusDAV; + status |= BUS_DAV; if (bsr_bits & BSR_ATN_BIT) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index d32420dee5e5..76fd9422809c 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -159,21 +159,21 @@ static int tnt4882_line_status(const gpib_board_t *board) bcsr_bits = tnt_readb(tnt_priv, BSR); if (bcsr_bits & BCSR_REN_BIT) - status |= BusREN; + status |= BUS_REN; if (bcsr_bits & BCSR_IFC_BIT) - status |= BusIFC; + status |= BUS_IFC; if (bcsr_bits & BCSR_SRQ_BIT) - status |= BusSRQ; + status |= BUS_SRQ; if (bcsr_bits & BCSR_EOI_BIT) - status |= BusEOI; + status |= BUS_EOI; if (bcsr_bits & BCSR_NRFD_BIT) - status |= BusNRFD; + status |= BUS_NRFD; if (bcsr_bits & BCSR_NDAC_BIT) - status |= BusNDAC; + status |= BUS_NDAC; if (bcsr_bits & BCSR_DAV_BIT) - status |= BusDAV; + status |= BUS_DAV; if (bcsr_bits & BCSR_ATN_BIT) - status |= BusATN; + status |= BUS_ATN; return status; } diff --git a/drivers/staging/gpib/uapi/gpib_user.h b/drivers/staging/gpib/uapi/gpib_user.h index 0896a55a758f..2d742540630a 100644 --- a/drivers/staging/gpib/uapi/gpib_user.h +++ b/drivers/staging/gpib/uapi/gpib_user.h @@ -115,17 +115,6 @@ enum bus_control_line { ValidATN = 0x40, ValidEOI = 0x80, ValidALL = 0xff, - BusDAV = 0x0100, /* DAV line status bit */ - BusNDAC = 0x0200, /* NDAC line status bit */ - BusNRFD = 0x0400, /* NRFD line status bit */ - BusIFC = 0x0800, /* IFC line status bit */ - BusREN = 0x1000, /* REN line status bit */ - BusSRQ = 0x2000, /* SRQ line status bit */ - BusATN = 0x4000, /* ATN line status bit */ - BusEOI = 0x8000 /* EOI line status bit */ -}; - -enum old_bus_control_line { BUS_DAV = 0x0100, /* DAV line status bit */ BUS_NDAC = 0x0200, /* NDAC line status bit */ BUS_NRFD = 0x0400, /* NRFD line status bit */ From a8e233d0747fa0838a77280f79b8a744da96e56a Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:18:27 +0000 Subject: [PATCH 1442/2157] staging: gpib: Correct CamelCase for VALID enums Adhere to Linux kernel coding style. Reported by checkpatch CHECK: Avoid CamelCase Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319211827.9854-3-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82357a/agilent_82357a.c | 2 +- drivers/staging/gpib/cb7210/cb7210.c | 2 +- drivers/staging/gpib/common/iblib.c | 4 ++-- drivers/staging/gpib/eastwood/fluke_gpib.c | 2 +- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 2 +- drivers/staging/gpib/gpio/gpib_bitbang.c | 2 +- drivers/staging/gpib/ines/ines_gpib.c | 2 +- .../staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 2 +- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 2 +- drivers/staging/gpib/tms9914/tms9914.c | 2 +- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 2 +- drivers/staging/gpib/uapi/gpib_user.h | 18 +++++++++--------- 12 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 17f41817587b..4dda899ff1b2 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -1017,7 +1017,7 @@ static int agilent_82357a_line_status(const gpib_board_t *board) struct usb_device *usb_dev; struct agilent_82357a_register_pairlet bus_status; int retval; - int status = ValidALL; + int status = VALID_ALL; if (!a_priv->bus_interface) return -ENODEV; diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index ba02dce62dd9..808e0a71dfd6 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -379,7 +379,7 @@ static int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t lengt static int cb7210_line_status(const gpib_board_t *board) { - int status = ValidALL; + int status = VALID_ALL; int bsr_bits; struct cb7210_priv *cb_priv; diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c index 1caa57b41596..92e792e12301 100644 --- a/drivers/staging/gpib/common/iblib.c +++ b/drivers/staging/gpib/common/iblib.c @@ -76,7 +76,7 @@ static int check_for_command_acceptors(gpib_board_t *board) if (lines < 0) return lines; - if ((lines & ValidNRFD) && (lines & ValidNDAC)) { + if ((lines & VALID_NRFD) && (lines & VALID_NDAC)) { if ((lines & BUS_NRFD) == 0 && (lines & BUS_NDAC) == 0) return -ENOTCONN; } @@ -520,7 +520,7 @@ int general_ibstatus(gpib_board_t *board, const gpib_status_queue_t *device, status &= ~TIMO; /* get real SRQI status if we can */ if (iblines(board, &line_status) == 0) { - if ((line_status & ValidSRQ)) { + if ((line_status & VALID_SRQ)) { if ((line_status & BUS_SRQ)) status |= SRQI; else diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index 4e04acfdb48d..bcddf3125b8c 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -195,7 +195,7 @@ static void fluke_return_to_local(gpib_board_t *board) static int fluke_line_status(const gpib_board_t *board) { - int status = ValidALL; + int status = VALID_ALL; int bsr_bits; struct fluke_priv *e_priv; diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index 8fbfa7e285af..3e8ad8aa332f 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -229,7 +229,7 @@ static void fmh_gpib_return_to_local(gpib_board_t *board) static int fmh_gpib_line_status(const gpib_board_t *board) { - int status = ValidALL; + int status = VALID_ALL; int bsr_bits; struct fmh_priv *e_priv; struct nec7210_priv *nec_priv; diff --git a/drivers/staging/gpib/gpio/gpib_bitbang.c b/drivers/staging/gpib/gpio/gpib_bitbang.c index 004bf0e9bc88..28c5fa9b81ab 100644 --- a/drivers/staging/gpib/gpio/gpib_bitbang.c +++ b/drivers/staging/gpib/gpio/gpib_bitbang.c @@ -1031,7 +1031,7 @@ static void bb_return_to_local(gpib_board_t *board) static int bb_line_status(const gpib_board_t *board) { - int line_status = ValidALL; + int line_status = VALID_ALL; if (gpiod_get_value(REN) == 0) line_status |= BUS_REN; diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index c5c34845fc38..d31eab1a05e4 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("GPIB driver for Ines iGPIB 72010"); int ines_line_status(const gpib_board_t *board) { - int status = ValidALL; + int status = VALID_ALL; int bcm_bits; struct ines_priv *ines_priv; diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 2775dc83305e..011096ece7d6 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -658,7 +658,7 @@ static void usb_gpib_interface_clear(gpib_board_t *board, int assert) static int usb_gpib_line_status(const gpib_board_t *board) { int buffer; - int line_status = ValidALL; /* all lines will be read */ + int line_status = VALID_ALL; /* all lines will be read */ struct list_head *p, *q; WQT *item; unsigned long flags; diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index a788af70fa64..2de5aaea2615 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -1508,7 +1508,7 @@ static int ni_usb_line_status(const gpib_board_t *board) int bytes_written = 0, bytes_read = 0; int i = 0; unsigned int bsr_bits; - int line_status = ValidALL; + int line_status = VALID_ALL; // NI windows driver reads 0xd(HSSEL), 0xc (ARD0), 0x1f (BSR) if (!ni_priv->bus_interface) diff --git a/drivers/staging/gpib/tms9914/tms9914.c b/drivers/staging/gpib/tms9914/tms9914.c index 17adeccf448b..c563fcab44fc 100644 --- a/drivers/staging/gpib/tms9914/tms9914.c +++ b/drivers/staging/gpib/tms9914/tms9914.c @@ -391,7 +391,7 @@ static unsigned int update_status_nolock(gpib_board_t *board, struct tms9914_pri int tms9914_line_status(const gpib_board_t *board, struct tms9914_priv *priv) { int bsr_bits; - int status = ValidALL; + int status = VALID_ALL; bsr_bits = read_byte(priv, BSR); diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index 76fd9422809c..e62f3424ca20 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -150,7 +150,7 @@ MODULE_DESCRIPTION("GPIB driver for National Instruments boards using tnt4882 or static int tnt4882_line_status(const gpib_board_t *board) { - int status = ValidALL; + int status = VALID_ALL; int bcsr_bits; struct tnt4882_priv *tnt_priv; diff --git a/drivers/staging/gpib/uapi/gpib_user.h b/drivers/staging/gpib/uapi/gpib_user.h index 2d742540630a..5ff4588686fd 100644 --- a/drivers/staging/gpib/uapi/gpib_user.h +++ b/drivers/staging/gpib/uapi/gpib_user.h @@ -106,15 +106,15 @@ enum eos_flags { /* GPIB Bus Control Lines bit vector */ enum bus_control_line { - ValidDAV = 0x01, - ValidNDAC = 0x02, - ValidNRFD = 0x04, - ValidIFC = 0x08, - ValidREN = 0x10, - ValidSRQ = 0x20, - ValidATN = 0x40, - ValidEOI = 0x80, - ValidALL = 0xff, + VALID_DAV = 0x01, + VALID_NDAC = 0x02, + VALID_NRFD = 0x04, + VALID_IFC = 0x08, + VALID_REN = 0x10, + VALID_SRQ = 0x20, + VALID_ATN = 0x40, + VALID_EOI = 0x80, + VALID_ALL = 0xff, BUS_DAV = 0x0100, /* DAV line status bit */ BUS_NDAC = 0x0200, /* NDAC line status bit */ BUS_NRFD = 0x0400, /* NRFD line status bit */ From 8982069fa8e2fb5759515134a2caed60cc5ba0f4 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:05 +0000 Subject: [PATCH 1443/2157] staging: gpib: struct typing for gpib_board Using Linux code style for gpib_board struct in .h to allow drivers to migrate. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-2-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/include/gpib_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/gpib/include/gpib_types.h b/drivers/staging/gpib/include/gpib_types.h index b41781a55a60..7dc5a16e427b 100644 --- a/drivers/staging/gpib/include/gpib_types.h +++ b/drivers/staging/gpib/include/gpib_types.h @@ -23,7 +23,7 @@ #include typedef struct gpib_interface_struct gpib_interface_t; -typedef struct gpib_board_struct gpib_board_t; +typedef struct gpib_board gpib_board_t; /* config parameters that are only used by driver attach functions */ typedef struct { @@ -220,7 +220,7 @@ typedef struct gpib_interface_list_struct { * It provides storage for variables local to each board, and interface * functions for performing operations on the board */ -struct gpib_board_struct { +struct gpib_board { /* functions used by this board */ gpib_interface_t *interface; /* Pointer to module whose use count we should increment when From 53b86985c18265443296f002607035605e7353a5 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:06 +0000 Subject: [PATCH 1444/2157] staging: gpib: agilent_82350b: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-3-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82350b/agilent_82350b.c | 99 ++++++++++--------- 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index f83e1f321561..15a9c4ab77ba 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -25,13 +25,13 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for Agilent 82350b"); static int read_transfer_counter(struct agilent_82350b_priv *a_priv); -static unsigned short read_and_clear_event_status(gpib_board_t *board); +static unsigned short read_and_clear_event_status(struct gpib_board *board); static void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count); -static int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written); +static int agilent_82350b_write(struct gpib_board *board, uint8_t *buffer, + size_t length, int send_eoi, size_t *bytes_written); -static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) +static int agilent_82350b_accel_read(struct gpib_board *board, uint8_t *buffer, + size_t length, int *end, size_t *bytes_read) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -130,7 +130,7 @@ static int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_ return 0; } -static int translate_wait_return_value(gpib_board_t *board, int retval) +static int translate_wait_return_value(struct gpib_board *board, int retval) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -145,8 +145,9 @@ static int translate_wait_return_value(gpib_board_t *board, int retval) return 0; } -static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written) +static int agilent_82350b_accel_write(struct gpib_board *board, uint8_t *buffer, + size_t length, int send_eoi, + size_t *bytes_written) { struct agilent_82350b_priv *a_priv = board->private_data; struct tms9914_priv *tms_priv = &a_priv->tms9914_priv; @@ -227,7 +228,7 @@ static int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size return 0; } -static unsigned short read_and_clear_event_status(gpib_board_t *board) +static unsigned short read_and_clear_event_status(struct gpib_board *board) { struct agilent_82350b_priv *a_priv = board->private_data; unsigned long flags; @@ -245,7 +246,7 @@ static irqreturn_t agilent_82350b_interrupt(int irq, void *arg) { int tms9914_status1 = 0, tms9914_status2 = 0; int event_status; - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct agilent_82350b_priv *a_priv = board->private_data; unsigned long flags; irqreturn_t retval = IRQ_NONE; @@ -272,7 +273,7 @@ static irqreturn_t agilent_82350b_interrupt(int irq, void *arg) return retval; } -static void agilent_82350b_detach(gpib_board_t *board); +static void agilent_82350b_detach(struct gpib_board *board); static int read_transfer_counter(struct agilent_82350b_priv *a_priv) { @@ -296,17 +297,16 @@ static void set_transfer_counter(struct agilent_82350b_priv *a_priv, int count) } // wrappers for interface functions -static int agilent_82350b_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, - size_t *bytes_read) - +static int agilent_82350b_read(struct gpib_board *board, uint8_t *buffer, + size_t length, int *end, size_t *bytes_read) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -static int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int agilent_82350b_write(struct gpib_board *board, uint8_t *buffer, + size_t length, int send_eoi, size_t *bytes_written) { struct agilent_82350b_priv *priv = board->private_data; @@ -314,8 +314,8 @@ static int agilent_82350b_write(gpib_board_t *board, uint8_t *buffer, size_t len return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -static int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t length, - size_t *bytes_written) +static int agilent_82350b_command(struct gpib_board *board, uint8_t *buffer, + size_t length, size_t *bytes_written) { struct agilent_82350b_priv *priv = board->private_data; @@ -323,7 +323,7 @@ static int agilent_82350b_command(gpib_board_t *board, uint8_t *buffer, size_t l return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -static int agilent_82350b_take_control(gpib_board_t *board, int synchronous) +static int agilent_82350b_take_control(struct gpib_board *board, int synchronous) { struct agilent_82350b_priv *priv = board->private_data; @@ -331,7 +331,7 @@ static int agilent_82350b_take_control(gpib_board_t *board, int synchronous) return tms9914_take_control_workaround(board, &priv->tms9914_priv, synchronous); } -static int agilent_82350b_go_to_standby(gpib_board_t *board) +static int agilent_82350b_go_to_standby(struct gpib_board *board) { struct agilent_82350b_priv *priv = board->private_data; @@ -339,7 +339,8 @@ static int agilent_82350b_go_to_standby(gpib_board_t *board) return tms9914_go_to_standby(board, &priv->tms9914_priv); } -static void agilent_82350b_request_system_control(gpib_board_t *board, int request_control) +static void agilent_82350b_request_system_control(struct gpib_board *board, + int request_control) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -357,7 +358,7 @@ static void agilent_82350b_request_system_control(gpib_board_t *board, int reque tms9914_request_system_control(board, &a_priv->tms9914_priv, request_control); } -static void agilent_82350b_interface_clear(gpib_board_t *board, int assert) +static void agilent_82350b_interface_clear(struct gpib_board *board, int assert) { struct agilent_82350b_priv *priv = board->private_data; @@ -365,91 +366,97 @@ static void agilent_82350b_interface_clear(gpib_board_t *board, int assert) tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -static void agilent_82350b_remote_enable(gpib_board_t *board, int enable) +static void agilent_82350b_remote_enable(struct gpib_board *board, int enable) { struct agilent_82350b_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -static int agilent_82350b_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int agilent_82350b_enable_eos(struct gpib_board *board, uint8_t eos_byte, + int compare_8_bits) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -static void agilent_82350b_disable_eos(gpib_board_t *board) +static void agilent_82350b_disable_eos(struct gpib_board *board) { struct agilent_82350b_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -static unsigned int agilent_82350b_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int agilent_82350b_update_status(struct gpib_board *board, + unsigned int clear_mask) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -static int agilent_82350b_primary_address(gpib_board_t *board, unsigned int address) +static int agilent_82350b_primary_address(struct gpib_board *board, + unsigned int address) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -static int agilent_82350b_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int agilent_82350b_secondary_address(struct gpib_board *board, + unsigned int address, int enable) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -static int agilent_82350b_parallel_poll(gpib_board_t *board, uint8_t *result) +static int agilent_82350b_parallel_poll(struct gpib_board *board, uint8_t *result) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -static void agilent_82350b_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void agilent_82350b_parallel_poll_configure(struct gpib_board *board, + uint8_t config) { struct agilent_82350b_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -static void agilent_82350b_parallel_poll_response(gpib_board_t *board, int ist) +static void agilent_82350b_parallel_poll_response(struct gpib_board *board, int ist) { struct agilent_82350b_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -static void agilent_82350b_serial_poll_response(gpib_board_t *board, uint8_t status) +static void agilent_82350b_serial_poll_response(struct gpib_board *board, uint8_t status) { struct agilent_82350b_priv *priv = board->private_data; tms9914_serial_poll_response(board, &priv->tms9914_priv, status); } -static uint8_t agilent_82350b_serial_poll_status(gpib_board_t *board) +static uint8_t agilent_82350b_serial_poll_status(struct gpib_board *board) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_serial_poll_status(board, &priv->tms9914_priv); } -static int agilent_82350b_line_status(const gpib_board_t *board) +static int agilent_82350b_line_status(const struct gpib_board *board) { struct agilent_82350b_priv *priv = board->private_data; return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int nanosec) +static unsigned int agilent_82350b_t1_delay(struct gpib_board *board, + unsigned int nanosec) { struct agilent_82350b_priv *a_priv = board->private_data; static const int nanosec_per_clock = 30; @@ -464,14 +471,14 @@ static unsigned int agilent_82350b_t1_delay(gpib_board_t *board, unsigned int na return value * nanosec_per_clock; } -static void agilent_82350b_return_to_local(gpib_board_t *board) +static void agilent_82350b_return_to_local(struct gpib_board *board) { struct agilent_82350b_priv *priv = board->private_data; tms9914_return_to_local(board, &priv->tms9914_priv); } -static int agilent_82350b_allocate_private(gpib_board_t *board) +static int agilent_82350b_allocate_private(struct gpib_board *board) { board->private_data = kzalloc(sizeof(struct agilent_82350b_priv), GFP_KERNEL); if (!board->private_data) @@ -479,13 +486,14 @@ static int agilent_82350b_allocate_private(gpib_board_t *board) return 0; } -static void agilent_82350b_free_private(gpib_board_t *board) +static void agilent_82350b_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t *config) +static int init_82350a_hardware(struct gpib_board *board, + const gpib_board_config_t *config) { struct agilent_82350b_priv *a_priv = board->private_data; static const unsigned int firmware_length = 5302; @@ -550,7 +558,7 @@ static int init_82350a_hardware(gpib_board_t *board, const gpib_board_config_t * return 0; } -static int test_sram(gpib_board_t *board) +static int test_sram(struct gpib_board *board) { struct agilent_82350b_priv *a_priv = board->private_data; @@ -579,7 +587,8 @@ static int test_sram(gpib_board_t *board) return 0; } -static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_config_t *config, +static int agilent_82350b_generic_attach(struct gpib_board *board, + const gpib_board_config_t *config, int use_fifos) { @@ -721,17 +730,19 @@ static int agilent_82350b_generic_attach(gpib_board_t *board, const gpib_board_c return 0; } -static int agilent_82350b_unaccel_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int agilent_82350b_unaccel_attach(struct gpib_board *board, + const gpib_board_config_t *config) { return agilent_82350b_generic_attach(board, config, 0); } -static int agilent_82350b_accel_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int agilent_82350b_accel_attach(struct gpib_board *board, + const gpib_board_config_t *config) { return agilent_82350b_generic_attach(board, config, 1); } -static void agilent_82350b_detach(gpib_board_t *board) +static void agilent_82350b_detach(struct gpib_board *board) { struct agilent_82350b_priv *a_priv = board->private_data; struct tms9914_priv *tms_priv; From 7daafcc9a0c4f8a10d3ae07252f8e8997e019f9e Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:07 +0000 Subject: [PATCH 1445/2157] staging: gpib: agilent_82357a: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-4-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- .../gpib/agilent_82357a/agilent_82357a.c | 85 ++++++++++--------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 4dda899ff1b2..85e12c33f118 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -25,9 +25,10 @@ MODULE_DESCRIPTION("GPIB driver for Agilent 82357A/B usb adapters"); static struct usb_interface *agilent_82357a_driver_interfaces[MAX_NUM_82357A_INTERFACES]; static DEFINE_MUTEX(agilent_82357a_hotplug_lock); // protect board insertion and removal -static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned int clear_mask); +static unsigned int agilent_82357a_update_status(struct gpib_board *board, + unsigned int clear_mask); -static int agilent_82357a_take_control_internal(gpib_board_t *board, int synchronous); +static int agilent_82357a_take_control_internal(struct gpib_board *board, int synchronous); static void agilent_82357a_bulk_complete(struct urb *urb) { @@ -419,10 +420,10 @@ static int agilent_82357a_abort(struct agilent_82357a_priv *a_priv, int flush) } // interface functions -int agilent_82357a_command(gpib_board_t *board, uint8_t *buffer, size_t length, +int agilent_82357a_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written); -static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int agilent_82357a_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *nbytes) { int retval; @@ -533,8 +534,10 @@ static int agilent_82357a_read(gpib_board_t *board, uint8_t *buffer, size_t leng return retval; } -static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_commands, int send_eoi, size_t *bytes_written) +static ssize_t agilent_82357a_generic_write(struct gpib_board *board, + uint8_t *buffer, size_t length, + int send_commands, int send_eoi, + size_t *bytes_written) { int retval; struct agilent_82357a_priv *a_priv = board->private_data; @@ -672,19 +675,19 @@ static ssize_t agilent_82357a_generic_write(gpib_board_t *board, uint8_t *buffer return 0; } -static int agilent_82357a_write(gpib_board_t *board, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written) +static int agilent_82357a_write(struct gpib_board *board, uint8_t *buffer, + size_t length, int send_eoi, size_t *bytes_written) { return agilent_82357a_generic_write(board, buffer, length, 0, send_eoi, bytes_written); } -int agilent_82357a_command(gpib_board_t *board, uint8_t *buffer, size_t length, +int agilent_82357a_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { return agilent_82357a_generic_write(board, buffer, length, 1, 0, bytes_written); } -int agilent_82357a_take_control_internal(gpib_board_t *board, int synchronous) +int agilent_82357a_take_control_internal(struct gpib_board *board, int synchronous) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); @@ -703,7 +706,7 @@ int agilent_82357a_take_control_internal(gpib_board_t *board, int synchronous) return retval; } -static int agilent_82357a_take_control(gpib_board_t *board, int synchronous) +static int agilent_82357a_take_control(struct gpib_board *board, int synchronous) { struct agilent_82357a_priv *a_priv = board->private_data; const int timeout = 10; @@ -732,7 +735,7 @@ static int agilent_82357a_take_control(gpib_board_t *board, int synchronous) return 0; } -static int agilent_82357a_go_to_standby(gpib_board_t *board) +static int agilent_82357a_go_to_standby(struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -752,7 +755,8 @@ static int agilent_82357a_go_to_standby(gpib_board_t *board) } //FIXME should change prototype to return int -static void agilent_82357a_request_system_control(gpib_board_t *board, int request_control) +static void agilent_82357a_request_system_control(struct gpib_board *board, + int request_control) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -784,7 +788,7 @@ static void agilent_82357a_request_system_control(gpib_board_t *board, int reque return;// retval; } -static void agilent_82357a_interface_clear(gpib_board_t *board, int assert) +static void agilent_82357a_interface_clear(struct gpib_board *board, int assert) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -806,7 +810,7 @@ static void agilent_82357a_interface_clear(gpib_board_t *board, int assert) dev_err(&usb_dev->dev, "write_registers() returned error\n"); } -static void agilent_82357a_remote_enable(gpib_board_t *board, int enable) +static void agilent_82357a_remote_enable(struct gpib_board *board, int enable) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -828,7 +832,8 @@ static void agilent_82357a_remote_enable(gpib_board_t *board, int enable) return;// 0; } -static int agilent_82357a_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int agilent_82357a_enable_eos(struct gpib_board *board, uint8_t eos_byte, + int compare_8_bits) { struct agilent_82357a_priv *a_priv = board->private_data; @@ -842,14 +847,15 @@ static int agilent_82357a_enable_eos(gpib_board_t *board, uint8_t eos_byte, int return 0; } -static void agilent_82357a_disable_eos(gpib_board_t *board) +static void agilent_82357a_disable_eos(struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; a_priv->eos_mode &= ~REOS; } -static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int agilent_82357a_update_status(struct gpib_board *board, + unsigned int clear_mask) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -911,7 +917,7 @@ static unsigned int agilent_82357a_update_status(gpib_board_t *board, unsigned i return board->status; } -static int agilent_82357a_primary_address(gpib_board_t *board, unsigned int address) +static int agilent_82357a_primary_address(struct gpib_board *board, unsigned int address) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); @@ -932,14 +938,15 @@ static int agilent_82357a_primary_address(gpib_board_t *board, unsigned int addr return retval; } -static int agilent_82357a_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int agilent_82357a_secondary_address(struct gpib_board *board, + unsigned int address, int enable) { if (enable) return -EOPNOTSUPP; return 0; } -static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) +static int agilent_82357a_parallel_poll(struct gpib_board *board, uint8_t *result) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -981,37 +988,37 @@ static int agilent_82357a_parallel_poll(gpib_board_t *board, uint8_t *result) return 0; } -static void agilent_82357a_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void agilent_82357a_parallel_poll_configure(struct gpib_board *board, uint8_t config) { //board can only be system controller return;// 0; } -static void agilent_82357a_parallel_poll_response(gpib_board_t *board, int ist) +static void agilent_82357a_parallel_poll_response(struct gpib_board *board, int ist) { //board can only be system controller return;// 0; } -static void agilent_82357a_serial_poll_response(gpib_board_t *board, uint8_t status) +static void agilent_82357a_serial_poll_response(struct gpib_board *board, uint8_t status) { //board can only be system controller return;// 0; } -static uint8_t agilent_82357a_serial_poll_status(gpib_board_t *board) +static uint8_t agilent_82357a_serial_poll_status(struct gpib_board *board) { //board can only be system controller return 0; } -static void agilent_82357a_return_to_local(gpib_board_t *board) +static void agilent_82357a_return_to_local(struct gpib_board *board) { //board can only be system controller return;// 0; } -static int agilent_82357a_line_status(const gpib_board_t *board) +static int agilent_82357a_line_status(const struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -1064,7 +1071,7 @@ static unsigned short nanosec_to_fast_talker_bits(unsigned int *nanosec) return bits; } -static unsigned int agilent_82357a_t1_delay(gpib_board_t *board, unsigned int nanosec) +static unsigned int agilent_82357a_t1_delay(struct gpib_board *board, unsigned int nanosec) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -1084,7 +1091,7 @@ static unsigned int agilent_82357a_t1_delay(gpib_board_t *board, unsigned int na static void agilent_82357a_interrupt_complete(struct urb *urb) { - gpib_board_t *board = urb->context; + struct gpib_board *board = urb->context; struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); int retval; @@ -1122,7 +1129,7 @@ static void agilent_82357a_interrupt_complete(struct urb *urb) dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); } -static int agilent_82357a_setup_urbs(gpib_board_t *board) +static int agilent_82357a_setup_urbs(struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; @@ -1186,7 +1193,7 @@ static void agilent_82357a_release_urbs(struct agilent_82357a_priv *a_priv) } } -static int agilent_82357a_allocate_private(gpib_board_t *board) +static int agilent_82357a_allocate_private(struct gpib_board *board) { struct agilent_82357a_priv *a_priv; @@ -1201,14 +1208,14 @@ static int agilent_82357a_allocate_private(gpib_board_t *board) return 0; } -static void agilent_82357a_free_private(gpib_board_t *board) +static void agilent_82357a_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } #define INIT_NUM_REG_WRITES 18 -static int agilent_82357a_init(gpib_board_t *board) +static int agilent_82357a_init(struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); @@ -1298,7 +1305,7 @@ static inline int agilent_82357a_device_match(struct usb_interface *interface, return 1; } -static int agilent_82357a_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int agilent_82357a_attach(struct gpib_board *board, const gpib_board_config_t *config) { int retval; int i; @@ -1374,7 +1381,7 @@ static int agilent_82357a_attach(gpib_board_t *board, const gpib_board_config_t return retval; } -static int agilent_82357a_go_idle(gpib_board_t *board) +static int agilent_82357a_go_idle(struct gpib_board *board) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(a_priv->bus_interface); @@ -1403,7 +1410,7 @@ static int agilent_82357a_go_idle(gpib_board_t *board) return 0; } -static void agilent_82357a_detach(gpib_board_t *board) +static void agilent_82357a_detach(struct gpib_board *board) { struct agilent_82357a_priv *a_priv; @@ -1511,7 +1518,7 @@ static void agilent_82357a_driver_disconnect(struct usb_interface *interface) for (i = 0; i < MAX_NUM_82357A_INTERFACES; ++i) { if (agilent_82357a_driver_interfaces[i] == interface) { - gpib_board_t *board = usb_get_intfdata(interface); + struct gpib_board *board = usb_get_intfdata(interface); if (board) { struct agilent_82357a_priv *a_priv = board->private_data; @@ -1547,7 +1554,7 @@ static int agilent_82357a_driver_suspend(struct usb_interface *interface, pm_mes for (i = 0; i < MAX_NUM_82357A_INTERFACES; ++i) { if (agilent_82357a_driver_interfaces[i] == interface) { - gpib_board_t *board = usb_get_intfdata(interface); + struct gpib_board *board = usb_get_intfdata(interface); if (board) { struct agilent_82357a_priv *a_priv = board->private_data; @@ -1583,7 +1590,7 @@ static int agilent_82357a_driver_suspend(struct usb_interface *interface, pm_mes static int agilent_82357a_driver_resume(struct usb_interface *interface) { struct usb_device *usb_dev = interface_to_usbdev(interface); - gpib_board_t *board; + struct gpib_board *board; int i, retval; mutex_lock(&agilent_82357a_hotplug_lock); From 990f25c6bc999905c0651f51ce70af74b7f50037 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:08 +0000 Subject: [PATCH 1446/2157] staging: gpib: cb7210: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-5-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cb7210/cb7210.c | 107 ++++++++++++++------------- 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 808e0a71dfd6..621aa8fda913 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -27,7 +27,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver Measurement Computing boards using cb7210.2 and cbi488.2"); -static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int cb7210_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); static inline int have_fifo_word(const struct cb7210_priv *cb_priv) @@ -40,7 +40,7 @@ static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, return 0; } -static inline void input_fifo_enable(gpib_board_t *board, int enable) +static inline void input_fifo_enable(struct gpib_board *board, int enable) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -76,7 +76,7 @@ static inline void input_fifo_enable(gpib_board_t *board, int enable) spin_unlock_irqrestore(&board->spinlock, flags); } -static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t *buffer, +static int fifo_read(struct gpib_board *board, struct cb7210_priv *cb_priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -170,7 +170,7 @@ static int fifo_read(gpib_board_t *board, struct cb7210_priv *cb_priv, uint8_t * return retval; } -static int cb7210_accel_read(gpib_board_t *board, uint8_t *buffer, +static int cb7210_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval; @@ -229,7 +229,7 @@ static int output_fifo_empty(const struct cb7210_priv *cb_priv) return 0; } -static inline void output_fifo_enable(gpib_board_t *board, int enable) +static inline void output_fifo_enable(struct gpib_board *board, int enable) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -264,7 +264,8 @@ static inline void output_fifo_enable(gpib_board_t *board, int enable) spin_unlock_irqrestore(&board->spinlock, flags); } -static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int fifo_write(struct gpib_board *board, uint8_t *buffer, size_t length, + size_t *bytes_written) { size_t count = 0; ssize_t retval = 0; @@ -349,8 +350,8 @@ static int fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, size_ return retval; } -static int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, - size_t *bytes_written) +static int cb7210_accel_write(struct gpib_board *board, uint8_t *buffer, + size_t length, int send_eoi, size_t *bytes_written) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -377,7 +378,7 @@ static int cb7210_accel_write(gpib_board_t *board, uint8_t *buffer, size_t lengt return retval; } -static int cb7210_line_status(const gpib_board_t *board) +static int cb7210_line_status(const struct gpib_board *board) { int status = VALID_ALL; int bsr_bits; @@ -407,7 +408,7 @@ static int cb7210_line_status(const gpib_board_t *board) return status; } -static unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int cb7210_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -424,7 +425,7 @@ static unsigned int cb7210_t1_delay(gpib_board_t *board, unsigned int nano_sec) return retval; } -static irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board); +static irqreturn_t cb7210_locked_internal_interrupt(struct gpib_board *board); /* * GPIB interrupt service routines @@ -433,7 +434,7 @@ static irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board); static irqreturn_t cb_pci_interrupt(int irq, void *arg) { int bits; - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct cb7210_priv *priv = board->private_data; // first task check if this is really our interrupt in a shared irq environment @@ -462,7 +463,7 @@ static irqreturn_t cb_pci_interrupt(int irq, void *arg) return cb7210_locked_internal_interrupt(arg); } -static irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) +static irqreturn_t cb7210_internal_interrupt(struct gpib_board *board) { int hs_status, status1, status2; struct cb7210_priv *priv = board->private_data; @@ -516,7 +517,7 @@ static irqreturn_t cb7210_internal_interrupt(gpib_board_t *board) return IRQ_HANDLED; } -static irqreturn_t cb7210_locked_internal_interrupt(gpib_board_t *board) +static irqreturn_t cb7210_locked_internal_interrupt(struct gpib_board *board) { unsigned long flags; irqreturn_t retval; @@ -532,14 +533,14 @@ static irqreturn_t cb7210_interrupt(int irq, void *arg) return cb7210_internal_interrupt(arg); } -static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config); +static int cb_pci_attach(struct gpib_board *board, const gpib_board_config_t *config); +static int cb_isa_attach(struct gpib_board *board, const gpib_board_config_t *config); -static void cb_pci_detach(gpib_board_t *board); -static void cb_isa_detach(gpib_board_t *board); +static void cb_pci_detach(struct gpib_board *board); +static void cb_isa_detach(struct gpib_board *board); // wrappers for interface functions -static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int cb7210_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct cb7210_priv *priv = board->private_data; @@ -547,7 +548,7 @@ static int cb7210_read(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -static int cb7210_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int cb7210_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct cb7210_priv *priv = board->private_data; @@ -555,7 +556,7 @@ static int cb7210_write(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int cb7210_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int cb7210_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct cb7210_priv *priv = board->private_data; @@ -563,21 +564,21 @@ static int cb7210_command(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int cb7210_take_control(gpib_board_t *board, int synchronous) +static int cb7210_take_control(struct gpib_board *board, int synchronous) { struct cb7210_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int cb7210_go_to_standby(gpib_board_t *board) +static int cb7210_go_to_standby(struct gpib_board *board) { struct cb7210_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void cb7210_request_system_control(gpib_board_t *board, int request_control) +static void cb7210_request_system_control(struct gpib_board *board, int request_control) { struct cb7210_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -591,91 +592,91 @@ static void cb7210_request_system_control(gpib_board_t *board, int request_contr nec7210_request_system_control(board, nec_priv, request_control); } -static void cb7210_interface_clear(gpib_board_t *board, int assert) +static void cb7210_interface_clear(struct gpib_board *board, int assert) { struct cb7210_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void cb7210_remote_enable(gpib_board_t *board, int enable) +static void cb7210_remote_enable(struct gpib_board *board, int enable) { struct cb7210_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int cb7210_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int cb7210_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct cb7210_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void cb7210_disable_eos(gpib_board_t *board) +static void cb7210_disable_eos(struct gpib_board *board) { struct cb7210_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int cb7210_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int cb7210_update_status(struct gpib_board *board, unsigned int clear_mask) { struct cb7210_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -static int cb7210_primary_address(gpib_board_t *board, unsigned int address) +static int cb7210_primary_address(struct gpib_board *board, unsigned int address) { struct cb7210_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int cb7210_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int cb7210_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct cb7210_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int cb7210_parallel_poll(gpib_board_t *board, uint8_t *result) +static int cb7210_parallel_poll(struct gpib_board *board, uint8_t *result) { struct cb7210_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -static void cb7210_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) +static void cb7210_parallel_poll_configure(struct gpib_board *board, uint8_t configuration) { struct cb7210_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, configuration); } -static void cb7210_parallel_poll_response(gpib_board_t *board, int ist) +static void cb7210_parallel_poll_response(struct gpib_board *board, int ist) { struct cb7210_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -static void cb7210_serial_poll_response(gpib_board_t *board, uint8_t status) +static void cb7210_serial_poll_response(struct gpib_board *board, uint8_t status) { struct cb7210_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -static uint8_t cb7210_serial_poll_status(gpib_board_t *board) +static uint8_t cb7210_serial_poll_status(struct gpib_board *board) { struct cb7210_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static void cb7210_return_to_local(gpib_board_t *board) +static void cb7210_return_to_local(struct gpib_board *board) { struct cb7210_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -851,7 +852,7 @@ static gpib_interface_t cb_isa_accel_interface = { .return_to_local = cb7210_return_to_local, }; -static int cb7210_allocate_private(gpib_board_t *board) +static int cb7210_allocate_private(struct gpib_board *board) { struct cb7210_priv *priv; @@ -864,14 +865,14 @@ static int cb7210_allocate_private(gpib_board_t *board) return 0; } -static void cb7210_generic_detach(gpib_board_t *board) +static void cb7210_generic_detach(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } // generic part of attach functions shared by all cb7210 boards -static int cb7210_generic_attach(gpib_board_t *board) +static int cb7210_generic_attach(struct gpib_board *board) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -889,7 +890,7 @@ static int cb7210_generic_attach(gpib_board_t *board) return 0; } -static int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) +static int cb7210_init(struct cb7210_priv *cb_priv, struct gpib_board *board) { struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; @@ -925,7 +926,7 @@ static int cb7210_init(struct cb7210_priv *cb_priv, gpib_board_t *board) return 0; } -static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_pci_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -1007,7 +1008,7 @@ static int cb_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) return cb7210_init(cb_priv, board); } -static void cb_pci_detach(gpib_board_t *board) +static void cb_pci_detach(struct gpib_board *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1030,7 +1031,7 @@ static void cb_pci_detach(gpib_board_t *board) cb7210_generic_detach(board); } -static int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_isa_attach(struct gpib_board *board, const gpib_board_config_t *config) { int isr_flags = 0; struct cb7210_priv *cb_priv; @@ -1066,7 +1067,7 @@ static int cb_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) return cb7210_init(cb_priv, board); } -static void cb_isa_detach(gpib_board_t *board) +static void cb_isa_detach(struct gpib_board *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1132,8 +1133,8 @@ static struct pci_driver cb7210_pci_driver = { static int cb_gpib_config(struct pcmcia_device *link); static void cb_gpib_release(struct pcmcia_device *link); -static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config); -static void cb_pcmcia_detach(gpib_board_t *board); +static int cb_pcmcia_attach(struct gpib_board *board, const gpib_board_config_t *config); +static void cb_pcmcia_detach(struct gpib_board *board); /* * A linked list of "instances" of the gpib device. Each actual @@ -1166,7 +1167,7 @@ static struct pcmcia_device *curr_dev; struct local_info { struct pcmcia_device *p_dev; - gpib_board_t *dev; + struct gpib_board *dev; }; /* @@ -1222,7 +1223,7 @@ static int cb_gpib_probe(struct pcmcia_device *link) static void cb_gpib_remove(struct pcmcia_device *link) { struct local_info *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (info->dev) cb_pcmcia_detach(info->dev); @@ -1287,7 +1288,7 @@ static void cb_gpib_release(struct pcmcia_device *link) static int cb_gpib_suspend(struct pcmcia_device *link) { //struct local_info *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (link->open) dev_warn(&link->dev, "Device still open\n"); @@ -1299,7 +1300,7 @@ static int cb_gpib_suspend(struct pcmcia_device *link) static int cb_gpib_resume(struct pcmcia_device *link) { //struct local_info *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; /*if (link->open) { * ni_gpib_probe(dev); / really? @@ -1416,7 +1417,7 @@ static gpib_interface_t cb_pcmcia_accel_interface = { .return_to_local = cb7210_return_to_local, }; -static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cb_pcmcia_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct cb7210_priv *cb_priv; struct nec7210_priv *nec_priv; @@ -1452,7 +1453,7 @@ static int cb_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf return cb7210_init(cb_priv, board); } -static void cb_pcmcia_detach(gpib_board_t *board) +static void cb_pcmcia_detach(struct gpib_board *board) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv; From f973adec9a5965ba5731646eea2c065f775748c6 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:09 +0000 Subject: [PATCH 1447/2157] staging: gpib: cec_gpib: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-6-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/cec/cec_gpib.c | 59 +++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/staging/gpib/cec/cec_gpib.c b/drivers/staging/gpib/cec/cec_gpib.c index 8f2b4b46a446..abe5b1b8bc5b 100644 --- a/drivers/staging/gpib/cec/cec_gpib.c +++ b/drivers/staging/gpib/cec/cec_gpib.c @@ -25,7 +25,7 @@ MODULE_DESCRIPTION("GPIB driver for CEC PCI and PCMCIA boards"); static irqreturn_t cec_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct cec_priv *priv = board->private_data; unsigned long flags; irqreturn_t retval; @@ -40,12 +40,12 @@ static irqreturn_t cec_interrupt(int irq, void *arg) #define CEC_DEV_ID 0x5cec #define CEC_SUBID 0x9050 -static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config); +static int cec_pci_attach(struct gpib_board *board, const gpib_board_config_t *config); -static void cec_pci_detach(gpib_board_t *board); +static void cec_pci_detach(struct gpib_board *board); // wrappers for interface functions -static int cec_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int cec_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct cec_priv *priv = board->private_data; @@ -53,7 +53,7 @@ static int cec_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *en return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -static int cec_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int cec_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct cec_priv *priv = board->private_data; @@ -61,126 +61,127 @@ static int cec_write(gpib_board_t *board, uint8_t *buffer, size_t length, int se return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int cec_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int cec_command(struct gpib_board *board, uint8_t *buffer, + size_t length, size_t *bytes_written) { struct cec_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int cec_take_control(gpib_board_t *board, int synchronous) +static int cec_take_control(struct gpib_board *board, int synchronous) { struct cec_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int cec_go_to_standby(gpib_board_t *board) +static int cec_go_to_standby(struct gpib_board *board) { struct cec_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void cec_request_system_control(gpib_board_t *board, int request_control) +static void cec_request_system_control(struct gpib_board *board, int request_control) { struct cec_priv *priv = board->private_data; nec7210_request_system_control(board, &priv->nec7210_priv, request_control); } -static void cec_interface_clear(gpib_board_t *board, int assert) +static void cec_interface_clear(struct gpib_board *board, int assert) { struct cec_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void cec_remote_enable(gpib_board_t *board, int enable) +static void cec_remote_enable(struct gpib_board *board, int enable) { struct cec_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int cec_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int cec_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct cec_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void cec_disable_eos(gpib_board_t *board) +static void cec_disable_eos(struct gpib_board *board) { struct cec_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int cec_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int cec_update_status(struct gpib_board *board, unsigned int clear_mask) { struct cec_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -static int cec_primary_address(gpib_board_t *board, unsigned int address) +static int cec_primary_address(struct gpib_board *board, unsigned int address) { struct cec_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int cec_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int cec_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct cec_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int cec_parallel_poll(gpib_board_t *board, uint8_t *result) +static int cec_parallel_poll(struct gpib_board *board, uint8_t *result) { struct cec_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -static void cec_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void cec_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct cec_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, config); } -static void cec_parallel_poll_response(gpib_board_t *board, int ist) +static void cec_parallel_poll_response(struct gpib_board *board, int ist) { struct cec_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -static void cec_serial_poll_response(gpib_board_t *board, uint8_t status) +static void cec_serial_poll_response(struct gpib_board *board, uint8_t status) { struct cec_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -static uint8_t cec_serial_poll_status(gpib_board_t *board) +static uint8_t cec_serial_poll_status(struct gpib_board *board) { struct cec_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static unsigned int cec_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int cec_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct cec_priv *priv = board->private_data; return nec7210_t1_delay(board, &priv->nec7210_priv, nano_sec); } -static void cec_return_to_local(gpib_board_t *board) +static void cec_return_to_local(struct gpib_board *board) { struct cec_priv *priv = board->private_data; @@ -215,7 +216,7 @@ static gpib_interface_t cec_pci_interface = { .return_to_local = cec_return_to_local, }; -static int cec_allocate_private(gpib_board_t *board) +static int cec_allocate_private(struct gpib_board *board) { struct cec_priv *priv; @@ -228,13 +229,13 @@ static int cec_allocate_private(gpib_board_t *board) return 0; } -static void cec_free_private(gpib_board_t *board) +static void cec_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -static int cec_generic_attach(gpib_board_t *board) +static int cec_generic_attach(struct gpib_board *board) { struct cec_priv *cec_priv; struct nec7210_priv *nec_priv; @@ -252,7 +253,7 @@ static int cec_generic_attach(gpib_board_t *board) return 0; } -static void cec_init(struct cec_priv *cec_priv, const gpib_board_t *board) +static void cec_init(struct cec_priv *cec_priv, const struct gpib_board *board) { struct nec7210_priv *nec_priv = &cec_priv->nec7210_priv; @@ -264,7 +265,7 @@ static void cec_init(struct cec_priv *cec_priv, const gpib_board_t *board) nec7210_board_online(nec_priv, board); } -static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int cec_pci_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct cec_priv *cec_priv; struct nec7210_priv *nec_priv; @@ -322,7 +323,7 @@ static int cec_pci_attach(gpib_board_t *board, const gpib_board_config_t *config return 0; } -static void cec_pci_detach(gpib_board_t *board) +static void cec_pci_detach(struct gpib_board *board) { struct cec_priv *cec_priv = board->private_data; struct nec7210_priv *nec_priv; From 840459da1574dbfedd83eaf00995b3cfd8074679 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:10 +0000 Subject: [PATCH 1448/2157] staging: gpib: common: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-7-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/common/gpib_os.c | 196 +++++++++++++------------- drivers/staging/gpib/common/iblib.c | 50 +++---- drivers/staging/gpib/common/ibsys.h | 14 +- 3 files changed, 130 insertions(+), 130 deletions(-) diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c index 301c8a1a62c2..9bf2076cbbb3 100644 --- a/drivers/staging/gpib/common/gpib_os.c +++ b/drivers/staging/gpib/common/gpib_os.c @@ -26,53 +26,53 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB base support"); MODULE_ALIAS_CHARDEV_MAJOR(GPIB_CODE); -static int board_type_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, unsigned long arg); -static int read_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int board_type_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg); +static int read_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg); -static int write_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int write_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg); -static int command_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int command_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg); -static int open_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned long arg); -static int close_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned long arg); -static int serial_poll_ioctl(gpib_board_t *board, unsigned long arg); -static int wait_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, unsigned long arg); -static int parallel_poll_ioctl(gpib_board_t *board, unsigned long arg); -static int online_ioctl(gpib_board_t *board, unsigned long arg); -static int remote_enable_ioctl(gpib_board_t *board, unsigned long arg); -static int take_control_ioctl(gpib_board_t *board, unsigned long arg); -static int line_status_ioctl(gpib_board_t *board, unsigned long arg); -static int pad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int open_dev_ioctl(struct file *filep, struct gpib_board *board, unsigned long arg); +static int close_dev_ioctl(struct file *filep, struct gpib_board *board, unsigned long arg); +static int serial_poll_ioctl(struct gpib_board *board, unsigned long arg); +static int wait_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg); +static int parallel_poll_ioctl(struct gpib_board *board, unsigned long arg); +static int online_ioctl(struct gpib_board *board, unsigned long arg); +static int remote_enable_ioctl(struct gpib_board *board, unsigned long arg); +static int take_control_ioctl(struct gpib_board *board, unsigned long arg); +static int line_status_ioctl(struct gpib_board *board, unsigned long arg); +static int pad_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg); -static int sad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int sad_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg); -static int eos_ioctl(gpib_board_t *board, unsigned long arg); -static int request_service_ioctl(gpib_board_t *board, unsigned long arg); -static int request_service2_ioctl(gpib_board_t *board, unsigned long arg); +static int eos_ioctl(struct gpib_board *board, unsigned long arg); +static int request_service_ioctl(struct gpib_board *board, unsigned long arg); +static int request_service2_ioctl(struct gpib_board *board, unsigned long arg); static int iobase_ioctl(gpib_board_config_t *config, unsigned long arg); static int irq_ioctl(gpib_board_config_t *config, unsigned long arg); static int dma_ioctl(gpib_board_config_t *config, unsigned long arg); -static int autospoll_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int autospoll_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg); -static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int mutex_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg); -static int timeout_ioctl(gpib_board_t *board, unsigned long arg); -static int status_bytes_ioctl(gpib_board_t *board, unsigned long arg); -static int board_info_ioctl(const gpib_board_t *board, unsigned long arg); -static int ppc_ioctl(gpib_board_t *board, unsigned long arg); -static int set_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg); -static int get_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg); -static int query_board_rsv_ioctl(gpib_board_t *board, unsigned long arg); -static int interface_clear_ioctl(gpib_board_t *board, unsigned long arg); +static int timeout_ioctl(struct gpib_board *board, unsigned long arg); +static int status_bytes_ioctl(struct gpib_board *board, unsigned long arg); +static int board_info_ioctl(const struct gpib_board *board, unsigned long arg); +static int ppc_ioctl(struct gpib_board *board, unsigned long arg); +static int set_local_ppoll_mode_ioctl(struct gpib_board *board, unsigned long arg); +static int get_local_ppoll_mode_ioctl(struct gpib_board *board, unsigned long arg); +static int query_board_rsv_ioctl(struct gpib_board *board, unsigned long arg); +static int interface_clear_ioctl(struct gpib_board *board, unsigned long arg); static int select_pci_ioctl(gpib_board_config_t *config, unsigned long arg); static int select_device_path_ioctl(gpib_board_config_t *config, unsigned long arg); -static int event_ioctl(gpib_board_t *board, unsigned long arg); -static int request_system_control_ioctl(gpib_board_t *board, unsigned long arg); -static int t1_delay_ioctl(gpib_board_t *board, unsigned long arg); +static int event_ioctl(struct gpib_board *board, unsigned long arg); +static int request_system_control_ioctl(struct gpib_board *board, unsigned long arg); +static int t1_delay_ioctl(struct gpib_board *board, unsigned long arg); -static int cleanup_open_devices(gpib_file_private_t *file_priv, gpib_board_t *board); +static int cleanup_open_devices(gpib_file_private_t *file_priv, struct gpib_board *board); -static int pop_gpib_event_nolock(gpib_board_t *board, gpib_event_queue_t *queue, short *event_type); +static int pop_gpib_event_nolock(struct gpib_board *board, gpib_event_queue_t *queue, short *event_type); /* * Timer functions @@ -82,14 +82,14 @@ static int pop_gpib_event_nolock(gpib_board_t *board, gpib_event_queue_t *queue, static void watchdog_timeout(struct timer_list *t) { - gpib_board_t *board = from_timer(board, t, timer); + struct gpib_board *board = from_timer(board, t, timer); set_bit(TIMO_NUM, &board->status); wake_up_interruptible(&board->wait); } /* install timer interrupt handler */ -void os_start_timer(gpib_board_t *board, unsigned int usec_timeout) +void os_start_timer(struct gpib_board *board, unsigned int usec_timeout) /* Starts the timeout task */ { if (timer_pending(&board->timer)) { @@ -105,14 +105,14 @@ void os_start_timer(gpib_board_t *board, unsigned int usec_timeout) } } -void os_remove_timer(gpib_board_t *board) +void os_remove_timer(struct gpib_board *board) /* Removes the timeout task */ { if (timer_pending(&board->timer)) del_timer_sync(&board->timer); } -int io_timed_out(gpib_board_t *board) +int io_timed_out(struct gpib_board *board) { if (test_bit(TIMO_NUM, &board->status)) return 1; @@ -140,7 +140,7 @@ static void pseudo_irq_handler(struct timer_list *t) mod_timer(&pseudo_irq->timer, jiffies + pseudo_irq_period()); } -int gpib_request_pseudo_irq(gpib_board_t *board, irqreturn_t (*handler)(int, void *)) +int gpib_request_pseudo_irq(struct gpib_board *board, irqreturn_t (*handler)(int, void *)) { if (timer_pending(&board->pseudo_irq.timer) || board->pseudo_irq.handler) { dev_err(board->gpib_dev, "only one pseudo interrupt per board allowed\n"); @@ -159,7 +159,7 @@ int gpib_request_pseudo_irq(gpib_board_t *board, irqreturn_t (*handler)(int, voi } EXPORT_SYMBOL(gpib_request_pseudo_irq); -void gpib_free_pseudo_irq(gpib_board_t *board) +void gpib_free_pseudo_irq(struct gpib_board *board) { atomic_set(&board->pseudo_irq.active, 0); @@ -178,7 +178,7 @@ unsigned int num_status_bytes(const gpib_status_queue_t *dev) } // push status byte onto back of status byte fifo -int push_status_byte(gpib_board_t *board, gpib_status_queue_t *device, u8 poll_byte) +int push_status_byte(struct gpib_board *board, gpib_status_queue_t *device, u8 poll_byte) { struct list_head *head = &device->status_bytes; status_byte_t *status; @@ -212,7 +212,7 @@ int push_status_byte(gpib_board_t *board, gpib_status_queue_t *device, u8 poll_b } // pop status byte from front of status byte fifo -int pop_status_byte(gpib_board_t *board, gpib_status_queue_t *device, u8 *poll_byte) +int pop_status_byte(struct gpib_board *board, gpib_status_queue_t *device, u8 *poll_byte) { struct list_head *head = &device->status_bytes; struct list_head *front = head->next; @@ -243,7 +243,7 @@ int pop_status_byte(gpib_board_t *board, gpib_status_queue_t *device, u8 *poll_b return 0; } -gpib_status_queue_t *get_gpib_status_queue(gpib_board_t *board, unsigned int pad, int sad) +gpib_status_queue_t *get_gpib_status_queue(struct gpib_board *board, unsigned int pad, int sad) { gpib_status_queue_t *device; struct list_head *list_ptr; @@ -258,7 +258,7 @@ gpib_status_queue_t *get_gpib_status_queue(gpib_board_t *board, unsigned int pad return NULL; } -int get_serial_poll_byte(gpib_board_t *board, unsigned int pad, int sad, unsigned int usec_timeout, +int get_serial_poll_byte(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *poll_byte) { gpib_status_queue_t *device; @@ -270,7 +270,7 @@ int get_serial_poll_byte(gpib_board_t *board, unsigned int pad, int sad, unsigne return dvrsp(board, pad, sad, usec_timeout, poll_byte); } -int autopoll_all_devices(gpib_board_t *board) +int autopoll_all_devices(struct gpib_board *board) { int retval; @@ -301,7 +301,7 @@ int autopoll_all_devices(gpib_board_t *board) return retval; } -static int setup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) +static int setup_serial_poll(struct gpib_board *board, unsigned int usec_timeout) { u8 cmd_string[8]; int i; @@ -333,7 +333,7 @@ static int setup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) return 0; } -static int read_serial_poll_byte(gpib_board_t *board, unsigned int pad, +static int read_serial_poll_byte(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *result) { u8 cmd_string[8]; @@ -378,7 +378,7 @@ static int read_serial_poll_byte(gpib_board_t *board, unsigned int pad, return 0; } -static int cleanup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) +static int cleanup_serial_poll(struct gpib_board *board, unsigned int usec_timeout) { u8 cmd_string[8]; int ret; @@ -404,7 +404,7 @@ static int cleanup_serial_poll(gpib_board_t *board, unsigned int usec_timeout) return 0; } -static int serial_poll_single(gpib_board_t *board, unsigned int pad, int sad, +static int serial_poll_single(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *result) { int retval, cleanup_retval; @@ -422,7 +422,7 @@ static int serial_poll_single(gpib_board_t *board, unsigned int pad, int sad, return 0; } -int serial_poll_all(gpib_board_t *board, unsigned int usec_timeout) +int serial_poll_all(struct gpib_board *board, unsigned int usec_timeout) { int retval = 0; struct list_head *cur; @@ -469,7 +469,7 @@ int serial_poll_all(gpib_board_t *board, unsigned int usec_timeout) * SPD and UNT are sent at the completion of the poll. */ -int dvrsp(gpib_board_t *board, unsigned int pad, int sad, +int dvrsp(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *result) { int status = ibstatus(board); @@ -521,7 +521,7 @@ static int init_gpib_file_private(gpib_file_private_t *priv) int ibopen(struct inode *inode, struct file *filep) { unsigned int minor = iminor(inode); - gpib_board_t *board; + struct gpib_board *board; gpib_file_private_t *priv; if (minor >= GPIB_MAX_NUM_BOARDS) { @@ -559,7 +559,7 @@ int ibopen(struct inode *inode, struct file *filep) int ibclose(struct inode *inode, struct file *filep) { unsigned int minor = iminor(inode); - gpib_board_t *board; + struct gpib_board *board; gpib_file_private_t *priv = filep->private_data; gpib_descriptor_t *desc; @@ -605,7 +605,7 @@ int ibclose(struct inode *inode, struct file *filep) long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) { unsigned int minor = iminor(filep->f_path.dentry->d_inode); - gpib_board_t *board; + struct gpib_board *board; gpib_file_private_t *file_priv = filep->private_data; long retval = -ENOTTY; @@ -806,7 +806,7 @@ long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) return retval; } -static int board_type_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, unsigned long arg) +static int board_type_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg) { struct list_head *list_ptr; board_type_ioctl_t cmd; @@ -857,7 +857,7 @@ static int board_type_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, return -EINVAL; } -static int read_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int read_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg) { read_write_ioctl_t read_cmd; @@ -933,7 +933,7 @@ static int read_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, } static int command_ioctl(gpib_file_private_t *file_priv, - gpib_board_t *board, unsigned long arg) + struct gpib_board *board, unsigned long arg) { read_write_ioctl_t cmd; u8 __user *userbuf; @@ -1016,7 +1016,7 @@ static int command_ioctl(gpib_file_private_t *file_priv, return retval; } -static int write_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int write_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg) { read_write_ioctl_t write_cmd; @@ -1087,7 +1087,7 @@ static int write_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, return retval; } -static int status_bytes_ioctl(gpib_board_t *board, unsigned long arg) +static int status_bytes_ioctl(struct gpib_board *board, unsigned long arg) { gpib_status_queue_t *device; spoll_bytes_ioctl_t cmd; @@ -1110,7 +1110,7 @@ static int status_bytes_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int increment_open_device_count(gpib_board_t *board, struct list_head *head, +static int increment_open_device_count(struct gpib_board *board, struct list_head *head, unsigned int pad, int sad) { struct list_head *list_ptr; @@ -1145,7 +1145,7 @@ static int increment_open_device_count(gpib_board_t *board, struct list_head *he return 0; } -static int subtract_open_device_count(gpib_board_t *board, struct list_head *head, +static int subtract_open_device_count(struct gpib_board *board, struct list_head *head, unsigned int pad, int sad, unsigned int count) { gpib_status_queue_t *device; @@ -1174,13 +1174,13 @@ static int subtract_open_device_count(gpib_board_t *board, struct list_head *hea return -EINVAL; } -static inline int decrement_open_device_count(gpib_board_t *board, struct list_head *head, +static inline int decrement_open_device_count(struct gpib_board *board, struct list_head *head, unsigned int pad, int sad) { return subtract_open_device_count(board, head, pad, sad, 1); } -static int cleanup_open_devices(gpib_file_private_t *file_priv, gpib_board_t *board) +static int cleanup_open_devices(gpib_file_private_t *file_priv, struct gpib_board *board) { int retval = 0; int i; @@ -1205,7 +1205,7 @@ static int cleanup_open_devices(gpib_file_private_t *file_priv, gpib_board_t *bo return 0; } -static int open_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned long arg) +static int open_dev_ioctl(struct file *filep, struct gpib_board *board, unsigned long arg) { open_dev_ioctl_t open_dev_cmd; int retval; @@ -1255,7 +1255,7 @@ static int open_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned long return 0; } -static int close_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned long arg) +static int close_dev_ioctl(struct file *filep, struct gpib_board *board, unsigned long arg) { close_dev_ioctl_t cmd; gpib_file_private_t *file_priv = filep->private_data; @@ -1282,7 +1282,7 @@ static int close_dev_ioctl(struct file *filep, gpib_board_t *board, unsigned lon return 0; } -static int serial_poll_ioctl(gpib_board_t *board, unsigned long arg) +static int serial_poll_ioctl(struct gpib_board *board, unsigned long arg) { serial_poll_ioctl_t serial_cmd; int retval; @@ -1303,7 +1303,7 @@ static int serial_poll_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int wait_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, +static int wait_ioctl(gpib_file_private_t *file_priv, struct gpib_board *board, unsigned long arg) { wait_ioctl_t wait_cmd; @@ -1330,7 +1330,7 @@ static int wait_ioctl(gpib_file_private_t *file_priv, gpib_board_t *board, return 0; } -static int parallel_poll_ioctl(gpib_board_t *board, unsigned long arg) +static int parallel_poll_ioctl(struct gpib_board *board, unsigned long arg) { u8 poll_byte; int retval; @@ -1346,7 +1346,7 @@ static int parallel_poll_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int online_ioctl(gpib_board_t *board, unsigned long arg) +static int online_ioctl(struct gpib_board *board, unsigned long arg) { online_ioctl_t online_cmd; int retval; @@ -1390,7 +1390,7 @@ static int online_ioctl(gpib_board_t *board, unsigned long arg) return retval; } -static int remote_enable_ioctl(gpib_board_t *board, unsigned long arg) +static int remote_enable_ioctl(struct gpib_board *board, unsigned long arg) { int enable; int retval; @@ -1402,7 +1402,7 @@ static int remote_enable_ioctl(gpib_board_t *board, unsigned long arg) return ibsre(board, enable); } -static int take_control_ioctl(gpib_board_t *board, unsigned long arg) +static int take_control_ioctl(struct gpib_board *board, unsigned long arg) { int synchronous; int retval; @@ -1414,7 +1414,7 @@ static int take_control_ioctl(gpib_board_t *board, unsigned long arg) return ibcac(board, synchronous, 1); } -static int line_status_ioctl(gpib_board_t *board, unsigned long arg) +static int line_status_ioctl(struct gpib_board *board, unsigned long arg) { short lines; int retval; @@ -1430,7 +1430,7 @@ static int line_status_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int pad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int pad_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg) { pad_ioctl_t cmd; @@ -1466,7 +1466,7 @@ static int pad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, return 0; } -static int sad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int sad_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg) { sad_ioctl_t cmd; @@ -1501,7 +1501,7 @@ static int sad_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, return 0; } -static int eos_ioctl(gpib_board_t *board, unsigned long arg) +static int eos_ioctl(struct gpib_board *board, unsigned long arg) { eos_ioctl_t eos_cmd; int retval; @@ -1513,7 +1513,7 @@ static int eos_ioctl(gpib_board_t *board, unsigned long arg) return ibeos(board, eos_cmd.eos, eos_cmd.eos_flags); } -static int request_service_ioctl(gpib_board_t *board, unsigned long arg) +static int request_service_ioctl(struct gpib_board *board, unsigned long arg) { u8 status_byte; int retval; @@ -1525,7 +1525,7 @@ static int request_service_ioctl(gpib_board_t *board, unsigned long arg) return ibrsv2(board, status_byte, status_byte & request_service_bit); } -static int request_service2_ioctl(gpib_board_t *board, unsigned long arg) +static int request_service2_ioctl(struct gpib_board *board, unsigned long arg) { request_service2_t request_service2_cmd; int retval; @@ -1592,7 +1592,7 @@ static int dma_ioctl(gpib_board_config_t *config, unsigned long arg) return 0; } -static int autospoll_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int autospoll_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg) { autospoll_ioctl_t enable; @@ -1630,7 +1630,7 @@ static int autospoll_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, return retval; } -static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, +static int mutex_ioctl(struct gpib_board *board, gpib_file_private_t *file_priv, unsigned long arg) { int retval, lock_mutex; @@ -1670,7 +1670,7 @@ static int mutex_ioctl(gpib_board_t *board, gpib_file_private_t *file_priv, return 0; } -static int timeout_ioctl(gpib_board_t *board, unsigned long arg) +static int timeout_ioctl(struct gpib_board *board, unsigned long arg) { unsigned int timeout; int retval; @@ -1685,7 +1685,7 @@ static int timeout_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int ppc_ioctl(gpib_board_t *board, unsigned long arg) +static int ppc_ioctl(struct gpib_board *board, unsigned long arg) { ppoll_config_ioctl_t cmd; int retval; @@ -1711,7 +1711,7 @@ static int ppc_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int set_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg) +static int set_local_ppoll_mode_ioctl(struct gpib_board *board, unsigned long arg) { local_ppoll_mode_ioctl_t cmd; int retval; @@ -1728,7 +1728,7 @@ static int set_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int get_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg) +static int get_local_ppoll_mode_ioctl(struct gpib_board *board, unsigned long arg) { local_ppoll_mode_ioctl_t cmd; int retval; @@ -1741,7 +1741,7 @@ static int get_local_ppoll_mode_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int query_board_rsv_ioctl(gpib_board_t *board, unsigned long arg) +static int query_board_rsv_ioctl(struct gpib_board *board, unsigned long arg) { int status; int retval; @@ -1755,7 +1755,7 @@ static int query_board_rsv_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int board_info_ioctl(const gpib_board_t *board, unsigned long arg) +static int board_info_ioctl(const struct gpib_board *board, unsigned long arg) { board_info_ioctl_t info; int retval; @@ -1778,7 +1778,7 @@ static int board_info_ioctl(const gpib_board_t *board, unsigned long arg) return 0; } -static int interface_clear_ioctl(gpib_board_t *board, unsigned long arg) +static int interface_clear_ioctl(struct gpib_board *board, unsigned long arg) { unsigned int usec_duration; int retval; @@ -1841,7 +1841,7 @@ unsigned int num_gpib_events(const gpib_event_queue_t *queue) return queue->num_events; } -static int push_gpib_event_nolock(gpib_board_t *board, short event_type) +static int push_gpib_event_nolock(struct gpib_board *board, short event_type) { gpib_event_queue_t *queue = &board->event_queue; struct list_head *head = &queue->event_head; @@ -1879,7 +1879,7 @@ static int push_gpib_event_nolock(gpib_board_t *board, short event_type) } // push event onto back of event queue -int push_gpib_event(gpib_board_t *board, short event_type) +int push_gpib_event(struct gpib_board *board, short event_type) { unsigned long flags; int retval; @@ -1897,7 +1897,7 @@ int push_gpib_event(gpib_board_t *board, short event_type) } EXPORT_SYMBOL(push_gpib_event); -static int pop_gpib_event_nolock(gpib_board_t *board, gpib_event_queue_t *queue, short *event_type) +static int pop_gpib_event_nolock(struct gpib_board *board, gpib_event_queue_t *queue, short *event_type) { struct list_head *head = &queue->event_head; struct list_head *front = head->next; @@ -1931,7 +1931,7 @@ static int pop_gpib_event_nolock(gpib_board_t *board, gpib_event_queue_t *queue, } // pop event from front of event queue -int pop_gpib_event(gpib_board_t *board, gpib_event_queue_t *queue, short *event_type) +int pop_gpib_event(struct gpib_board *board, gpib_event_queue_t *queue, short *event_type) { unsigned long flags; int retval; @@ -1942,7 +1942,7 @@ int pop_gpib_event(gpib_board_t *board, gpib_event_queue_t *queue, short *event_ return retval; } -static int event_ioctl(gpib_board_t *board, unsigned long arg) +static int event_ioctl(struct gpib_board *board, unsigned long arg) { event_ioctl_t user_event; int retval; @@ -1961,7 +1961,7 @@ static int event_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int request_system_control_ioctl(gpib_board_t *board, unsigned long arg) +static int request_system_control_ioctl(struct gpib_board *board, unsigned long arg) { rsc_ioctl_t request_control; int retval; @@ -1975,7 +1975,7 @@ static int request_system_control_ioctl(gpib_board_t *board, unsigned long arg) return 0; } -static int t1_delay_ioctl(gpib_board_t *board, unsigned long arg) +static int t1_delay_ioctl(struct gpib_board *board, unsigned long arg) { t1_delay_ioctl_t cmd; unsigned int delay; @@ -2004,7 +2004,7 @@ static const struct file_operations ib_fops = { .release = &ibclose, }; -gpib_board_t board_array[GPIB_MAX_NUM_BOARDS]; +struct gpib_board board_array[GPIB_MAX_NUM_BOARDS]; LIST_HEAD(registered_drivers); @@ -2039,7 +2039,7 @@ void gpib_unregister_driver(gpib_interface_t *interface) struct list_head *list_ptr; for (i = 0; i < GPIB_MAX_NUM_BOARDS; i++) { - gpib_board_t *board = &board_array[i]; + struct gpib_board *board = &board_array[i]; if (board->interface == interface) { if (board->use_count > 0) @@ -2069,7 +2069,7 @@ static void init_gpib_board_config(gpib_board_config_t *config) config->pci_slot = -1; } -void init_gpib_board(gpib_board_t *board) +void init_gpib_board(struct gpib_board *board) { board->interface = NULL; board->provider_module = NULL; @@ -2104,7 +2104,7 @@ void init_gpib_board(gpib_board_t *board) board->local_ppoll_mode = 0; } -int gpib_allocate_board(gpib_board_t *board) +int gpib_allocate_board(struct gpib_board *board) { if (!board->buffer) { board->buffer_length = 0x4000; @@ -2117,7 +2117,7 @@ int gpib_allocate_board(gpib_board_t *board) return 0; } -void gpib_deallocate_board(gpib_board_t *board) +void gpib_deallocate_board(struct gpib_board *board) { short dummy; @@ -2130,7 +2130,7 @@ void gpib_deallocate_board(gpib_board_t *board) pop_gpib_event(board, &board->event_queue, &dummy); } -static void init_board_array(gpib_board_t *board_array, unsigned int length) +static void init_board_array(struct gpib_board *board_array, unsigned int length) { int i; diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c index 92e792e12301..6cca8a49e839 100644 --- a/drivers/staging/gpib/common/iblib.c +++ b/drivers/staging/gpib/common/iblib.c @@ -21,7 +21,7 @@ * If fallback_to_async is non-zero, try to take control asynchronously * if synchronous attempt fails. */ -int ibcac(gpib_board_t *board, int sync, int fallback_to_async) +int ibcac(struct gpib_board *board, int sync, int fallback_to_async) { int status = ibstatus(board); int retval; @@ -61,7 +61,7 @@ int ibcac(gpib_board_t *board, int sync, int fallback_to_async) * set the skip_check_for_command_acceptors flag in their * gpib_interface_struct to avoid useless overhead. */ -static int check_for_command_acceptors(gpib_board_t *board) +static int check_for_command_acceptors(struct gpib_board *board) { int lines; @@ -96,7 +96,7 @@ static int check_for_command_acceptors(gpib_board_t *board) * must be called to initialize the GPIB and enable * the interface to leave the controller idle state. */ -int ibcmd(gpib_board_t *board, uint8_t *buf, size_t length, size_t *bytes_written) +int ibcmd(struct gpib_board *board, uint8_t *buf, size_t length, size_t *bytes_written) { ssize_t ret = 0; int status; @@ -131,7 +131,7 @@ int ibcmd(gpib_board_t *board, uint8_t *buf, size_t length, size_t *bytes_writte * active state, i.e., turn ATN off. */ -int ibgts(gpib_board_t *board) +int ibgts(struct gpib_board *board) { int status = ibstatus(board); int retval; @@ -146,7 +146,7 @@ int ibgts(gpib_board_t *board) return retval; } -static int autospoll_wait_should_wake_up(gpib_board_t *board) +static int autospoll_wait_should_wake_up(struct gpib_board *board) { int retval; @@ -162,7 +162,7 @@ static int autospoll_wait_should_wake_up(gpib_board_t *board) static int autospoll_thread(void *board_void) { - gpib_board_t *board = board_void; + struct gpib_board *board = board_void; int retval = 0; dev_dbg(board->gpib_dev, "entering autospoll thread\n"); @@ -199,7 +199,7 @@ static int autospoll_thread(void *board_void) return retval; } -int ibonline(gpib_board_t *board) +int ibonline(struct gpib_board *board) { int retval; @@ -238,7 +238,7 @@ int ibonline(gpib_board_t *board) } /* XXX need to make sure board is generally not in use (grab board lock?) */ -int iboffline(gpib_board_t *board) +int iboffline(struct gpib_board *board) { int retval; @@ -270,7 +270,7 @@ int iboffline(gpib_board_t *board) * Next LSB (bits 8-15) - STATUS lines mask (lines that are currently set). * */ -int iblines(const gpib_board_t *board, short *lines) +int iblines(const struct gpib_board *board, short *lines) { int retval; @@ -297,7 +297,7 @@ int iblines(const gpib_board_t *board, short *lines) * calling ibcmd. */ -int ibrd(gpib_board_t *board, uint8_t *buf, size_t length, int *end_flag, size_t *nbytes) +int ibrd(struct gpib_board *board, uint8_t *buf, size_t length, int *end_flag, size_t *nbytes) { ssize_t ret = 0; int retval; @@ -343,7 +343,7 @@ int ibrd(gpib_board_t *board, uint8_t *buf, size_t length, int *end_flag, size_t * 1. Prior to conducting the poll the interface is placed * in the controller active state. */ -int ibrpp(gpib_board_t *board, uint8_t *result) +int ibrpp(struct gpib_board *board, uint8_t *result) { int retval = 0; @@ -358,7 +358,7 @@ int ibrpp(gpib_board_t *board, uint8_t *result) return retval; } -int ibppc(gpib_board_t *board, uint8_t configuration) +int ibppc(struct gpib_board *board, uint8_t configuration) { configuration &= 0x1f; board->interface->parallel_poll_configure(board, configuration); @@ -367,7 +367,7 @@ int ibppc(gpib_board_t *board, uint8_t configuration) return 0; } -int ibrsv2(gpib_board_t *board, uint8_t status_byte, int new_reason_for_service) +int ibrsv2(struct gpib_board *board, uint8_t status_byte, int new_reason_for_service) { int board_status = ibstatus(board); const unsigned int MSS = status_byte & request_service_bit; @@ -400,7 +400,7 @@ int ibrsv2(gpib_board_t *board, uint8_t status_byte, int new_reason_for_service) * ibcmd in order to initialize the bus and enable the * interface to leave the controller idle state. */ -int ibsic(gpib_board_t *board, unsigned int usec_duration) +int ibsic(struct gpib_board *board, unsigned int usec_duration) { if (board->master == 0) return -EINVAL; @@ -419,7 +419,7 @@ int ibsic(gpib_board_t *board, unsigned int usec_duration) } /* FIXME make int */ -void ibrsc(gpib_board_t *board, int request_control) +void ibrsc(struct gpib_board *board, int request_control) { board->master = request_control != 0; if (board->interface->request_system_control) @@ -430,7 +430,7 @@ void ibrsc(gpib_board_t *board, int request_control) * IBSRE * Send REN true if v is non-zero or false if v is zero. */ -int ibsre(gpib_board_t *board, int enable) +int ibsre(struct gpib_board *board, int enable) { if (board->master == 0) return -EINVAL; @@ -447,7 +447,7 @@ int ibsre(gpib_board_t *board, int enable) * change the GPIB address of the interface board. The address * must be 0 through 30. ibonl resets the address to PAD. */ -int ibpad(gpib_board_t *board, unsigned int addr) +int ibpad(struct gpib_board *board, unsigned int addr) { if (addr > MAX_GPIB_PRIMARY_ADDRESS) return -EINVAL; @@ -465,7 +465,7 @@ int ibpad(gpib_board_t *board, unsigned int addr) * The address must be 0 through 30, or negative disables. ibonl resets the * address to SAD. */ -int ibsad(gpib_board_t *board, int addr) +int ibsad(struct gpib_board *board, int addr) { if (addr > MAX_GPIB_SECONDARY_ADDRESS) return -EINVAL; @@ -486,7 +486,7 @@ int ibsad(gpib_board_t *board, int addr) * Set the end-of-string modes for I/O operations to v. * */ -int ibeos(gpib_board_t *board, int eos, int eosflags) +int ibeos(struct gpib_board *board, int eos, int eosflags) { int retval; @@ -501,12 +501,12 @@ int ibeos(gpib_board_t *board, int eos, int eosflags) return retval; } -int ibstatus(gpib_board_t *board) +int ibstatus(struct gpib_board *board) { return general_ibstatus(board, NULL, 0, 0, NULL); } -int general_ibstatus(gpib_board_t *board, const gpib_status_queue_t *device, +int general_ibstatus(struct gpib_board *board, const gpib_status_queue_t *device, int clear_mask, int set_mask, gpib_descriptor_t *desc) { int status = 0; @@ -552,7 +552,7 @@ int general_ibstatus(gpib_board_t *board, const gpib_status_queue_t *device, } struct wait_info { - gpib_board_t *board; + struct gpib_board *board; struct timer_list timer; int timed_out; unsigned long usec_timeout; @@ -576,7 +576,7 @@ static void init_wait_info(struct wait_info *winfo) static int wait_satisfied(struct wait_info *winfo, gpib_status_queue_t *status_queue, int wait_mask, int *status, gpib_descriptor_t *desc) { - gpib_board_t *board = winfo->board; + struct gpib_board *board = winfo->board; int temp_status; if (mutex_lock_interruptible(&board->big_gpib_mutex)) @@ -622,7 +622,7 @@ static void remove_wait_timer(struct wait_info *winfo) * If the mask is 0 then * no condition is waited for. */ -int ibwait(gpib_board_t *board, int wait_mask, int clear_mask, int set_mask, +int ibwait(struct gpib_board *board, int wait_mask, int clear_mask, int set_mask, int *status, unsigned long usec_timeout, gpib_descriptor_t *desc) { int retval = 0; @@ -677,7 +677,7 @@ int ibwait(gpib_board_t *board, int wait_mask, int clear_mask, int set_mask, * well as the interface board itself must be * addressed by calling ibcmd. */ -int ibwrt(gpib_board_t *board, uint8_t *buf, size_t cnt, int send_eoi, size_t *bytes_written) +int ibwrt(struct gpib_board *board, uint8_t *buf, size_t cnt, int send_eoi, size_t *bytes_written) { int ret = 0; int retval; diff --git a/drivers/staging/gpib/common/ibsys.h b/drivers/staging/gpib/common/ibsys.h index da20971e9c7e..19960af809c2 100644 --- a/drivers/staging/gpib/common/ibsys.h +++ b/drivers/staging/gpib/common/ibsys.h @@ -19,13 +19,13 @@ #define MAX_GPIB_PRIMARY_ADDRESS 30 #define MAX_GPIB_SECONDARY_ADDRESS 31 -int gpib_allocate_board(gpib_board_t *board); -void gpib_deallocate_board(gpib_board_t *board); +int gpib_allocate_board(struct gpib_board *board); +void gpib_deallocate_board(struct gpib_board *board); unsigned int num_status_bytes(const gpib_status_queue_t *dev); -int push_status_byte(gpib_board_t *board, gpib_status_queue_t *device, uint8_t poll_byte); -int pop_status_byte(gpib_board_t *board, gpib_status_queue_t *device, uint8_t *poll_byte); -gpib_status_queue_t *get_gpib_status_queue(gpib_board_t *board, unsigned int pad, int sad); -int get_serial_poll_byte(gpib_board_t *board, unsigned int pad, int sad, +int push_status_byte(struct gpib_board *board, gpib_status_queue_t *device, uint8_t poll_byte); +int pop_status_byte(struct gpib_board *board, gpib_status_queue_t *device, uint8_t *poll_byte); +gpib_status_queue_t *get_gpib_status_queue(struct gpib_board *board, unsigned int pad, int sad); +int get_serial_poll_byte(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *poll_byte); -int autopoll_all_devices(gpib_board_t *board); +int autopoll_all_devices(struct gpib_board *board); From 1cadd195a22f5af6c4d70cec99e9fdbd99c4a2a3 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:11 +0000 Subject: [PATCH 1449/2157] staging: gpib: eastwood: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-8-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/eastwood/fluke_gpib.c | 93 +++++++++++----------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index bcddf3125b8c..b3b629c892e2 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -24,11 +24,11 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB Driver for Fluke cda devices"); -static int fluke_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config); -static int fluke_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config); -static void fluke_detach(gpib_board_t *board); -static int fluke_config_dma(gpib_board_t *board, int output); -static irqreturn_t fluke_gpib_internal_interrupt(gpib_board_t *board); +static int fluke_attach_holdoff_all(struct gpib_board *board, const gpib_board_config_t *config); +static int fluke_attach_holdoff_end(struct gpib_board *board, const gpib_board_config_t *config); +static void fluke_detach(struct gpib_board *board); +static int fluke_config_dma(struct gpib_board *board, int output); +static irqreturn_t fluke_gpib_internal_interrupt(struct gpib_board *board); static struct platform_device *fluke_gpib_pdev; @@ -54,7 +54,7 @@ static void fluke_locking_write_byte(struct nec7210_priv *nec_priv, uint8_t byte } // wrappers for interface functions -static int fluke_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int fluke_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fluke_priv *priv = board->private_data; @@ -62,7 +62,7 @@ static int fluke_read(gpib_board_t *board, uint8_t *buffer, size_t length, int * return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -static int fluke_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fluke_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fluke_priv *priv = board->private_data; @@ -70,28 +70,29 @@ static int fluke_write(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int fluke_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int fluke_command(struct gpib_board *board, uint8_t *buffer, + size_t length, size_t *bytes_written) { struct fluke_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int fluke_take_control(gpib_board_t *board, int synchronous) +static int fluke_take_control(struct gpib_board *board, int synchronous) { struct fluke_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int fluke_go_to_standby(gpib_board_t *board) +static int fluke_go_to_standby(struct gpib_board *board) { struct fluke_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void fluke_request_system_control(gpib_board_t *board, int request_control) +static void fluke_request_system_control(struct gpib_board *board, int request_control) { struct fluke_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -99,91 +100,91 @@ static void fluke_request_system_control(gpib_board_t *board, int request_contro nec7210_request_system_control(board, nec_priv, request_control); } -static void fluke_interface_clear(gpib_board_t *board, int assert) +static void fluke_interface_clear(struct gpib_board *board, int assert) { struct fluke_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void fluke_remote_enable(gpib_board_t *board, int enable) +static void fluke_remote_enable(struct gpib_board *board, int enable) { struct fluke_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int fluke_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int fluke_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct fluke_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void fluke_disable_eos(gpib_board_t *board) +static void fluke_disable_eos(struct gpib_board *board) { struct fluke_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int fluke_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int fluke_update_status(struct gpib_board *board, unsigned int clear_mask) { struct fluke_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -static int fluke_primary_address(gpib_board_t *board, unsigned int address) +static int fluke_primary_address(struct gpib_board *board, unsigned int address) { struct fluke_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int fluke_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int fluke_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct fluke_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int fluke_parallel_poll(gpib_board_t *board, uint8_t *result) +static int fluke_parallel_poll(struct gpib_board *board, uint8_t *result) { struct fluke_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -static void fluke_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) +static void fluke_parallel_poll_configure(struct gpib_board *board, uint8_t configuration) { struct fluke_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, configuration); } -static void fluke_parallel_poll_response(gpib_board_t *board, int ist) +static void fluke_parallel_poll_response(struct gpib_board *board, int ist) { struct fluke_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -static void fluke_serial_poll_response(gpib_board_t *board, uint8_t status) +static void fluke_serial_poll_response(struct gpib_board *board, uint8_t status) { struct fluke_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -static uint8_t fluke_serial_poll_status(gpib_board_t *board) +static uint8_t fluke_serial_poll_status(struct gpib_board *board) { struct fluke_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static void fluke_return_to_local(gpib_board_t *board) +static void fluke_return_to_local(struct gpib_board *board) { struct fluke_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -193,7 +194,7 @@ static void fluke_return_to_local(gpib_board_t *board) write_byte(nec_priv, AUX_RTL, AUXMR); } -static int fluke_line_status(const gpib_board_t *board) +static int fluke_line_status(const struct gpib_board *board) { int status = VALID_ALL; int bsr_bits; @@ -223,7 +224,7 @@ static int fluke_line_status(const gpib_board_t *board) return status; } -static unsigned int fluke_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int fluke_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -240,7 +241,7 @@ static unsigned int fluke_t1_delay(gpib_board_t *board, unsigned int nano_sec) return retval; } -static int lacs_or_read_ready(gpib_board_t *board) +static int lacs_or_read_ready(struct gpib_board *board) { const struct fluke_priv *e_priv = board->private_data; const struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -256,7 +257,7 @@ static int lacs_or_read_ready(gpib_board_t *board) /* Wait until it is possible for a read to do something useful. This * is not essential, it only exists to prevent RFD holdoff from being released pointlessly. */ -static int wait_for_read(gpib_board_t *board) +static int wait_for_read(struct gpib_board *board) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -313,7 +314,7 @@ static int source_handshake_is_sids_or_sgns(struct fluke_priv *e_priv) * If the chip is SGNS it is probably waiting for a a byte to * be written to it. */ -static int wait_for_data_out_ready(gpib_board_t *board) +static int wait_for_data_out_ready(struct gpib_board *board) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -332,7 +333,7 @@ static int wait_for_data_out_ready(gpib_board_t *board) return retval; } -static int wait_for_sids_or_sgns(gpib_board_t *board) +static int wait_for_sids_or_sgns(struct gpib_board *board) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -353,7 +354,7 @@ static int wait_for_sids_or_sgns(gpib_board_t *board) static void fluke_dma_callback(void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; unsigned long flags; @@ -370,7 +371,7 @@ static void fluke_dma_callback(void *arg) spin_unlock_irqrestore(&board->spinlock, flags); } -static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fluke_dma_write(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct fluke_priv *e_priv = board->private_data; @@ -455,7 +456,7 @@ static int fluke_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, return retval; } -static int fluke_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fluke_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fluke_priv *e_priv = board->private_data; @@ -543,7 +544,7 @@ static int fluke_get_dma_residue(struct dma_chan *chan, dma_cookie_t cookie) return state.residue; } -static int fluke_dma_read(gpib_board_t *board, uint8_t *buffer, +static int fluke_dma_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fluke_priv *e_priv = board->private_data; @@ -656,7 +657,7 @@ static int fluke_dma_read(gpib_board_t *board, uint8_t *buffer, return retval; } -static int fluke_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fluke_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fluke_priv *e_priv = board->private_data; @@ -786,7 +787,7 @@ static gpib_interface_t fluke_interface = { .return_to_local = fluke_return_to_local, }; -irqreturn_t fluke_gpib_internal_interrupt(gpib_board_t *board) +irqreturn_t fluke_gpib_internal_interrupt(struct gpib_board *board) { int status0, status1, status2; struct fluke_priv *priv = board->private_data; @@ -823,7 +824,7 @@ irqreturn_t fluke_gpib_internal_interrupt(gpib_board_t *board) static irqreturn_t fluke_gpib_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; unsigned long flags; irqreturn_t retval; @@ -833,7 +834,7 @@ static irqreturn_t fluke_gpib_interrupt(int irq, void *arg) return retval; } -static int fluke_allocate_private(gpib_board_t *board) +static int fluke_allocate_private(struct gpib_board *board) { struct fluke_priv *priv; @@ -850,7 +851,7 @@ static int fluke_allocate_private(gpib_board_t *board) return 0; } -static void fluke_generic_detach(gpib_board_t *board) +static void fluke_generic_detach(struct gpib_board *board) { if (board->private_data) { struct fluke_priv *e_priv = board->private_data; @@ -862,7 +863,7 @@ static void fluke_generic_detach(gpib_board_t *board) } // generic part of attach functions shared by all cb7210 boards -static int fluke_generic_attach(gpib_board_t *board) +static int fluke_generic_attach(struct gpib_board *board) { struct fluke_priv *e_priv; struct nec7210_priv *nec_priv; @@ -882,7 +883,7 @@ static int fluke_generic_attach(gpib_board_t *board) return 0; } -static int fluke_config_dma(gpib_board_t *board, int output) +static int fluke_config_dma(struct gpib_board *board, int output) { struct fluke_priv *e_priv = board->private_data; struct dma_slave_config config; @@ -907,7 +908,7 @@ static int fluke_config_dma(gpib_board_t *board, int output) return dmaengine_slave_config(e_priv->dma_channel, &config); } -static int fluke_init(struct fluke_priv *e_priv, gpib_board_t *board, int handshake_mode) +static int fluke_init(struct fluke_priv *e_priv, struct gpib_board *board, int handshake_mode) { struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -942,7 +943,7 @@ static bool gpib_dma_channel_filter(struct dma_chan *chan, void *filter_param) return chan->chan_id == 0; } -static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *config, +static int fluke_attach_impl(struct gpib_board *board, const gpib_board_config_t *config, unsigned int handshake_mode) { struct fluke_priv *e_priv; @@ -1048,17 +1049,17 @@ static int fluke_attach_impl(gpib_board_t *board, const gpib_board_config_t *con return fluke_init(e_priv, board, handshake_mode); } -int fluke_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config) +int fluke_attach_holdoff_all(struct gpib_board *board, const gpib_board_config_t *config) { return fluke_attach_impl(board, config, HR_HLDA); } -int fluke_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config) +int fluke_attach_holdoff_end(struct gpib_board *board, const gpib_board_config_t *config) { return fluke_attach_impl(board, config, HR_HLDE); } -void fluke_detach(gpib_board_t *board) +void fluke_detach(struct gpib_board *board) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv; From 0ca4205bffa2434073f8c1432d40ac82f3d9055f Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:12 +0000 Subject: [PATCH 1450/2157] staging: gpib: fmh_gpib: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-9-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 122 ++++++++++++----------- 1 file changed, 62 insertions(+), 60 deletions(-) diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index 3e8ad8aa332f..f6bf127441f8 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -32,19 +32,21 @@ MODULE_DESCRIPTION("GPIB Driver for fmh_gpib_core"); MODULE_AUTHOR("Frank Mori Hess "); static irqreturn_t fmh_gpib_interrupt(int irq, void *arg); -static int fmh_gpib_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config); -static int fmh_gpib_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config); -static void fmh_gpib_detach(gpib_board_t *board); -static int fmh_gpib_pci_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config); -static int fmh_gpib_pci_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config); -static void fmh_gpib_pci_detach(gpib_board_t *board); -static int fmh_gpib_config_dma(gpib_board_t *board, int output); -static irqreturn_t fmh_gpib_internal_interrupt(gpib_board_t *board); +static int fmh_gpib_attach_holdoff_all(struct gpib_board *board, const gpib_board_config_t *config); +static int fmh_gpib_attach_holdoff_end(struct gpib_board *board, const gpib_board_config_t *config); +static void fmh_gpib_detach(struct gpib_board *board); +static int fmh_gpib_pci_attach_holdoff_all(struct gpib_board *board, + const gpib_board_config_t *config); +static int fmh_gpib_pci_attach_holdoff_end(struct gpib_board *board, + const gpib_board_config_t *config); +static void fmh_gpib_pci_detach(struct gpib_board *board); +static int fmh_gpib_config_dma(struct gpib_board *board, int output); +static irqreturn_t fmh_gpib_internal_interrupt(struct gpib_board *board); static struct platform_driver fmh_gpib_platform_driver; static struct pci_driver fmh_gpib_pci_driver; // wrappers for interface functions -static int fmh_gpib_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fmh_priv *priv = board->private_data; @@ -52,7 +54,7 @@ static int fmh_gpib_read(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -static int fmh_gpib_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fmh_priv *priv = board->private_data; @@ -60,7 +62,7 @@ static int fmh_gpib_write(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int fmh_gpib_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct fmh_priv *priv = board->private_data; @@ -68,21 +70,21 @@ static int fmh_gpib_command(gpib_board_t *board, uint8_t *buffer, size_t length, return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int fmh_gpib_take_control(gpib_board_t *board, int synchronous) +static int fmh_gpib_take_control(struct gpib_board *board, int synchronous) { struct fmh_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int fmh_gpib_go_to_standby(gpib_board_t *board) +static int fmh_gpib_go_to_standby(struct gpib_board *board) { struct fmh_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void fmh_gpib_request_system_control(gpib_board_t *board, int request_control) +static void fmh_gpib_request_system_control(struct gpib_board *board, int request_control) { struct fmh_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -90,77 +92,77 @@ static void fmh_gpib_request_system_control(gpib_board_t *board, int request_con nec7210_request_system_control(board, nec_priv, request_control); } -static void fmh_gpib_interface_clear(gpib_board_t *board, int assert) +static void fmh_gpib_interface_clear(struct gpib_board *board, int assert) { struct fmh_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void fmh_gpib_remote_enable(gpib_board_t *board, int enable) +static void fmh_gpib_remote_enable(struct gpib_board *board, int enable) { struct fmh_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int fmh_gpib_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int fmh_gpib_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct fmh_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void fmh_gpib_disable_eos(gpib_board_t *board) +static void fmh_gpib_disable_eos(struct gpib_board *board) { struct fmh_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int fmh_gpib_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int fmh_gpib_update_status(struct gpib_board *board, unsigned int clear_mask) { struct fmh_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -static int fmh_gpib_primary_address(gpib_board_t *board, unsigned int address) +static int fmh_gpib_primary_address(struct gpib_board *board, unsigned int address) { struct fmh_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int fmh_gpib_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int fmh_gpib_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct fmh_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int fmh_gpib_parallel_poll(gpib_board_t *board, uint8_t *result) +static int fmh_gpib_parallel_poll(struct gpib_board *board, uint8_t *result) { struct fmh_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -static void fmh_gpib_parallel_poll_configure(gpib_board_t *board, uint8_t configuration) +static void fmh_gpib_parallel_poll_configure(struct gpib_board *board, uint8_t configuration) { struct fmh_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, configuration); } -static void fmh_gpib_parallel_poll_response(gpib_board_t *board, int ist) +static void fmh_gpib_parallel_poll_response(struct gpib_board *board, int ist) { struct fmh_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -static void fmh_gpib_local_parallel_poll_mode(gpib_board_t *board, int local) +static void fmh_gpib_local_parallel_poll_mode(struct gpib_board *board, int local) { struct fmh_priv *priv = board->private_data; @@ -175,7 +177,7 @@ static void fmh_gpib_local_parallel_poll_mode(gpib_board_t *board, int local) } } -static void fmh_gpib_serial_poll_response2(gpib_board_t *board, uint8_t status, +static void fmh_gpib_serial_poll_response2(struct gpib_board *board, uint8_t status, int new_reason_for_service) { struct fmh_priv *priv = board->private_data; @@ -210,14 +212,14 @@ static void fmh_gpib_serial_poll_response2(gpib_board_t *board, uint8_t status, spin_unlock_irqrestore(&board->spinlock, flags); } -static uint8_t fmh_gpib_serial_poll_status(gpib_board_t *board) +static uint8_t fmh_gpib_serial_poll_status(struct gpib_board *board) { struct fmh_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static void fmh_gpib_return_to_local(gpib_board_t *board) +static void fmh_gpib_return_to_local(struct gpib_board *board) { struct fmh_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -227,7 +229,7 @@ static void fmh_gpib_return_to_local(gpib_board_t *board) write_byte(nec_priv, AUX_RTL, AUXMR); } -static int fmh_gpib_line_status(const gpib_board_t *board) +static int fmh_gpib_line_status(const struct gpib_board *board) { int status = VALID_ALL; int bsr_bits; @@ -259,7 +261,7 @@ static int fmh_gpib_line_status(const gpib_board_t *board) return status; } -static unsigned int fmh_gpib_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int fmh_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -276,7 +278,7 @@ static unsigned int fmh_gpib_t1_delay(gpib_board_t *board, unsigned int nano_sec return retval; } -static int lacs_or_read_ready(gpib_board_t *board) +static int lacs_or_read_ready(struct gpib_board *board) { const struct fmh_priv *e_priv = board->private_data; const struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -291,7 +293,7 @@ static int lacs_or_read_ready(gpib_board_t *board) return retval; } -static int wait_for_read(gpib_board_t *board) +static int wait_for_read(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -310,7 +312,7 @@ static int wait_for_read(gpib_board_t *board) return retval; } -static int wait_for_rx_fifo_half_full_or_end(gpib_board_t *board) +static int wait_for_rx_fifo_half_full_or_end(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -333,7 +335,7 @@ static int wait_for_rx_fifo_half_full_or_end(gpib_board_t *board) /* Wait until the gpib chip is ready to accept a data out byte. */ -static int wait_for_data_out_ready(gpib_board_t *board) +static int wait_for_data_out_ready(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -357,7 +359,7 @@ static int wait_for_data_out_ready(gpib_board_t *board) static void fmh_gpib_dma_callback(void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; unsigned long flags; @@ -389,7 +391,7 @@ static int fmh_gpib_all_bytes_are_sent(struct fmh_priv *e_priv) return 1; } -static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_dma_write(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct fmh_priv *e_priv = board->private_data; @@ -467,7 +469,7 @@ static int fmh_gpib_dma_write(gpib_board_t *board, uint8_t *buffer, size_t lengt return retval; } -static int fmh_gpib_accel_write(gpib_board_t *board, uint8_t *buffer, +static int fmh_gpib_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fmh_priv *e_priv = board->private_data; @@ -557,7 +559,7 @@ static int fmh_gpib_get_dma_residue(struct dma_chan *chan, dma_cookie_t cookie) return state.residue; } -static int wait_for_tx_fifo_half_empty(gpib_board_t *board) +static int wait_for_tx_fifo_half_empty(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; @@ -582,7 +584,7 @@ static int wait_for_tx_fifo_half_empty(gpib_board_t *board) /* supports writing a chunk of data whose length must fit into the hardware'd xfer counter, * called in a loop by fmh_gpib_fifo_write() */ -static int fmh_gpib_fifo_write_countable(gpib_board_t *board, uint8_t *buffer, +static int fmh_gpib_fifo_write_countable(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fmh_priv *e_priv = board->private_data; @@ -648,7 +650,7 @@ static int fmh_gpib_fifo_write_countable(gpib_board_t *board, uint8_t *buffer, return retval; } -static int fmh_gpib_fifo_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_fifo_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct fmh_priv *e_priv = board->private_data; @@ -695,7 +697,7 @@ static int fmh_gpib_fifo_write(gpib_board_t *board, uint8_t *buffer, size_t leng return retval; } -static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, +static int fmh_gpib_dma_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fmh_priv *e_priv = board->private_data; @@ -807,7 +809,7 @@ static int fmh_gpib_dma_read(gpib_board_t *board, uint8_t *buffer, return retval; } -static void fmh_gpib_release_rfd_holdoff(gpib_board_t *board, struct fmh_priv *e_priv) +static void fmh_gpib_release_rfd_holdoff(struct gpib_board *board, struct fmh_priv *e_priv) { struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; unsigned int ext_status_1; @@ -844,7 +846,7 @@ static void fmh_gpib_release_rfd_holdoff(gpib_board_t *board, struct fmh_priv *e spin_unlock_irqrestore(&board->spinlock, flags); } -static int fmh_gpib_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fmh_priv *e_priv = board->private_data; @@ -894,7 +896,7 @@ static int fmh_gpib_accel_read(gpib_board_t *board, uint8_t *buffer, size_t leng /* Read a chunk of data whose length is within the limits of the hardware's * xfer counter. Called in a loop from fmh_gpib_fifo_read(). */ -static int fmh_gpib_fifo_read_countable(gpib_board_t *board, uint8_t *buffer, +static int fmh_gpib_fifo_read_countable(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fmh_priv *e_priv = board->private_data; @@ -952,7 +954,7 @@ static int fmh_gpib_fifo_read_countable(gpib_board_t *board, uint8_t *buffer, return retval; } -static int fmh_gpib_fifo_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int fmh_gpib_fifo_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct fmh_priv *e_priv = board->private_data; @@ -1121,7 +1123,7 @@ static gpib_interface_t fmh_gpib_pci_unaccel_interface = { .return_to_local = fmh_gpib_return_to_local, }; -irqreturn_t fmh_gpib_internal_interrupt(gpib_board_t *board) +irqreturn_t fmh_gpib_internal_interrupt(struct gpib_board *board) { unsigned int status0, status1, status2, ext_status_1, fifo_status; struct fmh_priv *priv = board->private_data; @@ -1211,7 +1213,7 @@ irqreturn_t fmh_gpib_internal_interrupt(gpib_board_t *board) irqreturn_t fmh_gpib_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; unsigned long flags; irqreturn_t retval; @@ -1221,7 +1223,7 @@ irqreturn_t fmh_gpib_interrupt(int irq, void *arg) return retval; } -static int fmh_gpib_allocate_private(gpib_board_t *board) +static int fmh_gpib_allocate_private(struct gpib_board *board) { struct fmh_priv *priv; @@ -1238,7 +1240,7 @@ static int fmh_gpib_allocate_private(gpib_board_t *board) return 0; } -static void fmh_gpib_generic_detach(gpib_board_t *board) +static void fmh_gpib_generic_detach(struct gpib_board *board) { if (board->private_data) { struct fmh_priv *e_priv = board->private_data; @@ -1252,7 +1254,7 @@ static void fmh_gpib_generic_detach(gpib_board_t *board) } // generic part of attach functions -static int fmh_gpib_generic_attach(gpib_board_t *board) +static int fmh_gpib_generic_attach(struct gpib_board *board) { struct fmh_priv *e_priv; struct nec7210_priv *nec_priv; @@ -1272,7 +1274,7 @@ static int fmh_gpib_generic_attach(gpib_board_t *board) return 0; } -static int fmh_gpib_config_dma(gpib_board_t *board, int output) +static int fmh_gpib_config_dma(struct gpib_board *board, int output) { struct fmh_priv *e_priv = board->private_data; struct dma_slave_config config; @@ -1302,7 +1304,7 @@ static int fmh_gpib_config_dma(gpib_board_t *board, int output) return dmaengine_slave_config(e_priv->dma_channel, &config); } -static int fmh_gpib_init(struct fmh_priv *e_priv, gpib_board_t *board, int handshake_mode) +static int fmh_gpib_init(struct fmh_priv *e_priv, struct gpib_board *board, int handshake_mode) { struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; unsigned long flags; @@ -1349,7 +1351,7 @@ static int fmh_gpib_device_match(struct device *dev, const void *data) return 1; } -static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t *config, +static int fmh_gpib_attach_impl(struct gpib_board *board, const gpib_board_config_t *config, unsigned int handshake_mode, int acquire_dma) { struct fmh_priv *e_priv; @@ -1452,17 +1454,17 @@ static int fmh_gpib_attach_impl(gpib_board_t *board, const gpib_board_config_t * return fmh_gpib_init(e_priv, board, handshake_mode); } -int fmh_gpib_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config) +int fmh_gpib_attach_holdoff_all(struct gpib_board *board, const gpib_board_config_t *config) { return fmh_gpib_attach_impl(board, config, HR_HLDA, 0); } -int fmh_gpib_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config) +int fmh_gpib_attach_holdoff_end(struct gpib_board *board, const gpib_board_config_t *config) { return fmh_gpib_attach_impl(board, config, HR_HLDE, 1); } -void fmh_gpib_detach(gpib_board_t *board) +void fmh_gpib_detach(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1495,7 +1497,7 @@ void fmh_gpib_detach(gpib_board_t *board) fmh_gpib_generic_detach(board); } -static int fmh_gpib_pci_attach_impl(gpib_board_t *board, const gpib_board_config_t *config, +static int fmh_gpib_pci_attach_impl(struct gpib_board *board, const gpib_board_config_t *config, unsigned int handshake_mode) { struct fmh_priv *e_priv; @@ -1568,12 +1570,12 @@ static int fmh_gpib_pci_attach_impl(gpib_board_t *board, const gpib_board_config return fmh_gpib_init(e_priv, board, handshake_mode); } -int fmh_gpib_pci_attach_holdoff_all(gpib_board_t *board, const gpib_board_config_t *config) +int fmh_gpib_pci_attach_holdoff_all(struct gpib_board *board, const gpib_board_config_t *config) { return fmh_gpib_pci_attach_impl(board, config, HR_HLDA); } -int fmh_gpib_pci_attach_holdoff_end(gpib_board_t *board, const gpib_board_config_t *config) +int fmh_gpib_pci_attach_holdoff_end(struct gpib_board *board, const gpib_board_config_t *config) { int retval; struct fmh_priv *e_priv; @@ -1587,7 +1589,7 @@ int fmh_gpib_pci_attach_holdoff_end(gpib_board_t *board, const gpib_board_config return retval; } -void fmh_gpib_pci_detach(gpib_board_t *board) +void fmh_gpib_pci_detach(struct gpib_board *board) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv; From d2167c03896fa333a5b43a02e01a7e292315d473 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:13 +0000 Subject: [PATCH 1451/2157] staging: gpib: gpio: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-10-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/gpio/gpib_bitbang.c | 72 ++++++++++++------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/staging/gpib/gpio/gpib_bitbang.c b/drivers/staging/gpib/gpio/gpib_bitbang.c index 28c5fa9b81ab..611ff58b94ca 100644 --- a/drivers/staging/gpib/gpio/gpib_bitbang.c +++ b/drivers/staging/gpib/gpio/gpib_bitbang.c @@ -318,14 +318,14 @@ struct bb_priv { }; static inline long usec_diff(struct timespec64 *a, struct timespec64 *b); -static void bb_buffer_print(gpib_board_t *board, unsigned char *buffer, size_t length, +static void bb_buffer_print(struct gpib_board *board, unsigned char *buffer, size_t length, int cmd, int eoi); static void set_data_lines(u8 byte); static u8 get_data_lines(void); static void set_data_lines_input(void); static void set_data_lines_output(void); static inline int check_for_eos(struct bb_priv *priv, uint8_t byte); -static void set_atn(gpib_board_t *board, int atn_asserted); +static void set_atn(struct gpib_board *board, int atn_asserted); static inline void SET_DIR_WRITE(struct bb_priv *priv); static inline void SET_DIR_READ(struct bb_priv *priv); @@ -353,7 +353,7 @@ static char printable(char x) * * ***************************************************************************/ -static int bb_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int bb_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct bb_priv *priv = board->private_data; @@ -425,7 +425,7 @@ static int bb_read(gpib_board_t *board, uint8_t *buffer, size_t length, static irqreturn_t bb_DAV_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct bb_priv *priv = board->private_data; int val; unsigned long flags; @@ -491,7 +491,7 @@ static irqreturn_t bb_DAV_interrupt(int irq, void *arg) * * ***************************************************************************/ -static int bb_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int bb_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { unsigned long flags; @@ -580,7 +580,7 @@ static int bb_write(gpib_board_t *board, uint8_t *buffer, size_t length, static irqreturn_t bb_NRFD_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct bb_priv *priv = board->private_data; unsigned long flags; int nrfd; @@ -653,7 +653,7 @@ static irqreturn_t bb_NRFD_interrupt(int irq, void *arg) static irqreturn_t bb_NDAC_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct bb_priv *priv = board->private_data; unsigned long flags; int ndac; @@ -714,7 +714,7 @@ static irqreturn_t bb_NDAC_interrupt(int irq, void *arg) static irqreturn_t bb_SRQ_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; int val = gpiod_get_value(SRQ); @@ -728,7 +728,7 @@ static irqreturn_t bb_SRQ_interrupt(int irq, void *arg) return IRQ_HANDLED; } -static int bb_command(gpib_board_t *board, uint8_t *buffer, +static int bb_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { size_t ret; @@ -809,7 +809,7 @@ static char *cmd_string[32] = { "CFE" // 0x1f }; -static void bb_buffer_print(gpib_board_t *board, unsigned char *buffer, size_t length, +static void bb_buffer_print(struct gpib_board *board, unsigned char *buffer, size_t length, int cmd, int eoi) { int i; @@ -842,7 +842,7 @@ static void bb_buffer_print(gpib_board_t *board, unsigned char *buffer, size_t l * STATUS Management * * * ***************************************************************************/ -static void set_atn(gpib_board_t *board, int atn_asserted) +static void set_atn(struct gpib_board *board, int atn_asserted) { struct bb_priv *priv = board->private_data; @@ -867,7 +867,7 @@ static void set_atn(gpib_board_t *board, int atn_asserted) priv->atn_asserted = atn_asserted; } -static int bb_take_control(gpib_board_t *board, int synchronous) +static int bb_take_control(struct gpib_board *board, int synchronous) { dbg_printk(2, "%d\n", synchronous); set_atn(board, 1); @@ -875,14 +875,14 @@ static int bb_take_control(gpib_board_t *board, int synchronous) return 0; } -static int bb_go_to_standby(gpib_board_t *board) +static int bb_go_to_standby(struct gpib_board *board) { dbg_printk(2, "\n"); set_atn(board, 0); return 0; } -static void bb_request_system_control(gpib_board_t *board, int request_control) +static void bb_request_system_control(struct gpib_board *board, int request_control) { dbg_printk(2, "%d\n", request_control); if (request_control) { @@ -894,7 +894,7 @@ static void bb_request_system_control(gpib_board_t *board, int request_control) } } -static void bb_interface_clear(gpib_board_t *board, int assert) +static void bb_interface_clear(struct gpib_board *board, int assert) { struct bb_priv *priv = board->private_data; @@ -908,7 +908,7 @@ static void bb_interface_clear(gpib_board_t *board, int assert) } } -static void bb_remote_enable(gpib_board_t *board, int enable) +static void bb_remote_enable(struct gpib_board *board, int enable) { dbg_printk(2, "%d\n", enable); if (enable) { @@ -920,7 +920,7 @@ static void bb_remote_enable(gpib_board_t *board, int enable) } } -static int bb_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int bb_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct bb_priv *priv = board->private_data; @@ -933,7 +933,7 @@ static int bb_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bi return 0; } -static void bb_disable_eos(gpib_board_t *board) +static void bb_disable_eos(struct gpib_board *board) { struct bb_priv *priv = board->private_data; @@ -941,7 +941,7 @@ static void bb_disable_eos(gpib_board_t *board) priv->eos_flags &= ~REOS; } -static unsigned int bb_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int bb_update_status(struct gpib_board *board, unsigned int clear_mask) { struct bb_priv *priv = board->private_data; @@ -972,14 +972,14 @@ static unsigned int bb_update_status(gpib_board_t *board, unsigned int clear_mas return board->status; } -static int bb_primary_address(gpib_board_t *board, unsigned int address) +static int bb_primary_address(struct gpib_board *board, unsigned int address) { dbg_printk(2, "%d\n", address); board->pad = address; return 0; } -static int bb_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int bb_secondary_address(struct gpib_board *board, unsigned int address, int enable) { dbg_printk(2, "%d %d\n", address, enable); if (enable) @@ -987,29 +987,29 @@ static int bb_secondary_address(gpib_board_t *board, unsigned int address, int e return 0; } -static int bb_parallel_poll(gpib_board_t *board, uint8_t *result) +static int bb_parallel_poll(struct gpib_board *board, uint8_t *result) { return -ENOENT; } -static void bb_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void bb_parallel_poll_configure(struct gpib_board *board, uint8_t config) { } -static void bb_parallel_poll_response(gpib_board_t *board, int ist) +static void bb_parallel_poll_response(struct gpib_board *board, int ist) { } -static void bb_serial_poll_response(gpib_board_t *board, uint8_t status) +static void bb_serial_poll_response(struct gpib_board *board, uint8_t status) { } -static uint8_t bb_serial_poll_status(gpib_board_t *board) +static uint8_t bb_serial_poll_status(struct gpib_board *board) { return 0; // -ENOENT; } -static unsigned int bb_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int bb_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct bb_priv *priv = board->private_data; @@ -1025,11 +1025,11 @@ static unsigned int bb_t1_delay(gpib_board_t *board, unsigned int nano_sec) return priv->t1_delay; } -static void bb_return_to_local(gpib_board_t *board) +static void bb_return_to_local(struct gpib_board *board) { } -static int bb_line_status(const gpib_board_t *board) +static int bb_line_status(const struct gpib_board *board) { int line_status = VALID_ALL; @@ -1061,7 +1061,7 @@ static int bb_line_status(const gpib_board_t *board) * * ***************************************************************************/ -static int allocate_private(gpib_board_t *board) +static int allocate_private(struct gpib_board *board) { board->private_data = kzalloc(sizeof(struct bb_priv), GFP_KERNEL); if (!board->private_data) @@ -1069,13 +1069,13 @@ static int allocate_private(gpib_board_t *board) return 0; } -static void free_private(gpib_board_t *board) +static void free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -static int bb_get_irq(gpib_board_t *board, char *name, +static int bb_get_irq(struct gpib_board *board, char *name, struct gpio_desc *gpio, int *irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags) { @@ -1097,7 +1097,7 @@ static int bb_get_irq(gpib_board_t *board, char *name, return 0; } -static void bb_free_irq(gpib_board_t *board, int *irq, char *name) +static void bb_free_irq(struct gpib_board *board, int *irq, char *name) { if (*irq) { free_irq(*irq, board); @@ -1118,7 +1118,7 @@ static void release_gpios(void) } } -static int allocate_gpios(gpib_board_t *board) +static int allocate_gpios(struct gpib_board *board) { int j, retval = 0; bool error = false; @@ -1176,7 +1176,7 @@ static int allocate_gpios(gpib_board_t *board) return retval; } -static void bb_detach(gpib_board_t *board) +static void bb_detach(struct gpib_board *board) { struct bb_priv *priv = board->private_data; @@ -1206,7 +1206,7 @@ static void bb_detach(gpib_board_t *board) free_private(board); } -static int bb_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int bb_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct bb_priv *priv; int retval = 0; From 1691b2e3f20a9708991a6ec91137eb2a4a94ae38 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:14 +0000 Subject: [PATCH 1452/2157] staging: gpib: hp2335: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-11-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82335/hp82335.c | 56 ++++++++++++------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/staging/gpib/hp_82335/hp82335.c b/drivers/staging/gpib/hp_82335/hp82335.c index 982544d1b382..368e60c6ecd2 100644 --- a/drivers/staging/gpib/hp_82335/hp82335.c +++ b/drivers/staging/gpib/hp_82335/hp82335.c @@ -24,12 +24,12 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for HP 82335 interface cards"); -static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config); -static void hp82335_detach(gpib_board_t *board); +static int hp82335_attach(struct gpib_board *board, const gpib_board_config_t *config); +static void hp82335_detach(struct gpib_board *board); static irqreturn_t hp82335_interrupt(int irq, void *arg); // wrappers for interface functions -static int hp82335_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int hp82335_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct hp82335_priv *priv = board->private_data; @@ -37,7 +37,7 @@ static int hp82335_read(gpib_board_t *board, uint8_t *buffer, size_t length, return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -static int hp82335_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int hp82335_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct hp82335_priv *priv = board->private_data; @@ -45,7 +45,7 @@ static int hp82335_write(gpib_board_t *board, uint8_t *buffer, size_t length, in return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -static int hp82335_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int hp82335_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct hp82335_priv *priv = board->private_data; @@ -53,126 +53,126 @@ static int hp82335_command(gpib_board_t *board, uint8_t *buffer, size_t length, return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -static int hp82335_take_control(gpib_board_t *board, int synchronous) +static int hp82335_take_control(struct gpib_board *board, int synchronous) { struct hp82335_priv *priv = board->private_data; return tms9914_take_control(board, &priv->tms9914_priv, synchronous); } -static int hp82335_go_to_standby(gpib_board_t *board) +static int hp82335_go_to_standby(struct gpib_board *board) { struct hp82335_priv *priv = board->private_data; return tms9914_go_to_standby(board, &priv->tms9914_priv); } -static void hp82335_request_system_control(gpib_board_t *board, int request_control) +static void hp82335_request_system_control(struct gpib_board *board, int request_control) { struct hp82335_priv *priv = board->private_data; tms9914_request_system_control(board, &priv->tms9914_priv, request_control); } -static void hp82335_interface_clear(gpib_board_t *board, int assert) +static void hp82335_interface_clear(struct gpib_board *board, int assert) { struct hp82335_priv *priv = board->private_data; tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -static void hp82335_remote_enable(gpib_board_t *board, int enable) +static void hp82335_remote_enable(struct gpib_board *board, int enable) { struct hp82335_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -static int hp82335_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int hp82335_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct hp82335_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -static void hp82335_disable_eos(gpib_board_t *board) +static void hp82335_disable_eos(struct gpib_board *board) { struct hp82335_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -static unsigned int hp82335_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int hp82335_update_status(struct gpib_board *board, unsigned int clear_mask) { struct hp82335_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -static int hp82335_primary_address(gpib_board_t *board, unsigned int address) +static int hp82335_primary_address(struct gpib_board *board, unsigned int address) { struct hp82335_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -static int hp82335_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int hp82335_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct hp82335_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -static int hp82335_parallel_poll(gpib_board_t *board, uint8_t *result) +static int hp82335_parallel_poll(struct gpib_board *board, uint8_t *result) { struct hp82335_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -static void hp82335_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void hp82335_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct hp82335_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -static void hp82335_parallel_poll_response(gpib_board_t *board, int ist) +static void hp82335_parallel_poll_response(struct gpib_board *board, int ist) { struct hp82335_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -static void hp82335_serial_poll_response(gpib_board_t *board, uint8_t status) +static void hp82335_serial_poll_response(struct gpib_board *board, uint8_t status) { struct hp82335_priv *priv = board->private_data; tms9914_serial_poll_response(board, &priv->tms9914_priv, status); } -static uint8_t hp82335_serial_poll_status(gpib_board_t *board) +static uint8_t hp82335_serial_poll_status(struct gpib_board *board) { struct hp82335_priv *priv = board->private_data; return tms9914_serial_poll_status(board, &priv->tms9914_priv); } -static int hp82335_line_status(const gpib_board_t *board) +static int hp82335_line_status(const struct gpib_board *board) { struct hp82335_priv *priv = board->private_data; return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int hp82335_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int hp82335_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct hp82335_priv *priv = board->private_data; return tms9914_t1_delay(board, &priv->tms9914_priv, nano_sec); } -static void hp82335_return_to_local(gpib_board_t *board) +static void hp82335_return_to_local(struct gpib_board *board) { struct hp82335_priv *priv = board->private_data; @@ -207,7 +207,7 @@ static gpib_interface_t hp82335_interface = { .return_to_local = hp82335_return_to_local, }; -static int hp82335_allocate_private(gpib_board_t *board) +static int hp82335_allocate_private(struct gpib_board *board) { board->private_data = kzalloc(sizeof(struct hp82335_priv), GFP_KERNEL); if (!board->private_data) @@ -215,7 +215,7 @@ static int hp82335_allocate_private(gpib_board_t *board) return 0; } -static void hp82335_free_private(gpib_board_t *board) +static void hp82335_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; @@ -243,7 +243,7 @@ static void hp82335_clear_interrupt(struct hp82335_priv *hp_priv) writeb(0, tms_priv->mmiobase + HPREG_INTR_CLEAR); } -static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int hp82335_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct hp82335_priv *hp_priv; struct tms9914_priv *tms_priv; @@ -307,7 +307,7 @@ static int hp82335_attach(gpib_board_t *board, const gpib_board_config_t *config return 0; } -static void hp82335_detach(gpib_board_t *board) +static void hp82335_detach(struct gpib_board *board) { struct hp82335_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv; @@ -354,7 +354,7 @@ module_exit(hp82335_exit_module); static irqreturn_t hp82335_interrupt(int irq, void *arg) { int status1, status2; - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct hp82335_priv *priv = board->private_data; unsigned long flags; irqreturn_t retval; From b35e450b8a2fc9a7cb411523a33787aca49d70b9 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:15 +0000 Subject: [PATCH 1453/2157] staging: gpib: hp_82341: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-12-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/hp_82341/hp_82341.c | 68 ++++++++++++------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index b5d79ae519e7..5d76d01f6b32 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -22,14 +22,14 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for hp 82341a/b/c/d boards"); -static unsigned short read_and_clear_event_status(gpib_board_t *board); +static unsigned short read_and_clear_event_status(struct gpib_board *board); static void set_transfer_counter(struct hp_82341_priv *hp_priv, int count); static int read_transfer_counter(struct hp_82341_priv *hp_priv); -static int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int hp_82341_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); static irqreturn_t hp_82341_interrupt(int irq, void *arg); -static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int hp_82341_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct hp_82341_priv *hp_priv = board->private_data; @@ -147,7 +147,7 @@ static int hp_82341_accel_read(gpib_board_t *board, uint8_t *buffer, size_t leng return 0; } -static int restart_write_fifo(gpib_board_t *board, struct hp_82341_priv *hp_priv) +static int restart_write_fifo(struct gpib_board *board, struct hp_82341_priv *hp_priv) { struct tms9914_priv *tms_priv = &hp_priv->tms9914_priv; @@ -172,7 +172,7 @@ static int restart_write_fifo(gpib_board_t *board, struct hp_82341_priv *hp_priv return 0; } -static int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int hp_82341_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct hp_82341_priv *hp_priv = board->private_data; @@ -250,12 +250,12 @@ static int hp_82341_accel_write(gpib_board_t *board, uint8_t *buffer, size_t len return 0; } -static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *config); +static int hp_82341_attach(struct gpib_board *board, const gpib_board_config_t *config); -static void hp_82341_detach(gpib_board_t *board); +static void hp_82341_detach(struct gpib_board *board); // wrappers for interface functions -static int hp_82341_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int hp_82341_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct hp_82341_priv *priv = board->private_data; @@ -263,7 +263,7 @@ static int hp_82341_read(gpib_board_t *board, uint8_t *buffer, size_t length, in return tms9914_read(board, &priv->tms9914_priv, buffer, length, end, bytes_read); } -static int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int hp_82341_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct hp_82341_priv *priv = board->private_data; @@ -271,7 +271,7 @@ static int hp_82341_write(gpib_board_t *board, uint8_t *buffer, size_t length, i return tms9914_write(board, &priv->tms9914_priv, buffer, length, send_eoi, bytes_written); } -static int hp_82341_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int hp_82341_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct hp_82341_priv *priv = board->private_data; @@ -279,21 +279,21 @@ static int hp_82341_command(gpib_board_t *board, uint8_t *buffer, size_t length, return tms9914_command(board, &priv->tms9914_priv, buffer, length, bytes_written); } -static int hp_82341_take_control(gpib_board_t *board, int synchronous) +static int hp_82341_take_control(struct gpib_board *board, int synchronous) { struct hp_82341_priv *priv = board->private_data; return tms9914_take_control(board, &priv->tms9914_priv, synchronous); } -static int hp_82341_go_to_standby(gpib_board_t *board) +static int hp_82341_go_to_standby(struct gpib_board *board) { struct hp_82341_priv *priv = board->private_data; return tms9914_go_to_standby(board, &priv->tms9914_priv); } -static void hp_82341_request_system_control(gpib_board_t *board, int request_control) +static void hp_82341_request_system_control(struct gpib_board *board, int request_control) { struct hp_82341_priv *priv = board->private_data; @@ -305,105 +305,105 @@ static void hp_82341_request_system_control(gpib_board_t *board, int request_con tms9914_request_system_control(board, &priv->tms9914_priv, request_control); } -static void hp_82341_interface_clear(gpib_board_t *board, int assert) +static void hp_82341_interface_clear(struct gpib_board *board, int assert) { struct hp_82341_priv *priv = board->private_data; tms9914_interface_clear(board, &priv->tms9914_priv, assert); } -static void hp_82341_remote_enable(gpib_board_t *board, int enable) +static void hp_82341_remote_enable(struct gpib_board *board, int enable) { struct hp_82341_priv *priv = board->private_data; tms9914_remote_enable(board, &priv->tms9914_priv, enable); } -static int hp_82341_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int hp_82341_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct hp_82341_priv *priv = board->private_data; return tms9914_enable_eos(board, &priv->tms9914_priv, eos_byte, compare_8_bits); } -static void hp_82341_disable_eos(gpib_board_t *board) +static void hp_82341_disable_eos(struct gpib_board *board) { struct hp_82341_priv *priv = board->private_data; tms9914_disable_eos(board, &priv->tms9914_priv); } -static unsigned int hp_82341_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int hp_82341_update_status(struct gpib_board *board, unsigned int clear_mask) { struct hp_82341_priv *priv = board->private_data; return tms9914_update_status(board, &priv->tms9914_priv, clear_mask); } -static int hp_82341_primary_address(gpib_board_t *board, unsigned int address) +static int hp_82341_primary_address(struct gpib_board *board, unsigned int address) { struct hp_82341_priv *priv = board->private_data; return tms9914_primary_address(board, &priv->tms9914_priv, address); } -static int hp_82341_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int hp_82341_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct hp_82341_priv *priv = board->private_data; return tms9914_secondary_address(board, &priv->tms9914_priv, address, enable); } -static int hp_82341_parallel_poll(gpib_board_t *board, uint8_t *result) +static int hp_82341_parallel_poll(struct gpib_board *board, uint8_t *result) { struct hp_82341_priv *priv = board->private_data; return tms9914_parallel_poll(board, &priv->tms9914_priv, result); } -static void hp_82341_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void hp_82341_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct hp_82341_priv *priv = board->private_data; tms9914_parallel_poll_configure(board, &priv->tms9914_priv, config); } -static void hp_82341_parallel_poll_response(gpib_board_t *board, int ist) +static void hp_82341_parallel_poll_response(struct gpib_board *board, int ist) { struct hp_82341_priv *priv = board->private_data; tms9914_parallel_poll_response(board, &priv->tms9914_priv, ist); } -static void hp_82341_serial_poll_response(gpib_board_t *board, uint8_t status) +static void hp_82341_serial_poll_response(struct gpib_board *board, uint8_t status) { struct hp_82341_priv *priv = board->private_data; tms9914_serial_poll_response(board, &priv->tms9914_priv, status); } -static uint8_t hp_82341_serial_poll_status(gpib_board_t *board) +static uint8_t hp_82341_serial_poll_status(struct gpib_board *board) { struct hp_82341_priv *priv = board->private_data; return tms9914_serial_poll_status(board, &priv->tms9914_priv); } -static int hp_82341_line_status(const gpib_board_t *board) +static int hp_82341_line_status(const struct gpib_board *board) { struct hp_82341_priv *priv = board->private_data; return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int hp_82341_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int hp_82341_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct hp_82341_priv *priv = board->private_data; return tms9914_t1_delay(board, &priv->tms9914_priv, nano_sec); } -static void hp_82341_return_to_local(gpib_board_t *board) +static void hp_82341_return_to_local(struct gpib_board *board) { struct hp_82341_priv *priv = board->private_data; @@ -465,7 +465,7 @@ static gpib_interface_t hp_82341_interface = { .return_to_local = hp_82341_return_to_local, }; -static int hp_82341_allocate_private(gpib_board_t *board) +static int hp_82341_allocate_private(struct gpib_board *board) { board->private_data = kzalloc(sizeof(struct hp_82341_priv), GFP_KERNEL); if (!board->private_data) @@ -473,7 +473,7 @@ static int hp_82341_allocate_private(gpib_board_t *board) return 0; } -static void hp_82341_free_private(gpib_board_t *board) +static void hp_82341_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; @@ -686,7 +686,7 @@ static int clear_xilinx(struct hp_82341_priv *hp_priv) return 0; } -static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int hp_82341_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct hp_82341_priv *hp_priv; struct tms9914_priv *tms_priv; @@ -778,7 +778,7 @@ static int hp_82341_attach(gpib_board_t *board, const gpib_board_config_t *confi return 0; } -static void hp_82341_detach(gpib_board_t *board) +static void hp_82341_detach(struct gpib_board *board) { struct hp_82341_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv; @@ -844,7 +844,7 @@ module_exit(hp_82341_exit_module); /* * GPIB interrupt service routines */ -static unsigned short read_and_clear_event_status(gpib_board_t *board) +static unsigned short read_and_clear_event_status(struct gpib_board *board) { struct hp_82341_priv *hp_priv = board->private_data; unsigned long flags; @@ -860,7 +860,7 @@ static unsigned short read_and_clear_event_status(gpib_board_t *board) static irqreturn_t hp_82341_interrupt(int irq, void *arg) { int status1, status2; - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct hp_82341_priv *hp_priv = board->private_data; struct tms9914_priv *tms_priv = &hp_priv->tms9914_priv; unsigned long flags; From 517c64917ea6002bc2d497589fe74a815e56410f Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:16 +0000 Subject: [PATCH 1454/2157] staging: gpib: ines: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-13-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ines/ines.h | 54 ++++++------- drivers/staging/gpib/ines/ines_gpib.c | 111 +++++++++++++------------- 2 files changed, 83 insertions(+), 82 deletions(-) diff --git a/drivers/staging/gpib/ines/ines.h b/drivers/staging/gpib/ines/ines.h index 3918737fa21a..b17475aed046 100644 --- a/drivers/staging/gpib/ines/ines.h +++ b/drivers/staging/gpib/ines/ines.h @@ -36,41 +36,41 @@ struct ines_priv { }; // interface functions -int ines_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int ines_write(gpib_board_t *board, uint8_t *buffer, size_t length, +int ines_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); +int ines_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); -int ines_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, +int ines_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int ines_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, +int ines_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); -int ines_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written); -int ines_take_control(gpib_board_t *board, int synchronous); -int ines_go_to_standby(gpib_board_t *board); -void ines_request_system_control(gpib_board_t *board, int request_control); -void ines_interface_clear(gpib_board_t *board, int assert); -void ines_remote_enable(gpib_board_t *board, int enable); -int ines_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits); -void ines_disable_eos(gpib_board_t *board); -unsigned int ines_update_status(gpib_board_t *board, unsigned int clear_mask); -int ines_primary_address(gpib_board_t *board, unsigned int address); -int ines_secondary_address(gpib_board_t *board, unsigned int address, int enable); -int ines_parallel_poll(gpib_board_t *board, uint8_t *result); -void ines_parallel_poll_configure(gpib_board_t *board, uint8_t config); -void ines_parallel_poll_response(gpib_board_t *board, int ist); -void ines_serial_poll_response(gpib_board_t *board, uint8_t status); -uint8_t ines_serial_poll_status(gpib_board_t *board); -int ines_line_status(const gpib_board_t *board); -unsigned int ines_t1_delay(gpib_board_t *board, unsigned int nano_sec); -void ines_return_to_local(gpib_board_t *board); +int ines_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written); +int ines_take_control(struct gpib_board *board, int synchronous); +int ines_go_to_standby(struct gpib_board *board); +void ines_request_system_control(struct gpib_board *board, int request_control); +void ines_interface_clear(struct gpib_board *board, int assert); +void ines_remote_enable(struct gpib_board *board, int enable); +int ines_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits); +void ines_disable_eos(struct gpib_board *board); +unsigned int ines_update_status(struct gpib_board *board, unsigned int clear_mask); +int ines_primary_address(struct gpib_board *board, unsigned int address); +int ines_secondary_address(struct gpib_board *board, unsigned int address, int enable); +int ines_parallel_poll(struct gpib_board *board, uint8_t *result); +void ines_parallel_poll_configure(struct gpib_board *board, uint8_t config); +void ines_parallel_poll_response(struct gpib_board *board, int ist); +void ines_serial_poll_response(struct gpib_board *board, uint8_t status); +uint8_t ines_serial_poll_status(struct gpib_board *board); +int ines_line_status(const struct gpib_board *board); +unsigned int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec); +void ines_return_to_local(struct gpib_board *board); // interrupt service routines irqreturn_t ines_pci_interrupt(int irq, void *arg); -irqreturn_t ines_interrupt(gpib_board_t *board); +irqreturn_t ines_interrupt(struct gpib_board *board); // utility functions -void ines_free_private(gpib_board_t *board); -int ines_generic_attach(gpib_board_t *board); -void ines_online(struct ines_priv *priv, const gpib_board_t *board, int use_accel); +void ines_free_private(struct gpib_board *board); +int ines_generic_attach(struct gpib_board *board); +void ines_online(struct ines_priv *priv, const struct gpib_board *board, int use_accel); void ines_set_xfer_counter(struct ines_priv *priv, unsigned int count); /* inb/outb wrappers */ diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index d31eab1a05e4..983bb88a4376 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -25,7 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for Ines iGPIB 72010"); -int ines_line_status(const gpib_board_t *board) +int ines_line_status(const struct gpib_board *board) { int status = VALID_ALL; int bcm_bits; @@ -65,7 +65,7 @@ void ines_set_xfer_counter(struct ines_priv *priv, unsigned int count) ines_outb(priv, count & 0xff, XFER_COUNT_LOWER); } -unsigned int ines_t1_delay(gpib_board_t *board, unsigned int nano_sec) +unsigned int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct ines_priv *ines_priv = board->private_data; struct nec7210_priv *nec_priv = &ines_priv->nec7210_priv; @@ -95,7 +95,7 @@ static inline unsigned short num_in_fifo_bytes(struct ines_priv *ines_priv) return ines_inb(ines_priv, IN_FIFO_COUNT); } -static ssize_t pio_read(gpib_board_t *board, struct ines_priv *ines_priv, uint8_t *buffer, +static ssize_t pio_read(struct gpib_board *board, struct ines_priv *ines_priv, uint8_t *buffer, size_t length, size_t *nbytes) { ssize_t retval = 0; @@ -133,7 +133,7 @@ static ssize_t pio_read(gpib_board_t *board, struct ines_priv *ines_priv, uint8_ return retval; } -int ines_accel_read(gpib_board_t *board, uint8_t *buffer, +int ines_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -190,7 +190,7 @@ static inline unsigned short num_out_fifo_bytes(struct ines_priv *ines_priv) return ines_inb(ines_priv, OUT_FIFO_COUNT); } -static int ines_write_wait(gpib_board_t *board, struct ines_priv *ines_priv, +static int ines_write_wait(struct gpib_board *board, struct ines_priv *ines_priv, unsigned int fifo_threshold) { struct nec7210_priv *nec_priv = &ines_priv->nec7210_priv; @@ -213,7 +213,7 @@ static int ines_write_wait(gpib_board_t *board, struct ines_priv *ines_priv, return 0; } -int ines_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, +int ines_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { size_t count = 0; @@ -266,7 +266,7 @@ int ines_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, irqreturn_t ines_pci_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct ines_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -281,7 +281,7 @@ irqreturn_t ines_pci_interrupt(int irq, void *arg) return ines_interrupt(board); } -irqreturn_t ines_interrupt(gpib_board_t *board) +irqreturn_t ines_interrupt(struct gpib_board *board) { struct ines_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -313,12 +313,12 @@ irqreturn_t ines_interrupt(gpib_board_t *board) return IRQ_HANDLED; } -static int ines_pci_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ines_pci_accel_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ines_isa_attach(gpib_board_t *board, const gpib_board_config_t *config); +static int ines_pci_attach(struct gpib_board *board, const gpib_board_config_t *config); +static int ines_pci_accel_attach(struct gpib_board *board, const gpib_board_config_t *config); +static int ines_isa_attach(struct gpib_board *board, const gpib_board_config_t *config); -static void ines_pci_detach(gpib_board_t *board); -static void ines_isa_detach(gpib_board_t *board); +static void ines_pci_detach(struct gpib_board *board); +static void ines_isa_detach(struct gpib_board *board); enum ines_pci_vendor_ids { PCI_VENDOR_ID_INES_QUICKLOGIC = 0x16da @@ -393,7 +393,8 @@ static struct ines_pci_id pci_ids[] = { static const int num_pci_chips = ARRAY_SIZE(pci_ids); // wrappers for interface functions -int ines_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) +int ines_read(struct gpib_board *board, uint8_t *buffer, size_t length, + int *end, size_t *bytes_read) { struct ines_priv *priv = board->private_data; struct nec7210_priv *nec_priv = &priv->nec7210_priv; @@ -411,7 +412,7 @@ int ines_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, siz return retval; } -int ines_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +int ines_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct ines_priv *priv = board->private_data; @@ -419,119 +420,119 @@ int ines_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -int ines_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +int ines_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct ines_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -int ines_take_control(gpib_board_t *board, int synchronous) +int ines_take_control(struct gpib_board *board, int synchronous) { struct ines_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -int ines_go_to_standby(gpib_board_t *board) +int ines_go_to_standby(struct gpib_board *board) { struct ines_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -void ines_request_system_control(gpib_board_t *board, int request_control) +void ines_request_system_control(struct gpib_board *board, int request_control) { struct ines_priv *priv = board->private_data; nec7210_request_system_control(board, &priv->nec7210_priv, request_control); } -void ines_interface_clear(gpib_board_t *board, int assert) +void ines_interface_clear(struct gpib_board *board, int assert) { struct ines_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -void ines_remote_enable(gpib_board_t *board, int enable) +void ines_remote_enable(struct gpib_board *board, int enable) { struct ines_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -int ines_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +int ines_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct ines_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -void ines_disable_eos(gpib_board_t *board) +void ines_disable_eos(struct gpib_board *board) { struct ines_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -unsigned int ines_update_status(gpib_board_t *board, unsigned int clear_mask) +unsigned int ines_update_status(struct gpib_board *board, unsigned int clear_mask) { struct ines_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -int ines_primary_address(gpib_board_t *board, unsigned int address) +int ines_primary_address(struct gpib_board *board, unsigned int address) { struct ines_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -int ines_secondary_address(gpib_board_t *board, unsigned int address, int enable) +int ines_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct ines_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -int ines_parallel_poll(gpib_board_t *board, uint8_t *result) +int ines_parallel_poll(struct gpib_board *board, uint8_t *result) { struct ines_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -void ines_parallel_poll_configure(gpib_board_t *board, uint8_t config) +void ines_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct ines_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, config); } -void ines_parallel_poll_response(gpib_board_t *board, int ist) +void ines_parallel_poll_response(struct gpib_board *board, int ist) { struct ines_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -void ines_serial_poll_response(gpib_board_t *board, uint8_t status) +void ines_serial_poll_response(struct gpib_board *board, uint8_t status) { struct ines_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -uint8_t ines_serial_poll_status(gpib_board_t *board) +uint8_t ines_serial_poll_status(struct gpib_board *board) { struct ines_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -void ines_return_to_local(gpib_board_t *board) +void ines_return_to_local(struct gpib_board *board) { struct ines_priv *priv = board->private_data; @@ -650,7 +651,7 @@ static gpib_interface_t ines_isa_interface = { .return_to_local = ines_return_to_local, }; -static int ines_allocate_private(gpib_board_t *board) +static int ines_allocate_private(struct gpib_board *board) { struct ines_priv *priv; @@ -663,13 +664,13 @@ static int ines_allocate_private(gpib_board_t *board) return 0; } -void ines_free_private(gpib_board_t *board) +void ines_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -int ines_generic_attach(gpib_board_t *board) +int ines_generic_attach(struct gpib_board *board) { struct ines_priv *ines_priv; struct nec7210_priv *nec_priv; @@ -689,7 +690,7 @@ int ines_generic_attach(gpib_board_t *board) return 0; } -void ines_online(struct ines_priv *ines_priv, const gpib_board_t *board, int use_accel) +void ines_online(struct ines_priv *ines_priv, const struct gpib_board *board, int use_accel) { struct nec7210_priv *nec_priv = &ines_priv->nec7210_priv; @@ -723,7 +724,7 @@ void ines_online(struct ines_priv *ines_priv, const gpib_board_t *board, int use nec7210_set_reg_bits(nec_priv, IMR1, HR_DOIE | HR_DIIE, 0); } -static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ines_common_pci_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; struct nec7210_priv *nec_priv; @@ -851,7 +852,7 @@ static int ines_common_pci_attach(gpib_board_t *board, const gpib_board_config_t return 0; } -int ines_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +int ines_pci_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; int retval; @@ -866,7 +867,7 @@ int ines_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -int ines_pci_accel_attach(gpib_board_t *board, const gpib_board_config_t *config) +int ines_pci_accel_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; int retval; @@ -883,7 +884,7 @@ int ines_pci_accel_attach(gpib_board_t *board, const gpib_board_config_t *config static const int ines_isa_iosize = 0x20; -int ines_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +int ines_isa_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; struct nec7210_priv *nec_priv; @@ -914,7 +915,7 @@ int ines_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -void ines_pci_detach(gpib_board_t *board) +void ines_pci_detach(struct gpib_board *board) { struct ines_priv *ines_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -948,7 +949,7 @@ void ines_pci_detach(gpib_board_t *board) ines_free_private(board); } -void ines_isa_detach(gpib_board_t *board) +void ines_isa_detach(struct gpib_board *board) { struct ines_priv *ines_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -999,11 +1000,11 @@ static const int ines_pcmcia_iosize = 0x20; static int ines_gpib_config(struct pcmcia_device *link); static void ines_gpib_release(struct pcmcia_device *link); -static int ines_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config); -static int ines_pcmcia_accel_attach(gpib_board_t *board, const gpib_board_config_t *config); -static void ines_pcmcia_detach(gpib_board_t *board); +static int ines_pcmcia_attach(struct gpib_board *board, const gpib_board_config_t *config); +static int ines_pcmcia_accel_attach(struct gpib_board *board, const gpib_board_config_t *config); +static void ines_pcmcia_detach(struct gpib_board *board); static irqreturn_t ines_pcmcia_interrupt(int irq, void *arg); -static int ines_common_pcmcia_attach(gpib_board_t *board); +static int ines_common_pcmcia_attach(struct gpib_board *board); /* * A linked list of "instances" of the gpib device. Each actual * PCMCIA card corresponds to one device instance, and is described @@ -1035,7 +1036,7 @@ static struct pcmcia_device *curr_dev; struct local_info { struct pcmcia_device *p_dev; - gpib_board_t *dev; + struct gpib_board *dev; u_short manfid; u_short cardid; }; @@ -1086,7 +1087,7 @@ static int ines_gpib_probe(struct pcmcia_device *link) static void ines_gpib_remove(struct pcmcia_device *link) { struct local_info *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (info->dev) ines_pcmcia_detach(info->dev); @@ -1171,7 +1172,7 @@ static void ines_gpib_release(struct pcmcia_device *link) static int ines_gpib_suspend(struct pcmcia_device *link) { //struct local_info *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (link->open) dev_err(&link->dev, "Device still open\n"); @@ -1183,7 +1184,7 @@ static int ines_gpib_suspend(struct pcmcia_device *link) static int ines_gpib_resume(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; /*if (link->open) { * ni_gpib_probe(dev); / really? @@ -1300,12 +1301,12 @@ static gpib_interface_t ines_pcmcia_interface = { irqreturn_t ines_pcmcia_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; return ines_interrupt(board); } -int ines_common_pcmcia_attach(gpib_board_t *board) +int ines_common_pcmcia_attach(struct gpib_board *board) { struct ines_priv *ines_priv; struct nec7210_priv *nec_priv; @@ -1344,7 +1345,7 @@ int ines_common_pcmcia_attach(gpib_board_t *board) return 0; } -int ines_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) +int ines_pcmcia_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; int retval; @@ -1359,7 +1360,7 @@ int ines_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -int ines_pcmcia_accel_attach(gpib_board_t *board, const gpib_board_config_t *config) +int ines_pcmcia_accel_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct ines_priv *ines_priv; int retval; @@ -1374,7 +1375,7 @@ int ines_pcmcia_accel_attach(gpib_board_t *board, const gpib_board_config_t *con return 0; } -void ines_pcmcia_detach(gpib_board_t *board) +void ines_pcmcia_detach(struct gpib_board *board) { struct ines_priv *ines_priv = board->private_data; struct nec7210_priv *nec_priv; From 344a50b0f4eecc160c61d780f53d2f75586016ce Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:17 +0000 Subject: [PATCH 1455/2157] staging: gpib: lpvo_usb_gpib: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-14-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- .../gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 011096ece7d6..1675aa2aff6c 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -146,7 +146,7 @@ struct usb_gpib_priv { /* private data to the device */ #define GPIB_DEV (((struct usb_gpib_priv *)board->private_data)->dev) -static void show_status(gpib_board_t *board) +static void show_status(struct gpib_board *board) { DIA_LOG(2, "# - buffer_length %d\n", board->buffer_length); DIA_LOG(2, "# - status %lx\n", board->status); @@ -184,8 +184,8 @@ static struct mutex minors_lock; /* operations on usb_minors are to be prote struct usb_skel; static ssize_t skel_do_write(struct usb_skel *, const char *, size_t); static ssize_t skel_do_read(struct usb_skel *, char *, size_t); -static int skel_do_open(gpib_board_t *, int); -static int skel_do_release(gpib_board_t *); +static int skel_do_open(struct gpib_board *, int); +static int skel_do_release(struct gpib_board *); /* * usec_diff : take difference in MICROsec between two 'timespec' @@ -237,7 +237,7 @@ static int write_loop(void *dev, char *msg, int leng) * it has to be given explicitly. */ -static int send_command(gpib_board_t *board, char *msg, int leng) +static int send_command(struct gpib_board *board, char *msg, int leng) { char buffer[64]; int nchar; @@ -278,7 +278,7 @@ static int send_command(gpib_board_t *board, char *msg, int leng) * */ -static int set_control_line(gpib_board_t *board, int line, int value) +static int set_control_line(struct gpib_board *board, int line, int value) { char msg[] = USB_GPIB_SET_LINES; int retval; @@ -309,7 +309,7 @@ static int set_control_line(gpib_board_t *board, int line, int value) * @char_buf: the routine private data structure */ -static int one_char(gpib_board_t *board, struct char_buf *b) +static int one_char(struct gpib_board *board, struct char_buf *b) { struct timespec64 before, after; @@ -343,7 +343,7 @@ static int one_char(gpib_board_t *board, struct char_buf *b) * not supported. */ -static void set_timeout(gpib_board_t *board) +static void set_timeout(struct gpib_board *board) { int n, val; char command[sizeof(USB_GPIB_TTMO) + 6]; @@ -391,7 +391,7 @@ static void set_timeout(gpib_board_t *board) * detach() will be called. Always. */ -static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int usb_gpib_attach(struct gpib_board *board, const gpib_board_config_t *config) { int retval, j; u32 base = config->ibbase; @@ -510,7 +510,7 @@ static int usb_gpib_attach(gpib_board_t *board, const gpib_board_config_t *confi * */ -static void usb_gpib_detach(gpib_board_t *board) +static void usb_gpib_detach(struct gpib_board *board) { int retval; @@ -537,7 +537,7 @@ static void usb_gpib_detach(gpib_board_t *board) * Other functions follow in alphabetical order */ /* command */ -static int usb_gpib_command(gpib_board_t *board, +static int usb_gpib_command(struct gpib_board *board, u8 *buffer, size_t length, size_t *bytes_written) @@ -570,7 +570,7 @@ static int usb_gpib_command(gpib_board_t *board, * Cannot do nothing here, but remember for future use. */ -static void usb_gpib_disable_eos(gpib_board_t *board) +static void usb_gpib_disable_eos(struct gpib_board *board) { ((struct usb_gpib_priv *)board->private_data)->eos_flags &= ~REOS; DIA_LOG(1, "done: %x\n", @@ -586,7 +586,7 @@ static void usb_gpib_disable_eos(gpib_board_t *board) * */ -static int usb_gpib_enable_eos(gpib_board_t *board, +static int usb_gpib_enable_eos(struct gpib_board *board, u8 eos_byte, int compare_8_bits) { @@ -606,7 +606,7 @@ static int usb_gpib_enable_eos(gpib_board_t *board, * @board: the gpib_board data area for this gpib interface */ -static int usb_gpib_go_to_standby(gpib_board_t *board) +static int usb_gpib_go_to_standby(struct gpib_board *board) { int retval = set_control_line(board, IB_BUS_ATN, 0); @@ -628,7 +628,7 @@ static int usb_gpib_go_to_standby(gpib_board_t *board) * the de-assert request. */ -static void usb_gpib_interface_clear(gpib_board_t *board, int assert) +static void usb_gpib_interface_clear(struct gpib_board *board, int assert) { int retval = 0; @@ -655,7 +655,7 @@ static void usb_gpib_interface_clear(gpib_board_t *board, int assert) #define WQH head #define WQE entry -static int usb_gpib_line_status(const gpib_board_t *board) +static int usb_gpib_line_status(const struct gpib_board *board) { int buffer; int line_status = VALID_ALL; /* all lines will be read */ @@ -686,7 +686,7 @@ static int usb_gpib_line_status(const gpib_board_t *board) msleep(sleep); } - buffer = send_command((gpib_board_t *)board, USB_GPIB_STATUS, 0); + buffer = send_command((struct gpib_board *)board, USB_GPIB_STATUS, 0); if (buffer < 0) { dev_err(board->gpib_dev, "line status read failed with %d\n", buffer); @@ -717,7 +717,7 @@ static int usb_gpib_line_status(const gpib_board_t *board) /* parallel_poll */ -static int usb_gpib_parallel_poll(gpib_board_t *board, uint8_t *result) +static int usb_gpib_parallel_poll(struct gpib_board *board, uint8_t *result) { /* request parallel poll asserting ATN | EOI; * we suppose ATN already asserted @@ -744,7 +744,7 @@ static int usb_gpib_parallel_poll(gpib_board_t *board, uint8_t *result) /* read */ -static int usb_gpib_read(gpib_board_t *board, +static int usb_gpib_read(struct gpib_board *board, u8 *buffer, size_t length, int *end, @@ -908,7 +908,7 @@ static int usb_gpib_read(gpib_board_t *board, /* remote_enable */ -static void usb_gpib_remote_enable(gpib_board_t *board, int enable) +static void usb_gpib_remote_enable(struct gpib_board *board, int enable) { int retval; @@ -921,7 +921,7 @@ static void usb_gpib_remote_enable(gpib_board_t *board, int enable) /* request_system_control */ -static void usb_gpib_request_system_control(gpib_board_t *board, +static void usb_gpib_request_system_control(struct gpib_board *board, int request_control) { if (request_control) @@ -935,7 +935,7 @@ static void usb_gpib_request_system_control(gpib_board_t *board, /* take_control */ /* beware: the sync flag is ignored; what is its real meaning? */ -static int usb_gpib_take_control(gpib_board_t *board, int sync) +static int usb_gpib_take_control(struct gpib_board *board, int sync) { int retval; @@ -950,7 +950,7 @@ static int usb_gpib_take_control(gpib_board_t *board, int sync) /* update_status */ -static unsigned int usb_gpib_update_status(gpib_board_t *board, +static unsigned int usb_gpib_update_status(struct gpib_board *board, unsigned int clear_mask) { /* There is nothing we can do here, I guess */ @@ -965,7 +965,7 @@ static unsigned int usb_gpib_update_status(gpib_board_t *board, /* write */ /* beware: DLE characters are not escaped - can only send ASCII data */ -static int usb_gpib_write(gpib_board_t *board, +static int usb_gpib_write(struct gpib_board *board, u8 *buffer, size_t length, int send_eoi, @@ -1008,33 +1008,33 @@ static int usb_gpib_write(gpib_board_t *board, /* parallel_poll configure */ -static void usb_gpib_parallel_poll_configure(gpib_board_t *board, +static void usb_gpib_parallel_poll_configure(struct gpib_board *board, uint8_t configuration) { } /* parallel_poll_response */ -static void usb_gpib_parallel_poll_response(gpib_board_t *board, int ist) +static void usb_gpib_parallel_poll_response(struct gpib_board *board, int ist) { } /* primary_address */ -static int usb_gpib_primary_address(gpib_board_t *board, unsigned int address) +static int usb_gpib_primary_address(struct gpib_board *board, unsigned int address) { return 0; } /* return_to_local */ -static void usb_gpib_return_to_local(gpib_board_t *board) +static void usb_gpib_return_to_local(struct gpib_board *board) { } /* secondary_address */ -static int usb_gpib_secondary_address(gpib_board_t *board, +static int usb_gpib_secondary_address(struct gpib_board *board, unsigned int address, int enable) { @@ -1043,20 +1043,20 @@ static int usb_gpib_secondary_address(gpib_board_t *board, /* serial_poll_response */ -static void usb_gpib_serial_poll_response(gpib_board_t *board, uint8_t status) +static void usb_gpib_serial_poll_response(struct gpib_board *board, uint8_t status) { } /* serial_poll_status */ -static uint8_t usb_gpib_serial_poll_status(gpib_board_t *board) +static uint8_t usb_gpib_serial_poll_status(struct gpib_board *board) { return 0; } /* t1_delay */ -static unsigned int usb_gpib_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int usb_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) { return 0; } @@ -1295,7 +1295,7 @@ static void skel_delete(struct kref *kref) * skel_do_open() - to be called by usb_gpib_attach */ -static int skel_do_open(gpib_board_t *board, int subminor) +static int skel_do_open(struct gpib_board *board, int subminor) { struct usb_skel *dev; struct usb_interface *interface; @@ -1332,7 +1332,7 @@ static int skel_do_open(gpib_board_t *board, int subminor) * skel_do_release() - to be called by usb_gpib_detach */ -static int skel_do_release(gpib_board_t *board) +static int skel_do_release(struct gpib_board *board) { struct usb_skel *dev; From 4a55f2e13401515b43ee9816ca36c3ca4d06509e Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:18 +0000 Subject: [PATCH 1456/2157] staging: gpib: nec7210 struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-15-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/include/nec7210.h | 58 ++++++++++---------- drivers/staging/gpib/nec7210/nec7210.c | 76 +++++++++++++------------- 2 files changed, 68 insertions(+), 66 deletions(-) diff --git a/drivers/staging/gpib/include/nec7210.h b/drivers/staging/gpib/include/nec7210.h index ca998c4a84bf..6c49283bd139 100644 --- a/drivers/staging/gpib/include/nec7210.h +++ b/drivers/staging/gpib/include/nec7210.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +//* SPDX-License-Identifier: GPL-2.0 */ /*************************************************************************** * copyright : (C) 2002 by Frank Mori Hess @@ -78,48 +78,48 @@ enum { }; // interface functions -int nec7210_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +int nec7210_read(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int nec7210_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +int nec7210_write(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); -int nec7210_command(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +int nec7210_command(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written); -int nec7210_take_control(gpib_board_t *board, struct nec7210_priv *priv, int syncronous); -int nec7210_go_to_standby(gpib_board_t *board, struct nec7210_priv *priv); -void nec7210_request_system_control(gpib_board_t *board, +int nec7210_take_control(struct gpib_board *board, struct nec7210_priv *priv, int syncronous); +int nec7210_go_to_standby(struct gpib_board *board, struct nec7210_priv *priv); +void nec7210_request_system_control(struct gpib_board *board, struct nec7210_priv *priv, int request_control); -void nec7210_interface_clear(gpib_board_t *board, struct nec7210_priv *priv, int assert); -void nec7210_remote_enable(gpib_board_t *board, struct nec7210_priv *priv, int enable); -int nec7210_enable_eos(gpib_board_t *board, struct nec7210_priv *priv, uint8_t eos_bytes, +void nec7210_interface_clear(struct gpib_board *board, struct nec7210_priv *priv, int assert); +void nec7210_remote_enable(struct gpib_board *board, struct nec7210_priv *priv, int enable); +int nec7210_enable_eos(struct gpib_board *board, struct nec7210_priv *priv, uint8_t eos_bytes, int compare_8_bits); -void nec7210_disable_eos(gpib_board_t *board, struct nec7210_priv *priv); -unsigned int nec7210_update_status(gpib_board_t *board, struct nec7210_priv *priv, +void nec7210_disable_eos(struct gpib_board *board, struct nec7210_priv *priv); +unsigned int nec7210_update_status(struct gpib_board *board, struct nec7210_priv *priv, unsigned int clear_mask); -unsigned int nec7210_update_status_nolock(gpib_board_t *board, struct nec7210_priv *priv); -int nec7210_primary_address(const gpib_board_t *board, +unsigned int nec7210_update_status_nolock(struct gpib_board *board, struct nec7210_priv *priv); +int nec7210_primary_address(const struct gpib_board *board, struct nec7210_priv *priv, unsigned int address); -int nec7210_secondary_address(const gpib_board_t *board, struct nec7210_priv *priv, +int nec7210_secondary_address(const struct gpib_board *board, struct nec7210_priv *priv, unsigned int address, int enable); -int nec7210_parallel_poll(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *result); -void nec7210_serial_poll_response(gpib_board_t *board, struct nec7210_priv *priv, uint8_t status); -void nec7210_parallel_poll_configure(gpib_board_t *board, +int nec7210_parallel_poll(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *result); +void nec7210_serial_poll_response(struct gpib_board *board, struct nec7210_priv *priv, uint8_t status); +void nec7210_parallel_poll_configure(struct gpib_board *board, struct nec7210_priv *priv, unsigned int configuration); -void nec7210_parallel_poll_response(gpib_board_t *board, +void nec7210_parallel_poll_response(struct gpib_board *board, struct nec7210_priv *priv, int ist); -uint8_t nec7210_serial_poll_status(gpib_board_t *board, +uint8_t nec7210_serial_poll_status(struct gpib_board *board, struct nec7210_priv *priv); -unsigned int nec7210_t1_delay(gpib_board_t *board, +unsigned int nec7210_t1_delay(struct gpib_board *board, struct nec7210_priv *priv, unsigned int nano_sec); -void nec7210_return_to_local(const gpib_board_t *board, struct nec7210_priv *priv); +void nec7210_return_to_local(const struct gpib_board *board, struct nec7210_priv *priv); // utility functions -void nec7210_board_reset(struct nec7210_priv *priv, const gpib_board_t *board); -void nec7210_board_online(struct nec7210_priv *priv, const gpib_board_t *board); +void nec7210_board_reset(struct nec7210_priv *priv, const struct gpib_board *board); +void nec7210_board_online(struct nec7210_priv *priv, const struct gpib_board *board); unsigned int nec7210_set_reg_bits(struct nec7210_priv *priv, unsigned int reg, unsigned int mask, unsigned int bits); -void nec7210_set_handshake_mode(gpib_board_t *board, struct nec7210_priv *priv, int mode); -void nec7210_release_rfd_holdoff(gpib_board_t *board, struct nec7210_priv *priv); -uint8_t nec7210_read_data_in(gpib_board_t *board, struct nec7210_priv *priv, int *end); +void nec7210_set_handshake_mode(struct gpib_board *board, struct nec7210_priv *priv, int mode); +void nec7210_release_rfd_holdoff(struct gpib_board *board, struct nec7210_priv *priv); +uint8_t nec7210_read_data_in(struct gpib_board *board, struct nec7210_priv *priv, int *end); // wrappers for io functions uint8_t nec7210_ioport_read_byte(struct nec7210_priv *priv, unsigned int register_num); @@ -134,8 +134,8 @@ void nec7210_locking_iomem_write_byte(struct nec7210_priv *priv, uint8_t data, unsigned int register_num); // interrupt service routine -irqreturn_t nec7210_interrupt(gpib_board_t *board, struct nec7210_priv *priv); -irqreturn_t nec7210_interrupt_have_status(gpib_board_t *board, +irqreturn_t nec7210_interrupt(struct gpib_board *board, struct nec7210_priv *priv); +irqreturn_t nec7210_interrupt_have_status(struct gpib_board *board, struct nec7210_priv *priv, int status1, int status2); #endif //_NEC7210_H diff --git a/drivers/staging/gpib/nec7210/nec7210.c b/drivers/staging/gpib/nec7210/nec7210.c index 85f1e79d658a..773495cde9d8 100644 --- a/drivers/staging/gpib/nec7210/nec7210.c +++ b/drivers/staging/gpib/nec7210/nec7210.c @@ -23,7 +23,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB library code for NEC uPD7210"); -int nec7210_enable_eos(gpib_board_t *board, struct nec7210_priv *priv, uint8_t eos_byte, +int nec7210_enable_eos(struct gpib_board *board, struct nec7210_priv *priv, uint8_t eos_byte, int compare_8_bits) { write_byte(priv, eos_byte, EOSR); @@ -37,14 +37,14 @@ int nec7210_enable_eos(gpib_board_t *board, struct nec7210_priv *priv, uint8_t e } EXPORT_SYMBOL(nec7210_enable_eos); -void nec7210_disable_eos(gpib_board_t *board, struct nec7210_priv *priv) +void nec7210_disable_eos(struct gpib_board *board, struct nec7210_priv *priv) { priv->auxa_bits &= ~HR_REOS; write_byte(priv, priv->auxa_bits, AUXMR); } EXPORT_SYMBOL(nec7210_disable_eos); -int nec7210_parallel_poll(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *result) +int nec7210_parallel_poll(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *result) { int ret; @@ -64,14 +64,14 @@ int nec7210_parallel_poll(gpib_board_t *board, struct nec7210_priv *priv, uint8_ } EXPORT_SYMBOL(nec7210_parallel_poll); -void nec7210_parallel_poll_configure(gpib_board_t *board, +void nec7210_parallel_poll_configure(struct gpib_board *board, struct nec7210_priv *priv, unsigned int configuration) { write_byte(priv, PPR | configuration, AUXMR); } EXPORT_SYMBOL(nec7210_parallel_poll_configure); -void nec7210_parallel_poll_response(gpib_board_t *board, struct nec7210_priv *priv, int ist) +void nec7210_parallel_poll_response(struct gpib_board *board, struct nec7210_priv *priv, int ist) { if (ist) write_byte(priv, AUX_SPPF, AUXMR); @@ -85,7 +85,8 @@ EXPORT_SYMBOL(nec7210_parallel_poll_response); * the 488.2 capability (for example with NI chips), or we need to implement the * 488.2 set srv state machine in the driver (if that is even viable). */ -void nec7210_serial_poll_response(gpib_board_t *board, struct nec7210_priv *priv, uint8_t status) +void nec7210_serial_poll_response(struct gpib_board *board, + struct nec7210_priv *priv, uint8_t status) { unsigned long flags; @@ -102,13 +103,13 @@ void nec7210_serial_poll_response(gpib_board_t *board, struct nec7210_priv *priv } EXPORT_SYMBOL(nec7210_serial_poll_response); -uint8_t nec7210_serial_poll_status(gpib_board_t *board, struct nec7210_priv *priv) +uint8_t nec7210_serial_poll_status(struct gpib_board *board, struct nec7210_priv *priv) { return read_byte(priv, SPSR); } EXPORT_SYMBOL(nec7210_serial_poll_status); -int nec7210_primary_address(const gpib_board_t *board, struct nec7210_priv *priv, +int nec7210_primary_address(const struct gpib_board *board, struct nec7210_priv *priv, unsigned int address) { // put primary address in address0 @@ -117,7 +118,7 @@ int nec7210_primary_address(const gpib_board_t *board, struct nec7210_priv *priv } EXPORT_SYMBOL(nec7210_primary_address); -int nec7210_secondary_address(const gpib_board_t *board, struct nec7210_priv *priv, +int nec7210_secondary_address(const struct gpib_board *board, struct nec7210_priv *priv, unsigned int address, int enable) { if (enable) { @@ -166,7 +167,7 @@ static void update_listener_state(struct nec7210_priv *priv, unsigned int addres } } -unsigned int nec7210_update_status_nolock(gpib_board_t *board, struct nec7210_priv *priv) +unsigned int nec7210_update_status_nolock(struct gpib_board *board, struct nec7210_priv *priv) { int address_status_bits; u8 spoll_status; @@ -209,7 +210,7 @@ unsigned int nec7210_update_status_nolock(gpib_board_t *board, struct nec7210_pr } EXPORT_SYMBOL(nec7210_update_status_nolock); -unsigned int nec7210_update_status(gpib_board_t *board, struct nec7210_priv *priv, +unsigned int nec7210_update_status(struct gpib_board *board, struct nec7210_priv *priv, unsigned int clear_mask) { unsigned long flags; @@ -234,7 +235,7 @@ unsigned int nec7210_set_reg_bits(struct nec7210_priv *priv, unsigned int reg, } EXPORT_SYMBOL(nec7210_set_reg_bits); -void nec7210_set_handshake_mode(gpib_board_t *board, struct nec7210_priv *priv, int mode) +void nec7210_set_handshake_mode(struct gpib_board *board, struct nec7210_priv *priv, int mode) { unsigned long flags; @@ -250,7 +251,7 @@ void nec7210_set_handshake_mode(gpib_board_t *board, struct nec7210_priv *priv, } EXPORT_SYMBOL(nec7210_set_handshake_mode); -uint8_t nec7210_read_data_in(gpib_board_t *board, struct nec7210_priv *priv, int *end) +uint8_t nec7210_read_data_in(struct gpib_board *board, struct nec7210_priv *priv, int *end) { unsigned long flags; u8 data; @@ -268,7 +269,7 @@ uint8_t nec7210_read_data_in(gpib_board_t *board, struct nec7210_priv *priv, int } EXPORT_SYMBOL(nec7210_read_data_in); -int nec7210_take_control(gpib_board_t *board, struct nec7210_priv *priv, int syncronous) +int nec7210_take_control(struct gpib_board *board, struct nec7210_priv *priv, int syncronous) { int i; const int timeout = 100; @@ -295,7 +296,7 @@ int nec7210_take_control(gpib_board_t *board, struct nec7210_priv *priv, int syn } EXPORT_SYMBOL(nec7210_take_control); -int nec7210_go_to_standby(gpib_board_t *board, struct nec7210_priv *priv) +int nec7210_go_to_standby(struct gpib_board *board, struct nec7210_priv *priv) { int i; const int timeout = 1000; @@ -329,7 +330,7 @@ int nec7210_go_to_standby(gpib_board_t *board, struct nec7210_priv *priv) } EXPORT_SYMBOL(nec7210_go_to_standby); -void nec7210_request_system_control(gpib_board_t *board, struct nec7210_priv *priv, +void nec7210_request_system_control(struct gpib_board *board, struct nec7210_priv *priv, int request_control) { if (request_control == 0) { @@ -340,7 +341,7 @@ void nec7210_request_system_control(gpib_board_t *board, struct nec7210_priv *pr } EXPORT_SYMBOL(nec7210_request_system_control); -void nec7210_interface_clear(gpib_board_t *board, struct nec7210_priv *priv, int assert) +void nec7210_interface_clear(struct gpib_board *board, struct nec7210_priv *priv, int assert) { if (assert) write_byte(priv, AUX_SIFC, AUXMR); @@ -349,7 +350,7 @@ void nec7210_interface_clear(gpib_board_t *board, struct nec7210_priv *priv, int } EXPORT_SYMBOL(nec7210_interface_clear); -void nec7210_remote_enable(gpib_board_t *board, struct nec7210_priv *priv, int enable) +void nec7210_remote_enable(struct gpib_board *board, struct nec7210_priv *priv, int enable) { if (enable) write_byte(priv, AUX_SREN, AUXMR); @@ -358,7 +359,7 @@ void nec7210_remote_enable(gpib_board_t *board, struct nec7210_priv *priv, int e } EXPORT_SYMBOL(nec7210_remote_enable); -void nec7210_release_rfd_holdoff(gpib_board_t *board, struct nec7210_priv *priv) +void nec7210_release_rfd_holdoff(struct gpib_board *board, struct nec7210_priv *priv) { unsigned long flags; @@ -372,7 +373,7 @@ void nec7210_release_rfd_holdoff(gpib_board_t *board, struct nec7210_priv *priv) } EXPORT_SYMBOL(nec7210_release_rfd_holdoff); -unsigned int nec7210_t1_delay(gpib_board_t *board, struct nec7210_priv *priv, +unsigned int nec7210_t1_delay(struct gpib_board *board, struct nec7210_priv *priv, unsigned int nano_sec) { unsigned int retval; @@ -390,13 +391,13 @@ unsigned int nec7210_t1_delay(gpib_board_t *board, struct nec7210_priv *priv, } EXPORT_SYMBOL(nec7210_t1_delay); -void nec7210_return_to_local(const gpib_board_t *board, struct nec7210_priv *priv) +void nec7210_return_to_local(const struct gpib_board *board, struct nec7210_priv *priv) { write_byte(priv, AUX_RTL, AUXMR); } EXPORT_SYMBOL(nec7210_return_to_local); -static inline short nec7210_atn_has_changed(gpib_board_t *board, struct nec7210_priv *priv) +static inline short nec7210_atn_has_changed(struct gpib_board *board, struct nec7210_priv *priv) { short address_status_bits = read_byte(priv, ADSR); @@ -414,7 +415,7 @@ static inline short nec7210_atn_has_changed(gpib_board_t *board, struct nec7210_ return -1; } -int nec7210_command(gpib_board_t *board, struct nec7210_priv *priv, uint8_t +int nec7210_command(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written) { int retval = 0; @@ -463,7 +464,7 @@ int nec7210_command(gpib_board_t *board, struct nec7210_priv *priv, uint8_t } EXPORT_SYMBOL(nec7210_command); -static int pio_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +static int pio_read(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -512,7 +513,7 @@ static int pio_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buf } #ifdef NEC_DMA -static ssize_t __dma_read(gpib_board_t *board, struct nec7210_priv *priv, size_t length) +static ssize_t __dma_read(struct gpib_board *board, struct nec7210_priv *priv, size_t length) { ssize_t retval = 0; size_t count = 0; @@ -567,7 +568,7 @@ static ssize_t __dma_read(gpib_board_t *board, struct nec7210_priv *priv, size_t return retval ? retval : count; } -static ssize_t dma_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +static ssize_t dma_read(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length) { size_t remain = length; @@ -594,7 +595,7 @@ static ssize_t dma_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t } #endif -int nec7210_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +int nec7210_read(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -615,7 +616,7 @@ int nec7210_read(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer } EXPORT_SYMBOL(nec7210_read); -static int pio_write_wait(gpib_board_t *board, struct nec7210_priv *priv, +static int pio_write_wait(struct gpib_board *board, struct nec7210_priv *priv, short wake_on_lacs, short wake_on_atn, short wake_on_bus_error) { // wait until byte is ready to be sent @@ -641,7 +642,7 @@ static int pio_write_wait(gpib_board_t *board, struct nec7210_priv *priv, return 0; } -static int pio_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +static int pio_write(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written) { size_t last_count = 0; @@ -684,7 +685,7 @@ static int pio_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *bu } #ifdef NEC_DMA -static ssize_t __dma_write(gpib_board_t *board, struct nec7210_priv *priv, dma_addr_t address, +static ssize_t __dma_write(struct gpib_board *board, struct nec7210_priv *priv, dma_addr_t address, size_t length) { unsigned long flags, dma_irq_flags; @@ -741,7 +742,7 @@ static ssize_t __dma_write(gpib_board_t *board, struct nec7210_priv *priv, dma_a return retval ? retval : length; } -static ssize_t dma_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, +static ssize_t dma_write(struct gpib_board *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length) { size_t remain = length; @@ -765,8 +766,9 @@ static ssize_t dma_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t return length - remain; } #endif -int nec7210_write(gpib_board_t *board, struct nec7210_priv *priv, uint8_t *buffer, size_t length, - int send_eoi, size_t *bytes_written) +int nec7210_write(struct gpib_board *board, struct nec7210_priv *priv, + uint8_t *buffer, size_t length, int send_eoi, + size_t *bytes_written) { int retval = 0; @@ -827,7 +829,7 @@ EXPORT_SYMBOL(nec7210_write); /* * interrupt service routine */ -irqreturn_t nec7210_interrupt(gpib_board_t *board, struct nec7210_priv *priv) +irqreturn_t nec7210_interrupt(struct gpib_board *board, struct nec7210_priv *priv) { int status1, status2; @@ -839,7 +841,7 @@ irqreturn_t nec7210_interrupt(gpib_board_t *board, struct nec7210_priv *priv) } EXPORT_SYMBOL(nec7210_interrupt); -irqreturn_t nec7210_interrupt_have_status(gpib_board_t *board, +irqreturn_t nec7210_interrupt_have_status(struct gpib_board *board, struct nec7210_priv *priv, int status1, int status2) { #ifdef NEC_DMA @@ -957,7 +959,7 @@ irqreturn_t nec7210_interrupt_have_status(gpib_board_t *board, } EXPORT_SYMBOL(nec7210_interrupt_have_status); -void nec7210_board_reset(struct nec7210_priv *priv, const gpib_board_t *board) +void nec7210_board_reset(struct nec7210_priv *priv, const struct gpib_board *board) { /* 7210 chip reset */ write_byte(priv, AUX_CR, AUXMR); @@ -991,7 +993,7 @@ void nec7210_board_reset(struct nec7210_priv *priv, const gpib_board_t *board) } EXPORT_SYMBOL(nec7210_board_reset); -void nec7210_board_online(struct nec7210_priv *priv, const gpib_board_t *board) +void nec7210_board_online(struct nec7210_priv *priv, const struct gpib_board *board) { /* set GPIB address */ nec7210_primary_address(board, priv, board->pad); From f3ac015f4cee819a81f38e6fae6fb161e7010696 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:19 +0000 Subject: [PATCH 1457/2157] staging: gpib: ni_usb_gpib: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-16-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 70 +++++++++++------------ 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 2de5aaea2615..fdc8077d646e 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("GPIB driver for National Instruments USB devices"); static struct usb_interface *ni_usb_driver_interfaces[MAX_NUM_NI_USB_INTERFACES]; static int ni_usb_parse_status_block(const u8 *buffer, struct ni_usb_status_block *status); -static int ni_usb_set_interrupt_monitor(gpib_board_t *board, unsigned int monitored_bits); +static int ni_usb_set_interrupt_monitor(struct gpib_board *board, unsigned int monitored_bits); static void ni_usb_stop(struct ni_usb_priv *ni_priv); static DEFINE_MUTEX(ni_usb_hotplug_lock); @@ -310,7 +310,7 @@ static int ni_usb_receive_control_msg(struct ni_usb_priv *ni_priv, __u8 request, return retval; } -static void ni_usb_soft_update_status(gpib_board_t *board, unsigned int ni_usb_ibsta, +static void ni_usb_soft_update_status(struct gpib_board *board, unsigned int ni_usb_ibsta, unsigned int clear_mask) { static const unsigned int ni_usb_ibsta_mask = SRQI | ATN | CIC | REM | LACS | TACS | LOK; @@ -586,7 +586,7 @@ static int ni_usb_write_registers(struct ni_usb_priv *ni_priv, } // interface functions -static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, +static int ni_usb_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { int retval, parse_retval; @@ -716,7 +716,7 @@ static int ni_usb_read(gpib_board_t *board, uint8_t *buffer, size_t length, return retval; } -static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int ni_usb_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { int retval; @@ -819,7 +819,7 @@ static int ni_usb_write(gpib_board_t *board, uint8_t *buffer, size_t length, return retval; } -static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t length, +static int ni_usb_command_chunk(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *command_bytes_written) { int retval; @@ -912,7 +912,7 @@ static int ni_usb_command_chunk(gpib_board_t *board, uint8_t *buffer, size_t len return 0; } -static int ni_usb_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int ni_usb_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { size_t count; @@ -929,7 +929,7 @@ static int ni_usb_command(gpib_board_t *board, uint8_t *buffer, size_t length, return 0; } -static int ni_usb_take_control(gpib_board_t *board, int synchronous) +static int ni_usb_take_control(struct gpib_board *board, int synchronous) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -990,7 +990,7 @@ static int ni_usb_take_control(gpib_board_t *board, int synchronous) return retval; } -static int ni_usb_go_to_standby(gpib_board_t *board) +static int ni_usb_go_to_standby(struct gpib_board *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1049,7 +1049,7 @@ static int ni_usb_go_to_standby(gpib_board_t *board) return 0; } -static void ni_usb_request_system_control(gpib_board_t *board, int request_control) +static void ni_usb_request_system_control(struct gpib_board *board, int request_control) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1100,7 +1100,7 @@ static void ni_usb_request_system_control(gpib_board_t *board, int request_contr } //FIXME maybe the interface should have a "pulse interface clear" function that can return an error? -static void ni_usb_interface_clear(gpib_board_t *board, int assert) +static void ni_usb_interface_clear(struct gpib_board *board, int assert) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1149,7 +1149,7 @@ static void ni_usb_interface_clear(gpib_board_t *board, int assert) ni_usb_soft_update_status(board, status.ibsta, 0); } -static void ni_usb_remote_enable(gpib_board_t *board, int enable) +static void ni_usb_remote_enable(struct gpib_board *board, int enable) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1176,7 +1176,7 @@ static void ni_usb_remote_enable(gpib_board_t *board, int enable) return;// 0; } -static int ni_usb_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int ni_usb_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct ni_usb_priv *ni_priv = board->private_data; @@ -1189,7 +1189,7 @@ static int ni_usb_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_ return 0; } -static void ni_usb_disable_eos(gpib_board_t *board) +static void ni_usb_disable_eos(struct gpib_board *board) { struct ni_usb_priv *ni_priv = board->private_data; /* adapter gets unhappy if you don't zero all the bits @@ -1199,7 +1199,7 @@ static void ni_usb_disable_eos(gpib_board_t *board) ni_priv->eos_char = 0; } -static unsigned int ni_usb_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int ni_usb_update_status(struct gpib_board *board, unsigned int clear_mask) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1254,7 +1254,7 @@ static void ni_usb_stop(struct ni_usb_priv *ni_priv) kfree(buffer); } -static int ni_usb_primary_address(gpib_board_t *board, unsigned int address) +static int ni_usb_primary_address(struct gpib_board *board, unsigned int address) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1312,7 +1312,7 @@ static int ni_usb_write_sad(struct ni_usb_register *writes, int address, int ena return i; } -static int ni_usb_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int ni_usb_secondary_address(struct gpib_board *board, unsigned int address, int enable) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1334,7 +1334,7 @@ static int ni_usb_secondary_address(gpib_board_t *board, unsigned int address, i return 0; } -static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) +static int ni_usb_parallel_poll(struct gpib_board *board, uint8_t *result) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1389,7 +1389,7 @@ static int ni_usb_parallel_poll(gpib_board_t *board, uint8_t *result) return retval; } -static void ni_usb_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void ni_usb_parallel_poll_configure(struct gpib_board *board, uint8_t config) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1414,7 +1414,7 @@ static void ni_usb_parallel_poll_configure(gpib_board_t *board, uint8_t config) return;// 0; } -static void ni_usb_parallel_poll_response(gpib_board_t *board, int ist) +static void ni_usb_parallel_poll_response(struct gpib_board *board, int ist) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1442,7 +1442,7 @@ static void ni_usb_parallel_poll_response(gpib_board_t *board, int ist) return;// 0; } -static void ni_usb_serial_poll_response(gpib_board_t *board, u8 status) +static void ni_usb_serial_poll_response(struct gpib_board *board, u8 status) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1467,12 +1467,12 @@ static void ni_usb_serial_poll_response(gpib_board_t *board, u8 status) return;// 0; } -static uint8_t ni_usb_serial_poll_status(gpib_board_t *board) +static uint8_t ni_usb_serial_poll_status(struct gpib_board *board) { return 0; } -static void ni_usb_return_to_local(gpib_board_t *board) +static void ni_usb_return_to_local(struct gpib_board *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1497,7 +1497,7 @@ static void ni_usb_return_to_local(gpib_board_t *board) return;// 0; } -static int ni_usb_line_status(const gpib_board_t *board) +static int ni_usb_line_status(const struct gpib_board *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1616,7 +1616,7 @@ static int ni_usb_setup_t1_delay(struct ni_usb_register *reg, unsigned int nano_ return i; } -static unsigned int ni_usb_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int ni_usb_t1_delay(struct gpib_board *board, unsigned int nano_sec) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1640,7 +1640,7 @@ static unsigned int ni_usb_t1_delay(gpib_board_t *board, unsigned int nano_sec) return actual_ns; } -static int ni_usb_allocate_private(gpib_board_t *board) +static int ni_usb_allocate_private(struct gpib_board *board) { struct ni_usb_priv *ni_priv; @@ -1663,7 +1663,7 @@ static void ni_usb_free_private(struct ni_usb_priv *ni_priv) } #define NUM_INIT_WRITES 26 -static int ni_usb_setup_init(gpib_board_t *board, struct ni_usb_register *writes) +static int ni_usb_setup_init(struct gpib_board *board, struct ni_usb_register *writes) { struct ni_usb_priv *ni_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); @@ -1770,7 +1770,7 @@ static int ni_usb_setup_init(gpib_board_t *board, struct ni_usb_register *writes return i; } -static int ni_usb_init(gpib_board_t *board) +static int ni_usb_init(struct gpib_board *board) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1799,7 +1799,7 @@ static int ni_usb_init(gpib_board_t *board) static void ni_usb_interrupt_complete(struct urb *urb) { - gpib_board_t *board = urb->context; + struct gpib_board *board = urb->context; struct ni_usb_priv *ni_priv = board->private_data; struct usb_device *usb_dev = interface_to_usbdev(ni_priv->bus_interface); int retval; @@ -1835,7 +1835,7 @@ static void ni_usb_interrupt_complete(struct urb *urb) dev_err(&usb_dev->dev, "failed to resubmit interrupt urb\n"); } -static int ni_usb_set_interrupt_monitor(gpib_board_t *board, unsigned int monitored_bits) +static int ni_usb_set_interrupt_monitor(struct gpib_board *board, unsigned int monitored_bits) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1866,7 +1866,7 @@ static int ni_usb_set_interrupt_monitor(gpib_board_t *board, unsigned int monito return 0; } -static int ni_usb_setup_urbs(gpib_board_t *board) +static int ni_usb_setup_urbs(struct gpib_board *board) { struct ni_usb_priv *ni_priv = board->private_data; struct usb_device *usb_dev; @@ -2205,7 +2205,7 @@ static inline int ni_usb_device_match(struct usb_interface *interface, return 1; } -static int ni_usb_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_usb_attach(struct gpib_board *board, const gpib_board_config_t *config) { int retval; int i, index; @@ -2338,7 +2338,7 @@ static int ni_usb_shutdown_hardware(struct ni_usb_priv *ni_priv) return 0; } -static void ni_usb_detach(gpib_board_t *board) +static void ni_usb_detach(struct gpib_board *board) { struct ni_usb_priv *ni_priv; @@ -2445,7 +2445,7 @@ static void ni_usb_driver_disconnect(struct usb_interface *interface) mutex_lock(&ni_usb_hotplug_lock); for (i = 0; i < MAX_NUM_NI_USB_INTERFACES; i++) { if (ni_usb_driver_interfaces[i] == interface) { - gpib_board_t *board = usb_get_intfdata(interface); + struct gpib_board *board = usb_get_intfdata(interface); if (board) { struct ni_usb_priv *ni_priv = board->private_data; @@ -2474,7 +2474,7 @@ static void ni_usb_driver_disconnect(struct usb_interface *interface) static int ni_usb_driver_suspend(struct usb_interface *interface, pm_message_t message) { struct usb_device *usb_dev = interface_to_usbdev(interface); - gpib_board_t *board; + struct gpib_board *board; int i, retval; mutex_lock(&ni_usb_hotplug_lock); @@ -2518,7 +2518,7 @@ static int ni_usb_driver_resume(struct usb_interface *interface) { struct usb_device *usb_dev = interface_to_usbdev(interface); - gpib_board_t *board; + struct gpib_board *board; int i, retval; mutex_lock(&ni_usb_hotplug_lock); From 1d61a41b3bdb38682e2574b89492a816d71fbe30 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:20 +0000 Subject: [PATCH 1458/2157] staging: gpib: pc2: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-17-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/pc2/pc2_gpib.c | 68 ++++++++++++++--------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/staging/gpib/pc2/pc2_gpib.c b/drivers/staging/gpib/pc2/pc2_gpib.c index 6711851301ec..d8e0733d8ab0 100644 --- a/drivers/staging/gpib/pc2/pc2_gpib.c +++ b/drivers/staging/gpib/pc2/pc2_gpib.c @@ -58,7 +58,7 @@ MODULE_DESCRIPTION("GPIB driver for PC2/PC2a and compatible devices"); irqreturn_t pc2_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct pc2_priv *priv = board->private_data; unsigned long flags; irqreturn_t retval; @@ -71,7 +71,7 @@ irqreturn_t pc2_interrupt(int irq, void *arg) irqreturn_t pc2a_interrupt(int irq, void *arg) { - gpib_board_t *board = arg; + struct gpib_board *board = arg; struct pc2_priv *priv = board->private_data; int status1, status2; unsigned long flags; @@ -90,7 +90,7 @@ irqreturn_t pc2a_interrupt(int irq, void *arg) } // wrappers for interface functions -static int pc2_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int pc2_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct pc2_priv *priv = board->private_data; @@ -98,7 +98,7 @@ static int pc2_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *en return nec7210_read(board, &priv->nec7210_priv, buffer, length, end, bytes_read); } -static int pc2_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int pc2_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct pc2_priv *priv = board->private_data; @@ -106,133 +106,133 @@ static int pc2_write(gpib_board_t *board, uint8_t *buffer, size_t length, int se return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int pc2_command(gpib_board_t *board, uint8_t *buffer, size_t length, size_t *bytes_written) +static int pc2_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct pc2_priv *priv = board->private_data; return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int pc2_take_control(gpib_board_t *board, int synchronous) +static int pc2_take_control(struct gpib_board *board, int synchronous) { struct pc2_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int pc2_go_to_standby(gpib_board_t *board) +static int pc2_go_to_standby(struct gpib_board *board) { struct pc2_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void pc2_request_system_control(gpib_board_t *board, int request_control) +static void pc2_request_system_control(struct gpib_board *board, int request_control) { struct pc2_priv *priv = board->private_data; nec7210_request_system_control(board, &priv->nec7210_priv, request_control); } -static void pc2_interface_clear(gpib_board_t *board, int assert) +static void pc2_interface_clear(struct gpib_board *board, int assert) { struct pc2_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void pc2_remote_enable(gpib_board_t *board, int enable) +static void pc2_remote_enable(struct gpib_board *board, int enable) { struct pc2_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int pc2_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int pc2_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct pc2_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void pc2_disable_eos(gpib_board_t *board) +static void pc2_disable_eos(struct gpib_board *board) { struct pc2_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int pc2_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int pc2_update_status(struct gpib_board *board, unsigned int clear_mask) { struct pc2_priv *priv = board->private_data; return nec7210_update_status(board, &priv->nec7210_priv, clear_mask); } -static int pc2_primary_address(gpib_board_t *board, unsigned int address) +static int pc2_primary_address(struct gpib_board *board, unsigned int address) { struct pc2_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int pc2_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int pc2_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct pc2_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int pc2_parallel_poll(gpib_board_t *board, uint8_t *result) +static int pc2_parallel_poll(struct gpib_board *board, uint8_t *result) { struct pc2_priv *priv = board->private_data; return nec7210_parallel_poll(board, &priv->nec7210_priv, result); } -static void pc2_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void pc2_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct pc2_priv *priv = board->private_data; nec7210_parallel_poll_configure(board, &priv->nec7210_priv, config); } -static void pc2_parallel_poll_response(gpib_board_t *board, int ist) +static void pc2_parallel_poll_response(struct gpib_board *board, int ist) { struct pc2_priv *priv = board->private_data; nec7210_parallel_poll_response(board, &priv->nec7210_priv, ist); } -static void pc2_serial_poll_response(gpib_board_t *board, uint8_t status) +static void pc2_serial_poll_response(struct gpib_board *board, uint8_t status) { struct pc2_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -static uint8_t pc2_serial_poll_status(gpib_board_t *board) +static uint8_t pc2_serial_poll_status(struct gpib_board *board) { struct pc2_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static unsigned int pc2_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int pc2_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct pc2_priv *priv = board->private_data; return nec7210_t1_delay(board, &priv->nec7210_priv, nano_sec); } -static void pc2_return_to_local(gpib_board_t *board) +static void pc2_return_to_local(struct gpib_board *board) { struct pc2_priv *priv = board->private_data; nec7210_return_to_local(board, &priv->nec7210_priv); } -static int allocate_private(gpib_board_t *board) +static int allocate_private(struct gpib_board *board) { struct pc2_priv *priv; @@ -245,13 +245,13 @@ static int allocate_private(gpib_board_t *board) return 0; } -static void free_private(gpib_board_t *board) +static void free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -static int pc2_generic_attach(gpib_board_t *board, const gpib_board_config_t *config, +static int pc2_generic_attach(struct gpib_board *board, const gpib_board_config_t *config, enum nec7210_chipset chipset) { struct pc2_priv *pc2_priv; @@ -294,7 +294,7 @@ static int pc2_generic_attach(gpib_board_t *board, const gpib_board_config_t *co return 0; } -static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2_attach(struct gpib_board *board, const gpib_board_config_t *config) { int isr_flags = 0; struct pc2_priv *pc2_priv; @@ -338,7 +338,7 @@ static int pc2_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -static void pc2_detach(gpib_board_t *board) +static void pc2_detach(struct gpib_board *board) { struct pc2_priv *pc2_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -365,7 +365,7 @@ static void pc2_detach(gpib_board_t *board) free_private(board); } -static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *config, +static int pc2a_common_attach(struct gpib_board *board, const gpib_board_config_t *config, unsigned int num_registers, enum nec7210_chipset chipset) { unsigned int i, j; @@ -459,22 +459,22 @@ static int pc2a_common_attach(gpib_board_t *board, const gpib_board_config_t *co return 0; } -static int pc2a_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2a_attach(struct gpib_board *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2a_iosize, NEC7210); } -static int pc2a_cb7210_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2a_cb7210_attach(struct gpib_board *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2a_iosize, CB7210); } -static int pc2_2a_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int pc2_2a_attach(struct gpib_board *board, const gpib_board_config_t *config) { return pc2a_common_attach(board, config, pc2_2a_iosize, NAT4882); } -static void pc2a_common_detach(gpib_board_t *board, unsigned int num_registers) +static void pc2a_common_detach(struct gpib_board *board, unsigned int num_registers) { int i; struct pc2_priv *pc2_priv = board->private_data; @@ -507,12 +507,12 @@ static void pc2a_common_detach(gpib_board_t *board, unsigned int num_registers) free_private(board); } -static void pc2a_detach(gpib_board_t *board) +static void pc2a_detach(struct gpib_board *board) { pc2a_common_detach(board, pc2a_iosize); } -static void pc2_2a_detach(gpib_board_t *board) +static void pc2_2a_detach(struct gpib_board *board) { pc2a_common_detach(board, pc2_2a_iosize); } From e473ee288fb6fbc67963f7c92554947e14515a20 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:21 +0000 Subject: [PATCH 1459/2157] staging: gpib: tms9914: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-18-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/include/tms9914.h | 52 ++++++++++---------- drivers/staging/gpib/tms9914/tms9914.c | 66 +++++++++++++------------- 2 files changed, 59 insertions(+), 59 deletions(-) diff --git a/drivers/staging/gpib/include/tms9914.h b/drivers/staging/gpib/include/tms9914.h index d8c8d1c9b131..424c95ad85c6 100644 --- a/drivers/staging/gpib/include/tms9914.h +++ b/drivers/staging/gpib/include/tms9914.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +//* SPDX-License-Identifier: GPL-2.0 */ /*************************************************************************** * copyright : (C) 2002 by Frank Mori Hess @@ -79,47 +79,47 @@ enum { }; // interface functions -int tms9914_read(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +int tms9914_read(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); -int tms9914_write(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +int tms9914_write(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); -int tms9914_command(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +int tms9914_command(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written); -int tms9914_take_control(gpib_board_t *board, struct tms9914_priv *priv, int syncronous); +int tms9914_take_control(struct gpib_board *board, struct tms9914_priv *priv, int syncronous); /* alternate version of tms9914_take_control which works around buggy tcs * implementation. */ -int tms9914_take_control_workaround(gpib_board_t *board, struct tms9914_priv *priv, +int tms9914_take_control_workaround(struct gpib_board *board, struct tms9914_priv *priv, int syncronous); -int tms9914_go_to_standby(gpib_board_t *board, struct tms9914_priv *priv); -void tms9914_request_system_control(gpib_board_t *board, struct tms9914_priv *priv, +int tms9914_go_to_standby(struct gpib_board *board, struct tms9914_priv *priv); +void tms9914_request_system_control(struct gpib_board *board, struct tms9914_priv *priv, int request_control); -void tms9914_interface_clear(gpib_board_t *board, struct tms9914_priv *priv, int assert); -void tms9914_remote_enable(gpib_board_t *board, struct tms9914_priv *priv, int enable); -int tms9914_enable_eos(gpib_board_t *board, struct tms9914_priv *priv, uint8_t eos_bytes, +void tms9914_interface_clear(struct gpib_board *board, struct tms9914_priv *priv, int assert); +void tms9914_remote_enable(struct gpib_board *board, struct tms9914_priv *priv, int enable); +int tms9914_enable_eos(struct gpib_board *board, struct tms9914_priv *priv, uint8_t eos_bytes, int compare_8_bits); -void tms9914_disable_eos(gpib_board_t *board, struct tms9914_priv *priv); -unsigned int tms9914_update_status(gpib_board_t *board, struct tms9914_priv *priv, +void tms9914_disable_eos(struct gpib_board *board, struct tms9914_priv *priv); +unsigned int tms9914_update_status(struct gpib_board *board, struct tms9914_priv *priv, unsigned int clear_mask); -int tms9914_primary_address(gpib_board_t *board, +int tms9914_primary_address(struct gpib_board *board, struct tms9914_priv *priv, unsigned int address); -int tms9914_secondary_address(gpib_board_t *board, struct tms9914_priv *priv, +int tms9914_secondary_address(struct gpib_board *board, struct tms9914_priv *priv, unsigned int address, int enable); -int tms9914_parallel_poll(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *result); -void tms9914_parallel_poll_configure(gpib_board_t *board, +int tms9914_parallel_poll(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *result); +void tms9914_parallel_poll_configure(struct gpib_board *board, struct tms9914_priv *priv, uint8_t config); -void tms9914_parallel_poll_response(gpib_board_t *board, +void tms9914_parallel_poll_response(struct gpib_board *board, struct tms9914_priv *priv, int ist); -void tms9914_serial_poll_response(gpib_board_t *board, struct tms9914_priv *priv, uint8_t status); -uint8_t tms9914_serial_poll_status(gpib_board_t *board, struct tms9914_priv *priv); -int tms9914_line_status(const gpib_board_t *board, struct tms9914_priv *priv); -unsigned int tms9914_t1_delay(gpib_board_t *board, struct tms9914_priv *priv, +void tms9914_serial_poll_response(struct gpib_board *board, struct tms9914_priv *priv, uint8_t status); +uint8_t tms9914_serial_poll_status(struct gpib_board *board, struct tms9914_priv *priv); +int tms9914_line_status(const struct gpib_board *board, struct tms9914_priv *priv); +unsigned int tms9914_t1_delay(struct gpib_board *board, struct tms9914_priv *priv, unsigned int nano_sec); -void tms9914_return_to_local(const gpib_board_t *board, struct tms9914_priv *priv); +void tms9914_return_to_local(const struct gpib_board *board, struct tms9914_priv *priv); // utility functions void tms9914_board_reset(struct tms9914_priv *priv); -void tms9914_online(gpib_board_t *board, struct tms9914_priv *priv); +void tms9914_online(struct gpib_board *board, struct tms9914_priv *priv); void tms9914_release_holdoff(struct tms9914_priv *priv); void tms9914_set_holdoff_mode(struct tms9914_priv *priv, enum tms9914_holdoff_mode mode); @@ -130,8 +130,8 @@ uint8_t tms9914_iomem_read_byte(struct tms9914_priv *priv, unsigned int register void tms9914_iomem_write_byte(struct tms9914_priv *priv, uint8_t data, unsigned int register_num); // interrupt service routine -irqreturn_t tms9914_interrupt(gpib_board_t *board, struct tms9914_priv *priv); -irqreturn_t tms9914_interrupt_have_status(gpib_board_t *board, struct tms9914_priv *priv, +irqreturn_t tms9914_interrupt(struct gpib_board *board, struct tms9914_priv *priv); +irqreturn_t tms9914_interrupt_have_status(struct gpib_board *board, struct tms9914_priv *priv, int status1, int status2); // tms9914 has 8 registers diff --git a/drivers/staging/gpib/tms9914/tms9914.c b/drivers/staging/gpib/tms9914/tms9914.c index c563fcab44fc..2abda9d7dfcb 100644 --- a/drivers/staging/gpib/tms9914/tms9914.c +++ b/drivers/staging/gpib/tms9914/tms9914.c @@ -27,9 +27,9 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB library for tms9914"); -static unsigned int update_status_nolock(gpib_board_t *board, struct tms9914_priv *priv); +static unsigned int update_status_nolock(struct gpib_board *board, struct tms9914_priv *priv); -int tms9914_take_control(gpib_board_t *board, struct tms9914_priv *priv, int synchronous) +int tms9914_take_control(struct gpib_board *board, struct tms9914_priv *priv, int synchronous) { int i; const int timeout = 100; @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(tms9914_take_control); * The rest of the tms9914 based drivers still use tms9914_take_control * directly (which does issue tcs). */ -int tms9914_take_control_workaround(gpib_board_t *board, struct tms9914_priv *priv, int synchronous) +int tms9914_take_control_workaround(struct gpib_board *board, struct tms9914_priv *priv, int synchronous) { if (synchronous) return -ETIMEDOUT; @@ -74,7 +74,7 @@ int tms9914_take_control_workaround(gpib_board_t *board, struct tms9914_priv *pr } EXPORT_SYMBOL_GPL(tms9914_take_control_workaround); -int tms9914_go_to_standby(gpib_board_t *board, struct tms9914_priv *priv) +int tms9914_go_to_standby(struct gpib_board *board, struct tms9914_priv *priv) { int i; const int timeout = 1000; @@ -95,7 +95,7 @@ int tms9914_go_to_standby(gpib_board_t *board, struct tms9914_priv *priv) } EXPORT_SYMBOL_GPL(tms9914_go_to_standby); -void tms9914_interface_clear(gpib_board_t *board, struct tms9914_priv *priv, int assert) +void tms9914_interface_clear(struct gpib_board *board, struct tms9914_priv *priv, int assert) { if (assert) { write_byte(priv, AUX_SIC | AUX_CS, AUXCR); @@ -107,7 +107,7 @@ void tms9914_interface_clear(gpib_board_t *board, struct tms9914_priv *priv, int } EXPORT_SYMBOL_GPL(tms9914_interface_clear); -void tms9914_remote_enable(gpib_board_t *board, struct tms9914_priv *priv, int enable) +void tms9914_remote_enable(struct gpib_board *board, struct tms9914_priv *priv, int enable) { if (enable) write_byte(priv, AUX_SRE | AUX_CS, AUXCR); @@ -116,7 +116,7 @@ void tms9914_remote_enable(gpib_board_t *board, struct tms9914_priv *priv, int e } EXPORT_SYMBOL_GPL(tms9914_remote_enable); -void tms9914_request_system_control(gpib_board_t *board, struct tms9914_priv *priv, +void tms9914_request_system_control(struct gpib_board *board, struct tms9914_priv *priv, int request_control) { if (request_control) { @@ -128,7 +128,7 @@ void tms9914_request_system_control(gpib_board_t *board, struct tms9914_priv *pr } EXPORT_SYMBOL_GPL(tms9914_request_system_control); -unsigned int tms9914_t1_delay(gpib_board_t *board, struct tms9914_priv *priv, +unsigned int tms9914_t1_delay(struct gpib_board *board, struct tms9914_priv *priv, unsigned int nano_sec) { static const int clock_period = 200; // assuming 5Mhz input clock @@ -154,7 +154,7 @@ unsigned int tms9914_t1_delay(gpib_board_t *board, struct tms9914_priv *priv, } EXPORT_SYMBOL_GPL(tms9914_t1_delay); -void tms9914_return_to_local(const gpib_board_t *board, struct tms9914_priv *priv) +void tms9914_return_to_local(const struct gpib_board *board, struct tms9914_priv *priv) { write_byte(priv, AUX_RTL, AUXCR); } @@ -192,7 +192,7 @@ void tms9914_release_holdoff(struct tms9914_priv *priv) } EXPORT_SYMBOL_GPL(tms9914_release_holdoff); -int tms9914_enable_eos(gpib_board_t *board, struct tms9914_priv *priv, uint8_t eos_byte, +int tms9914_enable_eos(struct gpib_board *board, struct tms9914_priv *priv, uint8_t eos_byte, int compare_8_bits) { priv->eos = eos_byte; @@ -203,13 +203,13 @@ int tms9914_enable_eos(gpib_board_t *board, struct tms9914_priv *priv, uint8_t e } EXPORT_SYMBOL(tms9914_enable_eos); -void tms9914_disable_eos(gpib_board_t *board, struct tms9914_priv *priv) +void tms9914_disable_eos(struct gpib_board *board, struct tms9914_priv *priv) { priv->eos_flags &= ~REOS; } EXPORT_SYMBOL(tms9914_disable_eos); -int tms9914_parallel_poll(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *result) +int tms9914_parallel_poll(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *result) { // execute parallel poll write_byte(priv, AUX_CS | AUX_RPP, AUXCR); @@ -234,7 +234,7 @@ static void set_ppoll_reg(struct tms9914_priv *priv, int enable, } } -void tms9914_parallel_poll_configure(gpib_board_t *board, +void tms9914_parallel_poll_configure(struct gpib_board *board, struct tms9914_priv *priv, uint8_t config) { priv->ppoll_enable = (config & PPC_DISABLE) == 0; @@ -244,14 +244,14 @@ void tms9914_parallel_poll_configure(gpib_board_t *board, } EXPORT_SYMBOL(tms9914_parallel_poll_configure); -void tms9914_parallel_poll_response(gpib_board_t *board, +void tms9914_parallel_poll_response(struct gpib_board *board, struct tms9914_priv *priv, int ist) { set_ppoll_reg(priv, priv->ppoll_enable, priv->ppoll_line, priv->ppoll_sense, ist); } EXPORT_SYMBOL(tms9914_parallel_poll_response); -void tms9914_serial_poll_response(gpib_board_t *board, struct tms9914_priv *priv, uint8_t status) +void tms9914_serial_poll_response(struct gpib_board *board, struct tms9914_priv *priv, uint8_t status) { unsigned long flags; @@ -266,7 +266,7 @@ void tms9914_serial_poll_response(gpib_board_t *board, struct tms9914_priv *priv } EXPORT_SYMBOL(tms9914_serial_poll_response); -uint8_t tms9914_serial_poll_status(gpib_board_t *board, struct tms9914_priv *priv) +uint8_t tms9914_serial_poll_status(struct gpib_board *board, struct tms9914_priv *priv) { u8 status; unsigned long flags; @@ -279,7 +279,7 @@ uint8_t tms9914_serial_poll_status(gpib_board_t *board, struct tms9914_priv *pri } EXPORT_SYMBOL(tms9914_serial_poll_status); -int tms9914_primary_address(gpib_board_t *board, struct tms9914_priv *priv, unsigned int address) +int tms9914_primary_address(struct gpib_board *board, struct tms9914_priv *priv, unsigned int address) { // put primary address in address0 write_byte(priv, address & ADDRESS_MASK, ADR); @@ -287,7 +287,7 @@ int tms9914_primary_address(gpib_board_t *board, struct tms9914_priv *priv, unsi } EXPORT_SYMBOL(tms9914_primary_address); -int tms9914_secondary_address(gpib_board_t *board, struct tms9914_priv *priv, +int tms9914_secondary_address(struct gpib_board *board, struct tms9914_priv *priv, unsigned int address, int enable) { if (enable) @@ -300,7 +300,7 @@ int tms9914_secondary_address(gpib_board_t *board, struct tms9914_priv *priv, } EXPORT_SYMBOL(tms9914_secondary_address); -unsigned int tms9914_update_status(gpib_board_t *board, struct tms9914_priv *priv, +unsigned int tms9914_update_status(struct gpib_board *board, struct tms9914_priv *priv, unsigned int clear_mask) { unsigned long flags; @@ -342,7 +342,7 @@ static void update_listener_state(struct tms9914_priv *priv, unsigned int addres } } -static unsigned int update_status_nolock(gpib_board_t *board, struct tms9914_priv *priv) +static unsigned int update_status_nolock(struct gpib_board *board, struct tms9914_priv *priv) { int address_status; int bsr_bits; @@ -388,7 +388,7 @@ static unsigned int update_status_nolock(gpib_board_t *board, struct tms9914_pri return board->status; } -int tms9914_line_status(const gpib_board_t *board, struct tms9914_priv *priv) +int tms9914_line_status(const struct gpib_board *board, struct tms9914_priv *priv) { int bsr_bits; int status = VALID_ALL; @@ -433,7 +433,7 @@ static int check_for_eos(struct tms9914_priv *priv, uint8_t byte) return 0; } -static int wait_for_read_byte(gpib_board_t *board, struct tms9914_priv *priv) +static int wait_for_read_byte(struct gpib_board *board, struct tms9914_priv *priv) { if (wait_event_interruptible(board->wait, test_bit(READ_READY_BN, &priv->state) || @@ -449,7 +449,7 @@ static int wait_for_read_byte(gpib_board_t *board, struct tms9914_priv *priv) return 0; } -static inline uint8_t tms9914_read_data_in(gpib_board_t *board, struct tms9914_priv *priv, int *end) +static inline uint8_t tms9914_read_data_in(struct gpib_board *board, struct tms9914_priv *priv, int *end) { unsigned long flags; u8 data; @@ -480,7 +480,7 @@ static inline uint8_t tms9914_read_data_in(gpib_board_t *board, struct tms9914_p return data; } -static int pio_read(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +static int pio_read(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -501,7 +501,7 @@ static int pio_read(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buf return retval; } -int tms9914_read(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +int tms9914_read(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { ssize_t retval = 0; @@ -541,7 +541,7 @@ int tms9914_read(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer } EXPORT_SYMBOL(tms9914_read); -static int pio_write_wait(gpib_board_t *board, struct tms9914_priv *priv) +static int pio_write_wait(struct gpib_board *board, struct tms9914_priv *priv) { // wait until next byte is ready to be sent if (wait_event_interruptible(board->wait, @@ -561,7 +561,7 @@ static int pio_write_wait(gpib_board_t *board, struct tms9914_priv *priv) return 0; } -static int pio_write(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +static int pio_write(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written) { ssize_t retval = 0; @@ -585,7 +585,7 @@ static int pio_write(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *bu return length; } -int tms9914_write(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, +int tms9914_write(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { ssize_t retval = 0; @@ -620,7 +620,7 @@ int tms9914_write(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffe } EXPORT_SYMBOL(tms9914_write); -static void check_my_address_state(gpib_board_t *board, struct tms9914_priv *priv, int cmd_byte) +static void check_my_address_state(struct gpib_board *board, struct tms9914_priv *priv, int cmd_byte) { if (cmd_byte == MLA(board->pad)) { priv->primary_listen_addressed = 1; @@ -655,7 +655,7 @@ static void check_my_address_state(gpib_board_t *board, struct tms9914_priv *pri } } -int tms9914_command(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *buffer, +int tms9914_command(struct gpib_board *board, struct tms9914_priv *priv, uint8_t *buffer, size_t length, size_t *bytes_written) { int retval = 0; @@ -692,7 +692,7 @@ int tms9914_command(gpib_board_t *board, struct tms9914_priv *priv, uint8_t *bu } EXPORT_SYMBOL(tms9914_command); -irqreturn_t tms9914_interrupt(gpib_board_t *board, struct tms9914_priv *priv) +irqreturn_t tms9914_interrupt(struct gpib_board *board, struct tms9914_priv *priv) { int status0, status1; @@ -703,7 +703,7 @@ irqreturn_t tms9914_interrupt(gpib_board_t *board, struct tms9914_priv *priv) } EXPORT_SYMBOL(tms9914_interrupt); -irqreturn_t tms9914_interrupt_have_status(gpib_board_t *board, struct tms9914_priv *priv, +irqreturn_t tms9914_interrupt_have_status(struct gpib_board *board, struct tms9914_priv *priv, int status0, int status1) { // record reception of END @@ -837,7 +837,7 @@ void tms9914_board_reset(struct tms9914_priv *priv) } EXPORT_SYMBOL_GPL(tms9914_board_reset); -void tms9914_online(gpib_board_t *board, struct tms9914_priv *priv) +void tms9914_online(struct gpib_board *board, struct tms9914_priv *priv) { /* set GPIB address */ tms9914_primary_address(board, priv, board->pad); From fc2c620c3924f26919c7dc1fc46e63c43a7ab98f Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:22 +0000 Subject: [PATCH 1460/2157] staging: gpib: tnt4882: struct gpib_board Using Linux code style for struct gpib_board. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-19-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 100 ++++++++++---------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index e62f3424ca20..bc99526f2d0c 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -51,7 +51,7 @@ struct tnt4882_priv { unsigned short auxg_bits; // bits written to auxiliary register G }; -static irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board); +static irqreturn_t tnt4882_internal_interrupt(struct gpib_board *board); // register offset for nec7210 compatible registers static const int atgpib_reg_offset = 2; @@ -148,7 +148,7 @@ static inline void tnt_writeb(struct tnt4882_priv *priv, unsigned short value, u MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GPIB driver for National Instruments boards using tnt4882 or compatible chips"); -static int tnt4882_line_status(const gpib_board_t *board) +static int tnt4882_line_status(const struct gpib_board *board) { int status = VALID_ALL; int bcsr_bits; @@ -178,7 +178,7 @@ static int tnt4882_line_status(const gpib_board_t *board) return status; } -static unsigned int tnt4882_t1_delay(gpib_board_t *board, unsigned int nano_sec) +static unsigned int tnt4882_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; @@ -251,7 +251,7 @@ static int drain_fifo_words(struct tnt4882_priv *tnt_priv, uint8_t *buffer, int return count; } -static void tnt4882_release_holdoff(gpib_board_t *board, struct tnt4882_priv *tnt_priv) +static void tnt4882_release_holdoff(struct gpib_board *board, struct tnt4882_priv *tnt_priv) { struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; unsigned short sasr_bits; @@ -274,7 +274,7 @@ static void tnt4882_release_holdoff(gpib_board_t *board, struct tnt4882_priv *tn } } -static int tnt4882_accel_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int tnt4882_accel_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { size_t count = 0; @@ -426,7 +426,7 @@ static unsigned int tnt_transfer_count(struct tnt4882_priv *tnt_priv) return -count; }; -static int write_wait(gpib_board_t *board, struct tnt4882_priv *tnt_priv, +static int write_wait(struct gpib_board *board, struct tnt4882_priv *tnt_priv, int wait_for_done, int send_commands) { struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; @@ -448,7 +448,7 @@ static int write_wait(gpib_board_t *board, struct tnt4882_priv *tnt_priv, return 0; } -static int generic_write(gpib_board_t *board, uint8_t *buffer, size_t length, +static int generic_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, int send_commands, size_t *bytes_written) { size_t count = 0; @@ -539,19 +539,19 @@ static int generic_write(gpib_board_t *board, uint8_t *buffer, size_t length, return retval; } -static int tnt4882_accel_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int tnt4882_accel_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { return generic_write(board, buffer, length, send_eoi, 0, bytes_written); } -static int tnt4882_command(gpib_board_t *board, uint8_t *buffer, size_t length, +static int tnt4882_command(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { return generic_write(board, buffer, length, 0, 1, bytes_written); } -static irqreturn_t tnt4882_internal_interrupt(gpib_board_t *board) +static irqreturn_t tnt4882_internal_interrupt(struct gpib_board *board) { struct tnt4882_priv *priv = board->private_data; int isr0_bits, isr3_bits, imr3_bits; @@ -592,7 +592,7 @@ static irqreturn_t tnt4882_interrupt(int irq, void *arg) } // wrappers for interface functions -static int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, +static int tnt4882_read(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read) { struct tnt4882_priv *priv = board->private_data; @@ -612,7 +612,7 @@ static int tnt4882_read(gpib_board_t *board, uint8_t *buffer, size_t length, int return retval; } -static int tnt4882_write(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, +static int tnt4882_write(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written) { struct tnt4882_priv *priv = board->private_data; @@ -620,7 +620,7 @@ static int tnt4882_write(gpib_board_t *board, uint8_t *buffer, size_t length, in return nec7210_write(board, &priv->nec7210_priv, buffer, length, send_eoi, bytes_written); } -static int tnt4882_command_unaccel(gpib_board_t *board, uint8_t *buffer, +static int tnt4882_command_unaccel(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written) { struct tnt4882_priv *priv = board->private_data; @@ -628,21 +628,21 @@ static int tnt4882_command_unaccel(gpib_board_t *board, uint8_t *buffer, return nec7210_command(board, &priv->nec7210_priv, buffer, length, bytes_written); } -static int tnt4882_take_control(gpib_board_t *board, int synchronous) +static int tnt4882_take_control(struct gpib_board *board, int synchronous) { struct tnt4882_priv *priv = board->private_data; return nec7210_take_control(board, &priv->nec7210_priv, synchronous); } -static int tnt4882_go_to_standby(gpib_board_t *board) +static int tnt4882_go_to_standby(struct gpib_board *board) { struct tnt4882_priv *priv = board->private_data; return nec7210_go_to_standby(board, &priv->nec7210_priv); } -static void tnt4882_request_system_control(gpib_board_t *board, int request_control) +static void tnt4882_request_system_control(struct gpib_board *board, int request_control) { struct tnt4882_priv *priv = board->private_data; @@ -657,35 +657,35 @@ static void tnt4882_request_system_control(gpib_board_t *board, int request_cont } } -static void tnt4882_interface_clear(gpib_board_t *board, int assert) +static void tnt4882_interface_clear(struct gpib_board *board, int assert) { struct tnt4882_priv *priv = board->private_data; nec7210_interface_clear(board, &priv->nec7210_priv, assert); } -static void tnt4882_remote_enable(gpib_board_t *board, int enable) +static void tnt4882_remote_enable(struct gpib_board *board, int enable) { struct tnt4882_priv *priv = board->private_data; nec7210_remote_enable(board, &priv->nec7210_priv, enable); } -static int tnt4882_enable_eos(gpib_board_t *board, uint8_t eos_byte, int compare_8_bits) +static int tnt4882_enable_eos(struct gpib_board *board, uint8_t eos_byte, int compare_8_bits) { struct tnt4882_priv *priv = board->private_data; return nec7210_enable_eos(board, &priv->nec7210_priv, eos_byte, compare_8_bits); } -static void tnt4882_disable_eos(gpib_board_t *board) +static void tnt4882_disable_eos(struct gpib_board *board) { struct tnt4882_priv *priv = board->private_data; nec7210_disable_eos(board, &priv->nec7210_priv); } -static unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clear_mask) +static unsigned int tnt4882_update_status(struct gpib_board *board, unsigned int clear_mask) { unsigned long flags; u8 line_status; @@ -704,21 +704,21 @@ static unsigned int tnt4882_update_status(gpib_board_t *board, unsigned int clea return board->status; } -static int tnt4882_primary_address(gpib_board_t *board, unsigned int address) +static int tnt4882_primary_address(struct gpib_board *board, unsigned int address) { struct tnt4882_priv *priv = board->private_data; return nec7210_primary_address(board, &priv->nec7210_priv, address); } -static int tnt4882_secondary_address(gpib_board_t *board, unsigned int address, int enable) +static int tnt4882_secondary_address(struct gpib_board *board, unsigned int address, int enable) { struct tnt4882_priv *priv = board->private_data; return nec7210_secondary_address(board, &priv->nec7210_priv, address, enable); } -static int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result) +static int tnt4882_parallel_poll(struct gpib_board *board, uint8_t *result) { struct tnt4882_priv *tnt_priv = board->private_data; @@ -735,7 +735,7 @@ static int tnt4882_parallel_poll(gpib_board_t *board, uint8_t *result) } } -static void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config) +static void tnt4882_parallel_poll_configure(struct gpib_board *board, uint8_t config) { struct tnt4882_priv *priv = board->private_data; @@ -753,7 +753,7 @@ static void tnt4882_parallel_poll_configure(gpib_board_t *board, uint8_t config) } } -static void tnt4882_parallel_poll_response(gpib_board_t *board, int ist) +static void tnt4882_parallel_poll_response(struct gpib_board *board, int ist) { struct tnt4882_priv *priv = board->private_data; @@ -763,14 +763,14 @@ static void tnt4882_parallel_poll_response(gpib_board_t *board, int ist) /* this is just used by the old nec7210 isa interfaces, the newer * boards use tnt4882_serial_poll_response2 */ -static void tnt4882_serial_poll_response(gpib_board_t *board, uint8_t status) +static void tnt4882_serial_poll_response(struct gpib_board *board, uint8_t status) { struct tnt4882_priv *priv = board->private_data; nec7210_serial_poll_response(board, &priv->nec7210_priv, status); } -static void tnt4882_serial_poll_response2(gpib_board_t *board, uint8_t status, +static void tnt4882_serial_poll_response2(struct gpib_board *board, uint8_t status, int new_reason_for_service) { struct tnt4882_priv *priv = board->private_data; @@ -804,21 +804,21 @@ static void tnt4882_serial_poll_response2(gpib_board_t *board, uint8_t status, spin_unlock_irqrestore(&board->spinlock, flags); } -static uint8_t tnt4882_serial_poll_status(gpib_board_t *board) +static uint8_t tnt4882_serial_poll_status(struct gpib_board *board) { struct tnt4882_priv *priv = board->private_data; return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static void tnt4882_return_to_local(gpib_board_t *board) +static void tnt4882_return_to_local(struct gpib_board *board) { struct tnt4882_priv *priv = board->private_data; nec7210_return_to_local(board, &priv->nec7210_priv); } -static void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, gpib_board_t *board) +static void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, struct gpib_board *board) { struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; @@ -831,7 +831,7 @@ static void tnt4882_board_reset(struct tnt4882_priv *tnt_priv, gpib_board_t *boa nec7210_board_reset(nec_priv, board); } -static int tnt4882_allocate_private(gpib_board_t *board) +static int tnt4882_allocate_private(struct gpib_board *board) { struct tnt4882_priv *tnt_priv; @@ -844,13 +844,13 @@ static int tnt4882_allocate_private(gpib_board_t *board) return 0; } -static void tnt4882_free_private(gpib_board_t *board) +static void tnt4882_free_private(struct gpib_board *board) { kfree(board->private_data); board->private_data = NULL; } -static void tnt4882_init(struct tnt4882_priv *tnt_priv, const gpib_board_t *board) +static void tnt4882_init(struct tnt4882_priv *tnt_priv, const struct gpib_board *board) { struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; @@ -898,7 +898,7 @@ static void tnt4882_init(struct tnt4882_priv *tnt_priv, const gpib_board_t *boar tnt_writeb(tnt_priv, tnt_priv->imr0_bits, IMR0); } -static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_pci_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct tnt4882_priv *tnt_priv; struct nec7210_priv *nec_priv; @@ -982,7 +982,7 @@ static int ni_pci_attach(gpib_board_t *board, const gpib_board_config_t *config) return 0; } -static void ni_pci_detach(gpib_board_t *board) +static void ni_pci_detach(struct gpib_board *board) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1019,7 +1019,7 @@ static int ni_isapnp_find(struct pnp_dev **dev) return 0; } -static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t *config, +static int ni_isa_attach_common(struct gpib_board *board, const gpib_board_config_t *config, enum nec7210_chipset chipset) { struct tnt4882_priv *tnt_priv; @@ -1075,22 +1075,22 @@ static int ni_isa_attach_common(gpib_board_t *board, const gpib_board_config_t * return 0; } -static int ni_tnt_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_tnt_isa_attach(struct gpib_board *board, const gpib_board_config_t *config) { return ni_isa_attach_common(board, config, TNT4882); } -static int ni_nat4882_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_nat4882_isa_attach(struct gpib_board *board, const gpib_board_config_t *config) { return ni_isa_attach_common(board, config, NAT4882); } -static int ni_nec_isa_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_nec_isa_attach(struct gpib_board *board, const gpib_board_config_t *config) { return ni_isa_attach_common(board, config, NEC7210); } -static void ni_isa_detach(gpib_board_t *board) +static void ni_isa_detach(struct gpib_board *board) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv; @@ -1523,7 +1523,7 @@ static void __exit tnt4882_exit_module(void) static int ni_gpib_config(struct pcmcia_device *link); static void ni_gpib_release(struct pcmcia_device *link); -static void ni_pcmcia_detach(gpib_board_t *board); +static void ni_pcmcia_detach(struct gpib_board *board); /* * A linked list of "instances" of the dummy device. Each actual @@ -1542,7 +1542,7 @@ static struct pcmcia_device *curr_dev; struct local_info_t { struct pcmcia_device *p_dev; - gpib_board_t *dev; + struct gpib_board *dev; int stop; struct bus_operations *bus; }; @@ -1556,7 +1556,7 @@ struct local_info_t { static int ni_gpib_probe(struct pcmcia_device *link) { struct local_info_t *info; - //struct gpib_board_t *dev; + //struct struct gpib_board *dev; /* Allocate space for private device-specific data */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -1589,7 +1589,7 @@ static int ni_gpib_probe(struct pcmcia_device *link) static void ni_gpib_remove(struct pcmcia_device *link) { struct local_info_t *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (info->dev) ni_pcmcia_detach(info->dev); @@ -1618,7 +1618,7 @@ static int ni_gpib_config_iteration(struct pcmcia_device *link, void *priv_data) static int ni_gpib_config(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; - //gpib_board_t *dev = info->dev; + //struct gpib_board *dev = info->dev; int last_ret; last_ret = pcmcia_loop_config(link, &ni_gpib_config_iteration, NULL); @@ -1649,7 +1649,7 @@ static void ni_gpib_release(struct pcmcia_device *link) static int ni_gpib_suspend(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; if (link->open) dev_warn(&link->dev, "Device still open\n"); @@ -1661,7 +1661,7 @@ static int ni_gpib_suspend(struct pcmcia_device *link) static int ni_gpib_resume(struct pcmcia_device *link) { //struct local_info_t *info = link->priv; - //struct gpib_board_t *dev = info->dev; + //struct struct gpib_board *dev = info->dev; /*if (link->open) { * ni_gpib_probe(dev); / really? @@ -1702,7 +1702,7 @@ static void __exit exit_ni_gpib_cs(void) static const int pcmcia_gpib_iosize = 32; -static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *config) +static int ni_pcmcia_attach(struct gpib_board *board, const gpib_board_config_t *config) { struct local_info_t *info; struct tnt4882_priv *tnt_priv; @@ -1750,7 +1750,7 @@ static int ni_pcmcia_attach(gpib_board_t *board, const gpib_board_config_t *conf return 0; } -static void ni_pcmcia_detach(gpib_board_t *board) +static void ni_pcmcia_detach(struct gpib_board *board) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv; From 50af7beb90b355f6aa6914ff3f66ee6b848c0990 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:23 +0000 Subject: [PATCH 1461/2157] staging: gpib: struct typing for gpib_gboard_t Using Linux code style for gpib_board struct. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-20-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/include/gpibP.h | 10 ++-- drivers/staging/gpib/include/gpib_proto.h | 58 +++++++++++------------ drivers/staging/gpib/include/gpib_types.h | 54 ++++++++++----------- 3 files changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/staging/gpib/include/gpibP.h b/drivers/staging/gpib/include/gpibP.h index d35fdd391f7e..0c71a038e444 100644 --- a/drivers/staging/gpib/include/gpibP.h +++ b/drivers/staging/gpib/include/gpibP.h @@ -26,13 +26,13 @@ struct pci_dev *gpib_pci_get_subsys(const gpib_board_config_t *config, unsigned unsigned int device_id, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); unsigned int num_gpib_events(const gpib_event_queue_t *queue); -int push_gpib_event(gpib_board_t *board, short event_type); -int pop_gpib_event(gpib_board_t *board, gpib_event_queue_t *queue, short *event_type); -int gpib_request_pseudo_irq(gpib_board_t *board, irqreturn_t (*handler)(int, void *)); -void gpib_free_pseudo_irq(gpib_board_t *board); +int push_gpib_event(struct gpib_board *board, short event_type); +int pop_gpib_event(struct gpib_board *board, gpib_event_queue_t *queue, short *event_type); +int gpib_request_pseudo_irq(struct gpib_board *board, irqreturn_t (*handler)(int, void *)); +void gpib_free_pseudo_irq(struct gpib_board *board); int gpib_match_device_path(struct device *dev, const char *device_path_in); -extern gpib_board_t board_array[GPIB_MAX_NUM_BOARDS]; +extern struct gpib_board board_array[GPIB_MAX_NUM_BOARDS]; extern struct list_head registered_drivers; diff --git a/drivers/staging/gpib/include/gpib_proto.h b/drivers/staging/gpib/include/gpib_proto.h index 1499f954210b..2c7dfc02f517 100644 --- a/drivers/staging/gpib/include/gpib_proto.h +++ b/drivers/staging/gpib/include/gpib_proto.h @@ -10,11 +10,11 @@ int ibclose(struct inode *inode, struct file *file); long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg); int osInit(void); void osReset(void); -void os_start_timer(gpib_board_t *board, unsigned int usec_timeout); -void os_remove_timer(gpib_board_t *board); +void os_start_timer(struct gpib_board *board, unsigned int usec_timeout); +void os_remove_timer(struct gpib_board *board); void osSendEOI(void); void osSendEOI(void); -void init_gpib_board(gpib_board_t *board); +void init_gpib_board(struct gpib_board *board); static inline unsigned long usec_to_jiffies(unsigned int usec) { unsigned long usec_per_jiffy = 1000000 / HZ; @@ -22,35 +22,35 @@ static inline unsigned long usec_to_jiffies(unsigned int usec) return 1 + (usec + usec_per_jiffy - 1) / usec_per_jiffy; }; -int serial_poll_all(gpib_board_t *board, unsigned int usec_timeout); +int serial_poll_all(struct gpib_board *board, unsigned int usec_timeout); void init_gpib_descriptor(gpib_descriptor_t *desc); -int dvrsp(gpib_board_t *board, unsigned int pad, int sad, +int dvrsp(struct gpib_board *board, unsigned int pad, int sad, unsigned int usec_timeout, uint8_t *result); -int ibAPWait(gpib_board_t *board, int pad); -int ibAPrsp(gpib_board_t *board, int padsad, char *spb); -void ibAPE(gpib_board_t *board, int pad, int v); -int ibcac(gpib_board_t *board, int sync, int fallback_to_async); -int ibcmd(gpib_board_t *board, uint8_t *buf, size_t length, size_t *bytes_written); -int ibgts(gpib_board_t *board); -int ibonline(gpib_board_t *board); -int iboffline(gpib_board_t *board); -int iblines(const gpib_board_t *board, short *lines); -int ibrd(gpib_board_t *board, uint8_t *buf, size_t length, int *end_flag, size_t *bytes_read); -int ibrpp(gpib_board_t *board, uint8_t *buf); -int ibrsv2(gpib_board_t *board, uint8_t status_byte, int new_reason_for_service); -void ibrsc(gpib_board_t *board, int request_control); -int ibsic(gpib_board_t *board, unsigned int usec_duration); -int ibsre(gpib_board_t *board, int enable); -int ibpad(gpib_board_t *board, unsigned int addr); -int ibsad(gpib_board_t *board, int addr); -int ibeos(gpib_board_t *board, int eos, int eosflags); -int ibwait(gpib_board_t *board, int wait_mask, int clear_mask, int set_mask, +int ibAPWait(struct gpib_board *board, int pad); +int ibAPrsp(struct gpib_board *board, int padsad, char *spb); +void ibAPE(struct gpib_board *board, int pad, int v); +int ibcac(struct gpib_board *board, int sync, int fallback_to_async); +int ibcmd(struct gpib_board *board, uint8_t *buf, size_t length, size_t *bytes_written); +int ibgts(struct gpib_board *board); +int ibonline(struct gpib_board *board); +int iboffline(struct gpib_board *board); +int iblines(const struct gpib_board *board, short *lines); +int ibrd(struct gpib_board *board, uint8_t *buf, size_t length, int *end_flag, size_t *bytes_read); +int ibrpp(struct gpib_board *board, uint8_t *buf); +int ibrsv2(struct gpib_board *board, uint8_t status_byte, int new_reason_for_service); +void ibrsc(struct gpib_board *board, int request_control); +int ibsic(struct gpib_board *board, unsigned int usec_duration); +int ibsre(struct gpib_board *board, int enable); +int ibpad(struct gpib_board *board, unsigned int addr); +int ibsad(struct gpib_board *board, int addr); +int ibeos(struct gpib_board *board, int eos, int eosflags); +int ibwait(struct gpib_board *board, int wait_mask, int clear_mask, int set_mask, int *status, unsigned long usec_timeout, gpib_descriptor_t *desc); -int ibwrt(gpib_board_t *board, uint8_t *buf, size_t cnt, int send_eoi, size_t *bytes_written); -int ibstatus(gpib_board_t *board); -int general_ibstatus(gpib_board_t *board, const gpib_status_queue_t *device, +int ibwrt(struct gpib_board *board, uint8_t *buf, size_t cnt, int send_eoi, size_t *bytes_written); +int ibstatus(struct gpib_board *board); +int general_ibstatus(struct gpib_board *board, const gpib_status_queue_t *device, int clear_mask, int set_mask, gpib_descriptor_t *desc); -int io_timed_out(gpib_board_t *board); -int ibppc(gpib_board_t *board, uint8_t configuration); +int io_timed_out(struct gpib_board *board); +int ibppc(struct gpib_board *board, uint8_t configuration); #endif /* GPIB_PROTO_INCLUDED */ diff --git a/drivers/staging/gpib/include/gpib_types.h b/drivers/staging/gpib/include/gpib_types.h index 7dc5a16e427b..515fdb91d150 100644 --- a/drivers/staging/gpib/include/gpib_types.h +++ b/drivers/staging/gpib/include/gpib_types.h @@ -55,9 +55,9 @@ struct gpib_interface_struct { /* name of board */ char *name; /* attach() initializes board and allocates resources */ - int (*attach)(gpib_board_t *board, const gpib_board_config_t *config); + int (*attach)(struct gpib_board *board, const gpib_board_config_t *config); /* detach() shuts down board and frees resources */ - void (*detach)(gpib_board_t *board); + void (*detach)(struct gpib_board *board); /* read() should read at most 'length' bytes from the bus into * 'buffer'. It should return when it fills the buffer or * encounters an END (EOI and or EOS if appropriate). It should set 'end' @@ -68,19 +68,19 @@ struct gpib_interface_struct { * return indicates error. * nbytes returns number of bytes read */ - int (*read)(gpib_board_t *board, uint8_t *buffer, size_t length, int *end, + int (*read)(struct gpib_board *board, uint8_t *buffer, size_t length, int *end, size_t *bytes_read); /* write() should write 'length' bytes from buffer to the bus. * If the boolean value send_eoi is nonzero, then EOI should * be sent along with the last byte. Returns number of bytes * written or negative value on error. */ - int (*write)(gpib_board_t *board, uint8_t *buffer, size_t length, int send_eoi, + int (*write)(struct gpib_board *board, uint8_t *buffer, size_t length, int send_eoi, size_t *bytes_written); /* command() writes the command bytes in 'buffer' to the bus * Returns zero on success or negative value on error. */ - int (*command)(gpib_board_t *board, uint8_t *buffer, size_t length, + int (*command)(struct gpib_board *board, uint8_t *buffer, size_t length, size_t *bytes_written); /* Take control (assert ATN). If 'asyncronous' is nonzero, take * control asyncronously (assert ATN immediately without waiting @@ -88,54 +88,54 @@ struct gpib_interface_struct { * until board becomes controller in charge. Returns zero no success, * nonzero on error. */ - int (*take_control)(gpib_board_t *board, int asyncronous); + int (*take_control)(struct gpib_board *board, int asyncronous); /* De-assert ATN. Returns zero on success, nonzer on error. */ - int (*go_to_standby)(gpib_board_t *board); + int (*go_to_standby)(struct gpib_board *board); /* request/release control of the IFC and REN lines (system controller) */ - void (*request_system_control)(gpib_board_t *board, int request_control); + void (*request_system_control)(struct gpib_board *board, int request_control); /* Asserts or de-asserts 'interface clear' (IFC) depending on * boolean value of 'assert' */ - void (*interface_clear)(gpib_board_t *board, int assert); + void (*interface_clear)(struct gpib_board *board, int assert); /* Sends remote enable command if 'enable' is nonzero, disables remote mode * if 'enable' is zero */ - void (*remote_enable)(gpib_board_t *board, int enable); + void (*remote_enable)(struct gpib_board *board, int enable); /* enable END for reads, when byte 'eos' is received. If * 'compare_8_bits' is nonzero, then all 8 bits are compared * with the eos bytes. Otherwise only the 7 least significant * bits are compared. */ - int (*enable_eos)(gpib_board_t *board, uint8_t eos, int compare_8_bits); + int (*enable_eos)(struct gpib_board *board, uint8_t eos, int compare_8_bits); /* disable END on eos byte (END on EOI only)*/ - void (*disable_eos)(gpib_board_t *board); + void (*disable_eos)(struct gpib_board *board); /* configure parallel poll */ - void (*parallel_poll_configure)(gpib_board_t *board, uint8_t configuration); + void (*parallel_poll_configure)(struct gpib_board *board, uint8_t configuration); /* conduct parallel poll */ - int (*parallel_poll)(gpib_board_t *board, uint8_t *result); + int (*parallel_poll)(struct gpib_board *board, uint8_t *result); /* set/clear ist (individual status bit) */ - void (*parallel_poll_response)(gpib_board_t *board, int ist); + void (*parallel_poll_response)(struct gpib_board *board, int ist); /* select local parallel poll configuration mode PP2 versus remote PP1 */ - void (*local_parallel_poll_mode)(gpib_board_t *board, int local); + void (*local_parallel_poll_mode)(struct gpib_board *board, int local); /* Returns current status of the bus lines. Should be set to * NULL if your board does not have the ability to query the * state of the bus lines. */ - int (*line_status)(const gpib_board_t *board); + int (*line_status)(const struct gpib_board *board); /* updates and returns the board's current status. * The meaning of the bits are specified in gpib_user.h * in the IBSTA section. The driver does not need to * worry about setting the CMPL, END, TIMO, or ERR bits. */ - unsigned int (*update_status)(gpib_board_t *board, unsigned int clear_mask); + unsigned int (*update_status)(struct gpib_board *board, unsigned int clear_mask); /* Sets primary address 0-30 for gpib interface card. */ - int (*primary_address)(gpib_board_t *board, unsigned int address); + int (*primary_address)(struct gpib_board *board, unsigned int address); /* Sets and enables, or disables secondary address 0-30 * for gpib interface card. */ - int (*secondary_address)(gpib_board_t *board, unsigned int address, + int (*secondary_address)(struct gpib_board *board, unsigned int address, int enable); /* Sets the byte the board should send in response to a serial poll. * This function should also start or stop requests for service via @@ -149,7 +149,7 @@ struct gpib_interface_struct { * by IEEE 488.2 section 11.3.3.4.3 "Allowed Coupled Control of * STB, reqt, and reqf". */ - void (*serial_poll_response)(gpib_board_t *board, uint8_t status_byte); + void (*serial_poll_response)(struct gpib_board *board, uint8_t status_byte); /* Sets the byte the board should send in response to a serial poll. * This function should also request service via IEEE 488.2 reqt/reqf * based on MSS (bit 6 of the status_byte) and new_reason_for_service. @@ -164,15 +164,15 @@ struct gpib_interface_struct { * If this method is left NULL by the driver, then the user library * function ibrsv2 will not work. */ - void (*serial_poll_response2)(gpib_board_t *board, uint8_t status_byte, + void (*serial_poll_response2)(struct gpib_board *board, uint8_t status_byte, int new_reason_for_service); /* returns the byte the board will send in response to a serial poll. */ - uint8_t (*serial_poll_status)(gpib_board_t *board); + uint8_t (*serial_poll_status)(struct gpib_board *board); /* adjust T1 delay */ - unsigned int (*t1_delay)(gpib_board_t *board, unsigned int nano_sec); + unsigned int (*t1_delay)(struct gpib_board *board, unsigned int nano_sec); /* go to local mode */ - void (*return_to_local)(gpib_board_t *board); + void (*return_to_local)(struct gpib_board *board); /* board does not support 7 bit eos comparisons */ unsigned no_7_bit_eos : 1; /* skip check for listeners before trying to send command bytes */ @@ -198,7 +198,7 @@ static inline void init_event_queue(gpib_event_queue_t *queue) struct gpib_pseudo_irq { struct timer_list timer; irqreturn_t (*handler)(int irq, void *arg); - gpib_board_t *board; + struct gpib_board *board; atomic_t active; }; @@ -216,7 +216,7 @@ typedef struct gpib_interface_list_struct { struct module *module; } gpib_interface_list_t; -/* One gpib_board_t is allocated for each physical board in the computer. +/* One struct gpib_board is allocated for each physical board in the computer. * It provides storage for variables local to each board, and interface * functions for performing operations on the board */ From 4a2b4d93e8b3bf986ccdaa128354dddc3b8da7e0 Mon Sep 17 00:00:00 2001 From: Michael Rubin Date: Wed, 19 Mar 2025 21:59:24 +0000 Subject: [PATCH 1462/2157] staging: gpib: Removing typedef for gpib_board Removing typedef as per Linux code style. Adhering to Linux code style. In general, a pointer, or a struct that has elements that can reasonably be directly accessed should never be a typedef. Signed-off-by: Michael Rubin Acked-By: Dave Penkler Link: https://lore.kernel.org/r/20250319215924.19387-21-matchstick@neverthere.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/include/gpib_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/include/gpib_types.h b/drivers/staging/gpib/include/gpib_types.h index 515fdb91d150..31b35f2cb2e1 100644 --- a/drivers/staging/gpib/include/gpib_types.h +++ b/drivers/staging/gpib/include/gpib_types.h @@ -23,7 +23,7 @@ #include typedef struct gpib_interface_struct gpib_interface_t; -typedef struct gpib_board gpib_board_t; +struct gpib_board; /* config parameters that are only used by driver attach functions */ typedef struct { From 4f991a6430f7634d7e5c6cfa089160c6337f3bc2 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Wed, 5 Mar 2025 16:32:16 -0300 Subject: [PATCH 1463/2157] staging: gpib: fix kernel-doc section for write_loop() function Add a colon character in the kernel-doc section of write_loop() in order 'leng' to be picked as argument by the kernel-doc compiler. This change fix the following warning: warning: Function parameter or struct member 'leng' not described in 'send_command' Signed-off-by: Gaston Gonzalez Link: https://lore.kernel.org/r/20250305193614.39604-3-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 1675aa2aff6c..7f3da11291f6 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -232,7 +232,7 @@ static int write_loop(void *dev, char *msg, int leng) * * @board: the gpib_board_struct data area for this gpib interface * @msg: the byte sequence. - * @leng the byte sequence length; can be given as zero and is + * @leng: the byte sequence length; can be given as zero and is * computed automatically, but if 'msg' contains a zero byte, * it has to be given explicitly. */ From 4ec9b9b584c9b3e3515d4174aa815589e70e65aa Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Wed, 5 Mar 2025 16:32:18 -0300 Subject: [PATCH 1464/2157] staging: gpib: fix kernel-doc section for function usb_gpib_interface_clear() Add '@' character in kernel-doc comment in order 'assert' to be picked as argument by the kernel-doc compiler. This change fix the following warning: warning: Function parameter or struct member 'assert' not described in 'usb_gpib_interface_clear' Signed-off-by: Gaston Gonzalez Link: https://lore.kernel.org/r/20250305193614.39604-5-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 7f3da11291f6..37d97cd01822 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -621,7 +621,7 @@ static int usb_gpib_go_to_standby(struct gpib_board *board) * usb_gpib_interface_clear() - Assert or de-assert IFC * * @board: the gpib_board data area for this gpib interface - * assert: 1: assert IFC; 0: de-assert IFC + * @assert: 1: assert IFC; 0: de-assert IFC * * Currently on the assert request we issue the lpvo IBZ * command that cycles IFC low for 100 usec, then we ignore From f17cd486391cc24dcc3d4d63699721c6a7937b84 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Wed, 5 Mar 2025 16:32:20 -0300 Subject: [PATCH 1465/2157] staging: gpib: fix kernel-doc section for usb_gpib_line_status() function The function name field in the kernel-doc section for the usb_gpib_line_status() is defined as 'line_status'. In addition, after the kernel-doc section, there are three macro definition instead of the function definition. These issues trigger the warning: warning: expecting prototype for line_status(). Prototype was for WQT() instead. Fix the warning by renaming the function in the kernel-doc section and by moving the macros to the beginning of the file with the rest of macros definition. Signed-off-by: Gaston Gonzalez Link: https://lore.kernel.org/r/20250305193614.39604-7-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 37d97cd01822..041600dc443f 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -78,6 +78,10 @@ module_param(debug, int, 0644); dev_dbg(board->gpib_dev, format, ## __VA_ARGS__); } \ while (0) +#define WQT wait_queue_entry_t +#define WQH head +#define WQE entry + /* standard and extended command sets of the usb-gpib adapter */ #define USB_GPIB_ON "\nIB\n" @@ -644,17 +648,12 @@ static void usb_gpib_interface_clear(struct gpib_board *board, int assert) } /** - * line_status() - Read the status of the bus lines. + * usb_gpib_line_status() - Read the status of the bus lines. * * @board: the gpib_board data area for this gpib interface * * We can read all lines. */ - -#define WQT wait_queue_entry_t -#define WQH head -#define WQE entry - static int usb_gpib_line_status(const struct gpib_board *board) { int buffer; From 97d83d292ba10313aec366141d24c24eeaa3cb7a Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Wed, 5 Mar 2025 16:32:22 -0300 Subject: [PATCH 1466/2157] staging: gpib: remove commented-out lines Remove commented-out code in function write_loop(). Signed-off-by: Gaston Gonzalez Link: https://lore.kernel.org/r/20250305193614.39604-9-gascoar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 041600dc443f..282d7387574e 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -217,18 +217,7 @@ static inline int usec_diff(struct timespec64 *a, struct timespec64 *b) static int write_loop(void *dev, char *msg, int leng) { -// int nchar = 0, val; - -// do { - return skel_do_write(dev, msg, leng); - -// if (val < 1) { -// return -EIO; -// } -// nchar +=val; -// } while (nchar < leng); -// return leng; } /** From ed3751860e6ca5f4f3bb8db3006e607460b047cd Mon Sep 17 00:00:00 2001 From: Rodrigo Gobbi Date: Mon, 24 Feb 2025 22:44:05 -0300 Subject: [PATCH 1467/2157] staging: gpib: change return type of t1_delay function to report errors The current code returns "unsigned int" and it doesn't handle errors correctly if it happens during ioctl call for t1 delay configuration. The ni_usb_t1_delay(), from NI, is the only function returning -1 at this point. The caller, t1_delay_ioctl(), doesn't check for errors and sets board->t1_nano_sec to -1 and returns success. The board->t1_nano_sec value is also used in ni_usb_setup_t1_delay() besides the ioctl call and a value of -1 is treated as being above 1100ns. It may or may not have a noticeable effect, but it's obviously not right considering the content of ni_usb_setup_t1_delay(). Typical delays are in the 200-2000 range, but definitely not more than INT_MAX so we can fix this code by changing the return type to int and adding a check for errors. While we're at it, lets change the error code in ni_usb_t1_delay() from -1 and instead propagate the error from ni_usb_write_registers(). Fixes: 4e127de14fa7 ("staging: gpib: Add National Instruments USB GPIB driver") Signed-off-by: Rodrigo Gobbi Link: https://lore.kernel.org/r/20250225014811.77995-1-rodrigo.gobbi.7@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 3 +-- drivers/staging/gpib/agilent_82357a/agilent_82357a.c | 2 +- drivers/staging/gpib/cb7210/cb7210.c | 2 +- drivers/staging/gpib/cec/cec_gpib.c | 2 +- drivers/staging/gpib/common/gpib_os.c | 5 ++++- drivers/staging/gpib/eastwood/fluke_gpib.c | 2 +- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 2 +- drivers/staging/gpib/gpio/gpib_bitbang.c | 2 +- drivers/staging/gpib/hp_82335/hp82335.c | 2 +- drivers/staging/gpib/hp_82341/hp_82341.c | 2 +- drivers/staging/gpib/include/gpib_types.h | 2 +- drivers/staging/gpib/include/nec7210.h | 4 ++-- drivers/staging/gpib/ines/ines.h | 2 +- drivers/staging/gpib/ines/ines_gpib.c | 2 +- drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c | 2 +- drivers/staging/gpib/nec7210/nec7210.c | 4 ++-- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 4 ++-- drivers/staging/gpib/pc2/pc2_gpib.c | 2 +- drivers/staging/gpib/tnt4882/tnt4882_gpib.c | 2 +- 19 files changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 15a9c4ab77ba..445b9380ff98 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -455,8 +455,7 @@ static int agilent_82350b_line_status(const struct gpib_board *board) return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int agilent_82350b_t1_delay(struct gpib_board *board, - unsigned int nanosec) +static int agilent_82350b_t1_delay(struct gpib_board *board, unsigned int nanosec) { struct agilent_82350b_priv *a_priv = board->private_data; static const int nanosec_per_clock = 30; diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 85e12c33f118..67bf125645c0 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -1071,7 +1071,7 @@ static unsigned short nanosec_to_fast_talker_bits(unsigned int *nanosec) return bits; } -static unsigned int agilent_82357a_t1_delay(struct gpib_board *board, unsigned int nanosec) +static int agilent_82357a_t1_delay(struct gpib_board *board, unsigned int nanosec) { struct agilent_82357a_priv *a_priv = board->private_data; struct usb_device *usb_dev; diff --git a/drivers/staging/gpib/cb7210/cb7210.c b/drivers/staging/gpib/cb7210/cb7210.c index 621aa8fda913..6b22a33a8c4f 100644 --- a/drivers/staging/gpib/cb7210/cb7210.c +++ b/drivers/staging/gpib/cb7210/cb7210.c @@ -408,7 +408,7 @@ static int cb7210_line_status(const struct gpib_board *board) return status; } -static unsigned int cb7210_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int cb7210_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct cb7210_priv *cb_priv = board->private_data; struct nec7210_priv *nec_priv = &cb_priv->nec7210_priv; diff --git a/drivers/staging/gpib/cec/cec_gpib.c b/drivers/staging/gpib/cec/cec_gpib.c index abe5b1b8bc5b..a822fa428cd0 100644 --- a/drivers/staging/gpib/cec/cec_gpib.c +++ b/drivers/staging/gpib/cec/cec_gpib.c @@ -174,7 +174,7 @@ static uint8_t cec_serial_poll_status(struct gpib_board *board) return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static unsigned int cec_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int cec_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct cec_priv *priv = board->private_data; diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c index 9bf2076cbbb3..cb77fe0a4b9a 100644 --- a/drivers/staging/gpib/common/gpib_os.c +++ b/drivers/staging/gpib/common/gpib_os.c @@ -1990,8 +1990,11 @@ static int t1_delay_ioctl(struct gpib_board *board, unsigned long arg) delay = cmd; - board->t1_nano_sec = board->interface->t1_delay(board, delay); + retval = board->interface->t1_delay(board, delay); + if (retval < 0) + return retval; + board->t1_nano_sec = retval; return 0; } diff --git a/drivers/staging/gpib/eastwood/fluke_gpib.c b/drivers/staging/gpib/eastwood/fluke_gpib.c index b3b629c892e2..a6b1ac169f94 100644 --- a/drivers/staging/gpib/eastwood/fluke_gpib.c +++ b/drivers/staging/gpib/eastwood/fluke_gpib.c @@ -224,7 +224,7 @@ static int fluke_line_status(const struct gpib_board *board) return status; } -static unsigned int fluke_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int fluke_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct fluke_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index f6bf127441f8..53f4b3fccc3c 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -261,7 +261,7 @@ static int fmh_gpib_line_status(const struct gpib_board *board) return status; } -static unsigned int fmh_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int fmh_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct fmh_priv *e_priv = board->private_data; struct nec7210_priv *nec_priv = &e_priv->nec7210_priv; diff --git a/drivers/staging/gpib/gpio/gpib_bitbang.c b/drivers/staging/gpib/gpio/gpib_bitbang.c index 611ff58b94ca..86bdd381472a 100644 --- a/drivers/staging/gpib/gpio/gpib_bitbang.c +++ b/drivers/staging/gpib/gpio/gpib_bitbang.c @@ -1009,7 +1009,7 @@ static uint8_t bb_serial_poll_status(struct gpib_board *board) return 0; // -ENOENT; } -static unsigned int bb_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int bb_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct bb_priv *priv = board->private_data; diff --git a/drivers/staging/gpib/hp_82335/hp82335.c b/drivers/staging/gpib/hp_82335/hp82335.c index 368e60c6ecd2..fd23b1cb80f9 100644 --- a/drivers/staging/gpib/hp_82335/hp82335.c +++ b/drivers/staging/gpib/hp_82335/hp82335.c @@ -165,7 +165,7 @@ static int hp82335_line_status(const struct gpib_board *board) return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int hp82335_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int hp82335_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct hp82335_priv *priv = board->private_data; diff --git a/drivers/staging/gpib/hp_82341/hp_82341.c b/drivers/staging/gpib/hp_82341/hp_82341.c index 5d76d01f6b32..f52e673dc869 100644 --- a/drivers/staging/gpib/hp_82341/hp_82341.c +++ b/drivers/staging/gpib/hp_82341/hp_82341.c @@ -396,7 +396,7 @@ static int hp_82341_line_status(const struct gpib_board *board) return tms9914_line_status(board, &priv->tms9914_priv); } -static unsigned int hp_82341_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int hp_82341_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct hp_82341_priv *priv = board->private_data; diff --git a/drivers/staging/gpib/include/gpib_types.h b/drivers/staging/gpib/include/gpib_types.h index 31b35f2cb2e1..2d9b9be683f8 100644 --- a/drivers/staging/gpib/include/gpib_types.h +++ b/drivers/staging/gpib/include/gpib_types.h @@ -170,7 +170,7 @@ struct gpib_interface_struct { */ uint8_t (*serial_poll_status)(struct gpib_board *board); /* adjust T1 delay */ - unsigned int (*t1_delay)(struct gpib_board *board, unsigned int nano_sec); + int (*t1_delay)(struct gpib_board *board, unsigned int nano_sec); /* go to local mode */ void (*return_to_local)(struct gpib_board *board); /* board does not support 7 bit eos comparisons */ diff --git a/drivers/staging/gpib/include/nec7210.h b/drivers/staging/gpib/include/nec7210.h index 6c49283bd139..069896456230 100644 --- a/drivers/staging/gpib/include/nec7210.h +++ b/drivers/staging/gpib/include/nec7210.h @@ -108,8 +108,8 @@ void nec7210_parallel_poll_response(struct gpib_board *board, struct nec7210_priv *priv, int ist); uint8_t nec7210_serial_poll_status(struct gpib_board *board, struct nec7210_priv *priv); -unsigned int nec7210_t1_delay(struct gpib_board *board, - struct nec7210_priv *priv, unsigned int nano_sec); +int nec7210_t1_delay(struct gpib_board *board, + struct nec7210_priv *priv, unsigned int nano_sec); void nec7210_return_to_local(const struct gpib_board *board, struct nec7210_priv *priv); // utility functions diff --git a/drivers/staging/gpib/ines/ines.h b/drivers/staging/gpib/ines/ines.h index b17475aed046..ff27f055a0ff 100644 --- a/drivers/staging/gpib/ines/ines.h +++ b/drivers/staging/gpib/ines/ines.h @@ -60,7 +60,7 @@ void ines_parallel_poll_response(struct gpib_board *board, int ist); void ines_serial_poll_response(struct gpib_board *board, uint8_t status); uint8_t ines_serial_poll_status(struct gpib_board *board); int ines_line_status(const struct gpib_board *board); -unsigned int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec); +int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec); void ines_return_to_local(struct gpib_board *board); // interrupt service routines diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index 983bb88a4376..d93eb05dab90 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -65,7 +65,7 @@ void ines_set_xfer_counter(struct ines_priv *priv, unsigned int count) ines_outb(priv, count & 0xff, XFER_COUNT_LOWER); } -unsigned int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec) +int ines_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct ines_priv *ines_priv = board->private_data; struct nec7210_priv *nec_priv = &ines_priv->nec7210_priv; diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 282d7387574e..faf96e9cc4a1 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -1044,7 +1044,7 @@ static uint8_t usb_gpib_serial_poll_status(struct gpib_board *board) /* t1_delay */ -static unsigned int usb_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int usb_gpib_t1_delay(struct gpib_board *board, unsigned int nano_sec) { return 0; } diff --git a/drivers/staging/gpib/nec7210/nec7210.c b/drivers/staging/gpib/nec7210/nec7210.c index 773495cde9d8..846c0a3fa1dc 100644 --- a/drivers/staging/gpib/nec7210/nec7210.c +++ b/drivers/staging/gpib/nec7210/nec7210.c @@ -373,8 +373,8 @@ void nec7210_release_rfd_holdoff(struct gpib_board *board, struct nec7210_priv * } EXPORT_SYMBOL(nec7210_release_rfd_holdoff); -unsigned int nec7210_t1_delay(struct gpib_board *board, struct nec7210_priv *priv, - unsigned int nano_sec) +int nec7210_t1_delay(struct gpib_board *board, struct nec7210_priv *priv, + unsigned int nano_sec) { unsigned int retval; diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index fdc8077d646e..14f7049a8e5e 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -1616,7 +1616,7 @@ static int ni_usb_setup_t1_delay(struct ni_usb_register *reg, unsigned int nano_ return i; } -static unsigned int ni_usb_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int ni_usb_t1_delay(struct gpib_board *board, unsigned int nano_sec) { int retval; struct ni_usb_priv *ni_priv = board->private_data; @@ -1633,7 +1633,7 @@ static unsigned int ni_usb_t1_delay(struct gpib_board *board, unsigned int nano_ retval = ni_usb_write_registers(ni_priv, writes, i, &ibsta); if (retval < 0) { dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval); - return -1; //FIXME should change return type to int for error reporting + return retval; } board->t1_nano_sec = actual_ns; ni_usb_soft_update_status(board, ibsta, 0); diff --git a/drivers/staging/gpib/pc2/pc2_gpib.c b/drivers/staging/gpib/pc2/pc2_gpib.c index d8e0733d8ab0..96d3c09f2273 100644 --- a/drivers/staging/gpib/pc2/pc2_gpib.c +++ b/drivers/staging/gpib/pc2/pc2_gpib.c @@ -218,7 +218,7 @@ static uint8_t pc2_serial_poll_status(struct gpib_board *board) return nec7210_serial_poll_status(board, &priv->nec7210_priv); } -static unsigned int pc2_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int pc2_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct pc2_priv *priv = board->private_data; diff --git a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c index bc99526f2d0c..c35b084b6fd0 100644 --- a/drivers/staging/gpib/tnt4882/tnt4882_gpib.c +++ b/drivers/staging/gpib/tnt4882/tnt4882_gpib.c @@ -178,7 +178,7 @@ static int tnt4882_line_status(const struct gpib_board *board) return status; } -static unsigned int tnt4882_t1_delay(struct gpib_board *board, unsigned int nano_sec) +static int tnt4882_t1_delay(struct gpib_board *board, unsigned int nano_sec) { struct tnt4882_priv *tnt_priv = board->private_data; struct nec7210_priv *nec_priv = &tnt_priv->nec7210_priv; From 3691b585b909d4382f2c96da6900d19b613e9521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Sat, 22 Feb 2025 19:03:45 +0000 Subject: [PATCH 1468/2157] staging: rtl8723bs: Remove some unused functions, macros, and structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove some functions, macros, and structs that have not been used since they were introduced in commit 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver"). Signed-off-by: 谢致邦 (XIE Zhibang) Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/tencent_C69BFF8D3EC7B66BFCF0063ED3DEF4BC590A@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/hal_com.c | 3 + .../staging/rtl8723bs/include/osdep_intf.h | 27 -- .../staging/rtl8723bs/include/rtl8723b_hal.h | 1 - drivers/staging/rtl8723bs/include/rtw_io.h | 92 ----- drivers/staging/rtl8723bs/include/rtw_mp.h | 341 ------------------ drivers/staging/rtl8723bs/os_dep/os_intfs.c | 2 - 6 files changed, 3 insertions(+), 463 deletions(-) delete mode 100644 drivers/staging/rtl8723bs/include/rtw_mp.h diff --git a/drivers/staging/rtl8723bs/hal/hal_com.c b/drivers/staging/rtl8723bs/hal/hal_com.c index b41ec89932af..1213a91cffff 100644 --- a/drivers/staging/rtl8723bs/hal/hal_com.c +++ b/drivers/staging/rtl8723bs/hal/hal_com.c @@ -884,6 +884,9 @@ static u32 Array_kfreemap[] = { 0xfc, 0x0, }; +#define REG_RF_BB_GAIN_OFFSET 0x7f +//#define RF_GAIN_OFFSET_MASK 0xfffff + void rtw_bb_rf_gain_offset(struct adapter *padapter) { u8 value = padapter->eeprompriv.EEPROMRFGainOffset; diff --git a/drivers/staging/rtl8723bs/include/osdep_intf.h b/drivers/staging/rtl8723bs/include/osdep_intf.h index 73199be78139..83a25598e962 100644 --- a/drivers/staging/rtl8723bs/include/osdep_intf.h +++ b/drivers/staging/rtl8723bs/include/osdep_intf.h @@ -8,33 +8,6 @@ #ifndef __OSDEP_INTF_H_ #define __OSDEP_INTF_H_ - -struct intf_priv { - - u8 *intf_dev; - u32 max_iosz; /* USB2.0: 128, USB1.1: 64, SDIO:64 */ - u32 max_xmitsz; /* USB2.0: unlimited, SDIO:512 */ - u32 max_recvsz; /* USB2.0: unlimited, SDIO:512 */ - - volatile u8 *io_rwmem; - volatile u8 *allocated_io_rwmem; - u32 io_wsz; /* unit: 4bytes */ - u32 io_rsz;/* unit: 4bytes */ - u8 intf_status; - - void (*_bus_io)(u8 *priv); - -/* -Under Sync. IRP (SDIO/USB) -A protection mechanism is necessary for the io_rwmem(read/write protocol) - -Under Async. IRP (SDIO/USB) -The protection mechanism is through the pending queue. -*/ - - struct mutex ioctl_mutex; -}; - struct dvobj_priv *devobj_init(void); void devobj_deinit(struct dvobj_priv *pdvobj); diff --git a/drivers/staging/rtl8723bs/include/rtl8723b_hal.h b/drivers/staging/rtl8723bs/include/rtl8723b_hal.h index e6d6e9de5474..a4a14474c35d 100644 --- a/drivers/staging/rtl8723bs/include/rtl8723b_hal.h +++ b/drivers/staging/rtl8723bs/include/rtl8723b_hal.h @@ -15,7 +15,6 @@ #include "rtl8723b_recv.h" #include "rtl8723b_xmit.h" #include "rtl8723b_cmd.h" -#include "rtw_mp.h" #include "hal_pwr_seq.h" #include "Hal8192CPhyReg.h" #include "hal_phy_cfg.h" diff --git a/drivers/staging/rtl8723bs/include/rtw_io.h b/drivers/staging/rtl8723bs/include/rtw_io.h index 0ee87be6dc4f..adf1de4d7924 100644 --- a/drivers/staging/rtl8723bs/include/rtw_io.h +++ b/drivers/staging/rtl8723bs/include/rtw_io.h @@ -8,16 +8,7 @@ #ifndef _RTW_IO_H_ #define _RTW_IO_H_ -/* - For prompt mode accessing, caller shall free io_req - Otherwise, io_handler will free io_req -*/ - -/* below is for the intf_option bit definition... */ - -struct intf_priv; struct intf_hdl; -struct io_queue; struct _io_ops { u8 (*_read8)(struct intf_hdl *pintfhdl, u32 addr); @@ -36,8 +27,6 @@ struct _io_ops { void (*_read_mem)(struct intf_hdl *pintfhdl, u32 addr, u32 cnt, u8 *pmem); void (*_write_mem)(struct intf_hdl *pintfhdl, u32 addr, u32 cnt, u8 *pmem); - void (*_sync_irp_protocol_rw)(struct io_queue *pio_q); - u32 (*_read_interrupt)(struct intf_hdl *pintfhdl, u32 addr); u32 (*_read_port)(struct intf_hdl *pintfhdl, u32 addr, u32 cnt, u8 *pmem); @@ -49,18 +38,6 @@ struct _io_ops { void (*_write_port_cancel)(struct intf_hdl *pintfhdl); }; -struct io_req { - struct list_head list; - u32 addr; - volatile u32 val; - u32 command; - u32 status; - u8 *pbuf; - - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt); - u8 *cnxt; -}; - struct intf_hdl { struct adapter *padapter; struct dvobj_priv *pintf_dev;/* pointer to &(padapter->dvobjpriv); */ @@ -74,21 +51,6 @@ struct intf_hdl { int rtw_inc_and_chk_continual_io_error(struct dvobj_priv *dvobj); void rtw_reset_continual_io_error(struct dvobj_priv *dvobj); -/* -Below is the data structure used by _io_handler - -*/ - -struct io_queue { - spinlock_t lock; - struct list_head free_ioreqs; - struct list_head pending; /* The io_req list that will be served in the single protocol read/write. */ - struct list_head processing; - u8 *free_ioreqs_buf; /* 4-byte aligned */ - u8 *pallocated_free_ioreqs_buf; - struct intf_hdl intf; -}; - struct io_priv { struct adapter *padapter; @@ -97,20 +59,6 @@ struct io_priv { }; -extern uint ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue); -extern void sync_ioreq_enqueue(struct io_req *preq, struct io_queue *ioqueue); -extern uint sync_ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue); - - -extern uint free_ioreq(struct io_req *preq, struct io_queue *pio_queue); -extern struct io_req *alloc_ioreq(struct io_queue *pio_q); - -extern uint register_intf_hdl(u8 *dev, struct intf_hdl *pintfhdl); -extern void unregister_intf_hdl(struct intf_hdl *pintfhdl); - -extern void _rtw_attrib_read(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); -extern void _rtw_attrib_write(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); - extern u8 rtw_read8(struct adapter *adapter, u32 addr); extern u16 rtw_read16(struct adapter *adapter, u32 addr); extern u32 rtw_read32(struct adapter *adapter, u32 addr); @@ -121,46 +69,6 @@ extern int rtw_write32(struct adapter *adapter, u32 addr, u32 val); extern u32 rtw_write_port(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); -extern void rtw_write_scsi(struct adapter *adapter, u32 cnt, u8 *pmem); - -/* ioreq */ -extern void ioreq_read8(struct adapter *adapter, u32 addr, u8 *pval); -extern void ioreq_read16(struct adapter *adapter, u32 addr, u16 *pval); -extern void ioreq_read32(struct adapter *adapter, u32 addr, u32 *pval); -extern void ioreq_write8(struct adapter *adapter, u32 addr, u8 val); -extern void ioreq_write16(struct adapter *adapter, u32 addr, u16 val); -extern void ioreq_write32(struct adapter *adapter, u32 addr, u32 val); - - -extern uint async_read8(struct adapter *adapter, u32 addr, u8 *pbuff, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); -extern uint async_read16(struct adapter *adapter, u32 addr, u8 *pbuff, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); -extern uint async_read32(struct adapter *adapter, u32 addr, u8 *pbuff, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); - -extern void async_read_mem(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); -extern void async_read_port(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); - -extern void async_write8(struct adapter *adapter, u32 addr, u8 val, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); -extern void async_write16(struct adapter *adapter, u32 addr, u16 val, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); -extern void async_write32(struct adapter *adapter, u32 addr, u32 val, - void (*_async_io_callback)(struct adapter *padater, struct io_req *pio_req, u8 *cnxt), u8 *cnxt); - -extern void async_write_mem(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); -extern void async_write_port(struct adapter *adapter, u32 addr, u32 cnt, u8 *pmem); - - int rtw_init_io_priv(struct adapter *padapter, void (*set_intf_ops)(struct adapter *padapter, struct _io_ops *pops)); - -extern uint alloc_io_queue(struct adapter *adapter); -extern void free_io_queue(struct adapter *adapter); -extern void async_bus_io(struct io_queue *pio_q); -extern void bus_sync_io(struct io_queue *pio_q); -extern u32 _ioreq2rwmem(struct io_queue *pio_q); -extern void dev_power_down(struct adapter *Adapter, u8 bpwrup); - #endif /* _RTL8711_IO_H_ */ diff --git a/drivers/staging/rtl8723bs/include/rtw_mp.h b/drivers/staging/rtl8723bs/include/rtw_mp.h deleted file mode 100644 index 5a1cbd2ed851..000000000000 --- a/drivers/staging/rtl8723bs/include/rtw_mp.h +++ /dev/null @@ -1,341 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/****************************************************************************** - * - * Copyright(c) 2007 - 2011 Realtek Corporation. All rights reserved. - * - ******************************************************************************/ -#ifndef _RTW_MP_H_ -#define _RTW_MP_H_ - -#define MAX_MP_XMITBUF_SZ 2048 - -struct mp_xmit_frame { - struct list_head list; - - struct pkt_attrib attrib; - - struct sk_buff *pkt; - - int frame_tag; - - struct adapter *padapter; - - uint mem[(MAX_MP_XMITBUF_SZ >> 2)]; -}; - -struct mp_wiparam { - u32 bcompleted; - u32 act_type; - u32 io_offset; - u32 io_value; -}; - -struct mp_tx { - u8 stop; - u32 count, sended; - u8 payload; - struct pkt_attrib attrib; - /* struct tx_desc desc; */ - /* u8 resvdtx[7]; */ - u8 desc[TXDESC_SIZE]; - u8 *pallocated_buf; - u8 *buf; - u32 buf_size, write_size; - void *PktTxThread; -}; - -#define MP_MAX_LINES 1000 -#define MP_MAX_LINES_BYTES 256 - -typedef void (*MPT_WORK_ITEM_HANDLER)(void *Adapter); -struct mpt_context { - /* Indicate if we have started Mass Production Test. */ - bool bMassProdTest; - - /* Indicate if the driver is unloading or unloaded. */ - bool bMptDrvUnload; - - struct timer_list MPh2c_timeout_timer; -/* Event used to sync H2c for BT control */ - - bool MptH2cRspEvent; - bool MptBtC2hEvent; - bool bMPh2c_timeout; - - /* 8190 PCI does not support NDIS_WORK_ITEM. */ - /* Work Item for Mass Production Test. */ - /* NDIS_WORK_ITEM MptWorkItem; */ -/* RT_WORK_ITEM MptWorkItem; */ - /* Event used to sync the case unloading driver and MptWorkItem is still in progress. */ -/* NDIS_EVENT MptWorkItemEvent; */ - /* To protect the following variables. */ -/* NDIS_SPIN_LOCK MptWorkItemSpinLock; */ - /* Indicate a MptWorkItem is scheduled and not yet finished. */ - bool bMptWorkItemInProgress; - /* An instance which implements function and context of MptWorkItem. */ - MPT_WORK_ITEM_HANDLER CurrMptAct; - - /* 1 =Start, 0 =Stop from UI. */ - u32 MptTestStart; - /* _TEST_MODE, defined in MPT_Req2.h */ - u32 MptTestItem; - /* Variable needed in each implementation of CurrMptAct. */ - u32 MptActType; /* Type of action performed in CurrMptAct. */ - /* The Offset of IO operation is depend of MptActType. */ - u32 MptIoOffset; - /* The Value of IO operation is depend of MptActType. */ - u32 MptIoValue; - /* The RfPath of IO operation is depend of MptActType. */ - u32 MptRfPath; - - enum wireless_mode MptWirelessModeToSw; /* Wireless mode to switch. */ - u8 MptChannelToSw; /* Channel to switch. */ - u8 MptInitGainToSet; /* Initial gain to set. */ - u32 MptBandWidth; /* bandwidth to switch. */ - u32 MptRateIndex; /* rate index. */ - /* Register value kept for Single Carrier Tx test. */ - u8 btMpCckTxPower; - /* Register value kept for Single Carrier Tx test. */ - u8 btMpOfdmTxPower; - /* For MP Tx Power index */ - u8 TxPwrLevel[2]; /* rf-A, rf-B */ - u32 RegTxPwrLimit; - /* Content of RCR Register for Mass Production Test. */ - u32 MptRCR; - /* true if we only receive packets with specific pattern. */ - bool bMptFilterPattern; - /* Rx OK count, statistics used in Mass Production Test. */ - u32 MptRxOkCnt; - /* Rx CRC32 error count, statistics used in Mass Production Test. */ - u32 MptRxCrcErrCnt; - - bool bCckContTx; /* true if we are in CCK Continuous Tx test. */ - bool bOfdmContTx; /* true if we are in OFDM Continuous Tx test. */ - bool bStartContTx; /* true if we have start Continuous Tx test. */ - /* true if we are in Single Carrier Tx test. */ - bool bSingleCarrier; - /* true if we are in Carrier Suppression Tx Test. */ - bool bCarrierSuppression; - /* true if we are in Single Tone Tx test. */ - bool bSingleTone; - - /* ACK counter asked by K.Y.. */ - bool bMptEnableAckCounter; - u32 MptAckCounter; - - /* SD3 Willis For 8192S to save 1T/2T RF table for ACUT Only fro ACUT delete later ~~~! */ - /* s8 BufOfLines[2][MAX_LINES_HWCONFIG_TXT][MAX_BYTES_LINE_HWCONFIG_TXT]; */ - /* s8 BufOfLines[2][MP_MAX_LINES][MP_MAX_LINES_BYTES]; */ - /* s32 RfReadLine[2]; */ - - u8 APK_bound[2]; /* for APK path A/path B */ - bool bMptIndexEven; - - u8 backup0xc50; - u8 backup0xc58; - u8 backup0xc30; - u8 backup0x52_RF_A; - u8 backup0x52_RF_B; - - u32 backup0x58_RF_A; - u32 backup0x58_RF_B; - - u8 h2cReqNum; - u8 c2hBuf[32]; - - u8 btInBuf[100]; - u32 mptOutLen; - u8 mptOutBuf[100]; - -}; -/* endif */ - -/* define RTPRIV_IOCTL_MP (SIOCIWFIRSTPRIV + 0x17) */ -enum { - WRITE_REG = 1, - READ_REG, - WRITE_RF, - READ_RF, - MP_START, - MP_STOP, - MP_RATE, - MP_CHANNEL, - MP_BANDWIDTH, - MP_TXPOWER, - MP_ANT_TX, - MP_ANT_RX, - MP_CTX, - MP_QUERY, - MP_ARX, - MP_PSD, - MP_PWRTRK, - MP_THER, - MP_IOCTL, - EFUSE_GET, - EFUSE_SET, - MP_RESET_STATS, - MP_DUMP, - MP_PHYPARA, - MP_SetRFPathSwh, - MP_QueryDrvStats, - MP_SetBT, - CTA_TEST, - MP_DISABLE_BT_COEXIST, - MP_PwrCtlDM, - MP_NULL, - MP_GET_TXPOWER_INX, -}; - -struct mp_priv { - struct adapter *papdater; - - /* Testing Flag */ - u32 mode;/* 0 for normal type packet, 1 for loopback packet (16bytes TXCMD) */ - - u32 prev_fw_state; - - /* OID cmd handler */ - struct mp_wiparam workparam; -/* u8 act_in_progress; */ - - /* Tx Section */ - u8 TID; - u32 tx_pktcount; - u32 pktInterval; - struct mp_tx tx; - - /* Rx Section */ - u32 rx_bssidpktcount; - u32 rx_pktcount; - u32 rx_pktcount_filter_out; - u32 rx_crcerrpktcount; - u32 rx_pktloss; - bool rx_bindicatePkt; - struct recv_stat rxstat; - - /* RF/BB relative */ - u8 channel; - u8 bandwidth; - u8 prime_channel_offset; - u8 txpoweridx; - u8 txpoweridx_b; - u8 rateidx; - u32 preamble; -/* u8 modem; */ - u32 CrystalCap; -/* u32 curr_crystalcap; */ - - u16 antenna_tx; - u16 antenna_rx; -/* u8 curr_rfpath; */ - - u8 check_mp_pkt; - - u8 bSetTxPower; -/* uint ForcedDataRate; */ - u8 mp_dm; - u8 mac_filter[ETH_ALEN]; - u8 bmac_filter; - - struct wlan_network mp_network; - NDIS_802_11_MAC_ADDRESS network_macaddr; - - u8 *pallocated_mp_xmitframe_buf; - u8 *pmp_xmtframe_buf; - struct __queue free_mp_xmitqueue; - u32 free_mp_xmitframe_cnt; - bool bSetRxBssid; - bool bTxBufCkFail; - - struct mpt_context MptCtx; - - u8 *TXradomBuffer; -}; - -/* Hardware Registers */ -extern u8 mpdatarate[NumRates]; - -#define MAX_TX_PWR_INDEX_N_MODE 64 /* 0x3F */ - -#define REG_RF_BB_GAIN_OFFSET 0x7f -#define RF_GAIN_OFFSET_MASK 0xfffff - -/* */ -/* struct mp_xmit_frame *alloc_mp_xmitframe(struct mp_priv *pmp_priv); */ -/* int free_mp_xmitframe(struct xmit_priv *pxmitpriv, struct mp_xmit_frame *pmp_xmitframe); */ - -s32 init_mp_priv(struct adapter *padapter); -void free_mp_priv(struct mp_priv *pmp_priv); -s32 MPT_InitializeAdapter(struct adapter *padapter, u8 Channel); -void MPT_DeInitAdapter(struct adapter *padapter); -s32 mp_start_test(struct adapter *padapter); -void mp_stop_test(struct adapter *padapter); - -u32 _read_rfreg(struct adapter *padapter, u8 rfpath, u32 addr, u32 bitmask); -void _write_rfreg(struct adapter *padapter, u8 rfpath, u32 addr, u32 bitmask, u32 val); - -u32 read_macreg(struct adapter *padapter, u32 addr, u32 sz); -void write_macreg(struct adapter *padapter, u32 addr, u32 val, u32 sz); - -void SetChannel(struct adapter *padapter); -void SetBandwidth(struct adapter *padapter); -int SetTxPower(struct adapter *padapter); -void SetAntennaPathPower(struct adapter *padapter); -void SetDataRate(struct adapter *padapter); - -void SetAntenna(struct adapter *padapter); - -s32 SetThermalMeter(struct adapter *padapter, u8 target_ther); -void GetThermalMeter(struct adapter *padapter, u8 *value); - -void SetContinuousTx(struct adapter *padapter, u8 bStart); -void SetSingleCarrierTx(struct adapter *padapter, u8 bStart); -void SetSingleToneTx(struct adapter *padapter, u8 bStart); -void SetCarrierSuppressionTx(struct adapter *padapter, u8 bStart); -void PhySetTxPowerLevel(struct adapter *padapter); - -void fill_txdesc_for_mp(struct adapter *padapter, u8 *ptxdesc); -void SetPacketTx(struct adapter *padapter); -void SetPacketRx(struct adapter *padapter, u8 bStartRx); - -void ResetPhyRxPktCount(struct adapter *padapter); -u32 GetPhyRxPktReceived(struct adapter *padapter); -u32 GetPhyRxPktCRC32Error(struct adapter *padapter); - -s32 SetPowerTracking(struct adapter *padapter, u8 enable); -void GetPowerTracking(struct adapter *padapter, u8 *enable); - -u32 mp_query_psd(struct adapter *padapter, u8 *data); - -void Hal_SetAntenna(struct adapter *padapter); -void Hal_SetBandwidth(struct adapter *padapter); - -void Hal_SetTxPower(struct adapter *padapter); -void Hal_SetCarrierSuppressionTx(struct adapter *padapter, u8 bStart); -void Hal_SetSingleToneTx(struct adapter *padapter, u8 bStart); -void Hal_SetSingleCarrierTx(struct adapter *padapter, u8 bStart); -void Hal_SetContinuousTx(struct adapter *padapter, u8 bStart); - -void Hal_SetDataRate(struct adapter *padapter); -void Hal_SetChannel(struct adapter *padapter); -void Hal_SetAntennaPathPower(struct adapter *padapter); -s32 Hal_SetThermalMeter(struct adapter *padapter, u8 target_ther); -s32 Hal_SetPowerTracking(struct adapter *padapter, u8 enable); -void Hal_GetPowerTracking(struct adapter *padapter, u8 *enable); -void Hal_GetThermalMeter(struct adapter *padapter, u8 *value); -void Hal_mpt_SwitchRfSetting(struct adapter *padapter); -void Hal_MPT_CCKTxPowerAdjust(struct adapter *Adapter, bool bInCH14); -void Hal_MPT_CCKTxPowerAdjustbyIndex(struct adapter *padapter, bool beven); -void Hal_SetCCKTxPower(struct adapter *padapter, u8 *TxPower); -void Hal_SetOFDMTxPower(struct adapter *padapter, u8 *TxPower); -void Hal_TriggerRFThermalMeter(struct adapter *padapter); -u8 Hal_ReadRFThermalMeter(struct adapter *padapter); -void Hal_SetCCKContinuousTx(struct adapter *padapter, u8 bStart); -void Hal_SetOFDMContinuousTx(struct adapter *padapter, u8 bStart); -void Hal_ProSetCrystalCap(struct adapter *padapter, u32 CrystalCapVal); -void MP_PHY_SetRFPathSwitch(struct adapter *padapter, bool bMain); -u32 mpt_ProQueryCalTxPower(struct adapter *padapter, u8 RfPath); -void MPT_PwrCtlDM(struct adapter *padapter, u32 bstart); -u8 MptToMgntRate(u32 MptRateIdx); - -#endif /* _RTW_MP_H_ */ diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c index 738a601c55bb..de48c3454ab3 100644 --- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c +++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c @@ -724,8 +724,6 @@ u8 rtw_free_drv_sw(struct adapter *padapter) rtw_free_mlme_priv(&padapter->mlmepriv); - /* free_io_queue(padapter); */ - _rtw_free_xmit_priv(&padapter->xmitpriv); _rtw_free_sta_priv(&padapter->stapriv); /* will free bcmc_stainfo here */ From b2a9a6a26b7e954297e51822e396572026480bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Sat, 22 Feb 2025 19:36:17 +0000 Subject: [PATCH 1469/2157] staging: rtl8723bs: select CONFIG_CRYPTO_LIB_AES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following issue: ERROR: modpost: "aes_expandkey" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! ERROR: modpost: "aes_encrypt" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! Fixes: 7d40753d8820 ("staging: rtl8723bs: use in-kernel aes encryption in OMAC1 routines") Fixes: 3d3a170f6d80 ("staging: rtl8723bs: use in-kernel aes encryption") Signed-off-by: 谢致邦 (XIE Zhibang) Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/tencent_0BDDF3A721708D16A2E7C3DAFF0FEC79A105@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/Kconfig b/drivers/staging/rtl8723bs/Kconfig index 8d48c61961a6..353e6ee2c145 100644 --- a/drivers/staging/rtl8723bs/Kconfig +++ b/drivers/staging/rtl8723bs/Kconfig @@ -4,6 +4,7 @@ config RTL8723BS depends on WLAN && MMC && CFG80211 depends on m select CRYPTO + select CRYPTO_LIB_AES select CRYPTO_LIB_ARC4 help This option enables support for RTL8723BS SDIO drivers, such as From 96622d58f50b8f364bb11d48f2d41e3348020b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 11 Mar 2025 10:40:49 +0100 Subject: [PATCH 1470/2157] staging: vchiq_arm: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping looks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. There are still a few users of %pK left, but these use it through seq_file, for which its usage is safe. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20250311-restricted-pointers-vchiq_arm-v2-1-a14e1c0681fc@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_arm.c | 6 +- .../interface/vchiq_arm/vchiq_core.c | 64 +++++++++---------- .../interface/vchiq_arm/vchiq_dev.c | 14 ++-- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index a4e83e5d619b..2db219e4786f 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -271,7 +271,7 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state return -ENXIO; } - dev_dbg(&pdev->dev, "arm: vchiq_init - done (slots %pK, phys %pad)\n", + dev_dbg(&pdev->dev, "arm: vchiq_init - done (slots %p, phys %pad)\n", vchiq_slot_zero, &slot_phys); mutex_init(&drv_mgmt->connected_mutex); @@ -368,7 +368,7 @@ void free_bulk_waiter(struct vchiq_instance *instance) &instance->bulk_waiter_list, list) { list_del(&waiter->list); dev_dbg(instance->state->dev, - "arm: bulk_waiter - cleaned up %pK for pid %d\n", + "arm: bulk_waiter - cleaned up %p for pid %d\n", waiter, waiter->pid); kfree(waiter); } @@ -622,7 +622,7 @@ vchiq_blocking_bulk_transfer(struct vchiq_instance *instance, unsigned int handl mutex_lock(&instance->bulk_waiter_list_mutex); list_add(&waiter->list, &instance->bulk_waiter_list); mutex_unlock(&instance->bulk_waiter_list_mutex); - dev_dbg(instance->state->dev, "arm: saved bulk_waiter %pK for pid %d\n", + dev_dbg(instance->state->dev, "arm: saved bulk_waiter %p for pid %d\n", waiter, current->pid); } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 8d5795db4f39..e2cac0898b8f 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -470,7 +470,7 @@ make_service_callback(struct vchiq_service *service, enum vchiq_reason reason, cb_userdata = bulk->cb_userdata; } - dev_dbg(service->state->dev, "core: %d: callback:%d (%s, %pK, %pK %pK)\n", + dev_dbg(service->state->dev, "core: %d: callback:%d (%s, %p, %p %p)\n", service->state->id, service->localport, reason_names[reason], header, cb_data, cb_userdata); status = service->base.callback(service->instance, reason, header, service->handle, @@ -778,7 +778,7 @@ process_free_data_message(struct vchiq_state *state, u32 *service_found, complete("a->quota_event); } else if (count == 0) { dev_err(state->dev, - "core: service %d message_use_count=%d (header %pK, msgid %x, header->msgid %x, header->size %x)\n", + "core: service %d message_use_count=%d (header %p, msgid %x, header->msgid %x, header->size %x)\n", port, quota->message_use_count, header, msgid, header->msgid, header->size); WARN(1, "invalid message use count\n"); @@ -799,11 +799,11 @@ process_free_data_message(struct vchiq_state *state, u32 *service_found, * it has dropped below its quota */ complete("a->quota_event); - dev_dbg(state->dev, "core: %d: pfq:%d %x@%pK - slot_use->%d\n", + dev_dbg(state->dev, "core: %d: pfq:%d %x@%p - slot_use->%d\n", state->id, port, header->size, header, count - 1); } else { dev_err(state->dev, - "core: service %d slot_use_count=%d (header %pK, msgid %x, header->msgid %x, header->size %x)\n", + "core: service %d slot_use_count=%d (header %p, msgid %x, header->msgid %x, header->size %x)\n", port, count, header, msgid, header->msgid, header->size); WARN(1, "bad slot use count\n"); } @@ -845,7 +845,7 @@ process_free_queue(struct vchiq_state *state, u32 *service_found, */ rmb(); - dev_dbg(state->dev, "core: %d: pfq %d=%pK %x %x\n", + dev_dbg(state->dev, "core: %d: pfq %d=%p %x %x\n", state->id, slot_index, data, local->slot_queue_recycle, slot_queue_available); @@ -868,7 +868,7 @@ process_free_queue(struct vchiq_state *state, u32 *service_found, pos += calc_stride(header->size); if (pos > VCHIQ_SLOT_SIZE) { dev_err(state->dev, - "core: pfq - pos %x: header %pK, msgid %x, header->msgid %x, header->size %x\n", + "core: pfq - pos %x: header %p, msgid %x, header->msgid %x, header->size %x\n", pos, header, msgid, header->msgid, header->size); WARN(1, "invalid slot position\n"); } @@ -1060,7 +1060,7 @@ queue_message(struct vchiq_state *state, struct vchiq_service *service, int tx_end_index; int slot_use_count; - dev_dbg(state->dev, "core: %d: qm %s@%pK,%zx (%d->%d)\n", + dev_dbg(state->dev, "core: %d: qm %s@%p,%zx (%d->%d)\n", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); @@ -1117,7 +1117,7 @@ queue_message(struct vchiq_state *state, struct vchiq_service *service, VCHIQ_SERVICE_STATS_INC(service, ctrl_tx_count); VCHIQ_SERVICE_STATS_ADD(service, ctrl_tx_bytes, size); } else { - dev_dbg(state->dev, "core: %d: qm %s@%pK,%zx (%d->%d)\n", + dev_dbg(state->dev, "core: %d: qm %s@%p,%zx (%d->%d)\n", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); if (size != 0) { @@ -1204,7 +1204,7 @@ queue_message_sync(struct vchiq_state *state, struct vchiq_service *service, state->id, oldmsgid); } - dev_dbg(state->dev, "sync: %d: qms %s@%pK,%x (%d->%d)\n", + dev_dbg(state->dev, "sync: %d: qms %s@%p,%x (%d->%d)\n", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); @@ -1539,7 +1539,7 @@ create_pagelist(struct vchiq_instance *instance, struct vchiq_bulk *bulk) pagelist = dma_alloc_coherent(instance->state->dev, pagelist_size, &dma_addr, GFP_KERNEL); - dev_dbg(instance->state->dev, "arm: %pK\n", pagelist); + dev_dbg(instance->state->dev, "arm: %p\n", pagelist); if (!pagelist) return NULL; @@ -1692,7 +1692,7 @@ free_pagelist(struct vchiq_instance *instance, struct vchiq_pagelist_info *pagel unsigned int num_pages = pagelistinfo->num_pages; unsigned int cache_line_size; - dev_dbg(instance->state->dev, "arm: %pK, %d\n", pagelistinfo->pagelist, actual); + dev_dbg(instance->state->dev, "arm: %p, %d\n", pagelistinfo->pagelist, actual); drv_mgmt = dev_get_drvdata(instance->state->dev); @@ -1849,7 +1849,7 @@ parse_open(struct vchiq_state *state, struct vchiq_header *header) payload = (struct vchiq_open_payload *)header->data; fourcc = payload->fourcc; - dev_dbg(state->dev, "core: %d: prs OPEN@%pK (%d->'%p4cc')\n", + dev_dbg(state->dev, "core: %d: prs OPEN@%p (%d->'%p4cc')\n", state->id, header, localport, &fourcc); service = get_listening_service(state, fourcc); @@ -1976,14 +1976,14 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) service = get_connected_service(state, remoteport); if (service) dev_warn(state->dev, - "core: %d: prs %s@%pK (%d->%d) - found connected service %d\n", + "core: %d: prs %s@%p (%d->%d) - found connected service %d\n", state->id, msg_type_str(type), header, remoteport, localport, service->localport); } if (!service) { dev_err(state->dev, - "core: %d: prs %s@%pK (%d->%d) - invalid/closed service %d\n", + "core: %d: prs %s@%p (%d->%d) - invalid/closed service %d\n", state->id, msg_type_str(type), header, remoteport, localport, localport); goto skip_message; @@ -2003,7 +2003,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) if (((unsigned long)header & VCHIQ_SLOT_MASK) + calc_stride(size) > VCHIQ_SLOT_SIZE) { - dev_err(state->dev, "core: header %pK (msgid %x) - size %x too big for slot\n", + dev_err(state->dev, "core: header %p (msgid %x) - size %x too big for slot\n", header, (unsigned int)msgid, (unsigned int)size); WARN(1, "oversized for slot\n"); } @@ -2022,7 +2022,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) service->peer_version = payload->version; } dev_dbg(state->dev, - "core: %d: prs OPENACK@%pK,%x (%d->%d) v:%d\n", + "core: %d: prs OPENACK@%p,%x (%d->%d) v:%d\n", state->id, header, size, remoteport, localport, service->peer_version); if (service->srvstate == VCHIQ_SRVSTATE_OPENING) { @@ -2037,7 +2037,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) case VCHIQ_MSG_CLOSE: WARN_ON(size); /* There should be no data */ - dev_dbg(state->dev, "core: %d: prs CLOSE@%pK (%d->%d)\n", + dev_dbg(state->dev, "core: %d: prs CLOSE@%p (%d->%d)\n", state->id, header, remoteport, localport); mark_service_closing_internal(service, 1); @@ -2049,7 +2049,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) &service->base.fourcc, service->localport, service->remoteport); break; case VCHIQ_MSG_DATA: - dev_dbg(state->dev, "core: %d: prs DATA@%pK,%x (%d->%d)\n", + dev_dbg(state->dev, "core: %d: prs DATA@%p,%x (%d->%d)\n", state->id, header, size, remoteport, localport); if ((service->remoteport == remoteport) && @@ -2069,7 +2069,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) } break; case VCHIQ_MSG_CONNECT: - dev_dbg(state->dev, "core: %d: prs CONNECT@%pK\n", + dev_dbg(state->dev, "core: %d: prs CONNECT@%p\n", state->id, header); state->version_common = ((struct vchiq_slot_zero *) state->slot_data)->version; @@ -2102,7 +2102,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) if ((int)(queue->remote_insert - queue->local_insert) >= 0) { dev_err(state->dev, - "core: %d: prs %s@%pK (%d->%d) unexpected (ri=%d,li=%d)\n", + "core: %d: prs %s@%p (%d->%d) unexpected (ri=%d,li=%d)\n", state->id, msg_type_str(type), header, remoteport, localport, queue->remote_insert, queue->local_insert); mutex_unlock(&service->bulk_mutex); @@ -2120,7 +2120,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) bulk->actual = *(int *)header->data; queue->remote_insert++; - dev_dbg(state->dev, "core: %d: prs %s@%pK (%d->%d) %x@%pad\n", + dev_dbg(state->dev, "core: %d: prs %s@%p (%d->%d) %x@%pad\n", state->id, msg_type_str(type), header, remoteport, localport, bulk->actual, &bulk->dma_addr); @@ -2140,12 +2140,12 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) } break; case VCHIQ_MSG_PADDING: - dev_dbg(state->dev, "core: %d: prs PADDING@%pK,%x\n", + dev_dbg(state->dev, "core: %d: prs PADDING@%p,%x\n", state->id, header, size); break; case VCHIQ_MSG_PAUSE: /* If initiated, signal the application thread */ - dev_dbg(state->dev, "core: %d: prs PAUSE@%pK,%x\n", + dev_dbg(state->dev, "core: %d: prs PAUSE@%p,%x\n", state->id, header, size); if (state->conn_state == VCHIQ_CONNSTATE_PAUSED) { dev_err(state->dev, "core: %d: PAUSE received in state PAUSED\n", @@ -2162,7 +2162,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) vchiq_set_conn_state(state, VCHIQ_CONNSTATE_PAUSED); break; case VCHIQ_MSG_RESUME: - dev_dbg(state->dev, "core: %d: prs RESUME@%pK,%x\n", + dev_dbg(state->dev, "core: %d: prs RESUME@%p,%x\n", state->id, header, size); /* Release the slot mutex */ mutex_unlock(&state->slot_mutex); @@ -2179,7 +2179,7 @@ parse_message(struct vchiq_state *state, struct vchiq_header *header) break; default: - dev_err(state->dev, "core: %d: prs invalid msgid %x@%pK,%x\n", + dev_err(state->dev, "core: %d: prs invalid msgid %x@%p,%x\n", state->id, msgid, header, size); WARN(1, "invalid message\n"); break; @@ -2400,7 +2400,7 @@ sync_func(void *v) if (!service) { dev_err(state->dev, - "sync: %d: sf %s@%pK (%d->%d) - invalid/closed service %d\n", + "sync: %d: sf %s@%p (%d->%d) - invalid/closed service %d\n", state->id, msg_type_str(type), header, remoteport, localport, localport); release_message_sync(state, header); @@ -2422,7 +2422,7 @@ sync_func(void *v) header->data; service->peer_version = payload->version; } - dev_err(state->dev, "sync: %d: sf OPENACK@%pK,%x (%d->%d) v:%d\n", + dev_err(state->dev, "sync: %d: sf OPENACK@%p,%x (%d->%d) v:%d\n", state->id, header, size, remoteport, localport, service->peer_version); if (service->srvstate == VCHIQ_SRVSTATE_OPENING) { @@ -2435,7 +2435,7 @@ sync_func(void *v) break; case VCHIQ_MSG_DATA: - dev_dbg(state->dev, "sync: %d: sf DATA@%pK,%x (%d->%d)\n", + dev_dbg(state->dev, "sync: %d: sf DATA@%p,%x (%d->%d)\n", state->id, header, size, remoteport, localport); if ((service->remoteport == remoteport) && @@ -2449,7 +2449,7 @@ sync_func(void *v) break; default: - dev_err(state->dev, "sync: error: %d: sf unexpected msgid %x@%pK,%x\n", + dev_err(state->dev, "sync: error: %d: sf unexpected msgid %x@%p,%x\n", state->id, msgid, header, size); release_message_sync(state, header); break; @@ -2926,13 +2926,13 @@ release_service_messages(struct vchiq_service *service) int port = VCHIQ_MSG_DSTPORT(msgid); if ((port == service->localport) && (msgid & VCHIQ_MSGID_CLAIMED)) { - dev_dbg(state->dev, "core: fsi - hdr %pK\n", header); + dev_dbg(state->dev, "core: fsi - hdr %p\n", header); release_slot(state, slot_info, header, NULL); } pos += calc_stride(header->size); if (pos > VCHIQ_SLOT_SIZE) { dev_err(state->dev, - "core: fsi - pos %x: header %pK, msgid %x, header->msgid %x, header->size %x\n", + "core: fsi - pos %x: header %p, msgid %x, header->msgid %x, header->size %x\n", pos, header, msgid, header->msgid, header->size); WARN(1, "invalid slot position\n"); } @@ -3091,7 +3091,7 @@ vchiq_bulk_xfer_queue_msg_killable(struct vchiq_service *service, */ wmb(); - dev_dbg(state->dev, "core: %d: bt (%d->%d) %cx %x@%pad %pK\n", + dev_dbg(state->dev, "core: %d: bt (%d->%d) %cx %x@%pad %p\n", state->id, service->localport, service->remoteport, dir_char, bulk->size, &bulk->dma_addr, bulk->cb_data); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c index 454f43416503..3b20ba5c7362 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c @@ -270,7 +270,7 @@ static int vchiq_ioc_dequeue_message(struct vchiq_instance *instance, } } else { dev_err(service->state->dev, - "arm: header %pK: bufsize %x < size %x\n", + "arm: header %p: bufsize %x < size %x\n", header, args->bufsize, header->size); WARN(1, "invalid size\n"); ret = -EMSGSIZE; @@ -328,7 +328,7 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance, ret = -ESRCH; goto out; } - dev_dbg(service->state->dev, "arm: found bulk_waiter %pK for pid %d\n", + dev_dbg(service->state->dev, "arm: found bulk_waiter %p for pid %d\n", waiter, current->pid); status = vchiq_bulk_xfer_waiting(instance, args->handle, @@ -366,7 +366,7 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance, mutex_lock(&instance->bulk_waiter_list_mutex); list_add(&waiter->list, &instance->bulk_waiter_list); mutex_unlock(&instance->bulk_waiter_list_mutex); - dev_dbg(service->state->dev, "arm: saved bulk_waiter %pK for pid %d\n", + dev_dbg(service->state->dev, "arm: saved bulk_waiter %p for pid %d\n", waiter, current->pid); ret = put_user(mode_waiting, mode); @@ -512,7 +512,7 @@ static int vchiq_ioc_await_completion(struct vchiq_instance *instance, /* This must be a VCHIQ-style service */ if (args->msgbufsize < msglen) { dev_err(service->state->dev, - "arm: header %pK: msgbufsize %x < msglen %x\n", + "arm: header %p: msgbufsize %x < msglen %x\n", header, args->msgbufsize, msglen); WARN(1, "invalid message size\n"); if (ret == 0) @@ -588,7 +588,7 @@ vchiq_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long ret = 0; int i, rc; - dev_dbg(instance->state->dev, "arm: instance %pK, cmd %s, arg %lx\n", instance, + dev_dbg(instance->state->dev, "arm: instance %p, cmd %s, arg %lx\n", instance, ((_IOC_TYPE(cmd) == VCHIQ_IOC_MAGIC) && (_IOC_NR(cmd) <= VCHIQ_IOC_MAX)) ? ioctl_names[_IOC_NR(cmd)] : "", arg); @@ -874,12 +874,12 @@ vchiq_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (!status && (ret < 0) && (ret != -EINTR) && (ret != -EWOULDBLOCK)) { dev_dbg(instance->state->dev, - "arm: ioctl instance %pK, cmd %s -> status %d, %ld\n", + "arm: ioctl instance %p, cmd %s -> status %d, %ld\n", instance, (_IOC_NR(cmd) <= VCHIQ_IOC_MAX) ? ioctl_names[_IOC_NR(cmd)] : "", status, ret); } else { dev_dbg(instance->state->dev, - "arm: ioctl instance %pK, cmd %s -> status %d\n, %ld\n", + "arm: ioctl instance %p, cmd %s -> status %d\n, %ld\n", instance, (_IOC_NR(cmd) <= VCHIQ_IOC_MAX) ? ioctl_names[_IOC_NR(cmd)] : "", status, ret); } From 63f4dbb196db60a8536ba3d1b835d597a83f6cbb Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 9 Mar 2025 13:50:10 +0100 Subject: [PATCH 1471/2157] staging: vchiq_arm: Register debugfs after cdev The commit 2a4d15a4ae98 ("staging: vchiq: Refactor vchiq cdev code") moved the debugfs directory creation before vchiq character device registration. In case the latter fails, the debugfs directory won't be cleaned up. Fixes: 2a4d15a4ae98 ("staging: vchiq: Refactor vchiq cdev code") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250309125014.37166-2-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 2db219e4786f..31bdb15fd751 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1386,8 +1386,6 @@ static int vchiq_probe(struct platform_device *pdev) return ret; } - vchiq_debugfs_init(&mgmt->state); - dev_dbg(&pdev->dev, "arm: platform initialised - version %d (min %d)\n", VCHIQ_VERSION, VCHIQ_VERSION_MIN); @@ -1401,6 +1399,8 @@ static int vchiq_probe(struct platform_device *pdev) return ret; } + vchiq_debugfs_init(&mgmt->state); + bcm2835_audio = vchiq_device_register(&pdev->dev, "bcm2835-audio"); bcm2835_camera = vchiq_device_register(&pdev->dev, "bcm2835-camera"); From 3db89bc6d973e2bcaa852f6409c98c228f39a926 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 9 Mar 2025 13:50:11 +0100 Subject: [PATCH 1472/2157] staging: vchiq_arm: Fix possible NPR of keep-alive thread In case vchiq_platform_conn_state_changed() is never called or fails before driver removal, ka_thread won't be a valid pointer to a task_struct. So do the necessary checks before calling kthread_stop to avoid a crash. Fixes: 863a756aaf49 ("staging: vc04_services: vchiq_core: Stop kthreads on vchiq module unload") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250309125014.37166-3-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 31bdb15fd751..910b8fd34cb1 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1422,7 +1422,8 @@ static void vchiq_remove(struct platform_device *pdev) kthread_stop(mgmt->state.slot_handler_thread); arm_state = vchiq_platform_get_arm_state(&mgmt->state); - kthread_stop(arm_state->ka_thread); + if (!IS_ERR_OR_NULL(arm_state->ka_thread)) + kthread_stop(arm_state->ka_thread); } static struct platform_driver vchiq_driver = { From cfb320d990919836b49bd090c6c232c6c4d90b41 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 9 Mar 2025 13:50:12 +0100 Subject: [PATCH 1473/2157] staging: vchiq_arm: Stop kthreads if vchiq cdev register fails In case the vchiq character device cannot be registered during probe, all kthreads needs to be stopped to avoid resource leaks. Fixes: 863a756aaf49 ("staging: vc04_services: vchiq_core: Stop kthreads on vchiq module unload") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250309125014.37166-4-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_arm.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 910b8fd34cb1..6434cbdc1a6e 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -308,6 +308,20 @@ static struct vchiq_arm_state *vchiq_platform_get_arm_state(struct vchiq_state * return (struct vchiq_arm_state *)state->platform_state; } +static void +vchiq_platform_uninit(struct vchiq_drv_mgmt *mgmt) +{ + struct vchiq_arm_state *arm_state; + + kthread_stop(mgmt->state.sync_thread); + kthread_stop(mgmt->state.recycle_thread); + kthread_stop(mgmt->state.slot_handler_thread); + + arm_state = vchiq_platform_get_arm_state(&mgmt->state); + if (!IS_ERR_OR_NULL(arm_state->ka_thread)) + kthread_stop(arm_state->ka_thread); +} + void vchiq_dump_platform_state(struct seq_file *f) { seq_puts(f, " Platform: 2835 (VC master)\n"); @@ -1396,6 +1410,7 @@ static int vchiq_probe(struct platform_device *pdev) ret = vchiq_register_chrdev(&pdev->dev); if (ret) { dev_err(&pdev->dev, "arm: Failed to initialize vchiq cdev\n"); + vchiq_platform_uninit(mgmt); return ret; } @@ -1410,20 +1425,12 @@ static int vchiq_probe(struct platform_device *pdev) static void vchiq_remove(struct platform_device *pdev) { struct vchiq_drv_mgmt *mgmt = dev_get_drvdata(&pdev->dev); - struct vchiq_arm_state *arm_state; vchiq_device_unregister(bcm2835_audio); vchiq_device_unregister(bcm2835_camera); vchiq_debugfs_deinit(); vchiq_deregister_chrdev(); - - kthread_stop(mgmt->state.sync_thread); - kthread_stop(mgmt->state.recycle_thread); - kthread_stop(mgmt->state.slot_handler_thread); - - arm_state = vchiq_platform_get_arm_state(&mgmt->state); - if (!IS_ERR_OR_NULL(arm_state->ka_thread)) - kthread_stop(arm_state->ka_thread); + vchiq_platform_uninit(mgmt); } static struct platform_driver vchiq_driver = { From 86bc8821700665ad3962f3ef0d93667f59cf7031 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 9 Mar 2025 13:50:13 +0100 Subject: [PATCH 1474/2157] staging: vchiq_arm: Create keep-alive thread during probe Creating the keep-alive thread in vchiq_platform_init_state have the following advantages: - abort driver probe if kthread_create fails (more consistent behavior) - make resource release process easier Since vchiq_keepalive_thread_func is defined below vchiq_platform_init_state, the latter must be moved. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250309125014.37166-5-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_arm.c | 69 +++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 6434cbdc1a6e..cdf5687ad4f0 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -280,29 +280,6 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state return 0; } -int -vchiq_platform_init_state(struct vchiq_state *state) -{ - struct vchiq_arm_state *platform_state; - - platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL); - if (!platform_state) - return -ENOMEM; - - rwlock_init(&platform_state->susp_res_lock); - - init_completion(&platform_state->ka_evt); - atomic_set(&platform_state->ka_use_count, 0); - atomic_set(&platform_state->ka_use_ack_count, 0); - atomic_set(&platform_state->ka_release_count, 0); - - platform_state->state = state; - - state->platform_state = (struct opaque_platform_state *)platform_state; - - return 0; -} - static struct vchiq_arm_state *vchiq_platform_get_arm_state(struct vchiq_state *state) { return (struct vchiq_arm_state *)state->platform_state; @@ -1011,6 +988,39 @@ vchiq_keepalive_thread_func(void *v) return 0; } +int +vchiq_platform_init_state(struct vchiq_state *state) +{ + struct vchiq_arm_state *platform_state; + char threadname[16]; + + platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL); + if (!platform_state) + return -ENOMEM; + + snprintf(threadname, sizeof(threadname), "vchiq-keep/%d", + state->id); + platform_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func, + (void *)state, threadname); + if (IS_ERR(platform_state->ka_thread)) { + dev_err(state->dev, "couldn't create thread %s\n", threadname); + return PTR_ERR(platform_state->ka_thread); + } + + rwlock_init(&platform_state->susp_res_lock); + + init_completion(&platform_state->ka_evt); + atomic_set(&platform_state->ka_use_count, 0); + atomic_set(&platform_state->ka_use_ack_count, 0); + atomic_set(&platform_state->ka_release_count, 0); + + platform_state->state = state; + + state->platform_state = (struct opaque_platform_state *)platform_state; + + return 0; +} + int vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service, enum USE_TYPE_E use_type) @@ -1331,7 +1341,6 @@ void vchiq_platform_conn_state_changed(struct vchiq_state *state, enum vchiq_connstate newstate) { struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state); - char threadname[16]; dev_dbg(state->dev, "suspend: %d: %s->%s\n", state->id, get_conn_state_name(oldstate), get_conn_state_name(newstate)); @@ -1346,17 +1355,7 @@ void vchiq_platform_conn_state_changed(struct vchiq_state *state, arm_state->first_connect = 1; write_unlock_bh(&arm_state->susp_res_lock); - snprintf(threadname, sizeof(threadname), "vchiq-keep/%d", - state->id); - arm_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func, - (void *)state, - threadname); - if (IS_ERR(arm_state->ka_thread)) { - dev_err(state->dev, "suspend: Couldn't create thread %s\n", - threadname); - } else { - wake_up_process(arm_state->ka_thread); - } + wake_up_process(arm_state->ka_thread); } static const struct of_device_id vchiq_of_match[] = { From 3e5def4249b9ad089aa0b46b84e6f5f8f70e2d85 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 9 Mar 2025 13:50:14 +0100 Subject: [PATCH 1475/2157] staging: vchiq_arm: Improve initial VCHIQ connect The code to start the keep-alive thread on initial VCHIQ connect within vchiq_platform_conn_state_changed is unnecessary complex. Move the keep-alive thread wake-up into a separate function and call it during VCHIQ connect. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20250309125014.37166-6-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- .../interface/vchiq_arm/vchiq_arm.c | 28 +++++-------------- .../interface/vchiq_arm/vchiq_core.c | 1 + .../interface/vchiq_arm/vchiq_core.h | 2 ++ 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index cdf5687ad4f0..5dbf8d53db09 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -97,13 +97,6 @@ struct vchiq_arm_state { * tracked separately with the state. */ int peer_use_count; - - /* - * Flag to indicate that the first vchiq connect has made it through. - * This means that both sides should be fully ready, and we should - * be able to suspend after this point. - */ - int first_connect; }; static int @@ -1336,26 +1329,19 @@ vchiq_check_service(struct vchiq_service *service) return ret; } +void vchiq_platform_connected(struct vchiq_state *state) +{ + struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state); + + wake_up_process(arm_state->ka_thread); +} + void vchiq_platform_conn_state_changed(struct vchiq_state *state, enum vchiq_connstate oldstate, enum vchiq_connstate newstate) { - struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state); - dev_dbg(state->dev, "suspend: %d: %s->%s\n", state->id, get_conn_state_name(oldstate), get_conn_state_name(newstate)); - if (state->conn_state != VCHIQ_CONNSTATE_CONNECTED) - return; - - write_lock_bh(&arm_state->susp_res_lock); - if (arm_state->first_connect) { - write_unlock_bh(&arm_state->susp_res_lock); - return; - } - - arm_state->first_connect = 1; - write_unlock_bh(&arm_state->susp_res_lock); - wake_up_process(arm_state->ka_thread); } static const struct of_device_id vchiq_of_match[] = { diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index e2cac0898b8f..e7b0c800a205 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -3343,6 +3343,7 @@ vchiq_connect_internal(struct vchiq_state *state, struct vchiq_instance *instanc return -EAGAIN; vchiq_set_conn_state(state, VCHIQ_CONNSTATE_CONNECTED); + vchiq_platform_connected(state); complete(&state->connect); } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h index 9b4e766990a4..3b5c0618e567 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h @@ -575,6 +575,8 @@ int vchiq_send_remote_use(struct vchiq_state *state); int vchiq_send_remote_use_active(struct vchiq_state *state); +void vchiq_platform_connected(struct vchiq_state *state); + void vchiq_platform_conn_state_changed(struct vchiq_state *state, enum vchiq_connstate oldstate, enum vchiq_connstate newstate); From 3b23d31e569ca19b7bf1059760b879f61cd71ded Mon Sep 17 00:00:00 2001 From: David Zalman Date: Wed, 19 Mar 2025 07:10:12 -0400 Subject: [PATCH 1476/2157] staging: rtl8723bs: fixed a unnecessary parentheses coding style issue Remove unnecessary parentheses around derefrened pointers in core/rtw_ap.c to adapter to the Linux kernel coding style. Reported by checkpatch: CHECK: Unnecessary parentheses around x->y Signed-off-by: David Zalman Link: https://lore.kernel.org/r/20250319111012.1588-1-davidzalman.101@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ap.c | 96 ++++++++++++------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c index a6dc88dd4ba1..50022bb5911e 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ap.c +++ b/drivers/staging/rtl8723bs/core/rtw_ap.c @@ -324,7 +324,7 @@ void add_RATid(struct adapter *padapter, struct sta_info *psta, u8 rssi_level) { unsigned char sta_band = 0, shortGIrate = false; unsigned int tx_ra_bitmap = 0; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct wlan_bssid_ex *pcur_network = (struct wlan_bssid_ex *)&pmlmepriv->cur_network.network; @@ -372,9 +372,9 @@ void update_bmc_sta(struct adapter *padapter) unsigned char network_type; int supportRateNum = 0; unsigned int tx_ra_bitmap = 0; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; struct wlan_bssid_ex *pcur_network = (struct wlan_bssid_ex *)&pmlmepriv->cur_network.network; struct sta_info *psta = rtw_get_bcmc_stainfo(padapter); @@ -451,9 +451,9 @@ void update_bmc_sta(struct adapter *padapter) void update_sta_info_apmode(struct adapter *padapter, struct sta_info *psta) { - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &padapter->securitypriv; - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct ht_priv *phtpriv_ap = &pmlmepriv->htpriv; struct ht_priv *phtpriv_sta = &psta->htpriv; u8 cur_ldpc_cap = 0, cur_stbc_cap = 0, cur_beamform_cap = 0; @@ -563,10 +563,10 @@ void update_sta_info_apmode(struct adapter *padapter, struct sta_info *psta) static void update_ap_info(struct adapter *padapter, struct sta_info *psta) { - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct wlan_bssid_ex *pnetwork = (struct wlan_bssid_ex *)&pmlmepriv->cur_network.network; - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct ht_priv *phtpriv_ap = &pmlmepriv->htpriv; psta->wireless_mode = pmlmeext->cur_wireless_mode; @@ -609,7 +609,7 @@ static void update_hw_ht_param(struct adapter *padapter) unsigned char max_AMPDU_len; unsigned char min_MPDU_spacing; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; /* handle A-MPDU parameter field * @@ -645,13 +645,13 @@ void start_bss_network(struct adapter *padapter) u32 acparm; int ie_len; struct registry_priv *pregpriv = &padapter->registrypriv; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct security_priv *psecuritypriv = &(padapter->securitypriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct security_priv *psecuritypriv = &padapter->securitypriv; struct wlan_bssid_ex *pnetwork = (struct wlan_bssid_ex *)&pmlmepriv->cur_network.network; - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); - struct wlan_bssid_ex *pnetwork_mlmeext = &(pmlmeinfo->network); + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; + struct wlan_bssid_ex *pnetwork_mlmeext = &pmlmeinfo->network; struct HT_info_element *pht_info = NULL; u8 cbw40_enable = 0; @@ -823,7 +823,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) u8 WMM_PARA_IE[] = {0x00, 0x50, 0xf2, 0x02, 0x01, 0x01}; struct registry_priv *pregistrypriv = &padapter->registrypriv; struct security_priv *psecuritypriv = &padapter->securitypriv; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct wlan_bssid_ex *pbss_network = (struct wlan_bssid_ex *)&pmlmepriv->cur_network.network; u8 *ie = pbss_network->ies; @@ -845,7 +845,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) pbss_network->rssi = 0; - memcpy(pbss_network->mac_address, myid(&(padapter->eeprompriv)), ETH_ALEN); + memcpy(pbss_network->mac_address, myid(&padapter->eeprompriv), ETH_ALEN); /* beacon interval */ p = rtw_get_beacon_interval_from_ie(ie);/* ie + 8; 8: TimeStamp, 2: Beacon Interval 2:Capability */ @@ -1186,7 +1186,7 @@ int rtw_acl_add_sta(struct adapter *padapter, u8 *addr) if ((NUM_ACL - 1) < pacl_list->num) return (-1); - spin_lock_bh(&(pacl_node_q->lock)); + spin_lock_bh(&pacl_node_q->lock); phead = get_list_head(pacl_node_q); list_for_each(plist, phead) { @@ -1200,12 +1200,12 @@ int rtw_acl_add_sta(struct adapter *padapter, u8 *addr) } } - spin_unlock_bh(&(pacl_node_q->lock)); + spin_unlock_bh(&pacl_node_q->lock); if (added) return ret; - spin_lock_bh(&(pacl_node_q->lock)); + spin_lock_bh(&pacl_node_q->lock); for (i = 0; i < NUM_ACL; i++) { paclnode = &pacl_list->aclnode[i]; @@ -1225,7 +1225,7 @@ int rtw_acl_add_sta(struct adapter *padapter, u8 *addr) } } - spin_unlock_bh(&(pacl_node_q->lock)); + spin_unlock_bh(&pacl_node_q->lock); return ret; } @@ -1238,7 +1238,7 @@ void rtw_acl_remove_sta(struct adapter *padapter, u8 *addr) struct wlan_acl_pool *pacl_list = &pstapriv->acl_list; struct __queue *pacl_node_q = &pacl_list->acl_node_q; - spin_lock_bh(&(pacl_node_q->lock)); + spin_lock_bh(&pacl_node_q->lock); phead = get_list_head(pacl_node_q); list_for_each_safe(plist, tmp, phead) { @@ -1258,7 +1258,7 @@ void rtw_acl_remove_sta(struct adapter *padapter, u8 *addr) } } - spin_unlock_bh(&(pacl_node_q->lock)); + spin_unlock_bh(&pacl_node_q->lock); } @@ -1308,7 +1308,7 @@ static int rtw_ap_set_key( u8 keylen; struct cmd_obj *pcmd; struct setkey_parm *psetkeyparm; - struct cmd_priv *pcmdpriv = &(padapter->cmdpriv); + struct cmd_priv *pcmdpriv = &padapter->cmdpriv; int res = _SUCCESS; pcmd = rtw_zmalloc(sizeof(struct cmd_obj)); @@ -1345,7 +1345,7 @@ static int rtw_ap_set_key( keylen = 16; } - memcpy(&(psetkeyparm->key[0]), key, keylen); + memcpy(&psetkeyparm->key[0], key, keylen); pcmd->cmdcode = _SetKey_CMD_; pcmd->parmbuf = (u8 *)psetkeyparm; @@ -1397,10 +1397,10 @@ static void update_bcn_fixed_ie(struct adapter *padapter) static void update_bcn_erpinfo_ie(struct adapter *padapter) { - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); - struct wlan_bssid_ex *pnetwork = &(pmlmeinfo->network); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; + struct wlan_bssid_ex *pnetwork = &pmlmeinfo->network; unsigned char *p, *ie = pnetwork->ies; u32 len = 0; @@ -1461,10 +1461,10 @@ static void update_bcn_wps_ie(struct adapter *padapter) u8 *pbackup_remainder_ie = NULL; uint wps_ielen = 0, wps_offset, remainder_ielen; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); - struct wlan_bssid_ex *pnetwork = &(pmlmeinfo->network); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; + struct wlan_bssid_ex *pnetwork = &pmlmeinfo->network; unsigned char *ie = pnetwork->ies; u32 ielen = pnetwork->ie_length; @@ -1537,8 +1537,8 @@ void update_beacon(struct adapter *padapter, u8 ie_id, u8 *oui, u8 tx) if (!padapter) return; - pmlmepriv = &(padapter->mlmepriv); - pmlmeext = &(padapter->mlmeextpriv); + pmlmepriv = &padapter->mlmepriv; + pmlmeext = &padapter->mlmeextpriv; /* pmlmeinfo = &(pmlmeext->mlmext_info); */ if (!pmlmeext->bstart_bss) @@ -1619,7 +1619,7 @@ static int rtw_ht_operation_update(struct adapter *padapter) { u16 cur_op_mode, new_op_mode; int op_mode_changes = 0; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct ht_priv *phtpriv_ap = &pmlmepriv->htpriv; if (pmlmepriv->htpriv.ht_option) @@ -1703,8 +1703,8 @@ void associated_clients_update(struct adapter *padapter, u8 updated) void bss_cap_update_on_sta_join(struct adapter *padapter, struct sta_info *psta) { u8 beacon_updated = false; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; if (!(psta->flags & WLAN_STA_SHORT_PREAMBLE)) { if (!psta->no_short_preamble_set) { @@ -1823,8 +1823,8 @@ void bss_cap_update_on_sta_join(struct adapter *padapter, struct sta_info *psta) u8 bss_cap_update_on_sta_leave(struct adapter *padapter, struct sta_info *psta) { u8 beacon_updated = false; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); - struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; + struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; if (!psta) return beacon_updated; @@ -1932,7 +1932,7 @@ void rtw_sta_flush(struct adapter *padapter) struct sta_info *psta = NULL; struct sta_priv *pstapriv = &padapter->stapriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; u8 bc_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE) @@ -1962,7 +1962,7 @@ void rtw_sta_flush(struct adapter *padapter) void sta_info_update(struct adapter *padapter, struct sta_info *psta) { int flags = psta->flags; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; /* update wmm cap. */ if (WLAN_STA_WME & flags) @@ -1991,7 +1991,7 @@ void sta_info_update(struct adapter *padapter, struct sta_info *psta) void ap_sta_info_defer_update(struct adapter *padapter, struct sta_info *psta) { struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; - struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); + struct mlme_ext_info *pmlmeinfo = &pmlmeext->mlmext_info; if (psta->state & _FW_LINKED) { pmlmeinfo->FW_sta_info[psta->mac_id].psta = psta; @@ -2006,7 +2006,7 @@ void rtw_ap_restore_network(struct adapter *padapter) struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct sta_priv *pstapriv = &padapter->stapriv; struct sta_info *psta; - struct security_priv *psecuritypriv = &(padapter->securitypriv); + struct security_priv *psecuritypriv = &padapter->securitypriv; struct list_head *phead, *plist; u8 chk_alive_num = 0; char chk_alive_list[NUM_STA]; @@ -2072,7 +2072,7 @@ void rtw_ap_restore_network(struct adapter *padapter) void start_ap_mode(struct adapter *padapter) { int i; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct sta_priv *pstapriv = &padapter->stapriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct wlan_acl_pool *pacl_list = &pstapriv->acl_list; @@ -2109,7 +2109,7 @@ void start_ap_mode(struct adapter *padapter) pmlmepriv->p2p_probe_resp_ie = NULL; /* for ACL */ - INIT_LIST_HEAD(&(pacl_list->acl_node_q.queue)); + INIT_LIST_HEAD(&pacl_list->acl_node_q.queue); pacl_list->num = 0; pacl_list->mode = 0; for (i = 0; i < NUM_ACL; i++) { @@ -2124,7 +2124,7 @@ void stop_ap_mode(struct adapter *padapter) struct rtw_wlan_acl_node *paclnode; struct sta_info *psta = NULL; struct sta_priv *pstapriv = &padapter->stapriv; - struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); + struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct wlan_acl_pool *pacl_list = &pstapriv->acl_list; struct __queue *pacl_node_q = &pacl_list->acl_node_q; @@ -2142,7 +2142,7 @@ void stop_ap_mode(struct adapter *padapter) padapter->securitypriv.ndisencryptstatus = Ndis802_11WEPDisabled; /* for ACL */ - spin_lock_bh(&(pacl_node_q->lock)); + spin_lock_bh(&pacl_node_q->lock); phead = get_list_head(pacl_node_q); list_for_each_safe(plist, tmp, phead) { paclnode = list_entry(plist, struct rtw_wlan_acl_node, list); @@ -2155,7 +2155,7 @@ void stop_ap_mode(struct adapter *padapter) pacl_list->num--; } } - spin_unlock_bh(&(pacl_node_q->lock)); + spin_unlock_bh(&pacl_node_q->lock); rtw_sta_flush(padapter); From 935e1d90bf6f14cd190b3a95f3cbf7e298123043 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 19 Mar 2025 15:52:55 +0100 Subject: [PATCH 1477/2157] rust: pci: require Send for Driver trait implementers The instance of Self, returned and created by Driver::probe() is dropped in the bus' remove() callback. Request implementers of the Driver trait to implement Send, since the remove() callback is not guaranteed to run from the same thread as probe(). Fixes: 1bd8b6b2c5d3 ("rust: pci: add basic PCI device / driver abstractions") Cc: stable Reported-by: Alice Ryhl Closes: https://lore.kernel.org/lkml/Z9rDxOJ2V2bPjj5i@google.com/ Signed-off-by: Danilo Krummrich Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250319145350.69543-1-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/pci.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 0d09ae34a64d..22a32172b108 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -222,7 +222,7 @@ macro_rules! pci_device_table { ///``` /// Drivers must implement this trait in order to get a PCI driver registered. Please refer to the /// `Adapter` documentation for an example. -pub trait Driver { +pub trait Driver: Send { /// The type holding information about each device id supported by the driver. /// /// TODO: Use associated_type_defaults once stabilized: From 51d0de7596a458096756c895cfed6bc4a7ecac10 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 19 Mar 2025 15:52:56 +0100 Subject: [PATCH 1478/2157] rust: platform: require Send for Driver trait implementers The instance of Self, returned and created by Driver::probe() is dropped in the bus' remove() callback. Request implementers of the Driver trait to implement Send, since the remove() callback is not guaranteed to run from the same thread as probe(). Fixes: 683a63befc73 ("rust: platform: add basic platform device / driver abstractions") Cc: stable Reported-by: Alice Ryhl Closes: https://lore.kernel.org/lkml/Z9rDxOJ2V2bPjj5i@google.com/ Signed-off-by: Danilo Krummrich Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250319145350.69543-2-dakr@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/platform.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 2811ca53d8b6..e37531bae8e9 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -149,7 +149,7 @@ macro_rules! module_platform_driver { /// } /// } ///``` -pub trait Driver { +pub trait Driver: Send { /// The type holding driver private data about each device id supported by the driver. /// /// TODO: Use associated_type_defaults once stabilized: From a72f418703b55072d837b72ac87091e18049260c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:16 +0100 Subject: [PATCH 1479/2157] tty: convert "TTY Struct Flags" to an enum Convert TTY_* macros (flags) to an enum. This allows for easier kernel-doc (the comment needed fine tuning), grouping of these nicely, and proper checking. Note that these are bit positions. So they are used such as test_bit(TTY_THROTTLED, ...). Given these are not the user API (only in-kernel API/ABI), the bit positions are NOT preserved in this patch. All are renumbered naturally using the enum-auto-numbering. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/tty/tty_struct.rst | 2 +- include/linux/tty.h | 52 +++++++++++---------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/Documentation/driver-api/tty/tty_struct.rst b/Documentation/driver-api/tty/tty_struct.rst index c72f5a4293b2..29caf1c1ca5f 100644 --- a/Documentation/driver-api/tty/tty_struct.rst +++ b/Documentation/driver-api/tty/tty_struct.rst @@ -72,7 +72,7 @@ TTY Struct Flags ================ .. kernel-doc:: include/linux/tty.h - :doc: TTY Struct Flags + :identifiers: tty_struct_flags TTY Struct Reference ==================== diff --git a/include/linux/tty.h b/include/linux/tty.h index 2372f9357240..6bb4fb3845f0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -251,7 +251,7 @@ struct tty_file_private { }; /** - * DOC: TTY Struct Flags + * enum tty_struct_flags - TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * @@ -260,62 +260,64 @@ struct tty_file_private { * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * - * TTY_THROTTLED + * @TTY_THROTTLED: * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). * - * TTY_IO_ERROR + * @TTY_IO_ERROR: * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) * - * TTY_OTHER_CLOSED + * @TTY_OTHER_CLOSED: * Device is a pty and the other side has closed. * - * TTY_EXCLUSIVE + * @TTY_EXCLUSIVE: * Exclusive open mode (a single opener). * - * TTY_DO_WRITE_WAKEUP + * @TTY_DO_WRITE_WAKEUP: * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * - * TTY_LDISC_OPEN + * @TTY_LDISC_OPEN: * Indicates that a line discipline is open. For debugging purposes only. * - * TTY_PTY_LOCK + * @TTY_PTY_LOCK: * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * - * TTY_NO_WRITE_SPLIT + * @TTY_NO_WRITE_SPLIT: * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * - * TTY_HUPPED + * @TTY_HUPPED: * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * - * TTY_HUPPING + * @TTY_HUPPING: * The TTY is in the process of hanging up to abort potential readers. * - * TTY_LDISC_CHANGING + * @TTY_LDISC_CHANGING: * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. * - * TTY_LDISC_HALTED + * @TTY_LDISC_HALTED: * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. */ -#define TTY_THROTTLED 0 -#define TTY_IO_ERROR 1 -#define TTY_OTHER_CLOSED 2 -#define TTY_EXCLUSIVE 3 -#define TTY_DO_WRITE_WAKEUP 5 -#define TTY_LDISC_OPEN 11 -#define TTY_PTY_LOCK 16 -#define TTY_NO_WRITE_SPLIT 17 -#define TTY_HUPPED 18 -#define TTY_HUPPING 19 -#define TTY_LDISC_CHANGING 20 -#define TTY_LDISC_HALTED 22 +enum tty_struct_flags { + TTY_THROTTLED, + TTY_IO_ERROR, + TTY_OTHER_CLOSED, + TTY_EXCLUSIVE, + TTY_DO_WRITE_WAKEUP, + TTY_LDISC_OPEN, + TTY_PTY_LOCK, + TTY_NO_WRITE_SPLIT, + TTY_HUPPED, + TTY_HUPPING, + TTY_LDISC_CHANGING, + TTY_LDISC_HALTED, +}; static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { From ec1132dd55ef590fa0553308b1f7da914d505151 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:17 +0100 Subject: [PATCH 1480/2157] tty: audit: do not use N_TTY_BUF_SIZE N_TTY_BUF_SIZE -- as the name suggests -- is the N_TTY's buffer size. There is no reason to couple that to audit's buffer size, so define an own TTY_AUDIT_BUF_SIZE macro (with the same size). N_TTY_BUF_SIZE is private and will be moved to n_tty.c later. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_audit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 1d81eeefb068..75542333c54a 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -12,12 +12,14 @@ #include #include "tty.h" +#define TTY_AUDIT_BUF_SIZE 4096 + struct tty_audit_buf { struct mutex mutex; /* Protects all data below */ dev_t dev; /* The TTY which the data is from */ bool icanon; size_t valid; - u8 *data; /* Allocated size N_TTY_BUF_SIZE */ + u8 *data; /* Allocated size TTY_AUDIT_BUF_SIZE */ }; static struct tty_audit_buf *tty_audit_buf_ref(void) @@ -37,7 +39,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(void) if (!buf) goto err; - buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); + buf->data = kmalloc(TTY_AUDIT_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; @@ -235,14 +237,14 @@ void tty_audit_add_data(const struct tty_struct *tty, const void *data, do { size_t run; - run = N_TTY_BUF_SIZE - buf->valid; + run = TTY_AUDIT_BUF_SIZE - buf->valid; if (run > size) run = size; memcpy(buf->data + buf->valid, data, run); buf->valid += run; data += run; size -= run; - if (buf->valid == N_TTY_BUF_SIZE) + if (buf->valid == TTY_AUDIT_BUF_SIZE) tty_audit_buf_push(buf); } while (size != 0); mutex_unlock(&buf->mutex); From 0738abc2a42e4094510ef210edf7a1aaa13f913e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:18 +0100 Subject: [PATCH 1481/2157] tty: caif: do not use N_TTY_BUF_SIZE N_TTY_BUF_SIZE -- as the name suggests -- is the N_TTY's buffer size. There is no reason to couple that to caif's tty->receive_room. Use 4096 directly -- even though, it should be some sort of "SKB_MAX_ALLOC" or alike. But definitely not N_TTY_BUF_SIZE. N_TTY_BUF_SIZE is private and will be moved to n_tty.c later. Signed-off-by: Jiri Slaby (SUSE) Acked-by: Jakub Kicinski Cc: Andrew Lunn Cc: David S. Miller Cc: Eric Dumazet Cc: Paolo Abeni Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20250317070046.24386-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/caif/caif_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index ed3a589def6b..e7d1b9301fde 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -344,7 +344,7 @@ static int ldisc_open(struct tty_struct *tty) ser->tty = tty_kref_get(tty); ser->dev = dev; debugfs_init(ser, tty); - tty->receive_room = N_TTY_BUF_SIZE; + tty->receive_room = 4096; tty->disc_data = ser; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); rtnl_lock(); From d2e38e713bada24539b2b049f9be8d40dea79f41 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:19 +0100 Subject: [PATCH 1482/2157] tty: move N_TTY_BUF_SIZE to n_tty "N_TTY_BUF_SIZE" is private to n_tty and shall not be exposed to the world. Definitely not in tty.h somewhere in the middle of "struct tty_struct". This is a remnant of moving "read_flags" to "struct n_tty_data" in commit 3fe780b379fa ("TTY: move ldisc data from tty_struct: bitmaps"). But some cleanup was needed first (in previous patches). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-5-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 ++ include/linux/tty.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 5e9ca4376d68..2c5995019dd1 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -56,6 +56,8 @@ */ #define WAKEUP_CHARS 256 +#define N_TTY_BUF_SIZE 4096 + /* * This defines the low- and high-watermarks for throttling and * unthrottling the TTY driver. These watermarks are used for diff --git a/include/linux/tty.h b/include/linux/tty.h index 6bb4fb3845f0..0a46e4054dec 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -239,7 +239,6 @@ struct tty_struct { struct list_head tty_files; -#define N_TTY_BUF_SIZE 4096 struct work_struct SAK_work; } __randomize_layout; From d97aa066678bd1e2951ee93db9690835dfe57ab6 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:20 +0100 Subject: [PATCH 1483/2157] tty: n_tty: use uint for space returned by tty_write_room() tty_write_room() returns an "unsigned int". So in case some insane driver (like my tty test driver) returns (legitimate) UINT_MAX from its tty_operations::write_room(), n_tty is confused on several places. For example, in process_output_block(), the result of tty_write_room() is stored into (signed) "int". So this UINT_MAX suddenly becomes -1. And that is extended to ssize_t and returned from process_output_block(). This causes a write() to such a node to receive -EPERM (which is -1). Fix that by using proper "unsigned int" and proper "== 0" test. And return 0 constant directly in that "if", so that it is immediately clear what is returned ("space" equals to 0 at that point). Similarly for process_output() and __process_echoes(). Note this does not fix any in-tree driver as of now. If you want "Fixes: something", it would be commit 03b3b1a2405c ("tty: make tty_operations::write_room return uint"). I intentionally do not mark this patch by a real tag below. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 2c5995019dd1..765d24268d75 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -488,7 +488,8 @@ static int do_output_char(u8 c, struct tty_struct *tty, int space) static int process_output(u8 c, struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; - int space, retval; + unsigned int space; + int retval; mutex_lock(&ldata->output_lock); @@ -524,16 +525,16 @@ static ssize_t process_output_block(struct tty_struct *tty, const u8 *buf, unsigned int nr) { struct n_tty_data *ldata = tty->disc_data; - int space; - int i; + unsigned int space; + int i; const u8 *cp; mutex_lock(&ldata->output_lock); space = tty_write_room(tty); - if (space <= 0) { + if (space == 0) { mutex_unlock(&ldata->output_lock); - return space; + return 0; } if (nr > space) nr = space; @@ -698,7 +699,7 @@ static int n_tty_process_echo_ops(struct tty_struct *tty, size_t *tail, static size_t __process_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; - int space, old_space; + unsigned int space, old_space; size_t tail; u8 c; From fdfa49a8c96500ef9e1fe1cd2e68c8fd71d8b53a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:21 +0100 Subject: [PATCH 1484/2157] tty: n_tty: simplify process_output() Using guard(mutex), the function can be written in a much more efficient way. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-7-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 765d24268d75..df52aae5f71a 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -488,19 +488,13 @@ static int do_output_char(u8 c, struct tty_struct *tty, int space) static int process_output(u8 c, struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; - unsigned int space; - int retval; - mutex_lock(&ldata->output_lock); + guard(mutex)(&ldata->output_lock); - space = tty_write_room(tty); - retval = do_output_char(c, tty, space); - - mutex_unlock(&ldata->output_lock); - if (retval < 0) + if (do_output_char(c, tty, tty_write_room(tty)) < 0) return -1; - else - return 0; + + return 0; } /** From e287d64605d6ab66884b6d473a04fc91d5dc16c3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:22 +0100 Subject: [PATCH 1485/2157] tty: n_tty: clean up process_output_block() * Use guard(mutex), which results in: - the function can return directly when "space == 0". - "i" can now be "unsigned" as it is no longer abused to hold a retval from tty->ops->write(). Note the compared-to "nr" is already "unsigned". * The end label is now dubbed "do_write" as that is what happens there. Unlike the uncertain "break_out" name. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-8-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index df52aae5f71a..5d172edbb03c 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -519,17 +519,15 @@ static ssize_t process_output_block(struct tty_struct *tty, const u8 *buf, unsigned int nr) { struct n_tty_data *ldata = tty->disc_data; - unsigned int space; - int i; + unsigned int space, i; const u8 *cp; - mutex_lock(&ldata->output_lock); + guard(mutex)(&ldata->output_lock); space = tty_write_room(tty); - if (space == 0) { - mutex_unlock(&ldata->output_lock); + if (space == 0) return 0; - } + if (nr > space) nr = space; @@ -541,18 +539,18 @@ static ssize_t process_output_block(struct tty_struct *tty, if (O_ONLRET(tty)) ldata->column = 0; if (O_ONLCR(tty)) - goto break_out; + goto do_write; ldata->canon_column = ldata->column; break; case '\r': if (O_ONOCR(tty) && ldata->column == 0) - goto break_out; + goto do_write; if (O_OCRNL(tty)) - goto break_out; + goto do_write; ldata->canon_column = ldata->column = 0; break; case '\t': - goto break_out; + goto do_write; case '\b': if (ldata->column > 0) ldata->column--; @@ -560,18 +558,15 @@ static ssize_t process_output_block(struct tty_struct *tty, default: if (!iscntrl(c)) { if (O_OLCUC(tty)) - goto break_out; + goto do_write; if (!is_continuation(c, tty)) ldata->column++; } break; } } -break_out: - i = tty->ops->write(tty, buf, i); - - mutex_unlock(&ldata->output_lock); - return i; +do_write: + return tty->ops->write(tty, buf, i); } static int n_tty_process_echo_ops(struct tty_struct *tty, size_t *tail, From 67e781f56867775f35b1836b71be76df5209ceb1 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:23 +0100 Subject: [PATCH 1486/2157] tty: n_tty: drop n_tty_trace() This n_tty_trace() is an always disabled debugging macro. It comes from commit 32f13521ca68 ("n_tty: Line copy to user buffer in canonical mode"). Drop it as it is dead for over a decade. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-9-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 5d172edbb03c..43ba740792d9 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,14 +81,6 @@ #define ECHO_BLOCK 256 #define ECHO_DISCARD_WATERMARK N_TTY_BUF_SIZE - (ECHO_BLOCK + 32) - -#undef N_TTY_TRACE -#ifdef N_TTY_TRACE -# define n_tty_trace(f, args...) trace_printk(f, ##args) -#else -# define n_tty_trace(f, args...) no_printk(f, ##args) -#endif - struct n_tty_data { /* producer-published */ size_t read_head; @@ -2026,9 +2018,6 @@ static bool canon_copy_from_read_buf(const struct tty_struct *tty, u8 **kbp, tail = MASK(ldata->read_tail); size = min_t(size_t, tail + n, N_TTY_BUF_SIZE); - n_tty_trace("%s: nr:%zu tail:%zu n:%zu size:%zu\n", - __func__, *nr, tail, n, size); - eol = find_next_bit(ldata->read_flags, size, tail); more = n - (size - tail); if (eol == N_TTY_BUF_SIZE && more) { @@ -2046,9 +2035,6 @@ static bool canon_copy_from_read_buf(const struct tty_struct *tty, u8 **kbp, if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) n = c; - n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n", - __func__, eol, found, n, c, tail, more); - tty_copy(tty, *kbp, tail, n); *kbp += n; *nr -= n; From aa1ebc9cffce227e1efd17efec5ad9798aefaf0c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:24 +0100 Subject: [PATCH 1487/2157] tty: n_tty: extract n_tty_continue_cookie() from n_tty_read() n_tty_read() is a very long function doing too much of different stuff. Extract the "cookie" (continuation read) handling to a separate function: n_tty_continue_cookie(). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-10-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 66 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 43ba740792d9..88aa5f9cbe5e 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2111,6 +2111,39 @@ static int job_control(struct tty_struct *tty, struct file *file) return __tty_check_change(tty, SIGTTIN); } +/* + * We still hold the atomic_read_lock and the termios_rwsem, and can just + * continue to copy data. + */ +static ssize_t n_tty_continue_cookie(struct tty_struct *tty, u8 *kbuf, + size_t nr, void **cookie) +{ + struct n_tty_data *ldata = tty->disc_data; + u8 *kb = kbuf; + + if (ldata->icanon && !L_EXTPROC(tty)) { + /* + * If we have filled the user buffer, see if we should skip an + * EOF character before releasing the lock and returning done. + */ + if (!nr) + canon_skip_eof(ldata); + else if (canon_copy_from_read_buf(tty, &kb, &nr)) + return kb - kbuf; + } else { + if (copy_from_read_buf(tty, &kb, &nr)) + return kb - kbuf; + } + + /* No more data - release locks and stop retries */ + n_tty_kick_worker(tty); + n_tty_check_unthrottle(tty); + up_read(&tty->termios_rwsem); + mutex_unlock(&ldata->atomic_read_lock); + *cookie = NULL; + + return kb - kbuf; +} /** * n_tty_read - read function for tty @@ -2144,36 +2177,9 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, bool packet; size_t old_tail; - /* - * Is this a continuation of a read started earler? - * - * If so, we still hold the atomic_read_lock and the - * termios_rwsem, and can just continue to copy data. - */ - if (*cookie) { - if (ldata->icanon && !L_EXTPROC(tty)) { - /* - * If we have filled the user buffer, see - * if we should skip an EOF character before - * releasing the lock and returning done. - */ - if (!nr) - canon_skip_eof(ldata); - else if (canon_copy_from_read_buf(tty, &kb, &nr)) - return kb - kbuf; - } else { - if (copy_from_read_buf(tty, &kb, &nr)) - return kb - kbuf; - } - - /* No more data - release locks and stop retries */ - n_tty_kick_worker(tty); - n_tty_check_unthrottle(tty); - up_read(&tty->termios_rwsem); - mutex_unlock(&ldata->atomic_read_lock); - *cookie = NULL; - return kb - kbuf; - } + /* Is this a continuation of a read started earlier? */ + if (*cookie) + return n_tty_continue_cookie(tty, kbuf, nr, cookie); retval = job_control(tty, file); if (retval < 0) From 40315ce580691006633d80861a81541ee17fe511 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:25 +0100 Subject: [PATCH 1488/2157] tty: n_tty: extract n_tty_wait_for_input() n_tty_read() is a very long function doing too much of different stuff. Extract the "wait for input" to a separate function: n_tty_wait_for_input(). It returns an error (< 0), no input (0), or has potential input (1). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-11-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 57 ++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 88aa5f9cbe5e..0e3eb18490f0 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2145,6 +2145,33 @@ static ssize_t n_tty_continue_cookie(struct tty_struct *tty, u8 *kbuf, return kb - kbuf; } +static int n_tty_wait_for_input(struct tty_struct *tty, struct file *file, + struct wait_queue_entry *wait, long *timeout) +{ + if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) + return -EIO; + if (tty_hung_up_p(file)) + return 0; + /* + * Abort readers for ttys which never actually get hung up. + * See __tty_hangup(). + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + return 0; + if (!*timeout) + return 0; + if (tty_io_nonblock(tty, file)) + return -EAGAIN; + if (signal_pending(current)) + return -ERESTARTSYS; + + up_read(&tty->termios_rwsem); + *timeout = wait_woken(wait, TASK_INTERRUPTIBLE, *timeout); + down_read(&tty->termios_rwsem); + + return 1; +} + /** * n_tty_read - read function for tty * @tty: tty device @@ -2234,34 +2261,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, tty_buffer_flush_work(tty->port); down_read(&tty->termios_rwsem); if (!input_available_p(tty, 0)) { - if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) { - retval = -EIO; + int ret = n_tty_wait_for_input(tty, file, &wait, + &timeout); + if (ret <= 0) { + retval = ret; break; } - if (tty_hung_up_p(file)) - break; - /* - * Abort readers for ttys which never actually - * get hung up. See __tty_hangup(). - */ - if (test_bit(TTY_HUPPING, &tty->flags)) - break; - if (!timeout) - break; - if (tty_io_nonblock(tty, file)) { - retval = -EAGAIN; - break; - } - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - up_read(&tty->termios_rwsem); - - timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, - timeout); - - down_read(&tty->termios_rwsem); continue; } } From f812af3642ef4f2ff3db49f33d097f1b8668ce72 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:26 +0100 Subject: [PATCH 1489/2157] tty: n_tty: move more_to_be_read to the end of n_tty_read() n_tty_read() contains "we need more data" handling deep in that function. And there is also a label (more_to_be_read) as we handle this situation from two places. It makes more sense to have all "return"s accumulated at the end of functions. And "goto" from multiple places there. Therefore, do this with the "more_to_be_read" label in n_tty_read(). After this and the previous changes, n_tty_read() is now much more easier to follow. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-12-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0e3eb18490f0..6af3f3a0b531 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2281,21 +2281,8 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, nr--; } - /* - * Copy data, and if there is more to be had - * and we have nothing more to wait for, then - * let's mark us for retries. - * - * NOTE! We return here with both the termios_sem - * and atomic_read_lock still held, the retries - * will release them when done. - */ - if (copy_from_read_buf(tty, &kb, &nr) && kb - kbuf >= minimum) { -more_to_be_read: - remove_wait_queue(&tty->read_wait, &wait); - *cookie = cookie; - return kb - kbuf; - } + if (copy_from_read_buf(tty, &kb, &nr) && kb - kbuf >= minimum) + goto more_to_be_read; } n_tty_check_unthrottle(tty); @@ -2322,6 +2309,18 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, u8 *kbuf, retval = kb - kbuf; return retval; +more_to_be_read: + /* + * There is more to be had and we have nothing more to wait for, so + * let's mark us for retries. + * + * NOTE! We return here with both the termios_sem and atomic_read_lock + * still held, the retries will release them when done. + */ + remove_wait_queue(&tty->read_wait, &wait); + *cookie = cookie; + + return kb - kbuf; } /** From 67acbcc324272ed06cd1c8f360afdeceb919af89 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:27 +0100 Subject: [PATCH 1490/2157] tty: tty_driver: move TTY macros to the top So that they can be referenced in structs once converted to enums (in the next patches). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-13-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 156 ++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index d4cdc089f6c3..f3be6d56e9e5 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -16,6 +16,84 @@ struct tty_driver; struct serial_icounter_struct; struct serial_struct; +/** + * DOC: TTY Driver Flags + * + * TTY_DRIVER_RESET_TERMIOS + * Requests the tty layer to reset the termios setting when the last + * process has closed the device. Used for PTYs, in particular. + * + * TTY_DRIVER_REAL_RAW + * Indicates that the driver will guarantee not to set any special + * character handling flags if this is set for the tty: + * + * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` + * + * That is, if there is no reason for the driver to + * send notifications of parity and break characters up to the line + * driver, it won't do so. This allows the line driver to optimize for + * this case if this flag is set. (Note that there is also a promise, if + * the above case is true, not to signal overruns, either.) + * + * TTY_DRIVER_DYNAMIC_DEV + * The individual tty devices need to be registered with a call to + * tty_register_device() when the device is found in the system and + * unregistered with a call to tty_unregister_device() so the devices will + * be show up properly in sysfs. If not set, all &tty_driver.num entries + * will be created by the tty core in sysfs when tty_register_driver() is + * called. This is to be used by drivers that have tty devices that can + * appear and disappear while the main tty driver is registered with the + * tty core. + * + * TTY_DRIVER_DEVPTS_MEM + * Don't use the standard arrays (&tty_driver.ttys and + * &tty_driver.termios), instead use dynamic memory keyed through the + * devpts filesystem. This is only applicable to the PTY driver. + * + * TTY_DRIVER_HARDWARE_BREAK + * Hardware handles break signals. Pass the requested timeout to the + * &tty_operations.break_ctl instead of using a simple on/off interface. + * + * TTY_DRIVER_DYNAMIC_ALLOC + * Do not allocate structures which are needed per line for this driver + * (&tty_driver.ports) as it would waste memory. The driver will take + * care. This is only applicable to the PTY driver. + * + * TTY_DRIVER_UNNUMBERED_NODE + * Do not create numbered ``/dev`` nodes. For example, create + * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a + * driver for a single tty device is being allocated. + */ +#define TTY_DRIVER_INSTALLED 0x0001 +#define TTY_DRIVER_RESET_TERMIOS 0x0002 +#define TTY_DRIVER_REAL_RAW 0x0004 +#define TTY_DRIVER_DYNAMIC_DEV 0x0008 +#define TTY_DRIVER_DEVPTS_MEM 0x0010 +#define TTY_DRIVER_HARDWARE_BREAK 0x0020 +#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040 +#define TTY_DRIVER_UNNUMBERED_NODE 0x0080 + +/* tty driver types */ +#define TTY_DRIVER_TYPE_SYSTEM 0x0001 +#define TTY_DRIVER_TYPE_CONSOLE 0x0002 +#define TTY_DRIVER_TYPE_SERIAL 0x0003 +#define TTY_DRIVER_TYPE_PTY 0x0004 +#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */ +#define TTY_DRIVER_TYPE_SYSCONS 0x0006 + +/* system subtypes (magic, used by tty_io.c) */ +#define SYSTEM_TYPE_TTY 0x0001 +#define SYSTEM_TYPE_CONSOLE 0x0002 +#define SYSTEM_TYPE_SYSCONS 0x0003 +#define SYSTEM_TYPE_SYSPTMX 0x0004 + +/* pty subtypes (magic, used by tty_io.c) */ +#define PTY_TYPE_MASTER 0x0001 +#define PTY_TYPE_SLAVE 0x0002 + +/* serial subtype definitions */ +#define SERIAL_TYPE_NORMAL 1 + /** * struct tty_operations -- interface between driver and tty * @@ -494,84 +572,6 @@ static inline void tty_set_operations(struct tty_driver *driver, driver->ops = op; } -/** - * DOC: TTY Driver Flags - * - * TTY_DRIVER_RESET_TERMIOS - * Requests the tty layer to reset the termios setting when the last - * process has closed the device. Used for PTYs, in particular. - * - * TTY_DRIVER_REAL_RAW - * Indicates that the driver will guarantee not to set any special - * character handling flags if this is set for the tty: - * - * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` - * - * That is, if there is no reason for the driver to - * send notifications of parity and break characters up to the line - * driver, it won't do so. This allows the line driver to optimize for - * this case if this flag is set. (Note that there is also a promise, if - * the above case is true, not to signal overruns, either.) - * - * TTY_DRIVER_DYNAMIC_DEV - * The individual tty devices need to be registered with a call to - * tty_register_device() when the device is found in the system and - * unregistered with a call to tty_unregister_device() so the devices will - * be show up properly in sysfs. If not set, all &tty_driver.num entries - * will be created by the tty core in sysfs when tty_register_driver() is - * called. This is to be used by drivers that have tty devices that can - * appear and disappear while the main tty driver is registered with the - * tty core. - * - * TTY_DRIVER_DEVPTS_MEM - * Don't use the standard arrays (&tty_driver.ttys and - * &tty_driver.termios), instead use dynamic memory keyed through the - * devpts filesystem. This is only applicable to the PTY driver. - * - * TTY_DRIVER_HARDWARE_BREAK - * Hardware handles break signals. Pass the requested timeout to the - * &tty_operations.break_ctl instead of using a simple on/off interface. - * - * TTY_DRIVER_DYNAMIC_ALLOC - * Do not allocate structures which are needed per line for this driver - * (&tty_driver.ports) as it would waste memory. The driver will take - * care. This is only applicable to the PTY driver. - * - * TTY_DRIVER_UNNUMBERED_NODE - * Do not create numbered ``/dev`` nodes. For example, create - * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a - * driver for a single tty device is being allocated. - */ -#define TTY_DRIVER_INSTALLED 0x0001 -#define TTY_DRIVER_RESET_TERMIOS 0x0002 -#define TTY_DRIVER_REAL_RAW 0x0004 -#define TTY_DRIVER_DYNAMIC_DEV 0x0008 -#define TTY_DRIVER_DEVPTS_MEM 0x0010 -#define TTY_DRIVER_HARDWARE_BREAK 0x0020 -#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040 -#define TTY_DRIVER_UNNUMBERED_NODE 0x0080 - -/* tty driver types */ -#define TTY_DRIVER_TYPE_SYSTEM 0x0001 -#define TTY_DRIVER_TYPE_CONSOLE 0x0002 -#define TTY_DRIVER_TYPE_SERIAL 0x0003 -#define TTY_DRIVER_TYPE_PTY 0x0004 -#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */ -#define TTY_DRIVER_TYPE_SYSCONS 0x0006 - -/* system subtypes (magic, used by tty_io.c) */ -#define SYSTEM_TYPE_TTY 0x0001 -#define SYSTEM_TYPE_CONSOLE 0x0002 -#define SYSTEM_TYPE_SYSCONS 0x0003 -#define SYSTEM_TYPE_SYSPTMX 0x0004 - -/* pty subtypes (magic, used by tty_io.c) */ -#define PTY_TYPE_MASTER 0x0001 -#define PTY_TYPE_SLAVE 0x0002 - -/* serial subtype definitions */ -#define SERIAL_TYPE_NORMAL 1 - int tty_register_driver(struct tty_driver *driver); void tty_unregister_driver(struct tty_driver *driver); struct device *tty_register_device(struct tty_driver *driver, unsigned index, From 109e06ae1dcf41d470705c54a67b123792424279 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:28 +0100 Subject: [PATCH 1491/2157] tty: tty_driver: convert "TTY Driver Flags" to an enum Convert TTY_DRIVER_* macros (flags) to an enum. This allows for easier kernel-doc (the comment needed fine tuning), grouping of these nicely, and proper checking. Given these are flags, define them using modern BIT() instead of hex constants. It turns out (thanks, kernel-doc checker) that internal TTY_DRIVER_INSTALLED was undocumented. Fix that too. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-14-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/tty/tty_driver.rst | 2 +- include/linux/tty_driver.h | 40 ++++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Documentation/driver-api/tty/tty_driver.rst b/Documentation/driver-api/tty/tty_driver.rst index cc529f863406..f6cbffdb6e01 100644 --- a/Documentation/driver-api/tty/tty_driver.rst +++ b/Documentation/driver-api/tty/tty_driver.rst @@ -35,7 +35,7 @@ Here comes the documentation of flags accepted by tty_alloc_driver() (or __tty_alloc_driver()): .. kernel-doc:: include/linux/tty_driver.h - :doc: TTY Driver Flags + :identifiers: tty_driver_flag ---- diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index f3be6d56e9e5..d0d940236580 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -17,13 +17,19 @@ struct serial_icounter_struct; struct serial_struct; /** - * DOC: TTY Driver Flags + * enum tty_driver_flag -- TTY Driver Flags * - * TTY_DRIVER_RESET_TERMIOS + * These are flags passed to tty_alloc_driver(). + * + * @TTY_DRIVER_INSTALLED: + * Whether this driver was succesfully installed. This is a tty internal + * flag. Do not touch. + * + * @TTY_DRIVER_RESET_TERMIOS: * Requests the tty layer to reset the termios setting when the last * process has closed the device. Used for PTYs, in particular. * - * TTY_DRIVER_REAL_RAW + * @TTY_DRIVER_REAL_RAW: * Indicates that the driver will guarantee not to set any special * character handling flags if this is set for the tty: * @@ -35,7 +41,7 @@ struct serial_struct; * this case if this flag is set. (Note that there is also a promise, if * the above case is true, not to signal overruns, either.) * - * TTY_DRIVER_DYNAMIC_DEV + * @TTY_DRIVER_DYNAMIC_DEV: * The individual tty devices need to be registered with a call to * tty_register_device() when the device is found in the system and * unregistered with a call to tty_unregister_device() so the devices will @@ -45,33 +51,35 @@ struct serial_struct; * appear and disappear while the main tty driver is registered with the * tty core. * - * TTY_DRIVER_DEVPTS_MEM + * @TTY_DRIVER_DEVPTS_MEM: * Don't use the standard arrays (&tty_driver.ttys and * &tty_driver.termios), instead use dynamic memory keyed through the * devpts filesystem. This is only applicable to the PTY driver. * - * TTY_DRIVER_HARDWARE_BREAK + * @TTY_DRIVER_HARDWARE_BREAK: * Hardware handles break signals. Pass the requested timeout to the * &tty_operations.break_ctl instead of using a simple on/off interface. * - * TTY_DRIVER_DYNAMIC_ALLOC + * @TTY_DRIVER_DYNAMIC_ALLOC: * Do not allocate structures which are needed per line for this driver * (&tty_driver.ports) as it would waste memory. The driver will take * care. This is only applicable to the PTY driver. * - * TTY_DRIVER_UNNUMBERED_NODE + * @TTY_DRIVER_UNNUMBERED_NODE: * Do not create numbered ``/dev`` nodes. For example, create * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a * driver for a single tty device is being allocated. */ -#define TTY_DRIVER_INSTALLED 0x0001 -#define TTY_DRIVER_RESET_TERMIOS 0x0002 -#define TTY_DRIVER_REAL_RAW 0x0004 -#define TTY_DRIVER_DYNAMIC_DEV 0x0008 -#define TTY_DRIVER_DEVPTS_MEM 0x0010 -#define TTY_DRIVER_HARDWARE_BREAK 0x0020 -#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040 -#define TTY_DRIVER_UNNUMBERED_NODE 0x0080 +enum tty_driver_flag { + TTY_DRIVER_INSTALLED = BIT(0), + TTY_DRIVER_RESET_TERMIOS = BIT(1), + TTY_DRIVER_REAL_RAW = BIT(2), + TTY_DRIVER_DYNAMIC_DEV = BIT(3), + TTY_DRIVER_DEVPTS_MEM = BIT(4), + TTY_DRIVER_HARDWARE_BREAK = BIT(5), + TTY_DRIVER_DYNAMIC_ALLOC = BIT(6), + TTY_DRIVER_UNNUMBERED_NODE = BIT(7), +}; /* tty driver types */ #define TTY_DRIVER_TYPE_SYSTEM 0x0001 From 63f3cd5d80d720fc6c9fd321138bbbf322ade392 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:29 +0100 Subject: [PATCH 1492/2157] tty: tty_driver: document both {,__}tty_alloc_driver() properly __tty_alloc_driver()'s kernel-doc needed some care: describe the return value using the standard "Returns:", and use the new enum tty_driver_flag for @flags. Then, the tty_alloc_driver() macro was undocumented, but referenced many times in the docs. Copy the docs from the above (except the @owner parameter, obviously). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-15-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/tty/tty_driver.rst | 2 ++ drivers/tty/tty_io.c | 8 +++++--- include/linux/tty_driver.h | 8 +++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Documentation/driver-api/tty/tty_driver.rst b/Documentation/driver-api/tty/tty_driver.rst index f6cbffdb6e01..7138222a70f2 100644 --- a/Documentation/driver-api/tty/tty_driver.rst +++ b/Documentation/driver-api/tty/tty_driver.rst @@ -25,6 +25,8 @@ freed. For reference, both allocation and deallocation functions are explained here in detail: +.. kernel-doc:: include/linux/tty_driver.h + :identifiers: tty_alloc_driver .. kernel-doc:: drivers/tty/tty_io.c :identifiers: __tty_alloc_driver tty_driver_kref_put diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 449dbd216460..ca9b7d7bad2b 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3329,10 +3329,12 @@ EXPORT_SYMBOL(tty_unregister_device); * __tty_alloc_driver - allocate tty driver * @lines: count of lines this driver can handle at most * @owner: module which is responsible for this driver - * @flags: some of %TTY_DRIVER_ flags, will be set in driver->flags + * @flags: some of enum tty_driver_flag, will be set in driver->flags * - * This should not be called directly, some of the provided macros should be - * used instead. Use IS_ERR() and friends on @retval. + * This should not be called directly, tty_alloc_driver() should be used + * instead. + * + * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). */ struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner, unsigned long flags) diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index d0d940236580..0fe38befa1b8 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -564,7 +564,13 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line); void tty_driver_kref_put(struct tty_driver *driver); -/* Use TTY_DRIVER_* flags below */ +/** + * tty_alloc_driver - allocate tty driver + * @lines: count of lines this driver can handle at most + * @flags: some of enum tty_driver_flag, will be set in driver->flags + * + * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). + */ #define tty_alloc_driver(lines, flags) \ __tty_alloc_driver(lines, THIS_MODULE, flags) From 52443558adcdb11cff76beec34bf75f6779e1a08 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:30 +0100 Subject: [PATCH 1493/2157] tty: tty_driver: introduce TTY driver sub/types enums Convert TTY_DRIVER_TYPE_*, and subtype macros to two enums: tty_driver_type and tty_driver_subtype. This allows for easier kernel-doc (later), grouping of these nicely, and proper checking. The tty_driver's ::type and ::subtype now use these enums instead of bare "short". Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-16-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 0fe38befa1b8..188ee9b768eb 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -81,26 +81,26 @@ enum tty_driver_flag { TTY_DRIVER_UNNUMBERED_NODE = BIT(7), }; -/* tty driver types */ -#define TTY_DRIVER_TYPE_SYSTEM 0x0001 -#define TTY_DRIVER_TYPE_CONSOLE 0x0002 -#define TTY_DRIVER_TYPE_SERIAL 0x0003 -#define TTY_DRIVER_TYPE_PTY 0x0004 -#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */ -#define TTY_DRIVER_TYPE_SYSCONS 0x0006 +enum tty_driver_type { + TTY_DRIVER_TYPE_SYSTEM, + TTY_DRIVER_TYPE_CONSOLE, + TTY_DRIVER_TYPE_SERIAL, + TTY_DRIVER_TYPE_PTY, + TTY_DRIVER_TYPE_SCC, + TTY_DRIVER_TYPE_SYSCONS, +}; -/* system subtypes (magic, used by tty_io.c) */ -#define SYSTEM_TYPE_TTY 0x0001 -#define SYSTEM_TYPE_CONSOLE 0x0002 -#define SYSTEM_TYPE_SYSCONS 0x0003 -#define SYSTEM_TYPE_SYSPTMX 0x0004 +enum tty_driver_subtype { + SYSTEM_TYPE_TTY = 1, + SYSTEM_TYPE_CONSOLE, + SYSTEM_TYPE_SYSCONS, + SYSTEM_TYPE_SYSPTMX, -/* pty subtypes (magic, used by tty_io.c) */ -#define PTY_TYPE_MASTER 0x0001 -#define PTY_TYPE_SLAVE 0x0002 + PTY_TYPE_MASTER = 1, + PTY_TYPE_SLAVE, -/* serial subtype definitions */ -#define SERIAL_TYPE_NORMAL 1 + SERIAL_TYPE_NORMAL = 1, +}; /** * struct tty_operations -- interface between driver and tty @@ -500,8 +500,8 @@ struct tty_operations { * @major: major /dev device number (zero for autoassignment) * @minor_start: the first minor /dev device number * @num: number of devices allocated - * @type: type of tty driver (%TTY_DRIVER_TYPE_) - * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_) + * @type: type of tty driver (enum tty_driver_type) + * @subtype: subtype of tty driver (enum tty_driver_subtype) * @init_termios: termios to set to each tty initially (e.g. %tty_std_termios) * @flags: tty driver flags (%TTY_DRIVER_) * @proc_entry: proc fs entry, used internally @@ -533,8 +533,8 @@ struct tty_driver { int major; int minor_start; unsigned int num; - short type; - short subtype; + enum tty_driver_type type; + enum tty_driver_subtype subtype; struct ktermios init_termios; unsigned long flags; struct proc_dir_entry *proc_entry; From 5af89030960fd987a6c25e33405bbaaff3c7298c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:31 +0100 Subject: [PATCH 1494/2157] tty: serdev: drop serdev_controller_ops::write_room() In particular, serdev_device_write_room() is not called, so the whole serdev's write_room() can go. Signed-off-by: Jiri Slaby (SUSE) Cc: Rob Herring Link: https://lore.kernel.org/r/20250317070046.24386-17-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 11 ----------- drivers/tty/serdev/serdev-ttyport.c | 9 --------- include/linux/serdev.h | 6 ------ 3 files changed, 26 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index ebf0bbc2cff2..eb2a2e58fe78 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -316,17 +316,6 @@ void serdev_device_write_flush(struct serdev_device *serdev) } EXPORT_SYMBOL_GPL(serdev_device_write_flush); -int serdev_device_write_room(struct serdev_device *serdev) -{ - struct serdev_controller *ctrl = serdev->ctrl; - - if (!ctrl || !ctrl->ops->write_room) - return 0; - - return serdev->ctrl->ops->write_room(ctrl); -} -EXPORT_SYMBOL_GPL(serdev_device_write_room); - unsigned int serdev_device_set_baudrate(struct serdev_device *serdev, unsigned int speed) { struct serdev_controller *ctrl = serdev->ctrl; diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 3d7ae7fa5018..bab1b143b8a6 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -92,14 +92,6 @@ static void ttyport_write_flush(struct serdev_controller *ctrl) tty_driver_flush_buffer(tty); } -static int ttyport_write_room(struct serdev_controller *ctrl) -{ - struct serport *serport = serdev_controller_get_drvdata(ctrl); - struct tty_struct *tty = serport->tty; - - return tty_write_room(tty); -} - static int ttyport_open(struct serdev_controller *ctrl) { struct serport *serport = serdev_controller_get_drvdata(ctrl); @@ -259,7 +251,6 @@ static int ttyport_break_ctl(struct serdev_controller *ctrl, unsigned int break_ static const struct serdev_controller_ops ctrl_ops = { .write_buf = ttyport_write_buf, .write_flush = ttyport_write_flush, - .write_room = ttyport_write_room, .open = ttyport_open, .close = ttyport_close, .set_flow_control = ttyport_set_flow_control, diff --git a/include/linux/serdev.h b/include/linux/serdev.h index ff78efc1f60d..34562eb99931 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -84,7 +84,6 @@ enum serdev_parity { struct serdev_controller_ops { ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); - int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); @@ -212,7 +211,6 @@ int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); -int serdev_device_write_room(struct serdev_device *); /* * serdev device driver functions @@ -273,10 +271,6 @@ static inline ssize_t serdev_device_write(struct serdev_device *sdev, return -ENODEV; } static inline void serdev_device_write_flush(struct serdev_device *sdev) {} -static inline int serdev_device_write_room(struct serdev_device *sdev) -{ - return 0; -} #define serdev_device_driver_register(x) #define serdev_device_driver_unregister(x) From e10865aa8ebca5fe1748f83dc8bdb2aa4e63828b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:32 +0100 Subject: [PATCH 1495/2157] tty: mmc: sdio: use bool for cts and remove parentheses 'cts' in sdio_uart_check_modem_status() is considered a 'bool', but typed as signed 'int'. Make it 'bool' so it is clear the code does not care about the masked value, but true/false. Signed-off-by: Jiri Slaby (SUSE) Cc: Ulf Hansson Cc: linux-mmc@vger.kernel.org Link: https://lore.kernel.org/r/20250317070046.24386-18-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sdio_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio_uart.c b/drivers/mmc/core/sdio_uart.c index 6b7471dba3bf..7423a601e1e5 100644 --- a/drivers/mmc/core/sdio_uart.c +++ b/drivers/mmc/core/sdio_uart.c @@ -471,7 +471,7 @@ static void sdio_uart_check_modem_status(struct sdio_uart_port *port) port->icount.cts++; tty = tty_port_tty_get(&port->port); if (tty && C_CRTSCTS(tty)) { - int cts = (status & UART_MSR_CTS); + bool cts = status & UART_MSR_CTS; if (tty->hw_stopped) { if (cts) { tty->hw_stopped = false; From 0fdbeabf218e51b4714e33430f128fe3ffbecea3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:33 +0100 Subject: [PATCH 1496/2157] tty: moxa: drop version dump to logs The arbitrary MOXA_VERSION is dumped to the logs when the driver is loaded. Avoid this as a driver should be silent unless something breaks. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-19-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/moxa.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index ebaada8db929..2b75ca12cbc9 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -347,8 +347,6 @@ #define MX_PARMARK 0xA0 #define MX_PARSPACE 0x20 -#define MOXA_VERSION "6.0k" - #define MOXA_FW_HDRLEN 32 #define MOXAMAJOR 172 @@ -1327,9 +1325,6 @@ static int __init moxa_init(void) struct moxa_board_conf *brd = moxa_boards; unsigned int i; - printk(KERN_INFO "MOXA Intellio family driver version %s\n", - MOXA_VERSION); - tty_port_init(&moxa_service_port); moxaDriver = tty_alloc_driver(MAX_PORTS + 1, From eed0d311767e1c25703032b6d26568a9b2428a18 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:34 +0100 Subject: [PATCH 1497/2157] tty: moxa: drop ISA support I doubt anyone actually uses this driver (unlike mxser.c and serial moxa driven devices). Even less there is anyone with a moxa ISA card. The newer mxser dropped the support for ISA in 2021. Let this moxa follow now. Good diet. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-20-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/Kconfig | 2 +- drivers/tty/moxa.c | 100 +++----------------------------------------- 2 files changed, 6 insertions(+), 96 deletions(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 63a494d36a1f..0f3f55372c11 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -210,7 +210,7 @@ config SERIAL_NONSTANDARD config MOXA_INTELLIO tristate "Moxa Intellio support" - depends on SERIAL_NONSTANDARD && (ISA || EISA || PCI) + depends on SERIAL_NONSTANDARD && PCI select FW_LOADER help Say Y here if you have a Moxa Intellio multiport serial card. diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index 2b75ca12cbc9..a753afcb53b5 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -355,33 +355,21 @@ #define MAX_PORTS_PER_BOARD 32 /* Don't change this value */ #define MAX_PORTS (MAX_BOARDS * MAX_PORTS_PER_BOARD) -#define MOXA_IS_320(brd) ((brd)->boardType == MOXA_BOARD_C320_ISA || \ - (brd)->boardType == MOXA_BOARD_C320_PCI) - -/* - * Define the Moxa PCI vendor and device IDs. - */ -#define MOXA_BUS_TYPE_ISA 0 -#define MOXA_BUS_TYPE_PCI 1 +#define MOXA_IS_320(brd) ((brd)->boardType == MOXA_BOARD_C320_PCI) enum { MOXA_BOARD_C218_PCI = 1, - MOXA_BOARD_C218_ISA, MOXA_BOARD_C320_PCI, - MOXA_BOARD_C320_ISA, MOXA_BOARD_CP204J, }; static char *moxa_brdname[] = { "C218 Turbo PCI series", - "C218 Turbo ISA series", "C320 Turbo PCI series", - "C320 Turbo ISA series", "CP-204J series", }; -#ifdef CONFIG_PCI static const struct pci_device_id moxa_pcibrds[] = { { PCI_DEVICE(PCI_VENDOR_ID_MOXA, PCI_DEVICE_ID_MOXA_C218), .driver_data = MOXA_BOARD_C218_PCI }, @@ -392,14 +380,12 @@ static const struct pci_device_id moxa_pcibrds[] = { { 0 } }; MODULE_DEVICE_TABLE(pci, moxa_pcibrds); -#endif /* CONFIG_PCI */ struct moxa_port; static struct moxa_board_conf { int boardType; int numPorts; - int busType; unsigned int ready; @@ -459,9 +445,6 @@ static unsigned int moxaLowWaterChk; static DEFINE_MUTEX(moxa_openlock); static DEFINE_SPINLOCK(moxa_lock); -static unsigned long baseaddr[MAX_BOARDS]; -static unsigned int type[MAX_BOARDS]; -static unsigned int numports[MAX_BOARDS]; static struct tty_port moxa_service_port; MODULE_AUTHOR("William Chen"); @@ -471,13 +454,6 @@ MODULE_FIRMWARE("c218tunx.cod"); MODULE_FIRMWARE("cp204unx.cod"); MODULE_FIRMWARE("c320tunx.cod"); -module_param_array(type, uint, NULL, 0); -MODULE_PARM_DESC(type, "card type: C218=2, C320=4"); -module_param_hw_array(baseaddr, ulong, ioport, NULL, 0); -MODULE_PARM_DESC(baseaddr, "base address"); -module_param_array(numports, uint, NULL, 0); -MODULE_PARM_DESC(numports, "numports (ignored for C218)"); - module_param(ttymajor, int, 0); /* @@ -723,7 +699,6 @@ static DEFINE_TIMER(moxaTimer, moxa_poll); static int moxa_check_fw_model(struct moxa_board_conf *brd, u8 model) { switch (brd->boardType) { - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: if (model != 1) goto err; @@ -767,7 +742,6 @@ static int moxa_load_bios(struct moxa_board_conf *brd, const u8 *buf, msleep(2000); switch (brd->boardType) { - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: tmp = readw(baseAddr + C218_key); if (tmp != C218_KeyCode) @@ -831,7 +805,6 @@ static int moxa_real_load_code(struct moxa_board_conf *brd, const void *ptr, switch (brd->boardType) { case MOXA_BOARD_CP204J: - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: key = C218_key; loadbuf = C218_LoadBuf; @@ -896,15 +869,9 @@ static int moxa_real_load_code(struct moxa_board_conf *brd, const void *ptr, return -EIO; if (MOXA_IS_320(brd)) { - if (brd->busType == MOXA_BUS_TYPE_PCI) { /* ASIC board */ - writew(0x3800, baseAddr + TMS320_PORT1); - writew(0x3900, baseAddr + TMS320_PORT2); - writew(28499, baseAddr + TMS320_CLOCK); - } else { - writew(0x3200, baseAddr + TMS320_PORT1); - writew(0x3400, baseAddr + TMS320_PORT2); - writew(19999, baseAddr + TMS320_CLOCK); - } + writew(0x3800, baseAddr + TMS320_PORT1); + writew(0x3900, baseAddr + TMS320_PORT2); + writew(28499, baseAddr + TMS320_CLOCK); } writew(1, baseAddr + Disable_IRQ); writew(0, baseAddr + Magic_no); @@ -955,7 +922,6 @@ static int moxa_load_code(struct moxa_board_conf *brd, const void *ptr, return retval; switch (brd->boardType) { - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: case MOXA_BOARD_CP204J: port = brd->ports; @@ -1139,7 +1105,6 @@ static int moxa_init_board(struct moxa_board_conf *brd, struct device *dev) } switch (brd->boardType) { - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: file = "c218tunx.cod"; break; @@ -1225,7 +1190,6 @@ static void moxa_board_deinit(struct moxa_board_conf *brd) kfree(brd->ports); } -#ifdef CONFIG_PCI static int moxa_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1268,7 +1232,6 @@ static int moxa_pci_probe(struct pci_dev *pdev, board->boardType = board_type; switch (board_type) { - case MOXA_BOARD_C218_ISA: case MOXA_BOARD_C218_PCI: board->numPorts = 8; break; @@ -1280,7 +1243,6 @@ static int moxa_pci_probe(struct pci_dev *pdev, board->numPorts = 0; break; } - board->busType = MOXA_BUS_TYPE_PCI; retval = moxa_init_board(board, &pdev->dev); if (retval) @@ -1316,14 +1278,10 @@ static struct pci_driver moxa_pci_driver = { .probe = moxa_pci_probe, .remove = moxa_pci_remove }; -#endif /* CONFIG_PCI */ static int __init moxa_init(void) { - unsigned int isabrds = 0; int retval = 0; - struct moxa_board_conf *brd = moxa_boards; - unsigned int i; tty_port_init(&moxa_service_port); @@ -1352,64 +1310,16 @@ static int __init moxa_init(void) return -1; } - /* Find the boards defined from module args. */ - - for (i = 0; i < MAX_BOARDS; i++) { - if (!baseaddr[i]) - break; - if (type[i] == MOXA_BOARD_C218_ISA || - type[i] == MOXA_BOARD_C320_ISA) { - pr_debug("Moxa board %2d: %s board(baseAddr=%lx)\n", - isabrds + 1, moxa_brdname[type[i] - 1], - baseaddr[i]); - brd->boardType = type[i]; - brd->numPorts = type[i] == MOXA_BOARD_C218_ISA ? 8 : - numports[i]; - brd->busType = MOXA_BUS_TYPE_ISA; - brd->basemem = ioremap(baseaddr[i], 0x4000); - if (!brd->basemem) { - printk(KERN_ERR "MOXA: can't remap %lx\n", - baseaddr[i]); - continue; - } - if (moxa_init_board(brd, NULL)) { - iounmap(brd->basemem); - brd->basemem = NULL; - continue; - } - - printk(KERN_INFO "MOXA isa board found at 0x%.8lx and " - "ready (%u ports, firmware loaded)\n", - baseaddr[i], brd->numPorts); - - brd++; - isabrds++; - } - } - -#ifdef CONFIG_PCI retval = pci_register_driver(&moxa_pci_driver); - if (retval) { + if (retval) printk(KERN_ERR "Can't register MOXA pci driver!\n"); - if (isabrds) - retval = 0; - } -#endif return retval; } static void __exit moxa_exit(void) { - unsigned int i; - -#ifdef CONFIG_PCI pci_unregister_driver(&moxa_pci_driver); -#endif - - for (i = 0; i < MAX_BOARDS; i++) /* ISA boards */ - if (moxa_boards[i].ready) - moxa_board_deinit(&moxa_boards[i]); del_timer_sync(&moxaTimer); From 794d7b2721016b81f01838b0ff6eeb18bcfe6fcd Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:35 +0100 Subject: [PATCH 1498/2157] tty: moxa: carve out special ioctls and extra tty_port These ioctls are undocumented and not exposed -- they are defined locally. Given they need a special tty_port just for them, this is very ugly. So drop this whole functionality. It is barely used for something real. (And if it is, we'd need a common functionality to all drivers.) Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-21-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/moxa.c | 146 +-------------------------------------------- 1 file changed, 1 insertion(+), 145 deletions(-) diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index a753afcb53b5..1348e2214b81 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -43,15 +43,6 @@ #include #include -#include - -#define MOXA 0x400 -#define MOXA_GET_IQUEUE (MOXA + 1) /* get input buffered count */ -#define MOXA_GET_OQUEUE (MOXA + 2) /* get output buffered count */ -#define MOXA_GETDATACOUNT (MOXA + 23) -#define MOXA_GET_IOQUEUE (MOXA + 27) -#define MOXA_FLUSH_QUEUE (MOXA + 28) -#define MOXA_GETMSTATUS (MOXA + 65) /* * System Configuration @@ -397,19 +388,6 @@ static struct moxa_board_conf { void __iomem *intTable; } moxa_boards[MAX_BOARDS]; -struct mxser_mstatus { - tcflag_t cflag; - int cts; - int dsr; - int ri; - int dcd; -}; - -struct moxaq_str { - int inq; - int outq; -}; - struct moxa_port { struct tty_port port; struct moxa_board_conf *board; @@ -424,12 +402,6 @@ struct moxa_port { u8 lowChkFlag; }; -struct mon_str { - int tick; - int rxcnt[MAX_PORTS]; - int txcnt[MAX_PORTS]; -}; - /* statusflags */ #define TXSTOPPED 1 #define LOWWAIT 2 @@ -439,14 +411,11 @@ struct mon_str { #define WAKEUP_CHARS 256 static int ttymajor = MOXAMAJOR; -static struct mon_str moxaLog; static unsigned int moxaFuncTout = HZ / 2; static unsigned int moxaLowWaterChk; static DEFINE_MUTEX(moxa_openlock); static DEFINE_SPINLOCK(moxa_lock); -static struct tty_port moxa_service_port; - MODULE_AUTHOR("William Chen"); MODULE_DESCRIPTION("MOXA Intellio Family Multiport Board Device Driver"); MODULE_LICENSE("GPL"); @@ -557,104 +526,6 @@ static void moxa_low_water_check(void __iomem *ofsAddr) * TTY operations */ -static int moxa_ioctl(struct tty_struct *tty, - unsigned int cmd, unsigned long arg) -{ - struct moxa_port *ch = tty->driver_data; - void __user *argp = (void __user *)arg; - int status, ret = 0; - - if (tty->index == MAX_PORTS) { - if (cmd != MOXA_GETDATACOUNT && cmd != MOXA_GET_IOQUEUE && - cmd != MOXA_GETMSTATUS) - return -EINVAL; - } else if (!ch) - return -ENODEV; - - switch (cmd) { - case MOXA_GETDATACOUNT: - moxaLog.tick = jiffies; - if (copy_to_user(argp, &moxaLog, sizeof(moxaLog))) - ret = -EFAULT; - break; - case MOXA_FLUSH_QUEUE: - MoxaPortFlushData(ch, arg); - break; - case MOXA_GET_IOQUEUE: { - struct moxaq_str __user *argm = argp; - struct moxaq_str tmp; - struct moxa_port *p; - unsigned int i, j; - - for (i = 0; i < MAX_BOARDS; i++) { - p = moxa_boards[i].ports; - for (j = 0; j < MAX_PORTS_PER_BOARD; j++, p++, argm++) { - memset(&tmp, 0, sizeof(tmp)); - spin_lock_bh(&moxa_lock); - if (moxa_boards[i].ready) { - tmp.inq = MoxaPortRxQueue(p); - tmp.outq = MoxaPortTxQueue(p); - } - spin_unlock_bh(&moxa_lock); - if (copy_to_user(argm, &tmp, sizeof(tmp))) - return -EFAULT; - } - } - break; - } case MOXA_GET_OQUEUE: - status = MoxaPortTxQueue(ch); - ret = put_user(status, (unsigned long __user *)argp); - break; - case MOXA_GET_IQUEUE: - status = MoxaPortRxQueue(ch); - ret = put_user(status, (unsigned long __user *)argp); - break; - case MOXA_GETMSTATUS: { - struct mxser_mstatus __user *argm = argp; - struct mxser_mstatus tmp; - struct moxa_port *p; - unsigned int i, j; - - for (i = 0; i < MAX_BOARDS; i++) { - p = moxa_boards[i].ports; - for (j = 0; j < MAX_PORTS_PER_BOARD; j++, p++, argm++) { - struct tty_struct *ttyp; - memset(&tmp, 0, sizeof(tmp)); - spin_lock_bh(&moxa_lock); - if (!moxa_boards[i].ready) { - spin_unlock_bh(&moxa_lock); - goto copy; - } - - status = MoxaPortLineStatus(p); - spin_unlock_bh(&moxa_lock); - - if (status & 1) - tmp.cts = 1; - if (status & 2) - tmp.dsr = 1; - if (status & 4) - tmp.dcd = 1; - - ttyp = tty_port_tty_get(&p->port); - if (!ttyp) - tmp.cflag = p->cflag; - else - tmp.cflag = ttyp->termios.c_cflag; - tty_kref_put(ttyp); -copy: - if (copy_to_user(argm, &tmp, sizeof(tmp))) - return -EFAULT; - } - } - break; - } - default: - ret = -ENOIOCTLCMD; - } - return ret; -} - static int moxa_break_ctl(struct tty_struct *tty, int state) { struct moxa_port *port = tty->driver_data; @@ -671,7 +542,6 @@ static const struct tty_operations moxa_ops = { .write_room = moxa_write_room, .flush_buffer = moxa_flush_buffer, .chars_in_buffer = moxa_chars_in_buffer, - .ioctl = moxa_ioctl, .set_termios = moxa_set_termios, .stop = moxa_stop, .start = moxa_start, @@ -1283,9 +1153,7 @@ static int __init moxa_init(void) { int retval = 0; - tty_port_init(&moxa_service_port); - - moxaDriver = tty_alloc_driver(MAX_PORTS + 1, + moxaDriver = tty_alloc_driver(MAX_PORTS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(moxaDriver)) @@ -1301,8 +1169,6 @@ static int __init moxa_init(void) moxaDriver->init_termios.c_ispeed = 9600; moxaDriver->init_termios.c_ospeed = 9600; tty_set_operations(moxaDriver, &moxa_ops); - /* Having one more port only for ioctls is ugly */ - tty_port_link_device(&moxa_service_port, moxaDriver, MAX_PORTS); if (tty_register_driver(moxaDriver)) { printk(KERN_ERR "can't register MOXA Smartio tty driver!\n"); @@ -1362,9 +1228,6 @@ static int moxa_open(struct tty_struct *tty, struct file *filp) int port; port = tty->index; - if (port == MAX_PORTS) { - return capable(CAP_SYS_ADMIN) ? 0 : -EPERM; - } if (mutex_lock_interruptible(&moxa_openlock)) return -ERESTARTSYS; brd = &moxa_boards[port / MAX_PORTS_PER_BOARD]; @@ -2087,7 +1950,6 @@ static ssize_t MoxaPortWriteData(struct tty_struct *tty, const u8 *buffer, c = (head > tail) ? (head - tail - 1) : (head - tail + tx_mask); if (c > len) c = len; - moxaLog.txcnt[port->port.tty->index] += c; total = c; if (spage == epage) { bufhead = readw(ofsAddr + Ofs_txb); @@ -2129,7 +1991,6 @@ static ssize_t MoxaPortWriteData(struct tty_struct *tty, const u8 *buffer, static int MoxaPortReadData(struct moxa_port *port) { - struct tty_struct *tty = port->port.tty; void __iomem *baseAddr, *ofsAddr, *ofs; u8 *dst; unsigned int count, len, total; @@ -2148,7 +2009,6 @@ static int MoxaPortReadData(struct moxa_port *port) return 0; total = count; - moxaLog.rxcnt[tty->index] += total; if (spage == epage) { bufhead = readw(ofsAddr + Ofs_rxb); writew(spage, baseAddr + Control_reg); @@ -2236,8 +2096,6 @@ static int moxa_get_serial_info(struct tty_struct *tty, { struct moxa_port *info = tty->driver_data; - if (tty->index == MAX_PORTS) - return -EINVAL; if (!info) return -ENODEV; mutex_lock(&info->port.mutex); @@ -2257,8 +2115,6 @@ static int moxa_set_serial_info(struct tty_struct *tty, struct moxa_port *info = tty->driver_data; unsigned int close_delay; - if (tty->index == MAX_PORTS) - return -EINVAL; if (!info) return -ENODEV; From 528f31191ebaa00d4a99b293eef1d16f604a0bd0 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:36 +0100 Subject: [PATCH 1499/2157] tty: srmcons: fix retval from srmcons_init() The value returned from srmcons_init() was -ENODEV for over 2 decades. But it does not matter, given device_initcall() ignores retvals. But to be honest, return 0 in case the tty driver was registered properly. To do that, the condition is inverted and a short path taken in case of error. err_free_drv is introduced as it will be used from more places later. Signed-off-by: Jiri Slaby (SUSE) Tested-by: Magnus Lindholm Cc: Richard Henderson Cc: Matt Turner Cc: linux-alpha@vger.kernel.org Link: https://lore.kernel.org/r/20250317070046.24386-22-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/srmcons.c | 56 ++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 3e61073f4b30..b9cd364e814e 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -196,40 +196,44 @@ static const struct tty_operations srmcons_ops = { static int __init srmcons_init(void) { + struct tty_driver *driver; + int err; + timer_setup(&srmcons_singleton.timer, srmcons_receive_chars, 0); - if (srm_is_registered_console) { - struct tty_driver *driver; - int err; - driver = tty_alloc_driver(MAX_SRM_CONSOLE_DEVICES, 0); - if (IS_ERR(driver)) - return PTR_ERR(driver); + if (!srm_is_registered_console) + return -ENODEV; - tty_port_init(&srmcons_singleton.port); + driver = tty_alloc_driver(MAX_SRM_CONSOLE_DEVICES, 0); + if (IS_ERR(driver)) + return PTR_ERR(driver); - driver->driver_name = "srm"; - driver->name = "srm"; - driver->major = 0; /* dynamic */ - driver->minor_start = 0; - driver->type = TTY_DRIVER_TYPE_SYSTEM; - driver->subtype = SYSTEM_TYPE_SYSCONS; - driver->init_termios = tty_std_termios; - tty_set_operations(driver, &srmcons_ops); - tty_port_link_device(&srmcons_singleton.port, driver, 0); - err = tty_register_driver(driver); - if (err) { - tty_driver_kref_put(driver); - tty_port_destroy(&srmcons_singleton.port); - return err; - } - srmcons_driver = driver; - } + tty_port_init(&srmcons_singleton.port); - return -ENODEV; + driver->driver_name = "srm"; + driver->name = "srm"; + driver->major = 0; /* dynamic */ + driver->minor_start = 0; + driver->type = TTY_DRIVER_TYPE_SYSTEM; + driver->subtype = SYSTEM_TYPE_SYSCONS; + driver->init_termios = tty_std_termios; + tty_set_operations(driver, &srmcons_ops); + tty_port_link_device(&srmcons_singleton.port, driver, 0); + err = tty_register_driver(driver); + if (err) + goto err_free_drv; + + srmcons_driver = driver; + + return 0; +err_free_drv: + tty_driver_kref_put(driver); + tty_port_destroy(&srmcons_singleton.port); + + return err; } device_initcall(srmcons_init); - /* * The console driver */ From 49ddb69cf6f1790168ba5dd42a79c8ca65ebf3d8 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:37 +0100 Subject: [PATCH 1500/2157] tty: staging/greybus: pass tty_driver flags to tty_alloc_driver() tty_alloc_driver() is supposed to receive tty driver flags. Signed-off-by: Jiri Slaby (SUSE) Acked-by: Johan Hovold Cc: David Lin Cc: Alex Elder Cc: greybus-dev@lists.linaro.org Cc: linux-staging@lists.linux.dev Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/20250317070046.24386-23-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/uart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index 8eab94cb06fa..308ed1ca9947 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -948,7 +948,8 @@ static int gb_tty_init(void) { int retval = 0; - gb_tty_driver = tty_alloc_driver(GB_NUM_MINORS, 0); + gb_tty_driver = tty_alloc_driver(GB_NUM_MINORS, TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(gb_tty_driver)) { pr_err("Can not allocate tty driver\n"); retval = -ENOMEM; @@ -961,7 +962,6 @@ static int gb_tty_init(void) gb_tty_driver->minor_start = 0; gb_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; gb_tty_driver->subtype = SERIAL_TYPE_NORMAL; - gb_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; gb_tty_driver->init_termios = tty_std_termios; gb_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; From 53edbd412852cff47a5e32ad310c792a23bcd4c3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:38 +0100 Subject: [PATCH 1501/2157] tty: sunsu: drop serial_{in,out}p() They are simple wrappers around serial_{in/out}() without actually pausing the execution. Since ever. So drop these useless wrappers. Signed-off-by: Jiri Slaby (SUSE) Cc: David S. Miller Cc: sparclinux@vger.kernel.org Link: https://lore.kernel.org/r/20250317070046.24386-24-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunsu.c | 164 +++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 87 deletions(-) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index 7f0fef07e141..2dc68b3201a4 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -150,16 +150,6 @@ static void serial_out(struct uart_sunsu_port *up, int offset, int value) } } -/* - * We used to support using pause I/O for certain machines. We - * haven't supported this for a while, but just in case it's badly - * needed for certain old 386 machines, I've left these #define's - * in.... - */ -#define serial_inp(up, offset) serial_in(up, offset) -#define serial_outp(up, offset, value) serial_out(up, offset, value) - - /* * For the 16C950 */ @@ -193,12 +183,12 @@ static int __enable_rsa(struct uart_sunsu_port *up) unsigned char mode; int result; - mode = serial_inp(up, UART_RSA_MSR); + mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; if (!result) { - serial_outp(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); - mode = serial_inp(up, UART_RSA_MSR); + serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); + mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; } @@ -217,7 +207,7 @@ static void enable_rsa(struct uart_sunsu_port *up) uart_port_unlock_irq(&up->port); } if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) - serial_outp(up, UART_RSA_FRR, 0); + serial_out(up, UART_RSA_FRR, 0); } } @@ -236,12 +226,12 @@ static void disable_rsa(struct uart_sunsu_port *up) up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { uart_port_lock_irq(&up->port); - mode = serial_inp(up, UART_RSA_MSR); + mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); if (!result) { - serial_outp(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); - mode = serial_inp(up, UART_RSA_MSR); + serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); + mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); } @@ -326,7 +316,7 @@ receive_chars(struct uart_sunsu_port *up, unsigned char *status) int saw_console_brk = 0; do { - ch = serial_inp(up, UART_RX); + ch = serial_in(up, UART_RX); flag = TTY_NORMAL; up->port.icount.rx++; @@ -387,7 +377,7 @@ receive_chars(struct uart_sunsu_port *up, unsigned char *status) */ tty_insert_flip_char(port, 0, TTY_OVERRUN); ignore_char: - *status = serial_inp(up, UART_LSR); + *status = serial_in(up, UART_LSR); } while ((*status & UART_LSR_DR) && (max_count-- > 0)); if (saw_console_brk) @@ -401,7 +391,7 @@ static void transmit_chars(struct uart_sunsu_port *up) int count; if (up->port.x_char) { - serial_outp(up, UART_TX, up->port.x_char); + serial_out(up, UART_TX, up->port.x_char); up->port.icount.tx++; up->port.x_char = 0; return; @@ -460,7 +450,7 @@ static irqreturn_t sunsu_serial_interrupt(int irq, void *dev_id) uart_port_lock_irqsave(&up->port, &flags); do { - status = serial_inp(up, UART_LSR); + status = serial_in(up, UART_LSR); if (status & UART_LSR_DR) receive_chars(up, &status); check_modem_status(up); @@ -498,7 +488,7 @@ static void sunsu_change_mouse_baud(struct uart_sunsu_port *up) static void receive_kbd_ms_chars(struct uart_sunsu_port *up, int is_break) { do { - unsigned char ch = serial_inp(up, UART_RX); + unsigned char ch = serial_in(up, UART_RX); /* Stop-A is handled by drivers/char/keyboard.c now. */ if (up->su_type == SU_PORT_KBD) { @@ -530,7 +520,7 @@ static irqreturn_t sunsu_kbd_ms_interrupt(int irq, void *dev_id) struct uart_sunsu_port *up = dev_id; if (!(serial_in(up, UART_IIR) & UART_IIR_NO_INT)) { - unsigned char status = serial_inp(up, UART_LSR); + unsigned char status = serial_in(up, UART_LSR); if ((status & UART_LSR_DR) || (status & UART_LSR_BI)) receive_kbd_ms_chars(up, (status & UART_LSR_BI) != 0); @@ -619,14 +609,14 @@ static int sunsu_startup(struct uart_port *port) if (up->port.type == PORT_16C950) { /* Wake up and initialize UART */ up->acr = 0; - serial_outp(up, UART_LCR, 0xBF); - serial_outp(up, UART_EFR, UART_EFR_ECB); - serial_outp(up, UART_IER, 0); - serial_outp(up, UART_LCR, 0); + serial_out(up, UART_LCR, 0xBF); + serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, UART_IER, 0); + serial_out(up, UART_LCR, 0); serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ - serial_outp(up, UART_LCR, 0xBF); - serial_outp(up, UART_EFR, UART_EFR_ECB); - serial_outp(up, UART_LCR, 0); + serial_out(up, UART_LCR, 0xBF); + serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, UART_LCR, 0); } #ifdef CONFIG_SERIAL_8250_RSA @@ -642,19 +632,19 @@ static int sunsu_startup(struct uart_port *port) * (they will be reenabled in set_termios()) */ if (uart_config[up->port.type].flags & UART_CLEAR_FIFO) { - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_outp(up, UART_FCR, 0); + serial_out(up, UART_FCR, 0); } /* * Clear the interrupt registers. */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); + (void) serial_in(up, UART_LSR); + (void) serial_in(up, UART_RX); + (void) serial_in(up, UART_IIR); + (void) serial_in(up, UART_MSR); /* * At this point, there's no way the LSR could still be 0xff; @@ -662,7 +652,7 @@ static int sunsu_startup(struct uart_port *port) * here. */ if (!(up->port.flags & UPF_BUGGY_UART) && - (serial_inp(up, UART_LSR) == 0xff)) { + (serial_in(up, UART_LSR) == 0xff)) { printk("ttyS%d: LSR safety check engaged!\n", up->port.line); return -ENODEV; } @@ -682,7 +672,7 @@ static int sunsu_startup(struct uart_port *port) /* * Now, initialize the UART */ - serial_outp(up, UART_LCR, UART_LCR_WLEN8); + serial_out(up, UART_LCR, UART_LCR_WLEN8); uart_port_lock_irqsave(&up->port, &flags); @@ -697,7 +687,7 @@ static int sunsu_startup(struct uart_port *port) * anyway, so we don't enable them here. */ up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_outp(up, UART_IER, up->ier); + serial_out(up, UART_IER, up->ier); if (up->port.flags & UPF_FOURPORT) { unsigned int icp; @@ -712,10 +702,10 @@ static int sunsu_startup(struct uart_port *port) /* * And clear the interrupt registers again for luck. */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); + (void) serial_in(up, UART_LSR); + (void) serial_in(up, UART_RX); + (void) serial_in(up, UART_IIR); + (void) serial_in(up, UART_MSR); return 0; } @@ -730,7 +720,7 @@ static void sunsu_shutdown(struct uart_port *port) * Disable interrupts from this port */ up->ier = 0; - serial_outp(up, UART_IER, 0); + serial_out(up, UART_IER, 0); uart_port_lock_irqsave(&up->port, &flags); if (up->port.flags & UPF_FOURPORT) { @@ -746,11 +736,11 @@ static void sunsu_shutdown(struct uart_port *port) /* * Disable break condition and FIFOs */ - serial_out(up, UART_LCR, serial_inp(up, UART_LCR) & ~UART_LCR_SBC); - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | + serial_out(up, UART_LCR, serial_in(up, UART_LCR) & ~UART_LCR_SBC); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_outp(up, UART_FCR, 0); + serial_out(up, UART_FCR, 0); #ifdef CONFIG_SERIAL_8250_RSA /* @@ -872,22 +862,22 @@ sunsu_change_speed(struct uart_port *port, unsigned int cflag, serial_out(up, UART_IER, up->ier); if (uart_config[up->port.type].flags & UART_STARTECH) { - serial_outp(up, UART_LCR, 0xBF); - serial_outp(up, UART_EFR, cflag & CRTSCTS ? UART_EFR_CTS :0); + serial_out(up, UART_LCR, 0xBF); + serial_out(up, UART_EFR, cflag & CRTSCTS ? UART_EFR_CTS :0); } - serial_outp(up, UART_LCR, cval | UART_LCR_DLAB);/* set DLAB */ - serial_outp(up, UART_DLL, quot & 0xff); /* LS of divisor */ - serial_outp(up, UART_DLM, quot >> 8); /* MS of divisor */ + serial_out(up, UART_LCR, cval | UART_LCR_DLAB);/* set DLAB */ + serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ + serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ if (up->port.type == PORT_16750) - serial_outp(up, UART_FCR, fcr); /* set fcr */ - serial_outp(up, UART_LCR, cval); /* reset DLAB */ + serial_out(up, UART_FCR, fcr); /* set fcr */ + serial_out(up, UART_LCR, cval); /* reset DLAB */ up->lcr = cval; /* Save LCR */ if (up->port.type != PORT_16750) { if (fcr & UART_FCR_ENABLE_FIFO) { /* emulated UARTs (Lucent Venus 167x) need two steps */ - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); } - serial_outp(up, UART_FCR, fcr); /* set fcr */ + serial_out(up, UART_FCR, fcr); /* set fcr */ } up->cflag = cflag; @@ -1051,18 +1041,18 @@ static void sunsu_autoconfig(struct uart_sunsu_port *up) * 0x80 is a non-existent port; which should be safe since * include/asm/io.h also makes this assumption. */ - scratch = serial_inp(up, UART_IER); - serial_outp(up, UART_IER, 0); + scratch = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); #ifdef __i386__ outb(0xff, 0x080); #endif - scratch2 = serial_inp(up, UART_IER); - serial_outp(up, UART_IER, 0x0f); + scratch2 = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0x0f); #ifdef __i386__ outb(0, 0x080); #endif - scratch3 = serial_inp(up, UART_IER); - serial_outp(up, UART_IER, scratch); + scratch3 = serial_in(up, UART_IER); + serial_out(up, UART_IER, scratch); if (scratch2 != 0 || scratch3 != 0x0F) goto out; /* We failed; there's nothing here */ } @@ -1080,16 +1070,16 @@ static void sunsu_autoconfig(struct uart_sunsu_port *up) * that conflicts with COM 1-4 --- we hope! */ if (!(up->port.flags & UPF_SKIP_TEST)) { - serial_outp(up, UART_MCR, UART_MCR_LOOP | 0x0A); - status1 = serial_inp(up, UART_MSR) & 0xF0; - serial_outp(up, UART_MCR, save_mcr); + serial_out(up, UART_MCR, UART_MCR_LOOP | 0x0A); + status1 = serial_in(up, UART_MSR) & 0xF0; + serial_out(up, UART_MCR, save_mcr); if (status1 != 0x90) goto out; /* We failed loopback test */ } - serial_outp(up, UART_LCR, 0xBF); /* set up for StarTech test */ - serial_outp(up, UART_EFR, 0); /* EFR is the same as FCR */ - serial_outp(up, UART_LCR, 0); - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_LCR, 0xBF); /* set up for StarTech test */ + serial_out(up, UART_EFR, 0); /* EFR is the same as FCR */ + serial_out(up, UART_LCR, 0); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); scratch = serial_in(up, UART_IIR) >> 6; switch (scratch) { case 0: @@ -1107,19 +1097,19 @@ static void sunsu_autoconfig(struct uart_sunsu_port *up) } if (up->port.type == PORT_16550A) { /* Check for Startech UART's */ - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_DLAB); if (serial_in(up, UART_EFR) == 0) { up->port.type = PORT_16650; } else { - serial_outp(up, UART_LCR, 0xBF); + serial_out(up, UART_LCR, 0xBF); if (serial_in(up, UART_EFR) == 0) up->port.type = PORT_16650V2; } } if (up->port.type == PORT_16550A) { /* Check for TI 16750 */ - serial_outp(up, UART_LCR, save_lcr | UART_LCR_DLAB); - serial_outp(up, UART_FCR, + serial_out(up, UART_LCR, save_lcr | UART_LCR_DLAB); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); scratch = serial_in(up, UART_IIR) >> 5; if (scratch == 7) { @@ -1129,24 +1119,24 @@ static void sunsu_autoconfig(struct uart_sunsu_port *up) * mode if the UART_FCR7_64BYTE bit was set * while UART_LCR_DLAB was latched. */ - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_outp(up, UART_LCR, 0); - serial_outp(up, UART_FCR, + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_LCR, 0); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); scratch = serial_in(up, UART_IIR) >> 5; if (scratch == 6) up->port.type = PORT_16750; } - serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); } - serial_outp(up, UART_LCR, save_lcr); + serial_out(up, UART_LCR, save_lcr); if (up->port.type == PORT_16450) { scratch = serial_in(up, UART_SCR); - serial_outp(up, UART_SCR, 0xa5); + serial_out(up, UART_SCR, 0xa5); status1 = serial_in(up, UART_SCR); - serial_outp(up, UART_SCR, 0x5a); + serial_out(up, UART_SCR, 0x5a); status2 = serial_in(up, UART_SCR); - serial_outp(up, UART_SCR, scratch); + serial_out(up, UART_SCR, scratch); if ((status1 != 0xa5) || (status2 != 0x5a)) up->port.type = PORT_8250; @@ -1163,15 +1153,15 @@ static void sunsu_autoconfig(struct uart_sunsu_port *up) */ #ifdef CONFIG_SERIAL_8250_RSA if (up->port.type == PORT_RSA) - serial_outp(up, UART_RSA_FRR, 0); + serial_out(up, UART_RSA_FRR, 0); #endif - serial_outp(up, UART_MCR, save_mcr); - serial_outp(up, UART_FCR, (UART_FCR_ENABLE_FIFO | + serial_out(up, UART_MCR, save_mcr); + serial_out(up, UART_FCR, (UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)); - serial_outp(up, UART_FCR, 0); + serial_out(up, UART_FCR, 0); (void)serial_in(up, UART_RX); - serial_outp(up, UART_IER, 0); + serial_out(up, UART_IER, 0); out: uart_port_unlock_irqrestore(&up->port, flags); From d0e8e5b017e6212474b900cc1a3491709762d35c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:39 +0100 Subject: [PATCH 1502/2157] tty: sunsu: remove unused serial_icr_read() It is commented and never used. Signed-off-by: Jiri Slaby (SUSE) Cc: David S. Miller Cc: sparclinux@vger.kernel.org Link: https://lore.kernel.org/r/20250317070046.24386-25-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sunsu.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index 2dc68b3201a4..383141fe7ba0 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -159,20 +159,6 @@ static void serial_icr_write(struct uart_sunsu_port *up, int offset, int value) serial_out(up, UART_ICR, value); } -#if 0 /* Unused currently */ -static unsigned int serial_icr_read(struct uart_sunsu_port *up, int offset) -{ - unsigned int value; - - serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); - serial_out(up, UART_SCR, offset); - value = serial_in(up, UART_ICR); - serial_icr_write(up, UART_ACR, up->acr); - - return value; -} -#endif - #ifdef CONFIG_SERIAL_8250_RSA /* * Attempts to turn on the RSA FIFO. Returns zero on failure. From bfc467db60b76c30ca1f7f02088a219b6d5b6e8c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:40 +0100 Subject: [PATCH 1503/2157] serial: remove redundant tty_port_link_device() The linking is done implicitly by tty_port_register_device_attr_serdev() few lines below. So drop this explicit tty_port_link_device(). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-26-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 92f7e752f862..c26a7cfa3778 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3156,7 +3156,6 @@ static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *u if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); - tty_port_link_device(port, drv->tty_driver, uport->line); uart_configure_port(drv, state, uport); port->console = uart_console(uport); From 1e657d663f4fa10d07d6e6ebfa76616355b66196 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:41 +0100 Subject: [PATCH 1504/2157] serial: pass struct uart_state to uart_line_info() uart_line_info() wants to work with struct uart_state. Do not pass a driver and an index. Pass the precomputed struct directly. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-27-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c26a7cfa3778..20b2d5c5df6d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2013,9 +2013,8 @@ static const char *uart_type(struct uart_port *port) #ifdef CONFIG_PROC_FS -static void uart_line_info(struct seq_file *m, struct uart_driver *drv, int i) +static void uart_line_info(struct seq_file *m, struct uart_state *state) { - struct uart_state *state = drv->state + i; struct tty_port *port = &state->port; enum uart_pm_state pm_state; struct uart_port *uport; @@ -2100,7 +2099,7 @@ static int uart_proc_show(struct seq_file *m, void *v) seq_printf(m, "serinfo:1.0 driver%s%s revision:%s\n", "", "", ""); for (i = 0; i < drv->nr; i++) - uart_line_info(m, drv, i); + uart_line_info(m, drv->state + i); return 0; } #endif From dbd26a886e94deb7fda9050f9195ccb41f9a5d93 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:42 +0100 Subject: [PATCH 1505/2157] serial: 8250: use serial_port_in/out() helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are serial_port_in/out() helpers to be used instead of direct p->serial_in/out(). Use them in various 8250 drivers. Signed-off-by: Jiri Slaby (SUSE) Cc: "Ilpo Järvinen" Cc: Andy Shevchenko -- [v2] * Use serial_port_in/out() and not serial_in/out() [Andy] Reviewed-by: Andy Shevchenko # 8250_dw Link: https://lore.kernel.org/r/20250317070046.24386-28-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 16 ++++++++-------- drivers/tty/serial/8250/8250_fsl.c | 8 ++++---- drivers/tty/serial/8250/8250_omap.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index af24ec25d976..f068b8d7840a 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -120,12 +120,12 @@ static void dw8250_force_idle(struct uart_port *p) * enabled. */ if (up->fcr & UART_FCR_ENABLE_FIFO) { - lsr = p->serial_in(p, UART_LSR); + lsr = serial_port_in(p, UART_LSR); if (!(lsr & UART_LSR_DR)) return; } - (void)p->serial_in(p, UART_RX); + serial_port_in(p, UART_RX); } static void dw8250_check_lcr(struct uart_port *p, int offset, int value) @@ -139,7 +139,7 @@ static void dw8250_check_lcr(struct uart_port *p, int offset, int value) /* Make sure LCR write wasn't ignored */ while (tries--) { - unsigned int lcr = p->serial_in(p, offset); + unsigned int lcr = serial_port_in(p, offset); if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) return; @@ -260,7 +260,7 @@ static int dw8250_handle_irq(struct uart_port *p) { struct uart_8250_port *up = up_to_u8250p(p); struct dw8250_data *d = to_dw8250_data(p->private_data); - unsigned int iir = p->serial_in(p, UART_IIR); + unsigned int iir = serial_port_in(p, UART_IIR); bool rx_timeout = (iir & 0x3f) == UART_IIR_RX_TIMEOUT; unsigned int quirks = d->pdata->quirks; unsigned int status; @@ -281,7 +281,7 @@ static int dw8250_handle_irq(struct uart_port *p) status = serial_lsr_in(up); if (!(status & (UART_LSR_DR | UART_LSR_BI))) - (void) p->serial_in(p, UART_RX); + serial_port_in(p, UART_RX); uart_port_unlock_irqrestore(p, flags); } @@ -303,7 +303,7 @@ static int dw8250_handle_irq(struct uart_port *p) if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { /* Clear the USR */ - (void)p->serial_in(p, d->pdata->usr_reg); + serial_port_in(p, d->pdata->usr_reg); return 1; } @@ -390,7 +390,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, static void dw8250_set_ldisc(struct uart_port *p, struct ktermios *termios) { struct uart_8250_port *up = up_to_u8250p(p); - unsigned int mcr = p->serial_in(p, UART_MCR); + unsigned int mcr = serial_port_in(p, UART_MCR); if (up->capabilities & UART_CAP_IRDA) { if (termios->c_line == N_IRDA) @@ -398,7 +398,7 @@ static void dw8250_set_ldisc(struct uart_port *p, struct ktermios *termios) else mcr &= ~DW_UART_MCR_SIRE; - p->serial_out(p, UART_MCR, mcr); + serial_port_out(p, UART_MCR, mcr); } serial8250_do_set_ldisc(p, termios); } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 1b7bd55619c6..649ae5c8304d 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -32,7 +32,7 @@ int fsl8250_handle_irq(struct uart_port *port) uart_port_lock_irqsave(&up->port, &flags); - iir = port->serial_in(port, UART_IIR); + iir = serial_port_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { uart_port_unlock_irqrestore(&up->port, flags); return 0; @@ -54,12 +54,12 @@ int fsl8250_handle_irq(struct uart_port *port) if (unlikely((iir & UART_IIR_ID) == UART_IIR_RLSI && (up->lsr_saved_flags & UART_LSR_BI))) { up->lsr_saved_flags &= ~UART_LSR_BI; - port->serial_in(port, UART_RX); + serial_port_in(port, UART_RX); uart_port_unlock_irqrestore(&up->port, flags); return 1; } - lsr = orig_lsr = up->port.serial_in(&up->port, UART_LSR); + lsr = orig_lsr = serial_port_in(port, UART_LSR); /* Process incoming characters first */ if ((lsr & (UART_LSR_DR | UART_LSR_BI)) && @@ -71,7 +71,7 @@ int fsl8250_handle_irq(struct uart_port *port) if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { unsigned long delay; - up->ier = port->serial_in(port, UART_IER); + up->ier = serial_port_in(port, UART_IER); if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { port->ops->stop_rx(port); } else { diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index c2b75e3f106d..2a0ce11f405d 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -692,7 +692,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) /* Synchronize UART_IER access against the console. */ uart_port_lock(port); - up->ier = port->serial_in(port, UART_IER); + up->ier = serial_port_in(port, UART_IER); if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { port->ops->stop_rx(port); } else { From 6b879bc9032b1178d8720494e9daa964bea211a9 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:43 +0100 Subject: [PATCH 1506/2157] serial: 8250_rsa: simplify rsa8250_{request/release}_resource() * Use already defined 'port' for fetching start/offset, and size. * Return from the switch immediately -- so it is clear what is returned and when. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-29-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_rsa.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c index dfaa613e452d..82f2593b4c59 100644 --- a/drivers/tty/serial/8250/8250_rsa.c +++ b/drivers/tty/serial/8250/8250_rsa.c @@ -16,30 +16,27 @@ static unsigned int probe_rsa_count; static int rsa8250_request_resource(struct uart_8250_port *up) { - unsigned long start = UART_RSA_BASE << up->port.regshift; - unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; - int ret = -EINVAL; + unsigned long start = UART_RSA_BASE << port->regshift; + unsigned int size = 8 << port->regshift; switch (port->iotype) { case UPIO_HUB6: case UPIO_PORT: start += port->iobase; - if (request_region(start, size, "serial-rsa")) - ret = 0; - else - ret = -EBUSY; - break; + if (!request_region(start, size, "serial-rsa")) + return -EBUSY; + return 0; + default: + return -EINVAL; } - - return ret; } static void rsa8250_release_resource(struct uart_8250_port *up) { - unsigned long offset = UART_RSA_BASE << up->port.regshift; - unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; + unsigned long offset = UART_RSA_BASE << port->regshift; + unsigned int size = 8 << port->regshift; switch (port->iotype) { case UPIO_HUB6: From dc7d36668f40b4ba98da4197b68002c347e24d76 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:44 +0100 Subject: [PATCH 1507/2157] serial: 8250_port: do not use goto for UPQ_NO_TXEN_TEST code flow This is unnecessary here and makes the code harder to follow. Invert the condition and drop the goto+label. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-30-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 34 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 3f256e96c722..6466f60416a9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2406,28 +2406,26 @@ int serial8250_do_startup(struct uart_port *port) * test if we receive TX irq. This way, we'll never enable * UART_BUG_TXEN. */ - if (up->port.quirks & UPQ_NO_TXEN_TEST) - goto dont_test_tx_en; + if (!(up->port.quirks & UPQ_NO_TXEN_TEST)) { + /* + * Do a quick test to see if we receive an interrupt when we + * enable the TX irq. + */ + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); - /* - * Do a quick test to see if we receive an interrupt when we enable - * the TX irq. - */ - serial_port_out(port, UART_IER, UART_IER_THRI); - lsr = serial_port_in(port, UART_LSR); - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - - if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { - if (!(up->bugs & UART_BUG_TXEN)) { - up->bugs |= UART_BUG_TXEN; - dev_dbg(port->dev, "enabling bad tx status workarounds\n"); + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { + up->bugs |= UART_BUG_TXEN; + dev_dbg(port->dev, "enabling bad tx status workarounds\n"); + } + } else { + up->bugs &= ~UART_BUG_TXEN; } - } else { - up->bugs &= ~UART_BUG_TXEN; } -dont_test_tx_en: uart_port_unlock_irqrestore(port, flags); /* From 2667bd6673eb49c9c299c1202bab5a3fc04586de Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:45 +0100 Subject: [PATCH 1508/2157] serial: 8250_port: simplify serial8250_request_std_resource() Return immediately from the error locations or switch-case ends. It is therefore easier to see the flow. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-31-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 6466f60416a9..8ac452cea36c 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2966,7 +2966,6 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) { unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; - int ret = 0; switch (port->iotype) { case UPIO_AU: @@ -2975,32 +2974,28 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) { - ret = -EINVAL; - break; - } + if (!port->mapbase) + return -EINVAL; - if (!request_mem_region(port->mapbase, size, "serial")) { - ret = -EBUSY; - break; - } + if (!request_mem_region(port->mapbase, size, "serial")) + return -EBUSY; if (port->flags & UPF_IOREMAP) { port->membase = ioremap(port->mapbase, size); if (!port->membase) { release_mem_region(port->mapbase, size); - ret = -ENOMEM; + return -ENOMEM; } } - break; - + return 0; case UPIO_HUB6: case UPIO_PORT: if (!request_region(port->iobase, size, "serial")) - ret = -EBUSY; - break; + return -EBUSY; + return 0; } - return ret; + + return 0; } static void serial8250_release_std_resource(struct uart_8250_port *up) From 067e95857021bb242099d6bd818e1c57a101802b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 17 Mar 2025 08:00:46 +0100 Subject: [PATCH 1509/2157] serial: switch change_irq and change_port to bool in uart_set_info() change_irq and change_port are boolean variables. Mark them as such (instead of uint). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250317070046.24386-32-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 20b2d5c5df6d..04eaa24ed153 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -895,8 +895,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, { struct uart_port *uport = uart_port_check(state); unsigned long new_port; - unsigned int change_irq, change_port, closing_wait; - unsigned int old_custom_divisor, close_delay; + unsigned int old_custom_divisor, close_delay, closing_wait; + bool change_irq, change_port; upf_t old_flags, new_flags; int retval; From bd8cad85561b8de36f099404c332bf3e3dbe90f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 11:40:21 +0200 Subject: [PATCH 1510/2157] serial: 8250_dw: Comment possible corner cases in serial_out() implementation 8250 DesignWare driver uses a few custom implementations of the serial_out(). These implementations are carefully made to avoid infinite loops. But this is not obvious from looking at the code. Comment the possible corner cases in the respective functions. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250317094021.1201512-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index f068b8d7840a..1902f29444a1 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -107,11 +107,23 @@ static inline int dw8250_modify_msr(struct uart_port *p, int offset, int value) return value; } +/* + * This function is being called as part of the uart_port::serial_out() + * routine. Hence, it must not call serial_port_out() or serial_out() + * against the modified registers here, i.e. LCR. + */ static void dw8250_force_idle(struct uart_port *p) { struct uart_8250_port *up = up_to_u8250p(p); unsigned int lsr; + /* + * The following call currently performs serial_out() + * against the FCR register. Because it differs to LCR + * there will be no infinite loop, but if it ever gets + * modified, we might need a new custom version of it + * that avoids infinite recursion. + */ serial8250_clear_and_reinit_fifos(up); /* @@ -128,6 +140,11 @@ static void dw8250_force_idle(struct uart_port *p) serial_port_in(p, UART_RX); } +/* + * This function is being called as part of the uart_port::serial_out() + * routine. Hence, it must not call serial_port_out() or serial_out() + * against the modified registers here, i.e. LCR. + */ static void dw8250_check_lcr(struct uart_port *p, int offset, int value) { struct dw8250_data *d = to_dw8250_data(p->private_data); From 0a3b5a59fddde2351b752792f8c51bb77fb682e4 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Wed, 19 Feb 2025 00:22:43 +0530 Subject: [PATCH 1511/2157] dt-bindings: serial: samsung: add exynos7870-uart compatible Document the compatible string for Exynos7870's UART driver. The devicetree property samsung,uart-fifosize must be mandatory, as the driver enquires about the FIFO sizes. This feature makes it compatible with Exynos8895's UART. Signed-off-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250219-exynos7870-uart-v2-1-c8c67f3a936c@disroot.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/samsung_uart.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml index 070eba9f19d3..83d9986d8e98 100644 --- a/Documentation/devicetree/bindings/serial/samsung_uart.yaml +++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml @@ -42,6 +42,10 @@ properties: - samsung,exynosautov9-uart - samsung,exynosautov920-uart - const: samsung,exynos850-uart + - items: + - enum: + - samsung,exynos7870-uart + - const: samsung,exynos8895-uart reg: maxItems: 1 From be6a23650908e2f827f2e7839a3fbae41ccb5b63 Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Sun, 23 Feb 2025 22:07:38 +0000 Subject: [PATCH 1512/2157] tty: serial: 8250: Add some more device IDs These card IDs got missed the first time around. Cc: stable Signed-off-by: Cameron Williams Link: https://lore.kernel.org/r/DB7PR02MB380295BCC879CCF91315AC38C4C12@DB7PR02MB3802.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index df4d0d832e54..37a5df725121 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5253,6 +5253,14 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0BA2, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0BA3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, /* * Brainboxes UC-235/246 */ @@ -5373,6 +5381,14 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_4_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0C42, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0C43, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, /* * Brainboxes UC-420 */ From 6cb37e95b9861aa12887ce2e3fe6b85b3147a6d4 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 6 Mar 2025 12:10:51 -0500 Subject: [PATCH 1513/2157] dt-bindings: serial: fsl-lpuart: support i.MX94 Add compatible string "fsl,imx94-lpuart" for the i.MX94 chip, which is backward compatible with i.MX8ULP. Set it to fall back to "fsl,imx8ulp-lpuart". Signed-off-by: Frank Li Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250306171052.244548-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/fsl-lpuart.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/fsl-lpuart.yaml b/Documentation/devicetree/bindings/serial/fsl-lpuart.yaml index 3f9ace89dee9..c42261b5a80a 100644 --- a/Documentation/devicetree/bindings/serial/fsl-lpuart.yaml +++ b/Documentation/devicetree/bindings/serial/fsl-lpuart.yaml @@ -30,6 +30,7 @@ properties: - items: - enum: - fsl,imx93-lpuart + - fsl,imx94-lpuart - fsl,imx95-lpuart - const: fsl,imx8ulp-lpuart - const: fsl,imx7ulp-lpuart From 5c7e2896481a177bbda41d7850f05a9f5a8aee2b Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Mon, 10 Mar 2025 22:27:10 +0000 Subject: [PATCH 1514/2157] tty: serial: 8250: Add Brainboxes XC devices These ExpressCard devices use the OxPCIE chip and can be used with this driver. Signed-off-by: Cameron Williams Cc: stable Link: https://lore.kernel.org/r/DB7PR02MB3802907A9360F27F6CD67AAFC4D62@DB7PR02MB3802.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 37a5df725121..73c200127b08 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2727,6 +2727,22 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .init = pci_oxsemi_tornado_init, .setup = pci_oxsemi_tornado_setup, }, + { + .vendor = PCI_VENDOR_ID_INTASHIELD, + .device = 0x4026, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_oxsemi_tornado_init, + .setup = pci_oxsemi_tornado_setup, + }, + { + .vendor = PCI_VENDOR_ID_INTASHIELD, + .device = 0x4021, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_oxsemi_tornado_init, + .setup = pci_oxsemi_tornado_setup, + }, { .vendor = PCI_VENDOR_ID_INTEL, .device = 0x8811, @@ -5615,6 +5631,20 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_oxsemi_1_15625000 }, + /* + * Brainboxes XC-235 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4026, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, + /* + * Brainboxes XC-475 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4021, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, /* * Perle PCI-RAS cards From f5cb528d6441eb860250a2f085773aac4f44085e Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 12 Mar 2025 10:25:03 +0800 Subject: [PATCH 1515/2157] tty: serial: fsl_lpuart: disable transmitter before changing RS485 related registers According to the LPUART reference manual, TXRTSE and TXRTSPOL of MODIR register only can be changed when the transmitter is disabled. So disable the transmitter before changing RS485 related registers and re-enable it after the change is done. Fixes: 67b01837861c ("tty: serial: lpuart: Add RS485 support for 32-bit uart flavour") Cc: stable Signed-off-by: Sherry Sun Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250312022503.1342990-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 91d02c55c470..203ec3b46304 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1484,6 +1484,19 @@ static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termio unsigned long modem = lpuart32_read(&sport->port, UARTMODIR) & ~(UARTMODIR_TXRTSPOL | UARTMODIR_TXRTSE); + u32 ctrl; + + /* TXRTSE and TXRTSPOL only can be changed when transmitter is disabled. */ + ctrl = lpuart32_read(&sport->port, UARTCTRL); + if (ctrl & UARTCTRL_TE) { + /* wait for the transmit engine to complete */ + lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); + lpuart32_write(&sport->port, ctrl & ~UARTCTRL_TE, UARTCTRL); + + while (lpuart32_read(&sport->port, UARTCTRL) & UARTCTRL_TE) + cpu_relax(); + } + lpuart32_write(&sport->port, modem, UARTMODIR); if (rs485->flags & SER_RS485_ENABLED) { @@ -1503,6 +1516,10 @@ static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termio } lpuart32_write(&sport->port, modem, UARTMODIR); + + if (ctrl & UARTCTRL_TE) + lpuart32_write(&sport->port, ctrl, UARTCTRL); + return 0; } From b6a8f6ab2c53e5ea3c7f2a3978db378a89bb7595 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 12 Mar 2025 10:39:02 +0800 Subject: [PATCH 1516/2157] tty: serial: fsl_lpuart: Use u32 and u8 for register variables Use u32 and u8 rather than unsigned long or unsigned char for register variables for clarity and consistency. Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20250312023904.1343351-2-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 93 ++++++++++++++++----------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 203ec3b46304..6f64a3300a38 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -441,7 +441,7 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport) static void lpuart_stop_tx(struct uart_port *port) { - unsigned char temp; + u8 temp; temp = readb(port->membase + UARTCR2); temp &= ~(UARTCR2_TIE | UARTCR2_TCIE); @@ -450,7 +450,7 @@ static void lpuart_stop_tx(struct uart_port *port) static void lpuart32_stop_tx(struct uart_port *port) { - unsigned long temp; + u32 temp; temp = lpuart32_read(port, UARTCTRL); temp &= ~(UARTCTRL_TIE | UARTCTRL_TCIE); @@ -459,7 +459,7 @@ static void lpuart32_stop_tx(struct uart_port *port) static void lpuart_stop_rx(struct uart_port *port) { - unsigned char temp; + u8 temp; temp = readb(port->membase + UARTCR2); writeb(temp & ~UARTCR2_RE, port->membase + UARTCR2); @@ -467,7 +467,7 @@ static void lpuart_stop_rx(struct uart_port *port) static void lpuart32_stop_rx(struct uart_port *port) { - unsigned long temp; + u32 temp; temp = lpuart32_read(port, UARTCTRL); lpuart32_write(port, temp & ~UARTCTRL_RE, UARTCTRL); @@ -642,7 +642,7 @@ static int lpuart_poll_init(struct uart_port *port) struct lpuart_port *sport = container_of(port, struct lpuart_port, port); unsigned long flags; - unsigned char temp; + u8 temp; sport->port.fifosize = 0; @@ -752,7 +752,7 @@ static inline void lpuart_transmit_buffer(struct lpuart_port *sport) static inline void lpuart32_transmit_buffer(struct lpuart_port *sport) { struct tty_port *tport = &sport->port.state->port; - unsigned long txcnt; + u32 txcnt; unsigned char c; if (sport->port.x_char) { @@ -789,7 +789,7 @@ static void lpuart_start_tx(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned char temp; + u8 temp; temp = readb(port->membase + UARTCR2); writeb(temp | UARTCR2_TIE, port->membase + UARTCR2); @@ -806,7 +806,7 @@ static void lpuart_start_tx(struct uart_port *port) static void lpuart32_start_tx(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long temp; + u32 temp; if (sport->lpuart_dma_tx_use) { if (!lpuart_stopped_or_empty(port)) @@ -839,8 +839,8 @@ static unsigned int lpuart_tx_empty(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned char sr1 = readb(port->membase + UARTSR1); - unsigned char sfifo = readb(port->membase + UARTSFIFO); + u8 sr1 = readb(port->membase + UARTSR1); + u8 sfifo = readb(port->membase + UARTSFIFO); if (sport->dma_tx_in_progress) return 0; @@ -855,9 +855,9 @@ static unsigned int lpuart32_tx_empty(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long stat = lpuart32_read(port, UARTSTAT); - unsigned long sfifo = lpuart32_read(port, UARTFIFO); - unsigned long ctrl = lpuart32_read(port, UARTCTRL); + u32 stat = lpuart32_read(port, UARTSTAT); + u32 sfifo = lpuart32_read(port, UARTFIFO); + u32 ctrl = lpuart32_read(port, UARTCTRL); if (sport->dma_tx_in_progress) return 0; @@ -884,7 +884,7 @@ static void lpuart_rxint(struct lpuart_port *sport) { unsigned int flg, ignored = 0, overrun = 0; struct tty_port *port = &sport->port.state->port; - unsigned char rx, sr; + u8 rx, sr; uart_port_lock(&sport->port); @@ -961,7 +961,7 @@ static void lpuart32_rxint(struct lpuart_port *sport) { unsigned int flg, ignored = 0; struct tty_port *port = &sport->port.state->port; - unsigned long rx, sr; + u32 rx, sr; bool is_break; uart_port_lock(&sport->port); @@ -1039,7 +1039,7 @@ static void lpuart32_rxint(struct lpuart_port *sport) static irqreturn_t lpuart_int(int irq, void *dev_id) { struct lpuart_port *sport = dev_id; - unsigned char sts; + u8 sts; sts = readb(sport->port.membase + UARTSR1); @@ -1113,7 +1113,7 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport) int count, copied; if (lpuart_is_32(sport)) { - unsigned long sr = lpuart32_read(&sport->port, UARTSTAT); + u32 sr = lpuart32_read(&sport->port, UARTSTAT); if (sr & (UARTSTAT_PE | UARTSTAT_FE)) { /* Clear the error flags */ @@ -1125,10 +1125,10 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport) sport->port.icount.frame++; } } else { - unsigned char sr = readb(sport->port.membase + UARTSR1); + u8 sr = readb(sport->port.membase + UARTSR1); if (sr & (UARTSR1_PE | UARTSR1_FE)) { - unsigned char cr2; + u8 cr2; /* Disable receiver during this operation... */ cr2 = readb(sport->port.membase + UARTCR2); @@ -1279,7 +1279,7 @@ static void lpuart32_dma_idleint(struct lpuart_port *sport) static irqreturn_t lpuart32_int(int irq, void *dev_id) { struct lpuart_port *sport = dev_id; - unsigned long sts, rxcount; + u32 sts, rxcount; sts = lpuart32_read(&sport->port, UARTSTAT); rxcount = lpuart32_read(&sport->port, UARTWATER); @@ -1411,12 +1411,12 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) dma_async_issue_pending(chan); if (lpuart_is_32(sport)) { - unsigned long temp = lpuart32_read(&sport->port, UARTBAUD); + u32 temp = lpuart32_read(&sport->port, UARTBAUD); lpuart32_write(&sport->port, temp | UARTBAUD_RDMAE, UARTBAUD); if (sport->dma_idle_int) { - unsigned long ctrl = lpuart32_read(&sport->port, UARTCTRL); + u32 ctrl = lpuart32_read(&sport->port, UARTCTRL); lpuart32_write(&sport->port, ctrl | UARTCTRL_ILIE, UARTCTRL); } @@ -1482,7 +1482,7 @@ static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termio struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long modem = lpuart32_read(&sport->port, UARTMODIR) + u32 modem = lpuart32_read(&sport->port, UARTMODIR) & ~(UARTMODIR_TXRTSPOL | UARTMODIR_TXRTSE); u32 ctrl; @@ -1577,7 +1577,7 @@ static void lpuart32_set_mctrl(struct uart_port *port, unsigned int mctrl) static void lpuart_break_ctl(struct uart_port *port, int break_state) { - unsigned char temp; + u8 temp; temp = readb(port->membase + UARTCR2) & ~UARTCR2_SBK; @@ -1589,7 +1589,7 @@ static void lpuart_break_ctl(struct uart_port *port, int break_state) static void lpuart32_break_ctl(struct uart_port *port, int break_state) { - unsigned long temp; + u32 temp; temp = lpuart32_read(port, UARTCTRL); @@ -1623,8 +1623,7 @@ static void lpuart32_break_ctl(struct uart_port *port, int break_state) static void lpuart_setup_watermark(struct lpuart_port *sport) { - unsigned char val, cr2; - unsigned char cr2_saved; + u8 val, cr2, cr2_saved; cr2 = readb(sport->port.membase + UARTCR2); cr2_saved = cr2; @@ -1657,7 +1656,7 @@ static void lpuart_setup_watermark(struct lpuart_port *sport) static void lpuart_setup_watermark_enable(struct lpuart_port *sport) { - unsigned char cr2; + u8 cr2; lpuart_setup_watermark(sport); @@ -1668,8 +1667,7 @@ static void lpuart_setup_watermark_enable(struct lpuart_port *sport) static void lpuart32_setup_watermark(struct lpuart_port *sport) { - unsigned long val, ctrl; - unsigned long ctrl_saved; + u32 val, ctrl, ctrl_saved; ctrl = lpuart32_read(&sport->port, UARTCTRL); ctrl_saved = ctrl; @@ -1778,7 +1776,7 @@ static void lpuart_tx_dma_startup(struct lpuart_port *sport) static void lpuart_rx_dma_startup(struct lpuart_port *sport) { int ret; - unsigned char cr3; + u8 cr3; if (uart_console(&sport->port)) goto err; @@ -1828,7 +1826,7 @@ static void lpuart_hw_setup(struct lpuart_port *sport) static int lpuart_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned char temp; + u8 temp; /* determine FIFO size and enable FIFO mode */ temp = readb(sport->port.membase + UARTPFIFO); @@ -1848,7 +1846,7 @@ static int lpuart_startup(struct uart_port *port) static void lpuart32_hw_disable(struct lpuart_port *sport) { - unsigned long temp; + u32 temp; temp = lpuart32_read(&sport->port, UARTCTRL); temp &= ~(UARTCTRL_RIE | UARTCTRL_ILIE | UARTCTRL_RE | @@ -1858,7 +1856,7 @@ static void lpuart32_hw_disable(struct lpuart_port *sport) static void lpuart32_configure(struct lpuart_port *sport) { - unsigned long temp; + u32 temp; temp = lpuart32_read(&sport->port, UARTCTRL); if (!sport->lpuart_dma_rx_use) @@ -1888,7 +1886,7 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) static int lpuart32_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long temp; + u32 temp; /* determine FIFO size */ temp = lpuart32_read(&sport->port, UARTFIFO); @@ -1942,7 +1940,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) static void lpuart_shutdown(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned char temp; + u8 temp; unsigned long flags; uart_port_lock_irqsave(port, &flags); @@ -1962,7 +1960,7 @@ static void lpuart32_shutdown(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - unsigned long temp; + u32 temp; unsigned long flags; uart_port_lock_irqsave(port, &flags); @@ -1998,7 +1996,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); unsigned long flags; - unsigned char cr1, old_cr1, old_cr2, cr3, cr4, bdh, modem; + u8 cr1, old_cr1, old_cr2, cr3, cr4, bdh, modem; unsigned int baud; unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; unsigned int sbr, brfa; @@ -2236,7 +2234,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); unsigned long flags; - unsigned long ctrl, old_ctrl, bd, modem; + u32 ctrl, old_ctrl, bd, modem; unsigned int baud; unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; @@ -2503,7 +2501,7 @@ static void lpuart_console_write(struct console *co, const char *s, unsigned int count) { struct lpuart_port *sport = lpuart_ports[co->index]; - unsigned char old_cr2, cr2; + u8 old_cr2, cr2; unsigned long flags; int locked = 1; @@ -2533,7 +2531,7 @@ static void lpuart32_console_write(struct console *co, const char *s, unsigned int count) { struct lpuart_port *sport = lpuart_ports[co->index]; - unsigned long old_cr, cr; + u32 old_cr, cr; unsigned long flags; int locked = 1; @@ -2567,7 +2565,7 @@ static void __init lpuart_console_get_options(struct lpuart_port *sport, int *baud, int *parity, int *bits) { - unsigned char cr, bdh, bdl, brfa; + u8 cr, bdh, bdl, brfa; unsigned int sbr, uartclk, baud_raw; cr = readb(sport->port.membase + UARTCR2); @@ -2616,7 +2614,7 @@ static void __init lpuart32_console_get_options(struct lpuart_port *sport, int *baud, int *parity, int *bits) { - unsigned long cr, bd; + u32 cr, bd; unsigned int sbr, uartclk, baud_raw; cr = lpuart32_read(&sport->port, UARTCTRL); @@ -2822,7 +2820,7 @@ static int lpuart_global_reset(struct lpuart_port *sport) { struct uart_port *port = &sport->port; void __iomem *global_addr; - unsigned long ctrl, bd; + u32 ctrl, bd; unsigned int val = 0; int ret; @@ -3028,7 +3026,7 @@ static int lpuart_runtime_resume(struct device *dev) static void serial_lpuart_enable_wakeup(struct lpuart_port *sport, bool on) { - unsigned int val, baud; + u32 val, baud; if (lpuart_is_32(sport)) { val = lpuart32_read(&sport->port, UARTCTRL); @@ -3093,7 +3091,7 @@ static int lpuart_suspend_noirq(struct device *dev) static int lpuart_resume_noirq(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); - unsigned int val; + u32 val; pinctrl_pm_select_default_state(dev); @@ -3113,7 +3111,8 @@ static int lpuart_resume_noirq(struct device *dev) static int lpuart_suspend(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); - unsigned long temp, flags; + u32 temp; + unsigned long flags; uart_suspend_port(&lpuart_reg, &sport->port); From 3cc16ae096f164ae0c6b98416c25a01db5f3a529 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 12 Mar 2025 10:39:03 +0800 Subject: [PATCH 1517/2157] tty: serial: fsl_lpuart: use port struct directly to simply code Most lpuart functions have the parameter struct uart_port *port, but still use the &sport->port to get the uart_port instead of use it directly, let's simply the code logic, directly use this struct instead of covert it from struct sport. Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20250312023904.1343351-3-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 210 ++++++++++++++++---------------- 1 file changed, 102 insertions(+), 108 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 6f64a3300a38..3b48e320e7f4 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -581,7 +581,7 @@ static int lpuart_dma_tx_request(struct uart_port *port) ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig); if (ret) { - dev_err(sport->port.dev, + dev_err(port->dev, "DMA slave config failed, err = %d\n", ret); return ret; } @@ -611,13 +611,13 @@ static void lpuart_flush_buffer(struct uart_port *port) } if (lpuart_is_32(sport)) { - val = lpuart32_read(&sport->port, UARTFIFO); + val = lpuart32_read(port, UARTFIFO); val |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH; - lpuart32_write(&sport->port, val, UARTFIFO); + lpuart32_write(port, val, UARTFIFO); } else { - val = readb(sport->port.membase + UARTCFIFO); + val = readb(port->membase + UARTCFIFO); val |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH; - writeb(val, sport->port.membase + UARTCFIFO); + writeb(val, port->membase + UARTCFIFO); } } @@ -644,33 +644,33 @@ static int lpuart_poll_init(struct uart_port *port) unsigned long flags; u8 temp; - sport->port.fifosize = 0; + port->fifosize = 0; - uart_port_lock_irqsave(&sport->port, &flags); + uart_port_lock_irqsave(port, &flags); /* Disable Rx & Tx */ - writeb(0, sport->port.membase + UARTCR2); + writeb(0, port->membase + UARTCR2); - temp = readb(sport->port.membase + UARTPFIFO); + temp = readb(port->membase + UARTPFIFO); /* Enable Rx and Tx FIFO */ writeb(temp | UARTPFIFO_RXFE | UARTPFIFO_TXFE, - sport->port.membase + UARTPFIFO); + port->membase + UARTPFIFO); /* flush Tx and Rx FIFO */ writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH, - sport->port.membase + UARTCFIFO); + port->membase + UARTCFIFO); /* explicitly clear RDRF */ - if (readb(sport->port.membase + UARTSR1) & UARTSR1_RDRF) { - readb(sport->port.membase + UARTDR); - writeb(UARTSFIFO_RXUF, sport->port.membase + UARTSFIFO); + if (readb(port->membase + UARTSR1) & UARTSR1_RDRF) { + readb(port->membase + UARTDR); + writeb(UARTSFIFO_RXUF, port->membase + UARTSFIFO); } - writeb(0, sport->port.membase + UARTTWFIFO); - writeb(1, sport->port.membase + UARTRWFIFO); + writeb(0, port->membase + UARTTWFIFO); + writeb(1, port->membase + UARTRWFIFO); /* Enable Rx and Tx */ - writeb(UARTCR2_RE | UARTCR2_TE, sport->port.membase + UARTCR2); - uart_port_unlock_irqrestore(&sport->port, flags); + writeb(UARTCR2_RE | UARTCR2_TE, port->membase + UARTCR2); + uart_port_unlock_irqrestore(port, flags); return 0; } @@ -696,30 +696,30 @@ static int lpuart32_poll_init(struct uart_port *port) struct lpuart_port *sport = container_of(port, struct lpuart_port, port); u32 temp; - sport->port.fifosize = 0; + port->fifosize = 0; - uart_port_lock_irqsave(&sport->port, &flags); + uart_port_lock_irqsave(port, &flags); /* Disable Rx & Tx */ - lpuart32_write(&sport->port, 0, UARTCTRL); + lpuart32_write(port, 0, UARTCTRL); - temp = lpuart32_read(&sport->port, UARTFIFO); + temp = lpuart32_read(port, UARTFIFO); /* Enable Rx and Tx FIFO */ - lpuart32_write(&sport->port, temp | UARTFIFO_RXFE | UARTFIFO_TXFE, UARTFIFO); + lpuart32_write(port, temp | UARTFIFO_RXFE | UARTFIFO_TXFE, UARTFIFO); /* flush Tx and Rx FIFO */ - lpuart32_write(&sport->port, UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH, UARTFIFO); + lpuart32_write(port, UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH, UARTFIFO); /* explicitly clear RDRF */ - if (lpuart32_read(&sport->port, UARTSTAT) & UARTSTAT_RDRF) { - lpuart32_read(&sport->port, UARTDATA); - lpuart32_write(&sport->port, UARTFIFO_RXUF, UARTFIFO); + if (lpuart32_read(port, UARTSTAT) & UARTSTAT_RDRF) { + lpuart32_read(port, UARTDATA); + lpuart32_write(port, UARTFIFO_RXUF, UARTFIFO); } /* Enable Rx and Tx */ - lpuart32_write(&sport->port, UARTCTRL_RE | UARTCTRL_TE, UARTCTRL); - uart_port_unlock_irqrestore(&sport->port, flags); + lpuart32_write(port, UARTCTRL_RE | UARTCTRL_TE, UARTCTRL); + uart_port_unlock_irqrestore(port, flags); return 0; } @@ -1449,12 +1449,9 @@ static void lpuart_dma_rx_free(struct uart_port *port) static int lpuart_config_rs485(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485) { - struct lpuart_port *sport = container_of(port, - struct lpuart_port, port); - - u8 modem = readb(sport->port.membase + UARTMODEM) & + u8 modem = readb(port->membase + UARTMODEM) & ~(UARTMODEM_TXRTSPOL | UARTMODEM_TXRTSE); - writeb(modem, sport->port.membase + UARTMODEM); + writeb(modem, port->membase + UARTMODEM); if (rs485->flags & SER_RS485_ENABLED) { /* Enable auto RS-485 RTS mode */ @@ -1472,32 +1469,29 @@ static int lpuart_config_rs485(struct uart_port *port, struct ktermios *termios, modem &= ~UARTMODEM_TXRTSPOL; } - writeb(modem, sport->port.membase + UARTMODEM); + writeb(modem, port->membase + UARTMODEM); return 0; } static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485) { - struct lpuart_port *sport = container_of(port, - struct lpuart_port, port); - - u32 modem = lpuart32_read(&sport->port, UARTMODIR) + u32 modem = lpuart32_read(port, UARTMODIR) & ~(UARTMODIR_TXRTSPOL | UARTMODIR_TXRTSE); u32 ctrl; /* TXRTSE and TXRTSPOL only can be changed when transmitter is disabled. */ - ctrl = lpuart32_read(&sport->port, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); if (ctrl & UARTCTRL_TE) { /* wait for the transmit engine to complete */ - lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); - lpuart32_write(&sport->port, ctrl & ~UARTCTRL_TE, UARTCTRL); + lpuart32_wait_bit_set(port, UARTSTAT, UARTSTAT_TC); + lpuart32_write(port, ctrl & ~UARTCTRL_TE, UARTCTRL); - while (lpuart32_read(&sport->port, UARTCTRL) & UARTCTRL_TE) + while (lpuart32_read(port, UARTCTRL) & UARTCTRL_TE) cpu_relax(); } - lpuart32_write(&sport->port, modem, UARTMODIR); + lpuart32_write(port, modem, UARTMODIR); if (rs485->flags & SER_RS485_ENABLED) { /* Enable auto RS-485 RTS mode */ @@ -1515,10 +1509,10 @@ static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termio modem &= ~UARTMODIR_TXRTSPOL; } - lpuart32_write(&sport->port, modem, UARTMODIR); + lpuart32_write(port, modem, UARTMODIR); if (ctrl & UARTCTRL_TE) - lpuart32_write(&sport->port, ctrl, UARTCTRL); + lpuart32_write(port, ctrl, UARTCTRL); return 0; } @@ -1829,11 +1823,11 @@ static int lpuart_startup(struct uart_port *port) u8 temp; /* determine FIFO size and enable FIFO mode */ - temp = readb(sport->port.membase + UARTPFIFO); + temp = readb(port->membase + UARTPFIFO); sport->txfifo_size = UARTFIFO_DEPTH((temp >> UARTPFIFO_TXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK); - sport->port.fifosize = sport->txfifo_size; + port->fifosize = sport->txfifo_size; sport->rxfifo_size = UARTFIFO_DEPTH((temp >> UARTPFIFO_RXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK); @@ -1889,11 +1883,11 @@ static int lpuart32_startup(struct uart_port *port) u32 temp; /* determine FIFO size */ - temp = lpuart32_read(&sport->port, UARTFIFO); + temp = lpuart32_read(port, UARTFIFO); sport->txfifo_size = UARTFIFO_DEPTH((temp >> UARTFIFO_TXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK); - sport->port.fifosize = sport->txfifo_size; + port->fifosize = sport->txfifo_size; sport->rxfifo_size = UARTFIFO_DEPTH((temp >> UARTFIFO_RXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK); @@ -1906,7 +1900,7 @@ static int lpuart32_startup(struct uart_port *port) if (is_layerscape_lpuart(sport)) { sport->rxfifo_size = 16; sport->txfifo_size = 16; - sport->port.fifosize = sport->txfifo_size; + port->fifosize = sport->txfifo_size; } lpuart_request_dma(sport); @@ -1966,8 +1960,8 @@ static void lpuart32_shutdown(struct uart_port *port) uart_port_lock_irqsave(port, &flags); /* clear status */ - temp = lpuart32_read(&sport->port, UARTSTAT); - lpuart32_write(&sport->port, temp, UARTSTAT); + temp = lpuart32_read(port, UARTSTAT); + lpuart32_write(port, temp, UARTSTAT); /* disable Rx/Tx DMA */ temp = lpuart32_read(port, UARTBAUD); @@ -2001,12 +1995,12 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; unsigned int sbr, brfa; - cr1 = old_cr1 = readb(sport->port.membase + UARTCR1); - old_cr2 = readb(sport->port.membase + UARTCR2); - cr3 = readb(sport->port.membase + UARTCR3); - cr4 = readb(sport->port.membase + UARTCR4); - bdh = readb(sport->port.membase + UARTBDH); - modem = readb(sport->port.membase + UARTMODEM); + cr1 = old_cr1 = readb(port->membase + UARTCR1); + old_cr2 = readb(port->membase + UARTCR2); + cr3 = readb(port->membase + UARTCR3); + cr4 = readb(port->membase + UARTCR4); + bdh = readb(port->membase + UARTBDH); + modem = readb(port->membase + UARTMODEM); /* * only support CS8 and CS7, and for CS7 must enable PE. * supported mode: @@ -2038,7 +2032,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, * When auto RS-485 RTS mode is enabled, * hardware flow control need to be disabled. */ - if (sport->port.rs485.flags & SER_RS485_ENABLED) + if (port->rs485.flags & SER_RS485_ENABLED) termios->c_cflag &= ~CRTSCTS; if (termios->c_cflag & CRTSCTS) @@ -2079,59 +2073,59 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, * Need to update the Ring buffer length according to the selected * baud rate and restart Rx DMA path. * - * Since timer function acqures sport->port.lock, need to stop before + * Since timer function acqures port->lock, need to stop before * acquring same lock because otherwise del_timer_sync() can deadlock. */ if (old && sport->lpuart_dma_rx_use) - lpuart_dma_rx_free(&sport->port); + lpuart_dma_rx_free(port); - uart_port_lock_irqsave(&sport->port, &flags); + uart_port_lock_irqsave(port, &flags); - sport->port.read_status_mask = 0; + port->read_status_mask = 0; if (termios->c_iflag & INPCK) - sport->port.read_status_mask |= UARTSR1_FE | UARTSR1_PE; + port->read_status_mask |= UARTSR1_FE | UARTSR1_PE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - sport->port.read_status_mask |= UARTSR1_FE; + port->read_status_mask |= UARTSR1_FE; /* characters to ignore */ - sport->port.ignore_status_mask = 0; + port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) - sport->port.ignore_status_mask |= UARTSR1_PE; + port->ignore_status_mask |= UARTSR1_PE; if (termios->c_iflag & IGNBRK) { - sport->port.ignore_status_mask |= UARTSR1_FE; + port->ignore_status_mask |= UARTSR1_FE; /* * if we're ignoring parity and break indicators, * ignore overruns too (for real raw support). */ if (termios->c_iflag & IGNPAR) - sport->port.ignore_status_mask |= UARTSR1_OR; + port->ignore_status_mask |= UARTSR1_OR; } /* update the per-port timeout */ uart_update_timeout(port, termios->c_cflag, baud); /* wait transmit engin complete */ - lpuart_wait_bit_set(&sport->port, UARTSR1, UARTSR1_TC); + lpuart_wait_bit_set(port, UARTSR1, UARTSR1_TC); /* disable transmit and receive */ writeb(old_cr2 & ~(UARTCR2_TE | UARTCR2_RE), - sport->port.membase + UARTCR2); + port->membase + UARTCR2); - sbr = sport->port.uartclk / (16 * baud); - brfa = ((sport->port.uartclk - (16 * sbr * baud)) * 2) / baud; + sbr = port->uartclk / (16 * baud); + brfa = ((port->uartclk - (16 * sbr * baud)) * 2) / baud; bdh &= ~UARTBDH_SBR_MASK; bdh |= (sbr >> 8) & 0x1F; cr4 &= ~UARTCR4_BRFA_MASK; brfa &= UARTCR4_BRFA_MASK; - writeb(cr4 | brfa, sport->port.membase + UARTCR4); - writeb(bdh, sport->port.membase + UARTBDH); - writeb(sbr & 0xFF, sport->port.membase + UARTBDL); - writeb(cr3, sport->port.membase + UARTCR3); - writeb(cr1, sport->port.membase + UARTCR1); - writeb(modem, sport->port.membase + UARTMODEM); + writeb(cr4 | brfa, port->membase + UARTCR4); + writeb(bdh, port->membase + UARTBDH); + writeb(sbr & 0xFF, port->membase + UARTBDL); + writeb(cr3, port->membase + UARTCR3); + writeb(cr1, port->membase + UARTCR1); + writeb(modem, port->membase + UARTMODEM); /* restore control register */ - writeb(old_cr2, sport->port.membase + UARTCR2); + writeb(old_cr2, port->membase + UARTCR2); if (old && sport->lpuart_dma_rx_use) { if (!lpuart_start_rx_dma(sport)) @@ -2140,7 +2134,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, sport->lpuart_dma_rx_use = false; } - uart_port_unlock_irqrestore(&sport->port, flags); + uart_port_unlock_irqrestore(port, flags); } static void __lpuart32_serial_setbrg(struct uart_port *port, @@ -2238,9 +2232,9 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, unsigned int baud; unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; - ctrl = old_ctrl = lpuart32_read(&sport->port, UARTCTRL); - bd = lpuart32_read(&sport->port, UARTBAUD); - modem = lpuart32_read(&sport->port, UARTMODIR); + ctrl = old_ctrl = lpuart32_read(port, UARTCTRL); + bd = lpuart32_read(port, UARTBAUD); + modem = lpuart32_read(port, UARTMODIR); sport->is_cs7 = false; /* * only support CS8 and CS7 @@ -2274,7 +2268,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, * When auto RS-485 RTS mode is enabled, * hardware flow control need to be disabled. */ - if (sport->port.rs485.flags & SER_RS485_ENABLED) + if (port->rs485.flags & SER_RS485_ENABLED) termios->c_cflag &= ~CRTSCTS; if (termios->c_cflag & CRTSCTS) @@ -2324,32 +2318,32 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, * Need to update the Ring buffer length according to the selected * baud rate and restart Rx DMA path. * - * Since timer function acqures sport->port.lock, need to stop before + * Since timer function acqures port->lock, need to stop before * acquring same lock because otherwise del_timer_sync() can deadlock. */ if (old && sport->lpuart_dma_rx_use) - lpuart_dma_rx_free(&sport->port); + lpuart_dma_rx_free(port); - uart_port_lock_irqsave(&sport->port, &flags); + uart_port_lock_irqsave(port, &flags); - sport->port.read_status_mask = 0; + port->read_status_mask = 0; if (termios->c_iflag & INPCK) - sport->port.read_status_mask |= UARTSTAT_FE | UARTSTAT_PE; + port->read_status_mask |= UARTSTAT_FE | UARTSTAT_PE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - sport->port.read_status_mask |= UARTSTAT_FE; + port->read_status_mask |= UARTSTAT_FE; /* characters to ignore */ - sport->port.ignore_status_mask = 0; + port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) - sport->port.ignore_status_mask |= UARTSTAT_PE; + port->ignore_status_mask |= UARTSTAT_PE; if (termios->c_iflag & IGNBRK) { - sport->port.ignore_status_mask |= UARTSTAT_FE; + port->ignore_status_mask |= UARTSTAT_FE; /* * if we're ignoring parity and break indicators, * ignore overruns too (for real raw support). */ if (termios->c_iflag & IGNPAR) - sport->port.ignore_status_mask |= UARTSTAT_OR; + port->ignore_status_mask |= UARTSTAT_OR; } /* update the per-port timeout */ @@ -2361,22 +2355,22 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, * asserted. */ if (!(old_ctrl & UARTCTRL_SBK)) { - lpuart32_write(&sport->port, 0, UARTMODIR); - lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); + lpuart32_write(port, 0, UARTMODIR); + lpuart32_wait_bit_set(port, UARTSTAT, UARTSTAT_TC); } /* disable transmit and receive */ - lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE), + lpuart32_write(port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE), UARTCTRL); - lpuart32_write(&sport->port, bd, UARTBAUD); + lpuart32_write(port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ - lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); + lpuart32_write(port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ - lpuart32_write(&sport->port, ctrl, UARTCTRL); + lpuart32_write(port, ctrl, UARTCTRL); /* re-enable the CTS if needed */ - lpuart32_write(&sport->port, modem, UARTMODIR); + lpuart32_write(port, modem, UARTMODIR); if ((ctrl & (UARTCTRL_PE | UARTCTRL_M)) == UARTCTRL_PE) sport->is_cs7 = true; @@ -2388,7 +2382,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, sport->lpuart_dma_rx_use = false; } - uart_port_unlock_irqrestore(&sport->port, flags); + uart_port_unlock_irqrestore(port, flags); } static const char *lpuart_type(struct uart_port *port) @@ -2826,7 +2820,7 @@ static int lpuart_global_reset(struct lpuart_port *sport) ret = clk_prepare_enable(sport->ipg_clk); if (ret) { - dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); + dev_err(port->dev, "failed to enable uart ipg clk: %d\n", ret); return ret; } @@ -2837,10 +2831,10 @@ static int lpuart_global_reset(struct lpuart_port *sport) */ ctrl = lpuart32_read(port, UARTCTRL); if (ctrl & UARTCTRL_TE) { - bd = lpuart32_read(&sport->port, UARTBAUD); + bd = lpuart32_read(port, UARTBAUD); if (read_poll_timeout(lpuart32_tx_empty, val, val, 1, 100000, false, port)) { - dev_warn(sport->port.dev, + dev_warn(port->dev, "timeout waiting for transmit engine to complete\n"); clk_disable_unprepare(sport->ipg_clk); return 0; @@ -3192,7 +3186,7 @@ static void lpuart_console_fixup(struct lpuart_port *sport) * in VLLS mode, or restore console setting here. */ if (is_imx7ulp_lpuart(sport) && lpuart_uport_is_active(sport) && - console_suspend_enabled && uart_console(&sport->port)) { + console_suspend_enabled && uart_console(uport)) { mutex_lock(&port->mutex); memset(&termios, 0, sizeof(struct ktermios)); From 1d9ac5bd4eb10eecaa5b18128b9416239dc58d38 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Wed, 12 Mar 2025 10:39:04 +0800 Subject: [PATCH 1518/2157] tty: serial: fsl_lpuart: rename register variables more specifically There are many fuzzy register variables in the lpuart driver, such as temp, tmp, val, reg. Let's give these register variables more specific names. Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20250312023904.1343351-4-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 220 ++++++++++++++++---------------- 1 file changed, 110 insertions(+), 110 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 3b48e320e7f4..c8cc0a241fba 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -441,36 +441,36 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport) static void lpuart_stop_tx(struct uart_port *port) { - u8 temp; + u8 cr2; - temp = readb(port->membase + UARTCR2); - temp &= ~(UARTCR2_TIE | UARTCR2_TCIE); - writeb(temp, port->membase + UARTCR2); + cr2 = readb(port->membase + UARTCR2); + cr2 &= ~(UARTCR2_TIE | UARTCR2_TCIE); + writeb(cr2, port->membase + UARTCR2); } static void lpuart32_stop_tx(struct uart_port *port) { - u32 temp; + u32 ctrl; - temp = lpuart32_read(port, UARTCTRL); - temp &= ~(UARTCTRL_TIE | UARTCTRL_TCIE); - lpuart32_write(port, temp, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); + ctrl &= ~(UARTCTRL_TIE | UARTCTRL_TCIE); + lpuart32_write(port, ctrl, UARTCTRL); } static void lpuart_stop_rx(struct uart_port *port) { - u8 temp; + u8 cr2; - temp = readb(port->membase + UARTCR2); - writeb(temp & ~UARTCR2_RE, port->membase + UARTCR2); + cr2 = readb(port->membase + UARTCR2); + writeb(cr2 & ~UARTCR2_RE, port->membase + UARTCR2); } static void lpuart32_stop_rx(struct uart_port *port) { - u32 temp; + u32 ctrl; - temp = lpuart32_read(port, UARTCTRL); - lpuart32_write(port, temp & ~UARTCTRL_RE, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); + lpuart32_write(port, ctrl & ~UARTCTRL_RE, UARTCTRL); } static void lpuart_dma_tx(struct lpuart_port *sport) @@ -599,7 +599,7 @@ static void lpuart_flush_buffer(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); struct dma_chan *chan = sport->dma_tx_chan; - u32 val; + u32 fifo; if (sport->lpuart_dma_tx_use) { if (sport->dma_tx_in_progress) { @@ -611,13 +611,13 @@ static void lpuart_flush_buffer(struct uart_port *port) } if (lpuart_is_32(sport)) { - val = lpuart32_read(port, UARTFIFO); - val |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH; - lpuart32_write(port, val, UARTFIFO); + fifo = lpuart32_read(port, UARTFIFO); + fifo |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH; + lpuart32_write(port, fifo, UARTFIFO); } else { - val = readb(port->membase + UARTCFIFO); - val |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH; - writeb(val, port->membase + UARTCFIFO); + fifo = readb(port->membase + UARTCFIFO); + fifo |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH; + writeb(fifo, port->membase + UARTCFIFO); } } @@ -642,7 +642,7 @@ static int lpuart_poll_init(struct uart_port *port) struct lpuart_port *sport = container_of(port, struct lpuart_port, port); unsigned long flags; - u8 temp; + u8 fifo; port->fifosize = 0; @@ -650,9 +650,9 @@ static int lpuart_poll_init(struct uart_port *port) /* Disable Rx & Tx */ writeb(0, port->membase + UARTCR2); - temp = readb(port->membase + UARTPFIFO); + fifo = readb(port->membase + UARTPFIFO); /* Enable Rx and Tx FIFO */ - writeb(temp | UARTPFIFO_RXFE | UARTPFIFO_TXFE, + writeb(fifo | UARTPFIFO_RXFE | UARTPFIFO_TXFE, port->membase + UARTPFIFO); /* flush Tx and Rx FIFO */ @@ -694,7 +694,7 @@ static int lpuart32_poll_init(struct uart_port *port) { unsigned long flags; struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u32 temp; + u32 fifo; port->fifosize = 0; @@ -703,10 +703,10 @@ static int lpuart32_poll_init(struct uart_port *port) /* Disable Rx & Tx */ lpuart32_write(port, 0, UARTCTRL); - temp = lpuart32_read(port, UARTFIFO); + fifo = lpuart32_read(port, UARTFIFO); /* Enable Rx and Tx FIFO */ - lpuart32_write(port, temp | UARTFIFO_RXFE | UARTFIFO_TXFE, UARTFIFO); + lpuart32_write(port, fifo | UARTFIFO_RXFE | UARTFIFO_TXFE, UARTFIFO); /* flush Tx and Rx FIFO */ lpuart32_write(port, UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH, UARTFIFO); @@ -789,10 +789,10 @@ static void lpuart_start_tx(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u8 temp; + u8 cr2; - temp = readb(port->membase + UARTCR2); - writeb(temp | UARTCR2_TIE, port->membase + UARTCR2); + cr2 = readb(port->membase + UARTCR2); + writeb(cr2 | UARTCR2_TIE, port->membase + UARTCR2); if (sport->lpuart_dma_tx_use) { if (!lpuart_stopped_or_empty(port)) @@ -806,14 +806,14 @@ static void lpuart_start_tx(struct uart_port *port) static void lpuart32_start_tx(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u32 temp; + u32 ctrl; if (sport->lpuart_dma_tx_use) { if (!lpuart_stopped_or_empty(port)) lpuart_dma_tx(sport); } else { - temp = lpuart32_read(port, UARTCTRL); - lpuart32_write(port, temp | UARTCTRL_TIE, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); + lpuart32_write(port, ctrl | UARTCTRL_TIE, UARTCTRL); if (lpuart32_read(port, UARTSTAT) & UARTSTAT_TDRE) lpuart32_transmit_buffer(sport); @@ -1411,9 +1411,9 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) dma_async_issue_pending(chan); if (lpuart_is_32(sport)) { - u32 temp = lpuart32_read(&sport->port, UARTBAUD); + u32 baud = lpuart32_read(&sport->port, UARTBAUD); - lpuart32_write(&sport->port, temp | UARTBAUD_RDMAE, UARTBAUD); + lpuart32_write(&sport->port, baud | UARTBAUD_RDMAE, UARTBAUD); if (sport->dma_idle_int) { u32 ctrl = lpuart32_read(&sport->port, UARTCTRL); @@ -1520,10 +1520,10 @@ static int lpuart32_config_rs485(struct uart_port *port, struct ktermios *termio static unsigned int lpuart_get_mctrl(struct uart_port *port) { unsigned int mctrl = 0; - u8 reg; + u8 cr1; - reg = readb(port->membase + UARTCR1); - if (reg & UARTCR1_LOOPS) + cr1 = readb(port->membase + UARTCR1); + if (cr1 & UARTCR1_LOOPS) mctrl |= TIOCM_LOOP; return mctrl; @@ -1532,10 +1532,10 @@ static unsigned int lpuart_get_mctrl(struct uart_port *port) static unsigned int lpuart32_get_mctrl(struct uart_port *port) { unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; - u32 reg; + u32 ctrl; - reg = lpuart32_read(port, UARTCTRL); - if (reg & UARTCTRL_LOOPS) + ctrl = lpuart32_read(port, UARTCTRL); + if (ctrl & UARTCTRL_LOOPS) mctrl |= TIOCM_LOOP; return mctrl; @@ -1543,49 +1543,49 @@ static unsigned int lpuart32_get_mctrl(struct uart_port *port) static void lpuart_set_mctrl(struct uart_port *port, unsigned int mctrl) { - u8 reg; + u8 cr1; - reg = readb(port->membase + UARTCR1); + cr1 = readb(port->membase + UARTCR1); /* for internal loopback we need LOOPS=1 and RSRC=0 */ - reg &= ~(UARTCR1_LOOPS | UARTCR1_RSRC); + cr1 &= ~(UARTCR1_LOOPS | UARTCR1_RSRC); if (mctrl & TIOCM_LOOP) - reg |= UARTCR1_LOOPS; + cr1 |= UARTCR1_LOOPS; - writeb(reg, port->membase + UARTCR1); + writeb(cr1, port->membase + UARTCR1); } static void lpuart32_set_mctrl(struct uart_port *port, unsigned int mctrl) { - u32 reg; + u32 ctrl; - reg = lpuart32_read(port, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); /* for internal loopback we need LOOPS=1 and RSRC=0 */ - reg &= ~(UARTCTRL_LOOPS | UARTCTRL_RSRC); + ctrl &= ~(UARTCTRL_LOOPS | UARTCTRL_RSRC); if (mctrl & TIOCM_LOOP) - reg |= UARTCTRL_LOOPS; + ctrl |= UARTCTRL_LOOPS; - lpuart32_write(port, reg, UARTCTRL); + lpuart32_write(port, ctrl, UARTCTRL); } static void lpuart_break_ctl(struct uart_port *port, int break_state) { - u8 temp; + u8 cr2; - temp = readb(port->membase + UARTCR2) & ~UARTCR2_SBK; + cr2 = readb(port->membase + UARTCR2) & ~UARTCR2_SBK; if (break_state != 0) - temp |= UARTCR2_SBK; + cr2 |= UARTCR2_SBK; - writeb(temp, port->membase + UARTCR2); + writeb(cr2, port->membase + UARTCR2); } static void lpuart32_break_ctl(struct uart_port *port, int break_state) { - u32 temp; + u32 ctrl; - temp = lpuart32_read(port, UARTCTRL); + ctrl = lpuart32_read(port, UARTCTRL); /* * LPUART IP now has two known bugs, one is CTS has higher priority than the @@ -1602,22 +1602,22 @@ static void lpuart32_break_ctl(struct uart_port *port, int break_state) * Disable the transmitter to prevent any data from being sent out * during break, then invert the TX line to send break. */ - temp &= ~UARTCTRL_TE; - lpuart32_write(port, temp, UARTCTRL); - temp |= UARTCTRL_TXINV; - lpuart32_write(port, temp, UARTCTRL); + ctrl &= ~UARTCTRL_TE; + lpuart32_write(port, ctrl, UARTCTRL); + ctrl |= UARTCTRL_TXINV; + lpuart32_write(port, ctrl, UARTCTRL); } else { /* Disable the TXINV to turn off break and re-enable transmitter. */ - temp &= ~UARTCTRL_TXINV; - lpuart32_write(port, temp, UARTCTRL); - temp |= UARTCTRL_TE; - lpuart32_write(port, temp, UARTCTRL); + ctrl &= ~UARTCTRL_TXINV; + lpuart32_write(port, ctrl, UARTCTRL); + ctrl |= UARTCTRL_TE; + lpuart32_write(port, ctrl, UARTCTRL); } } static void lpuart_setup_watermark(struct lpuart_port *sport) { - u8 val, cr2, cr2_saved; + u8 fifo, cr2, cr2_saved; cr2 = readb(sport->port.membase + UARTCR2); cr2_saved = cr2; @@ -1625,8 +1625,8 @@ static void lpuart_setup_watermark(struct lpuart_port *sport) UARTCR2_RIE | UARTCR2_RE); writeb(cr2, sport->port.membase + UARTCR2); - val = readb(sport->port.membase + UARTPFIFO); - writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE, + fifo = readb(sport->port.membase + UARTPFIFO); + writeb(fifo | UARTPFIFO_TXFE | UARTPFIFO_RXFE, sport->port.membase + UARTPFIFO); /* flush Tx and Rx FIFO */ @@ -1696,14 +1696,14 @@ static void lpuart32_setup_watermark(struct lpuart_port *sport) static void lpuart32_setup_watermark_enable(struct lpuart_port *sport) { - u32 temp; + u32 ctrl; lpuart32_setup_watermark(sport); - temp = lpuart32_read(&sport->port, UARTCTRL); - temp |= UARTCTRL_RE | UARTCTRL_TE; - temp |= FIELD_PREP(UARTCTRL_IDLECFG, 0x7); - lpuart32_write(&sport->port, temp, UARTCTRL); + ctrl = lpuart32_read(&sport->port, UARTCTRL); + ctrl |= UARTCTRL_RE | UARTCTRL_TE; + ctrl |= FIELD_PREP(UARTCTRL_IDLECFG, 0x7); + lpuart32_write(&sport->port, ctrl, UARTCTRL); } static void rx_dma_timer_init(struct lpuart_port *sport) @@ -1820,16 +1820,16 @@ static void lpuart_hw_setup(struct lpuart_port *sport) static int lpuart_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u8 temp; + u8 fifo; /* determine FIFO size and enable FIFO mode */ - temp = readb(port->membase + UARTPFIFO); + fifo = readb(port->membase + UARTPFIFO); - sport->txfifo_size = UARTFIFO_DEPTH((temp >> UARTPFIFO_TXSIZE_OFF) & + sport->txfifo_size = UARTFIFO_DEPTH((fifo >> UARTPFIFO_TXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK); port->fifosize = sport->txfifo_size; - sport->rxfifo_size = UARTFIFO_DEPTH((temp >> UARTPFIFO_RXSIZE_OFF) & + sport->rxfifo_size = UARTFIFO_DEPTH((fifo >> UARTPFIFO_RXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK); lpuart_request_dma(sport); @@ -1840,24 +1840,24 @@ static int lpuart_startup(struct uart_port *port) static void lpuart32_hw_disable(struct lpuart_port *sport) { - u32 temp; + u32 ctrl; - temp = lpuart32_read(&sport->port, UARTCTRL); - temp &= ~(UARTCTRL_RIE | UARTCTRL_ILIE | UARTCTRL_RE | + ctrl = lpuart32_read(&sport->port, UARTCTRL); + ctrl &= ~(UARTCTRL_RIE | UARTCTRL_ILIE | UARTCTRL_RE | UARTCTRL_TIE | UARTCTRL_TE); - lpuart32_write(&sport->port, temp, UARTCTRL); + lpuart32_write(&sport->port, ctrl, UARTCTRL); } static void lpuart32_configure(struct lpuart_port *sport) { - u32 temp; + u32 ctrl; - temp = lpuart32_read(&sport->port, UARTCTRL); + ctrl = lpuart32_read(&sport->port, UARTCTRL); if (!sport->lpuart_dma_rx_use) - temp |= UARTCTRL_RIE | UARTCTRL_ILIE; + ctrl |= UARTCTRL_RIE | UARTCTRL_ILIE; if (!sport->lpuart_dma_tx_use) - temp |= UARTCTRL_TIE; - lpuart32_write(&sport->port, temp, UARTCTRL); + ctrl |= UARTCTRL_TIE; + lpuart32_write(&sport->port, ctrl, UARTCTRL); } static void lpuart32_hw_setup(struct lpuart_port *sport) @@ -1880,16 +1880,16 @@ static void lpuart32_hw_setup(struct lpuart_port *sport) static int lpuart32_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u32 temp; + u32 fifo; /* determine FIFO size */ - temp = lpuart32_read(port, UARTFIFO); + fifo = lpuart32_read(port, UARTFIFO); - sport->txfifo_size = UARTFIFO_DEPTH((temp >> UARTFIFO_TXSIZE_OFF) & + sport->txfifo_size = UARTFIFO_DEPTH((fifo >> UARTFIFO_TXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK); port->fifosize = sport->txfifo_size; - sport->rxfifo_size = UARTFIFO_DEPTH((temp >> UARTFIFO_RXSIZE_OFF) & + sport->rxfifo_size = UARTFIFO_DEPTH((fifo >> UARTFIFO_RXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK); /* @@ -1934,16 +1934,16 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) static void lpuart_shutdown(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - u8 temp; + u8 cr2; unsigned long flags; uart_port_lock_irqsave(port, &flags); /* disable Rx/Tx and interrupts */ - temp = readb(port->membase + UARTCR2); - temp &= ~(UARTCR2_TE | UARTCR2_RE | + cr2 = readb(port->membase + UARTCR2); + cr2 &= ~(UARTCR2_TE | UARTCR2_RE | UARTCR2_TIE | UARTCR2_TCIE | UARTCR2_RIE); - writeb(temp, port->membase + UARTCR2); + writeb(cr2, port->membase + UARTCR2); uart_port_unlock_irqrestore(port, flags); @@ -2141,7 +2141,7 @@ static void __lpuart32_serial_setbrg(struct uart_port *port, unsigned int baudrate, bool use_rx_dma, bool use_tx_dma) { - u32 sbr, osr, baud_diff, tmp_osr, tmp_sbr, tmp_diff, tmp; + u32 sbr, osr, baud_diff, tmp_osr, tmp_sbr, tmp_diff, baud; u32 clk = port->uartclk; /* @@ -2170,9 +2170,9 @@ static void __lpuart32_serial_setbrg(struct uart_port *port, tmp_diff = clk / (tmp_osr * tmp_sbr) - baudrate; /* select best values between sbr and sbr+1 */ - tmp = clk / (tmp_osr * (tmp_sbr + 1)); - if (tmp_diff > (baudrate - tmp)) { - tmp_diff = baudrate - tmp; + baud = clk / (tmp_osr * (tmp_sbr + 1)); + if (tmp_diff > (baudrate - baud)) { + tmp_diff = baudrate - baud; tmp_sbr++; } @@ -2194,23 +2194,23 @@ static void __lpuart32_serial_setbrg(struct uart_port *port, dev_warn(port->dev, "unacceptable baud rate difference of more than 3%%\n"); - tmp = lpuart32_read(port, UARTBAUD); + baud = lpuart32_read(port, UARTBAUD); if ((osr > 3) && (osr < 8)) - tmp |= UARTBAUD_BOTHEDGE; + baud |= UARTBAUD_BOTHEDGE; - tmp &= ~(UARTBAUD_OSR_MASK << UARTBAUD_OSR_SHIFT); - tmp |= ((osr-1) & UARTBAUD_OSR_MASK) << UARTBAUD_OSR_SHIFT; + baud &= ~(UARTBAUD_OSR_MASK << UARTBAUD_OSR_SHIFT); + baud |= ((osr-1) & UARTBAUD_OSR_MASK) << UARTBAUD_OSR_SHIFT; - tmp &= ~UARTBAUD_SBR_MASK; - tmp |= sbr & UARTBAUD_SBR_MASK; + baud &= ~UARTBAUD_SBR_MASK; + baud |= sbr & UARTBAUD_SBR_MASK; if (!use_rx_dma) - tmp &= ~UARTBAUD_RDMAE; + baud &= ~UARTBAUD_RDMAE; if (!use_tx_dma) - tmp &= ~UARTBAUD_TDMAE; + baud &= ~UARTBAUD_TDMAE; - lpuart32_write(port, tmp, UARTBAUD); + lpuart32_write(port, baud, UARTBAUD); } static void lpuart32_serial_setbrg(struct lpuart_port *sport, @@ -3085,7 +3085,7 @@ static int lpuart_suspend_noirq(struct device *dev) static int lpuart_resume_noirq(struct device *dev) { struct lpuart_port *sport = dev_get_drvdata(dev); - u32 val; + u32 stat; pinctrl_pm_select_default_state(dev); @@ -3094,8 +3094,8 @@ static int lpuart_resume_noirq(struct device *dev) /* clear the wakeup flags */ if (lpuart_is_32(sport)) { - val = lpuart32_read(&sport->port, UARTSTAT); - lpuart32_write(&sport->port, val, UARTSTAT); + stat = lpuart32_read(&sport->port, UARTSTAT); + lpuart32_write(&sport->port, stat, UARTSTAT); } } From a26503092c75abba70a0be2aa01145ecf90c2a22 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 24 Feb 2025 12:18:30 +0000 Subject: [PATCH 1519/2157] serial: 8250_dma: terminate correct DMA in tx_dma_flush() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When flushing transmit side DMA, it is the transmit channel that should be terminated, not the receive channel. Fixes: 9e512eaaf8f40 ("serial: 8250: Fix fifo underflow on flush") Cc: stable Reported-by: Wentao Guan Signed-off-by: John Keeping Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250224121831.1429323-1-jkeeping@inmusicbrands.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index f245a84f4a50..bdd26c9f34bd 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -162,7 +162,7 @@ void serial8250_tx_dma_flush(struct uart_8250_port *p) */ dma->tx_size = 0; - dmaengine_terminate_async(dma->rxchan); + dmaengine_terminate_async(dma->txchan); } int serial8250_rx_dma(struct uart_8250_port *p) From 3c3cede051cd49d96f52d7ccb115edadf26a9028 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 20 Mar 2025 11:19:20 +0000 Subject: [PATCH 1520/2157] tty: caif: removed unused function debugfs_tx() Remove debugfs_tx() which was added when the caif driver was added in commit 9b27105b4a44 ("net-caif-driver: add CAIF serial driver (ldisc)") but it has never been used. Flagged by LLVM 19.1.7 W=1 builds. Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20250320-caif-debugfs-tx-v1-1-be5654770088@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/caif/caif_serial.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index e7d1b9301fde..c398ac42eae9 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -126,15 +126,6 @@ static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size) ser->rx_blob.data = ser->rx_data; ser->rx_blob.size = size; } - -static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) -{ - if (size > sizeof(ser->tx_data)) - size = sizeof(ser->tx_data); - memcpy(ser->tx_data, data, size); - ser->tx_blob.data = ser->tx_data; - ser->tx_blob.size = size; -} #else static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty) { @@ -151,11 +142,6 @@ static inline void update_tty_status(struct ser_device *ser) static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size) { } - -static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) -{ -} - #endif static void ldisc_receive(struct tty_struct *tty, const u8 *data, From e98ab45ec5182605d2e00114cba3bbf46b0ea27f Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Fri, 7 Mar 2025 14:54:46 +0800 Subject: [PATCH 1521/2157] tty: serial: lpuart: only disable CTS instead of overwriting the whole UARTMODIR register No need to overwrite the whole UARTMODIR register before waiting the transmit engine complete, actually our target here is only to disable CTS flow control to avoid the dirty data in TX FIFO may block the transmit engine complete. Also delete the following duplicate CTS disable configuration. Fixes: d5a2e0834364 ("tty: serial: lpuart: disable flow control while waiting for the transmit engine to complete") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20250307065446.1122482-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c8cc0a241fba..33eeefa6fa8f 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2349,15 +2349,19 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, /* update the per-port timeout */ uart_update_timeout(port, termios->c_cflag, baud); + /* + * disable CTS to ensure the transmit engine is not blocked by the flow + * control when there is dirty data in TX FIFO + */ + lpuart32_write(port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); + /* * LPUART Transmission Complete Flag may never be set while queuing a break * character, so skip waiting for transmission complete when UARTCTRL_SBK is * asserted. */ - if (!(old_ctrl & UARTCTRL_SBK)) { - lpuart32_write(port, 0, UARTMODIR); + if (!(old_ctrl & UARTCTRL_SBK)) lpuart32_wait_bit_set(port, UARTSTAT, UARTSTAT_TC); - } /* disable transmit and receive */ lpuart32_write(port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE), @@ -2365,8 +2369,6 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ - lpuart32_write(port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ lpuart32_write(port, ctrl, UARTCTRL); /* re-enable the CTS if needed */ From 87975ca9917741167a2531a6281c1a4d84f87f8b Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 27 Feb 2025 19:19:06 +0800 Subject: [PATCH 1522/2157] dt-bindings: serial: snps-dw-apb-uart: Add support for rk3562 The UART core on Rockchip's RK3562 is the same as the one already included in generic dw-apb-uart. Extend the binding accordingly to allow compatible = "rockchip,rk3562-uart", "snps,dw-apb-uart"; Signed-off-by: Kever Yang Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250227111913.2344207-9-kever.yang@rock-chips.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml index 1c163cb5dff1..1c16ca3b4e29 100644 --- a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml +++ b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml @@ -51,6 +51,7 @@ properties: - rockchip,rk3368-uart - rockchip,rk3399-uart - rockchip,rk3528-uart + - rockchip,rk3562-uart - rockchip,rk3568-uart - rockchip,rk3576-uart - rockchip,rk3588-uart From b5ad18a5d87aa3b564e4bb19b9690f5fc7039b9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 17:35:37 +0100 Subject: [PATCH 1523/2157] tty: serial: pl011: remove incorrect of_match_ptr annotation Building with W=1 shows a warning about sbsa_uart_of_match being unused when CONFIG_OF is disabled: drivers/tty/serial/amba-pl011.c:2945:34: error: unused variable 'sbsa_uart_of_match' [-Werror,-Wunused-const-variable] The driver is not actually used on any machines that are built with CONFIG_OF disabled, so using of_match_ptr() won't save any actual memory, and it can be best removed. The corresponding ACPI_PTR() annotation does save a few bytes on 32-bit arm since CONFIG_ACPI is not available, but for consistency it seems better to remove both along with the __maybe_unused annotation on the ACPI table. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250225163556.4169086-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 6 +++--- drivers/tty/serial/ma35d1_serial.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 047fb9f51539..dc092204b472 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -3051,7 +3051,7 @@ static const struct of_device_id sbsa_uart_of_match[] = { }; MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); -static const struct acpi_device_id __maybe_unused sbsa_uart_acpi_match[] = { +static const struct acpi_device_id sbsa_uart_acpi_match[] = { { "ARMH0011", 0 }, { "ARMHB000", 0 }, {}, @@ -3064,8 +3064,8 @@ static struct platform_driver arm_sbsa_uart_platform_driver = { .driver = { .name = "sbsa-uart", .pm = &pl011_dev_pm_ops, - .of_match_table = of_match_ptr(sbsa_uart_of_match), - .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), + .of_match_table = sbsa_uart_of_match, + .acpi_match_table = sbsa_uart_acpi_match, .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), }, }; diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c index 8dcad52eedfd..285b0fe41a86 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -799,7 +799,7 @@ static struct platform_driver ma35d1serial_driver = { .resume = ma35d1serial_resume, .driver = { .name = "ma35d1-uart", - .of_match_table = of_match_ptr(ma35d1_serial_of_match), + .of_match_table = ma35d1_serial_of_match, }, }; From 81100b9a7b0515132996d62a7a676a77676cb6e3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Mar 2025 20:06:11 +0100 Subject: [PATCH 1524/2157] serial: sh-sci: Save and restore more registers On (H)SCIF with a Baud Rate Generator for External Clock (BRG), there are multiple ways to configure the requested serial speed. If firmware uses a different method than Linux, and if any debug info is printed after the Bit Rate Register (SCBRR) is restored, but before termios is reconfigured (which configures the alternative method), the system may lock-up during resume. Fix this by saving and restoring the contents of the BRG Frequency Division (SCDL) and Clock Select (SCCKS) registers as well. Also save and restore the HSCIF's Sampling Rate Register (HSSRR), which configures the sampling point, and the SCIFA/SCIFB's Serial Port Control and Data Registers (SCPCR/SCPDR), which configure the optional control flow signals. After this, all registers that are not saved/restored are either: - read-only, - write-only, - status registers containing flags with clear-after-set semantics, - FIFO Data Count Trigger registers, which do not matter much for the serial console. Fixes: 22a6984c5b5df8ea ("serial: sh-sci: Update the suspend/resume support") Signed-off-by: Geert Uytterhoeven Tested-by: Claudiu Beznea Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/11c2eab45d48211e75d8b8202cce60400880fe55.1741114989.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2db2d85003f7..1c8480d0338e 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -105,10 +105,15 @@ struct plat_sci_reg { }; struct sci_suspend_regs { + u16 scdl; + u16 sccks; u16 scsmr; u16 scscr; u16 scfcr; u16 scsptr; + u16 hssrr; + u16 scpcr; + u16 scpdr; u8 scbrr; u8 semr; }; @@ -3563,6 +3568,10 @@ static void sci_console_save(struct sci_port *s) struct sci_suspend_regs *regs = &s->suspend_regs; struct uart_port *port = &s->port; + if (sci_getreg(port, SCDL)->size) + regs->scdl = sci_serial_in(port, SCDL); + if (sci_getreg(port, SCCKS)->size) + regs->sccks = sci_serial_in(port, SCCKS); if (sci_getreg(port, SCSMR)->size) regs->scsmr = sci_serial_in(port, SCSMR); if (sci_getreg(port, SCSCR)->size) @@ -3573,6 +3582,12 @@ static void sci_console_save(struct sci_port *s) regs->scsptr = sci_serial_in(port, SCSPTR); if (sci_getreg(port, SCBRR)->size) regs->scbrr = sci_serial_in(port, SCBRR); + if (sci_getreg(port, HSSRR)->size) + regs->hssrr = sci_serial_in(port, HSSRR); + if (sci_getreg(port, SCPCR)->size) + regs->scpcr = sci_serial_in(port, SCPCR); + if (sci_getreg(port, SCPDR)->size) + regs->scpdr = sci_serial_in(port, SCPDR); if (sci_getreg(port, SEMR)->size) regs->semr = sci_serial_in(port, SEMR); } @@ -3582,6 +3597,10 @@ static void sci_console_restore(struct sci_port *s) struct sci_suspend_regs *regs = &s->suspend_regs; struct uart_port *port = &s->port; + if (sci_getreg(port, SCDL)->size) + sci_serial_out(port, SCDL, regs->scdl); + if (sci_getreg(port, SCCKS)->size) + sci_serial_out(port, SCCKS, regs->sccks); if (sci_getreg(port, SCSMR)->size) sci_serial_out(port, SCSMR, regs->scsmr); if (sci_getreg(port, SCSCR)->size) @@ -3592,6 +3611,12 @@ static void sci_console_restore(struct sci_port *s) sci_serial_out(port, SCSPTR, regs->scsptr); if (sci_getreg(port, SCBRR)->size) sci_serial_out(port, SCBRR, regs->scbrr); + if (sci_getreg(port, HSSRR)->size) + sci_serial_out(port, HSSRR, regs->hssrr); + if (sci_getreg(port, SCPCR)->size) + sci_serial_out(port, SCPCR, regs->scpcr); + if (sci_getreg(port, SCPDR)->size) + sci_serial_out(port, SCPDR, regs->scpdr); if (sci_getreg(port, SEMR)->size) sci_serial_out(port, SEMR, regs->semr); } From 9e2a0d4591d2fef24f79940b884470e674374ed8 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Wed, 5 Mar 2025 17:51:20 +0800 Subject: [PATCH 1525/2157] serial: icom: fix code format problems Fix below inconsistent indenting smatch warning. smatch warnings: drivers/tty/serial/icom.c:1768 icom_probe() warn: inconsistent indenting Removed that useless (void *), the code would fit on a single 100c line Removed '{' and '}'. Signed-off-by: Charles Han Link: https://lore.kernel.org/r/20250305095120.7518-1-hanchunchao@inspur.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/icom.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c index 29e42831df39..7fb995a8490e 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -1764,11 +1764,10 @@ static int icom_probe(struct pci_dev *dev, goto probe_exit1; } - /* save off irq and request irq line */ - retval = request_irq(dev->irq, icom_interrupt, IRQF_SHARED, ICOM_DRIVER_NAME, (void *)icom_adapter); - if (retval) { - goto probe_exit2; - } + /* save off irq and request irq line */ + retval = request_irq(dev->irq, icom_interrupt, IRQF_SHARED, ICOM_DRIVER_NAME, icom_adapter); + if (retval) + goto probe_exit2; retval = icom_load_ports(icom_adapter); From fbb1dcd8871b7d04880ed793af03febb9669db04 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 18 Mar 2025 09:53:53 +0100 Subject: [PATCH 1526/2157] dt-bindings: serial: snps-dw-apb-uart: document RZ/N1 binding without DMA Renesas RZ/N1D has this UART with and without DMA support. Currently, only the binding with DMA support is described. Add the missing one without DMA support which can fallback even more. Signed-off-by: Wolfram Sang Reviewed-by: Miquel Raynal Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250318085353.18990-2-wsa+renesas@sang-engineering.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/snps-dw-apb-uart.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml index 1c16ca3b4e29..1aa3480d8d81 100644 --- a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml +++ b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml @@ -13,6 +13,20 @@ allOf: - $ref: serial.yaml# - $ref: rs485.yaml# + - if: + properties: + compatible: + items: + - enum: + - renesas,r9a06g032-uart + - renesas,r9a06g033-uart + - const: renesas,rzn1-uart + - const: snps,dw-apb-uart + then: + properties: + dmas: false + dma-names: false + - if: properties: compatible: @@ -30,6 +44,12 @@ allOf: properties: compatible: oneOf: + - items: + - enum: + - renesas,r9a06g032-uart + - renesas,r9a06g033-uart + - const: renesas,rzn1-uart + - const: snps,dw-apb-uart - items: - enum: - renesas,r9a06g032-uart From 3d5390f4dbe633472b2a4824e66ca5c4eac6fb19 Mon Sep 17 00:00:00 2001 From: Chaitanya Vadrevu Date: Tue, 18 Mar 2025 16:39:12 -0500 Subject: [PATCH 1527/2157] serial: 8250: add driver for NI UARTs The National Instruments (NI) 16550 is a 16550-like UART with larger FIFOs and embedded RS-232/RS-485 transceiver control circuitry. This patch adds a driver that can operate this UART, which is used for onboard serial ports in several NI embedded controller designs. Portions of this driver were originally written by Jaeden Amero and Karthik Manamcheri, with extensive cleanups and refactors since by Brenda Streiff. Cc: Gratian Crisan Co-developed-by: Jason Smith Signed-off-by: Jason Smith Signed-off-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/a3b0df6d-1dd5-4cc4-a7e1-4ed51fb9e4cc@emerson.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 + drivers/tty/serial/8250/8250_ni.c | 461 ++++++++++++++++++++++++++++++ drivers/tty/serial/8250/Kconfig | 13 + drivers/tty/serial/8250/Makefile | 1 + 4 files changed, 481 insertions(+) create mode 100644 drivers/tty/serial/8250/8250_ni.c diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f..1573151c3cd1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16250,6 +16250,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/next F: drivers/mtd/nand/ F: include/linux/mtd/*nand*.h +NATIONAL INSTRUMENTS SERIAL DRIVER +M: Chaitanya Vadrevu +L: linux-serial@vger.kernel.org +S: Maintained +F: drivers/tty/serial/8250/8250_ni.c + NATIVE INSTRUMENTS USB SOUND INTERFACE DRIVER M: Daniel Mack L: linux-sound@vger.kernel.org diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c new file mode 100644 index 000000000000..b10a42d2ad63 --- /dev/null +++ b/drivers/tty/serial/8250/8250_ni.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * NI 16550 UART Driver + * + * The National Instruments (NI) 16550 is a UART that is compatible with the + * TL16C550C and OX16C950B register interfaces, but has additional functions + * for RS-485 transceiver control. This driver implements support for the + * additional functionality on top of the standard serial8250 core. + * + * Copyright 2012-2023 National Instruments Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "8250.h" + +/* Extra bits in UART_ACR */ +#define NI16550_ACR_AUTO_DTR_EN BIT(4) + +/* TFS - TX FIFO Size */ +#define NI16550_TFS_OFFSET 0x0C +/* RFS - RX FIFO Size */ +#define NI16550_RFS_OFFSET 0x0D + +/* PMR - Port Mode Register */ +#define NI16550_PMR_OFFSET 0x0E +/* PMR[1:0] - Port Capabilities */ +#define NI16550_PMR_CAP_MASK GENMASK(1, 0) +#define NI16550_PMR_NOT_IMPL FIELD_PREP(NI16550_PMR_CAP_MASK, 0) /* not implemented */ +#define NI16550_PMR_CAP_RS232 FIELD_PREP(NI16550_PMR_CAP_MASK, 1) /* RS-232 capable */ +#define NI16550_PMR_CAP_RS485 FIELD_PREP(NI16550_PMR_CAP_MASK, 2) /* RS-485 capable */ +#define NI16550_PMR_CAP_DUAL FIELD_PREP(NI16550_PMR_CAP_MASK, 3) /* dual-port */ +/* PMR[4] - Interface Mode */ +#define NI16550_PMR_MODE_MASK GENMASK(4, 4) +#define NI16550_PMR_MODE_RS232 FIELD_PREP(NI16550_PMR_MODE_MASK, 0) /* currently 232 */ +#define NI16550_PMR_MODE_RS485 FIELD_PREP(NI16550_PMR_MODE_MASK, 1) /* currently 485 */ + +/* PCR - Port Control Register */ +/* + * Wire Mode | Tx enabled? | Rx enabled? + * ---------------|----------------------|-------------------------- + * PCR_RS422 | Always | Always + * PCR_ECHO_RS485 | When DTR asserted | Always + * PCR_DTR_RS485 | When DTR asserted | Disabled when TX enabled + * PCR_AUTO_RS485 | When data in TX FIFO | Disabled when TX enabled + */ +#define NI16550_PCR_OFFSET 0x0F +#define NI16550_PCR_WIRE_MODE_MASK GENMASK(1, 0) +#define NI16550_PCR_RS422 FIELD_PREP(NI16550_PCR_WIRE_MODE_MASK, 0) +#define NI16550_PCR_ECHO_RS485 FIELD_PREP(NI16550_PCR_WIRE_MODE_MASK, 1) +#define NI16550_PCR_DTR_RS485 FIELD_PREP(NI16550_PCR_WIRE_MODE_MASK, 2) +#define NI16550_PCR_AUTO_RS485 FIELD_PREP(NI16550_PCR_WIRE_MODE_MASK, 3) +#define NI16550_PCR_TXVR_ENABLE_BIT BIT(3) +#define NI16550_PCR_RS485_TERMINATION_BIT BIT(6) + +/* flags for ni16550_device_info */ +#define NI_HAS_PMR BIT(0) + +struct ni16550_device_info { + u32 uartclk; + u8 prescaler; + u8 flags; +}; + +struct ni16550_data { + int line; + struct clk *clk; +}; + +static int ni16550_enable_transceivers(struct uart_port *port) +{ + u8 pcr; + + pcr = port->serial_in(port, NI16550_PCR_OFFSET); + pcr |= NI16550_PCR_TXVR_ENABLE_BIT; + dev_dbg(port->dev, "enable transceivers: write pcr: 0x%02x\n", pcr); + port->serial_out(port, NI16550_PCR_OFFSET, pcr); + + return 0; +} + +static int ni16550_disable_transceivers(struct uart_port *port) +{ + u8 pcr; + + pcr = port->serial_in(port, NI16550_PCR_OFFSET); + pcr &= ~NI16550_PCR_TXVR_ENABLE_BIT; + dev_dbg(port->dev, "disable transceivers: write pcr: 0x%02x\n", pcr); + port->serial_out(port, NI16550_PCR_OFFSET, pcr); + + return 0; +} + +static int ni16550_rs485_config(struct uart_port *port, + struct ktermios *termios, + struct serial_rs485 *rs485) +{ + struct uart_8250_port *up = container_of(port, struct uart_8250_port, port); + u8 pcr; + + pcr = serial_in(up, NI16550_PCR_OFFSET); + pcr &= ~NI16550_PCR_WIRE_MODE_MASK; + + if ((rs485->flags & SER_RS485_MODE_RS422) || + !(rs485->flags & SER_RS485_ENABLED)) { + /* RS-422 */ + pcr |= NI16550_PCR_RS422; + up->acr &= ~NI16550_ACR_AUTO_DTR_EN; + } else { + /* RS-485 2-wire Auto */ + pcr |= NI16550_PCR_AUTO_RS485; + up->acr |= NI16550_ACR_AUTO_DTR_EN; + } + + dev_dbg(port->dev, "config rs485: write pcr: 0x%02x, acr: %02x\n", pcr, up->acr); + serial_out(up, NI16550_PCR_OFFSET, pcr); + serial_icr_write(up, UART_ACR, up->acr); + + return 0; +} + +static bool is_pmr_rs232_mode(struct uart_8250_port *up) +{ + u8 pmr = serial_in(up, NI16550_PMR_OFFSET); + u8 pmr_mode = pmr & NI16550_PMR_MODE_MASK; + u8 pmr_cap = pmr & NI16550_PMR_CAP_MASK; + + /* + * If the PMR is not implemented, then by default NI UARTs are + * connected to RS-485 transceivers + */ + if (pmr_cap == NI16550_PMR_NOT_IMPL) + return false; + + if (pmr_cap == NI16550_PMR_CAP_DUAL) + /* + * If the port is dual-mode capable, then read the mode bit + * to know the current mode + */ + return pmr_mode == NI16550_PMR_MODE_RS232; + /* + * If it is not dual-mode capable, then decide based on the + * capability + */ + return pmr_cap == NI16550_PMR_CAP_RS232; +} + +static void ni16550_config_prescaler(struct uart_8250_port *up, + u8 prescaler) +{ + /* + * Page in the Enhanced Mode Registers + * Sets EFR[4] for Enhanced Mode. + */ + u8 lcr_value; + u8 efr_value; + + lcr_value = serial_in(up, UART_LCR); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + + efr_value = serial_in(up, UART_EFR); + efr_value |= UART_EFR_ECB; + + serial_out(up, UART_EFR, efr_value); + + /* Page out the Enhanced Mode Registers */ + serial_out(up, UART_LCR, lcr_value); + + /* Set prescaler to CPR register. */ + serial_out(up, UART_SCR, UART_CPR); + serial_out(up, UART_ICR, prescaler); +} + +static const struct serial_rs485 ni16550_rs485_supported = { + .flags = SER_RS485_ENABLED | SER_RS485_MODE_RS422 | SER_RS485_RTS_ON_SEND | + SER_RS485_RTS_AFTER_SEND, + /* + * delay_rts_* and RX_DURING_TX are not supported. + * + * RTS_{ON,AFTER}_SEND are supported, but ignored; the transceiver + * is connected in only one way and we don't need userspace to tell + * us, but want to retain compatibility with applications that do. + */ +}; + +static void ni16550_rs485_setup(struct uart_port *port) +{ + port->rs485_config = ni16550_rs485_config; + port->rs485_supported = ni16550_rs485_supported; + /* + * The hardware comes up by default in 2-wire auto mode and we + * set the flags to represent that + */ + port->rs485.flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND; +} + +static int ni16550_port_startup(struct uart_port *port) +{ + int ret; + + ret = serial8250_do_startup(port); + if (ret) + return ret; + + return ni16550_enable_transceivers(port); +} + +static void ni16550_port_shutdown(struct uart_port *port) +{ + ni16550_disable_transceivers(port); + + serial8250_do_shutdown(port); +} + +static int ni16550_get_regs(struct platform_device *pdev, + struct uart_port *port) +{ + struct resource *regs; + + regs = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (regs) { + port->iotype = UPIO_PORT; + port->iobase = regs->start; + + return 0; + } + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (regs) { + port->iotype = UPIO_MEM; + port->mapbase = regs->start; + port->mapsize = resource_size(regs); + port->flags |= UPF_IOREMAP; + + port->membase = devm_ioremap(&pdev->dev, port->mapbase, + port->mapsize); + if (!port->membase) + return -ENOMEM; + + return 0; + } + + dev_err(&pdev->dev, "no registers defined\n"); + return -EINVAL; +} + +/* + * Very old implementations don't have the TFS or RFS registers + * defined, so we may read all-0s or all-1s. For such devices, + * assume a FIFO size of 128. + */ +static u8 ni16550_read_fifo_size(struct uart_8250_port *uart, int reg) +{ + u8 value = serial_in(uart, reg); + + if (value == 0x00 || value == 0xFF) + return 128; + + return value; +} + +static void ni16550_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + up->mcr |= UART_MCR_CLKSEL; + serial8250_do_set_mctrl(port, mctrl); +} + +static int ni16550_probe(struct platform_device *pdev) +{ + const struct ni16550_device_info *info; + struct device *dev = &pdev->dev; + struct uart_8250_port uart = {}; + unsigned int txfifosz, rxfifosz; + unsigned int prescaler = 0; + struct ni16550_data *data; + const char *portmode; + bool rs232_property; + int ret; + int irq; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock_init(&uart.port.lock); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = ni16550_get_regs(pdev, &uart.port); + if (ret < 0) + return ret; + + /* early setup so that serial_in()/serial_out() work */ + serial8250_set_defaults(&uart); + + info = device_get_match_data(dev); + + uart.port.dev = dev; + uart.port.irq = irq; + uart.port.irqflags = IRQF_SHARED; + uart.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF + | UPF_FIXED_PORT | UPF_FIXED_TYPE; + uart.port.startup = ni16550_port_startup; + uart.port.shutdown = ni16550_port_shutdown; + + /* + * Hardware instantiation of FIFO sizes are held in registers. + */ + txfifosz = ni16550_read_fifo_size(&uart, NI16550_TFS_OFFSET); + rxfifosz = ni16550_read_fifo_size(&uart, NI16550_RFS_OFFSET); + + dev_dbg(dev, "NI 16550 has TX FIFO size %u, RX FIFO size %u\n", + txfifosz, rxfifosz); + + uart.port.type = PORT_16550A; + uart.port.fifosize = txfifosz; + uart.tx_loadsz = txfifosz; + uart.fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10; + uart.capabilities = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR; + + /* + * Declaration of the base clock frequency can come from one of: + * - static declaration in this driver (for older ACPI IDs) + * - a "clock-frquency" ACPI + */ + if (info->uartclk) + uart.port.uartclk = info->uartclk; + if (device_property_read_u32(dev, "clock-frequency", + &uart.port.uartclk)) { + data->clk = devm_clk_get_enabled(dev, NULL); + if (!IS_ERR(data->clk)) + uart.port.uartclk = clk_get_rate(data->clk); + } + + if (!uart.port.uartclk) { + dev_err(dev, "unable to determine clock frequency!\n"); + ret = -ENODEV; + goto err; + } + + if (info->prescaler) + prescaler = info->prescaler; + device_property_read_u32(dev, "clock-prescaler", &prescaler); + + if (prescaler != 0) { + uart.port.set_mctrl = ni16550_set_mctrl; + ni16550_config_prescaler(&uart, (u8)prescaler); + } + + /* + * The determination of whether or not this is an RS-485 or RS-232 port + * can come from the PMR (if present), otherwise we're solely an RS-485 + * port. + * + * This is a device-specific property, and there are old devices in the + * field using "transceiver" as an ACPI property, so we have to check + * for that as well. + */ + if (!device_property_read_string(dev, "transceiver", &portmode)) { + rs232_property = strncmp(portmode, "RS-232", 6) == 0; + + dev_dbg(dev, "port is in %s mode (via device property)\n", + rs232_property ? "RS-232" : "RS-485"); + } else if (info->flags & NI_HAS_PMR) { + rs232_property = is_pmr_rs232_mode(&uart); + + dev_dbg(dev, "port is in %s mode (via PMR)\n", + rs232_property ? "RS-232" : "RS-485"); + } else { + rs232_property = 0; + + dev_dbg(dev, "port is fixed as RS-485\n"); + } + + if (!rs232_property) { + /* + * Neither the 'transceiver' property nor the PMR indicate + * that this is an RS-232 port, so it must be an RS-485 one. + */ + ni16550_rs485_setup(&uart.port); + } + + ret = serial8250_register_8250_port(&uart); + if (ret < 0) + goto err; + data->line = ret; + + platform_set_drvdata(pdev, data); + return 0; + +err: + return ret; +} + +static void ni16550_remove(struct platform_device *pdev) +{ + struct ni16550_data *data = platform_get_drvdata(pdev); + + serial8250_unregister_port(data->line); +} + +#ifdef CONFIG_ACPI +/* NI 16550 RS-485 Interface */ +static const struct ni16550_device_info nic7750 = { + .uartclk = 33333333, +}; + +/* NI CVS-145x RS-485 Interface */ +static const struct ni16550_device_info nic7772 = { + .uartclk = 1843200, + .flags = NI_HAS_PMR, +}; + +/* NI cRIO-904x RS-485 Interface */ +static const struct ni16550_device_info nic792b = { + /* Sets UART clock rate to 22.222 MHz with 1.125 prescale */ + .uartclk = 22222222, + .prescaler = 0x09, +}; + +/* NI sbRIO 96x8 RS-232/485 Interfaces */ +static const struct ni16550_device_info nic7a69 = { + /* Set UART clock rate to 29.629 MHz with 1.125 prescale */ + .uartclk = 29629629, + .prescaler = 0x09, +}; +static const struct acpi_device_id ni16550_acpi_match[] = { + { "NIC7750", (kernel_ulong_t)&nic7750 }, + { "NIC7772", (kernel_ulong_t)&nic7772 }, + { "NIC792B", (kernel_ulong_t)&nic792b }, + { "NIC7A69", (kernel_ulong_t)&nic7a69 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, ni16550_acpi_match); +#endif + +static struct platform_driver ni16550_driver = { + .driver = { + .name = "ni16550", + .acpi_match_table = ACPI_PTR(ni16550_acpi_match), + }, + .probe = ni16550_probe, + .remove = ni16550_remove, +}; + +module_platform_driver(ni16550_driver); + +MODULE_AUTHOR("Emerson Electric Co."); +MODULE_DESCRIPTION("NI 16550 Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 55d26d16df9b..bd3d636ff962 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -569,6 +569,19 @@ config SERIAL_8250_BCM7271 including DMA support and high accuracy BAUD rates, say Y to this option. If unsure, say N. +config SERIAL_8250_NI + tristate "NI 16550 based serial port" + depends on SERIAL_8250 + depends on (X86 && ACPI) || COMPILE_TEST + help + This driver supports the integrated serial ports on National + Instruments (NI) controller hardware. This is required for all NI + controller models with onboard RS-485 or dual-mode RS-485/RS-232 + ports. + + To compile this driver as a module, choose M here: the module + will be called 8250_ni. + config SERIAL_OF_PLATFORM tristate "Devicetree based probing for 8250 ports" depends on SERIAL_8250 && OF diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index 1516de629b61..b04eeda03b23 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_SERIAL_8250_LPSS) += 8250_lpss.o obj-$(CONFIG_SERIAL_8250_MEN_MCB) += 8250_men_mcb.o obj-$(CONFIG_SERIAL_8250_MID) += 8250_mid.o obj-$(CONFIG_SERIAL_8250_MT6577) += 8250_mtk.o +obj-$(CONFIG_SERIAL_8250_NI) += 8250_ni.o obj-$(CONFIG_SERIAL_OF_PLATFORM) += 8250_of.o obj-$(CONFIG_SERIAL_8250_OMAP) += 8250_omap.o obj-$(CONFIG_SERIAL_8250_PARISC) += 8250_parisc.o From 2790ce23951f0c497810c44ad60a126a59c8d84c Mon Sep 17 00:00:00 2001 From: Cheick Traore Date: Thu, 20 Mar 2025 16:25:40 +0100 Subject: [PATCH 1528/2157] serial: stm32: do not deassert RS485 RTS GPIO prematurely If stm32_usart_start_tx is called with an empty xmit buffer, RTS GPIO could be deasserted prematurely, as bytes in TX FIFO are still transmitting. So this patch remove rts disable when xmit buffer is empty. Fixes: d7c76716169d ("serial: stm32: Use TC interrupt to deassert GPIO RTS in RS485 mode") Cc: stable Signed-off-by: Cheick Traore Link: https://lore.kernel.org/r/20250320152540.709091-1-cheick.traore@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 4c97965ec43b..ad06b760cfca 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -965,10 +965,8 @@ static void stm32_usart_start_tx(struct uart_port *port) { struct tty_port *tport = &port->state->port; - if (kfifo_is_empty(&tport->xmit_fifo) && !port->x_char) { - stm32_usart_rs485_rts_disable(port); + if (kfifo_is_empty(&tport->xmit_fifo) && !port->x_char) return; - } stm32_usart_rs485_rts_enable(port); From a70a3a6322131632cc6cf71e9d2fa6409a029fd7 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:41 +0100 Subject: [PATCH 1529/2157] irqdomain: soc: Switch to irq_find_mapping() irq_linear_revmap() is deprecated, so remove all its uses and supersede them by an identical call to irq_find_mapping(). Signed-off-by: Jiri Slaby (SUSE) Cc: Qiang Zhao Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20250319092951.37667-49-jirislaby@kernel.org Signed-off-by: Christophe Leroy --- drivers/soc/fsl/qe/qe_ic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c index bbae3d39c7be..77bf0e83ffcc 100644 --- a/drivers/soc/fsl/qe/qe_ic.c +++ b/drivers/soc/fsl/qe/qe_ic.c @@ -344,7 +344,7 @@ static unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) if (irq == 0) return 0; - return irq_linear_revmap(qe_ic->irqhost, irq); + return irq_find_mapping(qe_ic->irqhost, irq); } /* Return an interrupt vector or 0 if no interrupt is pending. */ @@ -360,7 +360,7 @@ static unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) if (irq == 0) return 0; - return irq_linear_revmap(qe_ic->irqhost, irq); + return irq_find_mapping(qe_ic->irqhost, irq); } static void qe_ic_cascade_low(struct irq_desc *desc) From 29904d6c1be66882c7148a1439e0f671fc02e56c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 20 Mar 2025 14:03:17 +0200 Subject: [PATCH 1530/2157] MAINTAINERS: add myself as maintainer for the fsl-mc bus Both Laurentiu and Stuart left the company and are no longer involved with the fsl-mc bus. Remove them and add myself as maintainer. Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20250320120319.3520315-2-ioana.ciornei@nxp.com Signed-off-by: Christophe Leroy --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..1fd70e62829b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19294,8 +19294,7 @@ F: fs/qnx6/ F: include/linux/qnx6_fs.h QORIQ DPAA2 FSL-MC BUS DRIVER -M: Stuart Yoder -M: Laurentiu Tudor +M: Ioana Ciornei L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/ABI/stable/sysfs-bus-fsl-mc From baa9934908ad1cb6f7093c8ba15d518a43419f9a Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 20 Mar 2025 14:03:18 +0200 Subject: [PATCH 1531/2157] MAINTAINERS: fix nonexistent dtbinding file name The blamed commit converted the fsl,qoriq-mc.txt into fsl,qoriq-mc.yaml but forgot to also update the MAINTAINERS file to reference the new filename. Fix this by using the corrent filename - fsl,qoriq-mc.yaml. Fixes: bfb921b2a9d5 ("dt-bindings: misc: fsl,qoriq-mc: convert to yaml format") Signed-off-by: Ioana Ciornei Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20250320120319.3520315-3-ioana.ciornei@nxp.com Signed-off-by: Christophe Leroy --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1fd70e62829b..28a69aaa0fa5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19298,7 +19298,7 @@ M: Ioana Ciornei L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/ABI/stable/sysfs-bus-fsl-mc -F: Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt +F: Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst F: drivers/bus/fsl-mc/ F: include/uapi/linux/fsl_mc.h From 586739b1e8b14a38145a80e7684874d1a3268498 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 20 Mar 2025 14:03:19 +0200 Subject: [PATCH 1532/2157] MAINTAINERS: add the linuppc-dev list to the fsl-mc bus entry As discussed in the thread linked below, the fsl-mc bus lacked a clear maintenance path. Since Christophe accepted to take the fsl-mc bus patches through his soc fsl subtree, add the linuxppc-dev mailing list in the MAINTAINERS entry. Link: https://lore.kernel.org/r/1d822960-85a7-42b3-88cf-9d3dbc75a831@csgroup.eu Signed-off-by: Ioana Ciornei Acked-by: Christophe Leroy Link: https://lore.kernel.org/r/20250320120319.3520315-4-ioana.ciornei@nxp.com Signed-off-by: Christophe Leroy --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 28a69aaa0fa5..0ae0e479bd2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19295,6 +19295,7 @@ F: include/linux/qnx6_fs.h QORIQ DPAA2 FSL-MC BUS DRIVER M: Ioana Ciornei +L: linuxppc-dev@lists.ozlabs.org L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/ABI/stable/sysfs-bus-fsl-mc From 32a43b6014662f1227c906c9de00886aecc0a508 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 27 Feb 2025 09:11:06 +0530 Subject: [PATCH 1533/2157] drm/i915/watermark: Check bounds for scaler_users for dsc prefill latency Currently, during the computation of global watermarks, the latency for each scaler user is calculated to compute the DSC prefill latency. At this point, the number of scaler users can exceed the number of supported scalers, which is checked later in intel_atomic_setup_scalers(). This can cause issues when the number of scaler users exceeds the number of supported scalers. While checking for DSC prefill, ensure that the number of scaler users does not exceed the number of supported scalers. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4341 Fixes: a9b14af999b0 ("drm/i915/dsc: Check if vblank is sufficient for dsc prefill") Cc: Mitul Golani Cc: Ankit Nautiyal Cc: Jani Nikula Signed-off-by: Ankit Nautiyal Reviewed-by: Mitul Golani Link: https://patchwork.freedesktop.org/patch/msgid/20250227034106.1638203-1-ankit.k.nautiyal@intel.com (cherry picked from commit 5d6c69b712f9cb34063ef32168ce6a12af8acf0c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_watermark.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2d0de1c63308..621e97943542 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -2314,6 +2314,7 @@ cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state) static int dsc_prefill_latency(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int linetime = DIV_ROUND_UP(1000 * crtc_state->hw.adjusted_mode.htotal, @@ -2323,7 +2324,9 @@ dsc_prefill_latency(const struct intel_crtc_state *crtc_state) crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ? 2 : 1; u32 dsc_prefill_latency = 0; - if (!crtc_state->dsc.compression_enable || !num_scaler_users) + if (!crtc_state->dsc.compression_enable || + !num_scaler_users || + num_scaler_users > crtc->num_scalers) return dsc_prefill_latency; dsc_prefill_latency = DIV_ROUND_UP(15 * linetime * chroma_downscaling_factor, 10); From af9ec6e4682c089028d763b0b77c04fa2ddae268 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 15 Mar 2025 20:01:43 +0800 Subject: [PATCH 1534/2157] drm/i915/display: Fix build error without DRM_FBDEV_EMULATION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from : ./drivers/gpu/drm/i915/display/intel_fbdev.h: In function ‘intel_fbdev_framebuffer’: ./drivers/gpu/drm/i915/display/intel_fbdev.h:32:16: error: ‘NULL’ undeclared (first use in this function) 32 | return NULL; | ^~~~ ./drivers/gpu/drm/i915/display/intel_fbdev.h:1:1: note: ‘NULL’ is defined in header ‘’; did you forget to ‘#include ’? +++ |+#include 1 | /* SPDX-License-Identifier: MIT */ ./drivers/gpu/drm/i915/display/intel_fbdev.h:32:16: note: each undeclared identifier is reported only once for each function it appears in 32 | return NULL; | ^~~~ Build fails if CONFIG_DRM_FBDEV_EMULATION is n, add missing header file. Fixes: 9fa154f40eb6 ("drm/{i915,xe}: Run DRM default client setup") Signed-off-by: Yue Haibing Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250315120143.2344958-1-yuehaibing@huawei.com Signed-off-by: Jani Nikula (cherry picked from commit 97e81f78d3cbf061a809bbb8180a5b96395b8e03) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h index ca2c8c438f02..89bad3a2b01a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev.h @@ -6,6 +6,8 @@ #ifndef __INTEL_FBDEV_H__ #define __INTEL_FBDEV_H__ +#include + struct drm_fb_helper; struct drm_fb_helper_surface_size; struct drm_i915_private; From aae0594a7053c60b82621136257c8b648c67b512 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 17 Mar 2025 15:01:24 +0800 Subject: [PATCH 1535/2157] cxl/region: Fix the first aliased address miscalculation In extended linear cache(ELC) case, cxl_port_get_spa_cache_alias() helps to get the aliased address of a SPA, it considers the first address in CXL memory range is "region start + region cache size + 1", but it should be "region start + region cache size". So if a SPA is equal to "region start + region cache size", its aliased address should be "SPA - region cache size". Signed-off-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250317070124.815028-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6d8bdb53f258..c3f4dc244df7 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3460,7 +3460,7 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) if (!p->cache_size) return ~0ULL; - if (spa > p->res->start + p->cache_size) + if (spa >= p->res->start + p->cache_size) return spa - p->cache_size; return spa + p->cache_size; From 488975c2d3e171bd07ec5caaf3c9cbc6a0746e2d Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 12 Mar 2025 10:31:20 -0700 Subject: [PATCH 1536/2157] drm/xe/eustall: Fix a possible pointer dereference after free If devm_add_action_or_reset() isn't successful, xe_eu_stall_fini() is invoked. So, unsuccessful return from devm_add_action_or_reset() shouldn't dereference gt->eu_stall as xe_eu_stall_fini() already frees it. Fix this issue. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/eae49a414a7314921108e0388810aaee6261ad92.1741800396.git.harish.chegondi@intel.com (cherry picked from commit 278469ff569e1082d56b4a7af26fbaecef9fbf3b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 88a92baf5c95..f2bb9168967c 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -222,13 +222,7 @@ int xe_eu_stall_init(struct xe_gt *gt) goto exit_free; } - ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); - if (ret) - goto exit_destroy; - - return 0; -exit_destroy: - destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); + return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); exit_free: mutex_destroy(>->eu_stall->stream_lock); kfree(gt->eu_stall); From 4a3f77ea77013d8d0178503a8abff33d8c4ba5c5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2025 20:07:23 +0100 Subject: [PATCH 1537/2157] i2c: i801: Switch to iomapped register access Switch to iomapped register access as a prerequisite for adding support for MMIO register access. This changes replaces the delayed inb_p()/outb_p() calls with calls to ioread8()/iowrite8() which don't have this extra delay. According to Documentation/driver-api/device-io.rst the _p versions are needed for ISA device access only, therefore switching to the non-delayed versions should not cause problems. However a certain risk remains, which on the other hand is significantly reduced by the fact that recent systems will use MMIO instead of PIO. ICH7 datasheet from 2012 mentions already that SMBus register space is also memory-mapped. So all systems from at least the last 10 yrs should be safe. Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/67535b17-c3fb-4507-b083-9c1884b4dd7d@gmail.com --- drivers/i2c/busses/i2c-i801.c | 149 +++++++++++++++++----------------- 1 file changed, 73 insertions(+), 76 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 6a4147054b49..bf5702ccb93a 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -276,7 +276,7 @@ struct i801_mux_config { struct i801_priv { struct i2c_adapter adapter; - unsigned long smba; + void __iomem *smba; unsigned char original_hstcfg; unsigned char original_hstcnt; unsigned char original_slvcmd; @@ -345,7 +345,7 @@ static int i801_wait_intr(struct i801_priv *priv) do { usleep_range(250, 500); - status = inb_p(SMBHSTSTS(priv)); + status = ioread8(SMBHSTSTS(priv)); busy = status & SMBHSTSTS_HOST_BUSY; status &= STATUS_ERROR_FLAGS | SMBHSTSTS_INTR; if (!busy && status) @@ -363,7 +363,7 @@ static int i801_wait_byte_done(struct i801_priv *priv) do { usleep_range(250, 500); - status = inb_p(SMBHSTSTS(priv)); + status = ioread8(SMBHSTSTS(priv)); if (status & (STATUS_ERROR_FLAGS | SMBHSTSTS_BYTE_DONE)) return status & STATUS_ERROR_FLAGS; } while (time_is_after_eq_jiffies(timeout)); @@ -373,7 +373,7 @@ static int i801_wait_byte_done(struct i801_priv *priv) static int i801_get_block_len(struct i801_priv *priv) { - u8 len = inb_p(SMBHSTDAT0(priv)); + u8 len = ioread8(SMBHSTDAT0(priv)); if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) { pci_err(priv->pci_dev, "Illegal SMBus block read size %u\n", len); @@ -390,9 +390,9 @@ static int i801_check_and_clear_pec_error(struct i801_priv *priv) if (!(priv->features & FEATURE_SMBUS_PEC)) return 0; - status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE; + status = ioread8(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE; if (status) { - outb_p(status, SMBAUXSTS(priv)); + iowrite8(status, SMBAUXSTS(priv)); return -EBADMSG; } @@ -405,7 +405,7 @@ static int i801_check_pre(struct i801_priv *priv) { int status, result; - status = inb_p(SMBHSTSTS(priv)); + status = ioread8(SMBHSTSTS(priv)); if (status & SMBHSTSTS_HOST_BUSY) { pci_err(priv->pci_dev, "SMBus is busy, can't use it!\n"); return -EBUSY; @@ -414,7 +414,7 @@ static int i801_check_pre(struct i801_priv *priv) status &= STATUS_FLAGS; if (status) { pci_dbg(priv->pci_dev, "Clearing status flags (%02x)\n", status); - outb_p(status, SMBHSTSTS(priv)); + iowrite8(status, SMBHSTSTS(priv)); } /* @@ -440,9 +440,9 @@ static int i801_check_post(struct i801_priv *priv, int status) */ if (unlikely(status < 0)) { /* try to stop the current command */ - outb_p(SMBHSTCNT_KILL, SMBHSTCNT(priv)); + iowrite8(SMBHSTCNT_KILL, SMBHSTCNT(priv)); status = i801_wait_intr(priv); - outb_p(0, SMBHSTCNT(priv)); + iowrite8(0, SMBHSTCNT(priv)); /* Check if it worked */ if (status < 0 || !(status & SMBHSTSTS_FAILED)) @@ -493,13 +493,13 @@ static int i801_transaction(struct i801_priv *priv, int xact) if (priv->features & FEATURE_IRQ) { reinit_completion(&priv->done); - outb_p(xact | SMBHSTCNT_INTREN | SMBHSTCNT_START, + iowrite8(xact | SMBHSTCNT_INTREN | SMBHSTCNT_START, SMBHSTCNT(priv)); result = wait_for_completion_timeout(&priv->done, adap->timeout); return result ? priv->status : -ETIMEDOUT; } - outb_p(xact | SMBHSTCNT_START, SMBHSTCNT(priv)); + iowrite8(xact | SMBHSTCNT_START, SMBHSTCNT(priv)); return i801_wait_intr(priv); } @@ -508,7 +508,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, union i2c_smbus_data *data, char read_write, int command) { - int i, len, status, xact; + int len, status, xact; switch (command) { case I2C_SMBUS_BLOCK_PROC_CALL: @@ -522,14 +522,13 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, } /* Set block buffer mode */ - outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); + iowrite8(ioread8(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; - outb_p(len, SMBHSTDAT0(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - for (i = 0; i < len; i++) - outb_p(data->block[i+1], SMBBLKDAT(priv)); + iowrite8(len, SMBHSTDAT0(priv)); + ioread8(SMBHSTCNT(priv)); /* reset the data buffer index */ + iowrite8_rep(SMBBLKDAT(priv), data->block + 1, len); } status = i801_transaction(priv, xact); @@ -545,12 +544,11 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, } data->block[0] = len; - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - for (i = 0; i < len; i++) - data->block[i + 1] = inb_p(SMBBLKDAT(priv)); + ioread8(SMBHSTCNT(priv)); /* reset the data buffer index */ + ioread8_rep(SMBBLKDAT(priv), data->block + 1, len); } out: - outb_p(inb_p(SMBAUXCTL(priv)) & ~SMBAUXCTL_E32B, SMBAUXCTL(priv)); + iowrite8(ioread8(SMBAUXCTL(priv)) & ~SMBAUXCTL_E32B, SMBAUXCTL(priv)); return status; } @@ -573,17 +571,17 @@ static void i801_isr_byte_done(struct i801_priv *priv) /* Read next byte */ if (priv->count < priv->len) - priv->data[priv->count++] = inb(SMBBLKDAT(priv)); + priv->data[priv->count++] = ioread8(SMBBLKDAT(priv)); else pci_dbg(priv->pci_dev, "Discarding extra byte on block read\n"); /* Set LAST_BYTE for last byte of read transaction */ if (priv->count == priv->len - 1) - outb_p(priv->cmd | SMBHSTCNT_LAST_BYTE, + iowrite8(priv->cmd | SMBHSTCNT_LAST_BYTE, SMBHSTCNT(priv)); } else if (priv->count < priv->len - 1) { /* Write next byte, except for IRQ after last byte */ - outb_p(priv->data[++priv->count], SMBBLKDAT(priv)); + iowrite8(priv->data[++priv->count], SMBBLKDAT(priv)); } } @@ -591,7 +589,7 @@ static irqreturn_t i801_host_notify_isr(struct i801_priv *priv) { unsigned short addr; - addr = inb_p(SMBNTFDADD(priv)) >> 1; + addr = ioread8(SMBNTFDADD(priv)) >> 1; /* * With the tested platforms, reading SMBNTFDDAT (22 + (p)->smba) @@ -601,7 +599,7 @@ static irqreturn_t i801_host_notify_isr(struct i801_priv *priv) i2c_handle_smbus_host_notify(&priv->adapter, addr); /* clear Host Notify bit and return */ - outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv)); + iowrite8(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv)); return IRQ_HANDLED; } @@ -632,12 +630,12 @@ static irqreturn_t i801_isr(int irq, void *dev_id) return IRQ_NONE; if (priv->features & FEATURE_HOST_NOTIFY) { - status = inb_p(SMBSLVSTS(priv)); + status = ioread8(SMBSLVSTS(priv)); if (status & SMBSLVSTS_HST_NTFY_STS) return i801_host_notify_isr(priv); } - status = inb_p(SMBHSTSTS(priv)); + status = ioread8(SMBHSTSTS(priv)); if ((status & (SMBHSTSTS_BYTE_DONE | STATUS_ERROR_FLAGS)) == SMBHSTSTS_BYTE_DONE) i801_isr_byte_done(priv); @@ -647,7 +645,7 @@ static irqreturn_t i801_isr(int irq, void *dev_id) * so clear it always when the status is set. */ status &= STATUS_FLAGS | SMBHSTSTS_SMBALERT_STS; - outb_p(status, SMBHSTSTS(priv)); + iowrite8(status, SMBHSTSTS(priv)); status &= STATUS_ERROR_FLAGS | SMBHSTSTS_INTR; if (status) { @@ -679,8 +677,8 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, len = data->block[0]; if (read_write == I2C_SMBUS_WRITE) { - outb_p(len, SMBHSTDAT0(priv)); - outb_p(data->block[1], SMBBLKDAT(priv)); + iowrite8(len, SMBHSTDAT0(priv)); + iowrite8(data->block[1], SMBBLKDAT(priv)); } if (command == I2C_SMBUS_I2C_BLOCK_DATA && @@ -699,14 +697,14 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, priv->data = &data->block[1]; reinit_completion(&priv->done); - outb_p(priv->cmd | SMBHSTCNT_START, SMBHSTCNT(priv)); + iowrite8(priv->cmd | SMBHSTCNT_START, SMBHSTCNT(priv)); result = wait_for_completion_timeout(&priv->done, adap->timeout); return result ? priv->status : -ETIMEDOUT; } if (len == 1 && read_write == I2C_SMBUS_READ) smbcmd |= SMBHSTCNT_LAST_BYTE; - outb_p(smbcmd | SMBHSTCNT_START, SMBHSTCNT(priv)); + iowrite8(smbcmd | SMBHSTCNT_START, SMBHSTCNT(priv)); for (i = 1; i <= len; i++) { status = i801_wait_byte_done(priv); @@ -722,27 +720,27 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, len = i801_get_block_len(priv); if (len < 0) { /* Recover */ - while (inb_p(SMBHSTSTS(priv)) & + while (ioread8(SMBHSTSTS(priv)) & SMBHSTSTS_HOST_BUSY) - outb_p(SMBHSTSTS_BYTE_DONE, + iowrite8(SMBHSTSTS_BYTE_DONE, SMBHSTSTS(priv)); - outb_p(SMBHSTSTS_INTR, SMBHSTSTS(priv)); + iowrite8(SMBHSTSTS_INTR, SMBHSTSTS(priv)); return -EPROTO; } data->block[0] = len; } if (read_write == I2C_SMBUS_READ) { - data->block[i] = inb_p(SMBBLKDAT(priv)); + data->block[i] = ioread8(SMBBLKDAT(priv)); if (i == len - 1) - outb_p(smbcmd | SMBHSTCNT_LAST_BYTE, SMBHSTCNT(priv)); + iowrite8(smbcmd | SMBHSTCNT_LAST_BYTE, SMBHSTCNT(priv)); } if (read_write == I2C_SMBUS_WRITE && i+1 <= len) - outb_p(data->block[i+1], SMBBLKDAT(priv)); + iowrite8(data->block[i+1], SMBBLKDAT(priv)); /* signals SMBBLKDAT ready */ - outb_p(SMBHSTSTS_BYTE_DONE, SMBHSTSTS(priv)); + iowrite8(SMBHSTSTS_BYTE_DONE, SMBHSTSTS(priv)); } return i801_wait_intr(priv); @@ -750,7 +748,7 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, static void i801_set_hstadd(struct i801_priv *priv, u8 addr, char read_write) { - outb_p((addr << 1) | (read_write & 0x01), SMBHSTADD(priv)); + iowrite8((addr << 1) | (read_write & 0x01), SMBHSTADD(priv)); } /* Single value transaction function */ @@ -767,30 +765,30 @@ static int i801_simple_transaction(struct i801_priv *priv, union i2c_smbus_data case I2C_SMBUS_BYTE: i801_set_hstadd(priv, addr, read_write); if (read_write == I2C_SMBUS_WRITE) - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); xact = I801_BYTE; break; case I2C_SMBUS_BYTE_DATA: i801_set_hstadd(priv, addr, read_write); if (read_write == I2C_SMBUS_WRITE) - outb_p(data->byte, SMBHSTDAT0(priv)); - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(data->byte, SMBHSTDAT0(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); xact = I801_BYTE_DATA; break; case I2C_SMBUS_WORD_DATA: i801_set_hstadd(priv, addr, read_write); if (read_write == I2C_SMBUS_WRITE) { - outb_p(data->word & 0xff, SMBHSTDAT0(priv)); - outb_p((data->word & 0xff00) >> 8, SMBHSTDAT1(priv)); + iowrite8(data->word & 0xff, SMBHSTDAT0(priv)); + iowrite8((data->word & 0xff00) >> 8, SMBHSTDAT1(priv)); } - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); xact = I801_WORD_DATA; break; case I2C_SMBUS_PROC_CALL: i801_set_hstadd(priv, addr, I2C_SMBUS_WRITE); - outb_p(data->word & 0xff, SMBHSTDAT0(priv)); - outb_p((data->word & 0xff00) >> 8, SMBHSTDAT1(priv)); - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(data->word & 0xff, SMBHSTDAT0(priv)); + iowrite8((data->word & 0xff00) >> 8, SMBHSTDAT1(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); read_write = I2C_SMBUS_READ; xact = I801_PROC_CALL; break; @@ -806,12 +804,12 @@ static int i801_simple_transaction(struct i801_priv *priv, union i2c_smbus_data switch (command) { case I2C_SMBUS_BYTE: case I2C_SMBUS_BYTE_DATA: - data->byte = inb_p(SMBHSTDAT0(priv)); + data->byte = ioread8(SMBHSTDAT0(priv)); break; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: - data->word = inb_p(SMBHSTDAT0(priv)) + - (inb_p(SMBHSTDAT1(priv)) << 8); + data->word = ioread8(SMBHSTDAT0(priv)) + + (ioread8(SMBHSTDAT1(priv)) << 8); break; } @@ -832,7 +830,7 @@ static int i801_smbus_block_transaction(struct i801_priv *priv, union i2c_smbus_ i801_set_hstadd(priv, addr, I2C_SMBUS_WRITE); else i801_set_hstadd(priv, addr, read_write); - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); if (priv->features & FEATURE_BLOCK_BUFFER) return i801_block_transaction_by_block(priv, data, read_write, command); @@ -858,9 +856,9 @@ static int i801_i2c_block_transaction(struct i801_priv *priv, union i2c_smbus_da /* NB: page 240 of ICH5 datasheet shows that DATA1 is the cmd field when reading */ if (read_write == I2C_SMBUS_READ) - outb_p(hstcmd, SMBHSTDAT1(priv)); + iowrite8(hstcmd, SMBHSTDAT1(priv)); else - outb_p(hstcmd, SMBHSTCMD(priv)); + iowrite8(hstcmd, SMBHSTCMD(priv)); if (read_write == I2C_SMBUS_WRITE) { /* set I2C_EN bit in configuration register */ @@ -903,9 +901,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, && size != I2C_SMBUS_I2C_BLOCK_DATA; if (hwpec) /* enable/disable hardware PEC */ - outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_CRC, SMBAUXCTL(priv)); + iowrite8(ioread8(SMBAUXCTL(priv)) | SMBAUXCTL_CRC, SMBAUXCTL(priv)); else - outb_p(inb_p(SMBAUXCTL(priv)) & (~SMBAUXCTL_CRC), + iowrite8(ioread8(SMBAUXCTL(priv)) & (~SMBAUXCTL_CRC), SMBAUXCTL(priv)); if (size == I2C_SMBUS_BLOCK_DATA || size == I2C_SMBUS_BLOCK_PROC_CALL) @@ -921,13 +919,13 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, * time, so we forcibly disable it after every transaction. */ if (hwpec) - outb_p(inb_p(SMBAUXCTL(priv)) & ~SMBAUXCTL_CRC, SMBAUXCTL(priv)); + iowrite8(ioread8(SMBAUXCTL(priv)) & ~SMBAUXCTL_CRC, SMBAUXCTL(priv)); out: /* * Unlock the SMBus device for use by BIOS/ACPI, * and clear status flags if not done already. */ - outb_p(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); + iowrite8(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); @@ -964,11 +962,11 @@ static void i801_enable_host_notify(struct i2c_adapter *adapter) * from the SMB_ALERT signal because the driver does not support * SMBus Alert. */ - outb_p(SMBSLVCMD_HST_NTFY_INTREN | SMBSLVCMD_SMBALERT_DISABLE | + iowrite8(SMBSLVCMD_HST_NTFY_INTREN | SMBSLVCMD_SMBALERT_DISABLE | priv->original_slvcmd, SMBSLVCMD(priv)); /* clear Host Notify bit to allow a new notification */ - outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv)); + iowrite8(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv)); } static void i801_disable_host_notify(struct i801_priv *priv) @@ -976,7 +974,7 @@ static void i801_disable_host_notify(struct i801_priv *priv) if (!(priv->features & FEATURE_HOST_NOTIFY)) return; - outb_p(priv->original_slvcmd, SMBSLVCMD(priv)); + iowrite8(priv->original_slvcmd, SMBSLVCMD(priv)); } static const struct i2c_algorithm smbus_algorithm = { @@ -1441,7 +1439,7 @@ static void i801_add_tco(struct i801_priv *priv) static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv, acpi_physical_address address) { - return address >= priv->smba && + return address >= pci_resource_start(priv->pci_dev, SMBBAR) && address <= pci_resource_end(priv->pci_dev, SMBBAR); } @@ -1518,7 +1516,7 @@ static void i801_setup_hstcfg(struct i801_priv *priv) static void i801_restore_regs(struct i801_priv *priv) { - outb_p(priv->original_hstcnt, SMBHSTCNT(priv)); + iowrite8(priv->original_hstcnt, SMBHSTCNT(priv)); pci_write_config_byte(priv->pci_dev, SMBHSTCFG, priv->original_hstcfg); } @@ -1564,8 +1562,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) } /* Determine the address of the SMBus area */ - priv->smba = pci_resource_start(dev, SMBBAR); - if (!priv->smba) { + if (!pci_resource_start(dev, SMBBAR)) { pci_err(dev, "SMBus base address uninitialized, upgrade BIOS\n"); return -ENODEV; } @@ -1573,12 +1570,12 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) if (i801_acpi_probe(priv)) return -ENODEV; - err = pcim_iomap_regions(dev, 1 << SMBBAR, DRV_NAME); - if (err) { + priv->smba = pcim_iomap_region(dev, SMBBAR, DRV_NAME); + if (IS_ERR(priv->smba)) { pci_err(dev, "Failed to request SMBus region %pr\n", pci_resource_n(dev, SMBBAR)); i801_acpi_remove(priv); - return err; + return PTR_ERR(priv->smba); } pci_read_config_byte(dev, SMBHSTCFG, &priv->original_hstcfg); @@ -1596,7 +1593,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Clear special mode bits */ if (priv->features & (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER)) - outb_p(inb_p(SMBAUXCTL(priv)) & + iowrite8(ioread8(SMBAUXCTL(priv)) & ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv)); /* Default timeout in interrupt mode: 200 ms */ @@ -1632,9 +1629,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) priv->features &= ~FEATURE_HOST_NOTIFY; /* Remember original Interrupt and Host Notify settings */ - priv->original_hstcnt = inb_p(SMBHSTCNT(priv)) & ~SMBHSTCNT_KILL; + priv->original_hstcnt = ioread8(SMBHSTCNT(priv)) & ~SMBHSTCNT_KILL; if (priv->features & FEATURE_HOST_NOTIFY) - priv->original_slvcmd = inb_p(SMBSLVCMD(priv)); + priv->original_slvcmd = ioread8(SMBSLVCMD(priv)); i801_add_tco(priv); @@ -1643,9 +1640,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) * to instantiante i2c_clients, do not change. */ snprintf(priv->adapter.name, sizeof(priv->adapter.name), - "SMBus %s adapter at %04lx", + "SMBus %s adapter at %s", (priv->features & FEATURE_IDF) ? "I801 IDF" : "I801", - priv->smba); + pci_name(dev)); err = i2c_add_adapter(&priv->adapter); if (err) { From d50f2f5d51ea609194439e6e3f470d1a4fad761b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2025 20:08:18 +0100 Subject: [PATCH 1538/2157] i2c: i801: Use MMIO if available Newer versions of supported chips support MMIO in addition to legacy PMIO register access. Probe the MMIO PCI BAR and use faster MMIO register access if available. Signed-off-by: Heiner Kallweit Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/b4748b7a-aac5-445c-b813-20c4d2c23ec3@gmail.com --- drivers/i2c/busses/i2c-i801.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index bf5702ccb93a..48e1af544b75 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -144,6 +144,7 @@ #define SMBNTFDADD(p) (20 + (p)->smba) /* ICH3 and later */ /* PCI Address Constants */ +#define SMBBAR_MMIO 0 #define SMBBAR 4 #define SMBHSTCFG 0x040 #define TCOBASE 0x050 @@ -1522,7 +1523,7 @@ static void i801_restore_regs(struct i801_priv *priv) static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) { - int err, i; + int err, i, bar = SMBBAR; struct i801_priv *priv; priv = devm_kzalloc(&dev->dev, sizeof(*priv), GFP_KERNEL); @@ -1570,10 +1571,13 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) if (i801_acpi_probe(priv)) return -ENODEV; - priv->smba = pcim_iomap_region(dev, SMBBAR, DRV_NAME); + if (pci_resource_flags(dev, SMBBAR_MMIO) & IORESOURCE_MEM) + bar = SMBBAR_MMIO; + + priv->smba = pcim_iomap_region(dev, bar, DRV_NAME); if (IS_ERR(priv->smba)) { pci_err(dev, "Failed to request SMBus region %pr\n", - pci_resource_n(dev, SMBBAR)); + pci_resource_n(dev, bar)); i801_acpi_remove(priv); return PTR_ERR(priv->smba); } From ef1d3455bbc1922f94a91ed58d3d7db440652959 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 20 Mar 2025 12:22:22 +0100 Subject: [PATCH 1539/2157] libnvdimm/labels: Fix divide error in nd_label_data_init() If a faulty CXL memory device returns a broken zero LSA size in its memory device information (Identify Memory Device (Opcode 4000h), CXL spec. 3.1, 8.2.9.9.1.1), a divide error occurs in the libnvdimm driver: Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:nd_label_data_init+0x10e/0x800 [libnvdimm] Code and flow: 1) CXL Command 4000h returns LSA size = 0 2) config_size is assigned to zero LSA size (CXL pmem driver): drivers/cxl/pmem.c: .config_size = mds->lsa_size, 3) max_xfer is set to zero (nvdimm driver): drivers/nvdimm/label.c: max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size); 4) A subsequent DIV_ROUND_UP() causes a division by zero: drivers/nvdimm/label.c: /* Make our initial read size a multiple of max_xfer size */ drivers/nvdimm/label.c: read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer, drivers/nvdimm/label.c- config_size); Fix this by checking the config size parameter by extending an existing check. Signed-off-by: Robert Richter Reviewed-by: Pankaj Gupta Reviewed-by: Ira Weiny Link: https://patch.msgid.link/20250320112223.608320-1-rrichter@amd.com Signed-off-by: Ira Weiny --- drivers/nvdimm/label.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 082253a3a956..04f4a049599a 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -442,7 +442,8 @@ int nd_label_data_init(struct nvdimm_drvdata *ndd) if (ndd->data) return 0; - if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) { + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 || + ndd->nsarea.config_size == 0) { dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n", ndd->nsarea.max_xfer, ndd->nsarea.config_size); return -ENXIO; From e8d2d287e26d9bd9114cf258a123a6b70812442e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 19 Mar 2025 09:08:01 -0700 Subject: [PATCH 1540/2157] i3c: master: svc: Fix implicit fallthrough in svc_i3c_master_ibi_work() Clang warns (or errors with CONFIG_WERROR=y): drivers/i3c/master/svc-i3c-master.c:596:2: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] 596 | default: | ^ drivers/i3c/master/svc-i3c-master.c:596:2: note: insert 'break;' to avoid fall-through 596 | default: | ^ | break; 1 error generated. Clang is a little more pedantic than GCC, which does not warn when falling through to a case that is just break or return. Clang's version is more in line with the kernel's own stance in deprecated.rst, which states that all switch/case blocks must end in either break, fallthrough, continue, goto, or return. Add the missing break to silence the warning. Fixes: 0430bf9bc1ac ("i3c: master: svc: Fix missing STOP for master request") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250319-i3c-fix-clang-fallthrough-v1-1-d8e02be1ef5c@kernel.org Signed-off-by: Alexandre Belloni --- drivers/i3c/master/svc-i3c-master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index e0cd3ce28b7f..85e16de208d3 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -593,6 +593,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) break; case SVC_I3C_MSTATUS_IBITYPE_MASTER_REQUEST: svc_i3c_master_emit_stop(master); + break; default: break; } From 7202745e29f860114331b431906a6854117b4167 Mon Sep 17 00:00:00 2001 From: Aryan Srivastava Date: Tue, 18 Mar 2025 15:16:29 +1300 Subject: [PATCH 1541/2157] i2c: octeon: fix return commenting Kernel-docs require a ':' to signify the return behaviour of a function with within the comment. Many functions in this file were missing ':' after the "Returns" line, resulting in kernel-doc warnings. Add the ':' to satisfy kernel-doc requirements. Signed-off-by: Aryan Srivastava Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250318021632.2710792-2-aryan.srivastava@alliedtelesis.co.nz Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-octeon-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index 3fbc828508ab..0094fe5f7460 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -45,7 +45,7 @@ static bool octeon_i2c_test_iflg(struct octeon_i2c *i2c) * octeon_i2c_wait - wait for the IFLG to be set * @i2c: The struct octeon_i2c * - * Returns 0 on success, otherwise a negative errno. + * Returns: 0 on success, otherwise a negative errno. */ static int octeon_i2c_wait(struct octeon_i2c *i2c) { @@ -139,7 +139,7 @@ static void octeon_i2c_hlc_disable(struct octeon_i2c *i2c) * octeon_i2c_hlc_wait - wait for an HLC operation to complete * @i2c: The struct octeon_i2c * - * Returns 0 on success, otherwise -ETIMEDOUT. + * Returns: 0 on success, otherwise -ETIMEDOUT. */ static int octeon_i2c_hlc_wait(struct octeon_i2c *i2c) { @@ -273,7 +273,7 @@ static int octeon_i2c_recovery(struct octeon_i2c *i2c) * octeon_i2c_start - send START to the bus * @i2c: The struct octeon_i2c * - * Returns 0 on success, otherwise a negative errno. + * Returns: 0 on success, otherwise a negative errno. */ static int octeon_i2c_start(struct octeon_i2c *i2c) { @@ -314,7 +314,7 @@ static void octeon_i2c_stop(struct octeon_i2c *i2c) * * The address is sent over the bus, then the data is read. * - * Returns 0 on success, otherwise a negative errno. + * Returns: 0 on success, otherwise a negative errno. */ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, u8 *data, u16 *rlength, bool recv_len) @@ -382,7 +382,7 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, * * The address is sent over the bus, then the data. * - * Returns 0 on success, otherwise a negative errno. + * Returns: 0 on success, otherwise a negative errno. */ static int octeon_i2c_write(struct octeon_i2c *i2c, int target, const u8 *data, int length) @@ -625,7 +625,7 @@ static int octeon_i2c_hlc_comp_write(struct octeon_i2c *i2c, struct i2c_msg *msg * @msgs: Pointer to the messages to be processed * @num: Length of the MSGS array * - * Returns the number of messages processed, or a negative errno on failure. + * Returns: the number of messages processed, or a negative errno on failure. */ int octeon_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { From b1c010bd25f8ba5fd09c617daed2fb03343f1f67 Mon Sep 17 00:00:00 2001 From: Aryan Srivastava Date: Tue, 18 Mar 2025 15:16:30 +1300 Subject: [PATCH 1542/2157] i2c: octeon: remove 10-bit addressing support The driver gives the illusion of 10-bit address support, but the upper 3 bits of the given address are always thrown away. Remove unnecessary considerations for 10 bit addressing and always complete 7 bit ops when using the hlc methods. Signed-off-by: Aryan Srivastava Reviewed-by: Wolfram Sang Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250318021632.2710792-3-aryan.srivastava@alliedtelesis.co.nz Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-octeon-core.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index 0094fe5f7460..baf6b27f3752 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -421,17 +421,12 @@ static int octeon_i2c_hlc_read(struct octeon_i2c *i2c, struct i2c_msg *msgs) octeon_i2c_hlc_enable(i2c); octeon_i2c_hlc_int_clear(i2c); - cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR; + cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR | SW_TWSI_OP_7; /* SIZE */ cmd |= (u64)(msgs[0].len - 1) << SW_TWSI_SIZE_SHIFT; /* A */ cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT; - if (msgs[0].flags & I2C_M_TEN) - cmd |= SW_TWSI_OP_10; - else - cmd |= SW_TWSI_OP_7; - octeon_i2c_writeq_flush(cmd, i2c->twsi_base + OCTEON_REG_SW_TWSI(i2c)); ret = octeon_i2c_hlc_wait(i2c); if (ret) @@ -463,17 +458,12 @@ static int octeon_i2c_hlc_write(struct octeon_i2c *i2c, struct i2c_msg *msgs) octeon_i2c_hlc_enable(i2c); octeon_i2c_hlc_int_clear(i2c); - cmd = SW_TWSI_V | SW_TWSI_SOVR; + cmd = SW_TWSI_V | SW_TWSI_SOVR | SW_TWSI_OP_7; /* SIZE */ cmd |= (u64)(msgs[0].len - 1) << SW_TWSI_SIZE_SHIFT; /* A */ cmd |= (u64)(msgs[0].addr & 0x7full) << SW_TWSI_ADDR_SHIFT; - if (msgs[0].flags & I2C_M_TEN) - cmd |= SW_TWSI_OP_10; - else - cmd |= SW_TWSI_OP_7; - for (i = 0, j = msgs[0].len - 1; i < msgs[0].len && i < 4; i++, j--) cmd |= (u64)msgs[0].buf[j] << (8 * i); @@ -513,11 +503,6 @@ static bool octeon_i2c_hlc_ext(struct octeon_i2c *i2c, struct i2c_msg msg, u64 * bool set_ext = false; u64 cmd = 0; - if (msg.flags & I2C_M_TEN) - cmd |= SW_TWSI_OP_10_IA; - else - cmd |= SW_TWSI_OP_7_IA; - if (msg.len == 2) { cmd |= SW_TWSI_EIA; *ext = (u64)msg.buf[0] << SW_TWSI_IA_SHIFT; @@ -550,7 +535,7 @@ static int octeon_i2c_hlc_comp_read(struct octeon_i2c *i2c, struct i2c_msg *msgs octeon_i2c_hlc_enable(i2c); - cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR; + cmd = SW_TWSI_V | SW_TWSI_R | SW_TWSI_SOVR | SW_TWSI_OP_7_IA; /* SIZE */ cmd |= (u64)(msgs[1].len - 1) << SW_TWSI_SIZE_SHIFT; /* A */ @@ -587,7 +572,7 @@ static int octeon_i2c_hlc_comp_write(struct octeon_i2c *i2c, struct i2c_msg *msg octeon_i2c_hlc_enable(i2c); - cmd = SW_TWSI_V | SW_TWSI_SOVR; + cmd = SW_TWSI_V | SW_TWSI_SOVR | SW_TWSI_OP_7_IA; /* SIZE */ cmd |= (u64)(msgs[1].len - 1) << SW_TWSI_SIZE_SHIFT; /* A */ From 0fc829dbde9bf1f349631c677a85e08782037ecf Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Tue, 18 Mar 2025 16:06:21 +0530 Subject: [PATCH 1543/2157] dt-bindings: i2c: omap: Add mux-states property Add mux controller support for when the I2C lines are muxed after signals come out of SoC and before they go to any client. Signed-off-by: Jayesh Choudhary Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250318103622.29979-2-j-choudhary@ti.com Signed-off-by: Andi Shyti --- Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml index 8c2e35fabf5b..58d32ceeacfc 100644 --- a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml @@ -47,6 +47,11 @@ properties: $ref: /schemas/types.yaml#/definitions/string deprecated: true + mux-states: + description: + mux controller node to route the I2C signals from SoC to clients. + maxItems: 1 + required: - compatible - reg @@ -87,4 +92,5 @@ examples: interrupts = ; #address-cells = <1>; #size-cells = <0>; + mux-states = <&i2c_mux 1>; }; From b6ef830c60b6f4adfb72d0780b4363df3a1feb9c Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Tue, 18 Mar 2025 16:06:22 +0530 Subject: [PATCH 1544/2157] i2c: omap: Add support for setting mux Some SoCs require muxes in the routing for SDA and SCL lines. Therefore, add support for setting the mux by reading the mux-states property from the dt-node. Signed-off-by: Jayesh Choudhary Link: https://lore.kernel.org/r/20250318103622.29979-3-j-choudhary@ti.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-omap.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index fc438f445771..0648e58b083e 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -940,6 +940,7 @@ config I2C_OMAP tristate "OMAP I2C adapter" depends on ARCH_OMAP || ARCH_K3 || COMPILE_TEST default MACH_OMAP_OSK + select MULTIPLEXER help If you say yes to this option, support will be included for the I2C interface on the Texas Instruments OMAP1/2 family of processors. diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index f18c3e74b076..16afb9ca19bb 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,7 @@ struct omap_i2c_dev { u16 syscstate; u16 westate; u16 errata; + struct mux_state *mux_state; }; static const u8 reg_map_ip_v1[] = { @@ -1452,6 +1454,23 @@ omap_i2c_probe(struct platform_device *pdev) (1000 * omap->speed / 8); } + if (of_property_read_bool(node, "mux-states")) { + struct mux_state *mux_state; + + mux_state = devm_mux_state_get(&pdev->dev, NULL); + if (IS_ERR(mux_state)) { + r = PTR_ERR(mux_state); + dev_dbg(&pdev->dev, "failed to get I2C mux: %d\n", r); + goto err_disable_pm; + } + omap->mux_state = mux_state; + r = mux_state_select(omap->mux_state); + if (r) { + dev_err(&pdev->dev, "failed to select I2C mux: %d\n", r); + goto err_disable_pm; + } + } + /* reset ASAP, clearing any IRQs */ omap_i2c_init(omap); @@ -1511,6 +1530,9 @@ static void omap_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&omap->adapter); + if (omap->mux_state) + mux_state_deselect(omap->mux_state); + ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) dev_err(omap->dev, "Failed to resume hardware, skip disable\n"); From 76fe9ac17f6c5a4d722657cd7b705f7f114fd105 Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Wed, 19 Mar 2025 17:28:59 +0800 Subject: [PATCH 1545/2157] dt-bindings: i2c: spacemit: add support for K1 SoC The I2C of K1 supports fast-speed-mode and high-speed-mode, and supports FIFO transmission. Signed-off-by: Troy Mitchell Reviewed-by: Conor Dooley Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250319-k1-i2c-master-v8-1-013e2df2b78d@gmail.com --- .../bindings/i2c/spacemit,k1-i2c.yaml | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml diff --git a/Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml b/Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml new file mode 100644 index 000000000000..3d6aefb0d0f1 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/spacemit,k1-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: I2C controller embedded in SpacemiT's K1 SoC + +maintainers: + - Troy Mitchell + +properties: + compatible: + const: spacemit,k1-i2c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: I2C Functional Clock + - description: APB Bus Clock + + clock-names: + items: + - const: func + - const: bus + + clock-frequency: + description: | + K1 support three different modes which running different frequencies + standard speed mode: up to 100000 (100Hz) + fast speed mode : up to 400000 (400Hz) + high speed mode : up to 3300000 (3.3Mhz) + default: 400000 + maximum: 3300000 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + i2c@d4010800 { + compatible = "spacemit,k1-i2c"; + reg = <0xd4010800 0x38>; + interrupt-parent = <&plic>; + interrupts = <36>; + clocks =<&ccu 32>, <&ccu 84>; + clock-names = "func", "bus"; + clock-frequency = <100000>; + }; + +... From 5ea558473fa31f4eeb5c76d58277a7ab68fd501d Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Wed, 19 Mar 2025 17:29:00 +0800 Subject: [PATCH 1546/2157] i2c: spacemit: add support for SpacemiT K1 SoC This patch introduces basic I2C support for the SpacemiT K1 SoC, utilizing interrupts for transfers. The driver has been tested using i2c-tools on a Bananapi-F3 board, and basic I2C read/write operations have been confirmed to work. Signed-off-by: Troy Mitchell Reviewed-by: Alex Elder Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250319-k1-i2c-master-v8-2-013e2df2b78d@gmail.com --- drivers/i2c/busses/Kconfig | 17 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-k1.c | 602 ++++++++++++++++++++++++++++++++++++ 3 files changed, 620 insertions(+) create mode 100644 drivers/i2c/busses/i2c-k1.c diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 0648e58b083e..83c88c79afe2 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -783,6 +783,23 @@ config I2C_JZ4780 If you don't know what to do here, say N. +config I2C_K1 + tristate "SpacemiT K1 I2C adapter" + depends on ARCH_SPACEMIT || COMPILE_TEST + depends on OF + help + This option enables support for the I2C interface on the SpacemiT K1 + platform. + + If you enable this configuration, the kernel will include support for + the I2C adapter specific to the SpacemiT K1 platform. This driver can + be used to manage I2C bus transactions, which are necessary for + interfacing with I2C peripherals such as sensors, EEPROMs, and other + devices. + + This driver can also be built as a module. If so, the + module will be called `i2c-k1`. + config I2C_KEBA tristate "KEBA I2C controller support" depends on HAS_IOMEM diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 1c2a4510abe4..c1252e2b779e 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -74,6 +74,7 @@ obj-$(CONFIG_I2C_IMX) += i2c-imx.o obj-$(CONFIG_I2C_IMX_LPI2C) += i2c-imx-lpi2c.o obj-$(CONFIG_I2C_IOP3XX) += i2c-iop3xx.o obj-$(CONFIG_I2C_JZ4780) += i2c-jz4780.o +obj-$(CONFIG_I2C_K1) += i2c-k1.o obj-$(CONFIG_I2C_KEBA) += i2c-keba.o obj-$(CONFIG_I2C_KEMPLD) += i2c-kempld.o obj-$(CONFIG_I2C_LPC2K) += i2c-lpc2k.o diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c new file mode 100644 index 000000000000..0d8763b852db --- /dev/null +++ b/drivers/i2c/busses/i2c-k1.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024-2025 Troy Mitchell + */ + + #include + #include + #include + #include + #include + #include + +/* spacemit i2c registers */ +#define SPACEMIT_ICR 0x0 /* Control register */ +#define SPACEMIT_ISR 0x4 /* Status register */ +#define SPACEMIT_IDBR 0xc /* Data buffer register */ +#define SPACEMIT_IBMR 0x1c /* Bus monitor register */ + +/* SPACEMIT_ICR register fields */ +#define SPACEMIT_CR_START BIT(0) /* start bit */ +#define SPACEMIT_CR_STOP BIT(1) /* stop bit */ +#define SPACEMIT_CR_ACKNAK BIT(2) /* send ACK(0) or NAK(1) */ +#define SPACEMIT_CR_TB BIT(3) /* transfer byte bit */ +/* Bits 4-7 are reserved */ +#define SPACEMIT_CR_MODE_FAST BIT(8) /* bus mode (master operation) */ +/* Bit 9 is reserved */ +#define SPACEMIT_CR_UR BIT(10) /* unit reset */ +/* Bits 11-12 are reserved */ +#define SPACEMIT_CR_SCLE BIT(13) /* master clock enable */ +#define SPACEMIT_CR_IUE BIT(14) /* unit enable */ +/* Bits 15-17 are reserved */ +#define SPACEMIT_CR_ALDIE BIT(18) /* enable arbitration interrupt */ +#define SPACEMIT_CR_DTEIE BIT(19) /* enable TX interrupts */ +#define SPACEMIT_CR_DRFIE BIT(20) /* enable RX interrupts */ +#define SPACEMIT_CR_GCD BIT(21) /* general call disable */ +#define SPACEMIT_CR_BEIE BIT(22) /* enable bus error ints */ +/* Bits 23-24 are reserved */ +#define SPACEMIT_CR_MSDIE BIT(25) /* master STOP detected int enable */ +#define SPACEMIT_CR_MSDE BIT(26) /* master STOP detected enable */ +#define SPACEMIT_CR_TXDONEIE BIT(27) /* transaction done int enable */ +#define SPACEMIT_CR_TXEIE BIT(28) /* transmit FIFO empty int enable */ +#define SPACEMIT_CR_RXHFIE BIT(29) /* receive FIFO half-full int enable */ +#define SPACEMIT_CR_RXFIE BIT(30) /* receive FIFO full int enable */ +#define SPACEMIT_CR_RXOVIE BIT(31) /* receive FIFO overrun int enable */ + +#define SPACEMIT_I2C_INT_CTRL_MASK (SPACEMIT_CR_ALDIE | SPACEMIT_CR_DTEIE | \ + SPACEMIT_CR_DRFIE | SPACEMIT_CR_BEIE | \ + SPACEMIT_CR_TXDONEIE | SPACEMIT_CR_TXEIE | \ + SPACEMIT_CR_RXHFIE | SPACEMIT_CR_RXFIE | \ + SPACEMIT_CR_RXOVIE | SPACEMIT_CR_MSDIE) + +/* SPACEMIT_ISR register fields */ +/* Bits 0-13 are reserved */ +#define SPACEMIT_SR_ACKNAK BIT(14) /* ACK/NACK status */ +#define SPACEMIT_SR_UB BIT(15) /* unit busy */ +#define SPACEMIT_SR_IBB BIT(16) /* i2c bus busy */ +#define SPACEMIT_SR_EBB BIT(17) /* early bus busy */ +#define SPACEMIT_SR_ALD BIT(18) /* arbitration loss detected */ +#define SPACEMIT_SR_ITE BIT(19) /* TX buffer empty */ +#define SPACEMIT_SR_IRF BIT(20) /* RX buffer full */ +#define SPACEMIT_SR_GCAD BIT(21) /* general call address detected */ +#define SPACEMIT_SR_BED BIT(22) /* bus error no ACK/NAK */ +#define SPACEMIT_SR_SAD BIT(23) /* slave address detected */ +#define SPACEMIT_SR_SSD BIT(24) /* slave stop detected */ +/* Bit 25 is reserved */ +#define SPACEMIT_SR_MSD BIT(26) /* master stop detected */ +#define SPACEMIT_SR_TXDONE BIT(27) /* transaction done */ +#define SPACEMIT_SR_TXE BIT(28) /* TX FIFO empty */ +#define SPACEMIT_SR_RXHF BIT(29) /* RX FIFO half-full */ +#define SPACEMIT_SR_RXF BIT(30) /* RX FIFO full */ +#define SPACEMIT_SR_RXOV BIT(31) /* RX FIFO overrun */ + +#define SPACEMIT_I2C_INT_STATUS_MASK (SPACEMIT_SR_RXOV | SPACEMIT_SR_RXF | SPACEMIT_SR_RXHF | \ + SPACEMIT_SR_TXE | SPACEMIT_SR_TXDONE | SPACEMIT_SR_MSD | \ + SPACEMIT_SR_SSD | SPACEMIT_SR_SAD | SPACEMIT_SR_BED | \ + SPACEMIT_SR_GCAD | SPACEMIT_SR_IRF | SPACEMIT_SR_ITE | \ + SPACEMIT_SR_ALD) + +/* SPACEMIT_IBMR register fields */ +#define SPACEMIT_BMR_SDA BIT(0) /* SDA line level */ +#define SPACEMIT_BMR_SCL BIT(1) /* SCL line level */ + +/* i2c bus recover timeout: us */ +#define SPACEMIT_I2C_BUS_BUSY_TIMEOUT 100000 + +#define SPACEMIT_I2C_MAX_STANDARD_MODE_FREQ 100000 /* Hz */ +#define SPACEMIT_I2C_MAX_FAST_MODE_FREQ 400000 /* Hz */ + +#define SPACEMIT_SR_ERR (SPACEMIT_SR_BED | SPACEMIT_SR_RXOV | SPACEMIT_SR_ALD) + +enum spacemit_i2c_state { + SPACEMIT_STATE_IDLE, + SPACEMIT_STATE_START, + SPACEMIT_STATE_READ, + SPACEMIT_STATE_WRITE, +}; + +/* i2c-spacemit driver's main struct */ +struct spacemit_i2c_dev { + struct device *dev; + struct i2c_adapter adapt; + + /* hardware resources */ + void __iomem *base; + int irq; + u32 clock_freq; + + struct i2c_msg *msgs; + u32 msg_num; + + /* index of the current message being processed */ + u32 msg_idx; + u8 *msg_buf; + /* the number of unprocessed bytes remaining in the current message */ + u32 unprocessed; + + enum spacemit_i2c_state state; + bool read; + struct completion complete; + u32 status; +}; + +static void spacemit_i2c_enable(struct spacemit_i2c_dev *i2c) +{ + u32 val; + + val = readl(i2c->base + SPACEMIT_ICR); + val |= SPACEMIT_CR_IUE; + writel(val, i2c->base + SPACEMIT_ICR); +} + +static void spacemit_i2c_disable(struct spacemit_i2c_dev *i2c) +{ + u32 val; + + val = readl(i2c->base + SPACEMIT_ICR); + val &= ~SPACEMIT_CR_IUE; + writel(val, i2c->base + SPACEMIT_ICR); +} + +static void spacemit_i2c_reset(struct spacemit_i2c_dev *i2c) +{ + writel(SPACEMIT_CR_UR, i2c->base + SPACEMIT_ICR); + udelay(5); + writel(0, i2c->base + SPACEMIT_ICR); +} + +static int spacemit_i2c_handle_err(struct spacemit_i2c_dev *i2c) +{ + dev_dbg(i2c->dev, "i2c error status: 0x%08x\n", i2c->status); + + if (i2c->status & (SPACEMIT_SR_BED | SPACEMIT_SR_ALD)) { + spacemit_i2c_reset(i2c); + return -EAGAIN; + } + + return i2c->status & SPACEMIT_SR_ACKNAK ? -ENXIO : -EIO; +} + +static void spacemit_i2c_conditionally_reset_bus(struct spacemit_i2c_dev *i2c) +{ + u32 status; + + /* if bus is locked, reset unit. 0: locked */ + status = readl(i2c->base + SPACEMIT_IBMR); + if ((status & SPACEMIT_BMR_SDA) && (status & SPACEMIT_BMR_SCL)) + return; + + spacemit_i2c_reset(i2c); + usleep_range(10, 20); + + /* check scl status again */ + status = readl(i2c->base + SPACEMIT_IBMR); + if (!(status & SPACEMIT_BMR_SCL)) + dev_warn_ratelimited(i2c->dev, "unit reset failed\n"); +} + +static int spacemit_i2c_wait_bus_idle(struct spacemit_i2c_dev *i2c) +{ + int ret; + u32 val; + + val = readl(i2c->base + SPACEMIT_ISR); + if (!(val & (SPACEMIT_SR_UB | SPACEMIT_SR_IBB))) + return 0; + + ret = readl_poll_timeout(i2c->base + SPACEMIT_ISR, + val, !(val & (SPACEMIT_SR_UB | SPACEMIT_SR_IBB)), + 1500, SPACEMIT_I2C_BUS_BUSY_TIMEOUT); + if (ret) + spacemit_i2c_reset(i2c); + + return ret; +} + +static void spacemit_i2c_check_bus_release(struct spacemit_i2c_dev *i2c) +{ + /* in case bus is not released after transfer completes */ + if (readl(i2c->base + SPACEMIT_ISR) & SPACEMIT_SR_EBB) { + spacemit_i2c_conditionally_reset_bus(i2c); + usleep_range(90, 150); + } +} + +static void spacemit_i2c_init(struct spacemit_i2c_dev *i2c) +{ + u32 val; + + /* + * Unmask interrupt bits for all xfer mode: + * bus error, arbitration loss detected. + * For transaction complete signal, we use master stop + * interrupt, so we don't need to unmask SPACEMIT_CR_TXDONEIE. + */ + val = SPACEMIT_CR_BEIE | SPACEMIT_CR_ALDIE; + + /* + * Unmask interrupt bits for interrupt xfer mode: + * When IDBR receives a byte, an interrupt is triggered. + * + * For the tx empty interrupt, it will be enabled in the + * i2c_start function. + * Otherwise, it will cause an erroneous empty interrupt before i2c_start. + */ + val |= SPACEMIT_CR_DRFIE; + + if (i2c->clock_freq == SPACEMIT_I2C_MAX_FAST_MODE_FREQ) + val |= SPACEMIT_CR_MODE_FAST; + + /* disable response to general call */ + val |= SPACEMIT_CR_GCD; + + /* enable SCL clock output */ + val |= SPACEMIT_CR_SCLE; + + /* enable master stop detected */ + val |= SPACEMIT_CR_MSDE | SPACEMIT_CR_MSDIE; + + writel(val, i2c->base + SPACEMIT_ICR); +} + +static inline void +spacemit_i2c_clear_int_status(struct spacemit_i2c_dev *i2c, u32 mask) +{ + writel(mask & SPACEMIT_I2C_INT_STATUS_MASK, i2c->base + SPACEMIT_ISR); +} + +static void spacemit_i2c_start(struct spacemit_i2c_dev *i2c) +{ + u32 target_addr_rw, val; + struct i2c_msg *cur_msg = i2c->msgs + i2c->msg_idx; + + i2c->read = !!(cur_msg->flags & I2C_M_RD); + + i2c->state = SPACEMIT_STATE_START; + + target_addr_rw = (cur_msg->addr & 0x7f) << 1; + if (cur_msg->flags & I2C_M_RD) + target_addr_rw |= 1; + + writel(target_addr_rw, i2c->base + SPACEMIT_IDBR); + + /* send start pulse */ + val = readl(i2c->base + SPACEMIT_ICR); + val &= ~SPACEMIT_CR_STOP; + val |= SPACEMIT_CR_START | SPACEMIT_CR_TB | SPACEMIT_CR_DTEIE; + writel(val, i2c->base + SPACEMIT_ICR); +} + +static void spacemit_i2c_stop(struct spacemit_i2c_dev *i2c) +{ + u32 val; + + val = readl(i2c->base + SPACEMIT_ICR); + val |= SPACEMIT_CR_STOP | SPACEMIT_CR_ALDIE | SPACEMIT_CR_TB; + + if (i2c->read) + val |= SPACEMIT_CR_ACKNAK; + + writel(val, i2c->base + SPACEMIT_ICR); +} + +static int spacemit_i2c_xfer_msg(struct spacemit_i2c_dev *i2c) +{ + unsigned long time_left; + struct i2c_msg *msg; + + for (i2c->msg_idx = 0; i2c->msg_idx < i2c->msg_num; i2c->msg_idx++) { + msg = &i2c->msgs[i2c->msg_idx]; + i2c->msg_buf = msg->buf; + i2c->unprocessed = msg->len; + i2c->status = 0; + + reinit_completion(&i2c->complete); + + spacemit_i2c_start(i2c); + + time_left = wait_for_completion_timeout(&i2c->complete, + i2c->adapt.timeout); + if (!time_left) { + dev_err(i2c->dev, "msg completion timeout\n"); + spacemit_i2c_conditionally_reset_bus(i2c); + spacemit_i2c_reset(i2c); + return -ETIMEDOUT; + } + + if (i2c->status & SPACEMIT_SR_ERR) + return spacemit_i2c_handle_err(i2c); + } + + return 0; +} + +static bool spacemit_i2c_is_last_msg(struct spacemit_i2c_dev *i2c) +{ + if (i2c->msg_idx != i2c->msg_num - 1) + return false; + + if (i2c->read) + return i2c->unprocessed == 1; + + return !i2c->unprocessed; +} + +static void spacemit_i2c_handle_write(struct spacemit_i2c_dev *i2c) +{ + /* if transfer completes, SPACEMIT_ISR will handle it */ + if (i2c->status & SPACEMIT_SR_MSD) + return; + + if (i2c->unprocessed) { + writel(*i2c->msg_buf++, i2c->base + SPACEMIT_IDBR); + i2c->unprocessed--; + return; + } + + /* SPACEMIT_STATE_IDLE avoids trigger next byte */ + i2c->state = SPACEMIT_STATE_IDLE; + complete(&i2c->complete); +} + +static void spacemit_i2c_handle_read(struct spacemit_i2c_dev *i2c) +{ + if (i2c->unprocessed) { + *i2c->msg_buf++ = readl(i2c->base + SPACEMIT_IDBR); + i2c->unprocessed--; + } + + /* if transfer completes, SPACEMIT_ISR will handle it */ + if (i2c->status & (SPACEMIT_SR_MSD | SPACEMIT_SR_ACKNAK)) + return; + + /* it has to append stop bit in icr that read last byte */ + if (i2c->unprocessed) + return; + + /* SPACEMIT_STATE_IDLE avoids trigger next byte */ + i2c->state = SPACEMIT_STATE_IDLE; + complete(&i2c->complete); +} + +static void spacemit_i2c_handle_start(struct spacemit_i2c_dev *i2c) +{ + i2c->state = i2c->read ? SPACEMIT_STATE_READ : SPACEMIT_STATE_WRITE; + if (i2c->state == SPACEMIT_STATE_WRITE) + spacemit_i2c_handle_write(i2c); +} + +static void spacemit_i2c_err_check(struct spacemit_i2c_dev *i2c) +{ + u32 val; + + /* + * Send transaction complete signal: + * error happens, detect master stop + */ + if (!(i2c->status & (SPACEMIT_SR_ERR | SPACEMIT_SR_MSD))) + return; + + /* + * Here the transaction is already done, we don't need any + * other interrupt signals from now, in case any interrupt + * happens before spacemit_i2c_xfer to disable irq and i2c unit, + * we mask all the interrupt signals and clear the interrupt + * status. + */ + val = readl(i2c->base + SPACEMIT_ICR); + val &= ~SPACEMIT_I2C_INT_CTRL_MASK; + writel(val, i2c->base + SPACEMIT_ICR); + + spacemit_i2c_clear_int_status(i2c, SPACEMIT_I2C_INT_STATUS_MASK); + + i2c->state = SPACEMIT_STATE_IDLE; + complete(&i2c->complete); +} + +static irqreturn_t spacemit_i2c_irq_handler(int irq, void *devid) +{ + struct spacemit_i2c_dev *i2c = devid; + u32 status, val; + + status = readl(i2c->base + SPACEMIT_ISR); + if (!status) + return IRQ_HANDLED; + + i2c->status = status; + + spacemit_i2c_clear_int_status(i2c, status); + + if (i2c->status & SPACEMIT_SR_ERR) + goto err_out; + + val = readl(i2c->base + SPACEMIT_ICR); + val &= ~(SPACEMIT_CR_TB | SPACEMIT_CR_ACKNAK | SPACEMIT_CR_STOP | SPACEMIT_CR_START); + writel(val, i2c->base + SPACEMIT_ICR); + + switch (i2c->state) { + case SPACEMIT_STATE_START: + spacemit_i2c_handle_start(i2c); + break; + case SPACEMIT_STATE_READ: + spacemit_i2c_handle_read(i2c); + break; + case SPACEMIT_STATE_WRITE: + spacemit_i2c_handle_write(i2c); + break; + default: + break; + } + + if (i2c->state != SPACEMIT_STATE_IDLE) { + if (spacemit_i2c_is_last_msg(i2c)) { + /* trigger next byte with stop */ + spacemit_i2c_stop(i2c); + } else { + /* trigger next byte */ + val |= SPACEMIT_CR_ALDIE | SPACEMIT_CR_TB; + writel(val, i2c->base + SPACEMIT_ICR); + } + } + +err_out: + spacemit_i2c_err_check(i2c); + return IRQ_HANDLED; +} + +static void spacemit_i2c_calc_timeout(struct spacemit_i2c_dev *i2c) +{ + unsigned long timeout; + int idx = 0, cnt = 0; + + for (; idx < i2c->msg_num; idx++) + cnt += (i2c->msgs + idx)->len + 1; + + /* + * Multiply by 9 because each byte in I2C transmission requires + * 9 clock cycles: 8 bits of data plus 1 ACK/NACK bit. + */ + timeout = cnt * 9 * USEC_PER_SEC / i2c->clock_freq; + + i2c->adapt.timeout = usecs_to_jiffies(timeout + USEC_PER_SEC / 10) / i2c->msg_num; +} + +static int spacemit_i2c_xfer(struct i2c_adapter *adapt, struct i2c_msg *msgs, int num) +{ + struct spacemit_i2c_dev *i2c = i2c_get_adapdata(adapt); + int ret; + + i2c->msgs = msgs; + i2c->msg_num = num; + + spacemit_i2c_calc_timeout(i2c); + + spacemit_i2c_init(i2c); + + spacemit_i2c_enable(i2c); + + ret = spacemit_i2c_wait_bus_idle(i2c); + if (!ret) + spacemit_i2c_xfer_msg(i2c); + else if (ret < 0) + dev_dbg(i2c->dev, "i2c transfer error: %d\n", ret); + else + spacemit_i2c_check_bus_release(i2c); + + spacemit_i2c_disable(i2c); + + if (ret == -ETIMEDOUT || ret == -EAGAIN) + dev_err(i2c->dev, "i2c transfer failed, ret %d err 0x%lx\n", + ret, i2c->status & SPACEMIT_SR_ERR); + + return ret < 0 ? ret : num; +} + +static u32 spacemit_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); +} + +static const struct i2c_algorithm spacemit_i2c_algo = { + .xfer = spacemit_i2c_xfer, + .functionality = spacemit_i2c_func, +}; + +static int spacemit_i2c_probe(struct platform_device *pdev) +{ + struct clk *clk; + struct device *dev = &pdev->dev; + struct device_node *of_node = pdev->dev.of_node; + struct spacemit_i2c_dev *i2c; + int ret; + + i2c = devm_kzalloc(dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + of_property_read_u32(of_node, "clock-frequency", &i2c->clock_freq); + if (ret && ret != -EINVAL) + dev_warn(dev, "failed to read clock-frequency property: %d\n", ret); + + /* For now, this driver doesn't support high-speed. */ + if (!i2c->clock_freq || i2c->clock_freq > SPACEMIT_I2C_MAX_FAST_MODE_FREQ) { + dev_warn(dev, "unsupported clock frequency %u; using %u\n", + i2c->clock_freq, SPACEMIT_I2C_MAX_FAST_MODE_FREQ); + i2c->clock_freq = SPACEMIT_I2C_MAX_FAST_MODE_FREQ; + } else if (i2c->clock_freq < SPACEMIT_I2C_MAX_STANDARD_MODE_FREQ) { + dev_warn(dev, "unsupported clock frequency %u; using %u\n", + i2c->clock_freq, SPACEMIT_I2C_MAX_STANDARD_MODE_FREQ); + i2c->clock_freq = SPACEMIT_I2C_MAX_STANDARD_MODE_FREQ; + } + + i2c->dev = &pdev->dev; + + i2c->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(i2c->base)) + return dev_err_probe(dev, PTR_ERR(i2c->base), "failed to do ioremap"); + + i2c->irq = platform_get_irq(pdev, 0); + if (i2c->irq < 0) + return dev_err_probe(dev, i2c->irq, "failed to get irq resource"); + + ret = devm_request_irq(i2c->dev, i2c->irq, spacemit_i2c_irq_handler, + IRQF_NO_SUSPEND | IRQF_ONESHOT, dev_name(i2c->dev), i2c); + if (ret) + return dev_err_probe(dev, ret, "failed to request irq"); + + clk = devm_clk_get_enabled(dev, "func"); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to enable func clock"); + + clk = devm_clk_get_enabled(dev, "bus"); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to enable bus clock"); + + spacemit_i2c_reset(i2c); + + i2c_set_adapdata(&i2c->adapt, i2c); + i2c->adapt.owner = THIS_MODULE; + i2c->adapt.algo = &spacemit_i2c_algo; + i2c->adapt.dev.parent = i2c->dev; + i2c->adapt.nr = pdev->id; + + i2c->adapt.dev.of_node = of_node; + + strscpy(i2c->adapt.name, "spacemit-i2c-adapter", sizeof(i2c->adapt.name)); + + init_completion(&i2c->complete); + + platform_set_drvdata(pdev, i2c); + + ret = i2c_add_numbered_adapter(&i2c->adapt); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add i2c adapter"); + + return 0; +} + +static void spacemit_i2c_remove(struct platform_device *pdev) +{ + struct spacemit_i2c_dev *i2c = platform_get_drvdata(pdev); + + i2c_del_adapter(&i2c->adapt); +} + +static const struct of_device_id spacemit_i2c_of_match[] = { + { .compatible = "spacemit,k1-i2c", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, spacemit_i2c_of_match); + +static struct platform_driver spacemit_i2c_driver = { + .probe = spacemit_i2c_probe, + .remove = spacemit_i2c_remove, + .driver = { + .name = "i2c-k1", + .of_match_table = spacemit_i2c_of_match, + }, +}; +module_platform_driver(spacemit_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("I2C bus driver for SpacemiT K1 SoC"); From c25951eb7518844fcb7fc9ec58e888731e8c46d0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 15 Nov 2024 15:20:55 +0000 Subject: [PATCH 1547/2157] bus: fsl-mc: Remove deadcode fsl_mc_allocator_driver_exit() was added explicitly by commit 1e8ac83b6caf ("bus: fsl-mc: add fsl_mc_allocator cleanup function") but was never used. Remove it. fsl_mc_portal_reset() was added in 2015 by commit 197f4d6a4a00 ("staging: fsl-mc: fsl-mc object allocator driver") but was never used. Remove it. fsl_mc_portal_reset() was the only caller of dpmcp_reset(). Remove it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Ioana Ciornei Acked-by: Christophe Leroy Link: https://lore.kernel.org/r/20241115152055.279732-1-linux@treblig.org Signed-off-by: Christophe Leroy --- drivers/bus/fsl-mc/dpmcp.c | 22 ---------------------- drivers/bus/fsl-mc/fsl-mc-allocator.c | 5 ----- drivers/bus/fsl-mc/fsl-mc-private.h | 6 ------ drivers/bus/fsl-mc/mc-io.c | 20 -------------------- include/linux/fsl/mc.h | 2 -- 5 files changed, 55 deletions(-) diff --git a/drivers/bus/fsl-mc/dpmcp.c b/drivers/bus/fsl-mc/dpmcp.c index 5fbd0dbde24a..7816c0a728ef 100644 --- a/drivers/bus/fsl-mc/dpmcp.c +++ b/drivers/bus/fsl-mc/dpmcp.c @@ -75,25 +75,3 @@ int dpmcp_close(struct fsl_mc_io *mc_io, /* send command to mc*/ return mc_send_command(mc_io, &cmd); } - -/** - * dpmcp_reset() - Reset the DPMCP, returns the object to initial state. - * @mc_io: Pointer to MC portal's I/O object - * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' - * @token: Token of DPMCP object - * - * Return: '0' on Success; Error code otherwise. - */ -int dpmcp_reset(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token) -{ - struct fsl_mc_command cmd = { 0 }; - - /* prepare command */ - cmd.header = mc_encode_cmd_header(DPMCP_CMDID_RESET, - cmd_flags, token); - - /* send command to mc*/ - return mc_send_command(mc_io, &cmd); -} diff --git a/drivers/bus/fsl-mc/fsl-mc-allocator.c b/drivers/bus/fsl-mc/fsl-mc-allocator.c index b5e8c021fa1f..6c3beb82dd1b 100644 --- a/drivers/bus/fsl-mc/fsl-mc-allocator.c +++ b/drivers/bus/fsl-mc/fsl-mc-allocator.c @@ -656,8 +656,3 @@ int __init fsl_mc_allocator_driver_init(void) { return fsl_mc_driver_register(&fsl_mc_allocator_driver); } - -void fsl_mc_allocator_driver_exit(void) -{ - fsl_mc_driver_unregister(&fsl_mc_allocator_driver); -} diff --git a/drivers/bus/fsl-mc/fsl-mc-private.h b/drivers/bus/fsl-mc/fsl-mc-private.h index b3520ea1b9f4..e1b7ec3ed1a7 100644 --- a/drivers/bus/fsl-mc/fsl-mc-private.h +++ b/drivers/bus/fsl-mc/fsl-mc-private.h @@ -66,10 +66,6 @@ int dpmcp_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); -int dpmcp_reset(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token); - /* * Data Path Resource Container (DPRC) API */ @@ -631,8 +627,6 @@ int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev, int __init fsl_mc_allocator_driver_init(void); -void fsl_mc_allocator_driver_exit(void); - void fsl_mc_init_all_resource_pools(struct fsl_mc_device *mc_bus_dev); void fsl_mc_cleanup_all_resource_pools(struct fsl_mc_device *mc_bus_dev); diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c index 95b10a6cf307..a0ad7866cbfc 100644 --- a/drivers/bus/fsl-mc/mc-io.c +++ b/drivers/bus/fsl-mc/mc-io.c @@ -263,23 +263,3 @@ void fsl_mc_portal_free(struct fsl_mc_io *mc_io) dpmcp_dev->consumer_link = NULL; } EXPORT_SYMBOL_GPL(fsl_mc_portal_free); - -/** - * fsl_mc_portal_reset - Resets the dpmcp object for a given fsl_mc_io object - * - * @mc_io: Pointer to the fsl_mc_io object that wraps the MC portal to free - */ -int fsl_mc_portal_reset(struct fsl_mc_io *mc_io) -{ - int error; - struct fsl_mc_device *dpmcp_dev = mc_io->dpmcp_dev; - - error = dpmcp_reset(mc_io, 0, dpmcp_dev->mc_handle); - if (error < 0) { - dev_err(&dpmcp_dev->dev, "dpmcp_reset() failed: %d\n", error); - return error; - } - - return 0; -} -EXPORT_SYMBOL_GPL(fsl_mc_portal_reset); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 99f30c7d6208..897d6211c163 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -417,8 +417,6 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, void fsl_mc_portal_free(struct fsl_mc_io *mc_io); -int fsl_mc_portal_reset(struct fsl_mc_io *mc_io); - int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, enum fsl_mc_pool_type pool_type, struct fsl_mc_device **new_mc_adev); From cfe1f8776f23fd6dfe4ba9fcb695b129f0761187 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 13 Mar 2025 13:01:22 -0400 Subject: [PATCH 1548/2157] NFS: Extend rdirplus mount option with "force|none" There are certain users that wish to force the NFS client to choose READDIRPLUS over READDIR for a particular mount. Update the "rdirplus" mount option to optionally accept values. For "rdirplus=force", the NFS client will always attempt to use READDDIRPLUS. The setting of "rdirplus=none" is aliased to the existing "nordirplus". Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/c4cf0de4c8be0930b91bc74bee310d289781cd3b.1741885071.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ fs/nfs/fs_context.c | 32 ++++++++++++++++++++++++++++---- fs/nfs/super.c | 1 + include/linux/nfs_fs_sb.h | 1 + 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2b04038b0e40..5c4566a8dabb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -666,6 +666,8 @@ static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, { if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS) + return true; if (ctx->pos == 0 || cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD) return true; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index b069385eea17..1cabba1231d6 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -72,6 +72,8 @@ enum nfs_param { Opt_posix, Opt_proto, Opt_rdirplus, + Opt_rdirplus_none, + Opt_rdirplus_force, Opt_rdma, Opt_resvport, Opt_retrans, @@ -174,7 +176,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), - fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus + fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=... fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), @@ -288,6 +291,12 @@ static const struct constant_table nfs_xprtsec_policies[] = { {} }; +static const struct constant_table nfs_rdirplus_tokens[] = { + { "none", Opt_rdirplus_none }, + { "force", Opt_rdirplus_force }, + {} +}; + /* * Sanity-check a server address provided by the mount command. * @@ -636,10 +645,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: - if (result.negated) + if (result.negated) { + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; ctx->flags |= NFS_MOUNT_NORDIRPLUS; - else - ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + } else if (!param->string) { + ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS); + } else { + switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) { + case Opt_rdirplus_none: + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_rdirplus_force: + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS; + break; + default: + goto out_invalid_value; + } + } break; case Opt_sharecache: if (result.negated) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aeb715b4a690..96de658a7886 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -454,6 +454,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" }, { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, { 0, NULL, NULL } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1711eefcf24f..b83d16a42afc 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -167,6 +167,7 @@ struct nfs_server { #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 +#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ From e171b965008de4e3c7fec49ce634c698c8bc924d Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:21 -0500 Subject: [PATCH 1549/2157] NFS: Add implid to sysfs The Linux NFS server added support for returning this information during an EXCHANGE_ID in Linux v6.13. This is something and admin might want to query, so let's add it to sysfs. Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Link: https://lore.kernel.org/r/20250207204225.594002-2-anna@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7b59a40d40c0..b30401b2c939 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -272,6 +272,38 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown); +#if IS_ENABLED(CONFIG_NFS_V4_1) +static ssize_t +implid_domain_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; + + if (!impl_id || strlen(impl_id->domain) == 0) + return 0; //sysfs_emit(buf, ""); + return sysfs_emit(buf, "%s\n", impl_id->domain); +} + +static struct kobj_attribute nfs_sysfs_attr_implid_domain = __ATTR_RO(implid_domain); + + +static ssize_t +implid_name_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; + + if (!impl_id || strlen(impl_id->name) == 0) + return 0; //sysfs_emit(buf, ""); + return sysfs_emit(buf, "%s\n", impl_id->name); +} + +static struct kobj_attribute nfs_sysfs_attr_implid_name = __ATTR_RO(implid_name); + +#endif /* IS_ENABLED(CONFIG_NFS_V4_1) */ + #define RPC_CLIENT_NAME_SIZE 64 void nfs_sysfs_link_rpc_client(struct nfs_server *server, @@ -309,6 +341,32 @@ static struct kobj_type nfs_sb_ktype = { .child_ns_type = nfs_netns_object_child_ns_type, }; +#if IS_ENABLED(CONFIG_NFS_V4_1) +static void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) +{ + int ret; + + if (!server->nfs_client->cl_implid) + return; + + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_domain.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); + + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_name.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); +} +#else /* CONFIG_NFS_V4_1 */ +static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + void nfs_sysfs_add_server(struct nfs_server *server) { int ret; @@ -325,6 +383,8 @@ void nfs_sysfs_add_server(struct nfs_server *server) if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); + + nfs_sysfs_add_nfsv41_server(server); } EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); From 41cb320b816f29e417e2595d128391770bc765f9 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:22 -0500 Subject: [PATCH 1550/2157] sunrpc: Add a sysfs attr for xprtsec This allows the admin to check the TLS configuration for each xprt. Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Link: https://lore.kernel.org/r/20250207204225.594002-3-anna@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 5c8ecdaaa985..dc3b7cd70000 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -129,6 +129,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, return ret; } +static const char *xprtsec_strings[] = { + [RPC_XPRTSEC_NONE] = "none", + [RPC_XPRTSEC_TLS_ANON] = "tls-anon", + [RPC_XPRTSEC_TLS_X509] = "tls-x509", +}; + +static ssize_t rpc_sysfs_xprt_xprtsec_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + ssize_t ret; + + if (!xprt) { + ret = sprintf(buf, "\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", xprtsec_strings[xprt->xprtsec.policy]); + xprt_put(xprt); +out: + return ret; + +} + static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -404,6 +429,9 @@ static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr, static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr, 0644, rpc_sysfs_xprt_srcaddr_show, NULL); +static struct kobj_attribute rpc_sysfs_xprt_xprtsec = __ATTR(xprtsec, + 0644, rpc_sysfs_xprt_xprtsec_show, NULL); + static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, 0444, rpc_sysfs_xprt_info_show, NULL); @@ -413,6 +441,7 @@ static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_dstaddr.attr, &rpc_sysfs_xprt_srcaddr.attr, + &rpc_sysfs_xprt_xprtsec.attr, &rpc_sysfs_xprt_info.attr, &rpc_sysfs_xprt_change_state.attr, NULL, From 88efd79c3f1db75b0f1edac71eebdf66e4835f0a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:23 -0500 Subject: [PATCH 1551/2157] sunrpc: Add a sysfs files for rpc_clnt information These files display useful information about the RPC client, such as the rpc version number, program name, and maximum number of connections allowed. Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Link: https://lore.kernel.org/r/20250207204225.594002-4-anna@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index dc3b7cd70000..dcd579eab50a 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -59,6 +59,16 @@ static struct kobject *rpc_sysfs_object_alloc(const char *name, return NULL; } +static inline struct rpc_clnt * +rpc_sysfs_client_kobj_get_clnt(struct kobject *kobj) +{ + struct rpc_sysfs_client *c = container_of(kobj, + struct rpc_sysfs_client, kobject); + struct rpc_clnt *ret = c->clnt; + + return refcount_inc_not_zero(&ret->cl_count) ? ret : NULL; +} + static inline struct rpc_xprt * rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj) { @@ -86,6 +96,51 @@ rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj) return xprt_switch_get(x->xprt_switch); } +static ssize_t rpc_sysfs_clnt_version_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "\n"); + + ret = sprintf(buf, "%u", clnt->cl_vers); + refcount_dec(&clnt->cl_count); + return ret; +} + +static ssize_t rpc_sysfs_clnt_program_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "\n"); + + ret = sprintf(buf, "%s", clnt->cl_program->name); + refcount_dec(&clnt->cl_count); + return ret; +} + +static ssize_t rpc_sysfs_clnt_max_connect_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "\n"); + + ret = sprintf(buf, "%u\n", clnt->cl_max_connect); + refcount_dec(&clnt->cl_count); + return ret; +} + static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -423,6 +478,23 @@ static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj) kobject)->xprt->xprt_net; } +static struct kobj_attribute rpc_sysfs_clnt_version = __ATTR(rpc_version, + 0444, rpc_sysfs_clnt_version_show, NULL); + +static struct kobj_attribute rpc_sysfs_clnt_program = __ATTR(program, + 0444, rpc_sysfs_clnt_program_show, NULL); + +static struct kobj_attribute rpc_sysfs_clnt_max_connect = __ATTR(max_connect, + 0444, rpc_sysfs_clnt_max_connect_show, NULL); + +static struct attribute *rpc_sysfs_rpc_clnt_attrs[] = { + &rpc_sysfs_clnt_version.attr, + &rpc_sysfs_clnt_program.attr, + &rpc_sysfs_clnt_max_connect.attr, + NULL, +}; +ATTRIBUTE_GROUPS(rpc_sysfs_rpc_clnt); + static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr, 0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store); @@ -459,6 +531,7 @@ ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static const struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, + .default_groups = rpc_sysfs_rpc_clnt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_client_namespace, }; From df210d9b0951d714c1668c511ca5c8ff38cf6916 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:24 -0500 Subject: [PATCH 1552/2157] sunrpc: Add a sysfs file for adding a new xprt Writing to this file will clone the 'main' xprt of an xprt_switch and add it to be used as an additional connection. -- Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker v3: Replace call to xprt_iter_get_xprt() with xprt_iter_get_next() Link: https://lore.kernel.org/r/20250207204225.594002-5-anna@kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtmultipath.h | 1 + net/sunrpc/sysfs.c | 54 ++++++++++++++++++++++++++++ net/sunrpc/xprtmultipath.c | 21 +++++++++++ 3 files changed, 76 insertions(+) diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index e411368cdacf..e4db5022fe92 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -56,6 +56,7 @@ extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index dcd579eab50a..8fd1975f2fe8 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -305,6 +305,55 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, return ret; } +static ssize_t rpc_sysfs_xprt_switch_add_xprt_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "# add one xprt to this xprt_switch\n"); +} + +static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt_switch *xprt_switch = + rpc_sysfs_xprt_switch_kobj_get_xprt(kobj); + struct xprt_create xprt_create_args; + struct rpc_xprt *xprt, *new; + + if (!xprt_switch) + return 0; + + xprt = rpc_xprt_switch_get_main_xprt(xprt_switch); + if (!xprt) + goto out; + + xprt_create_args.ident = xprt->xprt_class->ident; + xprt_create_args.net = xprt->xprt_net; + xprt_create_args.dstaddr = (struct sockaddr *)&xprt->addr; + xprt_create_args.addrlen = xprt->addrlen; + xprt_create_args.servername = xprt->servername; + xprt_create_args.bc_xprt = xprt->bc_xprt; + xprt_create_args.xprtsec = xprt->xprtsec; + xprt_create_args.connect_timeout = xprt->connect_timeout; + xprt_create_args.reconnect_timeout = xprt->max_reconnect_timeout; + + new = xprt_create_transport(&xprt_create_args); + if (IS_ERR_OR_NULL(new)) { + count = PTR_ERR(new); + goto out_put_xprt; + } + + rpc_xprt_switch_add_xprt(xprt_switch, new); + xprt_put(new); + +out_put_xprt: + xprt_put(xprt); +out: + xprt_switch_put(xprt_switch); + return count; +} + static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -523,8 +572,13 @@ ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); +static struct kobj_attribute rpc_sysfs_xprt_switch_add_xprt = + __ATTR(add_xprt, 0644, rpc_sysfs_xprt_switch_add_xprt_show, + rpc_sysfs_xprt_switch_add_xprt_store); + static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, + &rpc_sysfs_xprt_switch_add_xprt.attr, NULL, }; ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 7e98d4dd9f10..4c5e08b0aa64 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -92,6 +92,27 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, xprt_put(xprt); } +/** + * rpc_xprt_switch_get_main_xprt - Get the 'main' xprt for an xprt switch. + * @xps: pointer to struct rpc_xprt_switch. + */ +struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps) +{ + struct rpc_xprt_iter xpi; + struct rpc_xprt *xprt; + + xprt_iter_init_listall(&xpi, xps); + + xprt = xprt_iter_get_next(&xpi); + while (xprt && !xprt->main) { + xprt_put(xprt); + xprt = xprt_iter_get_next(&xpi); + } + + xprt_iter_destroy(&xpi); + return xprt; +} + static DEFINE_IDA(rpc_xprtswitch_ids); void xprt_multipath_cleanup_ids(void) From 4c2226be8161a12f0a68e81d25cf8ed05fa622e0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:25 -0500 Subject: [PATCH 1553/2157] sunrpc: Add a sysfs file for one-step xprt deletion Previously, the admin would need to set the xprt state to "offline" before attempting to remove. This patch adds a new sysfs attr that does both these steps in a single call. Suggested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Link: https://lore.kernel.org/r/20250207204225.594002-6-anna@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 8fd1975f2fe8..09434e1143c5 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -286,6 +286,14 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, return ret; } +static ssize_t rpc_sysfs_xprt_del_xprt_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "# delete this xprt\n"); +} + + static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -464,6 +472,40 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, return count; } +static ssize_t rpc_sysfs_xprt_del_xprt(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); + + if (!xprt || !xps) { + count = 0; + goto out; + } + + if (xprt->main) { + count = -EINVAL; + goto release_tasks; + } + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + count = -EINTR; + goto out_put; + } + + xprt_set_offline_locked(xprt, xps); + xprt_delete_locked(xprt, xps); + +release_tasks: + xprt_release_write(xprt, NULL); +out_put: + xprt_put(xprt); + xprt_switch_put(xps); +out: + return count; +} + int rpc_sysfs_init(void) { rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj); @@ -559,12 +601,16 @@ static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change); +static struct kobj_attribute rpc_sysfs_xprt_del = __ATTR(del_xprt, + 0644, rpc_sysfs_xprt_del_xprt_show, rpc_sysfs_xprt_del_xprt); + static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_dstaddr.attr, &rpc_sysfs_xprt_srcaddr.attr, &rpc_sysfs_xprt_xprtsec.attr, &rpc_sysfs_xprt_info.attr, &rpc_sysfs_xprt_change_state.attr, + &rpc_sysfs_xprt_del.attr, NULL, }; ATTRIBUTE_GROUPS(rpc_sysfs_xprt); From 487fae09d7e266a58a13784983cc1ef6a2076526 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 11:45:06 -0400 Subject: [PATCH 1554/2157] NFS: Add a mount option to make ENETUNREACH errors fatal If the NFS client was initially created in a container, and that container is torn down, there is usually no possibity to go back and destroy any NFS clients that are hung because their virtual network devices have been unlinked. Add a flag that tells the NFS client that in these circumstances, it should treat ENETDOWN and ENETUNREACH errors as fatal to the NFS client. The option defaults to being on when the mount happens from inside a net namespace that is not "init_net". Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/fs_context.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/nfs/super.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 43 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 1cabba1231d6..13f71ca8c974 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -50,6 +50,7 @@ enum nfs_param { Opt_clientaddr, Opt_cto, Opt_alignwrite, + Opt_fatal_neterrors, Opt_fg, Opt_fscache, Opt_fscache_flag, @@ -97,6 +98,20 @@ enum nfs_param { Opt_xprtsec, }; +enum { + Opt_fatal_neterrors_default, + Opt_fatal_neterrors_enetunreach, + Opt_fatal_neterrors_none, +}; + +static const struct constant_table nfs_param_enums_fatal_neterrors[] = { + { "default", Opt_fatal_neterrors_default }, + { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach }, + { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach }, + { "none", Opt_fatal_neterrors_none }, + {} +}; + enum { Opt_local_lock_all, Opt_local_lock_flock, @@ -153,6 +168,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), fsparam_flag_no("alignwrite", Opt_alignwrite), + fsparam_enum("fatal_neterrors", Opt_fatal_neterrors, + nfs_param_enums_fatal_neterrors), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), @@ -896,6 +913,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; + case Opt_fatal_neterrors: + trace_nfs_mount_assign(param->key, param->string); + switch (result.uint_32) { + case Opt_fatal_neterrors_default: + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + else + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_enetunreach: + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_none: + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + default: + goto out_invalid_value; + } + break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { @@ -1675,6 +1711,9 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = ctx; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 96de658a7886..9eea9e62afc9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -457,6 +457,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" }, { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, + { NFS_MOUNT_NETUNREACH_FATAL, + ",fatal_neterrors=ENETDOWN:ENETUNREACH", + ",fatal_neterrors=none" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b83d16a42afc..a6ce8590eaaf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -168,6 +168,7 @@ struct nfs_server { #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 +#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ From 9827144bfb2b1baf1e78600da73dcc9e92e28415 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 10:50:26 -0400 Subject: [PATCH 1555/2157] NFS: Treat ENETUNREACH errors as fatal in containers Propagate the NFS_MOUNT_NETUNREACH_FATAL flag to work with the generic NFS client. If the flag is set, the client will receive ENETDOWN and ENETUNREACH errors from the RPC layer, and is expected to treat them as being fatal. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/client.c | 5 +++++ fs/nfs/nfs4client.c | 2 ++ fs/nfs/nfs4proc.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 5 ++++- include/linux/sunrpc/sched.h | 1 + include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 30 ++++++++++++++++++++++-------- 8 files changed, 39 insertions(+), 9 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3b0918ade53c..02c916a55020 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -546,6 +546,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_NOPING; if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_REUSEPORT; + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -709,6 +711,9 @@ static int nfs_init_server(struct nfs_server *server, if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 83378f69b35e..8f7d40844cdc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -233,6 +233,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); if (test_bit(NFS_CS_PNFS, &cl_init->init_flags)) __set_bit(NFS_CS_PNFS, &clp->cl_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 24406fc1352d..889511650ceb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -195,6 +195,9 @@ static int nfs4_map_errors(int err) return -EBUSY; case -NFS4ERR_NOT_SAME: return -ENOTSYNC; + case -ENETDOWN: + case -ENETUNREACH: + break; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a6ce8590eaaf..71319637a84e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -50,6 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ +#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index fec976e58174..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -64,7 +64,9 @@ struct rpc_clnt { cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1,/* be verbose */ - cl_shutdown : 1;/* rpc immediate -EIO */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -175,6 +177,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) #define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index eac57914dcf3..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -134,6 +134,7 @@ struct rpc_task_setup { #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 851841336ee6..5d331383047b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -343,6 +343,7 @@ TRACE_EVENT(rpc_request, { RPC_TASK_MOVEABLE, "MOVEABLE" }, \ { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ + { RPC_TASK_NETUNREACH_FATAL, "NETUNREACH_FATAL"}, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ { RPC_TASK_SOFT, "SOFT" }, \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2fe88ea79a70..1cfaf93ceec1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -512,6 +512,8 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, clnt->cl_discrtry = 1; if (!(args->flags & RPC_CLNT_CREATE_QUIET)) clnt->cl_chatty = 1; + if (args->flags & RPC_CLNT_CREATE_NETUNREACH_FATAL) + clnt->cl_netunreach_fatal = 1; return clnt; } @@ -662,6 +664,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; + new->cl_netunreach_fatal = clnt->cl_netunreach_fatal; new->cl_principal = clnt->cl_principal; new->cl_max_connect = clnt->cl_max_connect; return new; @@ -1195,6 +1198,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; + if (clnt->cl_netunreach_fatal) + task->tk_flags |= RPC_TASK_NETUNREACH_FATAL; atomic_inc(&clnt->cl_task_count); } @@ -2102,14 +2107,17 @@ call_bind_status(struct rpc_task *task) case -EPROTONOSUPPORT: trace_rpcb_bind_version_err(task); goto retry_timeout; + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: - case -ENETDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EPIPE: trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { @@ -2191,19 +2199,22 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: case -ECONNRESET: /* A positive refusal suggests a rebind is needed. */ - if (RPC_IS_SOFTCONN(task)) - break; if (clnt->cl_autobind) { rpc_force_rebind(clnt); + if (RPC_IS_SOFTCONN(task)) + break; goto out_retry; } fallthrough; case -ECONNABORTED: - case -ENETDOWN: - case -ENETUNREACH: case -EHOSTUNREACH: case -EPIPE: case -EPROTO: @@ -2455,10 +2466,13 @@ call_status(struct rpc_task *task) trace_rpc_call_status(task); task->tk_status = 0; switch(status) { - case -EHOSTDOWN: case -ENETDOWN: - case -EHOSTUNREACH: case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + goto out_exit; + fallthrough; + case -EHOSTDOWN: + case -EHOSTUNREACH: case -EPERM: if (RPC_IS_SOFTCONN(task)) goto out_exit; From 8c9f0df7a82a9f3bbdab4c796d302b20a33c1cc6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 08:04:35 -0400 Subject: [PATCH 1556/2157] pNFS/flexfiles: Treat ENETUNREACH errors as fatal in containers Propagate the NFS_MOUNT_NETUNREACH_FATAL flag to work with the pNFS flexfiles client. In these circumstances, the client needs to treat the ENETDOWN and ENETUNREACH errors as fatal, and should abandon the attempted I/O. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/flexfilelayout/flexfilelayout.c | 23 +++++++++++++++++++++-- fs/nfs/nfs3client.c | 2 ++ fs/nfs/nfs4client.c | 5 +++++ include/linux/nfs4.h | 1 + 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98b45b636be3..f89fdba7289d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1154,10 +1154,14 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; /* RPC connection errors */ + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; case -ECONNREFUSED: case -EHOSTDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EIO: case -ETIMEDOUT: case -EPIPE: @@ -1183,6 +1187,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { @@ -1200,6 +1205,11 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); goto out_retry; + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); @@ -1234,7 +1244,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, lseg, idx); + return ff_layout_async_handle_error_v3(task, clp, lseg, idx); case 4: return ff_layout_async_handle_error_v4(task, state, clp, lseg, idx); @@ -1337,6 +1347,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: goto out_eagain; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } return 0; @@ -1507,6 +1520,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } if (hdr->res.verf->committed == NFS_FILE_SYNC || @@ -1551,6 +1567,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b0c8a39c2bbd..0d7310c1ee0c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -120,6 +120,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8f7d40844cdc..162c85a83a14 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -939,6 +939,9 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); server->port = rpc_get_port((struct sockaddr *)addr); + if (server->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) @@ -1013,6 +1016,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); __set_bit(NFS_CS_PNFS, &cl_init.init_flags); cl_init.max_connect = NFS_MAX_TRANSPORTS; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5fa60fe441b5..d8cad844870a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -300,6 +300,7 @@ enum nfsstat4 { /* error codes for internal client use */ #define NFS4ERR_RESET_TO_MDS 12001 #define NFS4ERR_RESET_TO_PNFS 12002 +#define NFS4ERR_FATAL_IOERROR 12003 static inline bool seqid_mutating_err(u32 err) { From aa42add73ce9b9e3714723d385c254b75814e335 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 12:45:01 -0400 Subject: [PATCH 1557/2157] pNFS/flexfiles: Report ENETDOWN as a connection error If the client should see an ENETDOWN when trying to connect to the data server, it might still be able to talk to the metadata server through another NIC. If so, report the error. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f89fdba7289d..61ad269c825f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1274,6 +1274,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ECONNRESET: case -EHOSTDOWN: case -EHOSTUNREACH: + case -ENETDOWN: case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: From 24ac6fb6e3647fff3646b3ea1811095441380560 Mon Sep 17 00:00:00 2001 From: Ge Yang Date: Mon, 10 Feb 2025 09:56:06 +0800 Subject: [PATCH 1558/2157] mm/cma: using per-CMA locks to improve concurrent allocation performance For different CMAs, concurrent allocation of CMA memory ideally should not require synchronization using locks. Currently, a global cma_mutex lock is employed to synchronize all CMA allocations, which can impact the performance of concurrent allocations across different CMAs. To test the performance impact, follow these steps: 1. Boot the kernel with the command line argument hugetlb_cma=30G to allocate a 30GB CMA area specifically for huge page allocations. (note: on my machine, which has 3 nodes, each node is initialized with 10G of CMA) 2. Use the dd command with parameters if=/dev/zero of=/dev/shm/file bs=1G count=30 to fully utilize the CMA area by writing zeroes to a file in /dev/shm. 3. Open three terminals and execute the following commands simultaneously: (Note: Each of these commands attempts to allocate 10GB [2621440 * 4KB pages] of CMA memory.) On Terminal 1: time echo 2621440 > /sys/kernel/debug/cma/hugetlb1/alloc On Terminal 2: time echo 2621440 > /sys/kernel/debug/cma/hugetlb2/alloc On Terminal 3: time echo 2621440 > /sys/kernel/debug/cma/hugetlb3/alloc We attempt to allocate pages through the CMA debug interface and use the time command to measure the duration of each allocation. Performance comparison: Without this patch With this patch Terminal1 ~7s ~7s Terminal2 ~14s ~8s Terminal3 ~21s ~7s To solve problem above, we could use per-CMA locks to improve concurrent allocation performance. This would allow each CMA to be managed independently, reducing the need for a global lock and thus improving scalability and performance. Link: https://lkml.kernel.org/r/1739152566-744-1-git-send-email-yangge1116@126.com Signed-off-by: Ge Yang Reviewed-by: Barry Song Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Aisheng Dong Cc: Baolin Wang Signed-off-by: Andrew Morton --- mm/cma.c | 7 ++++--- mm/cma.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index 09322b8284bd..b06d5fe73399 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -34,7 +34,6 @@ struct cma cma_areas[MAX_CMA_AREAS]; unsigned int cma_area_count; -static DEFINE_MUTEX(cma_mutex); static int __init __cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, @@ -175,6 +174,8 @@ static void __init cma_activate_area(struct cma *cma) spin_lock_init(&cma->lock); + mutex_init(&cma->alloc_mutex); + #ifdef CONFIG_CMA_DEBUGFS INIT_HLIST_HEAD(&cma->mem_head); spin_lock_init(&cma->mem_head_lock); @@ -813,9 +814,9 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, spin_unlock_irq(&cma->lock); pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); - mutex_lock(&cma_mutex); + mutex_lock(&cma->alloc_mutex); ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp); - mutex_unlock(&cma_mutex); + mutex_unlock(&cma->alloc_mutex); if (ret == 0) { page = pfn_to_page(pfn); break; diff --git a/mm/cma.h b/mm/cma.h index df7fc623b7a6..41a3ab0ec3de 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -39,6 +39,7 @@ struct cma { unsigned long available_count; unsigned int order_per_bit; /* Order of pages represented by one bit */ spinlock_t lock; + struct mutex alloc_mutex; #ifdef CONFIG_CMA_DEBUGFS struct hlist_head mem_head; spinlock_t mem_head_lock; From 09bdc4fe700d1c499d94452d7a20e69c26a8c007 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 25 Feb 2025 10:30:16 +0200 Subject: [PATCH 1559/2157] mm/mm_init: rename __init_reserved_page_zone to __init_page_from_nid __init_reserved_page_zone() function finds the zone for pfn and nid and performs initialization of a struct page with that zone and nid. There is nothing in that function about reserved pages and it is misnamed. Rename it to __init_page_from_nid() to better reflect what the function does. Link: https://lkml.kernel.org/r/20250225083017.567649-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Wei Yang Cc: Frank van der Linden Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- mm/internal.h | 2 +- mm/mm_init.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index af9b8c1fca67..6fccfe6d046c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3407,7 +3407,7 @@ static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page, while (npages--) { pfn = page_to_pfn(page); - __init_reserved_page_zone(pfn, nid); + __init_page_from_nid(pfn, nid); free_reserved_page(page); page++; } diff --git a/mm/internal.h b/mm/internal.h index 286520a424fe..21f2643f3d95 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1518,7 +1518,7 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); -void __meminit __init_reserved_page_zone(unsigned long pfn, int nid); +void __meminit __init_page_from_nid(unsigned long pfn, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, diff --git a/mm/mm_init.c b/mm/mm_init.c index c82b0162f1cb..16a96aaf65c4 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -668,7 +668,7 @@ static inline void fixup_hashdist(void) {} /* * Initialize a reserved page unconditionally, finding its zone first. */ -void __meminit __init_reserved_page_zone(unsigned long pfn, int nid) +void __meminit __init_page_from_nid(unsigned long pfn, int nid) { pg_data_t *pgdat; int zid; @@ -748,7 +748,7 @@ static void __meminit init_reserved_page(unsigned long pfn, int nid) if (early_page_initialised(pfn, nid)) return; - __init_reserved_page_zone(pfn, nid); + __init_page_from_nid(pfn, nid); } #else static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {} From b4f65dbdf87812056c224fd8d2c66318b2140ab5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 25 Feb 2025 10:30:17 +0200 Subject: [PATCH 1560/2157] mm/mm_init: rename init_reserved_page to init_deferred_page When CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, init_reserved_page() function performs initialization of a struct page that would have been deferred normally. Rename it to init_deferred_page() to better reflect what the function does. Link: https://lkml.kernel.org/r/20250225083017.567649-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Wei Yang Cc: Frank van der Linden Cc: Muchun Song Cc: Changyuan Lyu Signed-off-by: Andrew Morton --- mm/mm_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 16a96aaf65c4..a38a1909b407 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -743,7 +743,7 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } -static void __meminit init_reserved_page(unsigned long pfn, int nid) +static void __meminit init_deferred_page(unsigned long pfn, int nid) { if (early_page_initialised(pfn, nid)) return; @@ -763,7 +763,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } -static inline void init_reserved_page(unsigned long pfn, int nid) +static inline void init_deferred_page(unsigned long pfn, int nid) { } #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ @@ -784,7 +784,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, if (pfn_valid(start_pfn)) { struct page *page = pfn_to_page(start_pfn); - init_reserved_page(start_pfn, nid); + init_deferred_page(start_pfn, nid); /* * no need for atomic set_bit because the struct From 20d6c17252282c3af261d1cde8e34def1b2458c8 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 12 Mar 2025 22:48:12 -0700 Subject: [PATCH 1561/2157] memcg: avoid refill_stock for root memcg We never charge the page counters of root memcg, so there is no need to put root memcg in the memcg stock. At the moment, refill_stock() can be called from try_charge_memcg(), obj_cgroup_uncharge_pages() and mem_cgroup_uncharge_skmem(). The try_charge_memcg() and mem_cgroup_uncharge_skmem() are never called with root memcg, so those are fine. However obj_cgroup_uncharge_pages() can potentially call refill_stock() with root memcg if the objcg object has been reparented over to the root memcg. Let's just avoid refill_stock() from obj_cgroup_uncharge_pages() for root memcg. Link: https://lkml.kernel.org/r/20250313054812.2185900-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b07eff78414b..ce57660bf5a2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2643,7 +2643,8 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); memcg1_account_kmem(memcg, -nr_pages); - refill_stock(memcg, nr_pages); + if (!mem_cgroup_is_root(memcg)) + refill_stock(memcg, nr_pages); css_put(&memcg->css); } From cb44821e1f524a5b8c05a738a40d572efd1ca430 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 12 Mar 2025 15:25:52 -0700 Subject: [PATCH 1562/2157] memcg: move do_memsw_account() to CONFIG_MEMCG_V1 The do_memsw_account() is used to enable or disable legacy memory+swap accounting in memory cgroup. However with disabled CONFIG_MEMCG_V1, we don't need to keep checking it. So, let's always return false for !CONFIG_MEMCG_V1 configs. Before the patch: $ size mm/memcontrol.o text data bss dec hex filename 49928 10736 4172 64836 fd44 mm/memcontrol.o After the patch: $ size mm/memcontrol.o text data bss dec hex filename 49430 10480 4172 64082 fa52 mm/memcontrol.o Link: https://lkml.kernel.org/r/20250312222552.3284173-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- mm/memcontrol-v1.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index 653ff1bad244..6358464bb416 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -22,12 +22,6 @@ iter != NULL; \ iter = mem_cgroup_iter(NULL, iter, NULL)) -/* Whether legacy memory+swap accounting is active */ -static inline bool do_memsw_account(void) -{ - return !cgroup_subsys_on_dfl(memory_cgrp_subsys); -} - unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); void drain_all_stock(struct mem_cgroup *root_memcg); @@ -42,6 +36,12 @@ struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg); /* Cgroup v1-specific declarations */ #ifdef CONFIG_MEMCG_V1 +/* Whether legacy memory+swap accounting is active */ +static inline bool do_memsw_account(void) +{ + return !cgroup_subsys_on_dfl(memory_cgrp_subsys); +} + unsigned long memcg_events_local(struct mem_cgroup *memcg, int event); unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx); unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item); @@ -94,6 +94,7 @@ extern struct cftype mem_cgroup_legacy_files[]; #else /* CONFIG_MEMCG_V1 */ +static inline bool do_memsw_account(void) { return false; } static inline bool memcg1_alloc_events(struct mem_cgroup *memcg) { return true; } static inline void memcg1_free_events(struct mem_cgroup *memcg) {} From fa23a338de93aa03eb0b6146a0440f5762309f85 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Mar 2025 13:36:11 +0000 Subject: [PATCH 1563/2157] mm: separate folio_split_memcg_refs() from split_page_memcg() Patch series "Minor memcg cleanups & prep for memdescs", v2. Separate the handling of accounted folios and GFP_ACCOUNT pages for easier to understand code. For more detail, see https://lore.kernel.org/linux-mm/Z9LwTOudOlCGny3f@casper.infradead.org/ This patch (of 5): Folios always use memcg_data to refer to the mem_cgroup while pages allocated with GFP_ACCOUNT have a pointer to the obj_cgroup. Since the caller already knows what it has, split the function into two and then we don't need to check. Move the assignment of split folio memcg_data to the point where we set up the other parts of the new folio. That leaves folio_split_memcg_refs() just handling the memcg accounting. Link: https://lkml.kernel.org/r/20250314133617.138071-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250314133617.138071-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Zi Yan Acked-by: Roman Gushchin Cc: David Hildenbrand Cc: Matthew Wilcow (Oracle) Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 7 +++++++ mm/huge_memory.c | 16 ++++------------ mm/memcontrol.c | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 57664e2a8fb7..d090089c5497 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1039,6 +1039,8 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, } void split_page_memcg(struct page *head, int old_order, int new_order); +void folio_split_memcg_refs(struct folio *folio, unsigned old_order, + unsigned new_order); static inline u64 cgroup_id_from_mm(struct mm_struct *mm) { @@ -1463,6 +1465,11 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or { } +static inline void folio_split_memcg_refs(struct folio *folio, + unsigned old_order, unsigned new_order) +{ +} + static inline u64 cgroup_id_from_mm(struct mm_struct *mm) { return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 10a86b681cf1..2a47682d1ab7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3394,6 +3394,9 @@ static void __split_folio_to_order(struct folio *folio, int old_order, folio_set_young(new_folio); if (folio_test_idle(folio)) folio_set_idle(new_folio); +#ifdef CONFIG_MEMCG + new_folio->memcg_data = folio->memcg_data; +#endif folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio)); } @@ -3525,18 +3528,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, } } - /* - * Reset any memcg data overlay in the tail pages. - * folio_nr_pages() is unreliable until prep_compound_page() - * was called again. - */ -#ifdef NR_PAGES_IN_LARGE_FOLIO - folio->_nr_pages = 0; -#endif - - - /* complete memcg works before add pages to LRU */ - split_page_memcg(&folio->page, old_order, split_order); + folio_split_memcg_refs(folio, old_order, split_order); split_page_owner(&folio->page, old_order, split_order); pgalloc_tag_split(folio, old_order, split_order); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ce57660bf5a2..f267b309b5b7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3081,10 +3081,19 @@ void split_page_memcg(struct page *head, int old_order, int new_order) for (i = new_nr; i < old_nr; i += new_nr) folio_page(folio, i)->memcg_data = folio->memcg_data; - if (folio_memcg_kmem(folio)) - obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1); - else - css_get_many(&folio_memcg(folio)->css, old_nr / new_nr - 1); + obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1); +} + +void folio_split_memcg_refs(struct folio *folio, unsigned old_order, + unsigned new_order) +{ + unsigned new_refs; + + if (mem_cgroup_disabled() || !folio_memcg_charged(folio)) + return; + + new_refs = (1 << (old_order - new_order)) - 1; + css_get_many(&__folio_memcg(folio)->css, new_refs); } unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) From 1506c25508acd740ced5e92c539ed3d12f622c5b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Mar 2025 13:36:12 +0000 Subject: [PATCH 1564/2157] mm: simplify split_page_memcg() The last argument to split_page_memcg() is now always 0, so remove it, effectively reverting commit b8791381d7ed. Link: https://lkml.kernel.org/r/20250314133617.138071-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Zi Yan Acked-by: Roman Gushchin Cc: David Hildenbrand Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 15 +++++++-------- mm/page_alloc.c | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d090089c5497..ea28cacfb0d2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1038,7 +1038,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, int old_order, int new_order); +void split_page_memcg(struct page *first, unsigned order); void folio_split_memcg_refs(struct folio *folio, unsigned old_order, unsigned new_order); @@ -1461,7 +1461,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, int old_order, int new_order) +static inline void split_page_memcg(struct page *first, unsigned order) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f267b309b5b7..fa7a3a1b710a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3066,22 +3066,21 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, } /* - * Because folio_memcg(head) is not set on tails, set it now. + * The objcg is only set on the first page, so transfer it to all the + * other pages. */ -void split_page_memcg(struct page *head, int old_order, int new_order) +void split_page_memcg(struct page *first, unsigned order) { - struct folio *folio = page_folio(head); - int i; - unsigned int old_nr = 1 << old_order; - unsigned int new_nr = 1 << new_order; + struct folio *folio = page_folio(first); + unsigned int i, nr = 1 << order; if (mem_cgroup_disabled() || !folio_memcg_charged(folio)) return; - for (i = new_nr; i < old_nr; i += new_nr) + for (i = 1; i < nr; i++) folio_page(folio, i)->memcg_data = folio->memcg_data; - obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1); + obj_cgroup_get_many(__folio_objcg(folio), nr - 1); } void folio_split_memcg_refs(struct folio *folio, unsigned old_order, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4337467eaf5a..a6d060eea638 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2778,7 +2778,7 @@ void split_page(struct page *page, unsigned int order) set_page_refcounted(page + i); split_page_owner(page, order, 0); pgalloc_tag_split(page_folio(page), order, 0); - split_page_memcg(page, order, 0); + split_page_memcg(page, order); } EXPORT_SYMBOL_GPL(split_page); @@ -4992,7 +4992,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, split_page_owner(page, order, 0); pgalloc_tag_split(page_folio(page), order, 0); - split_page_memcg(page, order, 0); + split_page_memcg(page, order); while (page < --last) set_page_refcounted(last); From 7cc57ecae40a68ff7204bca5e17a0241fe505ec7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Mar 2025 13:36:13 +0000 Subject: [PATCH 1565/2157] mm: remove references to folio in split_page_memcg() We know that the passed in page is not part of a folio (it's a plain page allocated with GFP_ACCOUNT), so we should get rid of the misleading references to folios. Introduce page_objcg() and page_set_objcg() helpers to make things more clear. Link: https://lkml.kernel.org/r/20250314133617.138071-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: David Hildenbrand Cc: Michal Hocko Cc: Muchun Song Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/memcontrol.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fa7a3a1b710a..d95b1f4216ac 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2677,6 +2677,23 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, return ret; } +static struct obj_cgroup *page_objcg(const struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + if (mem_cgroup_disabled() || !memcg_data) + return NULL; + + VM_BUG_ON_PAGE((memcg_data & OBJEXTS_FLAGS_MASK) != MEMCG_DATA_KMEM, + page); + return (struct obj_cgroup *)(memcg_data - MEMCG_DATA_KMEM); +} + +static void page_set_objcg(struct page *page, const struct obj_cgroup *objcg) +{ + page->memcg_data = (unsigned long)objcg | MEMCG_DATA_KMEM; +} + /** * __memcg_kmem_charge_page: charge a kmem page to the current memory cgroup * @page: page to charge @@ -2695,8 +2712,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order); if (!ret) { obj_cgroup_get(objcg); - page->memcg_data = (unsigned long)objcg | - MEMCG_DATA_KMEM; + page_set_objcg(page, objcg); return 0; } } @@ -3069,18 +3085,18 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, * The objcg is only set on the first page, so transfer it to all the * other pages. */ -void split_page_memcg(struct page *first, unsigned order) +void split_page_memcg(struct page *page, unsigned order) { - struct folio *folio = page_folio(first); + struct obj_cgroup *objcg = page_objcg(page); unsigned int i, nr = 1 << order; - if (mem_cgroup_disabled() || !folio_memcg_charged(folio)) + if (!objcg) return; for (i = 1; i < nr; i++) - folio_page(folio, i)->memcg_data = folio->memcg_data; + page_set_objcg(&page[i], objcg); - obj_cgroup_get_many(__folio_objcg(folio), nr - 1); + obj_cgroup_get_many(objcg, nr - 1); } void folio_split_memcg_refs(struct folio *folio, unsigned old_order, From 8492936abb49eda26e00eab01e58d7e69f2355ba Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Mar 2025 13:36:14 +0000 Subject: [PATCH 1566/2157] mm: simplify folio_memcg_charged() There's no need to check which kind of pointer is in the memcg_data field, all we actually care about is whether it's zero or not. Saves 70 bytes in workingset_activation() with the Debian config. Link: https://lkml.kernel.org/r/20250314133617.138071-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: David Hildenbrand Cc: Michal Hocko Cc: Muchun Song Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ea28cacfb0d2..53364526d877 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -438,9 +438,7 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) */ static inline bool folio_memcg_charged(struct folio *folio) { - if (folio_memcg_kmem(folio)) - return __folio_objcg(folio) != NULL; - return __folio_memcg(folio) != NULL; + return folio->memcg_data != 0; } /* From 0d2a2605237341f9dfde99cd0ed3c2d003322464 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Mar 2025 13:36:15 +0000 Subject: [PATCH 1567/2157] mm: remove references to folio in __memcg_kmem_uncharge_page() This use of folios is misleading because these pages are not part of a folio. Remove an unnecessary call to page_folio(), saving 58 bytes of text in a Debian kernel build. Link: https://lkml.kernel.org/r/20250314133617.138071-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: David Hildenbrand Cc: Michal Hocko Cc: Muchun Song Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/memcontrol.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d95b1f4216ac..57cf5a6c279c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2726,16 +2726,14 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { - struct folio *folio = page_folio(page); - struct obj_cgroup *objcg; + struct obj_cgroup *objcg = page_objcg(page); unsigned int nr_pages = 1 << order; - if (!folio_memcg_kmem(folio)) + if (!objcg) return; - objcg = __folio_objcg(folio); obj_cgroup_uncharge_pages(objcg, nr_pages); - folio->memcg_data = 0; + page->memcg_data = 0; obj_cgroup_put(objcg); } From 835de37603ef6412949df0a607128f5fffed4576 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 14 Mar 2025 15:37:54 -0600 Subject: [PATCH 1568/2157] meminfo: add a per node counter for balloon drivers Patch series "track memory used by balloon drivers", v2. This series introduces a way to track memory used by balloon drivers. Add a NR_BALLOON_PAGES counter to track how many pages are reclaimed by the balloon drivers. First add the accounting, then updates the balloon drivers (virtio, Hyper-V, VMware, Pseries-cmm, and Xen) to maintain this counter. The virtio, Vmware, and pseries-cmm balloon drivers utilize the balloon_compaction interface to allocate and free balloon pages. Other balloon drivers will have to maintain this counter manually. This makes the information visible in memory reporting interfaces like /proc/meminfo, show_mem, and OOM reporting. This provides admins visibility into their VM balloon sizes without requiring different virtualization tooling. Furthermore, this information is helpful when debugging an OOM inside a VM. This patch (of 4): Add NR_BALLOON_PAGES counter to track memory used by balloon drivers and expose it through /proc/meminfo and other memory reporting interfaces. [npache@redhat.com: document Balloon Meminfo entry] Link: https://lkml.kernel.org/r/a0315ccf-f244-460e-8643-fd7388724fe5@redhat.com Link: https://lkml.kernel.org/r/20250314213757.244258-1-npache@redhat.com Link: https://lkml.kernel.org/r/20250314213757.244258-2-npache@redhat.com Signed-off-by: Nico Pache Cc: Alexander Atanasov Cc: Chengming Zhou Cc: David Hildenbrand Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Johannes Weiner Cc: Juegren Gross Cc: Kanchana P Sridhar Cc: K. Y. Srinivasan Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Muchun Song Cc: Nhat Pham Cc: Oleksandr Tyshchenko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Stefano Stabellini Cc: Wei Liu Cc: Michael Kelley Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 3 +++ fs/proc/meminfo.c | 2 ++ include/linux/mmzone.h | 1 + mm/show_mem.c | 4 +++- mm/vmstat.c | 1 + 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 3c37b248fc4f..80f33bce5fe1 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1081,6 +1081,7 @@ Example output. You may not have all of these fields. FilePmdMapped: 0 kB CmaTotal: 0 kB CmaFree: 0 kB + Balloon: 0 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 @@ -1255,6 +1256,8 @@ CmaTotal Memory reserved for the Contiguous Memory Allocator (CMA) CmaFree Free remaining memory in the CMA reserves +Balloon + Memory returned to Host by VM Balloon Drivers HugePages_Total, HugePages_Free, HugePages_Rsvd, HugePages_Surp, Hugepagesize, Hugetlb See Documentation/admin-guide/mm/hugetlbpage.rst. DirectMap4k, DirectMap2M, DirectMap1G diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8ba9b1472390..83be312159c9 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -162,6 +162,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Unaccepted: ", global_zone_page_state(NR_UNACCEPTED)); #endif + show_val_kb(m, "Balloon: ", + global_node_page_state(NR_BALLOON_PAGES)); hugetlb_report_meminfo(m); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 37c29f3fbca8..a9db0fbd2b94 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -224,6 +224,7 @@ enum node_stat_item { #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif + NR_BALLOON_PAGES, NR_VM_NODE_STAT_ITEMS }; diff --git a/mm/show_mem.c b/mm/show_mem.c index 43afb56abbd3..6af13bcd2ab3 100644 --- a/mm/show_mem.c +++ b/mm/show_mem.c @@ -260,6 +260,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z " pagetables:%lukB" " sec_pagetables:%lukB" " all_unreclaimable? %s" + " Balloon:%lukB" "\n", pgdat->node_id, K(node_page_state(pgdat, NR_ACTIVE_ANON)), @@ -285,7 +286,8 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z #endif K(node_page_state(pgdat, NR_PAGETABLE)), K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), - str_yes_no(pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)); + str_yes_no(pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES), + K(node_page_state(pgdat, NR_BALLOON_PAGES))); } for_each_populated_zone(zone) { diff --git a/mm/vmstat.c b/mm/vmstat.c index ed49a86348f7..ae0e4259ac23 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1277,6 +1277,7 @@ const char * const vmstat_text[] = { #ifdef CONFIG_HUGETLB_PAGE "nr_hugetlb", #endif + "nr_balloon_pages", /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", From 4d689474e1b263aa14e93f3995cf0b6cd1910f74 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 14 Mar 2025 15:37:55 -0600 Subject: [PATCH 1569/2157] balloon_compaction: update the NR_BALLOON_PAGES state Update the NR_BALLOON_PAGES counter when pages are added or removed using the balloon compaction interface. The virtio, Vmware, and pseries-cmm balloon drivers utilize the balloon_compaction interface to allocate and free balloon pages. Other balloon drivers will have to maintain this counter manually. Link: https://lkml.kernel.org/r/20250314213757.244258-3-npache@redhat.com Signed-off-by: Nico Pache Cc: Alexander Atanasov Cc: Chengming Zhou Cc: David Hildenbrand Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Johannes Weiner Cc: Juegren Gross Cc: Kanchana P Sridhar Cc: K. Y. Srinivasan Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Muchun Song Cc: Nhat Pham Cc: Oleksandr Tyshchenko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Stefano Stabellini Cc: Wei Liu Cc: Michael Kelley Signed-off-by: Andrew Morton --- mm/balloon_compaction.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 6597ebea8ae2..d3e00731e262 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -24,6 +24,7 @@ static void balloon_page_enqueue_one(struct balloon_dev_info *b_dev_info, balloon_page_insert(b_dev_info, page); unlock_page(page); __count_vm_event(BALLOON_INFLATE); + inc_node_page_state(page, NR_BALLOON_PAGES); } /** @@ -103,6 +104,7 @@ size_t balloon_page_list_dequeue(struct balloon_dev_info *b_dev_info, __count_vm_event(BALLOON_DEFLATE); list_add(&page->lru, pages); unlock_page(page); + dec_node_page_state(page, NR_BALLOON_PAGES); n_pages++; } spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); From 02ec35963bc830f17c2aaa6f1008dcf7745c7c17 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 14 Mar 2025 15:37:56 -0600 Subject: [PATCH 1570/2157] hv_balloon: update the NR_BALLOON_PAGES state Update the NR_BALLOON_PAGES counter when pages are added to or removed from the Hyper-V balloon. Link: https://lkml.kernel.org/r/20250314213757.244258-4-npache@redhat.com Signed-off-by: Nico Pache Reviewed-by: Michael Kelley Cc: Alexander Atanasov Cc: Chengming Zhou Cc: David Hildenbrand Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Johannes Weiner Cc: Juegren Gross Cc: Kanchana P Sridhar Cc: K. Y. Srinivasan Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Muchun Song Cc: Nhat Pham Cc: Oleksandr Tyshchenko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Stefano Stabellini Cc: Wei Liu Signed-off-by: Andrew Morton --- drivers/hv/hv_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index fec2f18679e3..2b4080e51f97 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1192,6 +1192,7 @@ static void free_balloon_pages(struct hv_dynmem_device *dm, __ClearPageOffline(pg); __free_page(pg); dm->num_pages_ballooned--; + mod_node_page_state(page_pgdat(pg), NR_BALLOON_PAGES, -1); adjust_managed_page_count(pg, 1); } } @@ -1221,6 +1222,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, return i * alloc_unit; dm->num_pages_ballooned += alloc_unit; + mod_node_page_state(page_pgdat(pg), NR_BALLOON_PAGES, alloc_unit); /* * If we allocatted 2M pages; split them so we From f6a09e6800936c6c9ba5667ac3efc18feb8f3a2f Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 14 Mar 2025 15:37:57 -0600 Subject: [PATCH 1571/2157] xen: balloon: update the NR_BALLOON_PAGES state Update the NR_BALLOON_PAGES counter when pages are added to or removed from the Xen balloon. Link: https://lkml.kernel.org/r/20250314213757.244258-5-npache@redhat.com Signed-off-by: Nico Pache Reviewed-by: Juergen Gross Cc: Alexander Atanasov Cc: Chengming Zhou Cc: David Hildenbrand Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Johannes Weiner Cc: Juegren Gross Cc: Kanchana P Sridhar Cc: K. Y. Srinivasan Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Muchun Song Cc: Nhat Pham Cc: Oleksandr Tyshchenko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Stefano Stabellini Cc: Wei Liu Cc: Michael Kelley Signed-off-by: Andrew Morton --- drivers/xen/balloon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 163f7f1d70f1..65d4e7fa1eb8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -157,6 +157,8 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } + inc_node_page_state(page, NR_BALLOON_PAGES); + wake_up(&balloon_wq); } @@ -179,6 +181,8 @@ static struct page *balloon_retrieve(bool require_lowmem) balloon_stats.balloon_low--; __ClearPageOffline(page); + dec_node_page_state(page, NR_BALLOON_PAGES); + return page; } From 9f171d94be80d80067c6445e53b00d098150391e Mon Sep 17 00:00:00 2001 From: Jiwen Qi Date: Sat, 15 Mar 2025 21:13:17 +0000 Subject: [PATCH 1572/2157] docs/mm: Physical Memory: Populate the "Zones" section Briefly describe what zones are and the fields of struct zone. Link: https://lkml.kernel.org/r/20250315211317.27612-1-jiwen7.qi@gmail.com Signed-off-by: Jiwen Qi Acked-by: Mike Rapoport (Microsoft) Cc: Bagas Sanjaya Cc: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Andrew Morton --- Documentation/mm/physical_memory.rst | 266 ++++++++++++++++++++++++++- 1 file changed, 264 insertions(+), 2 deletions(-) diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst index 71fd4a6acf42..d3ac106e6b14 100644 --- a/Documentation/mm/physical_memory.rst +++ b/Documentation/mm/physical_memory.rst @@ -338,10 +338,272 @@ Statistics Zones ===== +As we have mentioned, each zone in memory is described by a ``struct zone`` +which is an element of the ``node_zones`` array of the node it belongs to. +``struct zone`` is the core data structure of the page allocator. A zone +represents a range of physical memory and may have holes. -.. admonition:: Stub +The page allocator uses the GFP flags, see :ref:`mm-api-gfp-flags`, specified by +a memory allocation to determine the highest zone in a node from which the +memory allocation can allocate memory. The page allocator first allocates memory +from that zone, if the page allocator can't allocate the requested amount of +memory from the zone, it will allocate memory from the next lower zone in the +node, the process continues up to and including the lowest zone. For example, if +a node contains ``ZONE_DMA32``, ``ZONE_NORMAL`` and ``ZONE_MOVABLE`` and the +highest zone of a memory allocation is ``ZONE_MOVABLE``, the order of the zones +from which the page allocator allocates memory is ``ZONE_MOVABLE`` > +``ZONE_NORMAL`` > ``ZONE_DMA32``. - This section is incomplete. Please list and describe the appropriate fields. +At runtime, free pages in a zone are in the Per-CPU Pagesets (PCP) or free areas +of the zone. The Per-CPU Pagesets are a vital mechanism in the kernel's memory +management system. By handling most frequent allocations and frees locally on +each CPU, the Per-CPU Pagesets improve performance and scalability, especially +on systems with many cores. The page allocator in the kernel employs a two-step +strategy for memory allocation, starting with the Per-CPU Pagesets before +falling back to the buddy allocator. Pages are transferred between the Per-CPU +Pagesets and the global free areas (managed by the buddy allocator) in batches. +This minimizes the overhead of frequent interactions with the global buddy +allocator. + +Architecture specific code calls free_area_init() to initializes zones. + +Zone structure +-------------- +The zones structure ``struct zone`` is defined in ``include/linux/mmzone.h``. +Here we briefly describe fields of this structure: + +General +~~~~~~~ + +``_watermark`` + The watermarks for this zone. When the amount of free pages in a zone is below + the min watermark, boosting is ignored, an allocation may trigger direct + reclaim and direct compaction, it is also used to throttle direct reclaim. + When the amount of free pages in a zone is below the low watermark, kswapd is + woken up. When the amount of free pages in a zone is above the high watermark, + kswapd stops reclaiming (a zone is balanced) when the + ``NUMA_BALANCING_MEMORY_TIERING`` bit of ``sysctl_numa_balancing_mode`` is not + set. The promo watermark is used for memory tiering and NUMA balancing. When + the amount of free pages in a zone is above the promo watermark, kswapd stops + reclaiming when the ``NUMA_BALANCING_MEMORY_TIERING`` bit of + ``sysctl_numa_balancing_mode`` is set. The watermarks are set by + ``__setup_per_zone_wmarks()``. The min watermark is calculated according to + ``vm.min_free_kbytes`` sysctl. The other three watermarks are set according + to the distance between two watermarks. The distance itself is calculated + taking ``vm.watermark_scale_factor`` sysctl into account. + +``watermark_boost`` + The number of pages which are used to boost watermarks to increase reclaim + pressure to reduce the likelihood of future fallbacks and wake kswapd now + as the node may be balanced overall and kswapd will not wake naturally. + +``nr_reserved_highatomic`` + The number of pages which are reserved for high-order atomic allocations. + +``nr_free_highatomic`` + The number of free pages in reserved highatomic pageblocks + +``lowmem_reserve`` + The array of the amounts of the memory reserved in this zone for memory + allocations. For example, if the highest zone a memory allocation can + allocate memory from is ``ZONE_MOVABLE``, the amount of memory reserved in + this zone for this allocation is ``lowmem_reserve[ZONE_MOVABLE]`` when + attempting to allocate memory from this zone. This is a mechanism the page + allocator uses to prevent allocations which could use ``highmem`` from using + too much ``lowmem``. For some specialised workloads on ``highmem`` machines, + it is dangerous for the kernel to allow process memory to be allocated from + the ``lowmem`` zone. This is because that memory could then be pinned via the + ``mlock()`` system call, or by unavailability of swapspace. + ``vm.lowmem_reserve_ratio`` sysctl determines how aggressive the kernel is in + defending these lower zones. This array is recalculated by + ``setup_per_zone_lowmem_reserve()`` at runtime if ``vm.lowmem_reserve_ratio`` + sysctl changes. + +``node`` + The index of the node this zone belongs to. Available only when + ``CONFIG_NUMA`` is enabled because there is only one zone in a UMA system. + +``zone_pgdat`` + Pointer to the ``struct pglist_data`` of the node this zone belongs to. + +``per_cpu_pageset`` + Pointer to the Per-CPU Pagesets (PCP) allocated and initialized by + ``setup_zone_pageset()``. By handling most frequent allocations and frees + locally on each CPU, PCP improves performance and scalability on systems with + many cores. + +``pageset_high_min`` + Copied to the ``high_min`` of the Per-CPU Pagesets for faster access. + +``pageset_high_max`` + Copied to the ``high_max`` of the Per-CPU Pagesets for faster access. + +``pageset_batch`` + Copied to the ``batch`` of the Per-CPU Pagesets for faster access. The + ``batch``, ``high_min`` and ``high_max`` of the Per-CPU Pagesets are used to + calculate the number of elements the Per-CPU Pagesets obtain from the buddy + allocator under a single hold of the lock for efficiency. They are also used + to decide if the Per-CPU Pagesets return pages to the buddy allocator in page + free process. + +``pageblock_flags`` + The pointer to the flags for the pageblocks in the zone (see + ``include/linux/pageblock-flags.h`` for flags list). The memory is allocated + in ``setup_usemap()``. Each pageblock occupies ``NR_PAGEBLOCK_BITS`` bits. + Defined only when ``CONFIG_FLATMEM`` is enabled. The flags is stored in + ``mem_section`` when ``CONFIG_SPARSEMEM`` is enabled. + +``zone_start_pfn`` + The start pfn of the zone. It is initialized by + ``calculate_node_totalpages()``. + +``managed_pages`` + The present pages managed by the buddy system, which is calculated as: + ``managed_pages`` = ``present_pages`` - ``reserved_pages``, ``reserved_pages`` + includes pages allocated by the memblock allocator. It should be used by page + allocator and vm scanner to calculate all kinds of watermarks and thresholds. + It is accessed using ``atomic_long_xxx()`` functions. It is initialized in + ``free_area_init_core()`` and then is reinitialized when memblock allocator + frees pages into buddy system. + +``spanned_pages`` + The total pages spanned by the zone, including holes, which is calculated as: + ``spanned_pages`` = ``zone_end_pfn`` - ``zone_start_pfn``. It is initialized + by ``calculate_node_totalpages()``. + +``present_pages`` + The physical pages existing within the zone, which is calculated as: + ``present_pages`` = ``spanned_pages`` - ``absent_pages`` (pages in holes). It + may be used by memory hotplug or memory power management logic to figure out + unmanaged pages by checking (``present_pages`` - ``managed_pages``). Write + access to ``present_pages`` at runtime should be protected by + ``mem_hotplug_begin/done()``. Any reader who can't tolerant drift of + ``present_pages`` should use ``get_online_mems()`` to get a stable value. It + is initialized by ``calculate_node_totalpages()``. + +``present_early_pages`` + The present pages existing within the zone located on memory available since + early boot, excluding hotplugged memory. Defined only when + ``CONFIG_MEMORY_HOTPLUG`` is enabled and initialized by + ``calculate_node_totalpages()``. + +``cma_pages`` + The pages reserved for CMA use. These pages behave like ``ZONE_MOVABLE`` when + they are not used for CMA. Defined only when ``CONFIG_CMA`` is enabled. + +``name`` + The name of the zone. It is a pointer to the corresponding element of + the ``zone_names`` array. + +``nr_isolate_pageblock`` + Number of isolated pageblocks. It is used to solve incorrect freepage counting + problem due to racy retrieving migratetype of pageblock. Protected by + ``zone->lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled. + +``span_seqlock`` + The seqlock to protect ``zone_start_pfn`` and ``spanned_pages``. It is a + seqlock because it has to be read outside of ``zone->lock``, and it is done in + the main allocator path. However, the seqlock is written quite infrequently. + Defined only when ``CONFIG_MEMORY_HOTPLUG`` is enabled. + +``initialized`` + The flag indicating if the zone is initialized. Set by + ``init_currently_empty_zone()`` during boot. + +``free_area`` + The array of free areas, where each element corresponds to a specific order + which is a power of two. The buddy allocator uses this structure to manage + free memory efficiently. When allocating, it tries to find the smallest + sufficient block, if the smallest sufficient block is larger than the + requested size, it will be recursively split into the next smaller blocks + until the required size is reached. When a page is freed, it may be merged + with its buddy to form a larger block. It is initialized by + ``zone_init_free_lists()``. + +``unaccepted_pages`` + The list of pages to be accepted. All pages on the list are ``MAX_PAGE_ORDER``. + Defined only when ``CONFIG_UNACCEPTED_MEMORY`` is enabled. + +``flags`` + The zone flags. The least three bits are used and defined by + ``enum zone_flags``. ``ZONE_BOOSTED_WATERMARK`` (bit 0): zone recently boosted + watermarks. Cleared when kswapd is woken. ``ZONE_RECLAIM_ACTIVE`` (bit 1): + kswapd may be scanning the zone. ``ZONE_BELOW_HIGH`` (bit 2): zone is below + high watermark. + +``lock`` + The main lock that protects the internal data structures of the page allocator + specific to the zone, especially protects ``free_area``. + +``percpu_drift_mark`` + When free pages are below this point, additional steps are taken when reading + the number of free pages to avoid per-cpu counter drift allowing watermarks + to be breached. It is updated in ``refresh_zone_stat_thresholds()``. + +Compaction control +~~~~~~~~~~~~~~~~~~ + +``compact_cached_free_pfn`` + The PFN where compaction free scanner should start in the next scan. + +``compact_cached_migrate_pfn`` + The PFNs where compaction migration scanner should start in the next scan. + This array has two elements: the first one is used in ``MIGRATE_ASYNC`` mode, + and the other one is used in ``MIGRATE_SYNC`` mode. + +``compact_init_migrate_pfn`` + The initial migration PFN which is initialized to 0 at boot time, and to the + first pageblock with migratable pages in the zone after a full compaction + finishes. It is used to check if a scan is a whole zone scan or not. + +``compact_init_free_pfn`` + The initial free PFN which is initialized to 0 at boot time and to the last + pageblock with free ``MIGRATE_MOVABLE`` pages in the zone. It is used to check + if it is the start of a scan. + +``compact_considered`` + The number of compactions attempted since last failure. It is reset in + ``defer_compaction()`` when a compaction fails to result in a page allocation + success. It is increased by 1 in ``compaction_deferred()`` when a compaction + should be skipped. ``compaction_deferred()`` is called before + ``compact_zone()`` is called, ``compaction_defer_reset()`` is called when + ``compact_zone()`` returns ``COMPACT_SUCCESS``, ``defer_compaction()`` is + called when ``compact_zone()`` returns ``COMPACT_PARTIAL_SKIPPED`` or + ``COMPACT_COMPLETE``. + +``compact_defer_shift`` + The number of compactions skipped before trying again is + ``1< Date: Mon, 17 Mar 2025 17:36:14 +0100 Subject: [PATCH 1573/2157] fork: use __vmalloc_node() for stack allocation Replace __vmalloc_node_range() by __vmalloc_node(). The last variant requires less parameters and it uses exactly the same arguments which are partly now hidden inside __vmalloc_node(). This change does not change any functionality. It makes the code a bit simpler. Link: https://lkml.kernel.org/r/20250317163614.166502-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Michal Hocko Cc: Christian Brauner Cc: Oleg Nesterov Cc: Tejun Heo Signed-off-by: Andrew Morton --- kernel/fork.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index f9cf0f056eb6..83cb82643817 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -311,11 +311,9 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * so memcg accounting is performed manually on assigning/releasing * stacks to tasks. Drop __GFP_ACCOUNT. */ - stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, + stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP & ~__GFP_ACCOUNT, - PAGE_KERNEL, - 0, node, __builtin_return_address(0)); + node, __builtin_return_address(0)); if (!stack) return -ENOMEM; From d8a866c766ebe34b4371d42f4a3edca200f5e645 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Mon, 17 Mar 2025 10:20:34 +0000 Subject: [PATCH 1574/2157] selftests/mm: add commentary about 9pfs bugs As discussed here: https://lore.kernel.org/lkml/Z9RRkL1hom48z3Tt@google.com/ This code could benefit from some more commentary. To avoid needing to comment the same thing in multiple places (I guess more of these SKIPs will need to be added over time, for now I am only like 20% of the way through Project Run run_vmtests.sh Successfully), add a dummy "skip tests for this specific reason" function that basically just serves as a hook to hang comments on. Link: https://lkml.kernel.org/r/20250317-9pfs-comments-v1-1-9ac96043e146@google.com Signed-off-by: Brendan Jackman Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 6 +----- tools/testing/selftests/mm/map_populate.c | 8 +++----- tools/testing/selftests/mm/vm_util.h | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 03271442aae5..21595b20bbc3 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -97,11 +97,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) if (ftruncate(fd, size)) { if (errno == ENOENT) { - /* - * This can happen if the file has been unlinked and the - * filesystem doesn't support truncating unlinked files. - */ - ksft_test_result_skip("ftruncate() failed with ENOENT\n"); + skip_test_dodgy_fs("ftruncate()"); } else { ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); } diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 433e54fb634f..9df2636c829b 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -18,6 +18,8 @@ #include #include "../kselftest.h" +#include "vm_util.h" + #define MMAP_SZ 4096 #define BUG_ON(condition, description) \ @@ -88,11 +90,7 @@ int main(int argc, char **argv) ret = ftruncate(fileno(ftmp), MMAP_SZ); if (ret < 0 && errno == ENOENT) { - /* - * This probably means tmpfile() made a file on a filesystem - * that doesn't handle temporary files the way we want. - */ - ksft_exit_skip("ftruncate(fileno(tmpfile())) gave ENOENT, weird filesystem?\n"); + skip_test_dodgy_fs("ftruncate()"); } BUG_ON(ret, "ftruncate()"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 0e629586556b..6effafdc4d8a 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -5,6 +5,7 @@ #include #include /* ffsl() */ #include /* _SC_PAGESIZE */ +#include "../kselftest.h" #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) @@ -32,6 +33,23 @@ static inline unsigned int pshift(void) return __page_shift; } +/* + * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with + * ENOENT on unlinked files. See + * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such + * bugs. There are rumours of NFS implementations with similar bugs. + * + * Ideally, tests should just detect filesystems known to have such issues and + * bail early. But 9pfs has the additional "feature" that it causes fstatfs to + * pass through the f_type field from the host filesystem. To avoid having to + * scrape /proc/mounts or some other hackery, tests can call this function when + * it seems such a bug might have been encountered. + */ +static inline void skip_test_dodgy_fs(const char *op_name) +{ + ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name); +} + uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); From 0bfd4586855cf6919d025b5914be212fd4cd27b5 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Mon, 17 Mar 2025 17:04:03 -0600 Subject: [PATCH 1575/2157] MM documentation: add "Unaccepted" meminfo entry Commit dcdfdd40fa82 ("mm: Add support for unaccepted memory") added a entry to meminfo but did not document it in the proc.rst file. This counter tracks the amount of "Unaccepted" guest memory for some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP. Add the missing entry in the documentation. Link: https://lkml.kernel.org/r/20250317230403.79632-1-npache@redhat.com Signed-off-by: Nico Pache Acked-by: Kirill A. Shutemov Acked-by: David Hildenbrand Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: Jeff Xu Cc: Jonathan Corbet Cc: Pasha Tatashin Cc: Suren Baghdasaryan Cc: xu xin Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 80f33bce5fe1..f97692b31a2d 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1081,6 +1081,7 @@ Example output. You may not have all of these fields. FilePmdMapped: 0 kB CmaTotal: 0 kB CmaFree: 0 kB + Unaccepted: 0 kB Balloon: 0 kB HugePages_Total: 0 HugePages_Free: 0 @@ -1256,6 +1257,8 @@ CmaTotal Memory reserved for the Contiguous Memory Allocator (CMA) CmaFree Free remaining memory in the CMA reserves +Unaccepted + Memory that has not been accepted by the guest Balloon Memory returned to Host by VM Balloon Drivers HugePages_Total, HugePages_Free, HugePages_Rsvd, HugePages_Surp, Hugepagesize, Hugetlb From 98c183a4fccf3b855fa64005a8b6d892570dfd66 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 18 Mar 2025 18:33:01 -0700 Subject: [PATCH 1576/2157] fs/dax: don't disassociate zero page entries Prior to commit 38607c62b34b ("fs/dax: properly refcount fs dax pages") dax_associate_entry() and dax_disassociate_entry() would implicitly skip zero and empty dax entries using the for_each_mapped_pfn() macro. The use of compound ZONE_DEVICE folios removed the need for this macro and so it was removed, leading dax_folio_put() to be called on zero pages. This lead to the below warning. To fix this explicitly skip zero and empty entries in dax_associate/disassociate_entry(). [ 27.536963] ------------[ cut here ]------------ [ 27.537674] WARNING: CPU: 11 PID: 874 at fs/dax.c:415 dax_folio_put.isra.0+0x10d/0x170 [ 27.538844] Modules linked in: nd_pmem nd_btt nd_e820 libnvdimm [ 27.539732] CPU: 11 UID: 0 PID: 874 Comm: ctl_prefault Tainted: G W 6.14.0-rc2+ #1104 [ 27.541093] Tainted: [W]=WARN [ 27.541549] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/204 [ 27.543197] RIP: 0010:dax_folio_put.isra.0+0x10d/0x170 [ 27.543970] Code: 20 48 85 c0 0f 84 29 ff ff ff 48 83 e8 01 48 89 47 20 0f 84 1b ff ff ff 48 83 c4 10 5b 5d 41 5c c3 cc cc4 [ 27.546723] RSP: 0000:ffff961e4102fae0 EFLAGS: 00010002 [ 27.547505] RAX: 0000000000000001 RBX: ffffc9cce4e18000 RCX: 0000000000000009 [ 27.548564] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8a2a7badca40 [ 27.549630] RBP: ffffc9cce4e18000 R08: 0000000000009ffb R09: 00000000ffffdfff [ 27.550691] R10: 00000000ffffdfff R11: ffffffffa4e823a0 R12: 0000000000000000 [ 27.551748] R13: 0000000000000000 R14: 0000000010f10005 R15: 0000000000000004 [ 27.552819] FS: 00007f5f539d74c0(0000) GS:ffff8a2a7bac0000(0000) knlGS:0000000000000000 [ 27.554015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.554873] CR2: 00007f5f52e00000 CR3: 0000000909340000 CR4: 00000000000006f0 [ 27.555938] Call Trace: [ 27.556318] [ 27.556650] ? __warn+0x91/0x190 [ 27.557146] ? dax_folio_put.isra.0+0x10d/0x170 [ 27.557824] ? report_bug+0x164/0x190 [ 27.558378] ? handle_bug+0x54/0x90 [ 27.558898] ? exc_invalid_op+0x17/0x70 [ 27.559489] ? asm_exc_invalid_op+0x1a/0x20 [ 27.560125] ? dax_folio_put.isra.0+0x10d/0x170 [ 27.560808] dax_insert_entry+0x1e1/0x420 [ 27.561419] dax_fault_iter+0x252/0x860 [ 27.561995] dax_iomap_pmd_fault+0x23c/0x4a0 [ 27.562651] ext4_dax_huge_fault+0x1e2/0x450 [ 27.563296] __handle_mm_fault+0x6c8/0x12b0 [ 27.563920] ? do_user_addr_fault+0x1ca/0x670 [ 27.564577] ? lock_vma_under_rcu+0x178/0x3b0 [ 27.565235] handle_mm_fault+0xe5/0x290 [ 27.565816] do_user_addr_fault+0x208/0x670 [ 27.566446] exc_page_fault+0x6d/0x230 [ 27.567008] asm_exc_page_fault+0x26/0x30 [ 27.567610] RIP: 0033:0x7f5f543bcb4f [ 27.568152] Code: 45 f0 48 8b 45 f0 48 8b 4d f8 48 03 41 18 48 89 45 e8 48 8b 45 f0 48 3b 45 e8 0f 83 97 00 00 00 48 8b 458 [ 27.570895] RSP: 002b:00007ffc2d774460 EFLAGS: 00010287 [ 27.571672] RAX: 00007f5f52e00000 RBX: 0000000000200000 RCX: 000055760153fc00 [ 27.572731] RDX: 0000000000000000 RSI: 0000557601542a20 RDI: 000055760153fc00 [ 27.573787] RBP: 00007ffc2d774460 R08: 0000000000000000 R09: 0000000000000073 [ 27.574840] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffc2d77534b [ 27.575897] R13: 00007ffc2d774aa0 R14: 0000000000800000 R15: 0000000000800000 [ 27.576961] [ 27.577301] irq event stamp: 13394 [ 27.577810] hardirqs last enabled at (13393): [] flush_tlb_mm_range+0x1c0/0x220 [ 27.579138] hardirqs last disabled at (13394): [] _raw_spin_lock_irq+0x47/0x50 [ 27.580428] softirqs last enabled at (12530): [] xs_tcp_send_request+0x22a/0x2e0 [ 27.581762] softirqs last disabled at (12528): [] release_sock+0x1d/0xb0 [ 27.582986] ---[ end trace 0000000000000000 ]--- Link: https://lkml.kernel.org/r/20250319013301.369822-1-apopple@nvidia.com Signed-off-by: Alistair Popple Fixes: 38607c62b34b ("fs/dax: properly refcount fs dax pages") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202503102229.122fbd6c-lkp@intel.com Cc: Dan Williams Cc: Alison Schofield Cc: David Hildenbrand Cc: Balbir Singh Cc: "Darrick J. Wong" Cc: Dave Hansen Cc: Jason Gunthorpe Cc: John Hubbard Signed-off-by: Andrew Morton --- fs/dax.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index cf96f3dd4e5f..464c2badc135 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -445,6 +445,9 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, unsigned long size = dax_entry_size(entry), index; struct folio *folio = dax_to_folio(entry); + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) + return; + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) return; @@ -473,6 +476,9 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping, if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) return; + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) + return; + dax_folio_put(folio); } @@ -1074,8 +1080,7 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, void *old; dax_disassociate_entry(entry, mapping, false); - if (!(flags & DAX_ZERO_PAGE)) - dax_associate_entry(new_entry, mapping, vmf->vma, + dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, shared); /* From 3b23a44f1f196741596616082e759f7f3a400e78 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Tue, 18 Mar 2025 11:30:28 -0700 Subject: [PATCH 1577/2157] mm/damon: implement a new DAMOS filter type for active pages Patch series "mm/damon: introduce DAMOS filter type for active pages". The memory reclaim algorithm categorizes pages into active and inactive lists, separately for file and anon pages. The system's performance relies heavily on the (relative and absolute) accuracy of this categorization. This patch series add a new DAMOS filter for pages' activeness, giving us visibility into the access frequency of the pages on each list. This insight can help us diagnose issues with the active-inactive balancing dynamics, and make decisions to optimize reclaim efficiency and memory utilization. For instance, we might decide to enable DAMON_LRU_SORT, if we find that there are pages on the active list that are infrequently accessed, or less frequently accessed than pages on the inactive list. This patch (of 2): Implement a DAMOS filter type for active pages on DAMON kernel API, and add support of it from the physical address space DAMON operations set (paddr). Link: https://lkml.kernel.org/r/20250318183029.2062917-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20250318183029.2062917-2-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: SeongJae Park Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ mm/damon/paddr.c | 3 +++ mm/damon/sysfs-schemes.c | 1 + 3 files changed, 6 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 3db4f77261f5..47e36e6ea203 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -334,6 +334,7 @@ struct damos_stat { /** * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. + * @DAMOS_FILTER_TYPE_ACTIVE: Active pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. @@ -355,6 +356,7 @@ struct damos_stat { */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, + DAMOS_FILTER_TYPE_ACTIVE, DAMOS_FILTER_TYPE_MEMCG, DAMOS_FILTER_TYPE_YOUNG, DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index b08847ef9b81..1b70d3f36046 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -217,6 +217,9 @@ static bool damos_pa_filter_match(struct damos_filter *filter, case DAMOS_FILTER_TYPE_ANON: matched = folio_test_anon(folio); break; + case DAMOS_FILTER_TYPE_ACTIVE: + matched = folio_test_active(folio); + break; case DAMOS_FILTER_TYPE_MEMCG: rcu_read_lock(); memcg = folio_memcg_check(folio); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 5023f2b690d6..23b562df0839 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -344,6 +344,7 @@ static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc( /* Should match with enum damos_filter_type */ static const char * const damon_sysfs_scheme_filter_type_strs[] = { "anon", + "active", "memcg", "young", "hugepage_size", From af96c610c6fdcd3a765f8a158293fe208a205ac2 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Tue, 18 Mar 2025 11:30:29 -0700 Subject: [PATCH 1578/2157] docs/mm/damon/design: document active DAMOS filter type Document availability and meaning of "active" DAMOS filter type on design document. Since introduction of the type requires no additional user ABI, usage and ABI document need no update. Link: https://lkml.kernel.org/r/20250318183029.2062917-3-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: SeongJae Park Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- Documentation/mm/damon/design.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index aae3a691ee69..f12d33749329 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -656,6 +656,8 @@ Below ``type`` of filters are currently supported. - Operations layer handled, supported by only ``paddr`` operations set. - anon - Applied to pages that containing data that not stored in files. + - active + - Applied to active pages. - memcg - Applied to pages that belonging to a given cgroup. - young From 735b3f7e773bd09d459537562754debd1f8e816b Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 18 Mar 2025 17:43:40 +0000 Subject: [PATCH 1579/2157] selftests/mm: uffd-unit-tests support for hugepages > 2M uffd-unit-tests uses a memory area with a fixed 32M size. Then it calculates the number of pages by dividing by page_size, which itself is either the base page size or the PMD huge page size depending on the test config. For the latter, we end up with nr_pages=1 for arm64 16K base pages, and nr_pages=0 for 64K base pages. This doesn't end well. So let's make the 32M size a floor and also ensure that we have at least 2 pages given the PMD size. With this change, the tests pass on arm64 64K base page size configuration. Link: https://lkml.kernel.org/r/20250318174343.243631-2-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: Peter Xu Acked-by: Rafael Aquini Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-unit-tests.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 24ea82ee2231..e8fd9011c2a3 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -26,6 +26,8 @@ #define ALIGN_UP(x, align_to) \ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + struct mem_type { const char *name; unsigned int mem_flag; @@ -196,7 +198,8 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, else page_size = psize(); - nr_pages = UFFD_TEST_MEM_SIZE / page_size; + /* Ensure we have at least 2 pages */ + nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; /* TODO: remove this global var.. it's so ugly */ nr_parallel = 1; From a2c6f9c3cafac02a48db83714f4b62fee2508bc3 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 18 Mar 2025 17:43:41 +0000 Subject: [PATCH 1580/2157] selftests/mm: speed up split_huge_page_test create_pagecache_thp_and_fd() was previously writing a file sized at twice the PMD size by making a per-byte write syscall. This was quite slow when the PMD size is 4M, but completely intolerable for 32M (PMD size for arm64's 16K page size), and 512M (PMD size for arm64's 64K page size). The byte pattern has a 256 byte period, so let's create a 1K buffer and fill it with exactly 4 periods. Then we can write the buffer as many times as is required to fill the file. This makes things much more tolerable. The test now passes for 16K page size. It still fails for 64K page size because MAX_PAGECACHE_ORDER is too small for 512M folio size (I think). Link: https://lkml.kernel.org/r/20250318174343.243631-3-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Acked-by: Peter Xu Acked-by: Rafael Aquini Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/split_huge_page_test.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 719c5e2a6624..aa7400ed0e99 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -5,6 +5,7 @@ */ #define _GNU_SOURCE +#include #include #include #include @@ -398,6 +399,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, { size_t i; int dummy = 0; + unsigned char buf[1024]; srand(time(NULL)); @@ -405,11 +407,12 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, if (*fd == -1) ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); - for (i = 0; i < fd_size; i++) { - unsigned char byte = (unsigned char)i; + assert(fd_size % sizeof(buf) == 0); + for (i = 0; i < sizeof(buf); i++) + buf[i] = (unsigned char)i; + for (i = 0; i < fd_size; i += sizeof(buf)) + write(*fd, buf, sizeof(buf)); - write(*fd, &byte, sizeof(byte)); - } close(*fd); sync(); *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); From e452872b40e3f1fb92adf0d573a0a6a7c9f6ce22 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Tue, 18 Mar 2025 15:58:32 +0800 Subject: [PATCH 1581/2157] mm: vmscan: split proactive reclaim statistics from direct reclaim statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Adding Proactive Memory Reclaim Statistics". These two patches are related to proactive memory reclaim. Patch 1 Split proactive reclaim statistics from direct reclaim counters and introduces new counters: pgsteal_proactive, pgdemote_proactive, and pgscan_proactive. Patch 2 Adds pswpin and pswpout items to the cgroup-v2 documentation. This patch (of 2): In proactive memory reclaim scenarios, it is necessary to accurately track proactive reclaim statistics to dynamically adjust the frequency and amount of memory being reclaimed proactively. Currently, proactive reclaim is included in direct reclaim statistics, which can make these direct reclaim statistics misleading. Therefore, separate proactive reclaim memory from the direct reclaim counters by introducing new counters: pgsteal_proactive, pgdemote_proactive, and pgscan_proactive, to avoid confusion with direct reclaim. Link: https://lkml.kernel.org/r/20250318075833.90615-1-jiahao.kernel@gmail.com Link: https://lkml.kernel.org/r/20250318075833.90615-2-jiahao.kernel@gmail.com Signed-off-by: Hao Jia Acked-by: Johannes Weiner Cc: Jonathan Corbet Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 9 +++++++ include/linux/mmzone.h | 1 + include/linux/vm_event_item.h | 2 ++ mm/memcontrol.c | 7 +++++ mm/vmscan.c | 35 ++++++++++++++----------- mm/vmstat.c | 3 +++ 6 files changed, 42 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index f8a894a16307..d7624e500610 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1576,6 +1576,9 @@ The following nested keys are defined. pgscan_khugepaged (npn) Amount of scanned pages by khugepaged (in an inactive LRU list) + pgscan_proactive (npn) + Amount of scanned pages proactively (in an inactive LRU list) + pgsteal_kswapd (npn) Amount of reclaimed pages by kswapd @@ -1585,6 +1588,9 @@ The following nested keys are defined. pgsteal_khugepaged (npn) Amount of reclaimed pages by khugepaged + pgsteal_proactive (npn) + Amount of reclaimed pages proactively + pgfault (npn) Total number of page faults incurred @@ -1662,6 +1668,9 @@ The following nested keys are defined. pgdemote_khugepaged Number of pages demoted by khugepaged. + pgdemote_proactive + Number of pages demoted by proactively. + hugetlb Amount of memory used by hugetlb pages. This metric only shows up if hugetlb usage is accounted for in memory.current (i.e. diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a9db0fbd2b94..f7fe1126dc75 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -221,6 +221,7 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + PGDEMOTE_PROACTIVE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095c..f11b6fa9c5b3 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, PGSCAN_DIRECT_THROTTLE, PGSCAN_ANON, PGSCAN_FILE, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 57cf5a6c279c..40c07b8699ae 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -315,6 +315,7 @@ static const unsigned int memcg_node_stat_items[] = { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + PGDEMOTE_PROACTIVE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif @@ -431,9 +432,11 @@ static const unsigned int memcg_vm_event_stat[] = { PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, PGFAULT, PGMAJFAULT, PGREFILL, @@ -1394,6 +1397,7 @@ static const struct memory_stat memory_stats[] = { { "pgdemote_kswapd", PGDEMOTE_KSWAPD }, { "pgdemote_direct", PGDEMOTE_DIRECT }, { "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED }, + { "pgdemote_proactive", PGDEMOTE_PROACTIVE }, #ifdef CONFIG_NUMA_BALANCING { "pgpromote_success", PGPROMOTE_SUCCESS }, #endif @@ -1436,6 +1440,7 @@ static int memcg_page_state_output_unit(int item) case PGDEMOTE_KSWAPD: case PGDEMOTE_DIRECT: case PGDEMOTE_KHUGEPAGED: + case PGDEMOTE_PROACTIVE: #ifdef CONFIG_NUMA_BALANCING case PGPROMOTE_SUCCESS: #endif @@ -1509,10 +1514,12 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) seq_buf_printf(s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT) + + memcg_events(memcg, PGSCAN_PROACTIVE) + memcg_events(memcg, PGSCAN_KHUGEPAGED)); seq_buf_printf(s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT) + + memcg_events(memcg, PGSTEAL_PROACTIVE) + memcg_events(memcg, PGSTEAL_KHUGEPAGED)); for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) { diff --git a/mm/vmscan.c b/mm/vmscan.c index b5c7dfc2b189..98e6ac82e428 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -456,21 +456,26 @@ void drop_slab(void) } while ((freed >> shift++) > 1); } -static int reclaimer_offset(void) +#define CHECK_RECLAIMER_OFFSET(type) \ + do { \ + BUILD_BUG_ON(PGSTEAL_##type - PGSTEAL_KSWAPD != \ + PGDEMOTE_##type - PGDEMOTE_KSWAPD); \ + BUILD_BUG_ON(PGSTEAL_##type - PGSTEAL_KSWAPD != \ + PGSCAN_##type - PGSCAN_KSWAPD); \ + } while (0) + +static int reclaimer_offset(struct scan_control *sc) { - BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != - PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD); - BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != - PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD); - BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != - PGSCAN_DIRECT - PGSCAN_KSWAPD); - BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != - PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD); + CHECK_RECLAIMER_OFFSET(DIRECT); + CHECK_RECLAIMER_OFFSET(KHUGEPAGED); + CHECK_RECLAIMER_OFFSET(PROACTIVE); if (current_is_kswapd()) return 0; if (current_is_khugepaged()) return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD; + if (sc->proactive) + return PGSTEAL_PROACTIVE - PGSTEAL_KSWAPD; return PGSTEAL_DIRECT - PGSTEAL_KSWAPD; } @@ -2008,7 +2013,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); - item = PGSCAN_KSWAPD + reclaimer_offset(); + item = PGSCAN_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_scanned); __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); @@ -2024,10 +2029,10 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, spin_lock_irq(&lruvec->lru_lock); move_folios_to_lru(lruvec, &folio_list); - __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(), + __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc), stat.nr_demoted); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); - item = PGSTEAL_KSWAPD + reclaimer_offset(); + item = PGSTEAL_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); @@ -4571,7 +4576,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, break; } - item = PGSCAN_KSWAPD + reclaimer_offset(); + item = PGSCAN_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) { __count_vm_events(item, isolated); __count_vm_events(PGREFILL, sorted); @@ -4721,10 +4726,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap reset_batch_size(walk); } - __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(), + __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(sc), stat.nr_demoted); - item = PGSTEAL_KSWAPD + reclaimer_offset(); + item = PGSTEAL_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, reclaimed); __count_memcg_events(memcg, item, reclaimed); diff --git a/mm/vmstat.c b/mm/vmstat.c index ae0e4259ac23..ab5c840941f3 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1274,6 +1274,7 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", + "pgdemote_proactive", #ifdef CONFIG_HUGETLB_PAGE "nr_hugetlb", #endif @@ -1309,9 +1310,11 @@ const char * const vmstat_text[] = { "pgsteal_kswapd", "pgsteal_direct", "pgsteal_khugepaged", + "pgsteal_proactive", "pgscan_kswapd", "pgscan_direct", "pgscan_khugepaged", + "pgscan_proactive", "pgscan_direct_throttle", "pgscan_anon", "pgscan_file", From 4c8bc7c4e3fbbdf07a879429c34a78f31d9894d4 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Tue, 18 Mar 2025 15:58:33 +0800 Subject: [PATCH 1582/2157] cgroup: docs: add pswpin and pswpout items in cgroup v2 doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 15ff4d409e1a ("mm/memcontrol: add per-memcg pgpgin/pswpin counter") introduced the pswpin and pswpout items in the memory.stat of cgroup v2. Therefore, update them accordingly in the cgroup-v2 documentation. Link: https://lkml.kernel.org/r/20250318075833.90615-3-jiahao.kernel@gmail.com Fixes: 15ff4d409e1a ("mm/memcontrol: add per-memcg pgpgin/pswpin counter") Signed-off-by: Hao Jia Acked-by: Tejun Heo Acked-by: Johannes Weiner Cc: Jonathan Corbet Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index d7624e500610..ba11a4d7321b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1561,6 +1561,12 @@ The following nested keys are defined. workingset_nodereclaim Number of times a shadow node has been reclaimed + pswpin (npn) + Number of pages swapped into memory + + pswpout (npn) + Number of pages swapped out of memory + pgscan (npn) Amount of scanned pages (in an inactive LRU list) From 5f5ee52d4f58605330b09851273d6e56aaadd29e Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Tue, 18 Mar 2025 16:39:38 +0800 Subject: [PATCH 1583/2157] mm/hwpoison: introduce folio_contain_hwpoisoned_page() helper Patch series "mm/vmscan: don't try to reclaim hwpoison folio". Fix a bug during memory reclaim if folio is hwpoisoned. This patch (of 2): Introduce helper folio_contain_hwpoisoned_page() to check if the entire folio is hwpoisoned or it contains hwpoisoned pages. Link: https://lkml.kernel.org/r/20250318083939.987651-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250318083939.987651-2-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: Miaohe Lin Cc: David Hildenbrand Cc: Kefeng Wang Cc: Nanyong Sun Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 ++++++ mm/memory_hotplug.c | 3 +-- mm/shmem.c | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f26ce54c7aa4..68f4b188fc6b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1098,6 +1098,12 @@ static inline bool is_page_hwpoison(const struct page *page) return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } +static inline bool folio_contain_hwpoisoned_page(struct folio *folio) +{ + return folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio)); +} + bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 16cf9e17077e..75401866fb76 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1828,8 +1828,7 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (unlikely(page_folio(page) != folio)) goto put_folio; - if (folio_test_hwpoison(folio) || - (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { + if (folio_contain_hwpoisoned_page(folio)) { if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); if (folio_mapped(folio)) { diff --git a/mm/shmem.c b/mm/shmem.c index 7b738d8d6581..17f27d92c664 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3290,8 +3290,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping, if (ret) return ret; - if (folio_test_hwpoison(folio) || - (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { + if (folio_contain_hwpoisoned_page(folio)) { folio_unlock(folio); folio_put(folio); return -EIO; From 1b0449544c6482179ac84530b61fc192a6527bfd Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Tue, 18 Mar 2025 16:39:39 +0800 Subject: [PATCH 1584/2157] mm/vmscan: don't try to reclaim hwpoison folio Syzkaller reports a bug as follows: Injecting memory failure for pfn 0x18b00e at process virtual address 0x20ffd000 Memory failure: 0x18b00e: dirty swapcache page still referenced by 2 users Memory failure: 0x18b00e: recovery action for dirty swapcache page: Failed page: refcount:2 mapcount:0 mapping:0000000000000000 index:0x20ffd pfn:0x18b00e memcg:ffff0000dd6d9000 anon flags: 0x5ffffe00482011(locked|dirty|arch_1|swapbacked|hwpoison|node=0|zone=2|lastcpupid=0xfffff) raw: 005ffffe00482011 dead000000000100 dead000000000122 ffff0000e232a7c9 raw: 0000000000020ffd 0000000000000000 00000002ffffffff ffff0000dd6d9000 page dumped because: VM_BUG_ON_FOLIO(!folio_test_uptodate(folio)) ------------[ cut here ]------------ kernel BUG at mm/swap_state.c:184! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP Modules linked in: CPU: 0 PID: 60 Comm: kswapd0 Not tainted 6.6.0-gcb097e7de84e #3 Hardware name: linux,dummy-virt (DT) pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : add_to_swap+0xbc/0x158 lr : add_to_swap+0xbc/0x158 sp : ffff800087f37340 x29: ffff800087f37340 x28: fffffc00052c0380 x27: ffff800087f37780 x26: ffff800087f37490 x25: ffff800087f37c78 x24: ffff800087f377a0 x23: ffff800087f37c50 x22: 0000000000000000 x21: fffffc00052c03b4 x20: 0000000000000000 x19: fffffc00052c0380 x18: 0000000000000000 x17: 296f696c6f662865 x16: 7461646f7470755f x15: 747365745f6f696c x14: 6f6621284f494c4f x13: 0000000000000001 x12: ffff600036d8b97b x11: 1fffe00036d8b97a x10: ffff600036d8b97a x9 : dfff800000000000 x8 : 00009fffc9274686 x7 : ffff0001b6c5cbd3 x6 : 0000000000000001 x5 : ffff0000c25896c0 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : ffff0000c25896c0 x0 : 0000000000000000 Call trace: add_to_swap+0xbc/0x158 shrink_folio_list+0x12ac/0x2648 shrink_inactive_list+0x318/0x948 shrink_lruvec+0x450/0x720 shrink_node_memcgs+0x280/0x4a8 shrink_node+0x128/0x978 balance_pgdat+0x4f0/0xb20 kswapd+0x228/0x438 kthread+0x214/0x230 ret_from_fork+0x10/0x20 I can reproduce this issue with the following steps: 1) When a dirty swapcache page is isolated by reclaim process and the page isn't locked, inject memory failure for the page. me_swapcache_dirty() clears uptodate flag and tries to delete from lru, but fails. Reclaim process will put the hwpoisoned page back to lru. 2) The process that maps the hwpoisoned page exits, the page is deleted the page will never be freed and will be in the lru forever. 3) If we trigger a reclaim again and tries to reclaim the page, add_to_swap() will trigger VM_BUG_ON_FOLIO due to the uptodate flag is cleared. To fix it, skip the hwpoisoned page in shrink_folio_list(). Besides, the hwpoison folio may not be unmapped by hwpoison_user_mappings() yet, unmap it in shrink_folio_list(), otherwise the folio will fail to be unmaped by hwpoison_user_mappings() since the folio isn't in lru list. Link: https://lkml.kernel.org/r/20250318083939.987651-3-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: Miaohe Lin Cc: David Hildenbrand Cc: Kefeng Wang Cc: Nanyong Sun Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 98e6ac82e428..2b2ab386cab5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1127,6 +1127,13 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!folio_trylock(folio)) goto keep; + if (folio_contain_hwpoisoned_page(folio)) { + unmap_poisoned_folio(folio, folio_pfn(folio), false); + folio_unlock(folio); + folio_put(folio); + continue; + } + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); nr_pages = folio_nr_pages(folio); From d893aca973c315ee985e1f8220fcd239c0ab7d19 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 19 Mar 2025 14:23:37 +0200 Subject: [PATCH 1585/2157] x86/mm: restore early initialization of high_memory for 32-bits Kernel test robot reports the following crash on 32-bit system with HIGHMEM and DEBUG_VIRTUAL: [ 0.056128][ T0] kernel BUG at arch/x86/mm/physaddr.c:77! PANIC: early exception 0x06 IP 60:c116539d error 0 cr2 0x0 [ 0.056916][ T0] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.14.0-rc4-00010-ga4dbe5c71817 #1 [ 0.057570][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 0.058299][ T0] EIP: __phys_addr (arch/x86/mm/physaddr.c:77) [ 0.058633][ T0] Code: 00 74 33 89 f0 e8 d3 8b 2e 00 89 c3 0f b6 d0 b8 58 bb 4b c5 31 c9 6a 00 e8 70 f5 15 00 83 c4 04 84 db 74 25 ff 05 78 de 5d c5 <0f> 0b b8 c8 91 ea c4 e8 e7 6e ea ff b8 58 bb 4b c5 31 d2 31 c9 6a All code [ 0.060017][ T0] EAX: 00000000 EBX: c61f7001 ECX: 00000000 EDX: 00000000 [ 0.060519][ T0] ESI: c61f7000 EDI: 061f7000 EBP: c4e31f04 ESP: c61f7000 [ 0.061016][ T0] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: cff4 EFLAGS: 00210002 [ 0.061560][ T0] CR0: 80050033 CR2: 00000000 CR3: 059fc000 CR4: 00000090 [ 0.062060][ T0] Call Trace: [ 0.062288][ T0] ? show_regs (arch/x86/kernel/dumpstack.c:478) [ 0.062588][ T0] ? early_fixup_exception (arch/x86/include/asm/nospec-branch.h:595) [ 0.062968][ T0] ? early_idt_handler_common (arch/x86/kernel/head_32.S:352) [ 0.063360][ T0] ? __phys_addr (arch/x86/mm/physaddr.c:77) [ 0.063677][ T0] ? one_page_table_init (arch/x86/mm/init_32.c:100) [ 0.064037][ T0] ? page_table_range_init (arch/x86/mm/init_32.c:227) [ 0.064411][ T0] ? permanent_kmaps_init (include/linux/pgtable.h:191 include/linux/pgtable.h:196 arch/x86/mm/init_32.c:395) [ 0.064814][ T0] ? paging_init (arch/x86/mm/init_32.c:677) [ 0.065118][ T0] ? native_pagetable_init (arch/x86/mm/init_32.c:481) [ 0.065503][ T0] ? setup_arch (arch/x86/kernel/setup.c:1131) [ 0.065819][ T0] ? start_kernel (include/linux/jump_label.h:267 init/main.c:920) [ 0.066143][ T0] ? i386_start_kernel (arch/x86/kernel/head32.c:79) [ 0.066501][ T0] ? startup_32_smp (arch/x86/kernel/head_32.S:292) The crash happens because commit e120d1bc12da ("arch, mm: set high_memory in free_area_init()") moved initialization of high_memory after __vmalloc_start_set and with high_memory still set to 0 any address passes is_vmalloc_addr() check. Restore early initialization of high_memory on 32-bit systems in initmem_init(). Link: https://lkml.kernel.org/r/20250319122337.1538924-1-rppt@kernel.org Fixes: e120d1bc12da ("arch, mm: set high_memory in free_area_init()") Signed-off-by: Mike Rapoport (Microsoft) Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202503191442.112e954f-lkp@intel.com Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- arch/x86/mm/init_32.c | 3 +++ arch/x86/mm/numa_32.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 95b2758b4e4d..f69d2436d780 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -626,6 +626,9 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 442ef3facff0..65fda406e6f2 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -41,6 +41,9 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); From 0a1e082b64ccce165e7307a7b49d22b2504f9d1f Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Wed, 19 Mar 2025 17:17:26 +0800 Subject: [PATCH 1586/2157] mm/page_alloc: remove unnecessary __maybe_unused in order_to_pindex() The `movable` variable is always used when `CONFIG_TRANSPARENT_HUGEPAGE` is enabled, so the `__maybe_unused` attribute is not necessary. This patch removes it and keeps the variable declaration within the `#ifdef` block for better clarity. Link: https://lkml.kernel.org/r/20250319091726.401158-1-liuyerd@163.com Signed-off-by: Liu Ye Signed-off-by: Andrew Morton --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a6d060eea638..0c01998cb3a0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -509,9 +509,9 @@ static void bad_page(struct page *page, const char *reason) static inline unsigned int order_to_pindex(int migratetype, int order) { - bool __maybe_unused movable; #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool movable; if (order > PAGE_ALLOC_COSTLY_ORDER) { VM_BUG_ON(order != HPAGE_PMD_ORDER); From 3cf67d61ff98672120f6ad07528afa165df12588 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 25 Feb 2025 16:02:34 +0900 Subject: [PATCH 1587/2157] hung_task: show the blocker task if the task is hung on mutex Patch series "hung_task: Dump the blocking task stacktrace", v4. The hung_task detector is very useful for detecting the lockup. However, since it only dumps the blocked (uninterruptible sleep) processes, it is not enough to identify the root cause of that lockup. For example, if a process holds a mutex and sleep an event in interruptible state long time, the other processes will wait on the mutex in uninterruptible state. In this case, the waiter processes are dumped, but the blocker process is not shown because it is sleep in interruptible state. This adds a feature to dump the blocker task which holds a mutex when detecting a hung task. e.g. INFO: task cat:115 blocked for more than 122 seconds. Not tainted 6.14.0-rc3-00003-ga8946be3de00 #156 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:cat state:D stack:13432 pid:115 tgid:115 ppid:106 task_flags:0x400100 flags:0x00000002 Call Trace: __schedule+0x731/0x960 ? schedule_preempt_disabled+0x54/0xa0 schedule+0xb7/0x140 ? __mutex_lock+0x51b/0xa60 ? __mutex_lock+0x51b/0xa60 schedule_preempt_disabled+0x54/0xa0 __mutex_lock+0x51b/0xa60 read_dummy+0x23/0x70 full_proxy_read+0x6a/0xc0 vfs_read+0xc2/0x340 ? __pfx_direct_file_splice_eof+0x10/0x10 ? do_sendfile+0x1bd/0x2e0 ksys_read+0x76/0xe0 do_syscall_64+0xe3/0x1c0 ? exc_page_fault+0xa9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x4840cd RSP: 002b:00007ffe99071828 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd RDX: 0000000000001000 RSI: 00007ffe99071870 RDI: 0000000000000003 RBP: 00007ffe99071870 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000 R13: 00000000132fd3a0 R14: 0000000000000001 R15: ffffffffffffffff INFO: task cat:115 is blocked on a mutex likely owned by task cat:114. task:cat state:S stack:13432 pid:114 tgid:114 ppid:106 task_flags:0x400100 flags:0x00000002 Call Trace: __schedule+0x731/0x960 ? schedule_timeout+0xa8/0x120 schedule+0xb7/0x140 schedule_timeout+0xa8/0x120 ? __pfx_process_timeout+0x10/0x10 msleep_interruptible+0x3e/0x60 read_dummy+0x2d/0x70 full_proxy_read+0x6a/0xc0 vfs_read+0xc2/0x340 ? __pfx_direct_file_splice_eof+0x10/0x10 ? do_sendfile+0x1bd/0x2e0 ksys_read+0x76/0xe0 do_syscall_64+0xe3/0x1c0 ? exc_page_fault+0xa9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x4840cd RSP: 002b:00007ffe3e0147b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd RDX: 0000000000001000 RSI: 00007ffe3e014800 RDI: 0000000000000003 RBP: 00007ffe3e014800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000 R13: 000000001a0a93a0 R14: 0000000000000001 R15: ffffffffffffffff TBD: We can extend this feature to cover other locks like rwsem and rt_mutex, but rwsem requires to dump all the tasks which acquire and wait that rwsem. We can follow the waiter link but the output will be a bit different compared with mutex case. This patch (of 2): The "hung_task" shows a long-time uninterruptible slept task, but most often, it's blocked on a mutex acquired by another task. Without dumping such a task, investigating the root cause of the hung task problem is very difficult. This introduce task_struct::blocker_mutex to point the mutex lock which this task is waiting for. Since the mutex has "owner" information, we can find the owner task and dump it with hung tasks. Note: the owner can be changed while dumping the owner task, so this is "likely" the owner of the mutex. With this change, the hung task shows blocker task's info like below; INFO: task cat:115 blocked for more than 122 seconds. Not tainted 6.14.0-rc3-00003-ga8946be3de00 #156 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:cat state:D stack:13432 pid:115 tgid:115 ppid:106 task_flags:0x400100 flags:0x00000002 Call Trace: __schedule+0x731/0x960 ? schedule_preempt_disabled+0x54/0xa0 schedule+0xb7/0x140 ? __mutex_lock+0x51b/0xa60 ? __mutex_lock+0x51b/0xa60 schedule_preempt_disabled+0x54/0xa0 __mutex_lock+0x51b/0xa60 read_dummy+0x23/0x70 full_proxy_read+0x6a/0xc0 vfs_read+0xc2/0x340 ? __pfx_direct_file_splice_eof+0x10/0x10 ? do_sendfile+0x1bd/0x2e0 ksys_read+0x76/0xe0 do_syscall_64+0xe3/0x1c0 ? exc_page_fault+0xa9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x4840cd RSP: 002b:00007ffe99071828 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd RDX: 0000000000001000 RSI: 00007ffe99071870 RDI: 0000000000000003 RBP: 00007ffe99071870 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000 R13: 00000000132fd3a0 R14: 0000000000000001 R15: ffffffffffffffff INFO: task cat:115 is blocked on a mutex likely owned by task cat:114. task:cat state:S stack:13432 pid:114 tgid:114 ppid:106 task_flags:0x400100 flags:0x00000002 Call Trace: __schedule+0x731/0x960 ? schedule_timeout+0xa8/0x120 schedule+0xb7/0x140 schedule_timeout+0xa8/0x120 ? __pfx_process_timeout+0x10/0x10 msleep_interruptible+0x3e/0x60 read_dummy+0x2d/0x70 full_proxy_read+0x6a/0xc0 vfs_read+0xc2/0x340 ? __pfx_direct_file_splice_eof+0x10/0x10 ? do_sendfile+0x1bd/0x2e0 ksys_read+0x76/0xe0 do_syscall_64+0xe3/0x1c0 ? exc_page_fault+0xa9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x4840cd RSP: 002b:00007ffe3e0147b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004840cd RDX: 0000000000001000 RSI: 00007ffe3e014800 RDI: 0000000000000003 RBP: 00007ffe3e014800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000001000000 R11: 0000000000000246 R12: 0000000000001000 R13: 000000001a0a93a0 R14: 0000000000000001 R15: ffffffffffffffff [akpm@linux-foundation.org: implement debug_show_blocker() in C rather than in CPP] Link: https://lkml.kernel.org/r/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com Link: https://lkml.kernel.org/r/174046695384.2194069.16796289525958195643.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Waiman Long Reviewed-by: Lance Yang Reviewed-by: Sergey Senozhatsky Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: Kent Overstreet Cc: Steven Rostedt Cc: Tomasz Figa Cc: Will Deacon Cc: Yongliang Gao Signed-off-by: Andrew Morton --- include/linux/mutex.h | 2 ++ include/linux/sched.h | 4 ++++ kernel/hung_task.c | 38 ++++++++++++++++++++++++++++++++++++++ kernel/locking/mutex.c | 14 ++++++++++++++ lib/Kconfig.debug | 11 +++++++++++ 5 files changed, 69 insertions(+) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2bf91b57591b..2143d05116be 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -202,4 +202,6 @@ DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +extern unsigned long mutex_get_owner(struct mutex *lock); + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c15365a30c0..1419d94c8e87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1217,6 +1217,10 @@ struct task_struct { struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + struct mutex *blocker_mutex; +#endif + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 04efa7a6e69b..dc898ec93463 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -93,6 +93,43 @@ static struct notifier_block panic_block = { .notifier_call = hung_task_panic, }; + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +static void debug_show_blocker(struct task_struct *task) +{ + struct task_struct *g, *t; + unsigned long owner; + struct mutex *lock; + + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held"); + + lock = READ_ONCE(task->blocker_mutex); + if (!lock) + return; + + owner = mutex_get_owner(lock); + if (unlikely(!owner)) { + pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n", + task->comm, task->pid); + return; + } + + /* Ensure the owner information is correct. */ + for_each_process_thread(g, t) { + if ((unsigned long)t == owner) { + pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n", + task->comm, task->pid, t->comm, t->pid); + sched_show_task(t); + return; + } + } +} +#else +static inline void debug_show_blocker(struct task_struct *task) +{ +} +#endif + static void check_hung_task(struct task_struct *t, unsigned long timeout) { unsigned long switch_count = t->nvcsw + t->nivcsw; @@ -152,6 +189,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); + debug_show_blocker(t); hung_task_show_lock = true; if (sysctl_hung_task_all_cpu_backtrace) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index b36f23de48f1..6a543c204a14 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -72,6 +72,14 @@ static inline unsigned long __owner_flags(unsigned long owner) return owner & MUTEX_FLAGS; } +/* Do not use the return value as a pointer directly. */ +unsigned long mutex_get_owner(struct mutex *lock) +{ + unsigned long owner = atomic_long_read(&lock->owner); + + return (unsigned long)__owner_task(owner); +} + /* * Returns: __mutex_owner(lock) on failure or NULL on success. */ @@ -180,6 +188,9 @@ static void __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct list_head *list) { +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + WRITE_ONCE(current->blocker_mutex, lock); +#endif debug_mutex_add_waiter(lock, waiter, current); list_add_tail(&waiter->list, list); @@ -195,6 +206,9 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) __mutex_clear_flag(lock, MUTEX_FLAGS); debug_mutex_remove_waiter(lock, waiter, current); +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + WRITE_ONCE(current->blocker_mutex, NULL); +#endif } /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 35796c290ca3..1f5943f6be75 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1260,6 +1260,17 @@ config BOOTPARAM_HUNG_TASK_PANIC Say N if unsure. +config DETECT_HUNG_TASK_BLOCKER + bool "Dump Hung Tasks Blocker" + depends on DETECT_HUNG_TASK + depends on !PREEMPT_RT + default y + help + Say Y here to show the blocker task's stacktrace who acquires + the mutex lock which "hung tasks" are waiting. + This will add overhead a bit but shows suspicious tasks and + call trace if it comes from waiting a mutex. + config WQ_WATCHDOG bool "Detect Workqueue Stalls" depends on DEBUG_KERNEL From 2158599a4b6d735e1a57c8320723ca74c42dd7ae Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 25 Feb 2025 16:02:43 +0900 Subject: [PATCH 1588/2157] samples: add hung_task detector mutex blocking sample Add a hung_task detector mutex blocking test sample code. This module will create a dummy file on the debugfs. That file will cause the read process to sleep for enough long time (256 seconds) while holding a mutex. As a result, the second process will wait on the mutex for a prolonged duration and be detected by the hung_task detector. Usage is; > cd /sys/kernel/debug/hung_task > cat mutex & cat mutex and wait for hung_task message. [akpm@linux-foundation.org: make `hung_task_dir' static] Closes: https://lore.kernel.org/oe-kbuild-all/202503180827.4StpuFrD-lkp@intel.com/ Link: https://lkml.kernel.org/r/174046696281.2194069.4567490148001547311.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: Kent Overstreet Cc: Lance Yang Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Signed-off-by: Andrew Morton --- samples/Kconfig | 9 ++++ samples/Makefile | 1 + samples/hung_task/Makefile | 2 + samples/hung_task/hung_task_mutex.c | 66 +++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+) create mode 100644 samples/hung_task/Makefile create mode 100644 samples/hung_task/hung_task_mutex.c diff --git a/samples/Kconfig b/samples/Kconfig index 820e00b2ed68..09011be2391a 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -300,6 +300,15 @@ config SAMPLE_CHECK_EXEC demonstrate how they should be used with execveat(2) + AT_EXECVE_CHECK. +config SAMPLE_HUNG_TASK + tristate "Hung task detector test code" + depends on DETECT_HUNG_TASK && DEBUG_FS + help + Build a module which provide a simple debugfs file. If user reads + the file, it will sleep long time (256 seconds) with holding a + mutex. Thus if there are 2 or more processes read this file, it + will be detected by the hung_task watchdog. + source "samples/rust/Kconfig" source "samples/damon/Kconfig" diff --git a/samples/Makefile b/samples/Makefile index f24cd0d72dd0..bf6e6fca5410 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -42,3 +42,4 @@ obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/ obj-$(CONFIG_SAMPLES_RUST) += rust/ obj-$(CONFIG_SAMPLE_DAMON_WSSE) += damon/ obj-$(CONFIG_SAMPLE_DAMON_PRCL) += damon/ +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task/ diff --git a/samples/hung_task/Makefile b/samples/hung_task/Makefile new file mode 100644 index 000000000000..f4d6ab563488 --- /dev/null +++ b/samples/hung_task/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_mutex.o diff --git a/samples/hung_task/hung_task_mutex.c b/samples/hung_task/hung_task_mutex.c new file mode 100644 index 000000000000..47ed38239ea3 --- /dev/null +++ b/samples/hung_task/hung_task_mutex.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hung_task_mutex.c - Sample code which causes hung task by mutex + * + * Usage: load this module and read `/hung_task/mutex` + * by 2 or more processes. + * + * This is for testing kernel hung_task error message. + * Note that this will make your system freeze and maybe + * cause panic. So do not use this except for the test. + */ + +#include +#include +#include +#include +#include + +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_FILE "mutex" +#define SLEEP_SECOND 256 + +static const char dummy_string[] = "This is a dummy string."; +static DEFINE_MUTEX(dummy_mutex); +static struct dentry *hung_task_dir; + +static ssize_t read_dummy(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* If the second task waits on the lock, it is uninterruptible sleep. */ + guard(mutex)(&dummy_mutex); + + /* When the first task sleep here, it is interruptible. */ + msleep_interruptible(SLEEP_SECOND * 1000); + + return simple_read_from_buffer(user_buf, count, ppos, + dummy_string, sizeof(dummy_string)); +} + +static const struct file_operations hung_task_fops = { + .read = read_dummy, +}; + +static int __init hung_task_sample_init(void) +{ + hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); + if (IS_ERR(hung_task_dir)) + return PTR_ERR(hung_task_dir); + + debugfs_create_file(HUNG_TASK_FILE, 0400, hung_task_dir, + NULL, &hung_task_fops); + + return 0; +} + +static void __exit hung_task_sample_exit(void) +{ + debugfs_remove_recursive(hung_task_dir); +} + +module_init(hung_task_sample_init); +module_exit(hung_task_sample_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Masami Hiramatsu"); +MODULE_DESCRIPTION("Simple sleep under mutex file for testing hung task"); From 8ee065a6fdd285387d0eca5e1139ffb182a6e626 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 20:11:10 +0200 Subject: [PATCH 1589/2157] resource: split DEFINE_RES_NAMED_DESC() out of DEFINE_RES_NAMED() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "resource: Split and use DEFINE_RES*() macros", v2. Replace open coded variants of DEFINE_RES*() macros. Note, there are many more possibilities over the kernel and even in reources.c, however the latter contains not so trivial leftovers. That's why the examples cover only straightforward conversions. This patch (of 4): In some cases it would be useful to supply predefined descriptor of the resource. For this, introduce DEFINE_RES_NAMED_DESC() macro. While at it, provide DEFINE_RES() that takes only start, size, and flags. Link: https://lkml.kernel.org/r/20250317181412.1560630-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20250317181412.1560630-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Andrew Morton --- include/linux/ioport.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5385349f0b8a..e8b2d6aa4013 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -154,15 +154,20 @@ enum { }; /* helpers to define resources */ -#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ +#define DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, _desc) \ (struct resource) { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ - .desc = IORES_DESC_NONE, \ + .desc = (_desc), \ } +#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ + DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE) +#define DEFINE_RES(_start, _size, _flags) \ + DEFINE_RES_NAMED(_start, _size, NULL, _flags) + #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ From 76709e0a3f3b04dfba588f7985c1f3b302092418 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 20:11:11 +0200 Subject: [PATCH 1590/2157] resource: replace open coded variant of DEFINE_RES_NAMED_DESC() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace open coded variant of DEFINE_RES_NAMED_DESC(). Link: https://lkml.kernel.org/r/20250317181412.1560630-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Andrew Morton --- kernel/resource.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 12004452d999..3464d6a38424 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1975,11 +1975,7 @@ get_free_mem_region(struct device *dev, struct resource *base, */ revoke_iomem(res); } else { - res->start = addr; - res->end = addr + size - 1; - res->name = name; - res->desc = desc; - res->flags = IORESOURCE_MEM; + *res = DEFINE_RES_NAMED_DESC(addr, size, name, IORESOURCE_MEM, desc); /* * Only succeed if the resource hosts an exclusive From 1af56ff09e676899c9a37468098f4a0d71fef848 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 20:11:12 +0200 Subject: [PATCH 1591/2157] resource: replace open coded variants of DEFINE_RES_*_NAMED() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace open coded variants of DEFINE_RES_*_NAMED(). Link: https://lkml.kernel.org/r/20250317181412.1560630-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Andrew Morton --- kernel/resource.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 3464d6a38424..8f3652a41ea4 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1714,18 +1714,13 @@ static int __init reserve_setup(char *str) * I/O port space; otherwise assume it's memory. */ if (io_start < 0x10000) { - res->flags = IORESOURCE_IO; + *res = DEFINE_RES_IO_NAMED(io_start, io_num, "reserved"); parent = &ioport_resource; } else { - res->flags = IORESOURCE_MEM; + *res = DEFINE_RES_MEM_NAMED(io_start, io_num, "reserved"); parent = &iomem_resource; } - res->name = "reserved"; - res->start = io_start; - res->end = io_start + io_num - 1; res->flags |= IORESOURCE_BUSY; - res->desc = IORES_DESC_NONE; - res->child = NULL; if (request_resource(parent, res) == 0) reserved = x+1; } From 48376a4fa6af8642ab5e19016c38b072d73772c1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 20:11:13 +0200 Subject: [PATCH 1592/2157] resource: replace open coded variant of DEFINE_RES() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace open coded variant of DEFINE_RES(). No functional changes intended. Link: https://lkml.kernel.org/r/20250317181412.1560630-5-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Andrew Morton --- kernel/resource.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 8f3652a41ea4..8d3e6ed0bdc1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -561,8 +561,7 @@ static int __region_intersects(struct resource *parent, resource_size_t start, struct resource res, o; bool covered; - res.start = start; - res.end = start + size - 1; + res = DEFINE_RES(start, size, 0); for (p = parent->child; p ; p = p->sibling) { if (!resource_intersection(p, &res, &o)) From 81ca2970b770d967f64803eff6e7016a68802da3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Mar 2025 23:29:47 +0200 Subject: [PATCH 1593/2157] relay: use kasprintf() instead of fixed buffer formatting Improve readability and maintainability by replacing a hard coded string allocation and formatting by using the kasprintf() helper. It also eliminates the GCC compiler warning (with CONFIG_WERROR=y, which is default, it becomes an error: kernel/relay.c:357:42: error: `snprintf' output may be truncated before the last format character [-Werror=format-truncation=] Link: https://lkml.kernel.org/r/20250317212948.1811176-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- kernel/relay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/relay.c b/kernel/relay.c index a8ae436dc77e..5ac7e711e4b6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -351,10 +351,9 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, struct dentry *dentry; char *tmpname; - tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + tmpname = kasprintf(GFP_KERNEL, "%s%d", chan->base_filename, cpu); if (!tmpname) return NULL; - snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); /* Create file in fs */ dentry = chan->cb->create_buf_file(tmpname, chan->parent, From 6287fbad1cd91f0c25cdc3a580499060828a8f30 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 19 Mar 2025 14:02:22 -0700 Subject: [PATCH 1594/2157] fs/procfs: fix the comment above proc_pid_wchan() proc_pid_wchan() used to report kernel addresses to user space but that is no longer the case today. Bring the comment above proc_pid_wchan() in sync with the implementation. Link: https://lkml.kernel.org/r/20250319210222.1518771-1-bvanassche@acm.org Fixes: b2f73922d119 ("fs/proc, core/debug: Don't expose absolute kernel addresses via wchan") Signed-off-by: Bart Van Assche Cc: Kees Cook Cc: Eric W. Biederman Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index cd89e956c322..7feb8f41aa25 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -416,7 +416,7 @@ static const struct file_operations proc_pid_cmdline_ops = { #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. - * Returns the resolved symbol. If that fails, simply return the address. + * Returns the resolved symbol to user space. */ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) From 434333dd3f66f9d1ad387dabd2a565182a823f31 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 19 Mar 2025 09:52:46 +0100 Subject: [PATCH 1595/2157] mailmap: consolidate email addresses of Alexander Sverdlin Alias all the addresses used in the past and currently to the single contact address. Link: https://lkml.kernel.org/r/20250319085251.3335678-1-alexander.sverdlin@siemens.com Signed-off-by: Alexander Sverdlin Signed-off-by: Andrew Morton --- .mailmap | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.mailmap b/.mailmap index f88e5e28ce00..71074abd4699 100644 --- a/.mailmap +++ b/.mailmap @@ -31,6 +31,13 @@ Alexander Lobakin Alexander Lobakin Alexander Mikhalitsyn Alexander Mikhalitsyn +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin +Alexander Sverdlin Alexandre Belloni Alexandre Ghiti Alexei Avshalom Lazar From 088b1ca970ba6cac141f684b7592ce56bd25e7ea Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 20 Mar 2025 12:35:21 +0100 Subject: [PATCH 1596/2157] i2c: k1: Initialize variable before use Commit 95a8ca229032 ("i2c: spacemit: add support for SpacemiT K1 SoC") introduced a check in the probe function to warn the user if the DTS has incorrect frequency settings. In such cases, the driver falls back to default values. However, the return value of of_property_read_u32() was not stored, making the check ineffective. Fix this by storing the return value in 'ret' and evaluating it properly. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503200928.eBWfwnHG-lkp@intel.com/ Cc: Troy Mitchell Link: https://lore.kernel.org/r/20250320113521.3966762-1-andi.shyti@kernel.org Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c index 0d8763b852db..5965b4cf6220 100644 --- a/drivers/i2c/busses/i2c-k1.c +++ b/drivers/i2c/busses/i2c-k1.c @@ -514,7 +514,7 @@ static int spacemit_i2c_probe(struct platform_device *pdev) if (!i2c) return -ENOMEM; - of_property_read_u32(of_node, "clock-frequency", &i2c->clock_freq); + ret = of_property_read_u32(of_node, "clock-frequency", &i2c->clock_freq); if (ret && ret != -EINVAL) dev_warn(dev, "failed to read clock-frequency property: %d\n", ret); From 8b4da3ef92060c281aa3c541ed7aab51500cf7c8 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Sat, 22 Feb 2025 13:38:33 +0000 Subject: [PATCH 1597/2157] i2c: pasemi: Add registers bits and switch to BIT() Add the missing register bits to the defines and also switch those to use the BIT macro which is much more readable than using hardcoded masks Co-developed-by: Hector Martin Signed-off-by: Hector Martin Signed-off-by: Sven Peter Reviewed-by: Neal Gompa Reviewed-by: Alyssa Rosenzweig Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250222-pasemi-fixes-v1-1-d7ea33d50c5e@svenpeter.dev --- drivers/i2c/busses/i2c-pasemi-core.c | 34 ++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index dac694a9d781..bd128ab2e2eb 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -5,6 +5,7 @@ * SMBus host driver for PA Semi PWRficient */ +#include #include #include #include @@ -26,21 +27,30 @@ #define REG_REV 0x28 /* Register defs */ -#define MTXFIFO_READ 0x00000400 -#define MTXFIFO_STOP 0x00000200 -#define MTXFIFO_START 0x00000100 -#define MTXFIFO_DATA_M 0x000000ff +#define MTXFIFO_READ BIT(10) +#define MTXFIFO_STOP BIT(9) +#define MTXFIFO_START BIT(8) +#define MTXFIFO_DATA_M GENMASK(7, 0) -#define MRXFIFO_EMPTY 0x00000100 -#define MRXFIFO_DATA_M 0x000000ff +#define MRXFIFO_EMPTY BIT(8) +#define MRXFIFO_DATA_M GENMASK(7, 0) -#define SMSTA_XEN 0x08000000 -#define SMSTA_MTN 0x00200000 +#define SMSTA_XIP BIT(28) +#define SMSTA_XEN BIT(27) +#define SMSTA_JMD BIT(25) +#define SMSTA_JAM BIT(24) +#define SMSTA_MTO BIT(23) +#define SMSTA_MTA BIT(22) +#define SMSTA_MTN BIT(21) +#define SMSTA_MRNE BIT(19) +#define SMSTA_MTE BIT(16) +#define SMSTA_TOM BIT(6) -#define CTL_MRR 0x00000400 -#define CTL_MTR 0x00000200 -#define CTL_EN 0x00000800 -#define CTL_CLK_M 0x000000ff +#define CTL_EN BIT(11) +#define CTL_MRR BIT(10) +#define CTL_MTR BIT(9) +#define CTL_UJM BIT(8) +#define CTL_CLK_M GENMASK(7, 0) static inline void reg_write(struct pasemi_smbus *smbus, int reg, int val) { From f8d311b4b8f34f3dd1e5b488547d7eb31bcc7083 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2025 18:51:28 +0200 Subject: [PATCH 1598/2157] i2c: mlxbf: Use readl_poll_timeout_atomic() for polling Convert the usage of an open-coded custom tight poll while loop with the provided readl_poll_timeout_atomic() macro. Signed-off-by: Andy Shevchenko Reviewed-by: Asmaa Mnebhi Link: https://lore.kernel.org/r/20250212165128.2413430-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-mlxbf.c | 106 ++++++++------------------------- 1 file changed, 26 insertions(+), 80 deletions(-) diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c index 21f67f3b65b6..280dde53d7f3 100644 --- a/drivers/i2c/busses/i2c-mlxbf.c +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -495,65 +496,6 @@ static u8 mlxbf_i2c_bus_count; static struct mutex mlxbf_i2c_bus_lock; -/* - * Function to poll a set of bits at a specific address; it checks whether - * the bits are equal to zero when eq_zero is set to 'true', and not equal - * to zero when eq_zero is set to 'false'. - * Note that the timeout is given in microseconds. - */ -static u32 mlxbf_i2c_poll(void __iomem *io, u32 addr, u32 mask, - bool eq_zero, u32 timeout) -{ - u32 bits; - - timeout = (timeout / MLXBF_I2C_POLL_FREQ_IN_USEC) + 1; - - do { - bits = readl(io + addr) & mask; - if (eq_zero ? bits == 0 : bits != 0) - return eq_zero ? 1 : bits; - udelay(MLXBF_I2C_POLL_FREQ_IN_USEC); - } while (timeout-- != 0); - - return 0; -} - -/* - * SW must make sure that the SMBus Master GW is idle before starting - * a transaction. Accordingly, this function polls the Master FSM stop - * bit; it returns false when the bit is asserted, true if not. - */ -static bool mlxbf_i2c_smbus_master_wait_for_idle(struct mlxbf_i2c_priv *priv) -{ - u32 mask = MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK; - u32 addr = priv->chip->smbus_master_fsm_off; - u32 timeout = MLXBF_I2C_SMBUS_TIMEOUT; - - if (mlxbf_i2c_poll(priv->mst->io, addr, mask, true, timeout)) - return true; - - return false; -} - -/* - * wait for the lock to be released before acquiring it. - */ -static bool mlxbf_i2c_smbus_master_lock(struct mlxbf_i2c_priv *priv) -{ - if (mlxbf_i2c_poll(priv->mst->io, MLXBF_I2C_SMBUS_MASTER_GW, - MLXBF_I2C_MASTER_LOCK_BIT, true, - MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT)) - return true; - - return false; -} - -static void mlxbf_i2c_smbus_master_unlock(struct mlxbf_i2c_priv *priv) -{ - /* Clear the gw to clear the lock */ - writel(0, priv->mst->io + MLXBF_I2C_SMBUS_MASTER_GW); -} - static bool mlxbf_i2c_smbus_transaction_success(u32 master_status, u32 cause_status) { @@ -583,6 +525,7 @@ static int mlxbf_i2c_smbus_check_status(struct mlxbf_i2c_priv *priv) { u32 master_status_bits; u32 cause_status_bits; + u32 bits; /* * GW busy bit is raised by the driver and cleared by the HW @@ -591,9 +534,9 @@ static int mlxbf_i2c_smbus_check_status(struct mlxbf_i2c_priv *priv) * then read the cause and master status bits to determine if * errors occurred during the transaction. */ - mlxbf_i2c_poll(priv->mst->io, MLXBF_I2C_SMBUS_MASTER_GW, - MLXBF_I2C_MASTER_BUSY_BIT, true, - MLXBF_I2C_SMBUS_TIMEOUT); + readl_poll_timeout_atomic(priv->mst->io + MLXBF_I2C_SMBUS_MASTER_GW, + bits, !(bits & MLXBF_I2C_MASTER_BUSY_BIT), + MLXBF_I2C_POLL_FREQ_IN_USEC, MLXBF_I2C_SMBUS_TIMEOUT); /* Read cause status bits. */ cause_status_bits = readl(priv->mst_cause->io + @@ -740,7 +683,8 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, u8 read_en, write_en, block_en, pec_en; u8 slave, flags, addr; u8 *read_buf; - int ret = 0; + u32 bits; + int ret; if (request->operation_cnt > MLXBF_I2C_SMBUS_MAX_OP_CNT) return -EINVAL; @@ -760,11 +704,22 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, * Try to acquire the smbus gw lock before any reads of the GW register since * a read sets the lock. */ - if (WARN_ON(!mlxbf_i2c_smbus_master_lock(priv))) + ret = readl_poll_timeout_atomic(priv->mst->io + MLXBF_I2C_SMBUS_MASTER_GW, + bits, !(bits & MLXBF_I2C_MASTER_LOCK_BIT), + MLXBF_I2C_POLL_FREQ_IN_USEC, + MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT); + if (WARN_ON(ret)) return -EBUSY; - /* Check whether the HW is idle */ - if (WARN_ON(!mlxbf_i2c_smbus_master_wait_for_idle(priv))) { + /* + * SW must make sure that the SMBus Master GW is idle before starting + * a transaction. Accordingly, this call polls the Master FSM stop bit; + * it returns -ETIMEDOUT when the bit is asserted, 0 if not. + */ + ret = readl_poll_timeout_atomic(priv->mst->io + priv->chip->smbus_master_fsm_off, + bits, !(bits & MLXBF_I2C_SMBUS_MASTER_FSM_STOP_MASK), + MLXBF_I2C_POLL_FREQ_IN_USEC, MLXBF_I2C_SMBUS_TIMEOUT); + if (WARN_ON(ret)) { ret = -EBUSY; goto out_unlock; } @@ -855,7 +810,8 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, } out_unlock: - mlxbf_i2c_smbus_master_unlock(priv); + /* Clear the gw to clear the lock */ + writel(0, priv->mst->io + MLXBF_I2C_SMBUS_MASTER_GW); return ret; } @@ -1829,18 +1785,6 @@ static bool mlxbf_i2c_has_coalesce(struct mlxbf_i2c_priv *priv, bool *read, return true; } -static bool mlxbf_i2c_slave_wait_for_idle(struct mlxbf_i2c_priv *priv, - u32 timeout) -{ - u32 mask = MLXBF_I2C_CAUSE_S_GW_BUSY_FALL; - u32 addr = MLXBF_I2C_CAUSE_ARBITER; - - if (mlxbf_i2c_poll(priv->slv_cause->io, addr, mask, false, timeout)) - return true; - - return false; -} - static struct i2c_client *mlxbf_i2c_get_slave_from_addr( struct mlxbf_i2c_priv *priv, u8 addr) { @@ -1943,7 +1887,9 @@ static int mlxbf_i2c_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes) * Wait until the transfer is completed; the driver will wait * until the GW is idle, a cause will rise on fall of GW busy. */ - mlxbf_i2c_slave_wait_for_idle(priv, MLXBF_I2C_SMBUS_TIMEOUT); + readl_poll_timeout_atomic(priv->slv_cause->io + MLXBF_I2C_CAUSE_ARBITER, + data32, data32 & MLXBF_I2C_CAUSE_S_GW_BUSY_FALL, + MLXBF_I2C_POLL_FREQ_IN_USEC, MLXBF_I2C_SMBUS_TIMEOUT); clear_csr: /* Release the Slave GW. */ From a815975cbaeb4ab29f45312ef23be2871b2e8b82 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Savaliya Date: Wed, 22 Jan 2025 12:16:34 +0530 Subject: [PATCH 1599/2157] i2c: qcom-geni: Update i2c frequency table to match hardware guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current settings, the I2C buses are achieving around 370KHz instead of the expected 400KHz. For 100KHz and 1MHz, the settings are now more compliant and adhere to the Qualcomm’s internal programming guide. Update the I2C frequency table to align with the recommended values outlined in the I2C hardware programming guide, ensuring proper communication and performance. Signed-off-by: Mukesh Kumar Savaliya Reviewed-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20250122064634.2864432-1-quic_msavaliy@quicinc.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 7bbd478171e0..515a784c951c 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -148,9 +148,9 @@ struct geni_i2c_clk_fld { * source_clock = 19.2 MHz */ static const struct geni_i2c_clk_fld geni_i2c_clk_map_19p2mhz[] = { - {KHZ(100), 7, 10, 11, 26}, - {KHZ(400), 2, 5, 12, 24}, - {KHZ(1000), 1, 3, 9, 18}, + {KHZ(100), 7, 10, 12, 26}, + {KHZ(400), 2, 5, 11, 22}, + {KHZ(1000), 1, 2, 8, 18}, {}, }; From 39f8d63804505222dccf265797c2d03de7f2d5b3 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 21 Jan 2025 16:48:18 +0800 Subject: [PATCH 1600/2157] i2c: iproc: Refactor prototype and remove redundant error checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bcm_iproc_i2c_init() always returns 0. As a result, there is no need to check its return value or handle errors. This patch changes the prototype of bcm_iproc_i2c_init() to return void and removes the redundant error handling code after calls to bcm_iproc_i2c_init() in both the bcm_iproc_i2c_probe() and bcm_iproc_i2c_resume(). Signed-off-by: Wentao Liang Acked-by: Uwe Kleine-König Acked-by: Ray Jui Link: https://lore.kernel.org/r/20250121084818.2719-1-vulab@iscas.ac.cn Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-bcm-iproc.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 15b632a146e1..332a0fcca28d 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -678,7 +678,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data) return IRQ_HANDLED; } -static int bcm_iproc_i2c_init(struct bcm_iproc_i2c_dev *iproc_i2c) +static void bcm_iproc_i2c_init(struct bcm_iproc_i2c_dev *iproc_i2c) { u32 val; @@ -706,8 +706,6 @@ static int bcm_iproc_i2c_init(struct bcm_iproc_i2c_dev *iproc_i2c) /* clear all pending interrupts */ iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, 0xffffffff); - - return 0; } static void bcm_iproc_i2c_enable_disable(struct bcm_iproc_i2c_dev *iproc_i2c, @@ -1081,9 +1079,7 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev) bcm_iproc_algo.unreg_slave = NULL; } - ret = bcm_iproc_i2c_init(iproc_i2c); - if (ret) - return ret; + bcm_iproc_i2c_init(iproc_i2c); ret = bcm_iproc_i2c_cfg_speed(iproc_i2c); if (ret) @@ -1162,16 +1158,13 @@ static int bcm_iproc_i2c_suspend(struct device *dev) static int bcm_iproc_i2c_resume(struct device *dev) { struct bcm_iproc_i2c_dev *iproc_i2c = dev_get_drvdata(dev); - int ret; u32 val; /* * Power domain could have been shut off completely in system deep * sleep, so re-initialize the block here */ - ret = bcm_iproc_i2c_init(iproc_i2c); - if (ret) - return ret; + bcm_iproc_i2c_init(iproc_i2c); /* configure to the desired bus speed */ val = iproc_i2c_rd_reg(iproc_i2c, TIM_CFG_OFFSET); From 7e752910b8acd1527af34b51851998feb33cc028 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 12 Mar 2025 04:01:33 +0900 Subject: [PATCH 1601/2157] kbuild: deb-pkg: fix versioning for -rc releases The version number with -rc should be considered older than the final release. For example, 6.14-rc1 should be older than 6.14, but to handle this correctly (just like Debian kernel), "-rc" must be replace with "~rc". $ dpkg --compare-versions 6.14-rc1 lt 6.14 $ echo $? 1 $ dpkg --compare-versions 6.14~rc1 lt 6.14 $ echo $? 0 Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/package/mkdebian | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 0178000197fe..07ca3528e8ea 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -161,7 +161,9 @@ version=$KERNELRELEASE if [ "${KDEB_PKGVERSION:+set}" ]; then packageversion=$KDEB_PKGVERSION else - packageversion=$(${srctree}/scripts/setlocalversion --no-local ${srctree})-$($srctree/scripts/build-version) + upstream_version=$("${srctree}/scripts/setlocalversion" --no-local "${srctree}" | sed 's/-\(rc[1-9]\)/~\1/') + debian_revision=$("${srctree}/scripts/build-version") + packageversion=${upstream_version}-${debian_revision} fi sourcename=${KDEB_SOURCENAME:-linux-upstream} From 1c3107ec73921088e55b7ff35861b9303962fd34 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 12 Mar 2025 04:02:24 +0900 Subject: [PATCH 1602/2157] kbuild: deb-pkg: remove "version" variable in mkdebian ${version} and ${KERNELRELEASE} are the same. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/package/mkdebian | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 07ca3528e8ea..744ddba01d93 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -157,7 +157,6 @@ while [ $# -gt 0 ]; do done # Some variables and settings used throughout the script -version=$KERNELRELEASE if [ "${KDEB_PKGVERSION:+set}" ]; then packageversion=$KDEB_PKGVERSION else @@ -216,11 +215,11 @@ Build-Depends-Arch: bc, bison, flex, python3:native, rsync Homepage: https://www.kernel.org/ -Package: $packagename-$version +Package: $packagename-${KERNELRELEASE} Architecture: $debarch -Description: Linux kernel, version $version +Description: Linux kernel, version ${KERNELRELEASE} This package contains the Linux kernel, modules and corresponding other - files, version: $version. + files, version: ${KERNELRELEASE}. EOF if [ "${SRCARCH}" != um ]; then @@ -239,11 +238,11 @@ EOF if is_enabled CONFIG_MODULES; then cat <> debian/control -Package: linux-headers-$version +Package: linux-headers-${KERNELRELEASE} Architecture: $debarch Build-Profiles: -Description: Linux kernel headers for $version on $debarch - This package provides kernel header files for $version on $debarch +Description: Linux kernel headers for ${KERNELRELEASE} on $debarch + This package provides kernel header files for ${KERNELRELEASE} on $debarch . This is useful for people who need to build external modules EOF @@ -253,11 +252,11 @@ fi if is_enabled CONFIG_DEBUG_INFO; then cat <> debian/control -Package: linux-image-$version-dbg +Package: linux-image-${KERNELRELEASE}-dbg Section: debug Architecture: $debarch Build-Profiles: -Description: Linux kernel debugging symbols for $version +Description: Linux kernel debugging symbols for ${KERNELRELEASE} This package will come in handy if you need to debug the kernel. It provides all the necessary debug symbols for the kernel and its modules. EOF From 00e81f4fc15aff8efef529ce9a4dc020686ab854 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 11 Mar 2025 20:34:21 -0700 Subject: [PATCH 1603/2157] kbuild: Add a help message for "headers" Meanwhile explicitly state that the headers are uapi headers. Suggested-by: Borislav Petkov Signed-off-by: Xin Li (Intel) Signed-off-by: Masahiro Yamada --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22593a774cf6..5c333682dc91 100644 --- a/Makefile +++ b/Makefile @@ -1672,7 +1672,8 @@ help: @echo ' kernelrelease - Output the release version string (use with make -s)' @echo ' kernelversion - Output the version stored in Makefile (use with make -s)' @echo ' image_name - Output the image name (use with make -s)' - @echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ + @echo ' headers - Build ready-to-install UAPI headers in usr/include' + @echo ' headers_install - Install sanitised kernel UAPI headers to INSTALL_HDR_PATH'; \ echo ' (default: $(INSTALL_HDR_PATH))'; \ echo '' @echo 'Static analysers:' From a7a05b1b2739b94870b499818986b82974839fe0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 14 Mar 2025 18:53:35 +0900 Subject: [PATCH 1604/2157] kbuild: deb-pkg: add comment about future removal of KDEB_COMPRESS 'man dpkg-deb' describes as follows: DPKG_DEB_COMPRESSOR_TYPE Sets the compressor type to use (since dpkg 1.21.10). The -Z option overrides this value. When commit 1a7f0a34ea7d ("builddeb: allow selection of .deb compressor") was applied, dpkg-deb did not support this environment variable. Later, dpkg commit c10aeffc6d71 ("dpkg-deb: Add support for DPKG_DEB_COMPRESSOR_TYPE/LEVEL") introduced support for DPKG_DEB_COMPRESSOR_TYPE, which provides the same functionality as KDEB_COMPRESS. KDEB_COMPRESS is still useful for users of older dpkg versions, but I would like to remove this redundant functionality in the future. This commit adds comments to notify users of the planned removal and to encourage migration to DPKG_DEB_COMPRESSOR_TYPE where possible. Signed-off-by: Masahiro Yamada --- lib/Kconfig.debug | 6 +++--- scripts/package/debian/rules | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 17ccd913975d..be9f5af4c05c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -335,12 +335,12 @@ config DEBUG_INFO_COMPRESSED_ZLIB Compress the debug information using zlib. Requires GCC 5.0+ or Clang 5.0+, binutils 2.26+, and zlib. - Users of dpkg-deb via scripts/package/builddeb may find an increase in + Users of dpkg-deb via debian/rules may find an increase in size of their debug .deb packages with this config set, due to the debug info being compressed with zlib, then the object files being recompressed with a different compression scheme. But this is still - preferable to setting $KDEB_COMPRESS to "none" which would be even - larger. + preferable to setting KDEB_COMPRESS or DPKG_DEB_COMPRESSOR_TYPE to + "none" which would be even larger. config DEBUG_INFO_COMPRESSED_ZSTD bool "Compress debugging information with zstd" diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules index ca07243bd5cd..33bfd00974b3 100755 --- a/scripts/package/debian/rules +++ b/scripts/package/debian/rules @@ -41,6 +41,10 @@ package = $($(@:binary-%=%-package)) # which package is being processed in the build log. DH_OPTIONS = -p$(package) +# Note: future removal of KDEB_COMPRESS +# dpkg-deb >= 1.21.10 supports the DPKG_DEB_COMPRESSOR_TYPE environment +# variable, which provides the same functionality as KDEB_COMPRESS. The +# KDEB_COMPRESS variable will be removed in the future. define binary $(Q)dh_testdir $(DH_OPTIONS) $(Q)dh_testroot $(DH_OPTIONS) From 97282e6d380db8a07120fe1b794ac969ee4a3b5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 22 Mar 2025 10:03:16 +0100 Subject: [PATCH 1605/2157] x86: drop unnecessary prefix map configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The toplevel Makefile already provides -fmacro-prefix-map as part of KBUILD_CPPFLAGS. In contrast to the KBUILD_CFLAGS and KBUILD_AFLAGS variables, KBUILD_CPPFLAGS is not redefined in the architecture specific Makefiles. Therefore the toplevel KBUILD_CPPFLAGS do apply just fine, to both C and ASM sources. The custom configuration was necessary when it was added in commit 9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths") but has since become unnecessary in commit a716bd743210 ("kbuild: use -fmacro-prefix-map for .S sources"). Drop the now unnecessary custom prefix map configuration. Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- arch/x86/boot/Makefile | 1 - arch/x86/boot/compressed/Makefile | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9cc0ff6e9067..75e7a76deee1 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -54,7 +54,6 @@ targets += cpustr.h KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(CONFIG_CC_IMPLICIT_FALLTHROUGH) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5edee7a9786c..4d3f714ad871 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -38,7 +38,6 @@ KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += -Wno-pointer-sign -KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS # Disable relocation relaxation in case the link is not PIE. From cacd22ce69585a91c386243cd662ada962431e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 15 Mar 2025 14:20:14 +0100 Subject: [PATCH 1606/2157] kbuild: make all file references relative to source root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -fmacro-prefix-map only affects __FILE__ and __BASE_FILE__. Other references, for example in debug information, are not affected. This makes handling of file references in the compiler outputs harder to use and creates problems for reproducible builds. Switch to -ffile-prefix map which affects all references. Also drop the documentation section advising manual specification of -fdebug-prefix-map for reproducible builds, as it is not necessary anymore. Suggested-by: Ben Hutchings Link: https://lore.kernel.org/lkml/c49cc967294f9a3a4a34f69b6a8727a6d3959ed8.camel@decadent.org.uk/ Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- Documentation/kbuild/reproducible-builds.rst | 17 ----------------- Makefile | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index f2dcc39044e6..a7762486c93f 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -46,21 +46,6 @@ The kernel embeds the building user and host names in `KBUILD_BUILD_USER and KBUILD_BUILD_HOST`_ variables. If you are building from a git commit, you could use its committer address. -Absolute filenames ------------------- - -When the kernel is built out-of-tree, debug information may include -absolute filenames for the source files. This must be overridden by -including the ``-fdebug-prefix-map`` option in the `KCFLAGS`_ variable. - -Depending on the compiler used, the ``__FILE__`` macro may also expand -to an absolute filename in an out-of-tree build. Kbuild automatically -uses the ``-fmacro-prefix-map`` option to prevent this, if it is -supported. - -The Reproducible Builds web site has more information about these -`prefix-map options`_. - Generated files in source packages ---------------------------------- @@ -131,7 +116,5 @@ See ``scripts/setlocalversion`` for details. .. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp .. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host -.. _KCFLAGS: kbuild.html#kcflags -.. _prefix-map options: https://reproducible-builds.org/docs/build-path/ .. _Reproducible Builds project: https://reproducible-builds.org/ .. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/ diff --git a/Makefile b/Makefile index 5c333682dc91..4f920187cee6 100644 --- a/Makefile +++ b/Makefile @@ -1067,7 +1067,7 @@ endif # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree -KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) +KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=) KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= endif From 6c6c1fc09de35f409f6971cb9e881103afe5dbe0 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 11 Mar 2025 12:49:02 -0700 Subject: [PATCH 1607/2157] modpost: require a MODULE_DESCRIPTION() Since commit 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing"), a module without a MODULE_DESCRIPTION() has resulted in a warning with make W=1. Since that time, all known instances of this issue have been fixed. Therefore, now make it an error if a MODULE_DESCRIPTION() is not present. Signed-off-by: Jeff Johnson Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7f4c72eca72c..92627e8d0e16 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1602,8 +1602,8 @@ static void read_symbols(const char *modname) namespace); } - if (extra_warn && !get_modinfo(&info, "description")) - warn("missing MODULE_DESCRIPTION() in %s\n", modname); + if (!get_modinfo(&info, "description")) + error("missing MODULE_DESCRIPTION() in %s\n", modname); } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { From 62604063621fb075c7966286bdddcb057d883fa8 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Fri, 14 Mar 2025 13:10:53 +0000 Subject: [PATCH 1608/2157] kbuild: deb-pkg: don't set KBUILD_BUILD_VERSION unconditionally In ThinPro, we use the convention +hp for the kernel package. This does not have a dash in the name or version. This is built by editing ".version" before a build, and setting EXTRAVERSION="+hp" and KDEB_PKGVERSION make variables: echo 68 > .version make -j EXTRAVERSION="+hp" bindeb-pkg KDEB_PKGVERSION=6.12.2+hp69 .deb name: linux-image-6.12.2+hp_6.12.2+hp69_amd64.deb Since commit 7d4f07d5cb71 ("kbuild: deb-pkg: squash scripts/package/deb-build-option to debian/rules"), this no longer works. The deb build logic changed, even though, the commit message implies that the logic should be unmodified. Before, KBUILD_BUILD_VERSION was not set if the KDEB_PKGVERSION did not contain a dash. After the change KBUILD_BUILD_VERSION is always set to KDEB_PKGVERSION. Since this determines UTS_VERSION, the uname output to look off: (now) uname -a: version 6.12.2+hp ... #6.12.2+hp69 (expected) uname -a: version 6.12.2+hp ... #69 Update the debian/rules logic to restore the original behavior. Fixes: 7d4f07d5cb71 ("kbuild: deb-pkg: squash scripts/package/deb-build-option to debian/rules") Signed-off-by: Alexandru Gagniuc Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/package/debian/rules | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules index 33bfd00974b3..a417a7f8bbc1 100755 --- a/scripts/package/debian/rules +++ b/scripts/package/debian/rules @@ -21,9 +21,11 @@ ifeq ($(origin KBUILD_VERBOSE),undefined) endif endif -revision = $(lastword $(subst -, ,$(shell dpkg-parsechangelog -S Version))) +revision = $(shell dpkg-parsechangelog -S Version | sed -n 's/.*-//p') CROSS_COMPILE ?= $(filter-out $(DEB_BUILD_GNU_TYPE)-, $(DEB_HOST_GNU_TYPE)-) -make-opts = ARCH=$(ARCH) KERNELRELEASE=$(KERNELRELEASE) KBUILD_BUILD_VERSION=$(revision) $(addprefix CROSS_COMPILE=,$(CROSS_COMPILE)) +make-opts = ARCH=$(ARCH) KERNELRELEASE=$(KERNELRELEASE) \ + $(addprefix KBUILD_BUILD_VERSION=,$(revision)) \ + $(addprefix CROSS_COMPILE=,$(CROSS_COMPILE)) binary-targets := $(addprefix binary-, image image-dbg headers libc-dev) From 8bdd53e066012bed431667393676d1b5e8cce153 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Mar 2025 00:15:20 +0900 Subject: [PATCH 1609/2157] kbuild: pacman-pkg: hardcode module installation path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'make pacman-pkg' for architectures with device tree support (i.e., arm, arm64, etc.) shows logs like follows: Installing dtbs... INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr//lib/modules/6.14.0-rc6+/dtb/actions/s700-cubieboard7.dtb INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr//lib/modules/6.14.0-rc6+/dtb/actions/s900-bubblegum-96.dtb INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr//lib/modules/6.14.0-rc6+/dtb/airoha/en7581-evb.dtb ... The double slashes ('//') between 'usr' and 'lib' are somewhat ugly. Let's hardcode the module installation path because the package contents should remain unaffected even if ${MODLIB} is overridden. Please note that scripts/packages/{builddeb,kernel.spec} also hardcode the module installation path. With this change, the log will look better, as follows: Installing dtbs... INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr/lib/modules/6.14.0-rc6+/dtb/actions/s700-cubieboard7.dtb INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr/lib/modules/6.14.0-rc6+/dtb/actions/s900-bubblegum-96.dtb INSTALL /home/masahiro/linux/pacman/linux-upstream/pkg/linux-upstream/usr/lib/modules/6.14.0-rc6+/dtb/airoha/en7581-evb.dtb ... Signed-off-by: Masahiro Yamada Acked-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor --- scripts/package/PKGBUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index 0cf3a55b05e1..452374d63c24 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -53,7 +53,7 @@ build() { _package() { pkgdesc="The ${pkgdesc} kernel and modules" - local modulesdir="${pkgdir}/usr/${MODLIB}" + local modulesdir="${pkgdir}/usr/lib/modules/${KERNELRELEASE}" _prologue @@ -81,7 +81,7 @@ _package() { _package-headers() { pkgdesc="Headers and scripts for building modules for the ${pkgdesc} kernel" - local builddir="${pkgdir}/usr/${MODLIB}/build" + local builddir="${pkgdir}/usr/lib/modules/${KERNELRELEASE}/build" _prologue @@ -114,7 +114,7 @@ _package-debug(){ pkgdesc="Non-stripped vmlinux file for the ${pkgdesc} kernel" local debugdir="${pkgdir}/usr/src/debug/${pkgbase}" - local builddir="${pkgdir}/usr/${MODLIB}/build" + local builddir="${pkgdir}/usr/lib/modules/${KERNELRELEASE}/build" _prologue From 2c8725c1dca3de043670b38592b1b43105322496 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 15 Mar 2025 20:40:45 +0100 Subject: [PATCH 1610/2157] rust: kbuild: skip `--remap-path-prefix` for `rustdoc` `rustdoc` only recognizes `--remap-path-prefix` starting with Rust 1.81.0, which is later than on minimum, so we cannot pass it unconditionally. Otherwise, we get: error: Unrecognized option: 'remap-path-prefix' Note that `rustc` (the compiler) does recognize the flag since a long time ago (1.26.0). Moreover, `rustdoc` since Rust 1.82.0 ICEs in out-of-tree builds when using `--remap-path-prefix`. The issue has been reduced and reported upstream [1]. Thus workaround both issues by simply skipping the flag when generating the docs -- it is not critical there anyway. The ICE does not reproduce under `--test`, but we still need to skip the flag as well for `RUSTDOC TK` since it is not recognized. Fixes: dbdffaf50ff9 ("kbuild, rust: use -fremap-path-prefix to make paths relative") Link: https://github.com/rust-lang/rust/issues/138520 [1] Signed-off-by: Miguel Ojeda Reviewed-by: Tamir Duberstein Signed-off-by: Masahiro Yamada --- rust/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index ea3849eb78f6..089473a89d46 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -57,10 +57,14 @@ endif core-cfgs = \ --cfg no_fp_fmt_parse +# `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only +# since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust +# 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both +# issues skipping the flag. The former also applies to `RUSTDOC TK`. quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< cmd_rustdoc = \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) $(filter-out $(skip_flags),$(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ + $(RUSTDOC) $(filter-out $(skip_flags) --remap-path-prefix=%,$(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ $(rustc_target_flags) -L$(objtree)/$(obj) \ -Zunstable-options --generate-link-to-definition \ --output $(rustdoc_output) \ @@ -171,7 +175,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< rm -rf $(objtree)/$(obj)/test/doctests/kernel; \ mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) --test $(rust_flags) \ + $(RUSTDOC) --test $(filter-out --remap-path-prefix=%,$(rust_flags)) \ -L$(objtree)/$(obj) --extern ffi --extern kernel \ --extern build_error --extern macros \ --extern bindings --extern uapi \ From 34ceb69edd6cb9581978e0f3e459da4959c0c387 Mon Sep 17 00:00:00 2001 From: Tuomas Ahola Date: Sat, 22 Mar 2025 17:36:39 +0200 Subject: [PATCH 1611/2157] Documentation/fs/9p: fix broken link In b529c06f9dc7 (Update the documentation referencing Plan 9 from User Space., 2020-04-26), another instance of the link was left unfixed. Fix that as well. Signed-off-by: Tuomas Ahola Message-ID: <20250322153639.4917-1-taahol@utu.fi> Signed-off-by: Dominique Martinet --- Documentation/filesystems/9p.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index 2bbf68b56b0d..28871200e87c 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -40,7 +40,7 @@ For remote file server:: mount -t 9p 10.10.1.2 /mnt/9 -For Plan 9 From User Space applications (http://swtch.com/plan9):: +For Plan 9 From User Space applications (https://9fans.github.io/plan9port/):: mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER From ad2e2a77dcc7912c737a07fe061d93c414c6745e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Mar 2025 10:52:00 -0400 Subject: [PATCH 1612/2157] 9p: Use hashtable.h for hash_errmap Convert hash_errmap in error.c to use the generic hashtable implementation from hashtable.h instead of the manual hlist_head array implementation. This simplifies the code and makes it more maintainable by using the standard hashtable API and removes the need for manual hash table management. Signed-off-by: Sasha Levin Message-ID: <20250320145200.3124863-1-sashal@kernel.org> Signed-off-by: Dominique Martinet --- net/9p/error.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/9p/error.c b/net/9p/error.c index 8da744494b68..8ba8afc91482 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /** @@ -33,8 +34,8 @@ struct errormap { struct hlist_node list; }; -#define ERRHASHSZ 32 -static struct hlist_head hash_errmap[ERRHASHSZ]; +#define ERRHASH_BITS 5 +static DEFINE_HASHTABLE(hash_errmap, ERRHASH_BITS); /* FixMe - reduce to a reasonable size */ static struct errormap errmap[] = { @@ -176,18 +177,14 @@ static struct errormap errmap[] = { int p9_error_init(void) { struct errormap *c; - int bucket; - - /* initialize hash table */ - for (bucket = 0; bucket < ERRHASHSZ; bucket++) - INIT_HLIST_HEAD(&hash_errmap[bucket]); + u32 hash; /* load initial error map into hash table */ for (c = errmap; c->name; c++) { c->namelen = strlen(c->name); - bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; + hash = jhash(c->name, c->namelen, 0); INIT_HLIST_NODE(&c->list); - hlist_add_head(&c->list, &hash_errmap[bucket]); + hash_add(hash_errmap, &c->list, hash); } return 1; @@ -205,12 +202,12 @@ int p9_errstr2errno(char *errstr, int len) { int errno; struct errormap *c; - int bucket; + u32 hash; errno = 0; c = NULL; - bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, &hash_errmap[bucket], list) { + hash = jhash(errstr, len, 0); + hash_for_each_possible(hash_errmap, c, list, hash) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; From 9f8fe348ac9544f6855f82565e754bf085d81f88 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Mon, 24 Mar 2025 10:10:51 +0800 Subject: [PATCH 1613/2157] tty: serial: fsl_lpuart: Fix unused variable 'sport' build warning Remove the unused variable 'sport' to avoid the kernel build warning. Fixes: 3cc16ae096f1 ("tty: serial: fsl_lpuart: use port struct directly to simply code") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503210614.2qGlnbIq-lkp@intel.com/ Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20250324021051.162676-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 33eeefa6fa8f..4470966b826c 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -639,8 +639,6 @@ static void lpuart32_wait_bit_set(struct uart_port *port, unsigned int offset, static int lpuart_poll_init(struct uart_port *port) { - struct lpuart_port *sport = container_of(port, - struct lpuart_port, port); unsigned long flags; u8 fifo; @@ -693,7 +691,6 @@ static int lpuart_poll_get_char(struct uart_port *port) static int lpuart32_poll_init(struct uart_port *port) { unsigned long flags; - struct lpuart_port *sport = container_of(port, struct lpuart_port, port); u32 fifo; port->fifosize = 0; From d43929ef65a60b4c44a5f85cdce826c4e33a67d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Mar 2025 08:18:16 +0100 Subject: [PATCH 1614/2157] dm-delay: support zoned devices Add support for zoned device by passing through report_zoned to the underlying read device. This is required to make enable xfstests xfs/311 on zoned devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- drivers/md/dm-delay.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 08f6387620c1..d4cf0ac2a7aa 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -369,6 +369,21 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, c, bio); } +#ifdef CONFIG_BLK_DEV_ZONED +static int delay_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) +{ + struct delay_c *dc = ti->private; + struct delay_class *c = &dc->read; + + return dm_report_zones(c->dev->bdev, c->start, + c->start + dm_target_offset(ti, args->next_sector), + args, nr_zones); +} +#else +#define delay_report_zones NULL +#endif + #define DMEMIT_DELAY_CLASS(c) \ DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay) @@ -424,11 +439,12 @@ static int delay_iterate_devices(struct dm_target *ti, static struct target_type delay_target = { .name = "delay", .version = {1, 4, 0}, - .features = DM_TARGET_PASSES_INTEGRITY, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, .map = delay_map, + .report_zones = delay_report_zones, .presuspend = delay_presuspend, .resume = delay_resume, .status = delay_status, From ef753d66051ca03bee1982ce047f9eaf90f81ab4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:51 -0700 Subject: [PATCH 1615/2157] objtool: Fix detection of consecutive jump tables on Clang 20 The jump table detection code assumes jump tables are in the same order as their corresponding indirect branches. That's apparently not always true with Clang 20. Fix that by changing how multiple jump tables are detected. In the first detection pass, mark the beginning of each jump table so the second pass can tell where one ends and the next one begins. Fixes the following warnings: vmlinux.o: warning: objtool: SiS_GetCRT2Ptr+0x1ad: stack state mismatch: cfa1=4+8 cfa2=5+16 sound/core/seq/snd-seq.o: warning: objtool: cc_ev_to_ump_midi2+0x589: return with modified stack frame Fixes: be2f0b1e1264 ("objtool: Get rid of reloc->jump_table_start") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Linus Torvalds Link: https://lore.kernel.org/r/141752fff614eab962dba6bdfaa54aa67ff03bba.1742852846.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503171547.LlCTJLQL-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202503200535.J3hAvcjw-lkp@intel.com/ --- tools/objtool/check.c | 26 ++++++++------------------ tools/objtool/elf.c | 6 +++--- tools/objtool/include/objtool/elf.h | 27 ++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ca3435acc326..dae17ed4a5d7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1941,8 +1941,7 @@ __weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct relo return reloc->sym->offset + reloc_addend(reloc); } -static int add_jump_table(struct objtool_file *file, struct instruction *insn, - struct reloc *next_table) +static int add_jump_table(struct objtool_file *file, struct instruction *insn) { unsigned long table_size = insn_jump_table_size(insn); struct symbol *pfunc = insn_func(insn)->pfunc; @@ -1962,7 +1961,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, /* Check for the end of the table: */ if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size) break; - if (reloc != table && reloc == next_table) + if (reloc != table && is_jump_table(reloc)) break; /* Make sure the table entries are consecutive: */ @@ -2053,8 +2052,10 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func, if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; + set_jump_table(table_reloc); orig_insn->_jump_table = table_reloc; orig_insn->_jump_table_size = table_size; + break; } } @@ -2096,31 +2097,20 @@ static void mark_func_jump_tables(struct objtool_file *file, static int add_func_jump_tables(struct objtool_file *file, struct symbol *func) { - struct instruction *insn, *insn_t1 = NULL, *insn_t2; - int ret = 0; + struct instruction *insn; + int ret; func_for_each_insn(file, func, insn) { if (!insn_jump_table(insn)) continue; - if (!insn_t1) { - insn_t1 = insn; - continue; - } - insn_t2 = insn; - - ret = add_jump_table(file, insn_t1, insn_jump_table(insn_t2)); + ret = add_jump_table(file, insn); if (ret) return ret; - - insn_t1 = insn_t2; } - if (insn_t1) - ret = add_jump_table(file, insn_t1, NULL); - - return ret; + return 0; } /* diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index be4f4b62730c..0f38167bd840 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -583,7 +583,7 @@ static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym) { struct reloc *reloc; - for (reloc = sym->relocs; reloc; reloc = reloc->sym_next_reloc) + for (reloc = sym->relocs; reloc; reloc = sym_next_reloc(reloc)) set_reloc_sym(elf, reloc, reloc->sym->idx); return 0; @@ -880,7 +880,7 @@ static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec, set_reloc_addend(elf, reloc, addend); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); - reloc->sym_next_reloc = sym->relocs; + set_sym_next_reloc(reloc, sym->relocs); sym->relocs = reloc; return reloc; @@ -979,7 +979,7 @@ static int read_relocs(struct elf *elf) } elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); - reloc->sym_next_reloc = sym->relocs; + set_sym_next_reloc(reloc, sym->relocs); sym->relocs = reloc; nr_reloc++; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 223ac1c24b90..4edc957a6f6b 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -77,7 +77,7 @@ struct reloc { struct elf_hash_node hash; struct section *sec; struct symbol *sym; - struct reloc *sym_next_reloc; + unsigned long _sym_next_reloc; }; struct elf { @@ -297,6 +297,31 @@ static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned mark_sec_changed(elf, reloc->sec, true); } +#define RELOC_JUMP_TABLE_BIT 1UL + +/* Does reloc mark the beginning of a jump table? */ +static inline bool is_jump_table(struct reloc *reloc) +{ + return reloc->_sym_next_reloc & RELOC_JUMP_TABLE_BIT; +} + +static inline void set_jump_table(struct reloc *reloc) +{ + reloc->_sym_next_reloc |= RELOC_JUMP_TABLE_BIT; +} + +static inline struct reloc *sym_next_reloc(struct reloc *reloc) +{ + return (struct reloc *)(reloc->_sym_next_reloc & ~RELOC_JUMP_TABLE_BIT); +} + +static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next) +{ + unsigned long bit = reloc->_sym_next_reloc & RELOC_JUMP_TABLE_BIT; + + reloc->_sym_next_reloc = (unsigned long)next | bit; +} + #define for_each_sec(file, sec) \ list_for_each_entry(sec, &file->elf->sections, list) From eeff7ac61526a4a9c3916cf46885622078ad886b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:52 -0700 Subject: [PATCH 1616/2157] objtool: Warn when disabling unreachable warnings Print a warning when disabling the unreachable warnings (due to a GCC bug). This will help determine if recent GCCs still have the issue and alert us if any other issues might be silently lurking behind the unreachable disablement. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/df243063787596e6031367e6659e7e43409d6c6d.1742852846.git.jpoimboe@kernel.org --- tools/objtool/arch/x86/special.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 9c1c9df09aaa..5f46d4e7f7f8 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -3,6 +3,7 @@ #include #include +#include #define X86_FEATURE_POPCNT (4 * 32 + 23) #define X86_FEATURE_SMAP (9 * 32 + 20) @@ -156,8 +157,10 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, * indicates a rare GCC quirk/bug which can leave dead * code behind. */ - if (reloc_type(text_reloc) == R_X86_64_PC32) + if (reloc_type(text_reloc) == R_X86_64_PC32) { + WARN_INSN(insn, "ignoring unreachables due to jump table quirk"); file->ignore_unreachables = true; + } *table_size = 0; return rodata_reloc; From c84301d706c5456b1460439b2987a0f0b6362a82 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:53 -0700 Subject: [PATCH 1617/2157] objtool: Ignore entire functions rather than instructions STACK_FRAME_NON_STANDARD applies to functions. Use a function-specific ignore attribute in preparation for getting rid of insn->ignore. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/4af13376567f83331a9372ae2bb25e11a3d0f055.1742852846.git.jpoimboe@kernel.org --- tools/objtool/check.c | 35 +++++++++++++++-------------- tools/objtool/include/objtool/elf.h | 1 + 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index dae17ed4a5d7..f4650205854d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -981,7 +981,6 @@ static int create_direct_call_sections(struct objtool_file *file) */ static void add_ignores(struct objtool_file *file) { - struct instruction *insn; struct section *rsec; struct symbol *func; struct reloc *reloc; @@ -1008,8 +1007,7 @@ static void add_ignores(struct objtool_file *file) continue; } - func_for_each_insn(file, func, insn) - insn->ignore = true; + func->ignore = true; } } @@ -1612,6 +1610,7 @@ static int add_call_destinations(struct objtool_file *file) struct reloc *reloc; for_each_insn(file, insn) { + struct symbol *func = insn_func(insn); if (insn->type != INSN_CALL) continue; @@ -1622,7 +1621,7 @@ static int add_call_destinations(struct objtool_file *file) add_call_dest(file, insn, dest, false); - if (insn->ignore) + if (func && func->ignore) continue; if (!insn_call_dest(insn)) { @@ -1630,7 +1629,7 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) { + if (func && insn_call_dest(insn)->type != STT_FUNC) { WARN_INSN(insn, "unsupported call to non-function"); return -1; } @@ -3470,6 +3469,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, u8 visited; int ret; + if (func && func->ignore) + return 0; + sec = insn->sec; while (1) { @@ -3715,7 +3717,7 @@ static int validate_unwind_hint(struct objtool_file *file, struct instruction *insn, struct insn_state *state) { - if (insn->hint && !insn->visited && !insn->ignore) { + if (insn->hint && !insn->visited) { int ret = validate_branch(file, insn_func(insn), insn, *state); if (ret) BT_INSN(insn, "<=== (hint)"); @@ -3929,10 +3931,11 @@ static bool is_ubsan_insn(struct instruction *insn) static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn) { - int i; + struct symbol *func = insn_func(insn); struct instruction *prev_insn; + int i; - if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) + if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore)) return true; /* @@ -3951,7 +3954,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * In this case we'll find a piece of code (whole function) that is not * covered by a !section symbol. Ignore them. */ - if (opts.link && !insn_func(insn)) { + if (opts.link && !func) { int size = find_symbol_hole_containing(insn->sec, insn->offset); unsigned long end = insn->offset + size; @@ -3977,19 +3980,17 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio */ if (insn->jump_dest && insn_func(insn->jump_dest) && strstr(insn_func(insn->jump_dest)->name, ".cold")) { - struct instruction *dest = insn->jump_dest; - func_for_each_insn(file, insn_func(dest), dest) - dest->ignore = true; + insn_func(insn->jump_dest)->ignore = true; } } return false; } - if (!insn_func(insn)) + if (!func) return false; - if (insn_func(insn)->static_call_tramp) + if (func->static_call_tramp) return true; /* @@ -4020,7 +4021,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio if (insn->type == INSN_JUMP_UNCONDITIONAL) { if (insn->jump_dest && - insn_func(insn->jump_dest) == insn_func(insn)) { + insn_func(insn->jump_dest) == func) { insn = insn->jump_dest; continue; } @@ -4028,7 +4029,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio break; } - if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len) + if (insn->offset + insn->len >= func->offset + func->len) break; insn = next_insn_same_sec(file, insn); @@ -4120,7 +4121,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, return 0; insn = find_insn(file, sec, sym->offset); - if (!insn || insn->ignore || insn->visited) + if (!insn || insn->visited) return 0; state->uaccess = sym->uaccess_safe; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 4edc957a6f6b..eba04392c6fd 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -68,6 +68,7 @@ struct symbol { u8 embedded_insn : 1; u8 local_label : 1; u8 frame_pointer : 1; + u8 ignore : 1; u8 warnings : 2; struct list_head pv_target; struct reloc *relocs; From 1154bbd326de4453858cf78cf29420888b3ffd52 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:54 -0700 Subject: [PATCH 1618/2157] objtool: Fix X86_FEATURE_SMAP alternative handling For X86_FEATURE_SMAP alternatives which replace NOP with STAC or CLAC, uaccess validation skips the NOP branch to avoid following impossible code paths, e.g. where a STAC would be patched but a CLAC wouldn't. However, it's not safe to assume an X86_FEATURE_SMAP alternative is patching STAC/CLAC. There can be other alternatives, like static_cpu_has(), where both branches need to be validated. Fix that by repurposing ANNOTATE_IGNORE_ALTERNATIVE for skipping either original instructions or new ones. This is a more generic approach which enables the removal of the feature checking hacks and the insn->ignore bit. Fixes the following warnings: arch/x86/mm/fault.o: warning: objtool: do_user_addr_fault+0x8ec: __stack_chk_fail() missing __noreturn in .c/.h or NORETURN() in noreturns.h arch/x86/mm/fault.o: warning: objtool: do_user_addr_fault+0x8f1: unreachable instruction [ mingo: Fix up conflicts with recent x86 changes. ] Fixes: ea24213d8088 ("objtool: Add UACCESS validation") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/de0621ca242130156a55d5d74fed86994dfa4c9c.1742852846.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503181736.zkZUBv4N-lkp@intel.com/ --- arch/x86/include/asm/arch_hweight.h | 6 ++-- arch/x86/include/asm/smap.h | 23 ++++++++++----- arch/x86/include/asm/xen/hypercall.h | 6 ++-- tools/objtool/arch/x86/special.c | 33 +-------------------- tools/objtool/check.c | 39 +++++++------------------ tools/objtool/include/objtool/check.h | 3 +- tools/objtool/include/objtool/special.h | 4 +-- tools/objtool/special.c | 12 ++------ 8 files changed, 37 insertions(+), 89 deletions(-) diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index b5982b94bdba..cbc6157f0b4b 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -16,7 +16,8 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm_inline (ALTERNATIVE("call __sw_hweight32", + asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "call __sw_hweight32", "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); @@ -45,7 +46,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm_inline (ALTERNATIVE("call __sw_hweight64", + asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "call __sw_hweight64", "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index daea94c2993c..55a5e656e4b9 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -16,23 +16,23 @@ #ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE "", "clac", X86_FEATURE_SMAP + ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", "stac", X86_FEATURE_SMAP + ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", "clac", X86_FEATURE_SMAP); + alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", "stac", X86_FEATURE_SMAP); + alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) @@ -40,7 +40,8 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("", "pushf; pop %0; " "clac" "\n\t", + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "", "pushf; pop %0; clac", X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); @@ -50,16 +51,22 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" - ALTERNATIVE("", "push %0; popf\n\t", + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "", "push %0; popf", X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", "clac", X86_FEATURE_SMAP) + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", "stac", X86_FEATURE_SMAP) + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP) + +#define ASM_CLAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP) +#define ASM_STAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "stac", X86_FEATURE_SMAP) #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 97771b9d33af..59a62c3780a2 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -231,14 +231,12 @@ static __always_inline void __xen_stac(void) * Suppress objtool seeing the STAC/CLAC and getting confused about it * calling random code with AC=1. */ - asm volatile(ANNOTATE_IGNORE_ALTERNATIVE - ASM_STAC ::: "memory", "flags"); + asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags"); } static __always_inline void __xen_clac(void) { - asm volatile(ANNOTATE_IGNORE_ALTERNATIVE - ASM_CLAC ::: "memory", "flags"); + asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags"); } static inline long diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 5f46d4e7f7f8..403e587676f1 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -5,10 +5,7 @@ #include #include -#define X86_FEATURE_POPCNT (4 * 32 + 23) -#define X86_FEATURE_SMAP (9 * 32 + 20) - -void arch_handle_alternative(unsigned short feature, struct special_alt *alt) +void arch_handle_alternative(struct special_alt *alt) { static struct special_alt *group, *prev; @@ -32,34 +29,6 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt) } else group = alt; prev = alt; - - switch (feature) { - case X86_FEATURE_SMAP: - /* - * If UACCESS validation is enabled; force that alternative; - * otherwise force it the other way. - * - * What we want to avoid is having both the original and the - * alternative code flow at the same time, in that case we can - * find paths that see the STAC but take the NOP instead of - * CLAC and the other way around. - */ - if (opts.uaccess) - alt->skip_orig = true; - else - alt->skip_alt = true; - break; - case X86_FEATURE_POPCNT: - /* - * It has been requested that we don't validate the !POPCNT - * feature path which is a "very very small percentage of - * machines". - */ - alt->skip_orig = true; - break; - default: - break; - } } bool arch_support_alt_relocation(struct special_alt *special_alt, diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f4650205854d..b2f6a7ff77b5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -25,7 +25,6 @@ struct alternative { struct alternative *next; struct instruction *insn; - bool skip_orig; }; static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; @@ -1696,6 +1695,7 @@ static int handle_group_alt(struct objtool_file *file, orig_alt_group->first_insn = orig_insn; orig_alt_group->last_insn = last_orig_insn; orig_alt_group->nop = NULL; + orig_alt_group->ignore = orig_insn->ignore_alts; } else { if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len - orig_alt_group->first_insn->offset != special_alt->orig_len) { @@ -1735,7 +1735,6 @@ static int handle_group_alt(struct objtool_file *file, nop->type = INSN_NOP; nop->sym = orig_insn->sym; nop->alt_group = new_alt_group; - nop->ignore = orig_insn->ignore_alts; } if (!special_alt->new_len) { @@ -1752,7 +1751,6 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; - insn->ignore = orig_insn->ignore_alts; insn->sym = orig_insn->sym; insn->alt_group = new_alt_group; @@ -1799,6 +1797,7 @@ static int handle_group_alt(struct objtool_file *file, new_alt_group->first_insn = *new_insn; new_alt_group->last_insn = last_new_insn; new_alt_group->nop = nop; + new_alt_group->ignore = (*new_insn)->ignore_alts; new_alt_group->cfi = orig_alt_group->cfi; return 0; } @@ -1916,8 +1915,6 @@ static int add_special_section_alts(struct objtool_file *file) } alt->insn = new_insn; - alt->skip_orig = special_alt->skip_orig; - orig_insn->ignore_alts |= special_alt->skip_alt; alt->next = orig_insn->alts; orig_insn->alts = alt; @@ -2295,6 +2292,8 @@ static int read_annotate(struct objtool_file *file, static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn) { switch (type) { + + /* Must be before add_special_section_alts() */ case ANNOTYPE_IGNORE_ALTS: insn->ignore_alts = true; break; @@ -3488,11 +3487,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - if (func && insn->ignore) { - WARN_INSN(insn, "BUG: why am I validating an ignored function?"); - return 1; - } - visited = VISITED_BRANCH << state.uaccess; if (insn->visited & VISITED_BRANCH_MASK) { if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) @@ -3564,24 +3558,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (propagate_alt_cfi(file, insn)) return 1; - if (!insn->ignore_alts && insn->alts) { - bool skip_orig = false; - + if (insn->alts) { for (alt = insn->alts; alt; alt = alt->next) { - if (alt->skip_orig) - skip_orig = true; - ret = validate_branch(file, func, alt->insn, state); if (ret) { BT_INSN(insn, "(alt)"); return ret; } } - - if (skip_orig) - return 0; } + if (insn->alt_group && insn->alt_group->ignore) + return 0; + if (handle_insn_ops(insn, next_insn, &state)) return 1; @@ -3768,23 +3757,15 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) insn->visited |= VISITED_UNRET; - if (!insn->ignore_alts && insn->alts) { + if (insn->alts) { struct alternative *alt; - bool skip_orig = false; - for (alt = insn->alts; alt; alt = alt->next) { - if (alt->skip_orig) - skip_orig = true; - ret = validate_unret(file, alt->insn); if (ret) { BT_INSN(insn, "(alt)"); return ret; } } - - if (skip_orig) - return 0; } switch (insn->type) { @@ -3935,7 +3916,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio struct instruction *prev_insn; int i; - if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore)) + if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore)) return true; /* diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index e1cd13cd28a3..00fb745e7233 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -34,6 +34,8 @@ struct alt_group { * This is shared with the other alt_groups in the same alternative. */ struct cfi_state **cfi; + + bool ignore; }; #define INSN_CHUNK_BITS 8 @@ -54,7 +56,6 @@ struct instruction { u32 idx : INSN_CHUNK_BITS, dead_end : 1, - ignore : 1, ignore_alts : 1, hint : 1, save : 1, diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index e049679bb17b..72d09c0adf1a 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -16,8 +16,6 @@ struct special_alt { struct list_head list; bool group; - bool skip_orig; - bool skip_alt; bool jump_or_nop; u8 key_addend; @@ -32,7 +30,7 @@ struct special_alt { int special_get_alts(struct elf *elf, struct list_head *alts); -void arch_handle_alternative(unsigned short feature, struct special_alt *alt); +void arch_handle_alternative(struct special_alt *alt); bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 097a69db82a0..6cd7b1be5331 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -54,7 +54,7 @@ static const struct special_entry entries[] = { {}, }; -void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt) +void __weak arch_handle_alternative(struct special_alt *alt) { } @@ -92,15 +92,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); - if (entry->feature) { - unsigned short feature; - - feature = bswap_if_needed(elf, - *(unsigned short *)(sec->data->d_buf + - offset + - entry->feature)); - arch_handle_alternative(feature, alt); - } + arch_handle_alternative(alt); if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); From 4759670bc3e670069c055c2b33174813099fea4f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:55 -0700 Subject: [PATCH 1619/2157] objtool: Fix CONFIG_OBJTOOL_WERROR for vmlinux.o With (!X86_KERNEL_IBT && !LTO_CLANG && NOINSTR_VALIDATION), objtool runs on both translation units and vmlinux.o. With CONFIG_OBJTOOL_WERROR, the TUs get --Werror but vmlinux.o doesn't. Fix that. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/4f71ab9b947ffc47b6a87dd3b9aff4bb32b36d0a.1742852846.git.jpoimboe@kernel.org --- scripts/Makefile.vmlinux_o | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0b6e2ebf60dc..f476f5605029 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -30,12 +30,20 @@ endif # objtool for vmlinux.o # --------------------------------------------------------------------------- # -# For LTO and IBT, objtool doesn't run on individual translation units. -# Run everything on vmlinux instead. +# For delay-objtool (IBT or LTO), objtool doesn't run on individual translation +# units. Instead it runs on vmlinux.o. +# +# For !delay-objtool + CONFIG_NOINSTR_VALIDATION, it runs on both translation +# units and vmlinux.o, with the latter only used for noinstr/unret validation. objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) -vmlinux-objtool-args-$(delay-objtool) += $(objtool-args-y) +ifeq ($(delay-objtool),y) +vmlinux-objtool-args-y += $(objtool-args-y) +else +vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror +endif + vmlinux-objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_MITIGATION_SRSO)), --unret) From 4fab2d7628dd38f3fa8a5e615199350ecaeb17a8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:56 -0700 Subject: [PATCH 1620/2157] objtool: Fix init_module() handling If IBT is enabled and a module uses the deprecated init_module() magic function name rather than module_init(fn), its ENDBR will get removed, causing an IBT failure during module load. Objtool does print an obscure warning, but then does nothing to either correct it or return an error. Improve the usefulness of the warning and return an error so it will at least fail the build with CONFIG_OBJTOOL_WERROR. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/366bfdbe92736cde9fb01d5d3eb9b98e9070a1ec.1742852846.git.jpoimboe@kernel.org --- tools/objtool/check.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b2f6a7ff77b5..2f7aff1c367d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -828,8 +828,11 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) if (opts.module && sym && sym->type == STT_FUNC && insn->offset == sym->offset && (!strcmp(sym->name, "init_module") || - !strcmp(sym->name, "cleanup_module"))) - WARN("%s(): not an indirect call target", sym->name); + !strcmp(sym->name, "cleanup_module"))) { + WARN("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead", + sym->name); + return -1; + } if (!elf_init_reloc_text_sym(file->elf, sec, idx * sizeof(int), idx, From 6b023c7842048c4bbeede802f3cf36b96c7a8b25 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:57 -0700 Subject: [PATCH 1621/2157] objtool: Silence more KCOV warnings In the past there were issues with KCOV triggering unreachable instruction warnings, which is why unreachable warnings are now disabled with CONFIG_KCOV. Now some new KCOV warnings are showing up with GCC 14: vmlinux.o: warning: objtool: cpuset_write_resmask() falls through to next function cpuset_update_active_cpus.cold() drivers/usb/core/driver.o: error: objtool: usb_deregister() falls through to next function usb_match_device() sound/soc/codecs/snd-soc-wcd934x.o: warning: objtool: .text.wcd934x_slim_irq_handler: unexpected end of section All are caused by GCC KCOV not finishing an optimization, leaving behind a never-taken conditional branch to a basic block which falls through to the next function (or end of section). At a high level this is similar to the unreachable warnings mentioned above, in that KCOV isn't fully removing dead code. Treat it the same way by adding these to the list of warnings to ignore with CONFIG_KCOV. Reported-by: Ingo Molnar Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/66a61a0b65d74e072d3dc02384e395edb2adc3c5.1742852846.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/Z9iTsI09AEBlxlHC@gmail.com Closes: https://lore.kernel.org/oe-kbuild-all/202503180044.oH9gyPeg-lkp@intel.com/ --- tools/objtool/check.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2f7aff1c367d..cb66e6b16841 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3485,6 +3485,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, !strncmp(func->name, "__pfx_", 6)) return 0; + if (file->ignore_unreachables) + return 0; + WARN("%s() falls through to next function %s()", func->name, insn_func(insn)->name); return 1; @@ -3694,6 +3697,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (!next_insn) { if (state.cfi.cfa.base == CFI_UNDEFINED) return 0; + if (file->ignore_unreachables) + return 0; + WARN("%s: unexpected end of section", sec->name); return 1; } From e1a9dda74dbffbc3fa2069ff418a1876dc99fb14 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:58 -0700 Subject: [PATCH 1622/2157] objtool: Properly disable uaccess validation If opts.uaccess isn't set, the uaccess validation is disabled, but only partially: it doesn't read the uaccess_safe_builtin list but still tries to do the validation. Disable it completely to prevent false warnings. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/0e95581c1d2107fb5f59418edf2b26bba38b0cbb.1742852846.git.jpoimboe@kernel.org --- tools/objtool/check.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index cb66e6b16841..b0ef14a5b1ff 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3187,7 +3187,7 @@ static int handle_insn_ops(struct instruction *insn, if (update_cfi_state(insn, next_insn, &state->cfi, op)) return 1; - if (!insn->alt_group) + if (!opts.uaccess || !insn->alt_group) continue; if (op->dest.type == OP_DEST_PUSHF) { @@ -3647,6 +3647,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; case INSN_STAC: + if (!opts.uaccess) + break; + if (state.uaccess) { WARN_INSN(insn, "recursive UACCESS enable"); return 1; @@ -3656,6 +3659,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CLAC: + if (!opts.uaccess) + break; + if (!state.uaccess && func) { WARN_INSN(insn, "redundant UACCESS disable"); return 1; @@ -4114,7 +4120,8 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, if (!insn || insn->visited) return 0; - state->uaccess = sym->uaccess_safe; + if (opts.uaccess) + state->uaccess = sym->uaccess_safe; ret = validate_branch(file, insn_func(insn), insn, *state); if (ret) From c5995abe15476798b2e2f0163a33404c41aafc8f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:55:59 -0700 Subject: [PATCH 1623/2157] objtool: Improve error handling Fix some error handling issues, improve error messages, properly distinguish betwee errors and warnings, and generally try to make all the error handling more consistent. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/3094bb4463dad29b6bd1bea03848d1571ace771c.1742852846.git.jpoimboe@kernel.org --- tools/objtool/builtin-check.c | 37 ++- tools/objtool/check.c | 368 ++++++++++++------------ tools/objtool/elf.c | 22 +- tools/objtool/include/objtool/objtool.h | 2 +- tools/objtool/include/objtool/warn.h | 13 +- tools/objtool/objtool.c | 11 +- 6 files changed, 232 insertions(+), 221 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5f761f420b8c..c973a751fb7d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -8,15 +8,12 @@ #include #include #include +#include #include #include #include #include - -#define ERROR(format, ...) \ - fprintf(stderr, \ - "error: objtool: " format "\n", \ - ##__VA_ARGS__) +#include const char *objname; @@ -139,22 +136,22 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]) static bool opts_valid(void) { if (opts.mnop && !opts.mcount) { - ERROR("--mnop requires --mcount"); + WARN("--mnop requires --mcount"); return false; } if (opts.noinstr && !opts.link) { - ERROR("--noinstr requires --link"); + WARN("--noinstr requires --link"); return false; } if (opts.ibt && !opts.link) { - ERROR("--ibt requires --link"); + WARN("--ibt requires --link"); return false; } if (opts.unret && !opts.link) { - ERROR("--unret requires --link"); + WARN("--unret requires --link"); return false; } @@ -171,7 +168,7 @@ static bool opts_valid(void) opts.static_call || opts.uaccess) { if (opts.dump_orc) { - ERROR("--dump can't be combined with other actions"); + WARN("--dump can't be combined with other actions"); return false; } @@ -181,7 +178,7 @@ static bool opts_valid(void) if (opts.dump_orc) return true; - ERROR("At least one action required"); + WARN("At least one action required"); return false; } @@ -194,30 +191,30 @@ static int copy_file(const char *src, const char *dst) src_fd = open(src, O_RDONLY); if (src_fd == -1) { - ERROR("can't open '%s' for reading", src); + WARN("can't open %s for reading: %s", src, strerror(errno)); return 1; } dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400); if (dst_fd == -1) { - ERROR("can't open '%s' for writing", dst); + WARN("can't open %s for writing: %s", dst, strerror(errno)); return 1; } if (fstat(src_fd, &stat) == -1) { - perror("fstat"); + WARN_GLIBC("fstat"); return 1; } if (fchmod(dst_fd, stat.st_mode) == -1) { - perror("fchmod"); + WARN_GLIBC("fchmod"); return 1; } for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) { copied = sendfile(dst_fd, src_fd, &offset, to_copy); if (copied == -1) { - perror("sendfile"); + WARN_GLIBC("sendfile"); return 1; } } @@ -233,14 +230,14 @@ static char **save_argv(int argc, const char **argv) orig_argv = calloc(argc, sizeof(char *)); if (!orig_argv) { - perror("calloc"); + WARN_GLIBC("calloc"); return NULL; } for (int i = 0; i < argc; i++) { orig_argv[i] = strdup(argv[i]); if (!orig_argv[i]) { - perror("strdup"); + WARN_GLIBC("strdup(%s)", orig_argv[i]); return NULL; } }; @@ -285,7 +282,7 @@ int objtool_run(int argc, const char **argv) goto err; if (!opts.link && has_multiple_files(file->elf)) { - ERROR("Linked object requires --link"); + WARN("Linked object requires --link"); goto err; } @@ -313,7 +310,7 @@ int objtool_run(int argc, const char **argv) */ backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1); if (!backup) { - perror("malloc"); + WARN_GLIBC("malloc"); return 1; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b0ef14a5b1ff..f4e7ee8e8fb5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -353,7 +353,7 @@ static struct cfi_state *cfi_alloc(void) { struct cfi_state *cfi = calloc(1, sizeof(struct cfi_state)); if (!cfi) { - WARN("calloc failed"); + WARN_GLIBC("calloc"); exit(1); } nr_cfi++; @@ -409,7 +409,7 @@ static void *cfi_hash_alloc(unsigned long size) PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (cfi_hash == (void *)-1L) { - WARN("mmap fail cfi_hash"); + WARN_GLIBC("mmap fail cfi_hash"); cfi_hash = NULL; } else if (opts.stats) { printf("cfi_bits: %d\n", cfi_bits); @@ -465,7 +465,7 @@ static int decode_instructions(struct objtool_file *file) if (!insns || idx == INSN_CHUNK_MAX) { insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE); if (!insns) { - WARN("malloc failed"); + WARN_GLIBC("calloc"); return -1; } idx = 0; @@ -567,14 +567,21 @@ static int add_pv_ops(struct objtool_file *file, const char *symname) if (!reloc) break; + idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long); + func = reloc->sym; if (func->type == STT_SECTION) func = find_symbol_by_offset(reloc->sym->sec, reloc_addend(reloc)); + if (!func) { + WARN_FUNC("can't find func at %s[%d]", + reloc->sym->sec, reloc_addend(reloc), + symname, idx); + return -1; + } - idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long); - - objtool_pv_add(file, idx, func); + if (objtool_pv_add(file, idx, func)) + return -1; off = reloc_offset(reloc) + 1; if (off > end) @@ -598,7 +605,7 @@ static int init_pv_ops(struct objtool_file *file) }; const char *pv_ops; struct symbol *sym; - int idx, nr; + int idx, nr, ret; if (!opts.noinstr) return 0; @@ -611,14 +618,19 @@ static int init_pv_ops(struct objtool_file *file) nr = sym->len / sizeof(unsigned long); file->pv_ops = calloc(sizeof(struct pv_state), nr); - if (!file->pv_ops) + if (!file->pv_ops) { + WARN_GLIBC("calloc"); return -1; + } for (idx = 0; idx < nr; idx++) INIT_LIST_HEAD(&file->pv_ops[idx].targets); - for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) - add_pv_ops(file, pv_ops); + for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) { + ret = add_pv_ops(file, pv_ops); + if (ret) + return ret; + } return 0; } @@ -666,13 +678,12 @@ static int create_static_call_sections(struct objtool_file *file) /* find key symbol */ key_name = strdup(insn_call_dest(insn)->name); if (!key_name) { - perror("strdup"); + WARN_GLIBC("strdup"); return -1; } if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, STATIC_CALL_TRAMP_PREFIX_LEN)) { WARN("static_call: trampoline name malformed: %s", key_name); - free(key_name); return -1; } tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; @@ -682,7 +693,6 @@ static int create_static_call_sections(struct objtool_file *file) if (!key_sym) { if (!opts.module) { WARN("static_call: can't find static_call_key symbol: %s", tmp); - free(key_name); return -1; } @@ -697,7 +707,6 @@ static int create_static_call_sections(struct objtool_file *file) */ key_sym = insn_call_dest(insn); } - free(key_name); /* populate reloc for 'key' */ if (!elf_init_reloc_data_sym(file->elf, sec, @@ -981,7 +990,7 @@ static int create_direct_call_sections(struct objtool_file *file) /* * Warnings shouldn't be reported for ignored functions. */ -static void add_ignores(struct objtool_file *file) +static int add_ignores(struct objtool_file *file) { struct section *rsec; struct symbol *func; @@ -989,7 +998,7 @@ static void add_ignores(struct objtool_file *file) rsec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard"); if (!rsec) - return; + return 0; for_each_reloc(rsec, reloc) { switch (reloc->sym->type) { @@ -1006,11 +1015,13 @@ static void add_ignores(struct objtool_file *file) default: WARN("unexpected relocation symbol type in %s: %d", rsec->name, reloc->sym->type); - continue; + return -1; } func->ignore = true; } + + return 0; } /* @@ -1275,7 +1286,7 @@ static void remove_insn_ops(struct instruction *insn) insn->stack_ops = NULL; } -static void annotate_call_site(struct objtool_file *file, +static int annotate_call_site(struct objtool_file *file, struct instruction *insn, bool sibling) { struct reloc *reloc = insn_reloc(file, insn); @@ -1286,12 +1297,12 @@ static void annotate_call_site(struct objtool_file *file, if (sym->static_call_tramp) { list_add_tail(&insn->call_node, &file->static_call_list); - return; + return 0; } if (sym->retpoline_thunk) { list_add_tail(&insn->call_node, &file->retpoline_call_list); - return; + return 0; } /* @@ -1303,10 +1314,12 @@ static void annotate_call_site(struct objtool_file *file, if (reloc) set_reloc_type(file->elf, reloc, R_NONE); - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - sibling ? arch_ret_insn(insn->len) - : arch_nop_insn(insn->len)); + if (elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + sibling ? arch_ret_insn(insn->len) + : arch_nop_insn(insn->len))) { + return -1; + } insn->type = sibling ? INSN_RETURN : INSN_NOP; @@ -1320,7 +1333,7 @@ static void annotate_call_site(struct objtool_file *file, insn->retpoline_safe = true; } - return; + return 0; } if (opts.mcount && sym->fentry) { @@ -1330,15 +1343,17 @@ static void annotate_call_site(struct objtool_file *file, if (reloc) set_reloc_type(file->elf, reloc, R_NONE); - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); + if (elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len))) { + return -1; + } insn->type = INSN_NOP; } list_add_tail(&insn->call_node, &file->mcount_loc_list); - return; + return 0; } if (insn->type == INSN_CALL && !insn->sec->init && @@ -1347,14 +1362,16 @@ static void annotate_call_site(struct objtool_file *file, if (!sibling && dead_end_function(file, sym)) insn->dead_end = true; + + return 0; } -static void add_call_dest(struct objtool_file *file, struct instruction *insn, +static int add_call_dest(struct objtool_file *file, struct instruction *insn, struct symbol *dest, bool sibling) { insn->_call_dest = dest; if (!dest) - return; + return 0; /* * Whatever stack impact regular CALLs have, should be undone @@ -1365,10 +1382,10 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn, */ remove_insn_ops(insn); - annotate_call_site(file, insn, sibling); + return annotate_call_site(file, insn, sibling); } -static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) +static int add_retpoline_call(struct objtool_file *file, struct instruction *insn) { /* * Retpoline calls/jumps are really dynamic calls/jumps in disguise, @@ -1385,7 +1402,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; break; default: - return; + return 0; } insn->retpoline_safe = true; @@ -1399,7 +1416,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in */ remove_insn_ops(insn); - annotate_call_site(file, insn, false); + return annotate_call_site(file, insn, false); } static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) @@ -1468,6 +1485,7 @@ static int add_jump_destinations(struct objtool_file *file) struct reloc *reloc; struct section *dest_sec; unsigned long dest_off; + int ret; for_each_insn(file, insn) { if (insn->jump_dest) { @@ -1488,7 +1506,9 @@ static int add_jump_destinations(struct objtool_file *file) dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc_addend(reloc)); } else if (reloc->sym->retpoline_thunk) { - add_retpoline_call(file, insn); + ret = add_retpoline_call(file, insn); + if (ret) + return ret; continue; } else if (reloc->sym->return_thunk) { add_return_call(file, insn, true); @@ -1498,7 +1518,9 @@ static int add_jump_destinations(struct objtool_file *file) * External sibling call or internal sibling call with * STT_FUNC reloc. */ - add_call_dest(file, insn, reloc->sym, true); + ret = add_call_dest(file, insn, reloc->sym, true); + if (ret) + return ret; continue; } else if (reloc->sym->sec->idx) { dest_sec = reloc->sym->sec; @@ -1538,7 +1560,9 @@ static int add_jump_destinations(struct objtool_file *file) */ if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) { if (jump_dest->sym->retpoline_thunk) { - add_retpoline_call(file, insn); + ret = add_retpoline_call(file, insn); + if (ret) + return ret; continue; } if (jump_dest->sym->return_thunk) { @@ -1580,7 +1604,9 @@ static int add_jump_destinations(struct objtool_file *file) * Internal sibling call without reloc or with * STT_SECTION reloc. */ - add_call_dest(file, insn, insn_func(jump_dest), true); + ret = add_call_dest(file, insn, insn_func(jump_dest), true); + if (ret) + return ret; continue; } @@ -1610,6 +1636,7 @@ static int add_call_destinations(struct objtool_file *file) unsigned long dest_off; struct symbol *dest; struct reloc *reloc; + int ret; for_each_insn(file, insn) { struct symbol *func = insn_func(insn); @@ -1621,7 +1648,9 @@ static int add_call_destinations(struct objtool_file *file) dest_off = arch_jump_destination(insn); dest = find_call_destination(insn->sec, dest_off); - add_call_dest(file, insn, dest, false); + ret = add_call_dest(file, insn, dest, false); + if (ret) + return ret; if (func && func->ignore) continue; @@ -1645,13 +1674,20 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - add_call_dest(file, insn, dest, false); + ret = add_call_dest(file, insn, dest, false); + if (ret) + return ret; } else if (reloc->sym->retpoline_thunk) { - add_retpoline_call(file, insn); + ret = add_retpoline_call(file, insn); + if (ret) + return ret; - } else - add_call_dest(file, insn, reloc->sym, false); + } else { + ret = add_call_dest(file, insn, reloc->sym, false); + if (ret) + return ret; + } } return 0; @@ -1674,15 +1710,15 @@ static int handle_group_alt(struct objtool_file *file, if (!orig_alt_group) { struct instruction *last_orig_insn = NULL; - orig_alt_group = malloc(sizeof(*orig_alt_group)); + orig_alt_group = calloc(1, sizeof(*orig_alt_group)); if (!orig_alt_group) { - WARN("malloc failed"); + WARN_GLIBC("calloc"); return -1; } orig_alt_group->cfi = calloc(special_alt->orig_len, sizeof(struct cfi_state *)); if (!orig_alt_group->cfi) { - WARN("calloc failed"); + WARN_GLIBC("calloc"); return -1; } @@ -1711,9 +1747,9 @@ static int handle_group_alt(struct objtool_file *file, } } - new_alt_group = malloc(sizeof(*new_alt_group)); + new_alt_group = calloc(1, sizeof(*new_alt_group)); if (!new_alt_group) { - WARN("malloc failed"); + WARN_GLIBC("calloc"); return -1; } @@ -1725,9 +1761,9 @@ static int handle_group_alt(struct objtool_file *file, * instruction affects the stack, the instruction after it (the * nop) will propagate the new state to the shared CFI array. */ - nop = malloc(sizeof(*nop)); + nop = calloc(1, sizeof(*nop)); if (!nop) { - WARN("malloc failed"); + WARN_GLIBC("calloc"); return -1; } memset(nop, 0, sizeof(*nop)); @@ -1827,9 +1863,13 @@ static int handle_jump_alt(struct objtool_file *file, if (reloc) set_reloc_type(file->elf, reloc, R_NONE); - elf_write_insn(file->elf, orig_insn->sec, - orig_insn->offset, orig_insn->len, - arch_nop_insn(orig_insn->len)); + + if (elf_write_insn(file->elf, orig_insn->sec, + orig_insn->offset, orig_insn->len, + arch_nop_insn(orig_insn->len))) { + return -1; + } + orig_insn->type = INSN_NOP; } @@ -1865,9 +1905,8 @@ static int add_special_section_alts(struct objtool_file *file) struct alternative *alt; int ret; - ret = special_get_alts(file->elf, &special_alts); - if (ret) - return ret; + if (special_get_alts(file->elf, &special_alts)) + return -1; list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { @@ -1876,8 +1915,7 @@ static int add_special_section_alts(struct objtool_file *file) if (!orig_insn) { WARN_FUNC("special: can't find orig instruction", special_alt->orig_sec, special_alt->orig_off); - ret = -1; - goto out; + return -1; } new_insn = NULL; @@ -1888,8 +1926,7 @@ static int add_special_section_alts(struct objtool_file *file) WARN_FUNC("special: can't find new instruction", special_alt->new_sec, special_alt->new_off); - ret = -1; - goto out; + return -1; } } @@ -1902,19 +1939,19 @@ static int add_special_section_alts(struct objtool_file *file) ret = handle_group_alt(file, special_alt, orig_insn, &new_insn); if (ret) - goto out; + return ret; + } else if (special_alt->jump_or_nop) { ret = handle_jump_alt(file, special_alt, orig_insn, &new_insn); if (ret) - goto out; + return ret; } - alt = malloc(sizeof(*alt)); + alt = calloc(1, sizeof(*alt)); if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; + WARN_GLIBC("calloc"); + return -1; } alt->insn = new_insn; @@ -1931,8 +1968,7 @@ static int add_special_section_alts(struct objtool_file *file) printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long); } -out: - return ret; + return 0; } __weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table) @@ -1989,9 +2025,9 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn) if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc) break; - alt = malloc(sizeof(*alt)); + alt = calloc(1, sizeof(*alt)); if (!alt) { - WARN("malloc failed"); + WARN_GLIBC("calloc"); return -1; } @@ -2039,7 +2075,7 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func, insn->jump_dest && (insn->jump_dest->offset <= insn->offset || insn->jump_dest->offset > orig_insn->offset)) - break; + break; table_reloc = arch_find_switch_table(file, insn, &table_size); if (!table_reloc) @@ -2103,7 +2139,6 @@ static int add_func_jump_tables(struct objtool_file *file, if (!insn_jump_table(insn)) continue; - ret = add_jump_table(file, insn); if (ret) return ret; @@ -2221,6 +2256,7 @@ static int read_unwind_hints(struct objtool_file *file) if (sym && sym->bind == STB_GLOBAL) { if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) { WARN_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR"); + return -1; } } } @@ -2390,7 +2426,7 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi default: WARN_INSN(insn, "Unknown annotation type: %d", type); - break; + return -1; } return 0; @@ -2503,7 +2539,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - add_ignores(file); + ret = add_ignores(file); + if (ret) + return ret; + add_uaccess_safe(file); ret = read_annotate(file, __annotate_early); @@ -2723,7 +2762,7 @@ static int update_cfi_state(struct instruction *insn, if (cfa->base == CFI_UNDEFINED) { if (insn_func(insn)) { WARN_INSN(insn, "undefined stack state"); - return -1; + return 1; } return 0; } @@ -3166,9 +3205,8 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn if (cficmp(alt_cfi[group_off], insn->cfi)) { struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group; struct instruction *orig = orig_group->first_insn; - char *where = offstr(insn->sec, insn->offset); - WARN_INSN(orig, "stack layout conflict in alternatives: %s", where); - free(where); + WARN_INSN(orig, "stack layout conflict in alternatives: %s", + offstr(insn->sec, insn->offset)); return -1; } } @@ -3181,11 +3219,13 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) { struct stack_op *op; + int ret; for (op = insn->stack_ops; op; op = op->next) { - if (update_cfi_state(insn, next_insn, &state->cfi, op)) - return 1; + ret = update_cfi_state(insn, next_insn, &state->cfi, op); + if (ret) + return ret; if (!opts.uaccess || !insn->alt_group) continue; @@ -3229,36 +3269,41 @@ static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) WARN_INSN(insn, "stack state mismatch: cfa1=%d%+d cfa2=%d%+d", cfi1->cfa.base, cfi1->cfa.offset, cfi2->cfa.base, cfi2->cfa.offset); + return false; - } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) { + } + + if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) { for (i = 0; i < CFI_NUM_REGS; i++) { - if (!memcmp(&cfi1->regs[i], &cfi2->regs[i], - sizeof(struct cfi_reg))) + + if (!memcmp(&cfi1->regs[i], &cfi2->regs[i], sizeof(struct cfi_reg))) continue; WARN_INSN(insn, "stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d", i, cfi1->regs[i].base, cfi1->regs[i].offset, i, cfi2->regs[i].base, cfi2->regs[i].offset); - break; } + return false; + } - } else if (cfi1->type != cfi2->type) { + if (cfi1->type != cfi2->type) { WARN_INSN(insn, "stack state mismatch: type1=%d type2=%d", cfi1->type, cfi2->type); + return false; + } - } else if (cfi1->drap != cfi2->drap || + if (cfi1->drap != cfi2->drap || (cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) || (cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) { WARN_INSN(insn, "stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)", cfi1->drap, cfi1->drap_reg, cfi1->drap_offset, cfi2->drap, cfi2->drap_reg, cfi2->drap_offset); + return false; + } - } else - return true; - - return false; + return true; } static inline bool func_uaccess_safe(struct symbol *func) @@ -3490,6 +3535,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, WARN("%s() falls through to next function %s()", func->name, insn_func(insn)->name); + func->warnings++; + return 1; } @@ -3597,9 +3644,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - if (insn->dead_end) - return 0; - break; case INSN_JUMP_CONDITIONAL: @@ -3706,7 +3750,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (file->ignore_unreachables) return 0; - WARN("%s: unexpected end of section", sec->name); + WARN("%s%sunexpected end of section %s", + func ? func->name : "", func ? ": " : "", + sec->name); return 1; } @@ -3796,7 +3842,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) if (!is_sibling_call(insn)) { if (!insn->jump_dest) { WARN_INSN(insn, "unresolved jump target after linking?!?"); - return -1; + return 1; } ret = validate_unret(file, insn->jump_dest); if (ret) { @@ -3818,7 +3864,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) if (!dest) { WARN("Unresolved function after linking!?: %s", insn_call_dest(insn)->name); - return -1; + return 1; } ret = validate_unret(file, dest); @@ -3847,7 +3893,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) if (!next) { WARN_INSN(insn, "teh end!"); - return -1; + return 1; } insn = next; } @@ -3862,18 +3908,13 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) static int validate_unrets(struct objtool_file *file) { struct instruction *insn; - int ret, warnings = 0; + int warnings = 0; for_each_insn(file, insn) { if (!insn->unret) continue; - ret = validate_unret(file, insn); - if (ret < 0) { - WARN_INSN(insn, "Failed UNRET validation"); - return ret; - } - warnings += ret; + warnings += validate_unret(file, insn); } return warnings; @@ -3899,13 +3940,13 @@ static int validate_retpoline(struct objtool_file *file) if (insn->type == INSN_RETURN) { if (opts.rethunk) { WARN_INSN(insn, "'naked' return found in MITIGATION_RETHUNK build"); - } else - continue; - } else { - WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build", - insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + warnings++; + } + continue; } + WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build", + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); warnings++; } @@ -4472,7 +4513,7 @@ static int validate_reachable_instructions(struct objtool_file *file) } /* 'funcs' is a space-separated list of function names */ -static int disas_funcs(const char *funcs) +static void disas_funcs(const char *funcs) { const char *objdump_str, *cross_compile; int size, ret; @@ -4505,7 +4546,7 @@ static int disas_funcs(const char *funcs) size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1; if (size <= 0) { WARN("objdump string size calculation failed"); - return -1; + return; } cmd = malloc(size); @@ -4515,13 +4556,11 @@ static int disas_funcs(const char *funcs) ret = system(cmd); if (ret) { WARN("disassembly failed: %d", ret); - return -1; + return; } - - return 0; } -static int disas_warned_funcs(struct objtool_file *file) +static void disas_warned_funcs(struct objtool_file *file) { struct symbol *sym; char *funcs = NULL, *tmp; @@ -4530,9 +4569,17 @@ static int disas_warned_funcs(struct objtool_file *file) if (sym->warnings) { if (!funcs) { funcs = malloc(strlen(sym->name) + 1); + if (!funcs) { + WARN_GLIBC("malloc"); + return; + } strcpy(funcs, sym->name); } else { tmp = malloc(strlen(funcs) + strlen(sym->name) + 2); + if (!tmp) { + WARN_GLIBC("malloc"); + return; + } sprintf(tmp, "%s %s", funcs, sym->name); free(funcs); funcs = tmp; @@ -4542,8 +4589,6 @@ static int disas_warned_funcs(struct objtool_file *file) if (funcs) disas_funcs(funcs); - - return 0; } struct insn_chunk { @@ -4576,7 +4621,7 @@ static void free_insns(struct objtool_file *file) int check(struct objtool_file *file) { - int ret, warnings = 0; + int ret = 0, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); init_cfi_state(&init_cfi); @@ -4594,44 +4639,27 @@ int check(struct objtool_file *file) cfi_hash_add(&func_cfi); ret = decode_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; - if (!nr_insns) goto out; - if (opts.retpoline) { - ret = validate_retpoline(file); - if (ret < 0) - goto out; - warnings += ret; - } + if (opts.retpoline) + warnings += validate_retpoline(file); if (opts.stackval || opts.orc || opts.uaccess) { - ret = validate_functions(file); - if (ret < 0) - goto out; - warnings += ret; + int w = 0; - ret = validate_unwind_hints(file, NULL); - if (ret < 0) - goto out; - warnings += ret; + w += validate_functions(file); + w += validate_unwind_hints(file, NULL); + if (!w) + w += validate_reachable_instructions(file); - if (!warnings) { - ret = validate_reachable_instructions(file); - if (ret < 0) - goto out; - warnings += ret; - } + warnings += w; } else if (opts.noinstr) { - ret = validate_noinstr_sections(file); - if (ret < 0) - goto out; - warnings += ret; + warnings += validate_noinstr_sections(file); } if (opts.unret) { @@ -4639,87 +4667,67 @@ int check(struct objtool_file *file) * Must be after validate_branch() and friends, it plays * further games with insn->visited. */ - ret = validate_unrets(file); - if (ret < 0) - goto out; - warnings += ret; + warnings += validate_unrets(file); } - if (opts.ibt) { - ret = validate_ibt(file); - if (ret < 0) - goto out; - warnings += ret; - } + if (opts.ibt) + warnings += validate_ibt(file); - if (opts.sls) { - ret = validate_sls(file); - if (ret < 0) - goto out; - warnings += ret; - } + if (opts.sls) + warnings += validate_sls(file); if (opts.static_call) { ret = create_static_call_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.retpoline) { ret = create_retpoline_sites_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.cfi) { ret = create_cfi_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.rethunk) { ret = create_return_sites_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; if (opts.hack_skylake) { ret = create_direct_call_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } } if (opts.mcount) { ret = create_mcount_loc_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.prefix) { ret = add_prefix_symbols(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.ibt) { ret = create_ibt_endbr_seal_sections(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } if (opts.orc && nr_insns) { ret = orc_create(file); - if (ret < 0) + if (ret) goto out; - warnings += ret; } free_insns(file); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0f38167bd840..b352a78b596c 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -331,7 +331,7 @@ static int read_sections(struct elf *elf) elf->section_data = calloc(sections_nr, sizeof(*sec)); if (!elf->section_data) { - perror("calloc"); + WARN_GLIBC("calloc"); return -1; } for (i = 0; i < sections_nr; i++) { @@ -467,7 +467,7 @@ static int read_symbols(struct elf *elf) elf->symbol_data = calloc(symbols_nr, sizeof(*sym)); if (!elf->symbol_data) { - perror("calloc"); + WARN_GLIBC("calloc"); return -1; } for (i = 0; i < symbols_nr; i++) { @@ -799,7 +799,7 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) struct symbol *sym = calloc(1, sizeof(*sym)); if (!sym) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } @@ -829,7 +829,7 @@ elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size) char *name = malloc(namelen); if (!sym || !name) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } @@ -963,7 +963,7 @@ static int read_relocs(struct elf *elf) nr_reloc = 0; rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc)); if (!rsec->relocs) { - perror("calloc"); + WARN_GLIBC("calloc"); return -1; } for (i = 0; i < sec_num_entries(rsec); i++) { @@ -1005,7 +1005,7 @@ struct elf *elf_open_read(const char *name, int flags) elf = malloc(sizeof(*elf)); if (!elf) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } memset(elf, 0, sizeof(*elf)); @@ -1099,7 +1099,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec = malloc(sizeof(*sec)); if (!sec) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } memset(sec, 0, sizeof(*sec)); @@ -1114,7 +1114,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->name = strdup(name); if (!sec->name) { - perror("strdup"); + WARN_GLIBC("strdup"); return NULL; } @@ -1132,7 +1132,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, if (size) { sec->data->d_buf = malloc(size); if (!sec->data->d_buf) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } memset(sec->data->d_buf, 0, size); @@ -1179,7 +1179,7 @@ static struct section *elf_create_rela_section(struct elf *elf, rsec_name = malloc(strlen(sec->name) + strlen(".rela") + 1); if (!rsec_name) { - perror("malloc"); + WARN_GLIBC("malloc"); return NULL; } strcpy(rsec_name, ".rela"); @@ -1199,7 +1199,7 @@ static struct section *elf_create_rela_section(struct elf *elf, rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc)); if (!rsec->relocs) { - perror("calloc"); + WARN_GLIBC("calloc"); return NULL; } diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 94a33ee7b363..c0dc86a78ff6 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -41,7 +41,7 @@ struct objtool_file { struct objtool_file *objtool_open_read(const char *_objname); -void objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func); +int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func); int check(struct objtool_file *file); int orc_dump(const char *objname); diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index e72b9d630551..b29ac144e4f5 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -43,8 +44,9 @@ static inline char *offstr(struct section *sec, unsigned long offset) #define WARN(format, ...) \ fprintf(stderr, \ - "%s: %s: objtool: " format "\n", \ - objname, \ + "%s%s%s: objtool: " format "\n", \ + objname ?: "", \ + objname ? ": " : "", \ opts.werror ? "error" : "warning", \ ##__VA_ARGS__) @@ -83,7 +85,10 @@ static inline char *offstr(struct section *sec, unsigned long offset) } \ }) -#define WARN_ELF(format, ...) \ - WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) +#define WARN_ELF(format, ...) \ + WARN("%s: " format " failed: %s", __func__, ##__VA_ARGS__, elf_errmsg(-1)) + +#define WARN_GLIBC(format, ...) \ + WARN("%s: " format " failed: %s", __func__, ##__VA_ARGS__, strerror(errno)) #endif /* _WARN_H */ diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 1c73fb62fd57..e4b49c534e4d 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -44,14 +44,14 @@ struct objtool_file *objtool_open_read(const char *filename) return &file; } -void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) +int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) { if (!opts.noinstr) - return; + return 0; if (!f->pv_ops) { WARN("paravirt confusion"); - return; + return -1; } /* @@ -60,14 +60,15 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) */ if (!strcmp(func->name, "_paravirt_nop") || !strcmp(func->name, "_paravirt_ident_64")) - return; + return 0; /* already added this function */ if (!list_empty(&func->pv_target)) - return; + return 0; list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; + return 0; } int main(int argc, const char **argv) From d39f82a058c0269392a797cb04f2a6064e5dcab6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:00 -0700 Subject: [PATCH 1624/2157] objtool: Reduce CONFIG_OBJTOOL_WERROR verbosity Remove the following from CONFIG_OBJTOOL_WERROR: * backtrace * "upgraded warnings to errors" message * cmdline args This makes the default output less cluttered and makes it easier to spot the actual warnings. Note the above options are still are available with --verbose or OBJTOOL_VERBOSE=1. Also, do the cmdline arg printing on all warnings, regardless of werror. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/d61df69f64b396fa6b2a1335588aad7a34ea9e71.1742852846.git.jpoimboe@kernel.org --- scripts/Makefile.lib | 2 +- tools/objtool/builtin-check.c | 113 ++++++++++++------------ tools/objtool/check.c | 23 ++--- tools/objtool/include/objtool/builtin.h | 6 +- 4 files changed, 73 insertions(+), 71 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 57620b439a1f..b93597420daf 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -277,7 +277,7 @@ objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) -objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror --backtrace +objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c973a751fb7d..2bdff910430e 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,8 +15,11 @@ #include #include -const char *objname; +#define ORIG_SUFFIX ".orig" +int orig_argc; +static char **orig_argv; +const char *objname; struct opts opts; static const char * const check_usage[] = { @@ -224,39 +227,73 @@ static int copy_file(const char *src, const char *dst) return 0; } -static char **save_argv(int argc, const char **argv) +static void save_argv(int argc, const char **argv) { - char **orig_argv; - orig_argv = calloc(argc, sizeof(char *)); if (!orig_argv) { WARN_GLIBC("calloc"); - return NULL; + exit(1); } for (int i = 0; i < argc; i++) { orig_argv[i] = strdup(argv[i]); if (!orig_argv[i]) { WARN_GLIBC("strdup(%s)", orig_argv[i]); - return NULL; + exit(1); } }; - - return orig_argv; } -#define ORIG_SUFFIX ".orig" +void print_args(void) +{ + char *backup = NULL; + + if (opts.output || opts.dryrun) + goto print; + + /* + * Make a backup before kbuild deletes the file so the error + * can be recreated without recompiling or relinking. + */ + backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1); + if (!backup) { + WARN_GLIBC("malloc"); + goto print; + } + + strcpy(backup, objname); + strcat(backup, ORIG_SUFFIX); + if (copy_file(objname, backup)) { + backup = NULL; + goto print; + } + +print: + /* + * Print the cmdline args to make it easier to recreate. If '--output' + * wasn't used, add it to the printed args with the backup as input. + */ + fprintf(stderr, "%s", orig_argv[0]); + + for (int i = 1; i < orig_argc; i++) { + char *arg = orig_argv[i]; + + if (backup && !strcmp(arg, objname)) + fprintf(stderr, " %s -o %s", backup, objname); + else + fprintf(stderr, " %s", arg); + } + + fprintf(stderr, "\n"); +} int objtool_run(int argc, const char **argv) { struct objtool_file *file; - char *backup = NULL; - char **orig_argv; int ret = 0; - orig_argv = save_argv(argc, argv); - if (!orig_argv) - return 1; + orig_argc = argc; + save_argv(argc, argv); cmd_parse_options(argc, argv, check_usage); @@ -279,59 +316,19 @@ int objtool_run(int argc, const char **argv) file = objtool_open_read(objname); if (!file) - goto err; + return 1; if (!opts.link && has_multiple_files(file->elf)) { WARN("Linked object requires --link"); - goto err; + return 1; } ret = check(file); if (ret) - goto err; + return ret; if (!opts.dryrun && file->elf->changed && elf_write(file->elf)) - goto err; + return 1; return 0; - -err: - if (opts.dryrun) - goto err_msg; - - if (opts.output) { - unlink(opts.output); - goto err_msg; - } - - /* - * Make a backup before kbuild deletes the file so the error - * can be recreated without recompiling or relinking. - */ - backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1); - if (!backup) { - WARN_GLIBC("malloc"); - return 1; - } - - strcpy(backup, objname); - strcat(backup, ORIG_SUFFIX); - if (copy_file(objname, backup)) - return 1; - -err_msg: - fprintf(stderr, "%s", orig_argv[0]); - - for (int i = 1; i < argc; i++) { - char *arg = orig_argv[i]; - - if (backup && !strcmp(arg, objname)) - fprintf(stderr, " %s -o %s", backup, objname); - else - fprintf(stderr, " %s", arg); - } - - fprintf(stderr, "\n"); - - return 1; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f4e7ee8e8fb5..ac21f2846ebc 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4732,9 +4732,6 @@ int check(struct objtool_file *file) free_insns(file); - if (opts.verbose) - disas_warned_funcs(file); - if (opts.stats) { printf("nr_insns_visited: %ld\n", nr_insns_visited); printf("nr_cfi: %ld\n", nr_cfi); @@ -4743,19 +4740,25 @@ int check(struct objtool_file *file) } out: + if (!ret && !warnings) + return 0; + + if (opts.verbose) { + if (opts.werror && warnings) + WARN("%d warning(s) upgraded to errors", warnings); + print_args(); + disas_warned_funcs(file); + } + /* * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual * errors. * - * Note that even "fatal" type errors don't actually return an error - * without CONFIG_OBJTOOL_WERROR. That probably needs improved at some - * point. + * Note that even fatal errors don't yet actually return an error + * without CONFIG_OBJTOOL_WERROR. That will be fixed soon-ish. */ - if (opts.werror && (ret || warnings)) { - if (warnings) - WARN("%d warning(s) upgraded to errors", warnings); + if (opts.werror) return 1; - } return 0; } diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 0fafd0f7a209..6b08666fa69d 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -43,8 +43,10 @@ struct opts { extern struct opts opts; -extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); +int cmd_parse_options(int argc, const char **argv, const char * const usage[]); -extern int objtool_run(int argc, const char **argv); +int objtool_run(int argc, const char **argv); + +void print_args(void); #endif /* _BUILTIN_H */ From 24fe172b50b5749c315349e740e4a09e3a0165d5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:01 -0700 Subject: [PATCH 1625/2157] objtool: Fix up some outdated references to ENTRY/ENDPROC ENTRY and ENDPROC were deprecated years ago and replaced with SYM_FUNC_{START,END}. Fix up a few outdated references in the objtool documentation and comments. Also fix a few typos. Suggested-by: Brendan Jackman Suggested-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/5eb7e06e1a0e87aaeda8d583ab060e7638a6ea8e.1742852846.git.jpoimboe@kernel.org --- include/linux/linkage.h | 4 ---- include/linux/objtool.h | 2 +- tools/objtool/Documentation/objtool.txt | 10 +++++----- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5c8865bb59d9..b11660b706c5 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -134,10 +134,6 @@ .size name, .-name #endif -/* If symbol 'name' is treated as a subroutine (gets called, and returns) - * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of - * static analysis tools such as stack depth analyzer. - */ #ifndef ENDPROC /* deprecated, use SYM_FUNC_END */ #define ENDPROC(name) \ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 3ca965a2ddc8..366ad004d794 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -69,7 +69,7 @@ * In asm, there are two kinds of code: normal C-type callable functions and * the rest. The normal callable functions can be called by other code, and * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * are annotated with SYM_FUNC_{START,END}. Most asm code falls in this * category. In this case, no special debugging annotations are needed because * objtool can automatically generate the ORC data for the ORC unwinder to read * at runtime. diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt index 28ac57b9e102..9e97fc25b2d8 100644 --- a/tools/objtool/Documentation/objtool.txt +++ b/tools/objtool/Documentation/objtool.txt @@ -34,7 +34,7 @@ Objtool has the following features: - Return thunk annotation -- annotates all return thunk sites so kernel can patch them inline, depending on enabled mitigations -- Return thunk training valiation -- validate that all entry paths +- Return thunk untraining validation -- validate that all entry paths untrain a "safe return" before the first return (or call) - Non-instrumentation validation -- validates non-instrumentable @@ -281,8 +281,8 @@ the objtool maintainers. If the error is for an asm file, and func() is indeed a callable function, add proper frame pointer logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, if it's not a callable function, remove - its ELF function annotation by changing ENDPROC to END, and instead - use the manual unwind hint macros in asm/unwind_hints.h. + its ELF function annotation by using SYM_CODE_{START,END} and use the + manual unwind hint macros in asm/unwind_hints.h. If it's a GCC-compiled .c file, the error may be because the function uses an inline asm() statement which has a "call" instruction. An @@ -352,7 +352,7 @@ the objtool maintainers. This is a kernel entry/exit instruction like sysenter or iret. Such instructions aren't allowed in a callable function, and are most likely part of the kernel entry code. Such code should probably be - placed in a SYM_FUNC_CODE block with unwind hints. + placed in a SYM_CODE_{START,END} block with unwind hints. 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame @@ -381,7 +381,7 @@ the objtool maintainers. Another possibility is that the code has some asm or inline asm which does some unusual things to the stack or the frame pointer. In such - cases it's probably appropriate to use SYM_FUNC_CODE with unwind + cases it's probably appropriate to use SYM_CODE_{START,END} with unwind hints. From 876a4bce3849b235d752d13ec3180e15a35e52de Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:02 -0700 Subject: [PATCH 1626/2157] objtool: Remove --no-unreachable for noinstr-only vmlinux.o runs For (!X86_KERNEL_IBT && !LTO_CLANG && NOINSTR_VALIDATION), objtool runs on both translation units and vmlinux.o. The vmlinux.o run only does noinstr/noret validation. In that case --no-unreachable has no effect. Remove it. Note that for ((X86_KERNEL_IBT || LTO_CLANG) && KCOV), --no-unreachable still gets set in objtool-args-y by scripts/Makefile.lib. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/05414246a0124db2f21b0d071b652aa9043d039d.1742852847.git.jpoimboe@kernel.org --- scripts/Makefile.vmlinux_o | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index f476f5605029..938c7457717e 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -44,7 +44,6 @@ else vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror endif -vmlinux-objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_MITIGATION_SRSO)), --unret) From a8d39a62c6f5ad76b8a1ebfbf10dd9fe8ca2bbcc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:03 -0700 Subject: [PATCH 1627/2157] objtool: Remove redundant opts.noinstr dependency The --noinstr dependecy on --link is already enforced in the cmdline arg parsing code. Remove the redundant check. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/0ead7ffa0f5be2e81aebbcc585e07b2c98702b44.1742852847.git.jpoimboe@kernel.org --- tools/objtool/check.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ac21f2846ebc..0caabf0e8faf 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -340,12 +340,7 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state, memset(state, 0, sizeof(*state)); init_cfi_state(&state->cfi); - /* - * We need the full vmlinux for noinstr validation, otherwise we can - * not correctly determine insn_call_dest(insn)->sec (external symbols - * do not have a section). - */ - if (opts.link && opts.noinstr && sec) + if (opts.noinstr && sec) state->noinstr = sec->noinstr; } From 6b88dac0ae19fdb9124215f6ec3ca02944a237a4 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:38 +0100 Subject: [PATCH 1628/2157] irqdomain: i2c: Switch to irq_find_mapping() irq_linear_revmap() is deprecated, so remove all its uses and supersede them by an identical call to irq_find_mapping(). Signed-off-by: Jiri Slaby (SUSE) Cc: Peter Rosin Cc: linux-i2c@vger.kernel.org Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 6f84018258c4..db95113a5b49 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -414,7 +414,7 @@ static irqreturn_t pca954x_irq_handler(int irq, void *dev_id) pending = (ret >> PCA954X_IRQ_OFFSET) & (BIT(data->chip->nchans) - 1); for_each_set_bit(i, &pending, data->chip->nchans) - handle_nested_irq(irq_linear_revmap(data->irq, i)); + handle_nested_irq(irq_find_mapping(data->irq, i)); return IRQ_RETVAL(pending); } From a6d86c1b64950424c89da5a468980cf8af9d3003 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 7 Mar 2025 00:57:00 +0000 Subject: [PATCH 1629/2157] dt-bindings: watchdog: sunxi: add Allwinner A523 compatible string The Allwinner A523 SoC features a watchdog similar to the one used in previous SoCs, but moves some registers around (by just one word), making it incompatible to existing IPs. Add the new name to the list of compatible string, and also to the list of IP requiring two clock inputs. Signed-off-by: Andre Przywara Acked-by: Jernej Skrabec Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250307005712.16828-4-andre.przywara@arm.com Signed-off-by: Wim Van Sebroeck --- .../devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml index 64c8f7393809..b35ac03d5172 100644 --- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -32,6 +32,7 @@ properties: - items: - const: allwinner,sun20i-d1-wdt-reset - const: allwinner,sun20i-d1-wdt + - const: allwinner,sun55i-a523-wdt reg: maxItems: 1 @@ -60,6 +61,7 @@ if: - allwinner,sun20i-d1-wdt-reset - allwinner,sun50i-r329-wdt - allwinner,sun50i-r329-wdt-reset + - allwinner,sun55i-a523-wdt then: properties: From 9bc64d338b0b4b2061049df8b701f9786857690e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 7 Mar 2025 00:57:01 +0000 Subject: [PATCH 1630/2157] watchdog: sunxi_wdt: Add support for Allwinner A523 The Allwinner A523 SoC comes with a watchdog very similar to the ones in the previous Allwinner SoCs, but oddly enough moves the first half of its registers up by one word. Since we have different offsets for these registers across the other SoCs as well, this can simply be modelled by just stating the new offsets in our per-SoC struct. The rest of the IP is the same as in the D1, although the A523 moves its watchdog to a separate MMIO frame, so it's not embedded in the timer anymore. The driver can be ignorant of this, because the DT will take care of this. Add a new struct for the A523, specifying the SoC-specific details, and tie the new DT compatible string to it. Signed-off-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20250307005712.16828-5-andre.przywara@arm.com Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sunxi_wdt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index b85354a99582..b6c761acc3de 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -236,10 +236,21 @@ static const struct sunxi_wdt_reg sun20i_wdt_reg = { .wdt_key_val = 0x16aa0000, }; +static const struct sunxi_wdt_reg sun55i_wdt_reg = { + .wdt_ctrl = 0x0c, + .wdt_cfg = 0x10, + .wdt_mode = 0x14, + .wdt_timeout_shift = 4, + .wdt_reset_mask = 0x03, + .wdt_reset_val = 0x01, + .wdt_key_val = 0x16aa0000, +}; + static const struct of_device_id sunxi_wdt_dt_ids[] = { { .compatible = "allwinner,sun4i-a10-wdt", .data = &sun4i_wdt_reg }, { .compatible = "allwinner,sun6i-a31-wdt", .data = &sun6i_wdt_reg }, { .compatible = "allwinner,sun20i-d1-wdt", .data = &sun20i_wdt_reg }, + { .compatible = "allwinner,sun55i-a523-wdt", .data = &sun55i_wdt_reg }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids); From 6aa63a4ec947f350d1a2f9f6aba8591a2455d192 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Mar 2025 21:05:15 -0700 Subject: [PATCH 1631/2157] iommu: Sort out domain user data When DMA/MSI cookies were made first-class citizens back in commit 46983fcd67ac ("iommu: Pull IOVA cookie management into the core"), there was no real need to further expose the two different cookie types. However, now that IOMMUFD wants to add a third type of MSI-mapping cookie, we do have a nicely compelling reason to properly dismabiguate things at the domain level beyond just vaguely guessing from the domain type. Meanwhile, we also effectively have another "cookie" in the form of the anonymous union for other user data, which isn't much better in terms of being vague and unenforced. The fact is that all these cookie types are mutually exclusive, in the sense that combining them makes zero sense and/or would be catastrophic (iommu_set_fault_handler() on an SVA domain, anyone?) - the only combination which *might* be reasonable is perhaps a fault handler and an MSI cookie, but nobody's doing that at the moment, so let's rule it out as well for the sake of being clear and robust. To that end, we pull DMA and MSI cookies apart a little more, mostly to clear up the ambiguity at domain teardown, then for clarity (and to save a little space), move them into the union, whose ownership we can then properly describe and enforce entirely unambiguously. [nicolinc: rebase on latest tree; use prefix IOMMU_COOKIE_; merge unions in iommu_domain; add IOMMU_COOKIE_IOMMUFD for iommufd_hwpt] Link: https://patch.msgid.link/r/1ace9076c95204bbe193ee77499d395f15f44b23.1742871535.git.nicolinc@nvidia.com Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 196 ++++++++++++++------------- drivers/iommu/dma-iommu.h | 5 + drivers/iommu/iommu-sva.c | 1 + drivers/iommu/iommu.c | 18 ++- drivers/iommu/iommufd/hw_pagetable.c | 3 + include/linux/iommu.h | 20 ++- 6 files changed, 144 insertions(+), 99 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 94263ed2c564..31a7b4b81656 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -42,11 +42,6 @@ struct iommu_dma_msi_page { phys_addr_t phys; }; -enum iommu_dma_cookie_type { - IOMMU_DMA_IOVA_COOKIE, - IOMMU_DMA_MSI_COOKIE, -}; - enum iommu_dma_queue_type { IOMMU_DMA_OPTS_PER_CPU_QUEUE, IOMMU_DMA_OPTS_SINGLE_QUEUE, @@ -59,35 +54,31 @@ struct iommu_dma_options { }; struct iommu_dma_cookie { - enum iommu_dma_cookie_type type; + struct iova_domain iovad; + struct list_head msi_page_list; + /* Flush queue */ union { - /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct { - struct iova_domain iovad; - /* Flush queue */ - union { - struct iova_fq *single_fq; - struct iova_fq __percpu *percpu_fq; - }; - /* Number of TLB flushes that have been started */ - atomic64_t fq_flush_start_cnt; - /* Number of TLB flushes that have been finished */ - atomic64_t fq_flush_finish_cnt; - /* Timer to regularily empty the flush queues */ - struct timer_list fq_timer; - /* 1 when timer is active, 0 when not */ - atomic_t fq_timer_on; - }; - /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ - dma_addr_t msi_iova; + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; }; - struct list_head msi_page_list; - + /* Number of TLB flushes that have been started */ + atomic64_t fq_flush_start_cnt; + /* Number of TLB flushes that have been finished */ + atomic64_t fq_flush_finish_cnt; + /* Timer to regularily empty the flush queues */ + struct timer_list fq_timer; + /* 1 when timer is active, 0 when not */ + atomic_t fq_timer_on; /* Domain for flush queue callback; NULL if flush queue not in use */ - struct iommu_domain *fq_domain; + struct iommu_domain *fq_domain; /* Options for dma-iommu use */ - struct iommu_dma_options options; - struct mutex mutex; + struct iommu_dma_options options; + struct mutex mutex; +}; + +struct iommu_dma_msi_cookie { + dma_addr_t msi_iova; + struct list_head msi_page_list; }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); @@ -369,40 +360,26 @@ int iommu_dma_init_fq(struct iommu_domain *domain) return 0; } -static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) -{ - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) - return cookie->iovad.granule; - return PAGE_SIZE; -} - -static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) -{ - struct iommu_dma_cookie *cookie; - - cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); - if (cookie) { - INIT_LIST_HEAD(&cookie->msi_page_list); - cookie->type = type; - } - return cookie; -} - /** * iommu_get_dma_cookie - Acquire DMA-API resources for a domain * @domain: IOMMU domain to prepare for DMA-API usage */ int iommu_get_dma_cookie(struct iommu_domain *domain) { - if (domain->iova_cookie) + struct iommu_dma_cookie *cookie; + + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); - if (!domain->iova_cookie) + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); + if (!cookie) return -ENOMEM; - mutex_init(&domain->iova_cookie->mutex); + mutex_init(&cookie->mutex); + INIT_LIST_HEAD(&cookie->msi_page_list); iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); + domain->cookie_type = IOMMU_COOKIE_DMA_IOVA; + domain->iova_cookie = cookie; return 0; } @@ -420,29 +397,30 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) */ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { - struct iommu_dma_cookie *cookie; + struct iommu_dma_msi_cookie *cookie; if (domain->type != IOMMU_DOMAIN_UNMANAGED) return -EINVAL; - if (domain->iova_cookie) + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) return -ENOMEM; cookie->msi_iova = base; - domain->iova_cookie = cookie; + INIT_LIST_HEAD(&cookie->msi_page_list); iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); + domain->cookie_type = IOMMU_COOKIE_DMA_MSI; + domain->msi_cookie = cookie; return 0; } EXPORT_SYMBOL(iommu_get_msi_cookie); /** * iommu_put_dma_cookie - Release a domain's DMA mapping resources - * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or - * iommu_get_msi_cookie() + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() */ void iommu_put_dma_cookie(struct iommu_domain *domain) { @@ -454,20 +432,27 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) return; #endif - if (!cookie) - return; - - if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { + if (cookie->iovad.granule) { iommu_dma_free_fq(cookie); put_iova_domain(&cookie->iovad); } - - list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { - list_del(&msi->list); + list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) + kfree(msi); + kfree(cookie); +} + +/** + * iommu_put_msi_cookie - Release a domain's MSI mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie() + */ +void iommu_put_msi_cookie(struct iommu_domain *domain) +{ + struct iommu_dma_msi_cookie *cookie = domain->msi_cookie; + struct iommu_dma_msi_page *msi, *tmp; + + list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) kfree(msi); - } kfree(cookie); - domain->iova_cookie = NULL; } /** @@ -687,7 +672,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev struct iova_domain *iovad; int ret; - if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) + if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA) return -EINVAL; iovad = &cookie->iovad; @@ -777,9 +762,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova; - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { - cookie->msi_iova += size; - return cookie->msi_iova - size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) { + domain->msi_cookie->msi_iova += size; + return domain->msi_cookie->msi_iova - size; } shift = iova_shift(iovad); @@ -816,16 +801,16 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, return (dma_addr_t)iova << shift; } -static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) +static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova, + size_t size, struct iommu_iotlb_gather *gather) { - struct iova_domain *iovad = &cookie->iovad; + struct iova_domain *iovad = &domain->iova_cookie->iovad; /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) - cookie->msi_iova -= size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) + domain->msi_cookie->msi_iova -= size; else if (gather && gather->queued) - queue_iova(cookie, iova_pfn(iovad, iova), + queue_iova(domain->iova_cookie, iova_pfn(iovad, iova), size >> iova_shift(iovad), &gather->freelist); else @@ -853,7 +838,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, if (!iotlb_gather.queued) iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); + iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather); } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, @@ -881,7 +866,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); return DMA_MAPPING_ERROR; } return iova + iova_off; @@ -1018,7 +1003,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, out_free_sg: sg_free_table(sgt); out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -1495,7 +1480,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return __finalise_sg(dev, sg, nents, iova); out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len, NULL); + iommu_dma_free_iova(domain, iova, iova_len, NULL); out_restore_sg: __invalidate_sg(sg, nents); out: @@ -1773,17 +1758,47 @@ void iommu_setup_dma_ops(struct device *dev) dev->dma_iommu = false; } +static bool has_msi_cookie(const struct iommu_domain *domain) +{ + return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA || + domain->cookie_type == IOMMU_COOKIE_DMA_MSI); +} + +static size_t cookie_msi_granule(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return domain->iova_cookie->iovad.granule; + case IOMMU_COOKIE_DMA_MSI: + return PAGE_SIZE; + default: + unreachable(); + }; +} + +static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return &domain->iova_cookie->msi_page_list; + case IOMMU_COOKIE_DMA_MSI: + return &domain->msi_cookie->msi_page_list; + default: + unreachable(); + }; +} + static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { - struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct list_head *msi_page_list = cookie_msi_pages(domain); struct iommu_dma_msi_page *msi_page; dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; - size_t size = cookie_msi_granule(cookie); + size_t size = cookie_msi_granule(domain); msi_addr &= ~(phys_addr_t)(size - 1); - list_for_each_entry(msi_page, &cookie->msi_page_list, list) + list_for_each_entry(msi_page, msi_page_list, list) if (msi_page->phys == msi_addr) return msi_page; @@ -1801,11 +1816,11 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, INIT_LIST_HEAD(&msi_page->list); msi_page->phys = msi_addr; msi_page->iova = iova; - list_add(&msi_page->list, &cookie->msi_page_list); + list_add(&msi_page->list, msi_page_list); return msi_page; out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_page: kfree(msi_page); return NULL; @@ -1817,7 +1832,7 @@ static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, struct device *dev = msi_desc_to_dev(desc); const struct iommu_dma_msi_page *msi_page; - if (!domain->iova_cookie) { + if (!has_msi_cookie(domain)) { msi_desc_set_iommu_msi_iova(desc, 0, 0); return 0; } @@ -1827,9 +1842,8 @@ static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, if (!msi_page) return -ENOMEM; - msi_desc_set_iommu_msi_iova( - desc, msi_page->iova, - ilog2(cookie_msi_granule(domain->iova_cookie))); + msi_desc_set_iommu_msi_iova(desc, msi_page->iova, + ilog2(cookie_msi_granule(domain))); return 0; } diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index c12d63457c76..9cca11806e5d 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -13,6 +13,7 @@ void iommu_setup_dma_ops(struct device *dev); int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); +void iommu_put_msi_cookie(struct iommu_domain *domain); int iommu_dma_init_fq(struct iommu_domain *domain); @@ -40,6 +41,10 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } +static inline void iommu_put_msi_cookie(struct iommu_domain *domain) +{ +} + static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { } diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 503c5d23c1ea..ab18bc494eef 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -310,6 +310,7 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, } domain->type = IOMMU_DOMAIN_SVA; + domain->cookie_type = IOMMU_COOKIE_SVA; mmgrab(mm); domain->mm = mm; domain->owner = ops; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0ee17893810f..c92e47f333cb 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1953,8 +1953,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token) { - BUG_ON(!domain); + if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) + return; + domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; domain->handler = handler; domain->handler_token = token; } @@ -2024,9 +2026,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); void iommu_domain_free(struct iommu_domain *domain) { - if (domain->type == IOMMU_DOMAIN_SVA) + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + iommu_put_dma_cookie(domain); + break; + case IOMMU_COOKIE_DMA_MSI: + iommu_put_msi_cookie(domain); + break; + case IOMMU_COOKIE_SVA: mmdrop(domain->mm); - iommu_put_dma_cookie(domain); + break; + default: + break; + } if (domain->ops->free) domain->ops->free(domain); } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 9a89f3a28dc5..fded3f07bfa7 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -160,6 +160,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } } hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* @@ -257,6 +258,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, } hwpt->domain->owner = ops; hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { @@ -315,6 +317,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, } hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e93d2e918599..06cc14e9993d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,7 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_dma_msi_cookie; struct iommu_fault_param; struct iommufd_ctx; struct iommufd_viommu; @@ -165,6 +166,15 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? */ }; +enum iommu_domain_cookie_type { + IOMMU_COOKIE_NONE, + IOMMU_COOKIE_DMA_IOVA, + IOMMU_COOKIE_DMA_MSI, + IOMMU_COOKIE_FAULT_HANDLER, + IOMMU_COOKIE_SVA, + IOMMU_COOKIE_IOMMUFD, +}; + /* Domain feature flags */ #define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API @@ -211,12 +221,12 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; + enum iommu_domain_cookie_type cookie_type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; - struct iommu_dma_cookie *iova_cookie; int (*iopf_handler)(struct iopf_group *group); #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) @@ -224,10 +234,10 @@ struct iommu_domain { phys_addr_t msi_addr); #endif - union { /* Pointer usable by owner of the domain */ - struct iommufd_hw_pagetable *iommufd_hwpt; /* iommufd */ - }; - union { /* Fault handler */ + union { /* cookie */ + struct iommu_dma_cookie *iova_cookie; + struct iommu_dma_msi_cookie *msi_cookie; + struct iommufd_hw_pagetable *iommufd_hwpt; struct { iommu_fault_handler_t handler; void *handler_token; From ec031e1b35ded5acfcef100d9ee7144bbfa4bc12 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 24 Mar 2025 21:05:16 -0700 Subject: [PATCH 1632/2157] iommufd: Move iommufd_sw_msi and related functions to driver.c To provide the iommufd_sw_msi() to the iommu core that is under a different Kconfig, move it and its related functions to driver.c. Then, stub it into the iommu-priv header. The iommufd_sw_msi_install() continues to be used by iommufd internal, so put it in the private header. Note that iommufd_sw_msi() will be called in the iommu core, replacing the sw_msi function pointer. Given that IOMMU_API is "bool" in Kconfig, change IOMMUFD_DRIVER_CORE to "bool" as well. Since this affects the module size, here is before-n-after size comparison: [Before] text data bss dec hex filename 18797 848 56 19701 4cf5 drivers/iommu/iommufd/device.o 722 44 0 766 2fe drivers/iommu/iommufd/driver.o [After] text data bss dec hex filename 17735 808 56 18599 48a7 drivers/iommu/iommufd/device.o 3020 180 0 3200 c80 drivers/iommu/iommufd/driver.o Link: https://patch.msgid.link/r/374c159592dba7852bee20968f3f66fa0ee8ca93.1742871535.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu-priv.h | 13 +++ drivers/iommu/iommufd/Kconfig | 2 +- drivers/iommu/iommufd/device.c | 131 ++---------------------- drivers/iommu/iommufd/driver.c | 126 +++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 7 +- 5 files changed, 153 insertions(+), 126 deletions(-) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index b4508423e13b..c74fff25be78 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -5,6 +5,7 @@ #define __LINUX_IOMMU_PRIV_H #include +#include static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) { @@ -43,4 +44,16 @@ void iommu_detach_group_handle(struct iommu_domain *domain, int iommu_replace_group_handle(struct iommu_group *group, struct iommu_domain *new_domain, struct iommu_attach_handle *handle); + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */ +static inline int iommufd_sw_msi(struct iommu_domain *domain, + struct msi_desc *desc, phys_addr_t msi_addr) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */ + #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 0a07f9449fd9..2beeb4f60ee5 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config IOMMUFD_DRIVER_CORE - tristate + bool default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n config IOMMUFD diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index bd50146e2ad0..d18ea9a61522 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -5,7 +5,6 @@ #include #include #include -#include #include "../iommu-priv.h" #include "io_pagetable.h" @@ -294,129 +293,7 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); -/* - * Get a iommufd_sw_msi_map for the msi physical address requested by the irq - * layer. The mapping to IOVA is global to the iommufd file descriptor, every - * domain that is attached to a device using the same MSI parameters will use - * the same IOVA. - */ -static __maybe_unused struct iommufd_sw_msi_map * -iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, - phys_addr_t sw_msi_start) -{ - struct iommufd_sw_msi_map *cur; - unsigned int max_pgoff = 0; - - lockdep_assert_held(&ictx->sw_msi_lock); - - list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { - if (cur->sw_msi_start != sw_msi_start) - continue; - max_pgoff = max(max_pgoff, cur->pgoff + 1); - if (cur->msi_addr == msi_addr) - return cur; - } - - if (ictx->sw_msi_id >= - BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) - return ERR_PTR(-EOVERFLOW); - - cur = kzalloc(sizeof(*cur), GFP_KERNEL); - if (!cur) - return ERR_PTR(-ENOMEM); - - cur->sw_msi_start = sw_msi_start; - cur->msi_addr = msi_addr; - cur->pgoff = max_pgoff; - cur->id = ictx->sw_msi_id++; - list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); - return cur; -} - -static int iommufd_sw_msi_install(struct iommufd_ctx *ictx, - struct iommufd_hwpt_paging *hwpt_paging, - struct iommufd_sw_msi_map *msi_map) -{ - unsigned long iova; - - lockdep_assert_held(&ictx->sw_msi_lock); - - iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; - if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { - int rc; - - rc = iommu_map(hwpt_paging->common.domain, iova, - msi_map->msi_addr, PAGE_SIZE, - IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, - GFP_KERNEL_ACCOUNT); - if (rc) - return rc; - __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); - } - return 0; -} - -/* - * Called by the irq code if the platform translates the MSI address through the - * IOMMU. msi_addr is the physical address of the MSI page. iommufd will - * allocate a fd global iova for the physical page that is the same on all - * domains and devices. - */ #ifdef CONFIG_IRQ_MSI_IOMMU -int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr) -{ - struct device *dev = msi_desc_to_dev(desc); - struct iommufd_hwpt_paging *hwpt_paging; - struct iommu_attach_handle *raw_handle; - struct iommufd_attach_handle *handle; - struct iommufd_sw_msi_map *msi_map; - struct iommufd_ctx *ictx; - unsigned long iova; - int rc; - - /* - * It is safe to call iommu_attach_handle_get() here because the iommu - * core code invokes this under the group mutex which also prevents any - * change of the attach handle for the duration of this function. - */ - iommu_group_mutex_assert(dev); - - raw_handle = - iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); - if (IS_ERR(raw_handle)) - return 0; - hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); - - handle = to_iommufd_handle(raw_handle); - /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ - if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) - return 0; - - ictx = handle->idev->ictx; - guard(mutex)(&ictx->sw_msi_lock); - /* - * The input msi_addr is the exact byte offset of the MSI doorbell, we - * assume the caller has checked that it is contained with a MMIO region - * that is secure to map at PAGE_SIZE. - */ - msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, - msi_addr & PAGE_MASK, - handle->idev->igroup->sw_msi_start); - if (IS_ERR(msi_map)) - return PTR_ERR(msi_map); - - rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); - if (rc) - return rc; - __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); - - iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; - msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); - return 0; -} -#endif - static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { @@ -443,6 +320,14 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, } return 0; } +#else +static inline int +iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + return 0; +} +#endif static int iommufd_device_attach_reserved_iova(struct iommufd_device *idev, diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 75b365561c16..a08ff0f37fc6 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -121,5 +121,131 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); +#ifdef CONFIG_IRQ_MSI_IOMMU +/* + * Get a iommufd_sw_msi_map for the msi physical address requested by the irq + * layer. The mapping to IOVA is global to the iommufd file descriptor, every + * domain that is attached to a device using the same MSI parameters will use + * the same IOVA. + */ +static struct iommufd_sw_msi_map * +iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, + phys_addr_t sw_msi_start) +{ + struct iommufd_sw_msi_map *cur; + unsigned int max_pgoff = 0; + + lockdep_assert_held(&ictx->sw_msi_lock); + + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + if (cur->sw_msi_start != sw_msi_start) + continue; + max_pgoff = max(max_pgoff, cur->pgoff + 1); + if (cur->msi_addr == msi_addr) + return cur; + } + + if (ictx->sw_msi_id >= + BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) + return ERR_PTR(-EOVERFLOW); + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) + return ERR_PTR(-ENOMEM); + + cur->sw_msi_start = sw_msi_start; + cur->msi_addr = msi_addr; + cur->pgoff = max_pgoff; + cur->id = ictx->sw_msi_id++; + list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); + return cur; +} + +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map) +{ + unsigned long iova; + + lockdep_assert_held(&ictx->sw_msi_lock); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { + int rc; + + rc = iommu_map(hwpt_paging->common.domain, iova, + msi_map->msi_addr, PAGE_SIZE, + IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, + GFP_KERNEL_ACCOUNT); + if (rc) + return rc; + __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); + } + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL"); + +/* + * Called by the irq code if the platform translates the MSI address through the + * IOMMU. msi_addr is the physical address of the MSI page. iommufd will + * allocate a fd global iova for the physical page that is the same on all + * domains and devices. + */ +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommufd_hwpt_paging *hwpt_paging; + struct iommu_attach_handle *raw_handle; + struct iommufd_attach_handle *handle; + struct iommufd_sw_msi_map *msi_map; + struct iommufd_ctx *ictx; + unsigned long iova; + int rc; + + /* + * It is safe to call iommu_attach_handle_get() here because the iommu + * core code invokes this under the group mutex which also prevents any + * change of the attach handle for the duration of this function. + */ + iommu_group_mutex_assert(dev); + + raw_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(raw_handle)) + return 0; + hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); + + handle = to_iommufd_handle(raw_handle); + /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ + if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; + + ictx = handle->idev->ictx; + guard(mutex)(&ictx->sw_msi_lock); + /* + * The input msi_addr is the exact byte offset of the MSI doorbell, we + * assume the caller has checked that it is contained with a MMIO region + * that is secure to map at PAGE_SIZE. + */ + msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, + msi_addr & PAGE_MASK, + handle->idev->igroup->sw_msi_start); + if (IS_ERR(msi_map)) + return PTR_ERR(msi_map); + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); + if (rc) + return rc; + __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD"); +#endif + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); +MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 8cda9c4672eb..8c49ca16919a 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -32,8 +32,11 @@ struct iommufd_sw_msi_maps { DECLARE_BITMAP(bitmap, 64); }; -int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); +#ifdef CONFIG_IRQ_MSI_IOMMU +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map); +#endif struct iommufd_ctx { struct file *file; From 06d54f00f3f5a29cbf43410ac93ee2dd89e3b711 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 24 Mar 2025 21:05:17 -0700 Subject: [PATCH 1633/2157] iommu: Drop sw_msi from iommu_domain There are only two sw_msi implementations in the entire system, thus it's not very necessary to have an sw_msi pointer. Instead, check domain->cookie_type to call the two sw_msi implementations directly from the core code. Link: https://patch.msgid.link/r/7ded87c871afcbaac665b71354de0a335087bf0f.1742871535.git.nicolinc@nvidia.com Suggested-by: Robin Murphy Reviewed-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 14 ++------------ drivers/iommu/dma-iommu.h | 9 +++++++++ drivers/iommu/iommu.c | 18 ++++++++++++++++-- drivers/iommu/iommufd/hw_pagetable.c | 3 --- include/linux/iommu.h | 15 --------------- 5 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 31a7b4b81656..2bd9f80a83fe 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -94,9 +94,6 @@ static int __init iommu_dma_forcedac_setup(char *str) } early_param("iommu.forcedac", iommu_dma_forcedac_setup); -static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); - /* Number of entries per flush queue */ #define IOVA_DEFAULT_FQ_SIZE 256 #define IOVA_SINGLE_FQ_SIZE 32768 @@ -377,7 +374,6 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) mutex_init(&cookie->mutex); INIT_LIST_HEAD(&cookie->msi_page_list); - iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); domain->cookie_type = IOMMU_COOKIE_DMA_IOVA; domain->iova_cookie = cookie; return 0; @@ -411,7 +407,6 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) cookie->msi_iova = base; INIT_LIST_HEAD(&cookie->msi_page_list); - iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); domain->cookie_type = IOMMU_COOKIE_DMA_MSI; domain->msi_cookie = cookie; return 0; @@ -427,11 +422,6 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi, *tmp; -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - if (domain->sw_msi != iommu_dma_sw_msi) - return; -#endif - if (cookie->iovad.granule) { iommu_dma_free_fq(cookie); put_iova_domain(&cookie->iovad); @@ -1826,8 +1816,8 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return NULL; } -static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr) +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); const struct iommu_dma_msi_page *msi_page; diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index 9cca11806e5d..eca201c1f963 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -19,6 +19,9 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); + extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ @@ -49,5 +52,11 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline int iommu_dma_sw_msi(struct iommu_domain *domain, + struct msi_desc *desc, phys_addr_t msi_addr) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c92e47f333cb..d96e6fabb4da 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3650,8 +3651,21 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return 0; mutex_lock(&group->mutex); - if (group->domain && group->domain->sw_msi) - ret = group->domain->sw_msi(group->domain, desc, msi_addr); + /* An IDENTITY domain must pass through */ + if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { + switch (group->domain->cookie_type) { + case IOMMU_COOKIE_DMA_MSI: + case IOMMU_COOKIE_DMA_IOVA: + ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); + break; + case IOMMU_COOKIE_IOMMUFD: + ret = iommufd_sw_msi(group->domain, desc, msi_addr); + break; + default: + ret = -EOPNOTSUPP; + break; + } + } mutex_unlock(&group->mutex); return ret; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index fded3f07bfa7..8e87ae71e128 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -161,7 +161,6 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* * Set the coherency mode before we do iopt_table_add_domain() as some @@ -259,7 +258,6 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, hwpt->domain->owner = ops; hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -318,7 +316,6 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; - iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 06cc14e9993d..e01c855ae8a7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -229,11 +229,6 @@ struct iommu_domain { struct iommu_domain_geometry geometry; int (*iopf_handler)(struct iopf_group *group); -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); -#endif - union { /* cookie */ struct iommu_dma_cookie *iova_cookie; struct iommu_dma_msi_cookie *msi_cookie; @@ -254,16 +249,6 @@ struct iommu_domain { }; }; -static inline void iommu_domain_set_sw_msi( - struct iommu_domain *domain, - int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr)) -{ -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - domain->sw_msi = sw_msi; -#endif -} - static inline bool iommu_is_dma_domain(struct iommu_domain *domain) { return domain->type & __IOMMU_DOMAIN_DMA_API; From ada14b9f1aab5b60d50dec14c17eb84a55c0f682 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:23 -0700 Subject: [PATCH 1634/2157] iommu: Require passing new handles to APIs supporting handle Add kdoc to highligt the caller of iommu_[attach|replace]_group_handle() and iommu_attach_device_pasid() should always provide a new handle. This can avoid race with lockless reference to the handle. e.g. the find_fault_handler() and iommu_report_device_fault() in the PRI path. Link: https://patch.msgid.link/r/20250321171940.7213-2-yi.l.liu@intel.com Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d96e6fabb4da..34ea7e5e7d00 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3365,6 +3365,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, * @pasid: the pasid of the device. * @handle: the attach handle. * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle if it intends to pass a valid handle. + * * Return: 0 on success, or an error. */ int iommu_attach_device_pasid(struct iommu_domain *domain, @@ -3525,6 +3528,9 @@ EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); * This is a variant of iommu_attach_group(). It allows the caller to provide * an attach handle and use it when the domain is attached. This is currently * used by IOMMUFD to deliver the I/O page faults. + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle. */ int iommu_attach_group_handle(struct iommu_domain *domain, struct iommu_group *group, @@ -3594,6 +3600,9 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); * * If the currently attached domain is a core domain (e.g. a default_domain), * it will act just like the iommu_attach_group_handle(). + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle. */ int iommu_replace_group_handle(struct iommu_group *group, struct iommu_domain *new_domain, From 8a9e1e773f60080f6d56bd997719d4e62048b2d3 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:24 -0700 Subject: [PATCH 1635/2157] iommu: Introduce a replace API for device pasid Provide a high-level API to allow replacements of one domain with another for specific pasid of a device. This is similar to iommu_replace_group_handle() and it is expected to be used only by IOMMUFD. Link: https://patch.msgid.link/r/20250321171940.7213-3-yi.l.liu@intel.com Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu-priv.h | 3 + drivers/iommu/iommu.c | 115 +++++++++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index c74fff25be78..b8528d5dd1e7 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -56,4 +56,7 @@ static inline int iommufd_sw_msi(struct iommu_domain *domain, } #endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */ +int iommu_replace_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 34ea7e5e7d00..f24963be6170 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -514,6 +514,13 @@ static void iommu_deinit_device(struct device *dev) dev_iommu_free(dev); } +static struct iommu_domain *pasid_array_entry_to_domain(void *entry) +{ + if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN) + return xa_untag_pointer(entry); + return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain; +} + DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) @@ -3324,14 +3331,15 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, } static int __iommu_set_group_pasid(struct iommu_domain *domain, - struct iommu_group *group, ioasid_t pasid) + struct iommu_group *group, ioasid_t pasid, + struct iommu_domain *old) { struct group_device *device, *last_gdev; int ret; for_each_group_device(group, device) { ret = domain->ops->set_dev_pasid(domain, device->dev, - pasid, NULL); + pasid, old); if (ret) goto err_revert; } @@ -3343,7 +3351,15 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, for_each_group_device(group, device) { if (device == last_gdev) break; - iommu_remove_dev_pasid(device->dev, pasid, domain); + /* + * If no old domain, undo the succeeded devices/pasid. + * Otherwise, rollback the succeeded devices/pasid to the old + * domain. And it is a driver bug to fail attaching with a + * previously good domain. + */ + if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev, + pasid, domain))) + iommu_remove_dev_pasid(device->dev, pasid, domain); } return ret; } @@ -3412,7 +3428,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, if (ret) goto out_unlock; - ret = __iommu_set_group_pasid(domain, group, pasid); + ret = __iommu_set_group_pasid(domain, group, pasid, NULL); if (ret) { xa_release(&group->pasid_array, pasid); goto out_unlock; @@ -3433,6 +3449,97 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); +/** + * iommu_replace_device_pasid - Replace the domain that a specific pasid + * of the device is attached to + * @domain: the new iommu domain + * @dev: the attached device. + * @pasid: the pasid of the device. + * @handle: the attach handle. + * + * This API allows the pasid to switch domains. The @pasid should have been + * attached. Otherwise, this fails. The pasid will keep the old configuration + * if replacement failed. + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle if it intends to pass a valid handle. + * + * Return 0 on success, or an error. + */ +int iommu_replace_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) +{ + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; + struct iommu_attach_handle *entry; + struct iommu_domain *curr_domain; + void *curr; + int ret; + + if (!group) + return -ENODEV; + + if (!domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + if (dev_iommu_ops(dev) != domain->owner || + pasid == IOMMU_NO_PASID || !handle) + return -EINVAL; + + mutex_lock(&group->mutex); + entry = iommu_make_pasid_array_entry(domain, handle); + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(curr)) { + ret = xa_err(curr); + goto out_unlock; + } + + /* + * No domain (with or without handle) attached, hence not + * a replace case. + */ + if (!curr) { + xa_release(&group->pasid_array, pasid); + ret = -EINVAL; + goto out_unlock; + } + + /* + * Reusing handle is problematic as there are paths that refers + * the handle without lock. To avoid race, reject the callers that + * attempt it. + */ + if (curr == entry) { + WARN_ON(1); + ret = -EINVAL; + goto out_unlock; + } + + curr_domain = pasid_array_entry_to_domain(curr); + ret = 0; + + if (curr_domain != domain) { + ret = __iommu_set_group_pasid(domain, group, + pasid, curr_domain); + if (ret) + goto out_unlock; + } + + /* + * The above xa_cmpxchg() reserved the memory, and the + * group->mutex is held, this cannot fail. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + pasid, entry, GFP_KERNEL))); + +out_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); + /* * iommu_detach_device_pasid() - Detach the domain from pasid of device * @domain: the iommu domain. From 03c9b102bea6f4f0b517c841fe1d2f9c616c95b9 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:25 -0700 Subject: [PATCH 1636/2157] iommufd: Pass @pasid through the device attach/replace path Most of the core logic before conducting the actual device attach/ replace operation can be shared with pasid attach/replace. So pass @pasid through the device attach/replace helpers to prepare adding pasid attach/replace. So far the @pasid should only be IOMMU_NO_PASID. No functional change. Link: https://patch.msgid.link/r/20250321171940.7213-4-yi.l.liu@intel.com Signed-off-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 70 +++++++++++++++---------- drivers/iommu/iommufd/hw_pagetable.c | 13 ++--- drivers/iommu/iommufd/iommufd_private.h | 8 +-- 3 files changed, 52 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index d18ea9a61522..7051feda2fab 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -368,7 +368,8 @@ static bool iommufd_device_is_attached(struct iommufd_device *idev) } static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, + ioasid_t pasid) { struct iommufd_attach_handle *handle; int rc; @@ -386,6 +387,7 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, } handle->idev = idev; + WARN_ON(pasid != IOMMU_NO_PASID); rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, &handle->handle); if (rc) @@ -402,25 +404,28 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, } static struct iommufd_attach_handle * -iommufd_device_get_attach_handle(struct iommufd_device *idev) +iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid) { struct iommu_attach_handle *handle; lockdep_assert_held(&idev->igroup->lock); handle = - iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + iommu_attach_handle_get(idev->igroup->group, pasid, 0); if (IS_ERR(handle)) return NULL; return to_iommufd_handle(handle); } static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, + ioasid_t pasid) { struct iommufd_attach_handle *handle; - handle = iommufd_device_get_attach_handle(idev); + WARN_ON(pasid != IOMMU_NO_PASID); + + handle = iommufd_device_get_attach_handle(idev, pasid); iommu_detach_group_handle(hwpt->domain, idev->igroup->group); if (hwpt->fault) { iommufd_auto_response_faults(hwpt, handle); @@ -430,13 +435,17 @@ static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, } static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + ioasid_t pasid, struct iommufd_hw_pagetable *hwpt, struct iommufd_hw_pagetable *old) { - struct iommufd_attach_handle *handle, *old_handle = - iommufd_device_get_attach_handle(idev); + struct iommufd_attach_handle *handle, *old_handle; int rc; + WARN_ON(pasid != IOMMU_NO_PASID); + + old_handle = iommufd_device_get_attach_handle(idev, pasid); + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return -ENOMEM; @@ -471,7 +480,7 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, } int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); int rc; @@ -497,7 +506,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * attachment. */ if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_hwpt_attach_device(hwpt, idev); + rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; idev->igroup->hwpt = hwpt; @@ -515,7 +524,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, } struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev) +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); @@ -523,7 +532,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); if (list_empty(&idev->igroup->device_list)) { - iommufd_hwpt_detach_device(hwpt, idev); + iommufd_hwpt_detach_device(hwpt, idev, pasid); idev->igroup->hwpt = NULL; } if (hwpt_paging) @@ -535,12 +544,12 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) } static struct iommufd_hw_pagetable * -iommufd_device_do_attach(struct iommufd_device *idev, +iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { int rc; - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) return ERR_PTR(rc); return NULL; @@ -589,7 +598,7 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, } static struct iommufd_hw_pagetable * -iommufd_device_do_replace(struct iommufd_device *idev, +iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); @@ -623,7 +632,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, goto err_unlock; } - rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt); + rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt); if (rc) goto err_unresv; @@ -656,7 +665,8 @@ iommufd_device_do_replace(struct iommufd_device *idev, } typedef struct iommufd_hw_pagetable *(*attach_fn)( - struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + struct iommufd_device *idev, ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt); /* * When automatically managing the domains we search for a compatible domain in @@ -664,7 +674,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)( * Automatic domain selection will never pick a manually created domain. */ static struct iommufd_hw_pagetable * -iommufd_device_auto_get_domain(struct iommufd_device *idev, +iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_ioas *ioas, u32 *pt_id, attach_fn do_attach) { @@ -693,7 +703,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) { iommufd_put_object(idev->ictx, &hwpt->obj); /* @@ -711,8 +721,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } - hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, - immediate_attach, NULL); + hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid, + 0, immediate_attach, NULL); if (IS_ERR(hwpt_paging)) { destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; @@ -720,7 +730,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!immediate_attach) { - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_abort; } else { @@ -741,8 +751,9 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, return destroy_hwpt; } -static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, - attach_fn do_attach) +static int iommufd_device_change_pt(struct iommufd_device *idev, + ioasid_t pasid, + u32 *pt_id, attach_fn do_attach) { struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; @@ -757,7 +768,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -766,8 +777,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, - do_attach); + destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas, + pt_id, do_attach); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -804,7 +815,8 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + rc = iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, + &iommufd_device_do_attach); if (rc) return rc; @@ -834,7 +846,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); */ int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) { - return iommufd_device_change_pt(idev, pt_id, + return iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, &iommufd_device_do_replace); } EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); @@ -850,7 +862,7 @@ void iommufd_device_detach(struct iommufd_device *idev) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev); + hwpt = iommufd_hw_pagetable_detach(idev, IOMMU_NO_PASID); iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 8e87ae71e128..bd9dd26a5295 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) * @ictx: iommufd context * @ioas: IOAS to associate the domain with * @idev: Device to get an iommu_domain for + * @pasid: PASID to get an iommu_domain for * @flags: Flags from userspace * @immediate_attach: True if idev should be attached to the hwpt * @user_data: The user provided driver specific data describing the domain to @@ -105,8 +106,8 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) */ struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data) { const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | @@ -189,7 +190,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, * sequence. Once those drivers are fixed this should be removed. */ if (immediate_attach) { - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) goto out_abort; } @@ -202,7 +203,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, out_detach: if (immediate_attach) - iommufd_hw_pagetable_detach(idev); + iommufd_hw_pagetable_detach(idev, pasid); out_abort: iommufd_object_abort_and_destroy(ictx, &hwpt->obj); return ERR_PTR(rc); @@ -364,8 +365,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ioas = container_of(pt_obj, struct iommufd_ioas, obj); mutex_lock(&ioas->mutex); hwpt_paging = iommufd_hwpt_paging_alloc( - ucmd->ictx, ioas, idev, cmd->flags, false, - user_data.len ? &user_data : NULL); + ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags, + false, user_data.len ? &user_data : NULL); if (IS_ERR(hwpt_paging)) { rc = PTR_ERR(hwpt_paging); goto out_unlock; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 8c49ca16919a..891800948d1a 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -369,13 +369,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); + struct iommufd_device *idev, ioasid_t pasid); struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev); +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid); void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); From bc06f7f66de404ae6323963361fe4e2f5f71a1e5 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:26 -0700 Subject: [PATCH 1637/2157] iommufd/device: Only add reserved_iova in non-pasid path As the pasid is passed through the attach/replace/detach helpers, it is necessary to ensure only the non-pasid path adds reserved_iova. Link: https://patch.msgid.link/r/20250321171940.7213-5-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 7051feda2fab..4625f084f7d0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -483,6 +483,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; int rc; mutex_lock(&idev->igroup->lock); @@ -492,7 +493,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, goto err_unlock; } - if (hwpt_paging) { + if (attach_resv) { rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) goto err_unlock; @@ -516,7 +517,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, mutex_unlock(&idev->igroup->lock); return 0; err_unresv: - if (hwpt_paging) + if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); err_unlock: mutex_unlock(&idev->igroup->lock); @@ -535,7 +536,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) iommufd_hwpt_detach_device(hwpt, idev, pasid); idev->igroup->hwpt = NULL; } - if (hwpt_paging) + if (hwpt_paging && pasid == IOMMU_NO_PASID) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); mutex_unlock(&idev->igroup->lock); @@ -602,6 +603,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; @@ -626,7 +628,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, } old_hwpt = igroup->hwpt; - if (hwpt_paging) { + if (attach_resv) { rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; @@ -637,7 +639,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, goto err_unresv; old_hwpt_paging = find_hwpt_paging(old_hwpt); - if (old_hwpt_paging && + if (old_hwpt_paging && pasid == IOMMU_NO_PASID && (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); @@ -657,7 +659,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_paging) + if (attach_resv) iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: mutex_unlock(&idev->igroup->lock); From 2eaa7f845e149370a162bedb0437c9e26229760c Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:27 -0700 Subject: [PATCH 1638/2157] iommufd/device: Replace idev->igroup with local variable With more use of the fields of igroup, use a local vairable instead of using the idev->igroup heavily. No functional change expected. Link: https://patch.msgid.link/r/20250321171940.7213-6-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 43 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 4625f084f7d0..15733b316b70 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -333,18 +333,19 @@ static int iommufd_device_attach_reserved_iova(struct iommufd_device *idev, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_group *igroup = idev->igroup; int rc; - lockdep_assert_held(&idev->igroup->lock); + lockdep_assert_held(&igroup->lock); rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, - &idev->igroup->sw_msi_start); + &igroup->sw_msi_start); if (rc) return rc; - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); + if (list_empty(&igroup->device_list)) { + rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); @@ -484,11 +485,12 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; + struct iommufd_group *igroup = idev->igroup; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { + if (igroup->hwpt && igroup->hwpt != hwpt) { rc = -EINVAL; goto err_unlock; } @@ -506,39 +508,40 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * reserved regions are only updated during individual device * attachment. */ - if (list_empty(&idev->igroup->device_list)) { + if (list_empty(&igroup->device_list)) { rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; - idev->igroup->hwpt = hwpt; + igroup->hwpt = hwpt; } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &idev->igroup->device_list); - mutex_unlock(&idev->igroup->lock); + list_add_tail(&idev->group_item, &igroup->device_list); + mutex_unlock(&igroup->lock); return 0; err_unresv: if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return rc; } struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { - struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *hwpt = igroup->hwpt; struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); list_del(&idev->group_item); - if (list_empty(&idev->igroup->device_list)) { + if (list_empty(&igroup->device_list)) { iommufd_hwpt_detach_device(hwpt, idev, pasid); - idev->igroup->hwpt = NULL; + igroup->hwpt = NULL; } if (hwpt_paging && pasid == IOMMU_NO_PASID) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy hwpt */ return hwpt; @@ -610,7 +613,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, unsigned int num_devices; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); if (igroup->hwpt == NULL) { rc = -EINVAL; @@ -623,7 +626,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, } if (hwpt == igroup->hwpt) { - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return NULL; } @@ -654,7 +657,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, if (num_devices > 1) WARN_ON(refcount_sub_and_test(num_devices - 1, &old_hwpt->obj.users)); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy old_hwpt */ return old_hwpt; @@ -662,7 +665,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, if (attach_resv) iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return ERR_PTR(rc); } From ba1de6cd41d0654245864640b62ae45a1bc01bcd Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:28 -0700 Subject: [PATCH 1639/2157] iommufd/device: Add helper to detect the first attach of a group The existing code detects the first attach by checking the igroup->device_list. However, the igroup->hwpt can also be used to detect the first attach. In future modifications, it is better to check the igroup->hwpt instead of the device_list. To improve readbility and also prepare for further modifications on this part, this adds a helper for it. Link: https://patch.msgid.link/r/20250321171940.7213-7-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 15733b316b70..2cc3c12d301d 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -329,6 +329,13 @@ iommufd_group_setup_msi(struct iommufd_group *igroup, } #endif +static bool +iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) +{ + lockdep_assert_held(&igroup->lock); + return !igroup->hwpt; +} + static int iommufd_device_attach_reserved_iova(struct iommufd_device *idev, struct iommufd_hwpt_paging *hwpt_paging) @@ -344,7 +351,7 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, if (rc) return rc; - if (list_empty(&igroup->device_list)) { + if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) { rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, @@ -508,7 +515,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * reserved regions are only updated during individual device * attachment. */ - if (list_empty(&igroup->device_list)) { + if (iommufd_group_first_attach(igroup, pasid)) { rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; From 75f990aef38e930f8b676562c4d4b02c1f5eccfd Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:29 -0700 Subject: [PATCH 1640/2157] iommufd/device: Wrap igroup->hwpt and igroup->device_list into attach struct The igroup->hwpt and igroup->device_list are used to track the hwpt attach of a group in the RID path. While the coming PASID path also needs such tracking. To be prepared, wrap igroup->hwpt and igroup->device_list into attach struct which is allocated per attaching the first device of the group and freed per detaching the last device of the group. Link: https://patch.msgid.link/r/20250321171940.7213-8-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 76 ++++++++++++++++++------- drivers/iommu/iommufd/iommufd_private.h | 5 +- 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 2cc3c12d301d..6b4764c2d9af 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -17,12 +17,17 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +struct iommufd_attach { + struct iommufd_hw_pagetable *hwpt; + struct list_head device_list; +}; + static void iommufd_group_release(struct kref *kref) { struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); - WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + WARN_ON(igroup->attach); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); @@ -89,7 +94,6 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); - INIT_LIST_HEAD(&new_igroup->device_list); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; @@ -333,7 +337,7 @@ static bool iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) { lockdep_assert_held(&igroup->lock); - return !igroup->hwpt; + return !igroup->attach; } static int @@ -369,7 +373,7 @@ static bool iommufd_device_is_attached(struct iommufd_device *idev) { struct iommufd_device *cur; - list_for_each_entry(cur, &idev->igroup->device_list, group_item) + list_for_each_entry(cur, &idev->igroup->attach->device_list, group_item) if (cur == idev) return true; return false; @@ -493,19 +497,33 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; int rc; mutex_lock(&igroup->lock); - if (igroup->hwpt && igroup->hwpt != hwpt) { + attach = igroup->attach; + if (!attach) { + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) { + rc = -ENOMEM; + goto err_unlock; + } + INIT_LIST_HEAD(&attach->device_list); + } + + old_hwpt = attach->hwpt; + + if (old_hwpt && old_hwpt != hwpt) { rc = -EINVAL; - goto err_unlock; + goto err_free_attach; } if (attach_resv) { rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) - goto err_unlock; + goto err_free_attach; } /* @@ -519,15 +537,19 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; - igroup->hwpt = hwpt; + attach->hwpt = hwpt; + igroup->attach = attach; } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &igroup->device_list); + list_add_tail(&idev->group_item, &attach->device_list); mutex_unlock(&igroup->lock); return 0; err_unresv: if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); +err_free_attach: + if (iommufd_group_first_attach(igroup, pasid)) + kfree(attach); err_unlock: mutex_unlock(&igroup->lock); return rc; @@ -537,14 +559,20 @@ struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_group *igroup = idev->igroup; - struct iommufd_hw_pagetable *hwpt = igroup->hwpt; - struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_attach *attach; mutex_lock(&igroup->lock); + attach = igroup->attach; + hwpt = attach->hwpt; + hwpt_paging = find_hwpt_paging(hwpt); + list_del(&idev->group_item); - if (list_empty(&igroup->device_list)) { + if (list_empty(&attach->device_list)) { iommufd_hwpt_detach_device(hwpt, idev, pasid); - igroup->hwpt = NULL; + igroup->attach = NULL; + kfree(attach); } if (hwpt_paging && pasid == IOMMU_NO_PASID) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); @@ -574,7 +602,7 @@ iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, lockdep_assert_held(&igroup->lock); - list_for_each_entry(cur, &igroup->device_list, group_item) + list_for_each_entry(cur, &igroup->attach->device_list, group_item) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } @@ -588,9 +616,10 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, lockdep_assert_held(&igroup->lock); - old_hwpt_paging = find_hwpt_paging(igroup->hwpt); + old_hwpt_paging = find_hwpt_paging(igroup->attach->hwpt); if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) { + list_for_each_entry(cur, + &igroup->attach->device_list, group_item) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -617,27 +646,32 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; unsigned int num_devices; int rc; mutex_lock(&igroup->lock); - if (igroup->hwpt == NULL) { + attach = igroup->attach; + if (!attach) { rc = -EINVAL; goto err_unlock; } + old_hwpt = attach->hwpt; + + WARN_ON(!old_hwpt || list_empty(&attach->device_list)); + if (!iommufd_device_is_attached(idev)) { rc = -EINVAL; goto err_unlock; } - if (hwpt == igroup->hwpt) { + if (hwpt == old_hwpt) { mutex_unlock(&igroup->lock); return NULL; } - old_hwpt = igroup->hwpt; if (attach_resv) { rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) @@ -653,9 +687,9 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); - igroup->hwpt = hwpt; + attach->hwpt = hwpt; - num_devices = list_count_nodes(&igroup->device_list); + num_devices = list_count_nodes(&attach->device_list); /* * Move the refcounts held by the device_list to the new hwpt. Retain a * refcount for this thread as the caller will free it. diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 891800948d1a..5b4d8962166b 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -399,13 +399,14 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, refcount_dec(&hwpt->obj.users); } +struct iommufd_attach; + struct iommufd_group { struct kref ref; struct mutex lock; struct iommufd_ctx *ictx; struct iommu_group *group; - struct iommufd_hw_pagetable *hwpt; - struct list_head device_list; + struct iommufd_attach *attach; struct iommufd_sw_msi_maps required_sw_msi; phys_addr_t sw_msi_start; }; From 831b40f8416cf393faf41b3ae2e877a73aa6baa3 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:30 -0700 Subject: [PATCH 1641/2157] iommufd/device: Replace device_list with device_array igroup->attach->device_list is used to track attached device of a group in the RID path. Such tracking is also needed in the PASID path in order to share path with the RID path. While there is only one list_head in the iommufd_device. It cannot work if the device has been attached in both RID path and PASID path. To solve it, replacing the device_list with an xarray. The attached iommufd_device is stored in the entry indexed by the idev->obj.id. Link: https://patch.msgid.link/r/20250321171940.7213-9-yi.l.liu@intel.com Reviewed-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 58 +++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 6b4764c2d9af..760917f5d764 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -19,7 +19,7 @@ MODULE_PARM_DESC( struct iommufd_attach { struct iommufd_hw_pagetable *hwpt; - struct list_head device_list; + struct xarray device_array; }; static void iommufd_group_release(struct kref *kref) @@ -297,6 +297,20 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); +static unsigned int iommufd_group_device_num(struct iommufd_group *igroup) +{ + struct iommufd_device *idev; + unsigned int count = 0; + unsigned long index; + + lockdep_assert_held(&igroup->lock); + + if (igroup->attach) + xa_for_each(&igroup->attach->device_array, index, idev) + count++; + return count; +} + #ifdef CONFIG_IRQ_MSI_IOMMU static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) @@ -371,12 +385,7 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, /* Check if idev is attached to igroup->hwpt */ static bool iommufd_device_is_attached(struct iommufd_device *idev) { - struct iommufd_device *cur; - - list_for_each_entry(cur, &idev->igroup->attach->device_list, group_item) - if (cur == idev) - return true; - return false; + return xa_load(&idev->igroup->attach->device_array, idev->obj.id); } static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, @@ -510,20 +519,27 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, rc = -ENOMEM; goto err_unlock; } - INIT_LIST_HEAD(&attach->device_list); + xa_init(&attach->device_array); } old_hwpt = attach->hwpt; + rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY, + GFP_KERNEL); + if (rc) { + WARN_ON(rc == -EBUSY && !old_hwpt); + goto err_free_attach; + } + if (old_hwpt && old_hwpt != hwpt) { rc = -EINVAL; - goto err_free_attach; + goto err_release_devid; } if (attach_resv) { rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) - goto err_free_attach; + goto err_release_devid; } /* @@ -541,12 +557,15 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, igroup->attach = attach; } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &attach->device_list); + WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id, + idev, GFP_KERNEL))); mutex_unlock(&igroup->lock); return 0; err_unresv: if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); +err_release_devid: + xa_release(&attach->device_array, idev->obj.id); err_free_attach: if (iommufd_group_first_attach(igroup, pasid)) kfree(attach); @@ -568,8 +587,8 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) hwpt = attach->hwpt; hwpt_paging = find_hwpt_paging(hwpt); - list_del(&idev->group_item); - if (list_empty(&attach->device_list)) { + xa_erase(&attach->device_array, idev->obj.id); + if (xa_empty(&attach->device_array)) { iommufd_hwpt_detach_device(hwpt, idev, pasid); igroup->attach = NULL; kfree(attach); @@ -599,10 +618,11 @@ iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_device *cur; + unsigned long index; lockdep_assert_held(&igroup->lock); - list_for_each_entry(cur, &igroup->attach->device_list, group_item) + xa_for_each(&igroup->attach->device_array, index, cur) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } @@ -612,14 +632,14 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, { struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_device *cur; + unsigned long index; int rc; lockdep_assert_held(&igroup->lock); old_hwpt_paging = find_hwpt_paging(igroup->attach->hwpt); if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { - list_for_each_entry(cur, - &igroup->attach->device_list, group_item) { + xa_for_each(&igroup->attach->device_array, index, cur) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -660,7 +680,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, old_hwpt = attach->hwpt; - WARN_ON(!old_hwpt || list_empty(&attach->device_list)); + WARN_ON(!old_hwpt || xa_empty(&attach->device_array)); if (!iommufd_device_is_attached(idev)) { rc = -EINVAL; @@ -689,9 +709,9 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, attach->hwpt = hwpt; - num_devices = list_count_nodes(&attach->device_list); + num_devices = iommufd_group_device_num(igroup); /* - * Move the refcounts held by the device_list to the new hwpt. Retain a + * Move the refcounts held by the device_array to the new hwpt. Retain a * refcount for this thread as the caller will free it. */ refcount_add(num_devices, &hwpt->obj.users); From c0e301b2978d319d78ed332290989f3499ef9e63 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:31 -0700 Subject: [PATCH 1642/2157] iommufd/device: Add pasid_attach array to track per-PASID attach PASIDs of PASID-capable device can be attached to hwpt separately, hence a pasid array to track per-PASID attachment is necessary. The index IOMMU_NO_PASID is used by the RID path. Hence drop the igroup->attach. Link: https://patch.msgid.link/r/20250321171940.7213-10-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 59 +++++++++++++++++-------- drivers/iommu/iommufd/iommufd_private.h | 2 +- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 760917f5d764..175f3d39baaa 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -27,7 +27,7 @@ static void iommufd_group_release(struct kref *kref) struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); - WARN_ON(igroup->attach); + WARN_ON(!xa_empty(&igroup->pasid_attach)); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); @@ -94,6 +94,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); + xa_init(&new_igroup->pasid_attach); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; @@ -297,16 +298,19 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); -static unsigned int iommufd_group_device_num(struct iommufd_group *igroup) +static unsigned int iommufd_group_device_num(struct iommufd_group *igroup, + ioasid_t pasid) { + struct iommufd_attach *attach; struct iommufd_device *idev; unsigned int count = 0; unsigned long index; lockdep_assert_held(&igroup->lock); - if (igroup->attach) - xa_for_each(&igroup->attach->device_array, index, idev) + attach = xa_load(&igroup->pasid_attach, pasid); + if (attach) + xa_for_each(&attach->device_array, index, idev) count++; return count; } @@ -351,7 +355,7 @@ static bool iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) { lockdep_assert_held(&igroup->lock); - return !igroup->attach; + return !xa_load(&igroup->pasid_attach, pasid); } static int @@ -382,10 +386,13 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, /* The device attach/detach/replace helpers for attach_handle */ -/* Check if idev is attached to igroup->hwpt */ -static bool iommufd_device_is_attached(struct iommufd_device *idev) +static bool iommufd_device_is_attached(struct iommufd_device *idev, + ioasid_t pasid) { - return xa_load(&idev->igroup->attach->device_array, idev->obj.id); + struct iommufd_attach *attach; + + attach = xa_load(&idev->igroup->pasid_attach, pasid); + return xa_load(&attach->device_array, idev->obj.id); } static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, @@ -512,12 +519,18 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, mutex_lock(&igroup->lock); - attach = igroup->attach; + attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(attach)) { + rc = xa_err(attach); + goto err_unlock; + } + if (!attach) { attach = kzalloc(sizeof(*attach), GFP_KERNEL); if (!attach) { rc = -ENOMEM; - goto err_unlock; + goto err_release_pasid; } xa_init(&attach->device_array); } @@ -554,7 +567,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, if (rc) goto err_unresv; attach->hwpt = hwpt; - igroup->attach = attach; + WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach, + GFP_KERNEL))); } refcount_inc(&hwpt->obj.users); WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id, @@ -569,6 +583,9 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, err_free_attach: if (iommufd_group_first_attach(igroup, pasid)) kfree(attach); +err_release_pasid: + if (iommufd_group_first_attach(igroup, pasid)) + xa_release(&igroup->pasid_attach, pasid); err_unlock: mutex_unlock(&igroup->lock); return rc; @@ -583,14 +600,14 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) struct iommufd_attach *attach; mutex_lock(&igroup->lock); - attach = igroup->attach; + attach = xa_load(&igroup->pasid_attach, pasid); hwpt = attach->hwpt; hwpt_paging = find_hwpt_paging(hwpt); xa_erase(&attach->device_array, idev->obj.id); if (xa_empty(&attach->device_array)) { iommufd_hwpt_detach_device(hwpt, idev, pasid); - igroup->attach = NULL; + xa_erase(&igroup->pasid_attach, pasid); kfree(attach); } if (hwpt_paging && pasid == IOMMU_NO_PASID) @@ -617,12 +634,14 @@ static void iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_attach *attach; struct iommufd_device *cur; unsigned long index; lockdep_assert_held(&igroup->lock); - xa_for_each(&igroup->attach->device_array, index, cur) + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + xa_for_each(&attach->device_array, index, cur) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } @@ -631,15 +650,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_hwpt_paging *old_hwpt_paging; + struct iommufd_attach *attach; struct iommufd_device *cur; unsigned long index; int rc; lockdep_assert_held(&igroup->lock); - old_hwpt_paging = find_hwpt_paging(igroup->attach->hwpt); + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + old_hwpt_paging = find_hwpt_paging(attach->hwpt); if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { - xa_for_each(&igroup->attach->device_array, index, cur) { + xa_for_each(&attach->device_array, index, cur) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -672,7 +693,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, mutex_lock(&igroup->lock); - attach = igroup->attach; + attach = xa_load(&igroup->pasid_attach, pasid); if (!attach) { rc = -EINVAL; goto err_unlock; @@ -682,7 +703,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, WARN_ON(!old_hwpt || xa_empty(&attach->device_array)); - if (!iommufd_device_is_attached(idev)) { + if (!iommufd_device_is_attached(idev, pasid)) { rc = -EINVAL; goto err_unlock; } @@ -709,7 +730,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, attach->hwpt = hwpt; - num_devices = iommufd_group_device_num(igroup); + num_devices = iommufd_group_device_num(igroup, pasid); /* * Move the refcounts held by the device_array to the new hwpt. Retain a * refcount for this thread as the caller will free it. diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 5b4d8962166b..85467f53bdb2 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -406,7 +406,7 @@ struct iommufd_group { struct mutex lock; struct iommufd_ctx *ictx; struct iommu_group *group; - struct iommufd_attach *attach; + struct xarray pasid_attach; struct iommufd_sw_msi_maps required_sw_msi; phys_addr_t sw_msi_start; }; From ff3f014ebb1e2fbafd407243e57fbad314472cc1 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:32 -0700 Subject: [PATCH 1643/2157] iommufd: Enforce PASID-compatible domain in PASID path AMD IOMMU requires attaching PASID-compatible domains to PASID-capable devices. This includes the domains attached to RID and PASIDs. Related discussions in link [1] and [2]. ARM also has such a requirement, Intel does not need it, but can live up with it. Hence, iommufd is going to enforce this requirement as it is not harmful to vendors that do not need it. Mark the PASID-compatible domains and enforce it in the PASID path. [1] https://lore.kernel.org/linux-iommu/20240709182303.GK14050@ziepe.ca/ [2] https://lore.kernel.org/linux-iommu/20240822124433.GD3468552@ziepe.ca/ Link: https://patch.msgid.link/r/20250321171940.7213-11-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 17 +++++++++++++++++ drivers/iommu/iommufd/hw_pagetable.c | 3 +++ drivers/iommu/iommufd/iommufd_private.h | 1 + 3 files changed, 21 insertions(+) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 175f3d39baaa..ba21b81e43bc 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -395,6 +395,15 @@ static bool iommufd_device_is_attached(struct iommufd_device *idev, return xa_load(&attach->device_array, idev->obj.id); } +static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + if (pasid != IOMMU_NO_PASID && !hwpt->pasid_compat) + return -EINVAL; + return 0; +} + static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev, ioasid_t pasid) @@ -404,6 +413,10 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, lockdep_assert_held(&idev->igroup->lock); + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return -ENOMEM; @@ -472,6 +485,10 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, WARN_ON(pasid != IOMMU_NO_PASID); + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + old_handle = iommufd_device_get_attach_handle(idev, pasid); handle = kzalloc(sizeof(*handle), GFP_KERNEL); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index bd9dd26a5295..3724533a23c9 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -136,6 +136,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if (IS_ERR(hwpt_paging)) return ERR_CAST(hwpt_paging); hwpt = &hwpt_paging->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; INIT_LIST_HEAD(&hwpt_paging->hwpt_item); /* Pairs with iommufd_hw_pagetable_destroy() */ @@ -244,6 +245,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; refcount_inc(&parent->common.obj.users); hwpt_nested->parent = parent; @@ -300,6 +302,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; hwpt_nested->viommu = viommu; refcount_inc(&viommu->obj.users); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 85467f53bdb2..80e8c76d25f2 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -299,6 +299,7 @@ struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; struct iommufd_fault *fault; + bool pasid_compat : 1; }; struct iommufd_hwpt_paging { From 2fb69c602d57f77483b8dcdd12d17408a09f76fe Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:33 -0700 Subject: [PATCH 1644/2157] iommufd: Support pasid attach/replace This extends the below APIs to support PASID. Device drivers to manage pasid attach/replace/detach. int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); The pasid operations share underlying attach/replace/detach infrastructure with the device operations, but still have some different implications: - no reserved region per pasid otherwise SVA architecture is already broken (CPU address space doesn't count device reserved regions); - accordingly no sw_msi trick; Cache coherency enforcement is still applied to pasid operations since it is about memory accesses post page table walking (no matter the walk is per RID or per PASID). Link: https://patch.msgid.link/r/20250321171940.7213-12-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Signed-off-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 59 ++++++++++++++++++++------------ drivers/iommu/iommufd/selftest.c | 8 ++--- drivers/vfio/iommufd.c | 10 +++--- include/linux/iommufd.h | 9 +++-- 4 files changed, 53 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ba21b81e43bc..4cc6de03f76e 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -428,9 +428,12 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, } handle->idev = idev; - WARN_ON(pasid != IOMMU_NO_PASID); - rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + else + rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid, + &handle->handle); if (rc) goto out_disable_iopf; @@ -464,10 +467,12 @@ static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, { struct iommufd_attach_handle *handle; - WARN_ON(pasid != IOMMU_NO_PASID); - handle = iommufd_device_get_attach_handle(idev, pasid); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + if (pasid == IOMMU_NO_PASID) + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + else + iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid); + if (hwpt->fault) { iommufd_auto_response_faults(hwpt, handle); iommufd_fault_iopf_disable(idev); @@ -483,8 +488,6 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, struct iommufd_attach_handle *handle, *old_handle; int rc; - WARN_ON(pasid != IOMMU_NO_PASID); - rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); if (rc) return rc; @@ -502,8 +505,12 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev, } handle->idev = idev; - rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + else + rc = iommu_replace_device_pasid(hwpt->domain, idev->dev, + pasid, &handle->handle); if (rc) goto out_disable_iopf; @@ -904,22 +911,25 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, } /** - * iommufd_device_attach - Connect a device to an iommu_domain + * iommufd_device_attach - Connect a device/pasid to an iommu_domain * @idev: device to attach + * @pasid: pasid to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. + * This connects the device/pasid to an iommu_domain, either automatically + * or manually selected. Once this completes the device could do DMA with + * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, + rc = iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_attach); if (rc) return rc; @@ -934,8 +944,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); /** - * iommufd_device_replace - Change the device's iommu_domain + * iommufd_device_replace - Change the device/pasid's iommu_domain * @idev: device to change + * @pasid: pasid to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * @@ -946,27 +957,31 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be - * called if iommufd_device_attach() has already succeeded. + * called if iommufd_device_attach() has already succeeded. @pasid is + * IOMMU_NO_PASID for no pasid usage. */ -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { - return iommufd_device_change_pt(idev, IOMMU_NO_PASID, pt_id, + return iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_replace); } EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); /** - * iommufd_device_detach - Disconnect a device to an iommu_domain + * iommufd_device_detach - Disconnect a device/device to an iommu_domain * @idev: device to detach + * @pasid: pasid to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. + * @pasid is IOMMU_NO_PASID for no pasid usage. */ -void iommufd_device_detach(struct iommufd_device *idev) +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev, IOMMU_NO_PASID); + hwpt = iommufd_hw_pagetable_detach(idev, pasid); iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d55dde28e9bc..0b3f5cbf242b 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -945,7 +945,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, } sobj->idev.idev = idev; - rc = iommufd_device_attach(idev, &pt_id); + rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_unbind; @@ -960,7 +960,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, return 0; out_detach: - iommufd_device_detach(idev); + iommufd_device_detach(idev, IOMMU_NO_PASID); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -994,7 +994,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, goto out_dev_obj; } - rc = iommufd_device_replace(sobj->idev.idev, &pt_id); + rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_dev_obj; @@ -1655,7 +1655,7 @@ void iommufd_selftest_destroy(struct iommufd_object *obj) switch (sobj->type) { case TYPE_IDEV: - iommufd_device_detach(sobj->idev.idev); + iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID); iommufd_device_unbind(sobj->idev.idev); mock_dev_destroy(sobj->idev.mock_dev); break; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 516294fd901b..37e1efa2c7bf 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -128,7 +128,7 @@ void vfio_iommufd_physical_unbind(struct vfio_device *vdev) lockdep_assert_held(&vdev->dev_set->lock); if (vdev->iommufd_attached) { - iommufd_device_detach(vdev->iommufd_device); + iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; } iommufd_device_unbind(vdev->iommufd_device); @@ -146,9 +146,11 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) return -EINVAL; if (vdev->iommufd_attached) - rc = iommufd_device_replace(vdev->iommufd_device, pt_id); + rc = iommufd_device_replace(vdev->iommufd_device, + IOMMU_NO_PASID, pt_id); else - rc = iommufd_device_attach(vdev->iommufd_device, pt_id); + rc = iommufd_device_attach(vdev->iommufd_device, + IOMMU_NO_PASID, pt_id); if (rc) return rc; vdev->iommufd_attached = true; @@ -163,7 +165,7 @@ void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) if (WARN_ON(!vdev->iommufd_device) || !vdev->iommufd_attached) return; - iommufd_device_detach(vdev->iommufd_device); + iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 60eff9272551..34b6e6ca4bfa 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -54,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); -void iommufd_device_detach(struct iommufd_device *idev); +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); u32 iommufd_device_to_id(struct iommufd_device *idev); From 4c3f4f432c2d61ed266c797702bb58659f90bdff Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:34 -0700 Subject: [PATCH 1645/2157] iommufd: Enforce PASID-compatible domain for RID Per the definition of IOMMU_HWPT_ALLOC_PASID, iommufd needs to enforce the RID to use PASID-compatible domain if PASID has been attached, and vice versa. The PASID path has already enforced it. This adds the enforcement in the RID path. This enforcement requires a lock across the RID and PASID attach path, the idev->igroup->lock is used as both the RID and the PASID path holds it. Link: https://patch.msgid.link/r/20250321171940.7213-13-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 4cc6de03f76e..1605f6c0e1ee 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -399,8 +399,28 @@ static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev, ioasid_t pasid) { - if (pasid != IOMMU_NO_PASID && !hwpt->pasid_compat) - return -EINVAL; + struct iommufd_group *igroup = idev->igroup; + + lockdep_assert_held(&igroup->lock); + + if (pasid == IOMMU_NO_PASID) { + unsigned long start = IOMMU_NO_PASID; + + if (!hwpt->pasid_compat && + xa_find_after(&igroup->pasid_attach, + &start, UINT_MAX, XA_PRESENT)) + return -EINVAL; + } else { + struct iommufd_attach *attach; + + if (!hwpt->pasid_compat) + return -EINVAL; + + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + if (attach && attach->hwpt && !attach->hwpt->pasid_compat) + return -EINVAL; + } + return 0; } @@ -411,8 +431,6 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, struct iommufd_attach_handle *handle; int rc; - lockdep_assert_held(&idev->igroup->lock); - rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); if (rc) return rc; From ce15c13e7a1423cf418f825d33ab1747b151cfd6 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:35 -0700 Subject: [PATCH 1646/2157] iommu/vt-d: Add IOMMU_HWPT_ALLOC_PASID support Intel iommu driver just treats it as a nop since Intel VT-d does not have special requirement on domains attached to either the PASID or RID of a PASID-capable device. Link: https://patch.msgid.link/r/20250321171940.7213-14-yi.l.liu@intel.com Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/intel/iommu.c | 3 ++- drivers/iommu/intel/nested.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc46098f875b..7bc890609b90 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3338,7 +3338,8 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, bool first_stage; if (flags & - (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | + IOMMU_HWPT_ALLOC_PASID))) return ERR_PTR(-EOPNOTSUPP); if (nested_parent && !nested_supported(iommu)) return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index aba92c00b427..6ac5c534bef4 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -198,7 +198,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, struct dmar_domain *domain; int ret; - if (!nested_supported(iommu) || flags) + if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); /* Must be nested domain */ From dbc5f37b4f8ad833132f77c1f67e68bb11ca9b9e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:36 -0700 Subject: [PATCH 1647/2157] iommufd: Allow allocating PASID-compatible domain The underlying infrastructure has supported the PASID attach and related enforcement per the requirement of the IOMMU_HWPT_ALLOC_PASID flag. This extends iommufd to support PASID compatible domain requested by userspace. Link: https://patch.msgid.link/r/20250321171940.7213-15-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 7 ++++--- include/uapi/linux/iommufd.h | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 3724533a23c9..487779470261 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -112,7 +112,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, { const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_FAULT_ID_VALID; + IOMMU_HWPT_FAULT_ID_VALID | + IOMMU_HWPT_ALLOC_PASID; const struct iommu_ops *ops = dev_iommu_ops(idev->dev); struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; @@ -233,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) || !user_data->len || !ops->domain_alloc_nested) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent || @@ -290,7 +291,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, struct iommufd_hw_pagetable *hwpt; int rc; - if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) return ERR_PTR(-EOPNOTSUPP); if (!user_data->len) return ERR_PTR(-EOPNOTSUPP); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 8719d4f5d618..6901804ec736 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -393,6 +393,9 @@ struct iommu_vfio_ioas { * Any domain attached to the non-PASID part of the * device must also be flagged, otherwise attaching a * PASID will blocked. + * For the user that wants to attach PASID, ioas is + * not recommended for both the non-PASID part + * and PASID part of the device. * If IOMMU does not support PASID it will return * error (-EOPNOTSUPP). */ From 9eb59204d5197b4add63968c8c5b7633631f9a5a Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:37 -0700 Subject: [PATCH 1648/2157] iommufd/selftest: Add set_dev_pasid in mock iommu The callback is needed to make pasid_attach/detach path complete for mock device. A nop is enough for set_dev_pasid. A MOCK_FLAGS_DEVICE_PASID is added to indicate a pasid-capable mock device for the pasid test cases. Other test cases will still create a non-pasid mock device. While the mock iommu always pretends to be pasid-capable. Link: https://patch.msgid.link/r/20250321171940.7213-16-yi.l.liu@intel.com Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 4 +++ drivers/iommu/iommufd/selftest.c | 37 ++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 87e9165cea27..1a066feb8697 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -49,6 +49,7 @@ enum { enum { MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1, + MOCK_FLAGS_DEVICE_PASID = 1 << 2, }; enum { @@ -154,6 +155,9 @@ struct iommu_test_cmd { }; #define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) +/* Mock device/iommu PASID width */ +#define MOCK_PASID_WIDTH 20 + /* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */ #define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef #define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 0b3f5cbf242b..aa3da0adc4e1 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -223,8 +223,16 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, return 0; } +static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) +{ + return 0; +} + static const struct iommu_domain_ops mock_blocking_ops = { .attach_dev = mock_domain_nop_attach, + .set_dev_pasid = mock_domain_set_dev_pasid_nop }; static struct iommu_domain mock_blocking_domain = { @@ -366,7 +374,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, struct mock_iommu_domain_nested *mock_nested; struct mock_iommu_domain *mock_parent; - if (flags) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); if (!parent || parent->ops != mock_ops.default_domain_ops) return ERR_PTR(-EINVAL); @@ -388,7 +396,8 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags, { bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_ALLOC_NEST_PARENT; + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_PASID; struct mock_dev *mdev = to_mock_dev(dev); bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY; struct mock_iommu_domain *mock; @@ -608,7 +617,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, struct mock_viommu *mock_viommu = to_mock_viommu(viommu); struct mock_iommu_domain_nested *mock_nested; - if (flags) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); mock_nested = __mock_domain_alloc_nested(user_data); @@ -743,6 +752,7 @@ static const struct iommu_ops mock_ops = { .map_pages = mock_domain_map_pages, .unmap_pages = mock_domain_unmap_pages, .iova_to_phys = mock_domain_iova_to_phys, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }, }; @@ -803,6 +813,7 @@ static struct iommu_domain_ops domain_nested_ops = { .free = mock_domain_free_nested, .attach_dev = mock_domain_nop_attach, .cache_invalidate_user = mock_domain_cache_invalidate_user, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }; static inline struct iommufd_hw_pagetable * @@ -862,11 +873,17 @@ static void mock_dev_release(struct device *dev) static struct mock_dev *mock_dev_create(unsigned long dev_flags) { + struct property_entry prop[] = { + PROPERTY_ENTRY_U32("pasid-num-bits", 0), + {}, + }; + const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY | + MOCK_FLAGS_DEVICE_HUGE_IOVA | + MOCK_FLAGS_DEVICE_PASID; struct mock_dev *mdev; int rc, i; - if (dev_flags & - ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA)) + if (dev_flags & ~valid_flags) return ERR_PTR(-EINVAL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); @@ -890,6 +907,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) if (rc) goto err_put; + if (dev_flags & MOCK_FLAGS_DEVICE_PASID) + prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH); + + rc = device_create_managed_software_node(&mdev->dev, prop, NULL); + if (rc) { + dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc); + goto err_put; + } + rc = device_add(&mdev->dev); if (rc) goto err_put; @@ -1778,6 +1804,7 @@ int __init iommufd_test_init(void) init_completion(&mock_iommu.complete); mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq"); + mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH); return 0; From 068e14025158986842f783147f9e41a59fbc97cd Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:38 -0700 Subject: [PATCH 1649/2157] iommufd/selftest: Add a helper to get test device There is need to get the selftest device (sobj->type == TYPE_IDEV) in multiple places, so have a helper to for it. Link: https://patch.msgid.link/r/20250321171940.7213-17-yi.l.liu@intel.com Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 36 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index aa3da0adc4e1..04a4b84f5fa1 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -996,39 +996,49 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, return rc; } -/* Replace the mock domain with a manually allocated hw_pagetable */ -static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, - unsigned int device_id, u32 pt_id, - struct iommu_test_cmd *cmd) +static struct selftest_obj * +iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id) { struct iommufd_object *dev_obj; struct selftest_obj *sobj; - int rc; /* * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure * it doesn't race with detach, which is not allowed. */ - dev_obj = - iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); + dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST); if (IS_ERR(dev_obj)) - return PTR_ERR(dev_obj); + return ERR_CAST(dev_obj); sobj = to_selftest_obj(dev_obj); if (sobj->type != TYPE_IDEV) { - rc = -EINVAL; - goto out_dev_obj; + iommufd_put_object(ictx, dev_obj); + return ERR_PTR(-EINVAL); } + return sobj; +} + +/* Replace the mock domain with a manually allocated hw_pagetable */ +static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, + unsigned int device_id, u32 pt_id, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id); if (rc) - goto out_dev_obj; + goto out_sobj; cmd->mock_domain_replace.pt_id = pt_id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); -out_dev_obj: - iommufd_put_object(ucmd->ictx, dev_obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); return rc; } From c1b52b0a97aeae22462496cda064323255d10b3b Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:39 -0700 Subject: [PATCH 1650/2157] iommufd/selftest: Add test ops to test pasid attach/detach This adds 4 test ops for pasid attach/replace/detach testing. There are ops to attach/detach pasid, and also op to check the attached hwpt of a pasid. Link: https://patch.msgid.link/r/20250321171940.7213-18-yi.l.liu@intel.com Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 26 +++++ drivers/iommu/iommufd/selftest.c | 162 +++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 1a066feb8697..1cd7e8394129 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -25,6 +25,10 @@ enum { IOMMU_TEST_OP_TRIGGER_IOPF, IOMMU_TEST_OP_DEV_CHECK_CACHE, IOMMU_TEST_OP_TRIGGER_VEVENT, + IOMMU_TEST_OP_PASID_ATTACH, + IOMMU_TEST_OP_PASID_REPLACE, + IOMMU_TEST_OP_PASID_DETACH, + IOMMU_TEST_OP_PASID_CHECK_HWPT, }; enum { @@ -62,6 +66,9 @@ enum { MOCK_DEV_CACHE_NUM = 4, }; +/* Reserved for special pasid replace test */ +#define IOMMU_TEST_PASID_RESERVED 1024 + struct iommu_test_cmd { __u32 size; __u32 op; @@ -150,6 +157,25 @@ struct iommu_test_cmd { struct { __u32 dev_id; } trigger_vevent; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_attach; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_replace; + struct { + __u32 pasid; + /* @id is stdev_id */ + } pasid_detach; + struct { + __u32 pasid; + __u32 hwpt_id; + /* @id is stdev_id */ + } pasid_check; }; __u32 last; }; diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 04a4b84f5fa1..18d9a216eb30 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -167,6 +167,7 @@ struct mock_dev { unsigned long vdev_id; int id; u32 cache[MOCK_DEV_CACHE_NUM]; + atomic_t pasid_1024_fake_error; }; static inline struct mock_dev *to_mock_dev(struct device *dev) @@ -227,6 +228,34 @@ static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain, struct device *dev, ioasid_t pasid, struct iommu_domain *old) { + struct mock_dev *mdev = to_mock_dev(dev); + + /* + * Per the first attach with pasid 1024, set the + * mdev->pasid_1024_fake_error. Hence the second call of this op + * can fake an error to validate the error path of the core. This + * is helpful to test the case in which the iommu core needs to + * rollback to the old domain due to driver failure. e.g. replace. + * User should be careful about the third call of this op, it shall + * succeed since the mdev->pasid_1024_fake_error is cleared in the + * second call. + */ + if (pasid == 1024) { + if (domain->type == IOMMU_DOMAIN_BLOCKED) { + atomic_set(&mdev->pasid_1024_fake_error, 0); + } else if (atomic_read(&mdev->pasid_1024_fake_error)) { + /* + * Clear the flag, and fake an error to fail the + * replacement. + */ + atomic_set(&mdev->pasid_1024_fake_error, 0); + return -ENOMEM; + } else { + /* Set the flag to fake an error in next call */ + atomic_set(&mdev->pasid_1024_fake_error, 1); + } + } + return 0; } @@ -1685,6 +1714,131 @@ static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd, return rc; } +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) +{ + struct iommufd_object *pt_obj; + + pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) + return ERR_CAST(pt_obj); + + if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED && + pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) { + iommufd_put_object(ucmd->ictx, pt_obj); + return ERR_PTR(-EINVAL); + } + + return container_of(pt_obj, struct iommufd_hw_pagetable, obj); +} + +static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + u32 hwpt_id = cmd->pasid_check.hwpt_id; + struct iommu_domain *attached_domain; + struct iommu_attach_handle *handle; + struct iommufd_hw_pagetable *hwpt; + struct selftest_obj *sobj; + struct mock_dev *mdev; + int rc = 0; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + mdev = sobj->idev.mock_dev; + + handle = iommu_attach_handle_get(mdev->dev.iommu_group, + cmd->pasid_check.pasid, 0); + if (IS_ERR(handle)) + attached_domain = NULL; + else + attached_domain = handle->domain; + + /* hwpt_id == 0 means to check if pasid is detached */ + if (!hwpt_id) { + if (attached_domain) + rc = -EINVAL; + goto out_sobj; + } + + hwpt = iommufd_get_hwpt(ucmd, hwpt_id); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_sobj; + } + + if (attached_domain != hwpt->domain) + rc = -EINVAL; + + iommufd_put_object(ucmd->ictx, &hwpt->obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + iommufd_device_detach(sobj->idev.idev, + cmd->pasid_attach.pasid); + +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid); + iommufd_put_object(ucmd->ictx, &sobj->obj); + return 0; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = to_selftest_obj(obj); @@ -1768,6 +1922,14 @@ int iommufd_test(struct iommufd_ucmd *ucmd) return iommufd_test_trigger_iopf(ucmd, cmd); case IOMMU_TEST_OP_TRIGGER_VEVENT: return iommufd_test_trigger_vevent(ucmd, cmd); + case IOMMU_TEST_OP_PASID_ATTACH: + return iommufd_test_pasid_attach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_REPLACE: + return iommufd_test_pasid_replace(ucmd, cmd); + case IOMMU_TEST_OP_PASID_DETACH: + return iommufd_test_pasid_detach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_CHECK_HWPT: + return iommufd_test_pasid_check_hwpt(ucmd, cmd); default: return -EOPNOTSUPP; } From d57a1fb3425513ec0b02acb9a9f81e5da99b4b85 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:40 -0700 Subject: [PATCH 1651/2157] iommufd/selftest: Add coverage for iommufd pasid attach/detach This tests iommufd pasid attach/replace/detach. Link: https://patch.msgid.link/r/20250321171940.7213-19-yi.l.liu@intel.com Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 301 ++++++++++++++++++ .../selftests/iommu/iommufd_fail_nth.c | 49 ++- tools/testing/selftests/iommu/iommufd_utils.h | 97 +++++- 3 files changed, 437 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 156c74da53cd..c39222b9869b 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2996,4 +2996,305 @@ TEST_F(iommufd_viommu, vdevice_cache) } } +FIXTURE(iommufd_device_pasid) +{ + int fd; + uint32_t ioas_id; + uint32_t hwpt_id; + uint32_t stdev_id; + uint32_t device_id; + uint32_t no_pasid_stdev_id; + uint32_t no_pasid_device_id; +}; + +FIXTURE_VARIANT(iommufd_device_pasid) +{ + bool pasid_capable; +}; + +FIXTURE_SETUP(iommufd_device_pasid) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_PASID, + &self->stdev_id, &self->hwpt_id, + &self->device_id); + if (!variant->pasid_capable) + test_cmd_mock_domain_flags(self->ioas_id, 0, + &self->no_pasid_stdev_id, NULL, + &self->no_pasid_device_id); +} + +FIXTURE_TEARDOWN(iommufd_device_pasid) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_device_pasid, no_pasid) +{ + .pasid_capable = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_device_pasid, has_pasid) +{ + .pasid_capable = true, +}; + +TEST_F(iommufd_device_pasid, pasid_attach) +{ + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t nested_hwpt_id[3] = {}; + uint32_t parent_hwpt_id = 0; + uint32_t fault_id, fault_fd; + uint32_t s2_hwpt_id = 0; + uint32_t iopf_hwpt_id; + uint32_t pasid = 100; + uint32_t viommu_id; + + /* Allocate two nested hwpts sharing one common parent hwpt */ + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &parent_hwpt_id); + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[1], + IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + + /* Fault related preparation */ + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID, + &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + /* Allocate a regular nested hwpt based on viommu */ + test_cmd_viommu_alloc(self->device_id, parent_hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, + &viommu_id); + test_cmd_hwpt_alloc_nested(self->device_id, viommu_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[2], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_PASID, + &s2_hwpt_id); + + /* Attach RID to non-pasid compat domain, */ + test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); + /* then attach to pasid should fail */ + test_err_pasid_attach(EINVAL, pasid, s2_hwpt_id); + + /* Attach RID to pasid compat domain, */ + test_cmd_mock_domain_replace(self->stdev_id, s2_hwpt_id); + /* then attach to pasid should succeed, */ + test_cmd_pasid_attach(pasid, nested_hwpt_id[0]); + /* but attach RID to non-pasid compat domain should fail now. */ + test_err_mock_domain_replace(EINVAL, self->stdev_id, parent_hwpt_id); + /* + * Detach hwpt from pasid 100, and check if the pasid 100 + * has null domain. + */ + test_cmd_pasid_detach(pasid); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + /* RID is attached to pasid-comapt domain, pasid path is not used */ + + if (!variant->pasid_capable) { + /* + * PASID-compatible domain can be used by non-PASID-capable + * device. + */ + test_cmd_mock_domain_replace(self->no_pasid_stdev_id, nested_hwpt_id[0]); + test_cmd_mock_domain_replace(self->no_pasid_stdev_id, self->ioas_id); + /* + * Attach hwpt to pasid 100 of non-PASID-capable device, + * should fail, no matter domain is pasid-comapt or not. + */ + EXPECT_ERRNO(EINVAL, + _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id, + pasid, parent_hwpt_id)); + EXPECT_ERRNO(EINVAL, + _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id, + pasid, s2_hwpt_id)); + } + + /* + * Attach non pasid compat hwpt to pasid-capable device, should + * fail, and have null domain. + */ + test_err_pasid_attach(EINVAL, pasid, parent_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach ioas to pasid 100, should fail, domain should + * be null. + */ + test_err_pasid_attach(EINVAL, pasid, self->ioas_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach the s2_hwpt to pasid 100, should succeed, domain should + * be valid. + */ + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Try attach pasid 100 with another hwpt, should FAIL + * as attach does not allow overwrite, use REPLACE instead. + */ + test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]); + + /* + * Detach hwpt from pasid 100 for next test, should succeed, + * and have null domain. + */ + test_cmd_pasid_detach(pasid); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach nested hwpt to pasid 100, should succeed, domain + * should be valid. + */ + test_cmd_pasid_attach(pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[0])); + + /* Attach to pasid 100 which has been attached, should fail. */ + test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]); + + /* cleanup pasid 100 */ + test_cmd_pasid_detach(pasid); + + /* Replace tests */ + + pasid = 200; + /* + * Replace pasid 200 without attaching it, should fail + * with -EINVAL. + */ + test_err_pasid_replace(EINVAL, pasid, s2_hwpt_id); + + /* + * Attach the s2 hwpt to pasid 200, should succeed, domain should + * be valid. + */ + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace pasid 200 with self->ioas_id, should fail + * and domain should be the prior s2 hwpt. + */ + test_err_pasid_replace(EINVAL, pasid, self->ioas_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace a nested hwpt for pasid 200, should succeed, + * and have valid domain. + */ + test_cmd_pasid_replace(pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[0])); + + /* + * Replace with another nested hwpt for pasid 200, should + * succeed, and have valid domain. + */ + test_cmd_pasid_replace(pasid, nested_hwpt_id[1]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[1])); + + /* cleanup pasid 200 */ + test_cmd_pasid_detach(pasid); + + /* Negative Tests for pasid replace, use pasid 1024 */ + + /* + * Attach the s2 hwpt to pasid 1024, should succeed, domain should + * be valid. + */ + pasid = 1024; + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace pasid 1024 with nested_hwpt_id[0], should fail, + * but have the old valid domain. This is a designed + * negative case. Normally, this shall succeed. + */ + test_err_pasid_replace(ENOMEM, pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* cleanup pasid 1024 */ + test_cmd_pasid_detach(pasid); + + /* Attach to iopf-capable hwpt */ + + /* + * Attach an iopf hwpt to pasid 2048, should succeed, domain should + * be valid. + */ + pasid = 2048; + test_cmd_pasid_attach(pasid, iopf_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, iopf_hwpt_id)); + + test_cmd_trigger_iopf_pasid(self->device_id, pasid, fault_fd); + + /* + * Replace with s2_hwpt_id for pasid 2048, should + * succeed, and have valid domain. + */ + test_cmd_pasid_replace(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* cleanup pasid 2048 */ + test_cmd_pasid_detach(pasid); + + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); + + /* Detach the s2_hwpt_id from RID */ + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 99a7f7897bb2..8fd6f4500090 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -209,12 +209,16 @@ FIXTURE(basic_fail_nth) { int fd; uint32_t access_id; + uint32_t stdev_id; + uint32_t pasid; }; FIXTURE_SETUP(basic_fail_nth) { self->fd = -1; self->access_id = 0; + self->stdev_id = 0; + self->pasid = 0; //test should use a non-zero value } FIXTURE_TEARDOWN(basic_fail_nth) @@ -226,6 +230,8 @@ FIXTURE_TEARDOWN(basic_fail_nth) rc = _test_cmd_destroy_access(self->access_id); assert(rc == 0); } + if (self->pasid && self->stdev_id) + _test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid); teardown_iommufd(self->fd, _metadata); } @@ -622,9 +628,9 @@ TEST_FAIL_NTH(basic_fail_nth, device) uint32_t fault_id, fault_fd; uint32_t veventq_id, veventq_fd; uint32_t fault_hwpt_id; + uint32_t test_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; - uint32_t stdev_id; uint32_t idev_id; uint32_t hwpt_id; uint32_t viommu_id; @@ -655,25 +661,29 @@ TEST_FAIL_NTH(basic_fail_nth, device) fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL, - &idev_id)) + if (_test_cmd_mock_domain_flags(self->fd, ioas_id, + MOCK_FLAGS_DEVICE_PASID, + &self->stdev_id, NULL, &idev_id)) return -1; if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id, + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_PASID, &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; - if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) + if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, ioas_id2, NULL)) return -1; - if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) + if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, hwpt_id, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, - IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id, + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_PASID, + &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; @@ -699,6 +709,31 @@ TEST_FAIL_NTH(basic_fail_nth, device) return -1; close(veventq_fd); + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_PASID, + &test_hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) + return -1; + + /* Tests for pasid attach/replace/detach */ + + self->pasid = 200; + + if (_test_cmd_pasid_attach(self->fd, self->stdev_id, + self->pasid, hwpt_id)) { + self->pasid = 0; + return -1; + } + + if (_test_cmd_pasid_replace(self->fd, self->stdev_id, + self->pasid, test_hwpt_id)) + return -1; + + if (_test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid)) + return -1; + + self->pasid = 0; + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 6f2ba2fa8f76..27794b6f58fc 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -843,14 +843,15 @@ static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) ASSERT_NE(0, *(fault_fd)); \ }) -static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) +static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid, + __u32 fault_fd) { struct iommu_test_cmd trigger_iopf_cmd = { .size = sizeof(trigger_iopf_cmd), .op = IOMMU_TEST_OP_TRIGGER_IOPF, .trigger_iopf = { .dev_id = device_id, - .pasid = 0x1, + .pasid = pasid, .grpid = 0x2, .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE, .addr = 0xdeadbeaf, @@ -881,7 +882,10 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) } #define test_cmd_trigger_iopf(device_id, fault_fd) \ - ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, 0x1, fault_fd)) +#define test_cmd_trigger_iopf_pasid(device_id, pasid, fault_fd) \ + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, \ + pasid, fault_fd)) static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, __u32 type, __u32 flags, __u32 *viommu_id) @@ -1051,3 +1055,90 @@ static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents, EXPECT_ERRNO(_errno, \ _test_cmd_read_vevents(self->fd, event_fd, nvevents, \ virt_id, prev_seq)) + +static int _test_cmd_pasid_attach(int fd, __u32 stdev_id, __u32 pasid, + __u32 pt_id) +{ + struct iommu_test_cmd test_attach = { + .size = sizeof(test_attach), + .op = IOMMU_TEST_OP_PASID_ATTACH, + .id = stdev_id, + .pasid_attach = { + .pasid = pasid, + .pt_id = pt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_ATTACH), + &test_attach); +} + +#define test_cmd_pasid_attach(pasid, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +#define test_err_pasid_attach(_errno, pasid, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_pasid_attach(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +static int _test_cmd_pasid_replace(int fd, __u32 stdev_id, __u32 pasid, + __u32 pt_id) +{ + struct iommu_test_cmd test_replace = { + .size = sizeof(test_replace), + .op = IOMMU_TEST_OP_PASID_REPLACE, + .id = stdev_id, + .pasid_replace = { + .pasid = pasid, + .pt_id = pt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_REPLACE), + &test_replace); +} + +#define test_cmd_pasid_replace(pasid, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_pasid_replace(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +#define test_err_pasid_replace(_errno, pasid, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_pasid_replace(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +static int _test_cmd_pasid_detach(int fd, __u32 stdev_id, __u32 pasid) +{ + struct iommu_test_cmd test_detach = { + .size = sizeof(test_detach), + .op = IOMMU_TEST_OP_PASID_DETACH, + .id = stdev_id, + .pasid_detach = { + .pasid = pasid, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_DETACH), + &test_detach); +} + +#define test_cmd_pasid_detach(pasid) \ + ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, self->stdev_id, pasid)) + +static int test_cmd_pasid_check_hwpt(int fd, __u32 stdev_id, __u32 pasid, + __u32 hwpt_id) +{ + struct iommu_test_cmd test_pasid_check = { + .size = sizeof(test_pasid_check), + .op = IOMMU_TEST_OP_PASID_CHECK_HWPT, + .id = stdev_id, + .pasid_check = { + .pasid = pasid, + .hwpt_id = hwpt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_CHECK_HWPT), + &test_pasid_check); +} From 7fe6b987166b901efc5c6fce5fe853c9ebb835be Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:39 -0700 Subject: [PATCH 1652/2157] ida: Add ida_find_first_range() There is no helpers for user to check if a given ID is allocated or not, neither a helper to loop all the allocated IDs in an IDA and do something for cleanup. With the two needs, a helper to get the lowest allocated ID of a range and two variants based on it. Caller can check if a given ID is allocated or not by: bool ida_exists(struct ida *ida, unsigned int id) Caller can iterate all allocated IDs by: int id; while ((id = ida_find_first(&pasid_ida)) >= 0) { //anything to do with the allocated ID ida_free(pasid_ida, pasid); } Link: https://patch.msgid.link/r/20250321180143.8468-2-yi.l.liu@intel.com Cc: Matthew Wilcox (Oracle) Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Matthew Wilcox (Oracle) Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/idr.h | 11 +++++++ lib/idr.c | 67 +++++++++++++++++++++++++++++++++++++++++++ lib/test_ida.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index da5f5fa4a3a6..718f9b1b91af 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -257,6 +257,7 @@ struct ida { int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. @@ -328,4 +329,14 @@ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } + +static inline bool ida_exists(struct ida *ida, unsigned int id) +{ + return ida_find_first_range(ida, id, id) == id; +} + +static inline int ida_find_first(struct ida *ida) +{ + return ida_find_first_range(ida, 0, ~0); +} #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index da36054c3ca0..e2adc457abb4 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -476,6 +476,73 @@ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, } EXPORT_SYMBOL(ida_alloc_range); +/** + * ida_find_first_range - Get the lowest used ID. + * @ida: IDA handle. + * @min: Lowest ID to get. + * @max: Highest ID to get. + * + * Get the lowest used ID between @min and @max, inclusive. The returned + * ID will not exceed %INT_MAX, even if @max is larger. + * + * Context: Any context. Takes and releases the xa_lock. + * Return: The lowest used ID, or errno if no used ID is found. + */ +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max) +{ + unsigned long index = min / IDA_BITMAP_BITS; + unsigned int offset = min % IDA_BITMAP_BITS; + unsigned long *addr, size, bit; + unsigned long tmp = 0; + unsigned long flags; + void *entry; + int ret; + + if ((int)min < 0) + return -EINVAL; + if ((int)max < 0) + max = INT_MAX; + + xa_lock_irqsave(&ida->xa, flags); + + entry = xa_find(&ida->xa, &index, max / IDA_BITMAP_BITS, XA_PRESENT); + if (!entry) { + ret = -ENOENT; + goto err_unlock; + } + + if (index > min / IDA_BITMAP_BITS) + offset = 0; + if (index * IDA_BITMAP_BITS + offset > max) { + ret = -ENOENT; + goto err_unlock; + } + + if (xa_is_value(entry)) { + tmp = xa_to_value(entry); + addr = &tmp; + size = BITS_PER_XA_VALUE; + } else { + addr = ((struct ida_bitmap *)entry)->bitmap; + size = IDA_BITMAP_BITS; + } + + bit = find_next_bit(addr, size, offset); + + xa_unlock_irqrestore(&ida->xa, flags); + + if (bit == size || + index * IDA_BITMAP_BITS + bit > max) + return -ENOENT; + + return index * IDA_BITMAP_BITS + bit; + +err_unlock: + xa_unlock_irqrestore(&ida->xa, flags); + return ret; +} +EXPORT_SYMBOL(ida_find_first_range); + /** * ida_free() - Release an allocated ID. * @ida: IDA handle. diff --git a/lib/test_ida.c b/lib/test_ida.c index c80155a1956d..63078f8dc13f 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -189,6 +189,75 @@ static void ida_check_bad_free(struct ida *ida) IDA_BUG_ON(ida, !ida_is_empty(ida)); } +/* + * Check ida_find_first_range() and varriants. + */ +static void ida_check_find_first(struct ida *ida) +{ + /* IDA is empty; all of the below should be not exist */ + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, ida_exists(ida, 3)); + IDA_BUG_ON(ida, ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a single value entry */ + IDA_BUG_ON(ida, ida_alloc_min(ida, 3, GFP_KERNEL) != 3); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + IDA_BUG_ON(ida, ida_alloc_min(ida, 63, GFP_KERNEL) != 63); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a single bitmap */ + IDA_BUG_ON(ida, ida_alloc_min(ida, 1023, GFP_KERNEL) != 1023); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, !ida_exists(ida, 1023)); + IDA_BUG_ON(ida, ida_exists(ida, (1 << 20) - 1)); + + /* IDA contains a tree */ + IDA_BUG_ON(ida, ida_alloc_min(ida, (1 << 20) - 1, GFP_KERNEL) != (1 << 20) - 1); + IDA_BUG_ON(ida, ida_exists(ida, 0)); + IDA_BUG_ON(ida, !ida_exists(ida, 3)); + IDA_BUG_ON(ida, !ida_exists(ida, 63)); + IDA_BUG_ON(ida, !ida_exists(ida, 1023)); + IDA_BUG_ON(ida, !ida_exists(ida, (1 << 20) - 1)); + + /* Now try to find first */ + IDA_BUG_ON(ida, ida_find_first(ida) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, -1, 2) != -EINVAL); + IDA_BUG_ON(ida, ida_find_first_range(ida, 0, 2) != -ENOENT); // no used ID + IDA_BUG_ON(ida, ida_find_first_range(ida, 0, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 3, 3) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 2, 4) != 3); + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 3) != -ENOENT); // min > max, fail + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 60) != -ENOENT); // no used ID + IDA_BUG_ON(ida, ida_find_first_range(ida, 4, 64) != 63); + IDA_BUG_ON(ida, ida_find_first_range(ida, 63, 63) != 63); + IDA_BUG_ON(ida, ida_find_first_range(ida, 64, 1026) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1023, 1023) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1023, (1 << 20) - 1) != 1023); + IDA_BUG_ON(ida, ida_find_first_range(ida, 1024, (1 << 20) - 1) != (1 << 20) - 1); + IDA_BUG_ON(ida, ida_find_first_range(ida, (1 << 20), INT_MAX) != -ENOENT); + + ida_free(ida, 3); + ida_free(ida, 63); + ida_free(ida, 1023); + ida_free(ida, (1 << 20) - 1); + + IDA_BUG_ON(ida, !ida_is_empty(ida)); +} + static DEFINE_IDA(ida); static int ida_checks(void) @@ -202,6 +271,7 @@ static int ida_checks(void) ida_check_max(&ida); ida_check_conv(&ida); ida_check_bad_free(&ida); + ida_check_find_first(&ida); printk("IDA: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run != tests_passed) ? 0 : -EINVAL; From 290641346d0d1eaf400c4f968d5b2cd91f483733 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:40 -0700 Subject: [PATCH 1653/2157] vfio-iommufd: Support pasid [at|de]tach for physical VFIO devices This adds pasid_at|de]tach_ioas ops for attaching hwpt to pasid of a device and the helpers for it. For now, only vfio-pci supports pasid attach/detach. Link: https://patch.msgid.link/r/20250321180143.8468-3-yi.l.liu@intel.com Signed-off-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/iommufd.c | 50 +++++++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci.c | 2 ++ include/linux/vfio.h | 14 +++++++++++ 3 files changed, 66 insertions(+) diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 37e1efa2c7bf..c8c3a2d53f86 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -119,14 +119,22 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, if (IS_ERR(idev)) return PTR_ERR(idev); vdev->iommufd_device = idev; + ida_init(&vdev->pasids); return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); void vfio_iommufd_physical_unbind(struct vfio_device *vdev) { + int pasid; + lockdep_assert_held(&vdev->dev_set->lock); + while ((pasid = ida_find_first(&vdev->pasids)) >= 0) { + iommufd_device_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); + } + if (vdev->iommufd_attached) { iommufd_device_detach(vdev->iommufd_device, IOMMU_NO_PASID); vdev->iommufd_attached = false; @@ -170,6 +178,48 @@ void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id) +{ + int rc; + + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return -EINVAL; + + if (ida_exists(&vdev->pasids, pasid)) + return iommufd_device_replace(vdev->iommufd_device, + pasid, pt_id); + + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); + if (rc < 0) + return rc; + + rc = iommufd_device_attach(vdev->iommufd_device, pasid, pt_id); + if (rc) + ida_free(&vdev->pasids, pasid); + + return rc; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_attach_ioas); + +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return; + + if (!ida_exists(&vdev->pasids, pasid)) + return; + + iommufd_device_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_detach_ioas); + /* * The emulated standard ops mean that vfio_device is going to use the * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e727941f589d..6f7ae7e5b7b0 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -144,6 +144,8 @@ static const struct vfio_device_ops vfio_pci_ops = { .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, .detach_ioas = vfio_iommufd_physical_detach_ioas, + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 000a6cab2d31..707b00772ce1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -67,6 +67,7 @@ struct vfio_device { struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -91,6 +92,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. * @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -115,6 +118,9 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, + u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -139,6 +145,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -166,6 +176,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) From ad744ed5dd8b70e9256fc1ff18aaaffeedf5f21e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:41 -0700 Subject: [PATCH 1654/2157] vfio: VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT support pasid This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach a given pasid of a vfio device to/from an IOAS/HWPT. Link: https://patch.msgid.link/r/20250321180143.8468-4-yi.l.liu@intel.com Reviewed-by: Alex Williamson Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/vfio/device_cdev.c | 60 +++++++++++++++++++++++++++++++++----- include/uapi/linux/vfio.h | 29 +++++++++++------- 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index bb1817bd4ff3..281a8dc3ed49 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -162,9 +162,9 @@ void vfio_df_unbind_iommufd(struct vfio_device_file *df) int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, struct vfio_device_attach_iommufd_pt __user *arg) { - struct vfio_device *device = df->device; struct vfio_device_attach_iommufd_pt attach; - unsigned long minsz; + struct vfio_device *device = df->device; + unsigned long minsz, xend = 0; int ret; minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); @@ -172,11 +172,34 @@ int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, if (copy_from_user(&attach, arg, minsz)) return -EFAULT; - if (attach.argsz < minsz || attach.flags) + if (attach.argsz < minsz) return -EINVAL; + if (attach.flags & ~VFIO_DEVICE_ATTACH_PASID) + return -EINVAL; + + if (attach.flags & VFIO_DEVICE_ATTACH_PASID) { + if (!device->ops->pasid_attach_ioas) + return -EOPNOTSUPP; + xend = offsetofend(struct vfio_device_attach_iommufd_pt, pasid); + } + + if (xend) { + if (attach.argsz < xend) + return -EINVAL; + + if (copy_from_user((void *)&attach + minsz, + (void __user *)arg + minsz, xend - minsz)) + return -EFAULT; + } + mutex_lock(&device->dev_set->lock); - ret = device->ops->attach_ioas(device, &attach.pt_id); + if (attach.flags & VFIO_DEVICE_ATTACH_PASID) + ret = device->ops->pasid_attach_ioas(device, + attach.pasid, + &attach.pt_id); + else + ret = device->ops->attach_ioas(device, &attach.pt_id); if (ret) goto out_unlock; @@ -198,20 +221,41 @@ int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, int vfio_df_ioctl_detach_pt(struct vfio_device_file *df, struct vfio_device_detach_iommufd_pt __user *arg) { - struct vfio_device *device = df->device; struct vfio_device_detach_iommufd_pt detach; - unsigned long minsz; + struct vfio_device *device = df->device; + unsigned long minsz, xend = 0; minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); if (copy_from_user(&detach, arg, minsz)) return -EFAULT; - if (detach.argsz < minsz || detach.flags) + if (detach.argsz < minsz) return -EINVAL; + if (detach.flags & ~VFIO_DEVICE_DETACH_PASID) + return -EINVAL; + + if (detach.flags & VFIO_DEVICE_DETACH_PASID) { + if (!device->ops->pasid_detach_ioas) + return -EOPNOTSUPP; + xend = offsetofend(struct vfio_device_detach_iommufd_pt, pasid); + } + + if (xend) { + if (detach.argsz < xend) + return -EINVAL; + + if (copy_from_user((void *)&detach + minsz, + (void __user *)arg + minsz, xend - minsz)) + return -EFAULT; + } + mutex_lock(&device->dev_set->lock); - device->ops->detach_ioas(device); + if (detach.flags & VFIO_DEVICE_DETACH_PASID) + device->ops->pasid_detach_ioas(device, detach.pasid); + else + device->ops->detach_ioas(device); mutex_unlock(&device->dev_set->lock); return 0; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index c8dbf8219c4f..6899da70b929 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -931,29 +931,34 @@ struct vfio_device_bind_iommufd { * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, * struct vfio_device_attach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for attach. * @pt_id: Input the target id which can represent an ioas or a hwpt * allocated via iommufd subsystem. * Output the input ioas id or the attached hwpt id which could * be the specified hwpt itself or a hwpt automatically created * for the specified ioas by kernel during the attachment. + * @pasid: The pasid to be attached, only meaningful when + * VFIO_DEVICE_ATTACH_PASID is set in @flags * * Associate the device with an address space within the bound iommufd. * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only * allowed on cdev fds. * - * If a vfio device is currently attached to a valid hw_pagetable, without doing - * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl - * passing in another hw_pagetable (hwpt) id is allowed. This action, also known - * as a hw_pagetable replacement, will replace the device's currently attached - * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. + * If a vfio device or a pasid of this device is currently attached to a valid + * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second + * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed. + * This action, also known as a hw_pagetable replacement, will replace the + * currently attached hwpt of the device or the pasid of this device with a new + * hwpt corresponding to the given pt_id. * * Return: 0 on success, -errno on failure. */ struct vfio_device_attach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_ATTACH_PASID (1 << 0) __u32 pt_id; + __u32 pasid; }; #define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) @@ -962,17 +967,21 @@ struct vfio_device_attach_iommufd_pt { * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, * struct vfio_device_detach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for detach. + * @pasid: The pasid to be detached, only meaningful when + * VFIO_DEVICE_DETACH_PASID is set in @flags * - * Remove the association of the device and its current associated address - * space. After it, the device should be in a blocking DMA state. This is only - * allowed on cdev fds. + * Remove the association of the device or a pasid of the device and its current + * associated address space. After it, the device or the pasid should be in a + * blocking DMA state. This is only allowed on cdev fds. * * Return: 0 on success, -errno on failure. */ struct vfio_device_detach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_DETACH_PASID (1 << 0) + __u32 pasid; }; #define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) From 0b7747a5477eb22d041997bc085fa8d492fa9b96 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Mar 2025 18:19:55 +0100 Subject: [PATCH 1655/2157] pidfs: cleanup the usage of do_notify_pidfd() If a single-threaded process exits do_notify_pidfd() will be called twice, from exit_notify() and right after that from do_notify_parent(). 1. Change exit_notify() to call do_notify_pidfd() if the exiting task is not ptraced and it is not a group leader. 2. Change do_notify_parent() to call do_notify_pidfd() unconditionally. If tsk is not ptraced, do_notify_parent() will only be called when it is a group-leader and thread_group_empty() is true. This means that if tsk is ptraced, do_notify_pidfd() will be called from do_notify_parent() even if tsk is a delay_group_leader(). But this case is less common, and apart from the unnecessary __wake_up() is harmless. Granted, this unnecessary __wake_up() can be avoided, but I don't want to do it in this patch because it's just a consequence of another historical oddity: we notify the tracer even if !thread_group_empty(), but do_wait() from debugger can't work until all other threads exit. With or without this patch we should either eliminate do_notify_parent() in this case, or change do_wait(WEXITED) to untrace the ptraced delay_group_leader() at least when ptrace_reparented(). Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250323171955.GA834@redhat.com Signed-off-by: Christian Brauner --- kernel/exit.c | 8 ++------ kernel/signal.c | 8 +++----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index c2e6c7b7779f..5d1226fdfadc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -756,12 +756,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); tsk->exit_state = EXIT_ZOMBIE; - /* - * Ignore thread-group leaders that exited before all - * subthreads did. - */ - if (!delay_group_leader(tsk)) - do_notify_pidfd(tsk); if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && @@ -774,6 +768,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead) do_notify_parent(tsk, tsk->exit_signal); } else { autoreap = true; + /* untraced sub-thread */ + do_notify_pidfd(tsk); } if (autoreap) { diff --git a/kernel/signal.c b/kernel/signal.c index 027ad9e97417..1d8db0dabb71 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2179,11 +2179,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig) WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); - /* - * Notify for thread-group leaders without subthreads. - */ - if (thread_group_empty(tsk)) - do_notify_pidfd(tsk); + + /* ptraced, or group-leader without sub-threads */ + do_notify_pidfd(tsk); if (sig != SIGCHLD) { /* From 8661bb9c717a07b7636224339fe8818b65db6ddf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Mar 2025 18:45:18 +0100 Subject: [PATCH 1656/2157] selftests/pidfd: fixes syscall number defines I had to spend some (a lot;) time to understand why pidfd_info_test (and more) fails with my patch under qemu on my machine ;) Until I applied the patch below. I think it is a bad idea to do the things like #ifndef __NR_clone3 #define __NR_clone3 -1 #endif because this can hide a problem. My working laptop runs Fedora-23 which doesn't have __NR_clone3/etc in /usr/include/. So "make" happily succeeds, but everything fails and it is not clear why. Link: https://lore.kernel.org/r/20250323174518.GB834@redhat.com Signed-off-by: Christian Brauner --- tools/testing/selftests/clone3/clone3_selftests.h | 2 +- tools/testing/selftests/pidfd/pidfd.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 3d2663fe50ba..eeca8005723f 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -16,7 +16,7 @@ #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) #ifndef __NR_clone3 -#define __NR_clone3 -1 +#define __NR_clone3 435 #endif struct __clone_args { diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index cec22aa11cdf..55bcf81a2b9a 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -32,19 +32,19 @@ #endif #ifndef __NR_pidfd_open -#define __NR_pidfd_open -1 +#define __NR_pidfd_open 434 #endif #ifndef __NR_pidfd_send_signal -#define __NR_pidfd_send_signal -1 +#define __NR_pidfd_send_signal 424 #endif #ifndef __NR_clone3 -#define __NR_clone3 -1 +#define __NR_clone3 435 #endif #ifndef __NR_pidfd_getfd -#define __NR_pidfd_getfd -1 +#define __NR_pidfd_getfd 438 #endif #ifndef PIDFD_NONBLOCK From af7bb0d2ca459f15cb5ca604dab5d9af103643f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Mar 2025 17:00:03 +0100 Subject: [PATCH 1657/2157] exec: fix the racy usage of fs_struct->in_exec check_unsafe_exec() sets fs->in_exec under cred_guard_mutex, then execve() paths clear fs->in_exec lockless. This is fine if exec succeeds, but if it fails we have the following race: T1 sets fs->in_exec = 1, fails, drops cred_guard_mutex T2 sets fs->in_exec = 1 T1 clears fs->in_exec T2 continues with fs->in_exec == 0 Change fs/exec.c to clear fs->in_exec with cred_guard_mutex held. Reported-by: syzbot+1c486d0b62032c82a968@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001f.GAE@google.com/ Cc: stable@vger.kernel.org Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250324160003.GA8878@redhat.com Signed-off-by: Christian Brauner --- fs/exec.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index f45859ad13ac..5d1c0d2dc403 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1227,13 +1227,12 @@ int begin_new_exec(struct linux_binprm * bprm) */ bprm->point_of_no_return = true; - /* - * Make this the only thread in the thread group. - */ + /* Make this the only thread in the thread group */ retval = de_thread(me); if (retval) goto out; - + /* see the comment in check_unsafe_exec() */ + current->fs->in_exec = 0; /* * Cancel any io_uring activity across execve */ @@ -1495,6 +1494,8 @@ static void free_bprm(struct linux_binprm *bprm) } free_arg_pages(bprm); if (bprm->cred) { + /* in case exec fails before de_thread() succeeds */ + current->fs->in_exec = 0; mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1616,6 +1617,10 @@ static void check_unsafe_exec(struct linux_binprm *bprm) * suid exec because the differently privileged task * will be able to manipulate the current directory, etc. * It would be nice to force an unshare instead... + * + * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS) + * from another sub-thread until de_thread() succeeds, this + * state is protected by cred_guard_mutex we hold. */ n_fs = 1; spin_lock(&p->fs->lock); @@ -1860,7 +1865,6 @@ static int bprm_execve(struct linux_binprm *bprm) sched_mm_cid_after_execve(current); /* execve succeeded */ - current->fs->in_exec = 0; current->in_execve = 0; rseq_execve(current); user_events_execve(current); @@ -1879,7 +1883,6 @@ static int bprm_execve(struct linux_binprm *bprm) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); - current->fs->in_exec = 0; current->in_execve = 0; return retval; From 406fad7698f5bf21ab6b5ca195bf4b9e0b3990ed Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 25 Mar 2025 09:59:05 -0300 Subject: [PATCH 1658/2157] cachefiles: Fix oops in vfs_mkdir from cachefiles_get_directory Commit c54b386969a5 ("VFS: Change vfs_mkdir() to return the dentry.") changed cachefiles_get_directory, replacing "subdir" with a ERR_PTR from the result of cachefiles_inject_write_error, which is either 0 or some error code. This causes an oops when the resulting pointer is passed to vfs_mkdir. Use a similar pattern to what is used earlier in the function; replace subdir with either the return value from vfs_mkdir, or the ERR_PTR of the cachefiles_inject_write_error() return value, but only if it is non zero. Fixes: c54b386969a5 ("VFS: Change vfs_mkdir() to return the dentry.") cc: netfs@lists.linux.dev Signed-off-by: Marc Dionne Link: https://lore.kernel.org/r/20250325125905.395372-1-marc.dionne@auristor.com Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 83a60126de0f..14d0cc894000 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -128,10 +128,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, ret = security_path_mkdir(&path, subdir, 0700); if (ret < 0) goto mkdir_error; - subdir = ERR_PTR(cachefiles_inject_write_error()); - if (!IS_ERR(subdir)) + ret = cachefiles_inject_write_error(); + if (ret == 0) subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); - ret = PTR_ERR(subdir); + else + subdir = ERR_PTR(ret); if (IS_ERR(subdir)) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); From 9324571e9eea231321acf0a3d0fbc85a6e0f6ff6 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:44 +0000 Subject: [PATCH 1659/2157] RISC-V: add vector extension validation checks Using Clement's new validation callbacks, support checking that dependencies have been satisfied for the vector extensions. From the kernel's perfective, it's not required to differentiate between the conditions for all the various vector subsets - it's the firmware's job to not report impossible combinations. Instead, the kernel only has to check that the correct config options are enabled and to enforce its requirement of the d extension being present for FPU support. Since vector will now be disabled proactively, there's no need to clear the bit in elf_hwcap in riscv_fill_hwcap() any longer. Signed-off-by: Conor Dooley Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250312-eclair-affluent-55b098c3602b@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/cpufeature.h | 3 ++ arch/riscv/kernel/cpufeature.c | 60 +++++++++++++++++++---------- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 569140d6e639..5d9427ccbc7a 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -56,6 +56,9 @@ void __init riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ ARRAY_SIZE(_bundled_exts), NULL) +#define __RISCV_ISA_EXT_BUNDLE_VALIDATE(_name, _bundled_exts, _validate) \ + _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ + ARRAY_SIZE(_bundled_exts), _validate) /* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ #define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 40ac72e407b6..76a3b34d7a70 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -109,6 +109,38 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_ext_vector_x_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + return 0; +} + +static int riscv_ext_vector_float_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * The kernel doesn't support systems that don't implement both of + * F and D, so if any of the vector extensions that do floating point + * are to be usable, both floating point extensions need to be usable. + * + * Since this function validates vector only, and v/Zve* are probed + * after f/d, there's no need for a deferral here. + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return -EINVAL; + + return 0; +} + static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -326,12 +358,10 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), - __RISCV_ISA_EXT_SUPERSET(v, RISCV_ISA_EXT_v, riscv_v_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, - riscv_ext_zicbom_validate), - __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, - riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), @@ -372,11 +402,11 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), - __RISCV_ISA_EXT_SUPERSET(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts), - __RISCV_ISA_EXT_DATA(zve32x, RISCV_ISA_EXT_ZVE32X), - __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), - __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), - __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zve32x, RISCV_ISA_EXT_ZVE32X, riscv_ext_vector_x_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts, riscv_ext_vector_x_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), @@ -960,16 +990,6 @@ void __init riscv_fill_hwcap(void) riscv_v_setup_vsize(); } - if (elf_hwcap & COMPAT_HWCAP_ISA_V) { - /* - * ISA string in device tree might have 'v' flag, but - * CONFIG_RISCV_ISA_V is disabled in kernel. - * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled. - */ - if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; - } - memset(print_str, 0, sizeof(print_str)); for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) From 38077ec8fc11fa62e85a3a7630cfa9f2b8fd9f65 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:45 +0000 Subject: [PATCH 1660/2157] RISC-V: add vector crypto extension validation checks Using Clement's new validation callbacks, support checking that dependencies have been satisfied for the vector crpyto extensions. Currently riscv_isa_extension_available() will return true on systems that support the extensions but vector itself has been disabled by the kernel, adding validation callbacks will prevent such a scenario from occuring and make the behaviour of the extension detection functions more consistent with user expectations - it's not expected to have to check for vector AND the specific crypto extension. The Unpriv spec states: | The Zvknhb and Zvbc Vector Crypto Extensions --and accordingly the | composite extensions Zvkn, Zvknc, Zvkng, and Zvksc-- require a Zve64x | base, or application ("V") base Vector Extension. All of the other | Vector Crypto Extensions can be built on any embedded (Zve*) or | application ("V") base Vector Extension. While this could be used as the basis for checking that the correct base for individual crypto extensions, but that's not really the kernel's job in my opinion and it is sufficient to leave that sort of precision to the dt-bindings. The kernel only needs to make sure that vector, in some form, is available. Link: https://github.com/riscv/riscv-isa-manual/blob/main/src/vector-crypto.adoc#extensions-overview Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20250312-entertain-shaking-b664142c2f99@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 76a3b34d7a70..654dd4cfc479 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -141,6 +141,23 @@ static int riscv_ext_vector_float_validate(const struct riscv_isa_ext_data *data return 0; } +static int riscv_ext_vector_crypto_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + /* + * It isn't the kernel's job to check that the binding is correct, so + * it should be enough to check that any of the vector extensions are + * enabled, which in-turn means that vector is usable in this kernel + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32X)) + return -EPROBE_DEFER; + + return 0; +} + static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -400,8 +417,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED), __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH), __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), - __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), - __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvbc, RISCV_ISA_EXT_ZVBC, riscv_ext_vector_crypto_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA_VALIDATE(zve32x, RISCV_ISA_EXT_ZVE32X, riscv_ext_vector_x_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts, riscv_ext_vector_float_validate), @@ -409,20 +426,20 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts, riscv_ext_vector_x_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), - __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), - __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG), - __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED), - __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts), - __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA), - __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB), - __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED), - __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH), - __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkb, RISCV_ISA_EXT_ZVKB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkg, RISCV_ISA_EXT_ZVKG, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkn, riscv_zvkn_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvknc, riscv_zvknc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkned, RISCV_ISA_EXT_ZVKNED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkng, riscv_zvkng_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknha, RISCV_ISA_EXT_ZVKNHA, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknhb, RISCV_ISA_EXT_ZVKNHB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvks, riscv_zvks_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksc, riscv_zvksc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksed, RISCV_ISA_EXT_ZVKSED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksh, RISCV_ISA_EXT_ZVKSH, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksg, riscv_zvksg_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkt, RISCV_ISA_EXT_ZVKT, riscv_ext_vector_crypto_validate), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), From 12e7fbb6a84e6c90a61330d152319e870bc01c46 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:46 +0000 Subject: [PATCH 1661/2157] RISC-V: add f & d extension validation checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using Clement's new validation callbacks, support checking that dependencies have been satisfied for the floating point extensions. The check for "d" might be slightly confusingly shorter than that of "f", despite "d" depending on "f". This is because the requirement that a hart supporting double precision must also support single precision, should be validated by dt-bindings etc, not the kernel but lack of support for single precision only is a limitation of the kernel. Tested-by: Clément Léger Reviewed-by: Clément Léger Signed-off-by: Conor Dooley Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250312-reptile-platinum-62ee0f444a32@spud Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 654dd4cfc479..4024da3fc1dd 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -109,6 +109,33 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * Due to extension ordering, d is checked before f, so no deferral + * is required. + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) { + pr_warn_once("This kernel does not support systems with F but not D\n"); + return -EINVAL; + } + + return 0; +} + +static int riscv_ext_d_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + return 0; +} + static int riscv_ext_vector_x_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -371,8 +398,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i), __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m), __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a), - __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), - __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), + __RISCV_ISA_EXT_DATA_VALIDATE(f, RISCV_ISA_EXT_f, riscv_ext_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(d, RISCV_ISA_EXT_d, riscv_ext_d_validate), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), From 534d813a06202c565b4a7e75a3e710db7155e6d3 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:47 +0000 Subject: [PATCH 1662/2157] dt-bindings: riscv: d requires f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per the specifications, the d extension for double-precision floating point operations depends on the f extension for single-precision floating point. Add that requirement to the bindings. This differs from the Linux implementation, where single-precious only is not supported. Reviewed-by: Clément Léger Acked-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20250312-perpetual-daunting-ad489c9a857a@spud Signed-off-by: Alexandre Ghiti --- Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index a63b994e0763..ebb252275ddd 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -639,6 +639,12 @@ properties: https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc. allOf: + - if: + contains: + const: d + then: + contains: + const: f # Zcb depends on Zca - if: contains: From e9f1d61a5e186092d3b8eaa411bb4a76622bf854 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:48 +0000 Subject: [PATCH 1663/2157] dt-bindings: riscv: add vector sub-extension dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Section 33.18.2. Zve*: Vector Extensions for Embedded Processors in [1] says: | The Zve32f and Zve64x extensions depend on the Zve32x extension. The Zve64f extension depends | on the Zve32f and Zve64x extensions. The Zve64d extension depends on the Zve64f extension | The Zve32x extension depends on the Zicsr extension. The Zve32f and Zve64f extensions depend | upon the F extension | The Zve64d extension depends upon the D extension Apply these rules to the bindings to help prevent invalid combinations. Link: https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-isa-release-698e64a-2024-09-09 [1] Reviewed-by: Clément Léger Acked-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20250312-banking-crestless-58f3259a5018@spud Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index ebb252275ddd..02065664f819 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -680,6 +680,52 @@ properties: contains: const: zca + - if: + contains: + const: zve32x + then: + contains: + const: zicsr + + - if: + contains: + const: zve32f + then: + allOf: + - contains: + const: f + - contains: + const: zve32x + + - if: + contains: + const: zve64x + then: + contains: + const: zve32x + + - if: + contains: + const: zve64f + then: + allOf: + - contains: + const: f + - contains: + const: zve32f + - contains: + const: zve64x + + - if: + contains: + const: zve64d + then: + allOf: + - contains: + const: d + - contains: + const: zve64f + allOf: # Zcf extension does not exist on rv64 - if: From a0d857205756af45abaf63ca15b2640f707d5e73 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 12 Mar 2025 13:11:49 +0000 Subject: [PATCH 1664/2157] dt-bindings: riscv: document vector crypto requirements The Unpriv spec states: | The Zvknhb and Zvbc Vector Crypto Extensions --and accordingly the | composite extensions Zvkn, Zvknc, Zvkng, and Zvksc-- require a Zve64x | base, or application ("V") base Vector Extension. All of the other | Vector Crypto Extensions can be built on any embedded (Zve*) or | application ("V") base Vector Extension. Enforce the minimum requirement via schema. Link: https://github.com/riscv/riscv-isa-manual/blob/main/src/vector-crypto.adoc#extensions-overview Acked-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20250312-flask-relay-b36ee622b2c8@spud Signed-off-by: Alexandre Ghiti --- .../devicetree/bindings/riscv/extensions.yaml | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 02065664f819..9aeb9d4731ca 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -726,6 +726,39 @@ properties: - contains: const: zve64f + - if: + contains: + anyOf: + - const: zvbc + - const: zvkn + - const: zvknc + - const: zvkng + - const: zvknhb + - const: zvksc + then: + contains: + anyOf: + - const: v + - const: zve64x + + - if: + contains: + anyOf: + - const: zvbb + - const: zvkb + - const: zvkg + - const: zvkned + - const: zvknha + - const: zvksed + - const: zvksh + - const: zvks + - const: zvkt + then: + contains: + anyOf: + - const: v + - const: zve32x + allOf: # Zcf extension does not exist on rv64 - if: From 2593f7e0dc93a898a84220b3fb180d86f1ca8c60 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Sun, 23 Mar 2025 17:05:29 +0000 Subject: [PATCH 1665/2157] firmware: cs_dsp: Ensure cs_dsp_load[_coeff]() returns 0 on success Set ret = 0 on successful completion of the processing loop in cs_dsp_load() and cs_dsp_load_coeff() to ensure that the function returns 0 on success. All normal firmware files will have at least one data block, and processing this block will set ret == 0, from the result of either regmap_raw_write() or cs_dsp_parse_coeff(). The kunit tests create a dummy firmware file that contains only the header, without any data blocks. This gives cs_dsp a file to "load" that will not cause any side-effects. As there aren't any data blocks, the processing loop will not set ret == 0. Originally there was a line after the processing loop: ret = regmap_async_complete(regmap); which would set ret == 0 before the function returned. Commit fe08b7d5085a ("firmware: cs_dsp: Remove async regmap writes") changed the regmap write to a normal sync write, so the call to regmap_async_complete() wasn't necessary and was removed. It was overlooked that the ret here wasn't only to check the result of regmap_async_complete(), it also set the final return value of the function. Fixes: fe08b7d5085a ("firmware: cs_dsp: Remove async regmap writes") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250323170529.197205-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/cs_dsp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 42433c19eb30..560724ce21aa 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1631,6 +1631,7 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, cs_dsp_debugfs_save_wmfwname(dsp, file); + ret = 0; out_fw: cs_dsp_buf_free(&buf_list); @@ -2338,6 +2339,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_debugfs_save_binname(dsp, file); + ret = 0; out_fw: cs_dsp_buf_free(&buf_list); From 1243045c9448cd3f29e9d075de58dc81a0c2c3d9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Mar 2025 16:11:03 -0400 Subject: [PATCH 1666/2157] netfs: add Paulo as maintainer and remove myself as Reviewer My role has changed since I originally agreed to help with netfs, and I'm no longer providing a lot of value here. Luckily, Paulo has agreed to step in as co-maintainer. Acked-by: Paulo Alcantara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250324-master-v1-1-e2dd2fdb15b4@kernel.org Signed-off-by: Christian Brauner --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3b5fa8436987..e79c8a1c58d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8949,7 +8949,7 @@ F: include/linux/iomap.h FILESYSTEMS [NETFS LIBRARY] M: David Howells -R: Jeff Layton +M: Paulo Alcantara L: netfs@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported From 9133607de37a4887c6f89ed937176a0a0c1ebb17 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 24 Mar 2025 18:19:41 +0100 Subject: [PATCH 1667/2157] exit: fix the usage of delay_group_leader->exit_code in do_notify_parent() and pidfs_exit() Consider a process with a group leader L and a sub-thread T. L does sys_exit(1), then T does sys_exit_group(2). In this case wait_task_zombie(L) will notice SIGNAL_GROUP_EXIT and use L->signal->group_exit_code, this is correct. But, before that, do_notify_parent(L) called by release_task(T) will use L->exit_code != L->signal->group_exit_code, and this is not consistent. We don't really care, I think that nobody relies on the info which comes with SIGCHLD, if nothing else SIGCHLD < SIGRTMIN can be queued only once. But pidfs_exit() is more problematic, I think pidfs_exit_info->exit_code should report ->group_exit_code in this case, just like wait_task_zombie(). TODO: with this change we can hopefully cleanup (or may be even kill) the similar SIGNAL_GROUP_EXIT checks, at least in wait_task_zombie(). Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250324171941.GA13114@redhat.com Signed-off-by: Christian Brauner --- kernel/exit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index 5d1226fdfadc..1b51dc099f1e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -268,6 +268,9 @@ void release_task(struct task_struct *p) leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { + /* for pidfs_exit() and do_notify_parent() */ + if (leader->signal->flags & SIGNAL_GROUP_EXIT) + leader->exit_code = leader->signal->group_exit_code; /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, From e3206c4aa06fb7c7165b5a4f49cb3d5f35ccc0e9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:26 +0100 Subject: [PATCH 1668/2157] exportfs: add module description Every loadable module should have a description, to avoid a warning such as: WARNING: modpost: missing MODULE_DESCRIPTION() in fs/exportfs/exportfs.o Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250324173242.1501003-1-arnd@kernel.org Signed-off-by: Christian Brauner --- fs/exportfs/expfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b5845c4846b8..128dd092916b 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -608,4 +608,5 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, } EXPORT_SYMBOL_GPL(exportfs_decode_fh); +MODULE_DESCRIPTION("Code mapping from inodes to file handles"); MODULE_LICENSE("GPL"); From 92009c3ba8903820077d29b6bb8be68780fedbdb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 9 Dec 2024 11:48:58 -0500 Subject: [PATCH 1669/2157] thermal/drivers/qoriq: Use dev_err_probe() simplify the code Use dev_err_probe() and devm_clk_get_optional_enabled() to simplify the code. Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20241209164859.3758906-1-Frank.Li@nxp.com Signed-off-by: Daniel Lezcano --- drivers/thermal/qoriq_thermal.c | 34 ++++++++++----------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 52e26be8c53d..183af15c3376 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -265,7 +265,6 @@ static void qoriq_tmu_action(void *p) struct qoriq_tmu_data *data = p; regmap_write(data->regmap, REGS_TMR, TMR_DISABLE); - clk_disable_unprepare(data->clk); } static int qoriq_tmu_probe(struct platform_device *pdev) @@ -296,38 +295,27 @@ static int qoriq_tmu_probe(struct platform_device *pdev) base = devm_platform_ioremap_resource(pdev, 0); ret = PTR_ERR_OR_ZERO(base); - if (ret) { - dev_err(dev, "Failed to get memory region\n"); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to get memory region\n"); data->regmap = devm_regmap_init_mmio(dev, base, ®map_config); ret = PTR_ERR_OR_ZERO(data->regmap); - if (ret) { - dev_err(dev, "Failed to init regmap (%d)\n", ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to init regmap\n"); - data->clk = devm_clk_get_optional(dev, NULL); + data->clk = devm_clk_get_optional_enabled(dev, NULL); if (IS_ERR(data->clk)) return PTR_ERR(data->clk); - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(dev, "Failed to enable clock\n"); - return ret; - } - ret = devm_add_action_or_reset(dev, qoriq_tmu_action, data); if (ret) return ret; /* version register offset at: 0xbf8 on both v1 and v2 */ ret = regmap_read(data->regmap, REGS_IPBRR(0), &ver); - if (ret) { - dev_err(&pdev->dev, "Failed to read IP block version\n"); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to read IP block version\n"); + data->ver = (ver >> 8) & 0xff; qoriq_tmu_init_device(data); /* TMU initialization */ @@ -337,10 +325,8 @@ static int qoriq_tmu_probe(struct platform_device *pdev) return ret; ret = qoriq_tmu_register_tmu_zone(dev, data); - if (ret < 0) { - dev_err(dev, "Failed to register sensors\n"); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to register sensors\n"); platform_set_drvdata(pdev, data); From 229f3feb4b0442835b27d519679168bea2de96c2 Mon Sep 17 00:00:00 2001 From: Alice Guo Date: Mon, 9 Dec 2024 11:48:59 -0500 Subject: [PATCH 1670/2157] thermal/drivers/qoriq: Power down TMU on system suspend Enable power-down of TMU (Thermal Management Unit) for TMU version 2 during system suspend to save power. Save approximately 4.3mW on VDD_ANA_1P8 on i.MX93 platforms. Signed-off-by: Alice Guo Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20241209164859.3758906-2-Frank.Li@nxp.com Signed-off-by: Daniel Lezcano --- drivers/thermal/qoriq_thermal.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 183af15c3376..01b58be0dcc6 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -18,6 +18,7 @@ #define SITES_MAX 16 #define TMR_DISABLE 0x0 #define TMR_ME 0x80000000 +#define TMR_CMD BIT(29) #define TMR_ALPF 0x0c000000 #define TMR_ALPF_V2 0x03000000 #define TMTMIR_DEFAULT 0x0000000f @@ -342,6 +343,12 @@ static int qoriq_tmu_suspend(struct device *dev) if (ret) return ret; + if (data->ver > TMU_VER1) { + ret = regmap_set_bits(data->regmap, REGS_TMR, TMR_CMD); + if (ret) + return ret; + } + clk_disable_unprepare(data->clk); return 0; @@ -356,6 +363,12 @@ static int qoriq_tmu_resume(struct device *dev) if (ret) return ret; + if (data->ver > TMU_VER1) { + ret = regmap_clear_bits(data->regmap, REGS_TMR, TMR_CMD); + if (ret) + return ret; + } + /* Enable monitoring */ return regmap_update_bits(data->regmap, REGS_TMR, TMR_ME, TMR_ME); } From 1a685e2b3fc70f9a31ea28057471ba6615d8fa7b Mon Sep 17 00:00:00 2001 From: Praveenkumar I Date: Mon, 10 Feb 2025 17:34:31 +0530 Subject: [PATCH 1671/2157] dt-bindings: thermal: tsens: Add ipq5332, ipq5424 compatible The IPQ5332 and IPQ5424 use TSENS v2.3.3 IP with combined interrupt. RPM is not available in these SoCs, hence adding new compatible to have the sensor enablement and calibration function. Also add nvmem-cell-names. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Praveenkumar I Signed-off-by: Manikanta Mylavarapu Link: https://lore.kernel.org/r/20250210120436.821684-2-quic_mmanikan@quicinc.com Signed-off-by: Daniel Lezcano --- .../bindings/thermal/qcom-tsens.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml index b9829bb22cc0..f9d8012c8cf5 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml @@ -75,6 +75,8 @@ properties: - description: v2 of TSENS with combined interrupt enum: + - qcom,ipq5332-tsens + - qcom,ipq5424-tsens - qcom,ipq8074-tsens - description: v2 of TSENS with combined interrupt @@ -212,6 +214,18 @@ properties: - const: s9_p2_backup - const: s10_p1_backup - const: s10_p2_backup + - minItems: 8 + items: + - const: mode + - const: base0 + - const: base1 + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' + - pattern: '^tsens_sens[0-9]+_off$' "#qcom,sensors": description: @@ -271,6 +285,8 @@ allOf: compatible: contains: enum: + - qcom,ipq5332-tsens + - qcom,ipq5424-tsens - qcom,ipq8074-tsens then: properties: @@ -286,6 +302,8 @@ allOf: compatible: contains: enum: + - qcom,ipq5332-tsens + - qcom,ipq5424-tsens - qcom,ipq8074-tsens - qcom,tsens-v0_1 - qcom,tsens-v1 From ff0cf0ab9073727a67f9902dba77a758654ae895 Mon Sep 17 00:00:00 2001 From: Praveenkumar I Date: Mon, 10 Feb 2025 17:34:32 +0530 Subject: [PATCH 1672/2157] thermal/drivers/tsens: Add TSENS enable and calibration support for V2 SoCs without RPM need to enable sensors and calibrate them from the kernel. The IPQ5332 and IPQ5424 use the tsens v2.3.3 IP and do not have RPM. Therefore, add a new calibration function for V2, as the tsens.c calib function only supports V1. Also add new feature_config, ops and data for IPQ5332, IPQ5424. Although the TSENS IP supports 16 sensors, not all are used. The hw_id is used to enable the relevant sensors. Reviewed-by: Dmitry Baryshkov Signed-off-by: Praveenkumar I Signed-off-by: Manikanta Mylavarapu Link: https://lore.kernel.org/r/20250210120436.821684-3-quic_mmanikan@quicinc.com Signed-off-by: Daniel Lezcano --- drivers/thermal/qcom/tsens-v2.c | 178 ++++++++++++++++++++++++++++++++ drivers/thermal/qcom/tsens.c | 8 +- drivers/thermal/qcom/tsens.h | 3 + 3 files changed, 188 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c index 0cb7301eca6e..8d9698ea3ec4 100644 --- a/drivers/thermal/qcom/tsens-v2.c +++ b/drivers/thermal/qcom/tsens-v2.c @@ -4,13 +4,32 @@ * Copyright (c) 2018, Linaro Limited */ +#include #include +#include #include #include "tsens.h" /* ----- SROT ------ */ #define SROT_HW_VER_OFF 0x0000 #define SROT_CTRL_OFF 0x0004 +#define SROT_MEASURE_PERIOD 0x0008 +#define SROT_Sn_CONVERSION 0x0060 +#define V2_SHIFT_DEFAULT 0x0003 +#define V2_SLOPE_DEFAULT 0x0cd0 +#define V2_CZERO_DEFAULT 0x016a +#define ONE_PT_SLOPE 0x0cd0 +#define TWO_PT_SHIFTED_GAIN 921600 +#define ONE_PT_CZERO_CONST 94 +#define SW_RST_DEASSERT 0x0 +#define SW_RST_ASSERT 0x1 +#define MEASURE_PERIOD_2mSEC 0x1 +#define RESULT_FORMAT_TEMP 0x1 +#define TSENS_ENABLE 0x1 +#define SENSOR_CONVERSION(n) (((n) * 4) + SROT_Sn_CONVERSION) +#define CONVERSION_SHIFT_MASK GENMASK(24, 23) +#define CONVERSION_SLOPE_MASK GENMASK(22, 10) +#define CONVERSION_CZERO_MASK GENMASK(9, 0) /* ----- TM ------ */ #define TM_INT_EN_OFF 0x0004 @@ -50,6 +69,17 @@ static struct tsens_features ipq8074_feat = { .trip_max_temp = 204000, }; +static struct tsens_features ipq5332_feat = { + .ver_major = VER_2_X_NO_RPM, + .crit_int = 1, + .combo_int = 1, + .adc = 0, + .srot_split = 1, + .max_sensors = 16, + .trip_min_temp = 0, + .trip_max_temp = 204000, +}; + static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = { /* ----- SROT ------ */ /* VERSION */ @@ -59,6 +89,10 @@ static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = { /* CTRL_OFF */ [TSENS_EN] = REG_FIELD(SROT_CTRL_OFF, 0, 0), [TSENS_SW_RST] = REG_FIELD(SROT_CTRL_OFF, 1, 1), + [SENSOR_EN] = REG_FIELD(SROT_CTRL_OFF, 3, 18), + [CODE_OR_TEMP] = REG_FIELD(SROT_CTRL_OFF, 21, 21), + + [MAIN_MEASURE_PERIOD] = REG_FIELD(SROT_MEASURE_PERIOD, 0, 7), /* ----- TM ------ */ /* INTERRUPT ENABLE */ @@ -104,6 +138,128 @@ static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = { [TRDY] = REG_FIELD(TM_TRDY_OFF, 0, 0), }; +static int tsens_v2_calibrate_sensor(struct device *dev, struct tsens_sensor *sensor, + struct regmap *map, u32 mode, u32 base0, u32 base1) +{ + u32 shift = V2_SHIFT_DEFAULT; + u32 slope = V2_SLOPE_DEFAULT; + u32 czero = V2_CZERO_DEFAULT; + char name[20]; + u32 val; + int ret; + + /* Read offset value */ + ret = snprintf(name, sizeof(name), "tsens_sens%d_off", sensor->hw_id); + if (ret < 0) + return ret; + + ret = nvmem_cell_read_variable_le_u32(dev, name, &sensor->offset); + if (ret) + return ret; + + /* Based on calib mode, program SHIFT, SLOPE and CZERO */ + switch (mode) { + case TWO_PT_CALIB: + slope = (TWO_PT_SHIFTED_GAIN / (base1 - base0)); + + czero = (base0 + sensor->offset - ((base1 - base0) / 3)); + + break; + case ONE_PT_CALIB2: + czero = base0 + sensor->offset - ONE_PT_CZERO_CONST; + + slope = ONE_PT_SLOPE; + + break; + default: + dev_dbg(dev, "calibrationless mode\n"); + } + + val = FIELD_PREP(CONVERSION_SHIFT_MASK, shift) | + FIELD_PREP(CONVERSION_SLOPE_MASK, slope) | + FIELD_PREP(CONVERSION_CZERO_MASK, czero); + + regmap_write(map, SENSOR_CONVERSION(sensor->hw_id), val); + + return 0; +} + +static int tsens_v2_calibration(struct tsens_priv *priv) +{ + struct device *dev = priv->dev; + u32 mode, base0, base1; + int i, ret; + + if (priv->num_sensors > MAX_SENSORS) + return -EINVAL; + + ret = nvmem_cell_read_variable_le_u32(priv->dev, "mode", &mode); + if (ret == -ENOENT) + dev_warn(priv->dev, "Calibration data not present in DT\n"); + if (ret < 0) + return ret; + + dev_dbg(priv->dev, "calibration mode is %d\n", mode); + + ret = nvmem_cell_read_variable_le_u32(priv->dev, "base0", &base0); + if (ret < 0) + return ret; + + ret = nvmem_cell_read_variable_le_u32(priv->dev, "base1", &base1); + if (ret < 0) + return ret; + + /* Calibrate each sensor */ + for (i = 0; i < priv->num_sensors; i++) { + ret = tsens_v2_calibrate_sensor(dev, &priv->sensor[i], priv->srot_map, + mode, base0, base1); + if (ret < 0) + return ret; + } + + return 0; +} + +static int __init init_tsens_v2_no_rpm(struct tsens_priv *priv) +{ + struct device *dev = priv->dev; + int i, ret; + u32 val = 0; + + ret = init_common(priv); + if (ret < 0) + return ret; + + priv->rf[CODE_OR_TEMP] = devm_regmap_field_alloc(dev, priv->srot_map, + priv->fields[CODE_OR_TEMP]); + if (IS_ERR(priv->rf[CODE_OR_TEMP])) + return PTR_ERR(priv->rf[CODE_OR_TEMP]); + + priv->rf[MAIN_MEASURE_PERIOD] = devm_regmap_field_alloc(dev, priv->srot_map, + priv->fields[MAIN_MEASURE_PERIOD]); + if (IS_ERR(priv->rf[MAIN_MEASURE_PERIOD])) + return PTR_ERR(priv->rf[MAIN_MEASURE_PERIOD]); + + regmap_field_write(priv->rf[TSENS_SW_RST], SW_RST_ASSERT); + + regmap_field_write(priv->rf[MAIN_MEASURE_PERIOD], MEASURE_PERIOD_2mSEC); + + /* Enable available sensors */ + for (i = 0; i < priv->num_sensors; i++) + val |= 1 << priv->sensor[i].hw_id; + + regmap_field_write(priv->rf[SENSOR_EN], val); + + /* Select temperature format, unit is deci-Celsius */ + regmap_field_write(priv->rf[CODE_OR_TEMP], RESULT_FORMAT_TEMP); + + regmap_field_write(priv->rf[TSENS_SW_RST], SW_RST_DEASSERT); + + regmap_field_write(priv->rf[TSENS_EN], TSENS_ENABLE); + + return 0; +} + static const struct tsens_ops ops_generic_v2 = { .init = init_common, .get_temp = get_temp_tsens_valid, @@ -122,6 +278,28 @@ struct tsens_plat_data data_ipq8074 = { .fields = tsens_v2_regfields, }; +static const struct tsens_ops ops_ipq5332 = { + .init = init_tsens_v2_no_rpm, + .get_temp = get_temp_tsens_valid, + .calibrate = tsens_v2_calibration, +}; + +const struct tsens_plat_data data_ipq5332 = { + .num_sensors = 5, + .ops = &ops_ipq5332, + .hw_ids = (unsigned int []){11, 12, 13, 14, 15}, + .feat = &ipq5332_feat, + .fields = tsens_v2_regfields, +}; + +const struct tsens_plat_data data_ipq5424 = { + .num_sensors = 7, + .ops = &ops_ipq5332, + .hw_ids = (unsigned int []){9, 10, 11, 12, 13, 14, 15}, + .feat = &ipq5332_feat, + .fields = tsens_v2_regfields, +}; + /* Kept around for backward compatibility with old msm8996.dtsi */ struct tsens_plat_data data_8996 = { .num_sensors = 13, diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index 3aa3736181aa..1f5d4de017d9 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -975,7 +975,7 @@ int __init init_common(struct tsens_priv *priv) ret = regmap_field_read(priv->rf[TSENS_EN], &enabled); if (ret) goto err_put_device; - if (!enabled) { + if (!enabled && (tsens_version(priv) != VER_2_X_NO_RPM)) { dev_err(dev, "%s: device not enabled\n", __func__); ret = -ENODEV; goto err_put_device; @@ -1102,6 +1102,12 @@ static SIMPLE_DEV_PM_OPS(tsens_pm_ops, tsens_suspend, tsens_resume); static const struct of_device_id tsens_table[] = { { + .compatible = "qcom,ipq5332-tsens", + .data = &data_ipq5332, + }, { + .compatible = "qcom,ipq5424-tsens", + .data = &data_ipq5424, + }, { .compatible = "qcom,ipq8064-tsens", .data = &data_8960, }, { diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h index 7b36a0318fa6..336bc868fd7c 100644 --- a/drivers/thermal/qcom/tsens.h +++ b/drivers/thermal/qcom/tsens.h @@ -35,6 +35,7 @@ enum tsens_ver { VER_0_1, VER_1_X, VER_2_X, + VER_2_X_NO_RPM, }; enum tsens_irq_type { @@ -168,6 +169,7 @@ enum regfield_ids { TSENS_SW_RST, SENSOR_EN, CODE_OR_TEMP, + MAIN_MEASURE_PERIOD, /* ----- TM ------ */ /* TRDY */ @@ -651,5 +653,6 @@ extern struct tsens_plat_data data_tsens_v1, data_8937, data_8976, data_8956; /* TSENS v2 targets */ extern struct tsens_plat_data data_8996, data_ipq8074, data_tsens_v2; +extern const struct tsens_plat_data data_ipq5332, data_ipq5424; #endif /* __QCOM_TSENS_H__ */ From ee022e5cae052e0c67ca7c5fec0f2e7bc897c70e Mon Sep 17 00:00:00 2001 From: Trevor Woerner Date: Fri, 7 Feb 2025 12:50:47 -0500 Subject: [PATCH 1673/2157] thermal/drivers/rockchip: Add missing rk3328 mapping entry The mapping table for the rk3328 is missing the entry for -25C which is found in the TRM section 9.5.2 "Temperature-to-code mapping". NOTE: the kernel uses the tsadc_q_sel=1'b1 mode which is defined as: 4096-. Whereas the table in the TRM gives the code "3774" for -25C, the kernel uses 4096-3774=322. [Dragan Simic] : "After going through the RK3308 and RK3328 TRMs, as well as through the downstream kernel code, it seems we may have some troubles at our hands. Let me explain, please. To sum it up, part 1 of the RK3308 TRM v1.1 says on page 538 that the equation for the output when tsadc_q_sel equals 1 is (4096 - tsadc_q), while part 1 of the RK3328 TRM v1.2 says that the output equation is (1024 - tsadc_q) in that case. The downstream kernel code, however, treats the RK3308 and RK3328 tables and their values as being the same. It even mentions 1024 as the "offset" value in a comment block for the rk_tsadcv3_control() function, just like the upstream code does, which is obviously wrong "offset" value when correlated with the table on page 544 of part 1 of the RK3308 TRM v1.1. With all this in mind, it's obvious that more work is needed to make it clear where's the actual mistake (it could be that the TRM is wrong), which I'll volunteer for as part of the SoC binning project. In the meantime, this patch looks fine as-is to me, by offering what's a clear improvement to the current state of the upstream code" Link: https://opensource.rock-chips.com/images/9/97/Rockchip_RK3328TRM_V1.1-Part1-20170321.pdf Cc: stable@vger.kernel.org Fixes: eda519d5f73e ("thermal: rockchip: Support the RK3328 SOC in thermal driver") Signed-off-by: Trevor Woerner Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20250207175048.35959-1-twoerner@gmail.com Signed-off-by: Daniel Lezcano --- drivers/thermal/rockchip_thermal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index f551df48eef9..a8ad85feb68f 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -386,6 +386,7 @@ static const struct tsadc_table rk3328_code_table[] = { {296, -40000}, {304, -35000}, {313, -30000}, + {322, -25000}, {331, -20000}, {340, -15000}, {349, -10000}, From 9e6ec8cf64e2973f0ec74f09023988cabd218426 Mon Sep 17 00:00:00 2001 From: xueqin Luo Date: Thu, 6 Feb 2025 16:14:36 +0800 Subject: [PATCH 1674/2157] thermal: core: Remove duplicate struct declaration The struct thermal_zone_device is already declared on line 32, so the duplicate declaration has been removed. Fixes: b1ae92dcfa8e ("thermal: core: Make struct thermal_zone_device definition internal") Signed-off-by: xueqin Luo Link: https://lore.kernel.org/r/20250206081436.51785-1-luoxueqin@kylinos.cn Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 69f9bedd0ee8..0b5ed6821080 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -86,8 +86,6 @@ struct thermal_trip { #define THERMAL_TRIP_PRIV_TO_INT(_val_) (uintptr_t)(_val_) #define THERMAL_INT_TO_TRIP_PRIV(_val_) (void *)(uintptr_t)(_val_) -struct thermal_zone_device; - struct cooling_spec { unsigned long upper; /* Highest cooling state */ unsigned long lower; /* Lowest cooling state */ From 65594b3745024857f812145a58db3601d733676c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 13 Jan 2025 10:27:12 -0300 Subject: [PATCH 1675/2157] thermal/drivers/mediatek/lvts: Disable monitor mode during suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When configured in filtered mode, the LVTS thermal controller will monitor the temperature from the sensors and trigger an interrupt once a thermal threshold is crossed. Currently this is true even during suspend and resume. The problem with that is that when enabling the internal clock of the LVTS controller in lvts_ctrl_set_enable() during resume, the temperature reading can glitch and appear much higher than the real one, resulting in a spurious interrupt getting generated. Disable the temperature monitoring and give some time for the signals to stabilize during suspend in order to prevent such spurious interrupts. Cc: stable@vger.kernel.org Reported-by: Hsin-Te Yuan Closes: https://lore.kernel.org/all/20241108-lvts-v1-1-eee339c6ca20@chromium.org/ Fixes: 8137bb90600d ("thermal/drivers/mediatek/lvts_thermal: Add suspend and resume") Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250113-mt8192-lvts-filtered-suspend-fix-v2-1-07a25200c7c6@collabora.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 36 +++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 07f7f3b7a2fb..a1a438ebad33 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -860,6 +860,32 @@ static int lvts_ctrl_init(struct device *dev, struct lvts_domain *lvts_td, return 0; } +static void lvts_ctrl_monitor_enable(struct device *dev, struct lvts_ctrl *lvts_ctrl, bool enable) +{ + /* + * Bitmaps to enable each sensor on filtered mode in the MONCTL0 + * register. + */ + static const u8 sensor_filt_bitmap[] = { BIT(0), BIT(1), BIT(2), BIT(3) }; + u32 sensor_map = 0; + int i; + + if (lvts_ctrl->mode != LVTS_MSR_FILTERED_MODE) + return; + + if (enable) { + lvts_for_each_valid_sensor(i, lvts_ctrl) + sensor_map |= sensor_filt_bitmap[i]; + } + + /* + * Bits: + * 9: Single point access flow + * 0-3: Enable sensing point 0-3 + */ + writel(sensor_map | BIT(9), LVTS_MONCTL0(lvts_ctrl->base)); +} + /* * At this point the configuration register is the only place in the * driver where we write multiple values. Per hardware constraint, @@ -1381,8 +1407,11 @@ static int lvts_suspend(struct device *dev) lvts_td = dev_get_drvdata(dev); - for (i = 0; i < lvts_td->num_lvts_ctrl; i++) + for (i = 0; i < lvts_td->num_lvts_ctrl; i++) { + lvts_ctrl_monitor_enable(dev, &lvts_td->lvts_ctrl[i], false); + usleep_range(100, 200); lvts_ctrl_set_enable(&lvts_td->lvts_ctrl[i], false); + } clk_disable_unprepare(lvts_td->clk); @@ -1400,8 +1429,11 @@ static int lvts_resume(struct device *dev) if (ret) return ret; - for (i = 0; i < lvts_td->num_lvts_ctrl; i++) + for (i = 0; i < lvts_td->num_lvts_ctrl; i++) { lvts_ctrl_set_enable(&lvts_td->lvts_ctrl[i], true); + usleep_range(100, 200); + lvts_ctrl_monitor_enable(dev, &lvts_td->lvts_ctrl[i], true); + } return 0; } From c612cbcdf603aefb3358b2e3964dcd5aa3f827a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 13 Jan 2025 10:27:13 -0300 Subject: [PATCH 1676/2157] thermal/drivers/mediatek/lvts: Disable Stage 3 thermal threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Stage 3 thermal threshold is currently configured during the controller initialization to 105 Celsius. From the kernel perspective, this configuration is harmful because: * The stage 3 interrupt that gets triggered when the threshold is crossed is not handled in any way by the IRQ handler, it just gets cleared. Besides, the temperature used for stage 3 comes from the sensors, and the critical thermal trip points described in the Devicetree will already cause a shutdown when crossed (at a lower temperature, of 100 Celsius, for all SoCs currently using this driver). * The only effect of crossing the stage 3 threshold that has been observed is that it causes the machine to no longer be able to enter suspend. Even if that was a result of a momentary glitch in the temperature reading of a sensor (as has been observed on the MT8192-based Chromebooks). For those reasons, disable the Stage 3 thermal threshold configuration. Cc: stable@vger.kernel.org Reported-by: Hsin-Te Yuan Closes: https://lore.kernel.org/all/20241108-lvts-v1-1-eee339c6ca20@chromium.org/ Fixes: f5f633b18234 ("thermal/drivers/mediatek: Add the Low Voltage Thermal Sensor driver") Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250113-mt8192-lvts-filtered-suspend-fix-v2-2-07a25200c7c6@collabora.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index a1a438ebad33..0aaa44b734ca 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -65,7 +65,7 @@ #define LVTS_HW_FILTER 0x0 #define LVTS_TSSEL_CONF 0x13121110 #define LVTS_CALSCALE_CONF 0x300 -#define LVTS_MONINT_CONF 0x8300318C +#define LVTS_MONINT_CONF 0x0300318C #define LVTS_MONINT_OFFSET_SENSOR0 0xC #define LVTS_MONINT_OFFSET_SENSOR1 0x180 @@ -91,8 +91,6 @@ #define LVTS_MSR_READ_TIMEOUT_US 400 #define LVTS_MSR_READ_WAIT_US (LVTS_MSR_READ_TIMEOUT_US / 2) -#define LVTS_HW_TSHUT_TEMP 105000 - #define LVTS_MINIMUM_THRESHOLD 20000 static int golden_temp = LVTS_GOLDEN_TEMP_DEFAULT; @@ -145,7 +143,6 @@ struct lvts_ctrl { struct lvts_sensor sensors[LVTS_SENSOR_MAX]; const struct lvts_data *lvts_data; u32 calibration[LVTS_SENSOR_MAX]; - u32 hw_tshut_raw_temp; u8 valid_sensor_mask; int mode; void __iomem *base; @@ -837,14 +834,6 @@ static int lvts_ctrl_init(struct device *dev, struct lvts_domain *lvts_td, */ lvts_ctrl[i].mode = lvts_data->lvts_ctrl[i].mode; - /* - * The temperature to raw temperature must be done - * after initializing the calibration. - */ - lvts_ctrl[i].hw_tshut_raw_temp = - lvts_temp_to_raw(LVTS_HW_TSHUT_TEMP, - lvts_data->temp_factor); - lvts_ctrl[i].low_thresh = INT_MIN; lvts_ctrl[i].high_thresh = INT_MIN; } @@ -919,7 +908,6 @@ static int lvts_irq_init(struct lvts_ctrl *lvts_ctrl) * 10 : Selected sensor with bits 19-18 * 11 : Reserved */ - writel(BIT(16), LVTS_PROTCTL(lvts_ctrl->base)); /* * LVTS_PROTTA : Stage 1 temperature threshold @@ -932,8 +920,8 @@ static int lvts_irq_init(struct lvts_ctrl *lvts_ctrl) * * writel(0x0, LVTS_PROTTA(lvts_ctrl->base)); * writel(0x0, LVTS_PROTTB(lvts_ctrl->base)); + * writel(0x0, LVTS_PROTTC(lvts_ctrl->base)); */ - writel(lvts_ctrl->hw_tshut_raw_temp, LVTS_PROTTC(lvts_ctrl->base)); /* * LVTS_MONINT : Interrupt configuration register From fa17ff8e325a657c84be1083f06e54ee7eea82e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 13 Jan 2025 10:27:14 -0300 Subject: [PATCH 1677/2157] thermal/drivers/mediatek/lvts: Disable low offset IRQ for minimum threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to get working interrupts, a low offset value needs to be configured. The minimum value for it is 20 Celsius, which is what is configured when there's no lower thermal trip (ie the thermal core passes -INT_MAX as low trip temperature). However, when the temperature gets that low and fluctuates around that value it causes an interrupt storm. Prevent that interrupt storm by not enabling the low offset interrupt if the low threshold is the minimum one. Cc: stable@vger.kernel.org Fixes: 77354eaef821 ("thermal/drivers/mediatek/lvts_thermal: Don't leave threshold zeroed") Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250113-mt8192-lvts-filtered-suspend-fix-v2-3-07a25200c7c6@collabora.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 48 ++++++++++++++++++------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 0aaa44b734ca..04bfbfe93a71 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -67,10 +67,14 @@ #define LVTS_CALSCALE_CONF 0x300 #define LVTS_MONINT_CONF 0x0300318C -#define LVTS_MONINT_OFFSET_SENSOR0 0xC -#define LVTS_MONINT_OFFSET_SENSOR1 0x180 -#define LVTS_MONINT_OFFSET_SENSOR2 0x3000 -#define LVTS_MONINT_OFFSET_SENSOR3 0x3000000 +#define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR0 BIT(3) +#define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR1 BIT(8) +#define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR2 BIT(13) +#define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR3 BIT(25) +#define LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR0 BIT(2) +#define LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR1 BIT(7) +#define LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR2 BIT(12) +#define LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR3 BIT(24) #define LVTS_INT_SENSOR0 0x0009001F #define LVTS_INT_SENSOR1 0x001203E0 @@ -326,11 +330,17 @@ static int lvts_get_temp(struct thermal_zone_device *tz, int *temp) static void lvts_update_irq_mask(struct lvts_ctrl *lvts_ctrl) { - static const u32 masks[] = { - LVTS_MONINT_OFFSET_SENSOR0, - LVTS_MONINT_OFFSET_SENSOR1, - LVTS_MONINT_OFFSET_SENSOR2, - LVTS_MONINT_OFFSET_SENSOR3, + static const u32 high_offset_inten_masks[] = { + LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR0, + LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR1, + LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR2, + LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR3, + }; + static const u32 low_offset_inten_masks[] = { + LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR0, + LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR1, + LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR2, + LVTS_MONINT_OFFSET_LOW_INTEN_SENSOR3, }; u32 value = 0; int i; @@ -339,10 +349,22 @@ static void lvts_update_irq_mask(struct lvts_ctrl *lvts_ctrl) for (i = 0; i < ARRAY_SIZE(masks); i++) { if (lvts_ctrl->sensors[i].high_thresh == lvts_ctrl->high_thresh - && lvts_ctrl->sensors[i].low_thresh == lvts_ctrl->low_thresh) - value |= masks[i]; - else - value &= ~masks[i]; + && lvts_ctrl->sensors[i].low_thresh == lvts_ctrl->low_thresh) { + /* + * The minimum threshold needs to be configured in the + * OFFSETL register to get working interrupts, but we + * don't actually want to generate interrupts when + * crossing it. + */ + if (lvts_ctrl->low_thresh == -INT_MAX) { + value &= ~low_offset_inten_masks[i]; + value |= high_offset_inten_masks[i]; + } else { + value |= low_offset_inten_masks[i] | high_offset_inten_masks[i]; + } + } else { + value &= ~(low_offset_inten_masks[i] | high_offset_inten_masks[i]); + } } writel(value, LVTS_MONINT(lvts_ctrl->base)); From 2738fb3ec6838a10d2c4ce65cefdb3b90b11bd61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 13 Jan 2025 10:27:15 -0300 Subject: [PATCH 1678/2157] thermal/drivers/mediatek/lvts: Start sensor interrupts disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interrupts are enabled per sensor in lvts_update_irq_mask() as needed, there's no point in enabling all of them during initialization. Change the MONINT register initial value so all sensor interrupts start disabled. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250113-mt8192-lvts-filtered-suspend-fix-v2-4-07a25200c7c6@collabora.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 04bfbfe93a71..38668b5b34c7 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -65,7 +65,6 @@ #define LVTS_HW_FILTER 0x0 #define LVTS_TSSEL_CONF 0x13121110 #define LVTS_CALSCALE_CONF 0x300 -#define LVTS_MONINT_CONF 0x0300318C #define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR0 BIT(3) #define LVTS_MONINT_OFFSET_HIGH_INTEN_SENSOR1 BIT(8) @@ -951,7 +950,7 @@ static int lvts_irq_init(struct lvts_ctrl *lvts_ctrl) * The LVTS_MONINT register layout is the same as the LVTS_MONINTSTS * register, except we set the bits to enable the interrupt. */ - writel(LVTS_MONINT_CONF, LVTS_MONINT(lvts_ctrl->base)); + writel(0, LVTS_MONINT(lvts_ctrl->base)); return 0; } From 1ec52c157b4298986257c0380bfcfe72e05c7c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 13 Jan 2025 10:27:16 -0300 Subject: [PATCH 1679/2157] thermal/drivers/mediatek/lvts: Only update IRQ enable for valid sensors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only sensors that are valid need to have their interrupts enable status updated based on their thresholds. Use the lvts_for_each_valid_sensor() helper in lvts_update_irq_mask() to ignore invalid sensors. Currently, since the invalid sensors will always contain zeroed out thresholds (from kzalloc), they will always get their interrupts disabled on this loop. So this commit doesn't change the resulting interrupts configuration, but it slightly optimizes the loop by skipping the invalid sensors, avoids potential future surprises if at some point memory is no longer allocated for invalid sensors, as well as makes the code more obvious. Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250113-mt8192-lvts-filtered-suspend-fix-v2-5-07a25200c7c6@collabora.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 38668b5b34c7..088481d91e6e 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -346,7 +346,7 @@ static void lvts_update_irq_mask(struct lvts_ctrl *lvts_ctrl) value = readl(LVTS_MONINT(lvts_ctrl->base)); - for (i = 0; i < ARRAY_SIZE(masks); i++) { + lvts_for_each_valid_sensor(i, lvts_ctrl) { if (lvts_ctrl->sensors[i].high_thresh == lvts_ctrl->high_thresh && lvts_ctrl->sensors[i].low_thresh == lvts_ctrl->low_thresh) { /* From 2395a02809b06ff619883f5b8437e99de550b7ea Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Jan 2025 11:38:41 -0800 Subject: [PATCH 1680/2157] dt-bindings: thermal: Update for BCM74110 Update the binding with the BCM74110 compatible string which denotes the first device we need to support in a different process node requiring an updated thermal equation. Signed-off-by: Florian Fainelli Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250116193842.758788-2-florian.fainelli@broadcom.com Signed-off-by: Daniel Lezcano --- Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml b/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml index 081486b44382..2f62551a49c1 100644 --- a/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml +++ b/Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml @@ -18,6 +18,7 @@ properties: compatible: items: - enum: + - brcm,avs-tmon-bcm74110 - brcm,avs-tmon-bcm7216 - brcm,avs-tmon-bcm7445 - const: brcm,avs-tmon From 09daf8f0d4204ccfdb59116daa7fe2badb7683bc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Jan 2025 11:38:42 -0800 Subject: [PATCH 1681/2157] thermal/drivers/brcmstb_thermal: Add support for BCM74110 BCM74110 uses a different process node compared to previous chips that requires a different equation, account for that. Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20250116193842.758788-3-florian.fainelli@broadcom.com Signed-off-by: Daniel Lezcano --- drivers/thermal/broadcom/brcmstb_thermal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c index 270982740fde..f46f2ddc174e 100644 --- a/drivers/thermal/broadcom/brcmstb_thermal.c +++ b/drivers/thermal/broadcom/brcmstb_thermal.c @@ -286,14 +286,20 @@ static int brcmstb_set_trips(struct thermal_zone_device *tz, int low, int high) return 0; } -static const struct thermal_zone_device_ops brcmstb_16nm_of_ops = { +static const struct thermal_zone_device_ops brcmstb_of_ops = { .get_temp = brcmstb_get_temp, }; +static const struct brcmstb_thermal_params brcmstb_8nm_params = { + .offset = 418670, + .mult = 509, + .of_ops = &brcmstb_of_ops, +}; + static const struct brcmstb_thermal_params brcmstb_16nm_params = { .offset = 457829, .mult = 557, - .of_ops = &brcmstb_16nm_of_ops, + .of_ops = &brcmstb_of_ops, }; static const struct thermal_zone_device_ops brcmstb_28nm_of_ops = { @@ -308,6 +314,7 @@ static const struct brcmstb_thermal_params brcmstb_28nm_params = { }; static const struct of_device_id brcmstb_thermal_id_table[] = { + { .compatible = "brcm,avs-tmon-bcm74110", .data = &brcmstb_8nm_params }, { .compatible = "brcm,avs-tmon-bcm7216", .data = &brcmstb_16nm_params }, { .compatible = "brcm,avs-tmon", .data = &brcmstb_28nm_params }, {}, From 5ad72c2b24e1e35f2718bd1d04dcd041aa6047ee Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Jan 2025 14:10:26 +0100 Subject: [PATCH 1682/2157] dt-bindings: thermal: Correct indentation and style in DTS example DTS example in the bindings should be indented with 2- or 4-spaces and aligned with opening '- |', so correct any differences like 3-spaces or mixtures 2- and 4-spaces in one binding. No functional changes here, but saves some comments during reviews of new patches built on existing code. Signed-off-by: Krzysztof Kozlowski Acked-by: Chen-Yu Tsai Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250107131027.246608-1-krzysztof.kozlowski@linaro.org Signed-off-by: Daniel Lezcano --- .../thermal/allwinner,sun8i-a83t-ths.yaml | 48 +++++++++---------- .../bindings/thermal/imx-thermal.yaml | 36 +++++++------- .../bindings/thermal/imx8mm-thermal.yaml | 8 ++-- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml index dad8de900495..3e61689f6dd4 100644 --- a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml +++ b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml @@ -142,38 +142,38 @@ unevaluatedProperties: false examples: - | thermal-sensor@1f04000 { - compatible = "allwinner,sun8i-a83t-ths"; - reg = <0x01f04000 0x100>; - interrupts = <0 31 0>; - nvmem-cells = <&ths_calibration>; - nvmem-cell-names = "calibration"; - #thermal-sensor-cells = <1>; + compatible = "allwinner,sun8i-a83t-ths"; + reg = <0x01f04000 0x100>; + interrupts = <0 31 0>; + nvmem-cells = <&ths_calibration>; + nvmem-cell-names = "calibration"; + #thermal-sensor-cells = <1>; }; - | thermal-sensor@1c25000 { - compatible = "allwinner,sun8i-h3-ths"; - reg = <0x01c25000 0x400>; - clocks = <&ccu 0>, <&ccu 1>; - clock-names = "bus", "mod"; - resets = <&ccu 2>; - interrupts = <0 31 0>; - nvmem-cells = <&ths_calibration>; - nvmem-cell-names = "calibration"; - #thermal-sensor-cells = <0>; + compatible = "allwinner,sun8i-h3-ths"; + reg = <0x01c25000 0x400>; + clocks = <&ccu 0>, <&ccu 1>; + clock-names = "bus", "mod"; + resets = <&ccu 2>; + interrupts = <0 31 0>; + nvmem-cells = <&ths_calibration>; + nvmem-cell-names = "calibration"; + #thermal-sensor-cells = <0>; }; - | thermal-sensor@5070400 { - compatible = "allwinner,sun50i-h6-ths"; - reg = <0x05070400 0x100>; - clocks = <&ccu 0>; - clock-names = "bus"; - resets = <&ccu 2>; - interrupts = <0 15 0>; - nvmem-cells = <&ths_calibration>; - nvmem-cell-names = "calibration"; - #thermal-sensor-cells = <1>; + compatible = "allwinner,sun50i-h6-ths"; + reg = <0x05070400 0x100>; + clocks = <&ccu 0>; + clock-names = "bus"; + resets = <&ccu 2>; + interrupts = <0 15 0>; + nvmem-cells = <&ths_calibration>; + nvmem-cell-names = "calibration"; + #thermal-sensor-cells = <1>; }; ... diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml index 337560562337..949b154856c5 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml @@ -80,19 +80,19 @@ examples: #include efuse@21bc000 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,imx6sx-ocotp", "syscon"; - reg = <0x021bc000 0x4000>; - clocks = <&clks IMX6SX_CLK_OCOTP>; + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,imx6sx-ocotp", "syscon"; + reg = <0x021bc000 0x4000>; + clocks = <&clks IMX6SX_CLK_OCOTP>; - tempmon_calib: calib@38 { - reg = <0x38 4>; - }; + tempmon_calib: calib@38 { + reg = <0x38 4>; + }; - tempmon_temp_grade: temp-grade@20 { - reg = <0x20 4>; - }; + tempmon_temp_grade: temp-grade@20 { + reg = <0x20 4>; + }; }; anatop@20c8000 { @@ -103,12 +103,12 @@ examples: <0 127 IRQ_TYPE_LEVEL_HIGH>; tempmon { - compatible = "fsl,imx6sx-tempmon"; - interrupts = ; - fsl,tempmon = <&anatop>; - nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>; - nvmem-cell-names = "calib", "temp_grade"; - clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>; - #thermal-sensor-cells = <0>; + compatible = "fsl,imx6sx-tempmon"; + interrupts = ; + fsl,tempmon = <&anatop>; + nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>; + nvmem-cell-names = "calib", "temp_grade"; + clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>; + #thermal-sensor-cells = <0>; }; }; diff --git a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml index bef0e95e7416..df6c7c5d519f 100644 --- a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml @@ -63,10 +63,10 @@ examples: #include thermal-sensor@30260000 { - compatible = "fsl,imx8mm-tmu"; - reg = <0x30260000 0x10000>; - clocks = <&clk IMX8MM_CLK_TMU_ROOT>; - #thermal-sensor-cells = <0>; + compatible = "fsl,imx8mm-tmu"; + reg = <0x30260000 0x10000>; + clocks = <&clk IMX8MM_CLK_TMU_ROOT>; + #thermal-sensor-cells = <0>; }; ... From b744af1180dbadcfd994a48d613b4431373da70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 5 Mar 2025 18:46:30 +0100 Subject: [PATCH 1683/2157] thermal: rcar_gen3: Use lowercase hex constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The style of the driver is to use lowercase hex constants, correct the few outlines. Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250305174631.4119374-2-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Daniel Lezcano --- drivers/thermal/renesas/rcar_gen3_thermal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/renesas/rcar_gen3_thermal.c b/drivers/thermal/renesas/rcar_gen3_thermal.c index 1ec169aeacfc..deec17435901 100644 --- a/drivers/thermal/renesas/rcar_gen3_thermal.c +++ b/drivers/thermal/renesas/rcar_gen3_thermal.c @@ -21,11 +21,11 @@ /* Register offsets */ #define REG_GEN3_IRQSTR 0x04 #define REG_GEN3_IRQMSK 0x08 -#define REG_GEN3_IRQCTL 0x0C +#define REG_GEN3_IRQCTL 0x0c #define REG_GEN3_IRQEN 0x10 #define REG_GEN3_IRQTEMP1 0x14 #define REG_GEN3_IRQTEMP2 0x18 -#define REG_GEN3_IRQTEMP3 0x1C +#define REG_GEN3_IRQTEMP3 0x1c #define REG_GEN3_THCTR 0x20 #define REG_GEN3_TEMP 0x28 #define REG_GEN3_THCODE1 0x50 @@ -38,9 +38,9 @@ #define REG_GEN4_THSFMON00 0x180 #define REG_GEN4_THSFMON01 0x184 #define REG_GEN4_THSFMON02 0x188 -#define REG_GEN4_THSFMON15 0x1BC -#define REG_GEN4_THSFMON16 0x1C0 -#define REG_GEN4_THSFMON17 0x1C4 +#define REG_GEN4_THSFMON15 0x1bc +#define REG_GEN4_THSFMON16 0x1c0 +#define REG_GEN4_THSFMON17 0x1c4 /* IRQ{STR,MSK,EN} bits */ #define IRQ_TEMP1 BIT(0) @@ -57,11 +57,11 @@ /* THSCP bits */ #define THSCP_COR_PARA_VLD (BIT(15) | BIT(14)) -#define CTEMP_MASK 0xFFF +#define CTEMP_MASK 0xfff #define MCELSIUS(temp) ((temp) * 1000) -#define GEN3_FUSE_MASK 0xFFF -#define GEN4_FUSE_MASK 0xFFF +#define GEN3_FUSE_MASK 0xfff +#define GEN4_FUSE_MASK 0xfff #define TSC_MAX_NUM 5 From bccdbba51a946fdfa08e972f14526a081c6f8bf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 5 Mar 2025 18:46:31 +0100 Subject: [PATCH 1684/2157] thermal: rcar_gen3: Reuse logic to read fuses on Gen3 and Gen4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware calibration is fused on some, but not all, Gen3 and Gen4 boards. The calibrations values are the same on both generations but located at different register offsets. Instead of having duplicated logic to read the and store the values create structure to hold the register parameters and have a common function do the reading. Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250305174631.4119374-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Daniel Lezcano --- drivers/thermal/renesas/rcar_gen3_thermal.c | 89 +++++++++------------ 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/drivers/thermal/renesas/rcar_gen3_thermal.c b/drivers/thermal/renesas/rcar_gen3_thermal.c index deec17435901..24a702ee4c1f 100644 --- a/drivers/thermal/renesas/rcar_gen3_thermal.c +++ b/drivers/thermal/renesas/rcar_gen3_thermal.c @@ -67,11 +67,17 @@ struct rcar_gen3_thermal_priv; +struct rcar_gen3_thermal_fuse_info { + u32 ptat[3]; + u32 thcode[3]; + u32 mask; +}; + struct rcar_thermal_info { int scale; int adj_below; int adj_above; - void (*read_fuses)(struct rcar_gen3_thermal_priv *priv); + const struct rcar_gen3_thermal_fuse_info *fuses; }; struct equation_set_coef { @@ -253,59 +259,31 @@ static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data) return IRQ_HANDLED; } -static void rcar_gen3_thermal_read_fuses_gen3(struct rcar_gen3_thermal_priv *priv) +static void rcar_gen3_thermal_fetch_fuses(struct rcar_gen3_thermal_priv *priv) { - unsigned int i; + const struct rcar_gen3_thermal_fuse_info *fuses = priv->info->fuses; /* * Set the pseudo calibration points with fused values. * PTAT is shared between all TSCs but only fused for the first * TSC while THCODEs are fused for each TSC. */ - priv->ptat[0] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN3_PTAT1) & - GEN3_FUSE_MASK; - priv->ptat[1] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN3_PTAT2) & - GEN3_FUSE_MASK; - priv->ptat[2] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN3_PTAT3) & - GEN3_FUSE_MASK; + priv->ptat[0] = rcar_gen3_thermal_read(priv->tscs[0], fuses->ptat[0]) + & fuses->mask; + priv->ptat[1] = rcar_gen3_thermal_read(priv->tscs[0], fuses->ptat[1]) + & fuses->mask; + priv->ptat[2] = rcar_gen3_thermal_read(priv->tscs[0], fuses->ptat[2]) + & fuses->mask; - for (i = 0; i < priv->num_tscs; i++) { + for (unsigned int i = 0; i < priv->num_tscs; i++) { struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i]; - tsc->thcode[0] = rcar_gen3_thermal_read(tsc, REG_GEN3_THCODE1) & - GEN3_FUSE_MASK; - tsc->thcode[1] = rcar_gen3_thermal_read(tsc, REG_GEN3_THCODE2) & - GEN3_FUSE_MASK; - tsc->thcode[2] = rcar_gen3_thermal_read(tsc, REG_GEN3_THCODE3) & - GEN3_FUSE_MASK; - } -} - -static void rcar_gen3_thermal_read_fuses_gen4(struct rcar_gen3_thermal_priv *priv) -{ - unsigned int i; - - /* - * Set the pseudo calibration points with fused values. - * PTAT is shared between all TSCs but only fused for the first - * TSC while THCODEs are fused for each TSC. - */ - priv->ptat[0] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN4_THSFMON16) & - GEN4_FUSE_MASK; - priv->ptat[1] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN4_THSFMON17) & - GEN4_FUSE_MASK; - priv->ptat[2] = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN4_THSFMON15) & - GEN4_FUSE_MASK; - - for (i = 0; i < priv->num_tscs; i++) { - struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i]; - - tsc->thcode[0] = rcar_gen3_thermal_read(tsc, REG_GEN4_THSFMON01) & - GEN4_FUSE_MASK; - tsc->thcode[1] = rcar_gen3_thermal_read(tsc, REG_GEN4_THSFMON02) & - GEN4_FUSE_MASK; - tsc->thcode[2] = rcar_gen3_thermal_read(tsc, REG_GEN4_THSFMON00) & - GEN4_FUSE_MASK; + tsc->thcode[0] = rcar_gen3_thermal_read(tsc, fuses->thcode[0]) + & fuses->mask; + tsc->thcode[1] = rcar_gen3_thermal_read(tsc, fuses->thcode[1]) + & fuses->mask; + tsc->thcode[2] = rcar_gen3_thermal_read(tsc, fuses->thcode[2]) + & fuses->mask; } } @@ -316,7 +294,7 @@ static bool rcar_gen3_thermal_read_fuses(struct rcar_gen3_thermal_priv *priv) /* If fuses are not set, fallback to pseudo values. */ thscp = rcar_gen3_thermal_read(priv->tscs[0], REG_GEN3_THSCP); - if (!priv->info->read_fuses || + if (!priv->info->fuses || (thscp & THSCP_COR_PARA_VLD) != THSCP_COR_PARA_VLD) { /* Default THCODE values in case FUSEs are not set. */ static const int thcodes[TSC_MAX_NUM][3] = { @@ -342,7 +320,8 @@ static bool rcar_gen3_thermal_read_fuses(struct rcar_gen3_thermal_priv *priv) return false; } - priv->info->read_fuses(priv); + rcar_gen3_thermal_fetch_fuses(priv); + return true; } @@ -370,25 +349,37 @@ static void rcar_gen3_thermal_init(struct rcar_gen3_thermal_priv *priv, usleep_range(1000, 2000); } +static const struct rcar_gen3_thermal_fuse_info rcar_gen3_thermal_fuse_info_gen3 = { + .ptat = { REG_GEN3_PTAT1, REG_GEN3_PTAT2, REG_GEN3_PTAT3 }, + .thcode = { REG_GEN3_THCODE1, REG_GEN3_THCODE2, REG_GEN3_THCODE3 }, + .mask = GEN3_FUSE_MASK, +}; + +static const struct rcar_gen3_thermal_fuse_info rcar_gen3_thermal_fuse_info_gen4 = { + .ptat = { REG_GEN4_THSFMON16, REG_GEN4_THSFMON17, REG_GEN4_THSFMON15 }, + .thcode = { REG_GEN4_THSFMON01, REG_GEN4_THSFMON02, REG_GEN4_THSFMON00 }, + .mask = GEN4_FUSE_MASK, +}; + static const struct rcar_thermal_info rcar_m3w_thermal_info = { .scale = 157, .adj_below = -41, .adj_above = 116, - .read_fuses = rcar_gen3_thermal_read_fuses_gen3, + .fuses = &rcar_gen3_thermal_fuse_info_gen3, }; static const struct rcar_thermal_info rcar_gen3_thermal_info = { .scale = 167, .adj_below = -41, .adj_above = 126, - .read_fuses = rcar_gen3_thermal_read_fuses_gen3, + .fuses = &rcar_gen3_thermal_fuse_info_gen3, }; static const struct rcar_thermal_info rcar_gen4_thermal_info = { .scale = 167, .adj_below = -41, .adj_above = 126, - .read_fuses = rcar_gen3_thermal_read_fuses_gen4, + .fuses = &rcar_gen3_thermal_fuse_info_gen4, }; static const struct of_device_id rcar_gen3_thermal_dt_ids[] = { From 1b4ef46fd6660712afcfdd841eadd928193e850f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Feb 2025 09:29:36 +0100 Subject: [PATCH 1685/2157] thermal/drivers/qcom-spmi-temp-alarm: Drop unused driver data The platform device driver data has not been used since commit 7a4ca51b7040 ("thermal/drivers/qcom-spmi: Use devm_iio_channel_get") so drop the unnecessary assignment. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250228082936.5694-1-johan+linaro@kernel.org Signed-off-by: Daniel Lezcano --- drivers/thermal/qcom/qcom-spmi-temp-alarm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c index c2d59cbfaea9..a81e7d6e865f 100644 --- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c +++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c @@ -360,7 +360,6 @@ static int qpnp_tm_probe(struct platform_device *pdev) if (!chip) return -ENOMEM; - dev_set_drvdata(&pdev->dev, chip); chip->dev = &pdev->dev; mutex_init(&chip->lock); From 76e51db43fe4aaaebcc5ddda67b0807f7c9bdecc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:04 -0700 Subject: [PATCH 1686/2157] objtool, spi: amd: Fix out-of-bounds stack access in amd_set_spi_freq() If speed_hz < AMD_SPI_MIN_HZ, amd_set_spi_freq() iterates over the entire amd_spi_freq array without breaking out early, causing 'i' to go beyond the array bounds. Fix that by stopping the loop when it gets to the last entry, so the low speed_hz value gets clamped up to AMD_SPI_MIN_HZ. Fixes the following warning with an UBSAN kernel: drivers/spi/spi-amd.o: error: objtool: amd_set_spi_freq() falls through to next function amd_spi_set_opcode() Fixes: 3fe26121dc3a ("spi: amd: Configure device speed") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Mark Brown Cc: Raju Rangoju Cc: Linus Torvalds Link: https://lore.kernel.org/r/78fef0f2434f35be9095bcc9ffa23dd8cab667b9.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/r/202503161828.RUk9EhWx-lkp@intel.com/ --- drivers/spi/spi-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index c85997478b81..17fc0b17e756 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -302,7 +302,7 @@ static void amd_set_spi_freq(struct amd_spi *amd_spi, u32 speed_hz) { unsigned int i, spd7_val, alt_spd; - for (i = 0; i < ARRAY_SIZE(amd_spi_freq); i++) + for (i = 0; i < ARRAY_SIZE(amd_spi_freq)-1; i++) if (speed_hz >= amd_spi_freq[i].speed_hz) break; From 107a23185d990e3df6638d9a84c835f963fe30a6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:05 -0700 Subject: [PATCH 1687/2157] objtool, nvmet: Fix out-of-bounds stack access in nvmet_ctrl_state_show() The csts_state_names[] array only has six sparse entries, but the iteration code in nvmet_ctrl_state_show() iterates seven, resulting in a potential out-of-bounds stack read. Fix that. Fixes the following warning with an UBSAN kernel: vmlinux.o: warning: objtool: .text.nvmet_ctrl_state_show: unexpected end of section Fixes: 649fd41420a8 ("nvmet: add debugfs support") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Chaitanya Kulkarni Cc: Linus Torvalds Link: https://lore.kernel.org/r/f1f60858ee7a941863dc7f5506c540cb9f97b5f6.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503171547.LlCTJLQL-lkp@intel.com/ --- drivers/nvme/target/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c index 220c7391fc19..c6571fbd35e3 100644 --- a/drivers/nvme/target/debugfs.c +++ b/drivers/nvme/target/debugfs.c @@ -78,7 +78,7 @@ static int nvmet_ctrl_state_show(struct seq_file *m, void *p) bool sep = false; int i; - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(csts_state_names); i++) { int state = BIT(i); if (!(ctrl->csts & state)) From e63d465f59011dede0a0f1d21718b59a64c3ff5c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:06 -0700 Subject: [PATCH 1688/2157] objtool, media: dib8000: Prevent divide-by-zero in dib8000_set_dds() If dib8000_set_dds()'s call to dib8000_read32() returns zero, the result is a divide-by-zero. Prevent that from happening. Fixes the following warning with an UBSAN kernel: drivers/media/dvb-frontends/dib8000.o: warning: objtool: dib8000_tune() falls through to next function dib8096p_cfg_DibRx() Fixes: 173a64cb3fcf ("[media] dib8000: enhancement") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Mauro Carvalho Chehab Cc: Linus Torvalds Link: https://lore.kernel.org/r/bd1d504d930ae3f073b1e071bcf62cae7708773c.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/r/202503210602.fvH5DO1i-lkp@intel.com/ --- drivers/media/dvb-frontends/dib8000.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c index 2f5165918163..cfe59c3255f7 100644 --- a/drivers/media/dvb-frontends/dib8000.c +++ b/drivers/media/dvb-frontends/dib8000.c @@ -2701,8 +2701,11 @@ static void dib8000_set_dds(struct dib8000_state *state, s32 offset_khz) u8 ratio; if (state->revision == 0x8090) { + u32 internal = dib8000_read32(state, 23) / 1000; + ratio = 4; - unit_khz_dds_val = (1<<26) / (dib8000_read32(state, 23) / 1000); + + unit_khz_dds_val = (1<<26) / (internal ?: 1); if (offset_khz < 0) dds = (1 << 26) - (abs_offset_khz * unit_khz_dds_val); else From 72c774aa9d1e16bfd247096935e7dae194d84929 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:07 -0700 Subject: [PATCH 1689/2157] objtool, panic: Disable SMAP in __stack_chk_fail() __stack_chk_fail() can be called from uaccess-enabled code. Make sure uaccess gets disabled before calling panic(). Fixes the following warning: kernel/trace/trace_branch.o: error: objtool: ftrace_likely_update+0x1ea: call to __stack_chk_fail() with UACCESS enabled Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Kees Cook Cc: Andrew Morton Cc: Linus Torvalds Link: https://lore.kernel.org/r/a3e97e0119e1b04c725a8aa05f7bc83d98e657eb.1742852847.git.jpoimboe@kernel.org --- kernel/panic.c | 6 ++++++ tools/objtool/check.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index d8635d5cecb2..f9f0c5148f6a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -832,9 +832,15 @@ device_initcall(register_warn_debugfs); */ __visible noinstr void __stack_chk_fail(void) { + unsigned long flags; + instrumentation_begin(); + flags = user_access_save(); + panic("stack-protector: Kernel stack is corrupted in: %pB", __builtin_return_address(0)); + + user_access_restore(flags); instrumentation_end(); } EXPORT_SYMBOL(__stack_chk_fail); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0caabf0e8faf..3bf29923d5c0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1194,12 +1194,15 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_load_invalid_value", /* STACKLEAK */ "stackleak_track_stack", + /* TRACE_BRANCH_PROFILING */ + "ftrace_likely_update", + /* STACKPROTECTOR */ + "__stack_chk_fail", /* misc */ "csum_partial_copy_generic", "copy_mc_fragile", "copy_mc_fragile_handle_tail", "copy_mc_enhanced_fast_string", - "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ "rep_stos_alternative", "rep_movs_alternative", "__copy_user_nocache", From 7501153750b47416c9891038330359fad0205839 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:08 -0700 Subject: [PATCH 1690/2157] objtool, Input: cyapa - Remove undefined behavior in cyapa_update_fw_store() In cyapa_update_fw_store(), if 'count' is zero, the write to fw_name[count-1] underflows the array. Presumably that's not possible in normal operation, as its caller sysfs_kf_write() already checks for zero. Regardless it's a good idea to check for the error explicitly and avoid undefined behavior. Fixes the following warning with an UBSAN kernel: vmlinux.o: warning: objtool: .text.cyapa_update_fw_store: unexpected end of section Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Dmitry Torokhov Cc: Linus Torvalds Link: https://lore.kernel.org/r/73ae0bb3c656735890d914b74c9d6bb40c25d3cd.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503171547.LlCTJLQL-lkp@intel.com/ --- drivers/input/mouse/cyapa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c index 2f2d925a55d7..00c87c0532a6 100644 --- a/drivers/input/mouse/cyapa.c +++ b/drivers/input/mouse/cyapa.c @@ -1080,8 +1080,8 @@ static ssize_t cyapa_update_fw_store(struct device *dev, char fw_name[NAME_MAX]; int ret, error; - if (count >= NAME_MAX) { - dev_err(dev, "File name too long\n"); + if (!count || count >= NAME_MAX) { + dev_err(dev, "Bad file name size\n"); return -EINVAL; } From 060aed9c0093b341480770457093449771cf1496 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:09 -0700 Subject: [PATCH 1691/2157] objtool, ASoC: codecs: wcd934x: Remove potential undefined behavior in wcd934x_slim_irq_handler() If 'port_id' is negative, the shift counts in wcd934x_slim_irq_handler() also become negative, resulting in undefined behavior due to shift out of bounds. If I'm reading the code correctly, that appears to be not possible, but with KCOV enabled, Clang's range analysis isn't always able to determine that and generates undefined behavior. As a result the code generation isn't optimal, and undefined behavior should be avoided regardless. Improve code generation and remove the undefined behavior by converting the signed variables to unsigned. Fixes the following warning with UBSAN: sound/soc/codecs/snd-soc-wcd934x.o: warning: objtool: .text.wcd934x_slim_irq_handler: unexpected end of section Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Mark Brown Cc: Srinivas Kandagatla Cc: Liam Girdwood Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Linus Torvalds Link: https://lore.kernel.org/r/7e863839ec7301bf9c0f429a03873d44e484c31c.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503180044.oH9gyPeg-lkp@intel.com/ --- sound/soc/codecs/wcd934x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 910852eb9698..c7f1b28f3b23 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -2273,7 +2273,7 @@ static irqreturn_t wcd934x_slim_irq_handler(int irq, void *data) { struct wcd934x_codec *wcd = data; unsigned long status = 0; - int i, j, port_id; + unsigned int i, j, port_id; unsigned int val, int_val = 0; irqreturn_t ret = IRQ_NONE; bool tx; From 29c578c848402a34e8c8e115bf66cb6008b77062 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:10 -0700 Subject: [PATCH 1692/2157] objtool, regulator: rk808: Remove potential undefined behavior in rk806_set_mode_dcdc() If 'ctr_bit' is negative, the shift counts become negative, causing a shift of bounds and undefined behavior. Presumably that's not possible in normal operation, but the code generation isn't optimal. And undefined behavior should be avoided regardless. Improve code generation and remove the undefined behavior by converting the signed variables to unsigned. Fixes the following warning with an UBSAN kernel: vmlinux.o: warning: objtool: rk806_set_mode_dcdc() falls through to next function rk806_get_mode_dcdc() vmlinux.o: warning: objtool: .text.rk806_set_mode_dcdc: unexpected end of section Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Mark Brown Cc: Liam Girdwood Cc: Linus Torvalds Link: https://lore.kernel.org/r/2023abcddf3f524ba478d64339996f25dc4097d2.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503182350.52KeHGD4-lkp@intel.com/ --- drivers/regulator/rk808-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 7d82bd1b36df..1e8142479656 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -270,8 +270,8 @@ static const unsigned int rk817_buck1_4_ramp_table[] = { static int rk806_set_mode_dcdc(struct regulator_dev *rdev, unsigned int mode) { - int rid = rdev_get_id(rdev); - int ctr_bit, reg; + unsigned int rid = rdev_get_id(rdev); + unsigned int ctr_bit, reg; reg = RK806_POWER_FPWM_EN0 + rid / 8; ctr_bit = rid % 8; From 4fa752a3bddaad1e94a67668cb6fea15883f24b6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Mar 2025 12:40:41 +0100 Subject: [PATCH 1693/2157] drm/xe/vf: Don't check CTC_MODE[0] if VF Starting from commit 18778b5fdd01 ("drm/xe: Eliminate usage of TIMESTAMP_OVERRIDE") we access the CTC_MODE register only to warn if it has undocumented value. There is no point in doing that on the VF driver. While here, move this check to a helper function. Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20250311114042.1954-2-michal.wajdeczko@intel.com (cherry picked from commit fce3fb7b914bcd19341de8d8eff8bef371c2cddf) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_clock.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 2a958c92d8ea..fca38738e610 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -40,11 +40,8 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) } } -int xe_gt_clock_init(struct xe_gt *gt) +static void check_ctc_mode(struct xe_gt *gt) { - u32 c0 = xe_mmio_read32(>->mmio, RPM_CONFIG0); - u32 freq = 0; - /* * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later * platforms. In theory it could be a valid setting for pre-Xe2 @@ -57,7 +54,17 @@ int xe_gt_clock_init(struct xe_gt *gt) */ if (xe_mmio_read32(>->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC) xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n"); +} +int xe_gt_clock_init(struct xe_gt *gt) +{ + u32 freq; + u32 c0; + + if (!IS_SRIOV_VF(gt_to_xe(gt))) + check_ctc_mode(gt); + + c0 = xe_mmio_read32(>->mmio, RPM_CONFIG0); freq = get_crystal_clock_freq(c0); /* From d2de4410a88ffc6053300d03041cc73b8c85dbaf Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 20 Mar 2025 10:51:23 -0700 Subject: [PATCH 1694/2157] drm/xe: Apply Wa_16023105232 The WA requires KMD to disable DOP clock gating during a semaphore wait and also ensure that idle delay for every CS is lower than the idle wait time in the PWRCTX_MAXCNT register. Default values for these registers already comply with this restriction. v2: Store timestamp_base in gt info and other comments (Daniele) v3: Skip WA check for VF v4: Review comments (Matt Roper) v5: Cleanup the clock functions and use reg_field_get (Matt Roper) v6: Fix checkpatch issue v7: Fix CI issue Cc: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20250320175123.3026754-1-vinay.belgaumkar@intel.com (cherry picked from commit 7c53ff050ba88bb37eed3e17f2bb8ec592d83302) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 +++ drivers/gpu/drm/xe/xe_gt_clock.c | 39 ++++++++++++++++-------- drivers/gpu/drm/xe/xe_gt_types.h | 2 ++ drivers/gpu/drm/xe/xe_hw_engine.c | 33 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa.c | 6 ++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 6 files changed, 74 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 4f372dc2cb89..fb8ec317b6ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -130,6 +130,10 @@ #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) +#define RING_IDLEDLY(base) XE_REG((base) + 0x23c) +#define INHIBIT_SWITCH_UNTIL_PREEMPTED REG_BIT(31) +#define IDLE_DELAY REG_GENMASK(20, 0) + #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define CTX_CTRL_PXP_ENABLE REG_BIT(10) #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index fca38738e610..4f011d1573c6 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -16,27 +16,42 @@ #include "xe_macros.h" #include "xe_mmio.h" -static u32 get_crystal_clock_freq(u32 rpm_config_reg) +#define f19_2_mhz 19200000 +#define f24_mhz 24000000 +#define f25_mhz 25000000 +#define f38_4_mhz 38400000 +#define ts_base_83 83333 +#define ts_base_52 52083 +#define ts_base_80 80000 + +static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq, + u32 *timestamp_base) { - const u32 f19_2_mhz = 19200000; - const u32 f24_mhz = 24000000; - const u32 f25_mhz = 25000000; - const u32 f38_4_mhz = 38400000; u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, rpm_config_reg); switch (crystal_clock) { case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - return f24_mhz; + *freq = f24_mhz; + *timestamp_base = ts_base_83; + return; case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - return f19_2_mhz; + *freq = f19_2_mhz; + *timestamp_base = ts_base_52; + return; case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: - return f38_4_mhz; + *freq = f38_4_mhz; + *timestamp_base = ts_base_52; + return; case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: - return f25_mhz; + *freq = f25_mhz; + *timestamp_base = ts_base_80; + return; default: - XE_WARN_ON("NOT_POSSIBLE"); - return 0; + xe_gt_warn(gt, "Invalid crystal clock frequency: %u", crystal_clock); + *freq = 0; + *timestamp_base = 0; + return; } } @@ -65,7 +80,7 @@ int xe_gt_clock_init(struct xe_gt *gt) check_ctc_mode(gt); c0 = xe_mmio_read32(>->mmio, RPM_CONFIG0); - freq = get_crystal_clock_freq(c0); + read_crystal_clock(gt, c0, &freq, >->info.timestamp_base); /* * Now figure out how the command stream's timestamp diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index e3cfb026ac88..7def0959da35 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -121,6 +121,8 @@ struct xe_gt { enum xe_gt_type type; /** @info.reference_clock: clock frequency */ u32 reference_clock; + /** @info.timestamp_base: GT timestamp base */ + u32 timestamp_base; /** * @info.engine_mask: mask of engines present on GT. Some of * them may be reserved in runtime and not available for user. diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 223b95de388c..8c05fd30b7df 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -8,7 +8,9 @@ #include #include +#include #include +#include #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -21,6 +23,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_clock.h" #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" @@ -564,6 +567,33 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_whitelist_process_engine(hwe); } +static void adjust_idledly(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 idledly, maxcnt; + u32 idledly_units_ps = 8 * gt->info.timestamp_base; + u32 maxcnt_units_ns = 640; + bool inhibit_switch = 0; + + if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) { + idledly = xe_mmio_read32(>->mmio, RING_IDLEDLY(hwe->mmio_base)); + maxcnt = xe_mmio_read32(>->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base)); + + inhibit_switch = idledly & INHIBIT_SWITCH_UNTIL_PREEMPTED; + idledly = REG_FIELD_GET(IDLE_DELAY, idledly); + idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000); + maxcnt = REG_FIELD_GET(IDLE_WAIT_TIME, maxcnt); + maxcnt *= maxcnt_units_ns; + + if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) { + idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns), + idledly_units_ps); + idledly = DIV_ROUND_CLOSEST(idledly, 1000); + xe_mmio_write32(>->mmio, RING_IDLEDLY(hwe->mmio_base), idledly); + } + } +} + static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { @@ -604,6 +634,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; + /* Ensure IDLEDLY is lower than MAXCNT */ + adjust_idledly(hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_hwsp: diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a25afb757f70..24f644c0a673 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -622,6 +622,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, + { XE_RTP_NAME("16023105232"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR, + GRAPHICS_VERSION_RANGE(2001, 3001)), + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index e0c5fa460487..0c738af24f7c 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -53,3 +53,5 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION_RANGE(1270, 1274) 1508761755 GRAPHICS_VERSION(1255) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) +16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) + MEDIA_VERSION_RANGE(1301, 3000) From caf2f15648ba6cb4c329465c5686c9864a081a71 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 14 Mar 2025 06:48:58 -0700 Subject: [PATCH 1695/2157] drm/xe: Move survivability back to xe Commit d40f275d96e8 ("drm/xe: Move survivability entirely to xe_pci") moved the survivability handling to be done entirely in the xe_pci layer. However there are some issues with that approach: 1) Survivability mode needs at least the mmio initialized, otherwise it can't really read a register to decide if it should enter that state 2) SR-IOV mode should be initialized, otherwise it's not possible to check if it's VF Besides, as pointed by Riana the check for xe_survivability_mode_enable() was wrong in xe_pci_probe() since it's not a bool return. Fix that by moving the initialization to be entirely in the xe_device layer, with the correct dependencies handled: only after mmio and sriov initialization, and not triggering it on error from wait_for_lmem_ready(). This restores the trigger behavior before that commit. The xe_pci layer now only checks for "is it enabled?", like it's doing in xe_pci_suspend()/xe_pci_remove(), etc. Cc: Riana Tauro Fixes: d40f275d96e8 ("drm/xe: Move survivability entirely to xe_pci") Reviewed-by: Riana Tauro Link: https://patchwork.freedesktop.org/patch/msgid/20250314-fix-survivability-v5-1-fdb3559ea965@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 86b5e0dbba07438de91dd81095464c6c4aa7a372) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 17 +++++++++++++++-- drivers/gpu/drm/xe/xe_pci.c | 16 +++++++--------- drivers/gpu/drm/xe/xe_survivability_mode.c | 19 ++++++++++++------- drivers/gpu/drm/xe/xe_survivability_mode.h | 1 - 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5d79b439dd62..00191227bc95 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -53,6 +53,7 @@ #include "xe_pxp.h" #include "xe_query.h" #include "xe_shrinker.h" +#include "xe_survivability_mode.h" #include "xe_sriov.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" @@ -705,8 +706,20 @@ int xe_device_probe_early(struct xe_device *xe) sriov_update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) - return err; + if (err) { + int save_err = err; + + /* + * Try to leave device in survivability mode if device is + * possible, but still return the previous error for error + * propagation + */ + err = xe_survivability_mode_enable(xe); + if (err) + return err; + + return save_err; + } err = wait_for_lmem_ready(xe); if (err) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index da9679c8cf26..818f023166d5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -803,16 +803,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe_early(xe); - - /* - * In Boot Survivability mode, no drm card is exposed and driver is - * loaded with bare minimum to allow for firmware to be flashed through - * mei. If early probe fails, check if survivability mode is flagged by - * HW to be enabled. In that case enable it and return success. - */ if (err) { - if (xe_survivability_mode_required(xe) && - xe_survivability_mode_enable(xe)) + /* + * In Boot Survivability mode, no drm card is exposed and driver + * is loaded with bare minimum to allow for firmware to be + * flashed through mei. If early probe failed, but it managed to + * enable survivability mode, return success. + */ + if (xe_survivability_mode_is_enabled(xe)) return 0; return err; diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index d939ce70e6fa..7b1ec643f0f0 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -178,15 +178,16 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe) return xe->survivability.mode; } -/** - * xe_survivability_mode_required - checks if survivability mode is required - * @xe: xe device instance +/* + * survivability_mode_requested - check if it's possible to enable + * survivability mode and that was requested by firmware * - * This function reads the boot status from Pcode + * This function reads the boot status from Pcode. * - * Return: true if boot status indicates failure, false otherwise + * Return: true if platform support is available and boot status indicates + * failure, false otherwise. */ -bool xe_survivability_mode_required(struct xe_device *xe) +static bool survivability_mode_requested(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -208,7 +209,8 @@ bool xe_survivability_mode_required(struct xe_device *xe) * * Initialize survivability information and enable survivability mode * - * Return: 0 for success, negative error code otherwise. + * Return: 0 if survivability mode is enabled or not requested; negative error + * code otherwise. */ int xe_survivability_mode_enable(struct xe_device *xe) { @@ -216,6 +218,9 @@ int xe_survivability_mode_enable(struct xe_device *xe) struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + if (!survivability_mode_requested(xe)) + return 0; + survivability->size = MAX_SCRATCH_MMIO; info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index f4df5f9025ce..d7e64885570d 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -12,6 +12,5 @@ struct xe_device; int xe_survivability_mode_enable(struct xe_device *xe); bool xe_survivability_mode_is_enabled(struct xe_device *xe); -bool xe_survivability_mode_required(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ From 22d00862a62a52571a6f244c6d248476e997b9b7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 14 Mar 2025 06:54:26 -0700 Subject: [PATCH 1696/2157] drm/xe: Set survivability mode before heci init Commit d40f275d96e8 ("drm/xe: Move survivability entirely to xe_pci") tried to follow the logic: initialize everything needed and if everything succeeds, set the flag that it's enabled. While it fixed some corner cases of those calls failing, it was wrong for setting the flag after the call to xe_heci_gsc_init(): that function does a different initialization for survivability mode. Fix that and add comments about this being done on purpose. Suggested-by: Riana Tauro Fixes: d40f275d96e8 ("drm/xe: Move survivability entirely to xe_pci") Reviewed-by: Riana Tauro Link: https://patchwork.freedesktop.org/patch/msgid/20250314-fix-survivability-v5-2-fdb3559ea965@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 14efa739ca70514e8b923a02b5bcb42511dd1ee8) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_survivability_mode.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 7b1ec643f0f0..cb813b337fd3 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -155,13 +155,21 @@ static int enable_survivability_mode(struct pci_dev *pdev) if (ret) return ret; + /* Make sure xe_heci_gsc_init() knows about survivability mode */ + survivability->mode = true; + ret = xe_heci_gsc_init(xe); - if (ret) + if (ret) { + /* + * But if it fails, device can't enter survivability + * so move it back for correct error handling + */ + survivability->mode = false; return ret; + } xe_vsec_init(xe); - survivability->mode = true; dev_err(dev, "In Survivability Mode\n"); return 0; From 5e66cf6edddb5f6237e3afb07475ace57ecb56bc Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sun, 23 Mar 2025 19:41:03 +0800 Subject: [PATCH 1697/2157] drm/xe: Fix unmet direct dependencies warning WARNING: unmet direct dependencies detected for FB_IOMEM_HELPERS Depends on [n]: HAS_IOMEM [=y] && FB_CORE [=n] Selected by [m]: - DRM_XE_DISPLAY [=y] && HAS_IOMEM [=y] && DRM [=m] && DRM_XE [=m] && DRM_XE [=m]=m [=m] && HAS_IOPORT [=y] DRM_XE_DISPLAY requires FB_IOMEM_HELPERS, but the dependency FB_CORE is missing, selecting FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION is set as other drm drivers. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Yue Haibing Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250323114103.1960511-1-yuehaibing@huawei.com Signed-off-by: Lucas De Marchi (cherry picked from commit 689582882802cd64986c1eb584c9f5184d67f0cf) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 7d7995196702..5c2f459a2925 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -53,7 +53,7 @@ config DRM_XE config DRM_XE_DISPLAY bool "Enable display support" depends on DRM_XE && DRM_XE=m && HAS_IOPORT - select FB_IOMEM_HELPERS + select FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION select I2C select I2C_ALGOBIT default y From 22cc5ca5de52bbfc36a7d4a55323f91fb4492264 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 28 Feb 2025 01:44:14 +0000 Subject: [PATCH 1698/2157] x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT CONFIG_PARAVIRT_XXL is mainly defined/used by XEN PV guests. For other VM guest types, features supported under CONFIG_PARAVIRT are self sufficient. CONFIG_PARAVIRT mainly provides support for TLB flush operations and time related operations. For TDX guest as well, paravirt calls under CONFIG_PARVIRT meets most of its requirement except the need of HLT and SAFE_HLT paravirt calls, which is currently defined under CONFIG_PARAVIRT_XXL. Since enabling CONFIG_PARAVIRT_XXL is too bloated for TDX guest like platforms, move HLT and SAFE_HLT paravirt calls under CONFIG_PARAVIRT. Moving HLT and SAFE_HLT paravirt calls are not fatal and should not break any functionality for current users of CONFIG_PARAVIRT. Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Co-developed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Vishal Annapurve Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Reviewed-by: Juergen Gross Tested-by: Ryan Afranji Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-2-vannapurve@google.com --- arch/x86/include/asm/irqflags.h | 40 +++++++++++++++------------ arch/x86/include/asm/paravirt.h | 20 +++++++------- arch/x86/include/asm/paravirt_types.h | 3 +- arch/x86/kernel/paravirt.c | 14 ++++++---- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index abb8374c9ff7..9a9b21b78905 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #endif +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static __always_inline void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static __always_inline void halt(void) +{ + native_halt(); +} +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + #ifdef CONFIG_PARAVIRT_XXL #include #else @@ -97,24 +119,6 @@ static __always_inline void arch_local_irq_enable(void) native_irq_enable(); } -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static __always_inline void arch_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static __always_inline void halt(void) -{ - native_halt(); -} - /* * For spinlocks, etc: */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index bed346bfac89..c4c23190925c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -102,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn, PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); } +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); +} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { @@ -165,16 +175,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static __always_inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - static inline u64 paravirt_read_msr(unsigned msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 62912023b46f..631c306ce1ff 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -120,10 +120,9 @@ struct pv_irq_ops { struct paravirt_callee_save save_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - +#endif void (*safe_halt)(void); void (*halt)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 97925632c28e..1ccd05d8999f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -75,6 +75,11 @@ void paravirt_set_sched_clock(u64 (*func)(void)) static_call_update(pv_sched_clock, func); } +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} + #ifdef CONFIG_PARAVIRT_XXL static noinstr void pv_native_write_cr2(unsigned long val) { @@ -100,11 +105,6 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val) { native_set_debugreg(regno, val); } - -static noinstr void pv_native_safe_halt(void) -{ - native_safe_halt(); -} #endif struct pv_info pv_info = { @@ -161,9 +161,11 @@ struct paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), +#endif /* CONFIG_PARAVIRT_XXL */ + + /* Irq HLT ops. */ .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, -#endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ .mmu.flush_tlb_user = native_flush_tlb_local, From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 28 Feb 2025 01:44:15 +0000 Subject: [PATCH 1699/2157] x86/tdx: Fix arch_safe_halt() execution for TDX VMs Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts. Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example: kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt() To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether. As tested by Ryan Afranji: "Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset. Verified the major performance improvement with this patchset applied." Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve Signed-off-by: Ingo Molnar Reviewed-by: Kirill A. Shutemov Tested-by: Ryan Afranji Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com --- arch/x86/Kconfig | 1 + arch/x86/coco/tdx/tdx.c | 26 +++++++++++++++++++++++++- arch/x86/include/asm/tdx.h | 4 ++-- arch/x86/kernel/process.c | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 05b4eca156cf..f614c0522a0b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -878,6 +878,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 7772b01ab738..aa0eb4057226 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false; @@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_module_args args = { @@ -1109,6 +1120,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; + /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 65394aa9b49f..4a1922ec80cf 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_halt(void); bool tdx_early_handle_ve(struct pt_regs *regs); @@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls); #else static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { }; static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 91f6ff618852..962c3ce39323 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -939,7 +939,7 @@ void __init select_idle_routine(void) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else { static_call_update(x86_idle, default_idle); } From e8f45927ee5d99fa52f14205a2c7ac3820c64457 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 28 Feb 2025 01:44:16 +0000 Subject: [PATCH 1700/2157] x86/tdx: Emit warning if IRQs are enabled during HLT #VE handling Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. safe_halt() routines execute HLT in STI-shadow so IRQs need to remain disabled until the TDCALL to ensure that pending IRQs are correctly treated as wake events. Emit warning and fail emulation if IRQs are enabled during HLT #VE handling to avoid running into scenarios where IRQ wake events are lost resulting in indefinite HLT execution times. Signed-off-by: Vishal Annapurve Signed-off-by: Ingo Molnar Reviewed-by: Kirill A. Shutemov Tested-by: Ryan Afranji Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250228014416.3925664-4-vannapurve@google.com --- arch/x86/coco/tdx/tdx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index aa0eb4057226..edab6d6049be 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -393,6 +393,14 @@ static int handle_halt(struct ve_info *ve) { const bool irq_disabled = irqs_disabled(); + /* + * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a + * wake event may be consumed before requesting HLT emulation, leaving + * the vCPU blocking indefinitely. + */ + if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled")) + return -EIO; + if (__halt(irq_disabled)) return -EIO; From ed3b274abc4008efffebf1997968a3f2720a86d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 Mar 2025 20:24:59 +0100 Subject: [PATCH 1701/2157] ASoC: codecs: wsa883x: Correct VI sense channel mask VI sense port on WSA883x speaker takes only one channel, so use 0x1 as channel mask. This fixes garbage being recorded by the speaker when testing the VI sense feedback path. Cc: Srinivas Kandagatla Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250312-asoc-wsa88xx-visense-v1-1-9ca705881122@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa883x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index d259e1d4d83d..1c9df7c061bd 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -568,7 +568,7 @@ static const struct sdw_port_config wsa883x_pconfig[WSA883X_MAX_SWR_PORTS] = { }, [WSA883X_PORT_VISENSE] = { .num = WSA883X_PORT_VISENSE + 1, - .ch_mask = 0x3, + .ch_mask = 0x1, }, }; From 060fac202eb8e5c83961f0e0bf6dad8ab6e46643 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 Mar 2025 20:25:00 +0100 Subject: [PATCH 1702/2157] ASoC: codecs: wsa884x: Correct VI sense channel mask VI sense port on WSA883x speaker takes only one channel, so use 0x1 as channel mask. This fixes garbage being recorded by the speaker when testing the VI sense feedback path. Cc: Srinivas Kandagatla Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250312-asoc-wsa88xx-visense-v1-2-9ca705881122@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa884x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index 8051483aa1ac..daada1a2a34c 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -891,7 +891,7 @@ static const struct sdw_port_config wsa884x_pconfig[WSA884X_MAX_SWR_PORTS] = { }, [WSA884X_PORT_VISENSE] = { .num = WSA884X_PORT_VISENSE + 1, - .ch_mask = 0x3, + .ch_mask = 0x1, }, [WSA884X_PORT_CPS] = { .num = WSA884X_PORT_CPS + 1, From 012a6efcc805308b1d90a1056ba963eb08858645 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:35:25 +0300 Subject: [PATCH 1703/2157] ASoC: sma1307: Fix error handling in sma1307_setting_loaded() There are a couple bugs in this code: 1) The cleanup code calls kfree(sma1307->set.header) and kfree(sma1307->set.def) but those functions were allocated using devm_kzalloc(). It results in a double free. Delete all these kfree() calls. 2) A missing call to kfree(data) if the checksum was wrong on this error path: if ((sma1307->set.checksum >> 8) != SMA1307_SETTING_CHECKSUM) { Since the "data" pointer is supposed to be freed on every return, I changed that to use the __free(kfree) cleanup attribute. Fixes: 0ec6bd16705f ("ASoC: sma1307: Add NULL check in sma1307_setting_loaded()") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/8d32dd96-1404-4373-9b6c-c612a9c18c4c@stanley.mountain Signed-off-by: Mark Brown --- sound/soc/codecs/sma1307.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index f5c303d4bb62..498189ab691c 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c @@ -1705,7 +1705,7 @@ static void sma1307_check_fault_worker(struct work_struct *work) static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *file) { const struct firmware *fw; - int *data, size, offset, num_mode; + int size, offset, num_mode; int ret; ret = request_firmware(&fw, file, sma1307->dev); @@ -1722,7 +1722,7 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil return; } - data = kzalloc(fw->size, GFP_KERNEL); + int *data __free(kfree) = kzalloc(fw->size, GFP_KERNEL); if (!data) { release_firmware(fw); sma1307->set.status = false; @@ -1742,7 +1742,6 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.header_size, GFP_KERNEL); if (!sma1307->set.header) { - kfree(data); sma1307->set.status = false; return; } @@ -1763,8 +1762,6 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil = devm_kzalloc(sma1307->dev, sma1307->set.def_size * sizeof(int), GFP_KERNEL); if (!sma1307->set.def) { - kfree(data); - kfree(sma1307->set.header); sma1307->set.status = false; return; } @@ -1782,9 +1779,6 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.mode_size * 2 * sizeof(int), GFP_KERNEL); if (!sma1307->set.mode_set[i]) { - kfree(data); - kfree(sma1307->set.header); - kfree(sma1307->set.def); for (int j = 0; j < i; j++) kfree(sma1307->set.mode_set[j]); sma1307->set.status = false; @@ -1799,7 +1793,6 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil } } - kfree(data); sma1307->set.status = true; } From 7a874e8b54ea21094f7fd2d428b164394c6cb316 Mon Sep 17 00:00:00 2001 From: Luis de Arquer Date: Fri, 21 Mar 2025 13:57:53 +0100 Subject: [PATCH 1704/2157] spi-rockchip: Fix register out of bounds access Do not write native chip select stuff for GPIO chip selects. GPIOs can be numbered much higher than native CS. Also, it makes no sense. Signed-off-by: Luis de Arquer Link: https://patch.msgid.link/365ccddfba110549202b3520f4401a6a936e82a8.camel@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 1bc012fce7cb..1a6381de6f33 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -547,7 +547,7 @@ static int rockchip_spi_config(struct rockchip_spi *rs, cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET; if (spi->mode & SPI_LSB_FIRST) cr0 |= CR0_FBM_LSB << CR0_FBM_OFFSET; - if (spi->mode & SPI_CS_HIGH) + if ((spi->mode & SPI_CS_HIGH) && !(spi_get_csgpiod(spi, 0))) cr0 |= BIT(spi_get_chipselect(spi, 0)) << CR0_SOI_OFFSET; if (xfer->rx_buf && xfer->tx_buf) From f9733aa925d99268e3c1f284d3e62e6a643dcc1d Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 14 Jan 2025 14:45:38 +0100 Subject: [PATCH 1705/2157] ARM: 9439/1: arm32: simplify ARM_MMU_KEEP usage All current users need to add a KEEP() around the argument so the value is actually kept, which doesn't feel very natural and is prone to upcoming bugs as the name suggests that this macro alone already keeps things. Move that directly into the definition. Signed-off-by: Rolf Eike Beer Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/vmlinux.lds.h | 2 +- arch/arm/kernel/vmlinux-xip.lds.S | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index d60f6e83a9f7..89697f204715 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -19,7 +19,7 @@ #endif #ifdef CONFIG_MMU -#define ARM_MMU_KEEP(x) x +#define ARM_MMU_KEEP(x) KEEP(x) #define ARM_MMU_DISCARD(x) #else #define ARM_MMU_KEEP(x) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 5eddb75a7174..f2e8d4fac068 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(KEEP(*(__ex_table))) + ARM_MMU_KEEP(*(__ex_table)) __stop___ex_table = .; } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index de373c6c2ae8..d592a203f9c6 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(KEEP(*(__ex_table))) + ARM_MMU_KEEP(*(__ex_table)) __stop___ex_table = .; } From ccb8ce526807fcbd4578d6619100d8ec48769ea8 Mon Sep 17 00:00:00 2001 From: Christian Schrrefl Date: Fri, 31 Jan 2025 00:03:45 +0100 Subject: [PATCH 1706/2157] ARM: 9441/1: rust: Enable Rust support for ARMv7 This commit allows building ARMv7 kernels with Rust support. The rust core library expects some __eabi_... functions that are not implemented in the kernel. Those functions are some float operations and __aeabi_uldivmod. For now those are implemented with define_panicking_intrinsics!. This is based on the code by Sven Van Asbroeck from the original rust branch and inspired by the AArch version by Jamie Cunliffe. I have tested the rust samples and a custom simple MMIO module on hardware (De1SoC FPGA + Arm A9 CPU). Tested-by: Rudraksha Gupta Reviewed-by: Alice Ryhl Acked-by: Miguel Ojeda Tested-by: Miguel Ojeda Acked-by: Ard Biesheuvel Signed-off-by: Christian Schrefl Signed-off-by: Russell King (Oracle) --- Documentation/rust/arch-support.rst | 1 + arch/arm/Kconfig | 1 + arch/arm/Makefile | 1 + rust/Makefile | 8 ++++++++ rust/compiler_builtins.rs | 24 ++++++++++++++++++++++++ scripts/generate_rust_target.rs | 4 +++- 6 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 54be7ddf3e57..6e6a515d0899 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -15,6 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. ============= ================ ============================================== Architecture Level of support Constraints ============= ================ ============================================== +``arm`` Maintained ARMv7 Little Endian only. ``arm64`` Maintained Little Endian only. ``loongarch`` Maintained \- ``riscv`` Maintained ``riscv64`` and LLVM/Clang only. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 835b5f100e92..202dbd17ad2f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -133,6 +133,7 @@ config ARM select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ + select HAVE_RUST if CPU_LITTLE_ENDIAN && CPU_32v7 select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 00ca7886b18e..4808d3ed98e4 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -150,6 +150,7 @@ endif KBUILD_CPPFLAGS +=$(cpp-y) KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi CHECKFLAGS += -D__arm__ diff --git a/rust/Makefile b/rust/Makefile index 8fcfd60447bc..5b45f760f462 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -245,6 +245,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu BINDGEN_TARGET_arm64 := aarch64-linux-gnu +BINDGEN_TARGET_arm := arm-linux-gnueabi BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) # All warnings are inhibited since GCC builds are very experimental, @@ -397,6 +398,13 @@ redirect-intrinsics = \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 +ifdef CONFIG_ARM + # Add eabi initrinsics for ARM 32-bit + redirect-intrinsics += \ + __aeabi_fadd __aeabi_fmul __aeabi_fcmpeq __aeabi_fcmple __aeabi_fcmplt __aeabi_fcmpun \ + __aeabi_dadd __aeabi_dmul __aeabi_dcmple __aeabi_dcmplt __aeabi_dcmpun \ + __aeabi_uldivmod +endif ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) # These intrinsics are defined for ARM64 and RISCV64 redirect-intrinsics += \ diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index f14b8d7caf89..dd16c1dc899c 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -73,5 +73,29 @@ pub extern "C" fn $ident() { __umodti3, }); +#[cfg(target_arch = "arm")] +define_panicking_intrinsics!("`f32` should not be used", { + __aeabi_fadd, + __aeabi_fmul, + __aeabi_fcmpeq, + __aeabi_fcmple, + __aeabi_fcmplt, + __aeabi_fcmpun, +}); + +#[cfg(target_arch = "arm")] +define_panicking_intrinsics!("`f64` should not be used", { + __aeabi_dadd, + __aeabi_dmul, + __aeabi_dcmple, + __aeabi_dcmplt, + __aeabi_dcmpun, +}); + +#[cfg(target_arch = "arm")] +define_panicking_intrinsics!("`u64` division/modulo should not be used", { + __aeabi_uldivmod, +}); + // NOTE: if you are adding a new intrinsic here, you should also add it to // `redirect-intrinsics` in `rust/Makefile`. diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 0d00ac3723b5..f8e7fb38bf16 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -172,7 +172,9 @@ fn main() { let mut ts = TargetSpec::new(); // `llvm-target`s are taken from `scripts/Makefile.clang`. - if cfg.has("ARM64") { + if cfg.has("ARM") { + panic!("arm uses the builtin rustc target"); + } else if cfg.has("ARM64") { panic!("arm64 uses the builtin rustc aarch64-unknown-none target"); } else if cfg.has("RISCV") { if cfg.has("64BIT") { From 9cac324d6f4900f92d41db5fd6b73b6feb0bfb68 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Feb 2025 14:24:15 +0100 Subject: [PATCH 1707/2157] ARM: 9442/1: smp: Fix IPI alignment in /proc/interrupts On a system with less than 1000 interrupts, prec = 3, causing a misalignment for the IPI interrupts. E.g. on Koelsch (R-Car M2-W): 200: 0 0 gpio-rcar 6 Edge SW36 IPI0: 0 0 CPU wakeup interrupts IPI1: 0 0 Timer broadcast interrupts IPI2: 1701 2844 Rescheduling interrupts IPI3: 10338 21181 Function call interrupts IPI4: 0 0 CPU stop interrupts IPI5: 651 825 IRQ work interrupts IPI6: 0 0 completion interrupts Err: 0 Fix this by adopting the same solution as used on arm64. Signed-off-by: Geert Uytterhoeven Reviewed-by: Anshuman Khandual Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3431c0553f45..50999886a8b5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -551,7 +551,8 @@ void show_ipi_list(struct seq_file *p, int prec) if (!ipi_desc[i]) continue; - seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, + prec >= 4 ? " " : ""); for_each_online_cpu(cpu) seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); From e7607f7d6d81af71dcc5171278aadccc94d277cd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 20 Mar 2025 22:33:49 +0100 Subject: [PATCH 1708/2157] ARM: 9443/1: Require linker to support KEEP within OVERLAY for DCE ld.lld prior to 21.0.0 does not support using the KEEP keyword within an overlay description, which may be needed to avoid discarding necessary sections within an overlay with '--gc-sections', which can be enabled for the kernel via CONFIG_LD_DEAD_CODE_DATA_ELIMINATION. Disallow CONFIG_LD_DEAD_CODE_DATA_ELIMINATION without support for KEEP within OVERLAY and introduce a macro, OVERLAY_KEEP, that can be used to conditionally add KEEP when it is properly supported to avoid breaking old versions of ld.lld. Cc: stable@vger.kernel.org Link: https://github.com/llvm/llvm-project/commit/381599f1fe973afad3094e55ec99b1620dba7d8c Reviewed-by: Linus Walleij Signed-off-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 2 +- arch/arm/include/asm/vmlinux.lds.h | 6 ++++++ init/Kconfig | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 202dbd17ad2f..25ed6f1a7c7a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -121,7 +121,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD) + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_CAN_USE_KEEP_IN_OVERLAY) select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 89697f204715..a54db342653a 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -34,6 +34,12 @@ #define NOCROSSREFS #endif +#ifdef CONFIG_LD_CAN_USE_KEEP_IN_OVERLAY +#define OVERLAY_KEEP(x) KEEP(x) +#else +#define OVERLAY_KEEP(x) x +#endif + /* Set start/end symbol names to the LMA for the section */ #define ARM_LMA(sym, section) \ sym##_start = LOADADDR(section); \ diff --git a/init/Kconfig b/init/Kconfig index d0d021b3fa3b..fc994f5cd5db 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -129,6 +129,11 @@ config CC_HAS_COUNTED_BY # https://github.com/llvm/llvm-project/pull/112636 depends on !(CC_IS_CLANG && CLANG_VERSION < 190103) +config LD_CAN_USE_KEEP_IN_OVERLAY + # ld.lld prior to 21.0.0 did not support KEEP within an overlay description + # https://github.com/llvm/llvm-project/pull/130661 + def_bool LD_IS_BFD || LLD_VERSION >= 210000 + config RUSTC_HAS_COERCE_POINTEE def_bool RUSTC_VERSION >= 108400 From c3d944a367c0d9e4e125c7006e52f352e75776dc Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Thu, 20 Mar 2025 22:33:51 +0100 Subject: [PATCH 1709/2157] ARM: 9444/1: add KEEP() keyword to ARM_VECTORS Without this, the vectors are removed if LD_DEAD_CODE_DATA_ELIMINATION is enabled. At startup, the CPU (silently) hangs in the undefined instruction exception as soon as the first timer interrupt arrives. On my setup, the system also boots fine without the 2nd and 3rd KEEP() statements, so I cannot tell whether these are actually required. [nathan: Use OVERLAY_KEEP() to avoid breaking old ld.lld versions] Cc: stable@vger.kernel.org Fixes: ed0f94102251 ("ARM: 9404/1: arm32: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION") Signed-off-by: Christian Eggers Reviewed-by: Linus Walleij Signed-off-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/vmlinux.lds.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index a54db342653a..0341973e30e1 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -131,13 +131,13 @@ __vectors_lma = .; \ OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ .vectors { \ - *(.vectors) \ + OVERLAY_KEEP(*(.vectors)) \ } \ .vectors.bhb.loop8 { \ - *(.vectors.bhb.loop8) \ + OVERLAY_KEEP(*(.vectors.bhb.loop8)) \ } \ .vectors.bhb.bpiall { \ - *(.vectors.bhb.bpiall) \ + OVERLAY_KEEP(*(.vectors.bhb.bpiall)) \ } \ } \ ARM_LMA(__vectors, .vectors); \ From 623c3015d8c9b7d7c6b9796f6e3667428ab6327a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Mar 2025 20:46:51 +0100 Subject: [PATCH 1710/2157] ARM: 9445/1: clkdev: Mark some functions with __printf() attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the functions are using printf() type of format, and compiler is not happy about them as is: clkdev.c:185:17: error: function ‘vclkdev_alloc’ might be a candidate for gnu_printf’ format attribute [-Werror=suggest-attribute=format] clkdev.c:224:9: error: function ‘vclkdev_create’ might be a candidate for gnu_printf’ format attribute [-Werror=suggest-attribute=format] clkdev.c:314:9: error: function ‘__clk_register_clkdev’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors by adding __printf() attributes. Signed-off-by: Andy Shevchenko Signed-off-by: Russell King (Oracle) --- drivers/clk/clkdev.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 2f83fb97c6fb..e0bede6350e1 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -153,7 +153,7 @@ struct clk_lookup_alloc { char con_id[MAX_CON_ID]; }; -static struct clk_lookup * __ref +static __printf(3, 0) struct clk_lookup * __ref vclkdev_alloc(struct clk_hw *hw, const char *con_id, const char *dev_fmt, va_list ap) { @@ -215,7 +215,7 @@ vclkdev_alloc(struct clk_hw *hw, const char *con_id, const char *dev_fmt, return &cla->cl; } -static struct clk_lookup * +static __printf(3, 0) struct clk_lookup * vclkdev_create(struct clk_hw *hw, const char *con_id, const char *dev_fmt, va_list ap) { @@ -303,9 +303,8 @@ void clkdev_drop(struct clk_lookup *cl) } EXPORT_SYMBOL(clkdev_drop); -static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw, - const char *con_id, - const char *dev_id, ...) +static __printf(3, 4) struct clk_lookup * +__clk_register_clkdev(struct clk_hw *hw, const char *con_id, const char *dev_id, ...) { struct clk_lookup *cl; va_list ap; From 214c13e380ad7636631279f426387f9c4e3c14d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Mar 2025 19:05:48 -0400 Subject: [PATCH 1711/2157] SUNRPC: rpcbind should never reset the port to the value '0' If we already had a valid port number for the RPC service, then we should not allow the rpcbind client to set it to the invalid value '0'. Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 102c3818bc54..53bcca365fb1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -820,9 +820,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) } trace_rpcb_setport(child, map->r_status, map->r_port); - xprt->ops->set_port(xprt, map->r_port); - if (map->r_port) + if (map->r_port) { + xprt->ops->set_port(xprt, map->r_port); xprt_set_bound(xprt); + } } /* From bf9be373b830a3e48117da5d89bb6145a575f880 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Mar 2025 19:35:01 -0400 Subject: [PATCH 1712/2157] SUNRPC: rpc_clnt_set_transport() must not change the autobind setting The autobind setting was supposed to be determined in rpc_create(), since commit c2866763b402 ("SUNRPC: use sockaddr + size when creating remote transport endpoints"). Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1cfaf93ceec1..6f75862d9782 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -270,9 +270,6 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, old = rcu_dereference_protected(clnt->cl_xprt, lockdep_is_held(&clnt->cl_lock)); - if (!xprt_bound(xprt)) - clnt->cl_autobind = 1; - clnt->cl_timeout = timeout; rcu_assign_pointer(clnt->cl_xprt, xprt); spin_unlock(&clnt->cl_lock); From 2d3e998a0bc7fe26a724f87a8ce217848040520e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Mar 2025 17:58:50 -0400 Subject: [PATCH 1713/2157] NFS: Shut down the nfs_client only after all the superblocks The nfs_client manages state for all the superblocks in the "cl_superblocks" list, so it must not be shut down until all of them are gone. Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when shutdown") Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index b30401b2c939..37cb2b776435 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -14,6 +14,7 @@ #include #include +#include "internal.h" #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt) rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); } +/* + * Shut down the nfs_client only once all the superblocks + * have been shut down. + */ +static void shutdown_nfs_client(struct nfs_client *clp) +{ + struct nfs_server *server; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + nfs_mark_client_ready(clp, -EIO); + shutdown_client(clp->cl_rpcclient); +} + static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, server->flags |= NFS_MOUNT_SHUTDOWN; shutdown_client(server->client); - shutdown_client(server->nfs_client->cl_rpcclient); if (!IS_ERR(server->client_acl)) shutdown_client(server->client_acl); @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, if (server->nlm_host) shutdown_client(server->nlm_host->h_rpcclnt); out: + shutdown_nfs_client(server->nfs_client); return count; } From 3eba080e4d4f7bcc22ba19b43ab5fd412d99d1ba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Mar 2025 18:18:40 -0400 Subject: [PATCH 1714/2157] NFSv4: Further cleanups to shutdown loops Replace the tests for the RPC client being shut down with tests for whether the nfs_client is in an error state. Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 889511650ceb..50be54e0f578 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9580,7 +9580,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) return; trace_nfs4_sequence(clp, task->tk_status); - if (task->tk_status < 0 && !task->tk_client->cl_shutdown) { + if (task->tk_status < 0 && clp->cl_cons_state >= 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) return; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 542cdf71229f..f1f7eaa97973 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1198,7 +1198,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct rpc_clnt *clnt = clp->cl_rpcclient; bool swapon = false; - if (clnt->cl_shutdown) + if (clp->cl_cons_state < 0) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); From c81d5bcb7b38ab0322aea93152c091451b82d3f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Mar 2025 20:03:03 -0400 Subject: [PATCH 1715/2157] NFSv4: clp->cl_cons_state < 0 signifies an invalid nfs_client If someone calls nfs_mark_client_ready(clp, status) with a negative value for status, then that should signal that the nfs_client is no longer valid. Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f1f7eaa97973..272d2ebdae0f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1403,7 +1403,7 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); - return 0; + return clp->cl_cons_state < 0 ? clp->cl_cons_state : 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); From 0af5fb5ed3d2fd9e110c6112271f022b744a849a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Mar 2025 20:35:33 -0400 Subject: [PATCH 1716/2157] NFSv4: Treat ENETUNREACH errors as fatal for state recovery If a containerised process is killed and causes an ENETUNREACH or ENETDOWN error to be propagated to the state manager, then mark the nfs_client as being dead so that we don't loop in functions that are expecting recovery to succeed. Reviewed-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 272d2ebdae0f..7612e977e80b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2739,7 +2739,15 @@ static void nfs4_state_manager(struct nfs_client *clp) pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" " with error %d\n", section_sep, section, clp->cl_hostname, -status); - ssleep(1); + switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + nfs_mark_client_ready(clp, -EIO); + break; + default: + ssleep(1); + break; + } out_drain: memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); From d32c4e58545f17caaa854415f854691e32d42075 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Mar 2025 15:22:19 +0100 Subject: [PATCH 1717/2157] spi: SPI_QPIC_SNAND should be tristate and depend on MTD SPI_QPIC_SNAND is the only driver that selects MTD instead of depending on it, which could lead to circular dependencies. Moreover, as SPI_QPIC_SNAND is bool, this forces MTD (and various related symbols) to be built-in, as can be seen in an allmodconfig kernel. Except for a missing semicolon, there is no reason why SPI_QPIC_SNAND cannot be tristate; all MODULE_*() boilerplate is already present. Hence make SPI_QPIC_SNAND tristate, let it depend on MTD, and add the missing semicolon. Fixes: 7304d1909080ef0c ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/b63db431cbf35223a4400e44c296293d32c4543c.1742998909.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 4 ++-- drivers/spi/spi-qpic-snand.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index f40c282d4d63..ed38f6d41f47 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -937,9 +937,9 @@ config SPI_QCOM_QSPI QSPI(Quad SPI) driver for Qualcomm QSPI controller. config SPI_QPIC_SNAND - bool "QPIC SNAND controller" + tristate "QPIC SNAND controller" depends on ARCH_QCOM || COMPILE_TEST - select MTD + depends on MTD help QPIC_SNAND (QPIC SPI NAND) driver for Qualcomm QPIC controller. QPIC controller supports both parallel nand and serial nand. diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index fbba7741a9bf..17eb67e19132 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1614,7 +1614,7 @@ static const struct of_device_id qcom_snandc_of_match[] = { .data = &ipq9574_snandc_props, }, {} -} +}; MODULE_DEVICE_TABLE(of, qcom_snandc_of_match); static struct platform_driver qcom_spi_driver = { From 84c3c08f5a6c2e2209428b76156bcaf349c3a62d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 26 Mar 2025 16:22:01 +0100 Subject: [PATCH 1718/2157] ALSA: hda/realtek: Fix built-in mic breakage on ASUS VivoBook X515JA ASUS VivoBook X515JA with PCI SSID 1043:14f2 also hits the same issue as other VivoBook model about the mic pin assignment, and the same workaround is required to apply ALC256_FIXUP_ASUS_MIC_NO_PRESENCE quirk. Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Link: https://bugzilla.kernel.org/show_bug.cgi?id=219902 Link: https://patch.msgid.link/20250326152205.26733-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b4fe681ec3cb..bbe22530e241 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10804,6 +10804,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601VV/VU/VJ/VQ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x14d3, "ASUS G614JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x14f2, "ASUS VivoBook X515JA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), From 37477d9ecabd15323700313724479bea94d0421e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 26 Mar 2025 20:18:49 +0100 Subject: [PATCH 1719/2157] sched_ext: idle: Fix return code of scx_select_cpu_dfl() Return -EBUSY when using %SCX_PICK_IDLE_CORE with scx_select_cpu_dfl() if a fully idle SMT core cannot be found, instead of falling back to @prev_cpu, which is not a fully idle SMT core in this case. Fixes: c414c2171cd9e ("sched_ext: idle: Honor idle flags in the built-in idle selection policy") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 52c36a70a3d0..45061b584380 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -544,7 +544,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 * core. */ if (flags & SCX_PICK_IDLE_CORE) { - cpu = prev_cpu; + cpu = -EBUSY; goto out_unlock; } } From 883cc35e9fef6ee5cedf535d26d7143172d60225 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 26 Mar 2025 10:09:47 -1000 Subject: [PATCH 1720/2157] sched_ext: Remove a meaningless conditional goto in scx_select_cpu_dfl() scx_select_cpu_dfl() has a meaningless conditional goto at the end. Remove it. No functional changes. Signed-off-by: Tejun Heo Cc: Andrea Righi --- kernel/sched/ext_idle.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 45061b584380..b5f451e616d4 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -584,8 +584,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 * increasing distance. */ cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags); - if (cpu >= 0) - goto out_unlock; out_unlock: rcu_read_unlock(); From a8897ed8523d4c9d782e282b18005a3779c92714 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Tue, 25 Mar 2025 22:41:52 +0000 Subject: [PATCH 1721/2157] sched_ext: create_dsq: Return -EEXIST on duplicate request create_dsq and therefore the scx_bpf_create_dsq kfunc currently silently ignore duplicate entries. As a sched_ext scheduler is creating each DSQ for a different purpose this is surprising behaviour. Replace rhashtable_insert_fast which ignores duplicates with rhashtable_lookup_insert_fast that reports duplicates (though doesn't return their value). The rest of the code is structured correctly and this now returns -EEXIST. Tested by adding an extra scx_bpf_create_dsq to scx_simple. Previously this was ignored, now init fails with a -17 code. Also ran scx_lavd which continued to work well. Signed-off-by: Jake Hillion Acked-by: Andrea Righi Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 21575d39c376..b47be2729ece 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4171,8 +4171,8 @@ static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node) init_dsq(dsq, dsq_id); - ret = rhashtable_insert_fast(&dsq_hash, &dsq->hash_node, - dsq_hash_params); + ret = rhashtable_lookup_insert_fast(&dsq_hash, &dsq->hash_node, + dsq_hash_params); if (ret) { kfree(dsq); return ERR_PTR(ret); From f0c6eab5e45c529f449fbc595873719e00de6d79 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 25 Mar 2025 10:32:12 +0100 Subject: [PATCH 1722/2157] sched_ext: initialize built-in idle state before ops.init() A BPF scheduler may want to use the built-in idle cpumasks in ops.init() before the scheduler is fully initialized, either directly or through a BPF timer for example. However, this would result in an error, since the idle state has not been properly initialized yet. This can be easily verified by modifying scx_simple to call scx_bpf_get_idle_cpumask() in ops.init(): $ sudo scx_simple DEBUG DUMP =========================================================================== scx_simple[121] triggered exit kind 1024: runtime error (built-in idle tracking is disabled) ... Fix this by properly initializing the idle state before ops.init() is called. With this change applied: $ sudo scx_simple local=2 global=0 local=19 global=11 local=23 global=11 ... Fixes: d73249f88743d ("sched_ext: idle: Make idle static keys private") Signed-off-by: Andrea Righi Reviewed-by: Joel Fernandes Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- kernel/sched/ext_idle.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index b47be2729ece..66bcd40a28ca 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5361,6 +5361,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) */ cpus_read_lock(); + scx_idle_enable(ops); + if (scx_ops.init) { ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); if (ret) { @@ -5427,8 +5429,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (scx_ops.cpu_acquire || scx_ops.cpu_release) static_branch_enable(&scx_ops_cpu_preempt); - scx_idle_enable(ops); - /* * Lock out forks, cgroup on/offlining and moves before opening the * floodgate so that they don't wander into the operations prematurely. diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index b5f451e616d4..cb343ca889e0 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -721,14 +721,14 @@ static void reset_idle_masks(struct sched_ext_ops *ops) void scx_idle_enable(struct sched_ext_ops *ops) { if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE)) - static_branch_enable(&scx_builtin_idle_enabled); + static_branch_enable_cpuslocked(&scx_builtin_idle_enabled); else - static_branch_disable(&scx_builtin_idle_enabled); + static_branch_disable_cpuslocked(&scx_builtin_idle_enabled); if (ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE) - static_branch_enable(&scx_builtin_idle_per_node); + static_branch_enable_cpuslocked(&scx_builtin_idle_per_node); else - static_branch_disable(&scx_builtin_idle_per_node); + static_branch_disable_cpuslocked(&scx_builtin_idle_per_node); #ifdef CONFIG_SMP reset_idle_masks(ops); From f23e9116ebb71b63fe9cec0dcac792aa9af30b0c Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 21 Mar 2025 13:52:31 +0300 Subject: [PATCH 1723/2157] drm/amd/pm: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c05d1c401572 ("drm/amd/swsmu: add aldebaran smu13 ip support (v3)") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 0915d6377613..ba5a9012dbd5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1226,7 +1226,7 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t tach_period; int ret; - if (!speed) + if (!speed || speed > UINT_MAX/8) return -EINVAL; ret = smu_v13_0_auto_fan_control(smu, 0); From 7d641c2b83275d3b0424127b2e0d2d0f7dd82aef Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 21 Mar 2025 13:52:32 +0300 Subject: [PATCH 1724/2157] drm/amd/pm: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b64625a303de ("drm/amd/pm: correct the address of Arcturus fan related registers") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 8aa61a9f7778..453952cdc353 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1267,6 +1267,9 @@ static int arcturus_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (!speed || speed > UINT_MAX/8) + return -EINVAL; + tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT, REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT), From 4e3d9508c056d7e0a56b58d5c81253e2a0d22b6c Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 21 Mar 2025 13:52:33 +0300 Subject: [PATCH 1725/2157] drm/amd/pm: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 031db09017da ("drm/amd/powerplay/vega20: enable fan RPM and pwm settings V2") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c index a3331ffb2daf..1b1c88590156 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c @@ -191,7 +191,7 @@ int vega20_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) uint32_t tach_period, crystal_clock_freq; int result = 0; - if (!speed) + if (!speed || speed > UINT_MAX/8) return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) { From 4b8c3c0d17c07f301011e2908fecd2ebdcfe3d1c Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 21 Mar 2025 14:08:15 +0300 Subject: [PATCH 1726/2157] drm/amd/pm: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c52dcf49195d ("drm/amd/pp: Avoid divide-by-zero in fan_ctrl_set_fan_speed_rpm") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c index 379012494da5..56423aedf3fa 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c @@ -307,10 +307,10 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) int result = 0; if (hwmgr->thermal_controller.fanInfo.bNoFan || - speed == 0 || + (!speed || speed > UINT_MAX/8) || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || (speed > hwmgr->thermal_controller.fanInfo.ulMaxRPM)) - return -1; + return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) result = vega10_fan_ctrl_stop_smc_fan_control(hwmgr); From 7c246a05df51c52fe0852ce56ba10c41e6ed1f39 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 21 Mar 2025 14:08:16 +0300 Subject: [PATCH 1727/2157] drm/amd/pm: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c52dcf49195d ("drm/amd/pp: Avoid divide-by-zero in fan_ctrl_set_fan_speed_rpm") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c index a8fc0fa44db6..ba5c1237fcfe 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c @@ -267,10 +267,10 @@ int smu7_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) if (hwmgr->thermal_controller.fanInfo.bNoFan || (hwmgr->thermal_controller.fanInfo. ucTachometerPulsesPerRevolution == 0) || - speed == 0 || + (!speed || speed > UINT_MAX/8) || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || (speed > hwmgr->thermal_controller.fanInfo.ulMaxRPM)) - return 0; + return -EINVAL; if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) smu7_fan_ctrl_stop_smc_fan_control(hwmgr); From b03f1810db7bf609a64b90704f11da46e3baa050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Paku=C5=82a?= Date: Tue, 25 Mar 2025 11:26:52 +0100 Subject: [PATCH 1728/2157] drm/amd/pm: Add zero RPM enabled OD setting support for SMU14.0.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook up zero RPM enable for 9070 and 9070 XT based on RDNA3 (smu 13.0.0 and 13.0.7) code. Tested on 9070 XT Hellhound Signed-off-by: Tomasz Pakuła Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.12.x Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index f7cfe1f35cae..82c2db972491 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -79,6 +79,7 @@ #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 +#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -1052,6 +1053,10 @@ static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanMinimumPwm; od_max_setting = overdrive_upperlimits->FanMinimumPwm; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: + od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; + od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1330,6 +1335,24 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_ENABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmEnable); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -2270,7 +2293,9 @@ static void smu_v14_0_2_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; } static int smu_v14_0_2_get_overdrive_table(struct smu_context *smu, @@ -2349,6 +2374,8 @@ static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanTargetTemperature; user_od_table->OverDriveTable.FanMinimumPwm = user_od_table_bak.OverDriveTable.FanMinimumPwm; + user_od_table->OverDriveTable.FanZeroRpmEnable = + user_od_table_bak.OverDriveTable.FanZeroRpmEnable; } smu_v14_0_2_set_supported_od_feature_mask(smu); @@ -2396,6 +2423,11 @@ static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + od_table->OverDriveTable.FanZeroRpmEnable = + boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; case PP_OD_EDIT_ACOUSTIC_LIMIT: od_table->OverDriveTable.AcousticLimitRpmThreshold = boot_overdrive_table->OverDriveTable.AcousticLimitRpmThreshold; @@ -2678,6 +2710,27 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmEnable = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v14_0_2_od_restore_table_single(smu, input[0]); From 5f054ddead33c1622ea9c0c0aaf07c6843fc7ab0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 19 Mar 2025 11:58:31 -0500 Subject: [PATCH 1729/2157] drm/amd: Handle being compiled without SI or CIK support better If compiled without SI or CIK support but amdgpu tries to load it will run into failures with uninitialized callbacks. Show a nicer message in this case and fail probe instead. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4050 Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 44 ++++++++++++++----------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 23cfce5aa1fc..26bf896f1444 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1809,7 +1809,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1882,8 +1881,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU}, @@ -1966,7 +1963,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, -#endif /* topaz */ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, @@ -2313,14 +2309,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENOTSUPP; } + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: #ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: + if (!amdgpu_si_support) { dev_info(&pdev->dev, "SI support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2328,16 +2324,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without SI support.\n"); + return -ENODEV; #endif + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: #ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: + if (!amdgpu_cik_support) { dev_info(&pdev->dev, "CIK support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2345,8 +2343,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without CIK support.\n"); + return -ENODEV; #endif + default: + break; + } adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) From f21e6d149b49c92f9e68aa0c76033e1e13d9f5da Mon Sep 17 00:00:00 2001 From: Leo Li Date: Mon, 10 Mar 2025 12:20:39 -0400 Subject: [PATCH 1730/2157] drm/amd/display: Increase vblank offdelay for PSR panels [Why] Depending on when the HW latching event (vupdate) of double-buffered registers happen relative to the PSR SDP (signals panel psr enter/exit) deadline, and how bad the Panel clock has drifted since the last ALPM off event, there can be up to 3 frames of delay between sending the PSR exit cmd to DMUB fw, and when the panel starts displaying live frames. This can manifest as micro-stuttering when userspace commit patterns cause rapid toggling of the DRM vblank counter, since PSR enter/exit is hooked up to DRM vblank disable/enable respectively. In the ideal world, the panel should present the live frame immediately on PSR exit cmd. But due to HW design and PSR limitations, immediate exit can only happen by chance, when: 1. PSR exit cmd is ack'd by FW before HW latching (vupdate) event, and 2. Panel's SDP deadline -- determined by it's PSR Start Delay in DPCD 71h -- is after the vupdate event. The PSR exit SDP can then be sent immediately after HW latches. Otherwise, we have to wait 1 frame. And 3. There is negligible drift between the panel's clock and source clock. Otherwise, there can be up to 1 frame of drift. Note that this delay is not expected with Panel Replay. [How] Since PSR power savings can be quite substantial, and there are a lot of systems in the wild with PSR panels, It'll be nice to have a middle ground that balances user experience with power savings. A simple way to achieve this is by extending the vblank offdelay, such that additional PSR exit delays will be less perceivable. We can set: 20/100 * offdelay_ms = 3_frames_ms => offdelay_ms = 5 * 3_frames_ms This ensures that `3_frames_ms` will only be experienced as a 20% delay on top how long the panel has been static, and thus make the delay less perceivable. If this ends up being too high of a percentage, it can be dropped further in a future change. Fixes: 537ef0f88897 ("drm/amd/display: use new vblank enable policy for DCN35+") Reviewed-by: Harry Wentland Signed-off-by: Leo Li Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bae83a129b5f..c75fcf726a44 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8707,14 +8707,39 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, int offdelay; if (acrtc_state) { - if (amdgpu_ip_version(adev, DCE_HWIP, 0) < - IP_VERSION(3, 5, 0) || - acrtc_state->stream->link->psr_settings.psr_version < - DC_PSR_VERSION_UNSUPPORTED || - !(adev->flags & AMD_IS_APU)) { - timing = &acrtc_state->stream->timing; + timing = &acrtc_state->stream->timing; - /* at least 2 frames */ + /* + * Depending on when the HW latching event of double-buffered + * registers happen relative to the PSR SDP deadline, and how + * bad the Panel clock has drifted since the last ALPM off + * event, there can be up to 3 frames of delay between sending + * the PSR exit cmd to DMUB fw, and when the panel starts + * displaying live frames. + * + * We can set: + * + * 20/100 * offdelay_ms = 3_frames_ms + * => offdelay_ms = 5 * 3_frames_ms + * + * This ensures that `3_frames_ms` will only be experienced as a + * 20% delay on top how long the display has been static, and + * thus make the delay less perceivable. + */ + if (acrtc_state->stream->link->psr_settings.psr_version < + DC_PSR_VERSION_UNSUPPORTED) { + offdelay = DIV64_U64_ROUND_UP((u64)5 * 3 * 10 * + timing->v_total * + timing->h_total, + timing->pix_clk_100hz); + config.offdelay_ms = offdelay ?: 30; + } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0) || + !(adev->flags & AMD_IS_APU)) { + /* + * Older HW and DGPU have issues with instant off; + * use a 2 frame offdelay. + */ offdelay = DIV64_U64_ROUND_UP((u64)20 * timing->v_total * timing->h_total, From 0389f2a3a23c9a2d4cf58d2ab0d3b283f19e8630 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 13 Mar 2025 20:07:13 -0400 Subject: [PATCH 1731/2157] Revert "drm/amd/display: dml2 soc dscclk use DPM table clk setting" [why] this dscclk use DCN defined per DPM level will cause a DCFCLK increase. needs to follow up. This reverts commit 15b959534a39530a21d378190557cc8d1eab7b09 Reviewed-by: Yihan Zhu Reviewed-by: Alvin Lee Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 70c39df62533..2061d43b92e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -590,11 +590,11 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; /* Dependent states. */ - p->out_states->state_array[i].dscclk_mhz = p->in_states->state_array[i].dscclk_mhz; p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz; From 8058061ed9d6bc259d1e678607b07d259342c08f Mon Sep 17 00:00:00 2001 From: Brendan Tam Date: Fri, 14 Mar 2025 13:09:13 -0400 Subject: [PATCH 1732/2157] drm/amd/display: prevent hang on link training fail [Why] When link training fails, the phy clock will be disabled. However, in enable_streams, it is assumed that link training succeeded and the mux selects the phy clock, causing a hang when a register write is made. [How] When enable_stream is hit, check if link training failed. If it did, fall back to the ref clock to avoid a hang and keep the system in a recoverable state. Reviewed-by: Dillon Varone Signed-off-by: Brendan Tam Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 6 +++++- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 926c08e790c1..846c9c51f2d9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -3033,7 +3033,11 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) { + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + } else { + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } } else { if (dccg->funcs->enable_symclk_se) dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 8f5da0ded850..5489f3d431f6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -936,8 +936,11 @@ void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { dccg->funcs->set_dpstreamclk(dccg, DPREFCLK, tg->inst, dp_hpo_inst); - - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) { + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + } else { + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } } else { dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); From 704bc361e3a4ead1c0eb40acc255b636b788dc89 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Tue, 11 Mar 2025 09:43:38 -0400 Subject: [PATCH 1733/2157] drm/amd/display: Actually do immediate vblank disable [Why] The `vblank_config.offdelay` field follows the same semantics as the `drm_vblank_offdelay` parameter. Setting it to 0 will never disable vblank. [How] Set `offdelay` to a positive number. Fixes: e45b6716de4b ("drm/amd/display: use a more lax vblank enable policy for DCN35+") Reviewed-by: Harry Wentland Signed-off-by: Leo Li Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c75fcf726a44..d0d8ad5368c3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8747,6 +8747,8 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, config.offdelay_ms = offdelay ?: 30; } else { + /* offdelay_ms = 0 will never disable vblank */ + config.offdelay_ms = 1; config.disable_immediate = true; } From f3f05a0ec584855c53f2d95024e23259f3ee101d Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Fri, 21 Mar 2025 20:47:23 +0800 Subject: [PATCH 1734/2157] drm/amdgpu: Use correct gfx deferred error count In the case of parsing GFX deferred error from SMU corrected error channel, the error count should be set to 1 instead of parsing from MISC0 register, which is 0. Signed-off-by: Xiang Liu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 736398b0d16d..3caac4a1564a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -872,9 +872,10 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, break; case ACA_SMU_TYPE_CE: bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); - ret = aca_error_cache_log_bank_error(handle, &info, - bank->aca_err_type, - ACA_REG__MISC0__ERRCNT(misc0)); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + (bank->aca_err_type == ACA_ERROR_TYPE_CE) ? + ACA_REG__MISC0__ERRCNT(misc0) : + 1); break; default: return -EINVAL; From ee97326fb96505f20b17f38f54df731cd6496bc0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 24 Mar 2025 13:26:26 +0530 Subject: [PATCH 1735/2157] drm/amdgpu: Add NPS2 to DPX compatible mode Compute partition DPX is possible in NPS2 mode. Update the compatible modes for DPX. Signed-off-by: Lijo Lazar Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 3c07517be09a..ae071985f26e 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -473,7 +473,8 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, break; case AMDGPU_DPX_PARTITION_MODE: num_xcp = 2; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS2_PARTITION_MODE); break; case AMDGPU_TPX_PARTITION_MODE: num_xcp = 3; From cc11dffc14bd2322d92a896a639cc42145401515 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Tue, 25 Mar 2025 11:10:43 +0800 Subject: [PATCH 1736/2157] drm/amdgpu: Update ta ras block Update ta ra block to keep sync with RAS TA. Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 3 +++ 3 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bed2603ae4c4..ebf1f63d0442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -77,6 +77,7 @@ const char *ras_block_string[] = { "jpeg", "ih", "mpio", + "mmsch", }; const char *ras_mca_block_string[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 764e9fa0a914..927d6bff734a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -98,6 +98,7 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, + AMDGPU_RAS_BLOCK__MMSCH, AMDGPU_RAS_BLOCK__LAST, AMDGPU_RAS_BLOCK__ANY = -1 @@ -795,6 +796,12 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { return TA_RAS_BLOCK__VCN; case AMDGPU_RAS_BLOCK__JPEG: return TA_RAS_BLOCK__JPEG; + case AMDGPU_RAS_BLOCK__IH: + return TA_RAS_BLOCK__IH; + case AMDGPU_RAS_BLOCK__MPIO: + return TA_RAS_BLOCK__MPIO; + case AMDGPU_RAS_BLOCK__MMSCH: + return TA_RAS_BLOCK__MMSCH; default: WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); return TA_RAS_BLOCK__UMC; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index a3b5fda22432..8a3f326474e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -92,6 +92,9 @@ enum ta_ras_block { TA_RAS_BLOCK__MCA, TA_RAS_BLOCK__VCN, TA_RAS_BLOCK__JPEG, + TA_RAS_BLOCK__IH, + TA_RAS_BLOCK__MPIO, + TA_RAS_BLOCK__MMSCH, TA_NUM_BLOCK_MAX }; From aedc92be9621b31ccc90d79ee7f831944e6bfbef Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Mon, 24 Mar 2025 17:19:54 +0800 Subject: [PATCH 1737/2157] drm/amdgpu: Parse all deferred errors with UMC aca handle We should only increase the deferred errors in UMC block. Signed-off-by: Xiang Liu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 8 -------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +- 9 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index dc47f5fd4ea1..b4ad163f42a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -195,6 +195,10 @@ static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, { const struct aca_bank_ops *bank_ops = handle->bank_ops; + /* Parse all deferred errors with UMC aca handle */ + if (ACA_BANK_ERR_IS_DEFFERED(bank)) + return handle->hwip == ACA_HWIP_TYPE_UMC; + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 6b180f1b33fd..38c88897e1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -80,14 +80,6 @@ struct ras_query_context; (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) -#define ACA_BANK_ERR_CE_DE_DECODE(bank) \ - (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ - ACA_ERROR_TYPE_CE) - -#define ACA_BANK_ERR_UE_DE_DECODE(bank) \ - (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ - ACA_ERROR_TYPE_UE) - enum aca_reg_idx { ACA_REG_IDX_CTL = 0, ACA_REG_IDX_STATUS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 6029a799074d..477424472bbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1172,7 +1172,7 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban break; case ACA_SMU_TYPE_CE: count = ext_error_code == 6 ? count : 0ULL; - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 3caac4a1564a..780563a97d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -867,15 +867,13 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: - bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, - (bank->aca_err_type == ACA_ERROR_TYPE_CE) ? - ACA_REG__MISC0__ERRCNT(misc0) : - 1); + ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 5598a35f72af..a8ccae361ec7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1328,7 +1328,7 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index a54e7b929295..84cde1239ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -751,7 +751,7 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index dc94d58d33a6..11f9c07f1e8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2595,7 +2595,7 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 74f57b2d30a5..0e404c074975 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -85,7 +85,8 @@ bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_sta return (amdgpu_ras_is_poison_mode_supported(adev) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)); + ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1))); } bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 7446ecc55714..3e176b4b7c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1965,7 +1965,7 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; From 510a16d9954811d814d667a25a4e23475e9150d6 Mon Sep 17 00:00:00 2001 From: "Jesse.zhang@amd.com" Date: Tue, 25 Mar 2025 15:01:19 +0800 Subject: [PATCH 1738/2157] Revert "drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA" this temporarily reverts commit 6ec04e38b2f6 ("drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA") it cause a regression. Signed-off-by: Jesse Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 77 ++++--------------- .../gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h | 70 ----------------- 2 files changed, 14 insertions(+), 133 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 11f9c07f1e8d..688a720bbbbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -31,7 +31,6 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" -#include "gc/gc_9_0_sh_mask.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" @@ -1291,71 +1290,21 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) seq, 0xffffffff, 4); } -/* - * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value - * @vmid: The VMID to invalidate - * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) + +/** + * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA * - * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID - * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and - * L2 PDEs) are invalidated. - */ -static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, - uint32_t flush_type) -{ - u32 req = 0; - - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); - - return req; -} - -/* - * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA - * @ring: The SDMA ring - * @vmid: The VMID to flush - * @pd_addr: The page directory address + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * - * This function emits the necessary register writes and waits to perform a VM flush for the - * specified VMID. It updates the PTB address registers and issues a VM invalidation request - * using the specified VM invalidation engine. + * Update the page table base and flush the VM TLB + * using sDMA. */ static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { - struct amdgpu_device *adev = ring->adev; - uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); - unsigned int eng = ring->vm_inv_eng; - struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; - - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + - (hub->ctx_addr_distance * vmid), - lower_32_bits(pd_addr)); - - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + - (hub->ctx_addr_distance * vmid), - upper_32_bits(pd_addr)); - /* - * Construct and emit the VM invalidation packet - */ - amdgpu_ring_write(ring, - SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | - SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | - SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | - SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | - SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); - amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -2177,7 +2126,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - 4 + 2 * 3 + + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2209,7 +2159,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - 4 + 2 * 3 + + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h index 3ca8a417c6d8..8de4ccce5e38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -64,9 +64,6 @@ #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 -/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ -#define SDMA_OP_VM_INVALIDATE 8 -#define SDMA_SUBOP_VM_INVALIDATE 4 /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 @@ -3334,72 +3331,5 @@ #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) -/* -** Definitions for SDMA_PKT_VM_INVALIDATION packet -*/ - -/*define for HEADER word*/ -/*define for op field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) - -/*define for sub_op field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 -#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) - -/*define for xcc0_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 -#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) - -/*define for xcc1_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 -#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) - -/*define for mmhub_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 -#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) - -/*define for INVALIDATEREQ word*/ -/*define for invalidatereq field*/ -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) - -/*define for ADDRESSRANGELO word*/ -/*define for addressrangelo field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) - -/*define for ADDRESSRANGEHI word*/ -/*define for invalidateack field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) - -/*define for addressrangehi field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) - -/*define for reserved field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) #endif /* __SDMA_PKT_OPEN_H_ */ From 1f86f4125e167aeb343a5b8136996c0569009c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 Mar 2025 16:15:12 +0100 Subject: [PATCH 1739/2157] drm/amdgpu: stop unmapping MQD for kernel queues v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This looks unnecessary and actually extremely harmful since using kmap() is not possible while inside the ring reset. Remove all the extra mapping and unmapping of the MQDs. v2: also fix debugfs v3: fix coding style typo Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 58 ++----------- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 88 +++---------------- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 88 +++---------------- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 102 ++++------------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 45 ++-------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 57 ++----------- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 59 +++---------- 7 files changed, 66 insertions(+), 431 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d55c8b7fdb59..59acdbfe28d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -608,59 +608,17 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - volatile u32 *mqd; - u32 *kbuf; - int r, i; - uint32_t value, result; + ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size); + void *from = ((u8 *)ring->mqd_ptr) + *pos; - if (*pos & 3 || size & 3) - return -EINVAL; + if (*pos > ring->mqd_size) + return 0; - kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; + if (copy_to_user(buf, from, bytes)) + return -EFAULT; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto err_free; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) - goto err_unreserve; - - /* - * Copy to local buffer to avoid put_user(), which might fault - * and acquire mmap_sem, under reservation_ww_class_mutex. - */ - for (i = 0; i < ring->mqd_size/sizeof(u32); i++) - kbuf[i] = mqd[i]; - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - result = 0; - while (size) { - if (*pos >= ring->mqd_size) - break; - - value = kbuf[*pos/4]; - r = put_user(value, (uint32_t *)buf); - if (r) - goto err_free; - buf += 4; - result += 4; - size -= 4; - *pos += 4; - } - - kfree(kbuf); - return result; - -err_unreserve: - amdgpu_bo_unreserve(ring->mqd_obj); -err_free: - kfree(kbuf); - return r; + *pos += bytes; + return bytes; } static const struct file_operations amdgpu_debugfs_mqd_fops = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6d514efb0a6d..a63ce747863f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6851,22 +6851,9 @@ static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) return r; } @@ -7173,55 +7160,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v10_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v10_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i], + false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) @@ -9579,20 +9535,9 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) if (r) return r; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - DRM_ERROR("fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kgq_init_queue(ring, true); if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + DRM_ERROR("fail to init kgq\n"); return r; } @@ -9649,20 +9594,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d8772cd6db63..f66ca639f391 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4115,22 +4115,9 @@ static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) return r; } @@ -4452,57 +4439,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v11_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; + gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; if (!amdgpu_async_gfx_ring) gfx_v11_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) @@ -6667,20 +6621,9 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) if (r) return r; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kgq\n"); return r; } @@ -6707,20 +6650,9 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } r = amdgpu_mes_map_legacy_queue(adev, ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index dceb5ad38862..b6889e32dd8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3001,37 +3001,19 @@ static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { - int r, i; - struct amdgpu_ring *ring; + int i, r; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) - goto done; + return r; } r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; + return r; - r = gfx_v12_0_cp_gfx_start(adev); - if (r) - goto done; - -done: - return r; + return gfx_v12_0_cp_gfx_start(adev); } static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -3344,57 +3326,25 @@ static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v12_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; + gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring); + adev->gfx.kiq[0].ring.sched.ready = true; return 0; } static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; if (!amdgpu_async_gfx_ring) gfx_v12_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v12_0_cp_resume(struct amdgpu_device *adev) @@ -5224,20 +5174,9 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kgq_init_queue(ring, true); if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kgq\n"); return r; } @@ -5264,20 +5203,9 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - DRM_ERROR("fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kcq_init_queue(ring, true); if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kcq\n"); return r; } r = amdgpu_mes_map_legacy_queue(adev, ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d116a2e2f469..bfedd487efc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4683,60 +4683,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v8_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v8_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (!r) { - r = gfx_v8_0_kcq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]); if (r) - goto done; + return r; } gfx_v8_0_set_mec_doorbell_range(adev); - r = gfx_v8_0_kiq_kcq_enable(adev); - if (r) - goto done; - -done: - return r; + return gfx_v8_0_kiq_kcq_enable(adev); } static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d345285ea885..1080e9198ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3890,55 +3890,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v9_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v9_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) @@ -7319,20 +7287,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)){ - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } spin_lock_irqsave(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 780563a97d20..53fbf6ca7cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2167,55 +2167,27 @@ static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[xcc_id].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v9_4_3_xcc_kiq_init_queue(&adev->gfx.kiq[xcc_id].ring, xcc_id); return 0; } static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + struct amdgpu_ring *ring; + int i, r; gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + ring = &adev->gfx.compute_ring[i + xcc_id * + adev->gfx.num_compute_rings]; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, xcc_id); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, xcc_id); } static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) @@ -3587,20 +3559,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)){ - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } spin_lock_irqsave(&kiq->ring_lock, flags); From 5b3c08ae9ed324743f5f7286940d45caeb656e6e Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 26 Mar 2025 13:41:01 +0800 Subject: [PATCH 1740/2157] Remove unnecessary firmware version check for gc v9_4_2 GC v9_4_2 uses a new versioning scheme for CP firmware, making the warning ("CP firmware version too old, please update!") irrelevant. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1080e9198ad9..d7db4cb907ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1269,6 +1269,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.mec_fw_write_wait = false; if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) && ((adev->gfx.mec_fw_version < 0x000001a5) || (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || From 1eef87883c18fde23b6d45aed7ed111e13b786ce Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 17 Mar 2025 14:16:04 +0800 Subject: [PATCH 1741/2157] drm/amd/pm: Remove host limit metrics support Firmware algorithm changed and the values in this version are not accurate thereby remove host limit metric support for smu_v13_0_6, smu_v13_0_12 & smu_v13_0_14 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 682646068000..c478b3be37af 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -109,7 +109,6 @@ enum smu_v13_0_6_caps { SMU_CAP(OTHER_END_METRICS), SMU_CAP(SET_UCLK_MAX), SMU_CAP(PCIE_METRICS), - SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(MCA_DEBUG_MODE), SMU_CAP(PER_INST_METRICS), SMU_CAP(CTF_LIMIT), @@ -325,8 +324,6 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu) if (fw_ver >= 0x05550E00) smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS)); - if (fw_ver >= 0x05551000) - smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); if (fw_ver >= 0x05550B00) smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); if (fw_ver >= 0x5551200) @@ -342,7 +339,6 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) SMU_CAP(RMA_MSG), SMU_CAP(ACA_SYND), SMU_CAP(OTHER_END_METRICS), - SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(PER_INST_METRICS) }; uint32_t fw_ver = smu->smc_fw_version; @@ -387,8 +383,6 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); - if (fw_ver >= 0x04556F00) - smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); if (fw_ver >= 0x04556A00) smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); } else { @@ -408,8 +402,6 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); if (fw_ver < 0x00555600) smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); - if (pgm == 0 && fw_ver >= 0x557900) - smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || @@ -2674,13 +2666,6 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusyAcc, version)[inst]); - - if (smu_v13_0_6_cap_supported( - smu, SMU_CAP(HST_LIMIT_METRICS))) - gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = - SMUQ10_ROUND(GET_GPU_METRIC_FIELD - (GfxclkBelowHostLimitAcc, version) - [inst]); idx++; } } From 942de4ea6921a1d5adc4b92eb044647c7bcf314c Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 17 Mar 2025 14:17:51 +0800 Subject: [PATCH 1742/2157] drm/amd/pm: Update smu metrics table for smu_v13_0_6 Update smu metrics table to vesrion 0x10 for smu_v13_0_6 v2: Host metrics support removal moved to separate patch (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index f8ed45857878..d26f35119a12 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -127,7 +127,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xF +#define SMU_METRICS_TABLE_VERSION 0x10 // Unified metrics table for smu_v13_0_6 typedef struct __attribute__((packed, aligned(4))) { @@ -241,7 +241,10 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated //Total App Clock Counter - uint64_t GfxclkBelowHostLimitAcc[8]; + uint64_t GfxclkBelowHostLimitPptAcc[8]; + uint64_t GfxclkBelowHostLimitThmAcc[8]; + uint64_t GfxclkBelowHostLimitTotalAcc[8]; + uint64_t GfxclkLowUtilizationAcc[8]; } MetricsTableV0_t; // Metrics table for smu_v13_0_6 APUS From 27145f78f56a3178c4f9ffe51c4406d8dd0ca90c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 25 Mar 2025 11:42:08 +0530 Subject: [PATCH 1743/2157] drm/amdgpu: Prefer shadow rom when available Fetch VBIOS from shadow ROM when available before trying other methods like EFI method. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Fixes: 9c081c11c621 ("drm/amdgpu: Reorder to read EFI exported ROM first") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4066 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 34 +++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 75fcc521c171..00e96419fcda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -447,6 +447,13 @@ static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) return true; } +static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev) +{ + struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE]; + + return (res->flags & IORESOURCE_ROM_SHADOW); +} + static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { @@ -465,14 +472,27 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } + if (amdgpu_prefer_rom_resource(adev)) { + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } - if (amdgpu_read_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); - goto success; + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + } else { + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } } if (amdgpu_read_bios_from_rom(adev)) { From eddff9a58f187c72fce5493e1ed8b52c5b24e5e6 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 12 Mar 2025 14:07:49 +0800 Subject: [PATCH 1744/2157] drm/amd/pm: Add gpu_metrics_v1_8 Add new gpu_metrics_v1_8 to acquire below host limit counters Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 114 ++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 2 files changed, 117 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 9189dcb65188..2a9606118d89 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -341,6 +341,7 @@ enum pp_policy_soc_pstate { #define MAX_CLKS 4 #define NUM_VCN 4 #define NUM_JPEG_ENG 32 +#define NUM_JPEG_ENG_V1 40 #define MAX_XCC 8 #define NUM_XCP 8 struct seq_file; @@ -376,6 +377,20 @@ struct amdgpu_xcp_metrics_v1_1 { uint64_t gfx_below_host_limit_acc[MAX_XCC]; }; +struct amdgpu_xcp_metrics_v1_2 { + /* Utilization Instantaneous (%) */ + uint32_t gfx_busy_inst[MAX_XCC]; + uint16_t jpeg_busy[NUM_JPEG_ENG_V1]; + uint16_t vcn_busy[NUM_VCN]; + /* Utilization Accumulated (%) */ + uint64_t gfx_busy_acc[MAX_XCC]; + /* Total App Clock Counter Accumulated */ + uint64_t gfx_below_host_limit_ppt_acc[MAX_XCC]; + uint64_t gfx_below_host_limit_thm_acc[MAX_XCC]; + uint64_t gfx_low_utilization_acc[MAX_XCC]; + uint64_t gfx_below_host_limit_total_acc[MAX_XCC]; +}; + struct amd_pm_funcs { /* export for dpm on ci and si */ int (*pre_set_power_state)(void *handle); @@ -1090,6 +1105,105 @@ struct gpu_metrics_v1_7 { uint32_t pcie_lc_perf_other_end_recovery; }; +struct gpu_metrics_v1_8 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + + /* VRAM max bandwidthi (in GB/sec) at max memory clock */ + uint64_t mem_max_bandwidth; + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Accumulation cycle counter */ + uint32_t accumulation_counter; + + /* Accumulated throttler residencies */ + uint32_t prochot_residency_acc; + uint32_t ppt_residency_acc; + uint32_t socket_thm_residency_acc; + uint32_t vr_thm_residency_acc; + uint32_t hbm_thm_residency_acc; + + /* Clock Lock Status. Each bit corresponds to clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /*PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /*PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size(KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* XGMI link status(active/inactive) */ + uint16_t xgmi_link_status[NUM_XGMI_LINKS]; + + uint16_t padding; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (Mhz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + /* Number of current partition */ + uint16_t num_partition; + + /* XCP metrics stats */ + struct amdgpu_xcp_metrics_v1_2 xcp_stats[NUM_XCP]; + + /* PCIE other end recovery counter */ + uint32_t pcie_lc_perf_other_end_recovery; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index d834d134ad2b..80eb1a03b3ca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1083,6 +1083,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 7): structure_size = sizeof(struct gpu_metrics_v1_7); break; + case METRICS_VERSION(1, 8): + structure_size = sizeof(struct gpu_metrics_v1_8); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; From 4161050d47e1b083a7e1b0b875c9907e1a6f1f1f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 09:35:02 -0400 Subject: [PATCH 1745/2157] drm/amdgpu/gfx11: fix num_mec GC11 only has 1 mec. Fixes: 3d879e81f0f9 ("drm/amdgpu: add init support for GFX11 (v2)") Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f66ca639f391..d57db42f9536 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1581,7 +1581,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; break; From dce8bd9137b88735dd0efc4e2693213d98c15913 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 12:09:11 -0400 Subject: [PATCH 1746/2157] drm/amdgpu/gfx12: fix num_mec GC12 only has 1 mec. Fixes: 52cb80c12e8a ("drm/amdgpu: Add gfx v12_0 ip block support (v6)") Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index b6889e32dd8f..e7b58e470292 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1355,7 +1355,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; break; From bb58e1579f431d42469b6aed0f03eff383ba6db5 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 26 Mar 2025 15:45:07 -0700 Subject: [PATCH 1747/2157] RISC-V: errata: Use medany for relocatable builds We're trying to mix non-PIC/PIE objects into the otherwise-PIE relocatable kernels, to avoid GOT/PLT references during early boot alternative resolution (which happens before the GOT/PLT are set up). riscv64-unknown-linux-gnu-ld: arch/riscv/errata/sifive/errata.o: relocation R_RISCV_HI20 against `tlb_flush_all_threshold' can not be used when making a shared object; recompile with -fPIC riscv64-unknown-linux-gnu-ld: arch/riscv/errata/thead/errata.o: relocation R_RISCV_HI20 against `riscv_cbom_block_size' can not be used when making a shared object; recompile with -fPIC Fixes: 8dc2a7e8027f ("riscv: Fix relocatable kernels with early alternatives using -fno-pie") Link: https://lore.kernel.org/r/20250326224506.27165-2-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index f0da9d7b39c3..bc6c77ba837d 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,5 +1,9 @@ ifdef CONFIG_RELOCATABLE -KBUILD_CFLAGS += -fno-pie +# We can't use PIC/PIE when handling early-boot errata parsing, as the kernel +# doesn't have a GOT setup at that point. So instead just use medany: it's +# usually position-independent, so it should be good enough for the errata +# handling. +KBUILD_CFLAGS += -fno-pie -mcmodel=medany endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY From bffada8201fc9933ba0974b76b6068d6b4557ef4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:53 -0700 Subject: [PATCH 1748/2157] riscv: Remove duplicate CONFIG_PAGE_OFFSET definition This definition is already provided by include/generated/autoconf.h, so it does not need to be provided on the command line. Signed-off-by: Samuel Holland Reviewed-by: Jesse Taube Link: https://lore.kernel.org/r/20241026171441.3047904-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 13fbc0f94238..600df90bc141 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -98,7 +98,6 @@ KBUILD_AFLAGS += -march=$(riscv-march-y) CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/') KBUILD_CFLAGS += -mno-save-restore -KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS += -mcmodel=medlow From 2c0391b29b27f315c1b4c29ffde66f50b29fab99 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:54 -0700 Subject: [PATCH 1749/2157] riscv: Allow NOMMU kernels to access all of RAM NOMMU kernels currently cannot access memory below the kernel link address. Remove this restriction by setting PAGE_OFFSET to the actual start of RAM, as determined from the devicetree. The kernel link address must be a constant, so keep using CONFIG_PAGE_OFFSET for that purpose. Signed-off-by: Samuel Holland Reviewed-by: Jesse Taube Link: https://lore.kernel.org/r/20241026171441.3047904-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 12 ++++-------- arch/riscv/include/asm/pgtable.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 125f5ecd9565..6409fd78ae6f 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -24,12 +24,9 @@ * When not using MMU this corresponds to the first free page in * physical memory (aligned on a page boundary). */ -#ifdef CONFIG_64BIT #ifdef CONFIG_MMU +#ifdef CONFIG_64BIT #define PAGE_OFFSET kernel_map.page_offset -#else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) -#endif /* * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so * define the PAGE_OFFSET value for SV48 and SV39. @@ -39,6 +36,9 @@ #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* CONFIG_64BIT */ +#else +#define PAGE_OFFSET ((unsigned long)phys_ram_base) +#endif /* CONFIG_MMU */ #ifndef __ASSEMBLY__ @@ -95,11 +95,7 @@ typedef struct page *pgtable_t; #define MIN_MEMBLOCK_ADDR 0 #endif -#ifdef CONFIG_MMU #define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base)) -#else -#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) -#endif /* CONFIG_MMU */ struct kernel_mapping { unsigned long page_offset; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 050fdc49b5ad..eb7b25ef556e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -12,7 +12,7 @@ #include #ifndef CONFIG_MMU -#define KERNEL_LINK_ADDR PAGE_OFFSET +#define KERNEL_LINK_ADDR _AC(CONFIG_PAGE_OFFSET, UL) #define KERN_VIRT_SIZE (UL(-1)) #else From 51b766c79a3d741fb97419c3da1c58fce5e66f0e Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:55 -0700 Subject: [PATCH 1750/2157] riscv: Support CONFIG_RELOCATABLE on NOMMU Move relocate_kernel() out of the CONFIG_MMU block so it can be called from the NOMMU version of setup_vm(). Set some offsets in kernel_map so relocate_kernel() does not need to be modified. Relocatable NOMMU kernels can be loaded to any physical memory address; they no longer depend on CONFIG_PAGE_OFFSET. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241026171441.3047904-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/pgtable.h | 4 ++ arch/riscv/mm/init.c | 82 +++++++++++++++++--------------- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..62fb6d9560e5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1075,7 +1075,7 @@ config PARAVIRT_TIME_ACCOUNTING config RELOCATABLE bool "Build a relocatable kernel" - depends on MMU && 64BIT && !XIP_KERNEL + depends on 64BIT && !XIP_KERNEL select MODULE_SECTIONS if MODULES help This builds a kernel as a Position Independent Executable (PIE), diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index eb7b25ef556e..babd951222db 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -12,7 +12,11 @@ #include #ifndef CONFIG_MMU +#ifdef CONFIG_RELOCATABLE +#define KERNEL_LINK_ADDR UL(0) +#else #define KERNEL_LINK_ADDR _AC(CONFIG_PAGE_OFFSET, UL) +#endif #define KERN_VIRT_SIZE (UL(-1)) #else diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..82d14f94c996 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -323,6 +323,44 @@ static void __init setup_bootmem(void) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); } +#ifdef CONFIG_RELOCATABLE +extern unsigned long __rela_dyn_start, __rela_dyn_end; + +static void __init relocate_kernel(void) +{ + Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; + /* + * This holds the offset between the linked virtual address and the + * relocated virtual address. + */ + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; + /* + * This holds the offset between kernel linked virtual address and + * physical address. + */ + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; + + for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { + Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf64_Addr relocated_addr = rela->r_addend; + + if (rela->r_info != R_RISCV_RELATIVE) + continue; + + /* + * Make sure to not relocate vdso symbols like rt_sigreturn + * which are linked from the address 0 in vmlinux since + * vdso symbol addresses are actually used as an offset from + * mm->context.vdso in VDSO_OFFSET macro. + */ + if (relocated_addr >= KERNEL_LINK_ADDR) + relocated_addr += reloc_offset; + + *(Elf64_Addr *)addr = relocated_addr; + } +} +#endif /* CONFIG_RELOCATABLE */ + #ifdef CONFIG_MMU struct pt_alloc_ops pt_ops __meminitdata; @@ -893,44 +931,6 @@ static __init void set_satp_mode(uintptr_t dtb_pa) #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif -#ifdef CONFIG_RELOCATABLE -extern unsigned long __rela_dyn_start, __rela_dyn_end; - -static void __init relocate_kernel(void) -{ - Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; - /* - * This holds the offset between the linked virtual address and the - * relocated virtual address. - */ - uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; - /* - * This holds the offset between kernel linked virtual address and - * physical address. - */ - uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; - - for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { - Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); - Elf64_Addr relocated_addr = rela->r_addend; - - if (rela->r_info != R_RISCV_RELATIVE) - continue; - - /* - * Make sure to not relocate vdso symbols like rt_sigreturn - * which are linked from the address 0 in vmlinux since - * vdso symbol addresses are actually used as an offset from - * mm->context.vdso in VDSO_OFFSET macro. - */ - if (relocated_addr >= KERNEL_LINK_ADDR) - relocated_addr += reloc_offset; - - *(Elf64_Addr *)addr = relocated_addr; - } -} -#endif /* CONFIG_RELOCATABLE */ - #ifdef CONFIG_XIP_KERNEL static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) @@ -1378,6 +1378,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) { dtb_early_va = (void *)dtb_pa; dtb_early_pa = dtb_pa; + +#ifdef CONFIG_RELOCATABLE + kernel_map.virt_addr = (uintptr_t)_start; + kernel_map.phys_addr = (uintptr_t)_start; + relocate_kernel(); +#endif } static inline void setup_vm_final(void) From d073a571e68f42414f8f06f01b59f52224538a83 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:56 -0700 Subject: [PATCH 1751/2157] asm-generic: Always define Elf_Rel and Elf_Rela These definitions are useful for relocating the kernel image as well, regardless of the type of relocations used for modules. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241026171441.3047904-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/asm-generic/module.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h index 98e1541b72b7..a8622501b975 100644 --- a/include/asm-generic/module.h +++ b/include/asm-generic/module.h @@ -19,12 +19,8 @@ struct mod_arch_specific #define Elf_Dyn Elf64_Dyn #define Elf_Ehdr Elf64_Ehdr #define Elf_Addr Elf64_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf64_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf64_Rela -#endif #define ELF_R_TYPE(X) ELF64_R_TYPE(X) #define ELF_R_SYM(X) ELF64_R_SYM(X) @@ -36,12 +32,8 @@ struct mod_arch_specific #define Elf_Dyn Elf32_Dyn #define Elf_Ehdr Elf32_Ehdr #define Elf_Addr Elf32_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf32_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf32_Rela -#endif #define ELF_R_TYPE(X) ELF32_R_TYPE(X) #define ELF_R_SYM(X) ELF32_R_SYM(X) #endif From ea2bde36a46d5724c1b44d80cc9fafbd73c2ecf9 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:57 -0700 Subject: [PATCH 1752/2157] riscv: Support CONFIG_RELOCATABLE on riscv32 When adjusted to use the correctly-sized ELF types, relocate_kernel() works on riscv32 as well. The caveat about crossing an intermediate page table boundary does not apply to riscv32, since for Sv32 the early kernel mapping uses only PGD entries. Since KASLR is not yet supported on riscv32, this option is mostly useful for NOMMU. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241026171441.3047904-6-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/mm/init.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62fb6d9560e5..7d3d457045ee 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1075,7 +1075,7 @@ config PARAVIRT_TIME_ACCOUNTING config RELOCATABLE bool "Build a relocatable kernel" - depends on 64BIT && !XIP_KERNEL + depends on !XIP_KERNEL select MODULE_SECTIONS if MODULES help This builds a kernel as a Position Independent Executable (PIE), diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 82d14f94c996..d1494d48a70d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -20,15 +20,13 @@ #include #include #include -#ifdef CONFIG_RELOCATABLE -#include -#endif #include #include #include #include #include +#include #include #include #include @@ -328,7 +326,7 @@ extern unsigned long __rela_dyn_start, __rela_dyn_end; static void __init relocate_kernel(void) { - Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; + Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start; /* * This holds the offset between the linked virtual address and the * relocated virtual address. @@ -340,9 +338,9 @@ static void __init relocate_kernel(void) */ uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; - for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { - Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); - Elf64_Addr relocated_addr = rela->r_addend; + for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) { + Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf_Addr relocated_addr = rela->r_addend; if (rela->r_info != R_RISCV_RELATIVE) continue; @@ -356,7 +354,7 @@ static void __init relocate_kernel(void) if (relocated_addr >= KERNEL_LINK_ADDR) relocated_addr += reloc_offset; - *(Elf64_Addr *)addr = relocated_addr; + *(Elf_Addr *)addr = relocated_addr; } } #endif /* CONFIG_RELOCATABLE */ @@ -1174,7 +1172,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * makes the kernel cross over a PUD_SIZE boundary, raise a bug * since a part of the kernel would not get mapped. */ - BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); + if (IS_ENABLED(CONFIG_64BIT)) + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); relocate_kernel(); #endif From e1cf2d009b00fd890dbbcb8b79613ff538732559 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:58 -0700 Subject: [PATCH 1753/2157] riscv: Remove CONFIG_PAGE_OFFSET The current definition of CONFIG_PAGE_OFFSET is problematic for a couple of reasons: 1) The value is misleading for normal 64-bit kernels, where it is overridden at runtime if Sv48 or Sv39 is chosen. This is especially the case for XIP kernels, which always use Sv39. 2) The option is not user-visible, but for NOMMU kernels it must be a valid RAM address, and for !RELOCATABLE it must additionally be the exact address where the kernel is loaded. Fix both of these by removing the option. 1) For MMU kernels, drop the indirection through Kconfig. Additionally, for XIP, drop the indirection through kernel_map. 2) For NOMMU kernels, use the user-visible physical RAM base if provided. Otherwise, force the kernel to be relocatable. Signed-off-by: Samuel Holland Reviewed-by: Jesse Taube Link: https://lore.kernel.org/r/20241026171441.3047904-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 8 +------- arch/riscv/include/asm/page.h | 15 ++++++++------- arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/mm/init.c | 8 ++------ 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7d3d457045ee..551b9a9b243f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -201,6 +201,7 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_MSI if PCI + select RELOCATABLE if !MMU && !PHYS_RAM_BASE_FIXED select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_APLIC select RISCV_IMSIC @@ -288,13 +289,6 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. -config PAGE_OFFSET - hex - default 0x80000000 if !MMU && RISCV_M_MODE - default 0x80200000 if !MMU - default 0xc0000000 if 32BIT - default 0xff60000000000000 if 64BIT - config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 6409fd78ae6f..572a141ddecd 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -26,15 +26,16 @@ */ #ifdef CONFIG_MMU #ifdef CONFIG_64BIT -#define PAGE_OFFSET kernel_map.page_offset -/* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so - * define the PAGE_OFFSET value for SV48 and SV39. - */ +#define PAGE_OFFSET_L5 _AC(0xff60000000000000, UL) #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) +#ifdef CONFIG_XIP_KERNEL +#define PAGE_OFFSET PAGE_OFFSET_L3 #else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#define PAGE_OFFSET kernel_map.page_offset +#endif /* CONFIG_XIP_KERNEL */ +#else +#define PAGE_OFFSET _AC(0xc0000000, UL) #endif /* CONFIG_64BIT */ #else #define PAGE_OFFSET ((unsigned long)phys_ram_base) @@ -98,7 +99,6 @@ typedef struct page *pgtable_t; #define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base)) struct kernel_mapping { - unsigned long page_offset; unsigned long virt_addr; unsigned long virt_offset; uintptr_t phys_addr; @@ -112,6 +112,7 @@ struct kernel_mapping { uintptr_t xiprom; uintptr_t xiprom_sz; #else + unsigned long page_offset; unsigned long va_kernel_pa_offset; #endif }; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index babd951222db..ddebdfb4519a 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -15,7 +15,7 @@ #ifdef CONFIG_RELOCATABLE #define KERNEL_LINK_ADDR UL(0) #else -#define KERNEL_LINK_ADDR _AC(CONFIG_PAGE_OFFSET, UL) +#define KERNEL_LINK_ADDR _AC(CONFIG_PHYS_RAM_BASE, UL) #endif #define KERN_VIRT_SIZE (UL(-1)) #else diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d1494d48a70d..37af6513a50f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -859,6 +859,8 @@ static __init void set_satp_mode(uintptr_t dtb_pa) uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + kernel_map.page_offset = PAGE_OFFSET_L5; + if (satp_mode_cmdline == SATP_MODE_57) { disable_pgtable_l5(); } else if (satp_mode_cmdline == SATP_MODE_48) { @@ -1106,11 +1108,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; #ifdef CONFIG_XIP_KERNEL -#ifdef CONFIG_64BIT - kernel_map.page_offset = PAGE_OFFSET_L3; -#else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); -#endif kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); @@ -1125,7 +1122,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr + (uintptr_t)&_sdata - (uintptr_t)&_start; #else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; From 359595b20a3617da9fe611f35f2197023bdbda62 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 11 Mar 2025 12:42:57 +0100 Subject: [PATCH 1754/2157] ACPI: NUMA: Use str_enabled_disabled() helper function Remove hard-coded strings by using the str_enabled_disabled() helper function. Acked-by: Bruno Faccini Signed-off-by: Thorsten Blum Signed-off-by: Rafael J. Wysocki --- drivers/acpi/numa/srat.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index ce815d7cb8f6..0a725e46d017 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -18,6 +18,7 @@ #include #include #include +#include static nodemask_t nodes_found_map = NODE_MASK_NONE; @@ -188,8 +189,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", p->apic_id, p->local_sapic_eid, p->proximity_domain_lo, - (p->flags & ACPI_SRAT_CPU_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_CPU_ENABLED)); } break; @@ -201,8 +201,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) (unsigned long long)p->base_address, (unsigned long long)p->length, p->proximity_domain, - (p->flags & ACPI_SRAT_MEM_ENABLED) ? - "enabled" : "disabled", + str_enabled_disabled(p->flags & ACPI_SRAT_MEM_ENABLED), (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? " hot-pluggable" : "", (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? @@ -217,8 +216,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n", p->apic_id, p->proximity_domain, - (p->flags & ACPI_SRAT_CPU_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_CPU_ENABLED)); } break; @@ -229,8 +227,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", p->acpi_processor_uid, p->proximity_domain, - (p->flags & ACPI_SRAT_GICC_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_GICC_ENABLED)); } break; @@ -248,8 +245,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) *(u16 *)(&p->device_handle[0]), *(u16 *)(&p->device_handle[2]), p->proximity_domain, - (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)); } else { /* * In this case we can rely on the device having a @@ -259,8 +255,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) (char *)(&p->device_handle[0]), (char *)(&p->device_handle[8]), p->proximity_domain, - (p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)); } } break; @@ -272,8 +267,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", p->acpi_processor_uid, p->proximity_domain, - (p->flags & ACPI_SRAT_RINTC_ENABLED) ? - "enabled" : "disabled"); + str_enabled_disabled(p->flags & ACPI_SRAT_RINTC_ENABLED)); } break; From 23f00807619d15063d676218f36c5dfeda1eb420 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 25 Mar 2025 11:02:26 +0200 Subject: [PATCH 1755/2157] rtnetlink: Allocate vfinfo size for VF GUIDs when supported Commit 30aad41721e0 ("net/core: Add support for getting VF GUIDs") added support for getting VF port and node GUIDs in netlink ifinfo messages, but their size was not taken into consideration in the function that allocates the netlink message, causing the following warning when a netlink message is filled with many VF port and node GUIDs: # echo 64 > /sys/bus/pci/devices/0000\:08\:00.0/sriov_numvfs # ip link show dev ib0 RTNETLINK answers: Message too long Cannot send link get request: Message too long Kernel warning: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1930 at net/core/rtnetlink.c:4151 rtnl_getlink+0x586/0x5a0 Modules linked in: xt_conntrack xt_MASQUERADE nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay mlx5_ib macsec mlx5_core tls rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm iw_cm ib_ipoib fuse ib_cm ib_core CPU: 2 UID: 0 PID: 1930 Comm: ip Not tainted 6.14.0-rc2+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:rtnl_getlink+0x586/0x5a0 Code: cb 82 e8 3d af 0a 00 4d 85 ff 0f 84 08 ff ff ff 4c 89 ff 41 be ea ff ff ff e8 66 63 5b ff 49 c7 07 80 4f cb 82 e9 36 fc ff ff <0f> 0b e9 16 fe ff ff e8 de a0 56 00 66 66 2e 0f 1f 84 00 00 00 00 RSP: 0018:ffff888113557348 EFLAGS: 00010246 RAX: 00000000ffffffa6 RBX: ffff88817e87aa34 RCX: dffffc0000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: ffff88817e87afb8 RBP: 0000000000000009 R08: ffffffff821f44aa R09: 0000000000000000 R10: ffff8881260f79a8 R11: ffff88817e87af00 R12: ffff88817e87aa00 R13: ffffffff8563d300 R14: 00000000ffffffa6 R15: 00000000ffffffff FS: 00007f63a5dbf280(0000) GS:ffff88881ee00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f63a5ba4493 CR3: 00000001700fe002 CR4: 0000000000772eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0xa5/0x230 ? rtnl_getlink+0x586/0x5a0 ? report_bug+0x22d/0x240 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x14/0x50 ? asm_exc_invalid_op+0x16/0x20 ? skb_trim+0x6a/0x80 ? rtnl_getlink+0x586/0x5a0 ? __pfx_rtnl_getlink+0x10/0x10 ? rtnetlink_rcv_msg+0x1e5/0x860 ? __pfx___mutex_lock+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? __pfx_lock_acquire+0x10/0x10 ? stack_trace_save+0x90/0xd0 ? filter_irq_stacks+0x1d/0x70 ? kasan_save_stack+0x30/0x40 ? kasan_save_stack+0x20/0x40 ? kasan_save_track+0x10/0x30 rtnetlink_rcv_msg+0x21c/0x860 ? entry_SYSCALL_64_after_hwframe+0x76/0x7e ? __pfx_rtnetlink_rcv_msg+0x10/0x10 ? arch_stack_walk+0x9e/0xf0 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd5/0x410 ? rcu_is_watching+0x34/0x60 netlink_rcv_skb+0xe0/0x210 ? __pfx_rtnetlink_rcv_msg+0x10/0x10 ? __pfx_netlink_rcv_skb+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? __pfx___netlink_lookup+0x10/0x10 ? lock_release+0x62/0x200 ? netlink_deliver_tap+0xfd/0x290 ? rcu_is_watching+0x34/0x60 ? lock_release+0x62/0x200 ? netlink_deliver_tap+0x95/0x290 netlink_unicast+0x31f/0x480 ? __pfx_netlink_unicast+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd5/0x410 netlink_sendmsg+0x369/0x660 ? lock_release+0x62/0x200 ? __pfx_netlink_sendmsg+0x10/0x10 ? import_ubuf+0xb9/0xf0 ? __import_iovec+0x254/0x2b0 ? lock_release+0x62/0x200 ? __pfx_netlink_sendmsg+0x10/0x10 ____sys_sendmsg+0x559/0x5a0 ? __pfx_____sys_sendmsg+0x10/0x10 ? __pfx_copy_msghdr_from_user+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? do_read_fault+0x213/0x4a0 ? rcu_is_watching+0x34/0x60 ___sys_sendmsg+0xe4/0x150 ? __pfx____sys_sendmsg+0x10/0x10 ? do_fault+0x2cc/0x6f0 ? handle_pte_fault+0x2e3/0x3d0 ? __pfx_handle_pte_fault+0x10/0x10 ? preempt_count_sub+0x14/0xc0 ? __down_read_trylock+0x150/0x270 ? __handle_mm_fault+0x404/0x8e0 ? __pfx___handle_mm_fault+0x10/0x10 ? lock_release+0x62/0x200 ? __rcu_read_unlock+0x65/0x90 ? rcu_is_watching+0x34/0x60 __sys_sendmsg+0xd5/0x150 ? __pfx___sys_sendmsg+0x10/0x10 ? __up_read+0x192/0x480 ? lock_release+0x62/0x200 ? __rcu_read_unlock+0x65/0x90 ? rcu_is_watching+0x34/0x60 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f63a5b13367 Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007fff8c726bc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000067b687c2 RCX: 00007f63a5b13367 RDX: 0000000000000000 RSI: 00007fff8c726c30 RDI: 0000000000000004 RBP: 00007fff8c726cb8 R08: 0000000000000000 R09: 0000000000000034 R10: 00007fff8c726c7c R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000000 R14: 00007fff8c726cd0 R15: 00007fff8c726cd0 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xd08/0x2830 softirqs last enabled at (0): [] copy_process+0xd08/0x2830 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace 0000000000000000 ]--- Thus, when calculating ifinfo message size, take VF GUIDs sizes into account when supported. Fixes: 30aad41721e0 ("net/core: Add support for getting VF GUIDs") Signed-off-by: Mark Zhang Reviewed-by: Maher Sanalla Signed-off-by: Mark Bloch Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250325090226.749730-1-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a24a30dfc2d..334db17be37d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1177,6 +1177,9 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_TX_DROPPED */ nla_total_size_64bit(sizeof(__u64))); } + if (dev->netdev_ops->ndo_get_vf_guid) + size += num_vfs * 2 * + nla_total_size(sizeof(struct ifla_vf_guid)); return size; } else return 0; From 67d1a8956d2d62fe6b4c13ebabb57806098511d8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 25 Mar 2025 10:58:41 +0100 Subject: [PATCH 1756/2157] rndis_host: Flag RNDIS modems as WWAN devices Set FLAG_WWAN instead of FLAG_ETHERNET for RNDIS interfaces on Mobile Broadband Modems, as opposed to regular Ethernet adapters. Otherwise NetworkManager gets confused, misjudges the device type, and wouldn't know it should connect a modem to get the device to work. What would be the result depends on ModemManager version -- older ModemManager would end up disconnecting a device after an unsuccessful probe attempt (if it connected without needing to unlock a SIM), while a newer one might spawn a separate PPP connection over a tty interface instead, resulting in a general confusion and no end of chaos. The only way to get this work reliably is to fix the device type and have good enough version ModemManager (or equivalent). Fixes: 63ba395cd7a5 ("rndis_host: support Novatel Verizon USB730L") Signed-off-by: Lubomir Rintel Link: https://patch.msgid.link/20250325095842.1567999-1-lkundrak@v3.sk Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 7b3739b29c8f..bb0bf1415872 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,6 +630,16 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info wwan_rndis_info = { + .description = "Mobile Broadband RNDIS device", + .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .bind = rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -666,9 +676,11 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Novatel Verizon USB730L */ + /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and + * Telit FN990A (RNDIS) + */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long) &rndis_info, + .driver_info = (unsigned long)&wwan_rndis_info, }, { }, // END }; From 2eb6c6a34cb1c22b09b219390cdff0f02cd90258 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 25 Mar 2025 10:54:27 -0700 Subject: [PATCH 1757/2157] net: move replay logic to tc_modify_qdisc Eric reports that by the time we call netdev_lock_ops after rtnl_unlock/rtnl_lock, the dev might point to an invalid device. As suggested by Jakub in [0], move rtnl lock/unlock and request_module outside of qdisc_create. This removes extra complexity with relocking the netdev. 0: https://lore.kernel.org/netdev/20250325032803.1542c15e@kernel.org/ Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Reported-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20250305163732.2766420-1-sdf@fomichev.me/T/#me8dfd778ea4c4463acab55644e3f9836bc608771 Signed-off-by: Stanislav Fomichev Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250325175427.3818808-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 73 +++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index defb05c1fba4..f74a097f54ae 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1267,38 +1267,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_MODULES - if (ops == NULL && kind != NULL) { - char name[IFNAMSIZ]; - if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { - /* We dropped the RTNL semaphore in order to - * perform the module load. So, even if we - * succeeded in loading the module we have to - * tell the caller to replay the request. We - * indicate this using -EAGAIN. - * We replay the request because the device may - * go away in the mean time. - */ - netdev_unlock_ops(dev); - rtnl_unlock(); - request_module(NET_SCH_ALIAS_PREFIX "%s", name); - rtnl_lock(); - netdev_lock_ops(dev); - ops = qdisc_lookup_ops(kind); - if (ops != NULL) { - /* We will try again qdisc_lookup_ops, - * so don't keep a reference. - */ - module_put(ops->owner); - err = -EAGAIN; - goto err_out; - } - } - } -#endif - - err = -ENOENT; if (!ops) { + err = -ENOENT; NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } @@ -1623,8 +1593,7 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], - struct tcmsg *tcm, - bool *replay) + struct tcmsg *tcm) { struct Qdisc *q = NULL; struct Qdisc *p = NULL; @@ -1789,13 +1758,8 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } - if (q == NULL) { - if (err == -EAGAIN) { - *replay = true; - return 0; - } + if (!q) return err; - } graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); @@ -1808,6 +1772,27 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static void request_qdisc_module(struct nlattr *kind) +{ + struct Qdisc_ops *ops; + char name[IFNAMSIZ]; + + if (!kind) + return; + + ops = qdisc_lookup_ops(kind); + if (ops) { + module_put(ops->owner); + return; + } + + if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { + rtnl_unlock(); + request_module(NET_SCH_ALIAS_PREFIX "%s", name); + rtnl_lock(); + } +} + /* * Create/change qdisc. */ @@ -1818,27 +1803,23 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct tcmsg *tcm; - bool replay; int err; -replay: - /* Reinit, just in case something touches this. */ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; + request_qdisc_module(tca[TCA_KIND]); + tcm = nlmsg_data(n); dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; - replay = false; netdev_lock_ops(dev); - err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); - if (replay) - goto replay; return err; } From 7eccc86e90f04a0d758d16c08627a620ac59604d Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Thu, 27 Mar 2025 15:46:50 +0000 Subject: [PATCH 1758/2157] ASoC: qdsp6: q6asm-dai: fix q6asm_dai_compr_set_params error path In case of attempts to compress playback something, for instance, when audio routing is not set up correctly, the audio DSP is left in inconsistent state because we are not doing the correct things in the error path of q6asm_dai_compr_set_params(). So, when routing is not set up and compress playback is attempted the following errors are present (simplified log): q6routing routing: Routing not setup for MultiMedia-1 Session q6asm-dai dais: Stream reg failed ret:-22 q6asm-dai dais: ASoC error (-22): at snd_soc_component_compr_set_params() on 17300000.remoteproc:glink-edge:apr:service@7:dais After setting the correct routing the compress playback will always fail: q6asm-dai dais: cmd = 0x10db3 returned error = 0x9 q6asm-dai dais: DSP returned error[9] q6asm-dai dais: q6asm_open_write failed q6asm-dai dais: ASoC error (-22): at snd_soc_component_compr_set_params() on 17300000.remoteproc:glink-edge:apr:service@7:dais 0x9 here means "Operation is already processed". The CMD_OPEN here was sent the second time hence DSP responds that it was already done. Turns out the CMD_CLOSE should be sent after the q6asm_open_write() succeeded but something failed after that, for instance, routing setup. Fix this by slightly reworking the error path in q6asm_dai_compr_set_params(). Tested on QRB5165 RB5 and SDM845 RB3 boards. Cc: stable@vger.kernel.org Fixes: 5b39363e54cc ("ASoC: q6asm-dai: prepare set params to accept profile change") Cc: Srinivas Kandagatla Cc: Vinod Koul Cc: Pierre-Louis Bossart Signed-off-by: Alexey Klimov Reviewed-by: Srinivas Kandagatla Link: https://patch.msgid.link/20250327154650.337404-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 045100c94352..a400c9a31fea 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -892,9 +892,7 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, if (ret < 0) { dev_err(dev, "q6asm_open_write failed\n"); - q6asm_audio_client_free(prtd->audio_client); - prtd->audio_client = NULL; - return ret; + goto open_err; } } @@ -903,7 +901,7 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, prtd->session_id, dir); if (ret) { dev_err(dev, "Stream reg failed ret:%d\n", ret); - return ret; + goto q6_err; } ret = __q6asm_dai_compr_set_codec_params(component, stream, @@ -911,7 +909,7 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, prtd->stream_id); if (ret) { dev_err(dev, "codec param setup failed ret:%d\n", ret); - return ret; + goto q6_err; } ret = q6asm_map_memory_regions(dir, prtd->audio_client, prtd->phys, @@ -920,12 +918,21 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, if (ret < 0) { dev_err(dev, "Buffer Mapping failed ret:%d\n", ret); - return -ENOMEM; + ret = -ENOMEM; + goto q6_err; } prtd->state = Q6ASM_STREAM_RUNNING; return 0; + +q6_err: + q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); + +open_err: + q6asm_audio_client_free(prtd->audio_client); + prtd->audio_client = NULL; + return ret; } static int q6asm_dai_compr_set_metadata(struct snd_soc_component *component, From 40369bfe717e96e26650eeecfa5a6363563df6e4 Mon Sep 17 00:00:00 2001 From: Han Xu Date: Wed, 26 Mar 2025 17:41:51 -0500 Subject: [PATCH 1759/2157] spi: fsl-qspi: use devm function instead of driver remove Driver use devm APIs to manage clk/irq/resources and register the spi controller, but the legacy remove function will be called first during device detach and trigger kernel panic. Drop the remove function and use devm_add_action_or_reset() for driver cleanup to ensure the release sequence. Trigger kernel panic on i.MX8MQ by echo 30bb0000.spi >/sys/bus/platform/drivers/fsl-quadspi/unbind Cc: stable@vger.kernel.org Fixes: 8fcb830a00f0 ("spi: spi-fsl-qspi: use devm_spi_register_controller") Reported-by: Kevin Hao Signed-off-by: Han Xu Reviewed-by: Frank Li Link: https://patch.msgid.link/20250326224152.2147099-1-han.xu@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-qspi.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 355e6a39fb41..5c59fddb32c1 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -844,6 +844,19 @@ static const struct spi_controller_mem_caps fsl_qspi_mem_caps = { .per_op_freq = true, }; +static void fsl_qspi_cleanup(void *data) +{ + struct fsl_qspi *q = data; + + /* disable the hardware */ + qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR); + qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER); + + fsl_qspi_clk_disable_unprep(q); + + mutex_destroy(&q->lock); +} + static int fsl_qspi_probe(struct platform_device *pdev) { struct spi_controller *ctlr; @@ -934,6 +947,10 @@ static int fsl_qspi_probe(struct platform_device *pdev) ctlr->dev.of_node = np; + ret = devm_add_action_or_reset(dev, fsl_qspi_cleanup, q); + if (ret) + goto err_destroy_mutex; + ret = devm_spi_register_controller(dev, ctlr); if (ret) goto err_destroy_mutex; @@ -953,19 +970,6 @@ static int fsl_qspi_probe(struct platform_device *pdev) return ret; } -static void fsl_qspi_remove(struct platform_device *pdev) -{ - struct fsl_qspi *q = platform_get_drvdata(pdev); - - /* disable the hardware */ - qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR); - qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER); - - fsl_qspi_clk_disable_unprep(q); - - mutex_destroy(&q->lock); -} - static int fsl_qspi_suspend(struct device *dev) { return 0; @@ -1003,7 +1007,6 @@ static struct platform_driver fsl_qspi_driver = { .pm = &fsl_qspi_pm_ops, }, .probe = fsl_qspi_probe, - .remove = fsl_qspi_remove, }; module_platform_driver(fsl_qspi_driver); From f06777cf2bbc21dd8c71d6e3906934e56b4e18e4 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Mon, 17 Mar 2025 10:55:07 +0000 Subject: [PATCH 1760/2157] ACPI: PNP: Add Intel OC Watchdog IDs to non-PNP device list Intel Over-Clocking Watchdogs are described in ACPI tables by both the generic PNP0C02 _CID and their ACPI _HID. The presence of the _CID then causes the PNP scan handler to attach to the watchdog, preventing the actual watchdog driver from binding. Address this by adding the ACPI _HIDs to the list of non-PNP devices, so that the PNP scan handler is bypassed. Note that these watchdogs can be described by multiple _HIDs for what seems to be identical hardware. This commit is not a complete list of all the possible watchdog ACPI _HIDs. Signed-off-by: Diogo Ivo Link: https://patch.msgid.link/20250317-ivo-intel_oc_wdt-v3-2-32c396f4eefd@siemens.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pnp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index 01abf26764b0..3f5a1840f573 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -355,8 +355,10 @@ static bool acpi_pnp_match(const char *idstr, const struct acpi_device_id **matc * device represented by it. */ static const struct acpi_device_id acpi_nonpnp_device_ids[] = { + {"INT3F0D"}, {"INTC1080"}, {"INTC1081"}, + {"INTC1099"}, {""}, }; From 2da31ea2a085cd189857f2db0f7b78d0162db87a Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 18 Mar 2025 17:09:02 +0100 Subject: [PATCH 1761/2157] ACPI: resource: Skip IRQ override on ASUS Vivobook 14 X1404VAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like the ASUS Vivobook X1504VAP and Vivobook X1704VAP, the ASUS Vivobook 14 X1404VAP has its keyboard IRQ (1) described as ActiveLow in the DSDT, which the kernel overrides to EdgeHigh breaking the keyboard. $ sudo dmidecode […] System Information Manufacturer: ASUSTeK COMPUTER INC. Product Name: ASUS Vivobook 14 X1404VAP_X1404VA […] $ grep -A 30 PS2K dsdt.dsl | grep IRQ -A 1 IRQ (Level, ActiveLow, Exclusive, ) {1} Add the X1404VAP to the irq1_level_low_skip_override[] quirk table to fix this. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219224 Cc: All applicable Signed-off-by: Paul Menzel Reviewed-by: Hans de Goede Tested-by: Anton Shyndin Link: https://patch.msgid.link/20250318160903.77107-1-pmenzel@molgen.mpg.de Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index b4cd14e7fa76..14c7bac4100b 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -440,6 +440,13 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + /* Asus Vivobook X1404VAP */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "X1404VAP"), + }, + }, { /* Asus Vivobook X1504VAP */ .matches = { From 2fa87c71d2adb4b82c105f9191e6120340feff00 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Mar 2025 22:04:50 +0100 Subject: [PATCH 1762/2157] ACPI: x86: Extend Lenovo Yoga Tab 3 quirk with skip GPIO event-handlers Depending on the secureboot signature on EFI\BOOT\BOOTX86.EFI the Lenovo Yoga Tab 3 UEFI will switch its OSID ACPI variable between 1 (Windows) and 4 (Android(GMIN)). In Windows mode a GPIO event handler gets installed for GPO1 pin 5, causing Linux' x86-android-tables code which deals with the general brokenness of this device's ACPI tables to fail to probe with: [ 17.853705] x86_android_tablets: error -16 getting GPIO INT33FF:01 5 [ 17.859623] x86_android_tablets x86_android_tablets: probe with driver which renders sound, the touchscreen, charging-management, battery-monitoring and more non functional. Add ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS to the existing quirks for this device to fix this. Reported-by: Agoston Lorincz Closes: https://lore.kernel.org/platform-driver-x86/CAMEzqD+DNXrAvUOHviB2O2bjtcbmo3xH=kunKr4nubuMLbb_0A@mail.gmail.com/ Cc: All applicable Fixes: fe820db35275 ("ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga Tab 3 Pro (YT3-X90F)") Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20250325210450.358506-1-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index 068c1612660b..4ee30c2897a2 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -374,7 +374,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | - ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), }, { /* Medion Lifetab S10346 */ From 3035a6dd2d4736738949600b8abebbdb181e86ef Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Tue, 25 Mar 2025 17:39:53 -0300 Subject: [PATCH 1763/2157] ACPI: platform_profile: Optimize _aggregate_choices() Choices aggregates passed to _aggregate_choices() are already filled with ones, therefore we can avoid copying a new bitmap on the first iteration. This makes setting the PLATFORM_PROFILE_LAST bit on aggregates unnecessary, so drop it as well. While at it, add a couple empty lines to improve style. Reviewed-by: Armin Wolf Reviewed-by: Mario Limonciello Signed-off-by: Kurt Borja Link: https://patch.msgid.link/20250325-pprof-opt-v2-1-736291e6e66b@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/platform_profile.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index 671407fc2bd4..ffbfd32f4cf1 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -245,7 +245,8 @@ static const struct class platform_profile_class = { /** * _aggregate_choices - Aggregate the available profile choices * @dev: The device - * @arg: struct aggregate_choices_data + * @arg: struct aggregate_choices_data, with it's aggregate member bitmap + * initially filled with ones * * Return: 0 on success, -errno on failure */ @@ -256,12 +257,10 @@ static int _aggregate_choices(struct device *dev, void *arg) struct platform_profile_handler *handler; lockdep_assert_held(&profile_lock); + handler = to_pprof_handler(dev); bitmap_or(tmp, handler->choices, handler->hidden_choices, PLATFORM_PROFILE_LAST); - if (test_bit(PLATFORM_PROFILE_LAST, data->aggregate)) - bitmap_copy(data->aggregate, tmp, PLATFORM_PROFILE_LAST); - else - bitmap_and(data->aggregate, tmp, data->aggregate, PLATFORM_PROFILE_LAST); + bitmap_and(data->aggregate, tmp, data->aggregate, PLATFORM_PROFILE_LAST); data->count++; return 0; @@ -305,7 +304,6 @@ static ssize_t platform_profile_choices_show(struct kobject *kobj, }; int err; - set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, &data, _aggregate_choices); @@ -422,7 +420,7 @@ static ssize_t platform_profile_store(struct kobject *kobj, i = sysfs_match_string(profile_names, buf); if (i < 0 || i == PLATFORM_PROFILE_CUSTOM) return -EINVAL; - set_bit(PLATFORM_PROFILE_LAST, data.aggregate); + scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { ret = class_for_each_device(&platform_profile_class, NULL, &data, _aggregate_choices); @@ -502,7 +500,6 @@ int platform_profile_cycle(void) enum platform_profile_option profile = PLATFORM_PROFILE_LAST; int err; - set_bit(PLATFORM_PROFILE_LAST, data.aggregate); scoped_cond_guard(mutex_intr, return -ERESTARTSYS, &profile_lock) { err = class_for_each_device(&platform_profile_class, NULL, &profile, _aggregate_profiles); From 8de544883456d1cd86dc971e21e6e764f393c7d0 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Wed, 26 Mar 2025 17:45:30 +0100 Subject: [PATCH 1764/2157] MAINTAINERS: configfs: add Andreas Hindborg as maintainer Remove Joel Becker as maintainer of configfs and add Andreas Hindborg as maintainer and Breno Leitao as reviewer. Also update the tree URL. Add an entry for Joel Becker to CREDITS. Acked-by: Breno Leitao Signed-off-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250326-configfs-maintainer-v1-1-b175189fa27b@kernel.org Signed-off-by: Christian Brauner --- CREDITS | 4 ++++ MAINTAINERS | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 53d11a46fd69..3ec620f7260b 100644 --- a/CREDITS +++ b/CREDITS @@ -317,6 +317,10 @@ S: Code 930.5, Goddard Space Flight Center S: Greenbelt, Maryland 20771 S: USA +N: Joel Becker +E: jlbec@evilplan.org +D: configfs + N: Adam Belay E: ambx1@neo.rr.com D: Linux Plug and Play Support diff --git a/MAINTAINERS b/MAINTAINERS index e79c8a1c58d8..81215f2ee4d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5867,9 +5867,10 @@ S: Maintained F: Documentation/security/snp-tdx-threat-model.rst CONFIGFS -M: Joel Becker +M: Andreas Hindborg +R: Breno Leitao S: Supported -T: git git://git.infradead.org/users/hch/configfs.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/a.hindborg/linux.git configfs-next F: fs/configfs/ F: include/linux/configfs.h F: samples/configfs/ From 9e6901f17a719650be376f04d742bdbe1d7094ce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Mar 2025 18:17:44 -0600 Subject: [PATCH 1765/2157] fs: namespace: Avoid -Wflex-array-member-not-at-end warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Move the conflicting declaration to the end of the structure. Notice that `struct statmount` is a flexible structure --a structure that contains a flexible-array member. Fix the following warning: fs/namespace.c:5329:26: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/Z-SZKNdCiAkVJvqm@kspp Signed-off-by: Christian Brauner --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 6100e5b962a6..16292ff760c9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5326,8 +5326,10 @@ struct kstatmount { struct mnt_idmap *idmap; u64 mask; struct path root; - struct statmount sm; struct seq_file seq; + + /* Must be last --ends in a flexible-array member. */ + struct statmount sm; }; static u64 mnt_to_attr_flags(struct vfsmount *mnt) From 923936efeb74b3f42e5ad283a0b9110bda102601 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 28 Mar 2025 01:01:19 +0800 Subject: [PATCH 1766/2157] iomap: Fix conflicting values of iomap flags IOMAP_F_ATOMIC_BIO mistakenly took the same value as of IOMAP_F_SIZE_CHANGED in patch '370a6de7651b ("iomap: rework IOMAP atomic flags")'. Let's fix this and let's also create some more space for filesystem reported flags to avoid this in future. This patch makes the core iomap flags to start from bit 15, moving downwards. Note that "flags" member within struct iomap is of type u16. Fixes: 370a6de7651b ("iomap: rework IOMAP atomic flags") Signed-off-by: "Ritesh Harjani (IBM)" Link: https://lore.kernel.org/r/20250327170119.61045-1-ritesh.list@gmail.com Reviewed-by: John Garry Signed-off-by: Christian Brauner --- include/linux/iomap.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 02fe001feebb..68416b135151 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -78,6 +78,11 @@ struct vm_fault; #define IOMAP_F_ANON_WRITE (1U << 7) #define IOMAP_F_ATOMIC_BIO (1U << 8) +/* + * Flag reserved for file system specific usage + */ +#define IOMAP_F_PRIVATE (1U << 12) + /* * Flags set by the core iomap code during operations: * @@ -88,14 +93,8 @@ struct vm_fault; * range it covers needs to be remapped by the high level before the operation * can proceed. */ -#define IOMAP_F_SIZE_CHANGED (1U << 8) -#define IOMAP_F_STALE (1U << 9) - -/* - * Flags from 0x1000 up are for file system specific usage: - */ -#define IOMAP_F_PRIVATE (1U << 12) - +#define IOMAP_F_SIZE_CHANGED (1U << 14) +#define IOMAP_F_STALE (1U << 15) /* * Magic value for addr: From 5c5d0d7050286e14a6ca18b8d77fc7a34f701206 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 27 Mar 2025 10:18:19 +0800 Subject: [PATCH 1767/2157] dm-verity: support block number limits for different ioprio classes Calling verity_verify_io in bh for IO of all sizes is not suitable for embedded devices. From our tests, it can improve the performance of 4K synchronise random reads. For example: ./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \ --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \ --filename=/dev/block/mapper/xx-verity But it will degrade the performance of 512K synchronise sequential reads on our devices. For example: ./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \ --numjobs=8 --runtime=60 --time_based --group_reporting \ --filename=/dev/block/mapper/xx-verity A parameter array is introduced by this change. And users can modify the default config by /sys/module/dm_verity/parameters/use_bh_bytes. The default limits for NONE/RT/BE is set to 8192. The default limits for IDLE is set to 0. Call verity_verify_io directly when verity_end_io is not in hardirq. Signed-off-by: LongPing Wei Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/verity.rst | 11 +++++-- drivers/md/dm-verity-target.c | 29 +++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index fb3a045d0c72..8c3f1f967a3c 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -151,8 +151,15 @@ root_hash_sig_key_desc already in the secondary trusted keyring. try_verify_in_tasklet - If verity hashes are in cache, verify data blocks in kernel tasklet instead - of workqueue. This option can reduce IO latency. + If verity hashes are in cache and the IO size does not exceed the limit, + verify data blocks in bottom half instead of workqueue. This option can + reduce IO latency. The size limits can be configured via + /sys/module/dm_verity/parameters/use_bh_bytes. The four parameters + correspond to limits for IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn. + For example: + ,,, + 4096,4096,4096,4096 Theory of operation =================== diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index fc5ae123670b..40448a8c74b1 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -30,6 +30,7 @@ #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 +#define DM_VERITY_USE_BH_DEFAULT_BYTES 8192 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 @@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644); +static unsigned int dm_verity_use_bh_bytes[4] = { + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_NONE + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_RT + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_BE + 0 // IOPRIO_CLASS_IDLE +}; + +module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644); + static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled); /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */ @@ -669,9 +679,17 @@ static void verity_bh_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(err)); } +static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio) +{ + return ioprio <= IOPRIO_CLASS_IDLE && + bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]); +} + static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; + unsigned short ioprio = IOPRIO_PRIO_CLASS(bio->bi_ioprio); + unsigned int bytes = io->n_blocks << io->v->data_dev_block_bits; if (bio->bi_status && (!verity_fec_is_enabled(io->v) || @@ -681,9 +699,14 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) { - INIT_WORK(&io->bh_work, verity_bh_work); - queue_work(system_bh_wq, &io->bh_work); + if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq && + verity_use_bh(bytes, ioprio)) { + if (in_hardirq() || irqs_disabled()) { + INIT_WORK(&io->bh_work, verity_bh_work); + queue_work(system_bh_wq, &io->bh_work); + } else { + verity_bh_work(&io->bh_work); + } } else { INIT_WORK(&io->work, verity_work); queue_work(io->v->verify_wq, &io->work); From 31396626eaf0be0e8edc87b801fcd205016e42d9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 26 Mar 2025 23:26:58 +0100 Subject: [PATCH 1768/2157] dt-bindings: i2c: snps,designware-i2c: describe Renesas RZ/N1D variant So far, no differences are known, so it can fallback to the default compatible. Signed-off-by: Wolfram Sang Acked-by: Conor Dooley --- .../devicetree/bindings/i2c/snps,designware-i2c.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml index e5d05263c45a..bc5d0fb5abfe 100644 --- a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml @@ -27,6 +27,11 @@ properties: oneOf: - description: Generic Synopsys DesignWare I2C controller const: snps,designware-i2c + - description: Renesas RZ/N1D I2C controller + items: + - const: renesas,r9a06g032-i2c # RZ/N1D + - const: renesas,rzn1-i2c # RZ/N1 + - const: snps,designware-i2c - description: Microsemi Ocelot SoCs I2C controller items: - const: mscc,ocelot-i2c From 52c19f901318d32e01a36d975ea2fdd0a26f56e7 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 27 Mar 2025 12:00:12 +0100 Subject: [PATCH 1769/2157] MAINTAINERS: Add dedicated entries for phy_link_topology The infrastructure to handle multi-phy devices is fairly standalone. Add myself as maintainer for that part as well as the netlink uAPI that exposes it. Reviewed-by: Andrew Lunn Acked-by: Jakub Kicinski Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250327110013.106865-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd25139cc58..36511ed5bf6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16577,6 +16577,13 @@ F: net/ethtool/mm.c F: tools/testing/selftests/drivers/net/hw/ethtool_mm.sh K: ethtool_mm +NETWORKING [ETHTOOL PHY TOPOLOGY] +M: Maxime Chevallier +F: Documentation/networking/phy-link-topology.rst +F: drivers/net/phy/phy_link_topology.c +F: include/linux/phy_link_topology.h +F: net/ethtool/phy.c + NETWORKING [GENERAL] M: "David S. Miller" M: Eric Dumazet From fa37a8849634db2dd3545116873da8cf4b1e67c6 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 25 Mar 2025 09:32:37 -0700 Subject: [PATCH 1770/2157] net: mana: Switch to page pool for jumbo frames Frag allocators, such as netdev_alloc_frag(), were not designed to work for fragsz > PAGE_SIZE. So, switch to page pool for jumbo frames instead of using page frag allocators. This driver is using page pool for smaller MTUs already. Cc: stable@vger.kernel.org Fixes: 80f6215b450e ("net: mana: Add support for jumbo frame") Signed-off-by: Haiyang Zhang Reviewed-by: Long Li Reviewed-by: Shradha Gupta Link: https://patch.msgid.link/1742920357-27263-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 46 ++++--------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index e190d5ee5154..3ac73d97337e 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -661,30 +661,16 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu mpc->rxbpre_total = 0; for (i = 0; i < num_rxb; i++) { - if (mpc->rxbpre_alloc_size > PAGE_SIZE) { - va = netdev_alloc_frag(mpc->rxbpre_alloc_size); - if (!va) - goto error; + page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); + if (!page) + goto error; - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < - get_order(mpc->rxbpre_alloc_size)) { - put_page(page); - goto error; - } - } else { - page = dev_alloc_page(); - if (!page) - goto error; - - va = page_to_virt(page); - } + va = page_to_virt(page); da = dma_map_single(dev, va + mpc->rxbpre_headroom, mpc->rxbpre_datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { - put_page(virt_to_head_page(va)); + put_page(page); goto error; } @@ -1676,7 +1662,7 @@ static void mana_rx_skb(void *buf_va, bool from_pool, } static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, - dma_addr_t *da, bool *from_pool, bool is_napi) + dma_addr_t *da, bool *from_pool) { struct page *page; void *va; @@ -1687,21 +1673,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, if (rxq->xdp_save_va) { va = rxq->xdp_save_va; rxq->xdp_save_va = NULL; - } else if (rxq->alloc_size > PAGE_SIZE) { - if (is_napi) - va = napi_alloc_frag(rxq->alloc_size); - else - va = netdev_alloc_frag(rxq->alloc_size); - - if (!va) - return NULL; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < get_order(rxq->alloc_size)) { - put_page(page); - return NULL; - } } else { page = page_pool_dev_alloc_pages(rxq->page_pool); if (!page) @@ -1734,7 +1705,7 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, dma_addr_t da; void *va; - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; @@ -2176,7 +2147,7 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, if (mpc->rxbufs_pre) va = mana_get_rxbuf_pre(rxq, &da); else - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return -ENOMEM; @@ -2262,6 +2233,7 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; + pprm.order = get_order(rxq->alloc_size); rxq->page_pool = page_pool_create(&pprm); From fab05835688526f9de123d1e98e4d1f838da4e22 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Sun, 23 Mar 2025 14:28:26 +0200 Subject: [PATCH 1771/2157] net/mlx5e: SHAMPO, Make reserved size independent of page size When hw-gro is enabled, the maximum number of header entries that are needed per wqe (hd_per_wqe) is calculated based on the size of the reservations among other parameters. Miscalculation of the size of reservations leads to incorrect calculation of hd_per_wqe as 0, particularly in the case of large page size like in aarch64, this prevents the SHAMPO header from being correctly initialized in the device, ultimately causing the following cqe err that indicates a violation of PD. mlx5_core 0000:00:08.0 eth2: ERR CQE on RQ: 0x1180 mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x510, ci 0x0, qn 0x1180, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 00000000: 00 00 00 00 04 4a 00 00 00 00 00 00 20 00 93 32 00000010: 55 00 00 00 fb cc 00 00 00 00 00 00 07 18 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4a 00000030: 00 00 00 9a 93 00 32 04 00 00 00 00 00 00 da e1 Use the correct formula for calculating the size of reservations, precisely it shouldn't be dependent on page size, instead use the correct multiply of MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE. Fixes: e5ca8fb08ab2 ("net/mlx5e: Add control path for SHAMPO feature") Signed-off-by: Lama Kayal Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1742732906-166564-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index aa36670d9a36..58ec5e44aa7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -430,7 +430,7 @@ u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * - PAGE_SIZE; + MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); } @@ -834,7 +834,8 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * + MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); @@ -1043,7 +1044,8 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { - int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * + MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); From 2ea396448f26d0d7d66224cb56500a6789c7ed07 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 26 Mar 2025 17:32:36 +0900 Subject: [PATCH 1772/2157] net: usb: usbnet: restore usb%d name exception for local mac addresses commit 8a7d12d674ac ("net: usb: usbnet: fix name regression") assumed that local addresses always came from the kernel, but some devices hand out local mac addresses so we ended up with point-to-point devices with a mac set by the driver, renaming to eth%d when they used to be named usb%d. Userspace should not rely on device name, but for the sake of stability restore the local mac address check portion of the naming exception: point to point devices which either have no mac set by the driver or have a local mac handed out by the driver will keep the usb%d name. (some USB LTE modems are known to hand out a stable mac from the locally administered range; that mac appears to be random (different for mulitple devices) and can be reset with device-specific commands, so while such devices would benefit from getting a OUI reserved, we have to deal with these and might as well preserve the existing behavior to avoid breaking fragile openwrt configurations and such on upgrade.) Link: https://lkml.kernel.org/r/20241203130457.904325-1-asmadeus@codewreck.org Fixes: 8a7d12d674ac ("net: usb: usbnet: fix name regression") Cc: stable@vger.kernel.org Tested-by: Ahmed Naseef Signed-off-by: Dominique Martinet Acked-by: Oliver Neukum Link: https://patch.msgid.link/20250326-usbnet_rename-v2-1-57eb21fcff26@atmark-techno.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 44179f4e807f..aeab2308b150 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -178,6 +178,17 @@ int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) } EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr); +static bool usbnet_needs_usb_name_format(struct usbnet *dev, struct net_device *net) +{ + /* Point to point devices which don't have a real MAC address + * (or report a fake local one) have historically used the usb%d + * naming. Preserve this.. + */ + return (dev->driver_info->flags & FLAG_POINTTOPOINT) != 0 && + (is_zero_ether_addr(net->dev_addr) || + is_local_ether_addr(net->dev_addr)); +} + static void intr_complete (struct urb *urb) { struct usbnet *dev = urb->context; @@ -1762,13 +1773,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (status < 0) goto out1; - // heuristic: "usb%d" for links we know are two-host, - // else "eth%d" when there's reasonable doubt. userspace - // can rename the link if it knows better. + /* heuristic: rename to "eth%d" if we are not sure this link + * is two-host (these links keep "usb%d") + */ if ((dev->driver_info->flags & FLAG_ETHER) != 0 && - ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || - /* somebody touched it*/ - !is_zero_ether_addr(net->dev_addr))) + !usbnet_needs_usb_name_format(dev, net)) strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) From 2a07804170c716ef1a8c3bc6b2d622abcafb7bde Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 28 Mar 2025 15:43:26 +0800 Subject: [PATCH 1773/2157] ALSA: hda/tas2781: Upgrade calibratd-data writing code to support Alpha and Beta dsp firmware Since 2025, the firmware for tas2781 has been added more audio acoustic features, such as non-linear compensation, advanced battery guard, rattle-noise suppression, etc. The version was divided into two different series. Both series have a slight change on the calibrated data storage addresses, which becames flexible instead of fixed. In order to support new firwmares in time, the code have some related upgrades. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250328074326.796-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 9ed49b0dbe6b..29dc4f500580 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -558,28 +558,38 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv) static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) { - static const unsigned char page_array[CALIB_MAX] = { - 0x17, 0x18, 0x18, 0x13, 0x18, + struct calidata *cali_data = &tas_priv->cali_data; + struct cali_reg *r = &cali_data->cali_reg_array; + unsigned int cali_reg[CALIB_MAX] = { + TASDEVICE_REG(0, 0x17, 0x74), + TASDEVICE_REG(0, 0x18, 0x0c), + TASDEVICE_REG(0, 0x18, 0x14), + TASDEVICE_REG(0, 0x13, 0x70), + TASDEVICE_REG(0, 0x18, 0x7c), }; - static const unsigned char rgno_array[CALIB_MAX] = { - 0x74, 0x0c, 0x14, 0x70, 0x7c, - }; - int offset = 0; int i, j, rc; + int oft = 0; __be32 data; + if (tas_priv->dspbin_typ != TASDEV_BASIC) { + cali_reg[0] = r->r0_reg; + cali_reg[1] = r->invr0_reg; + cali_reg[2] = r->r0_low_reg; + cali_reg[3] = r->pow_reg; + cali_reg[4] = r->tlimit_reg; + } + for (i = 0; i < tas_priv->ndev; i++) { for (j = 0; j < CALIB_MAX; j++) { data = cpu_to_be32( - *(uint32_t *)&tas_priv->cali_data.data[offset]); + *(uint32_t *)&tas_priv->cali_data.data[oft]); rc = tasdevice_dev_bulk_write(tas_priv, i, - TASDEVICE_REG(0, page_array[j], rgno_array[j]), - (unsigned char *)&data, 4); + cali_reg[j], (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); - offset += 4; + oft += 4; } } } From 079a206f51f0c183be96a2f78f183dee42df1d4a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:47 +0200 Subject: [PATCH 1774/2157] seq_buf: Mark binary printing functions with __printf() attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binary printing functions are using printf() type of format, and compiler is not happy about them as is: lib/seq_buf.c:162:17: error: function ‘seq_buf_bprintf’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors by adding __printf() attribute. Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-2-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- include/linux/seq_buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fe41da005970..52791e070506 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -167,8 +167,8 @@ extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, const void *buf, size_t len, bool ascii); #ifdef CONFIG_BINARY_PRINTF -extern int -seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); +__printf(2, 0) +int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif void seq_buf_do_printk(struct seq_buf *s, const char *lvl); From 6b2c1e30ad6846624d935a7ea98dae60458126b8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:48 +0200 Subject: [PATCH 1775/2157] seq_file: Mark binary printing functions with __printf() attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binary printing functions are using printf() type of format, and compiler is not happy about them as is: fs/seq_file.c:418:35: error: function ‘seq_bprintf’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors by adding __printf() attribute. Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-3-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 2fb266ea69fa..d6ebf0596510 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -181,6 +181,7 @@ int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #ifdef CONFIG_BINARY_PRINTF +__printf(2, 0) void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary); #endif From 196a062641fe68d9bfe0ad36b6cd7628c99ad22c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:49 +0200 Subject: [PATCH 1776/2157] tracing: Mark binary printing functions with __printf() attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binary printing functions are using printf() type of format, and compiler is not happy about them as is: kernel/trace/trace.c:3292:9: error: function ‘trace_vbprintk’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] kernel/trace/trace_seq.c:182:9: error: function ‘trace_seq_bprintf’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors by adding __printf() attribute. While at it, move existing __printf() attributes from the implementations to the declarations. IT also fixes incorrect attribute parameters that are used for trace_array_printk(). Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-4-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- include/linux/trace.h | 4 ++-- include/linux/trace_seq.h | 8 ++++---- kernel/trace/trace.c | 11 +++-------- kernel/trace/trace.h | 16 +++++++++------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/linux/trace.h b/include/linux/trace.h index fdcd76b7be83..7eaad857dee0 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -72,8 +72,8 @@ static inline int unregister_ftrace_export(struct trace_export *export) static inline void trace_printk_init_buffers(void) { } -static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, - const char *fmt, ...) +static inline __printf(3, 4) +int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { return 0; } diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 1ef95c0287f0..a93ed5ac3226 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -88,8 +88,8 @@ extern __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern __printf(2, 0) void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); -extern void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt); @@ -113,8 +113,8 @@ static inline __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { } -static inline void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +static inline __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 826267f5b650..b343d32ed3b2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3340,10 +3340,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vbprintk); -__printf(3, 0) -static int -__trace_array_vprintk(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, va_list args) +static __printf(3, 0) +int __trace_array_vprintk(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, va_list args) { struct ring_buffer_event *event; int len = 0, size; @@ -3393,7 +3392,6 @@ __trace_array_vprintk(struct trace_buffer *buffer, return len; } -__printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { @@ -3423,7 +3421,6 @@ int trace_array_vprintk(struct trace_array *tr, * Note, trace_array_init_printk() must be called on @tr before this * can be used. */ -__printf(3, 0) int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { @@ -3468,7 +3465,6 @@ int trace_array_init_printk(struct trace_array *tr) } EXPORT_SYMBOL_GPL(trace_array_init_printk); -__printf(3, 4) int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) { @@ -3484,7 +3480,6 @@ int trace_array_printk_buf(struct trace_buffer *buffer, return ret; } -__printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(printk_trace, ip, fmt, args); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4a6621e2a0fa..b4f12927f304 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -838,13 +838,15 @@ static inline void __init disable_tracing_selftest(const char *reason) extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); -extern int -trace_vbprintk(unsigned long ip, const char *fmt, va_list args); -extern int -trace_vprintk(unsigned long ip, const char *fmt, va_list args); -extern int -trace_array_vprintk(struct trace_array *tr, - unsigned long ip, const char *fmt, va_list args); + +__printf(2, 0) +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); +__printf(2, 0) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args); +__printf(3, 0) +int trace_array_vprintk(struct trace_array *tr, + unsigned long ip, const char *fmt, va_list args); +__printf(3, 4) int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); From 7bf819aa992faee980610e9021aec0c38aac53be Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:50 +0200 Subject: [PATCH 1777/2157] vsnprintf: Mark binary printing functions with __printf() attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binary printf() functions are using printf() type of format, and compiler is not happy about them as is: lib/vsprintf.c:3130:47: error: function ‘vbin_printf’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] lib/vsprintf.c:3298:33: error: function ‘bstr_printf’ might be a candidate for ‘gnu_printf’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors by adding __printf() attribute. Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-5-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- include/linux/string.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 0403a4ca4c11..01621ad0f598 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -336,8 +336,8 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s) #ifdef CONFIG_BINARY_PRINTF -int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +__printf(3, 0) int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +__printf(3, 0) int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, From a1aea76a4ad07045a18be3108dd3cc2e90c6ea96 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:51 +0200 Subject: [PATCH 1778/2157] vsnprintf: Drop unused const char fmt * in va_format() va_format() doesn't use original formatting string, drop that argument as it's done elsewhere in similar cases. Suggested-by: Rasmus Villemoes Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-6-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- lib/vsprintf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 734bd70c8b9b..4a04828916e2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1700,7 +1700,7 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, } static char *va_format(char *buf, char *end, struct va_format *va_fmt, - struct printf_spec spec, const char *fmt) + struct printf_spec spec) { va_list va; @@ -2469,7 +2469,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - return va_format(buf, end, ptr, spec, fmt); + return va_format(buf, end, ptr, spec); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': From bd67c1c3c353b6560f2983bdd23c665e26cf83f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 16:40:52 +0200 Subject: [PATCH 1779/2157] vsnprintf: Silence false positive GCC warning for va_format() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit va_format() is using vsnprintf(), and GCC compiler (Debian 14.2.0-17) is not happy about this: lib/vsprintf.c:1704:9: error: function ‘va_format’ might be a candidate for ‘gnu_print ’ format attribute [-Werror=suggest-attribute=format] Fix the compilation errors (`make W=1` when CONFIG_WERROR=y, which is default) by silencing the false positive GCC warning. Suggested-by: Rasmus Villemoes Signed-off-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250321144822.324050-7-andriy.shevchenko@linux.intel.com Signed-off-by: Petr Mladek --- lib/vsprintf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4a04828916e2..a2195bc81723 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1699,6 +1699,10 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" +#endif static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec) { @@ -1713,6 +1717,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt, return buf; } +#pragma GCC diagnostic pop static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, From 803f97298e7de9242eb677a1351dcafbbcc9117e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:42 -0700 Subject: [PATCH 1780/2157] iommufd: Extend IOMMU_GET_HW_INFO to report PASID capability PASID usage requires PASID support in both device and IOMMU. Since the iommu drivers always enable the PASID capability for the device if it is supported, this extends the IOMMU_GET_HW_INFO to report the PASID capability to userspace. Also, enhances the selftest accordingly. Link: https://patch.msgid.link/r/20250321180143.8468-5-yi.l.liu@intel.com Cc: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao #aarch64 platform Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 34 +++++++++++++++++++++++++++++++++- drivers/pci/ats.c | 33 +++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 3 +++ include/uapi/linux/iommufd.h | 14 +++++++++++++- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 1605f6c0e1ee..2307daad65c0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include @@ -1455,7 +1456,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1512,6 +1514,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c6b266c772c8..ec6c8dbdc5e9 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -538,4 +538,37 @@ int pci_max_pasids(struct pci_dev *pdev) return (1 << FIELD_GET(PCI_PASID_CAP_WIDTH, supported)); } EXPORT_SYMBOL_GPL(pci_max_pasids); + +/** + * pci_pasid_status - Check the PASID status + * @pdev: PCI device structure + * + * Returns a negative value when no PASID capability is present. + * Otherwise the value of the control register is returned. + * Status reported are: + * + * PCI_PASID_CTRL_ENABLE - PASID enabled + * PCI_PASID_CTRL_EXEC - Execute permission enabled + * PCI_PASID_CTRL_PRIV - Privileged mode enabled + */ +int pci_pasid_status(struct pci_dev *pdev) +{ + int pasid; + u16 ctrl; + + if (pdev->is_virtfn) + pdev = pci_physfn(pdev); + + pasid = pdev->pasid_cap; + if (!pasid) + return -EINVAL; + + pci_read_config_word(pdev, pasid + PCI_PASID_CTRL, &ctrl); + + ctrl &= PCI_PASID_CTRL_ENABLE | PCI_PASID_CTRL_EXEC | + PCI_PASID_CTRL_PRIV; + + return ctrl; +} +EXPORT_SYMBOL_GPL(pci_pasid_status); #endif /* CONFIG_PCI_PASID */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 6901804ec736..e2c04e58a997 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -612,9 +612,17 @@ enum iommu_hw_info_type { * IOMMU_HWPT_GET_DIRTY_BITMAP * IOMMU_HWPT_SET_DIRTY_TRACKING * + * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. + * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. */ enum iommufd_hw_capabilities { IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, + IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; /** @@ -630,6 +638,9 @@ enum iommufd_hw_capabilities { * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined * in the enum iommu_hw_capabilities. + * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. + * PCI devices turn to out_capabilities to check if the + * specific capabilities is supported or not. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -653,7 +664,8 @@ struct iommu_hw_info { __u32 data_len; __aligned_u64 data_uptr; __u32 out_data_type; - __u32 __reserved; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) From 6d9500bb1ff8c7f9c3ce199521c41aa41e8fd994 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:43 -0700 Subject: [PATCH 1781/2157] iommufd/selftest: Add coverage for reporting max_pasid_log2 via IOMMU_HW_INFO IOMMU_HW_INFO is extended to report max_pasid_log2, hence add coverage for it. Link: https://patch.msgid.link/r/20250321180143.8468-6-yi.l.liu@intel.com Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 18 ++++++++++++++++++ .../testing/selftests/iommu/iommufd_fail_nth.c | 3 ++- tools/testing/selftests/iommu/iommufd_utils.h | 17 +++++++++++++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index c39222b9869b..7eb7ee149f2b 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -342,12 +342,14 @@ FIXTURE(iommufd_ioas) uint32_t hwpt_id; uint32_t device_id; uint64_t base_iova; + uint32_t device_pasid_id; }; FIXTURE_VARIANT(iommufd_ioas) { unsigned int mock_domains; unsigned int memory_limit; + bool pasid_capable; }; FIXTURE_SETUP(iommufd_ioas) @@ -372,6 +374,12 @@ FIXTURE_SETUP(iommufd_ioas) IOMMU_TEST_DEV_CACHE_DEFAULT); self->base_iova = MOCK_APERTURE_START; } + + if (variant->pasid_capable) + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_PASID, + NULL, NULL, + &self->device_pasid_id); } FIXTURE_TEARDOWN(iommufd_ioas) @@ -387,6 +395,7 @@ FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain) FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain) { .mock_domains = 1, + .pasid_capable = true, }; FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain) @@ -752,6 +761,8 @@ TEST_F(iommufd_ioas, get_hw_info) } buffer_smaller; if (self->device_id) { + uint8_t max_pasid = 0; + /* Provide a zero-size user_buffer */ test_cmd_get_hw_info(self->device_id, NULL, 0); /* Provide a user_buffer with exact size */ @@ -766,6 +777,13 @@ TEST_F(iommufd_ioas, get_hw_info) * the fields within the size range still gets updated. */ test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); + ASSERT_EQ(0, max_pasid); + if (variant->pasid_capable) { + test_cmd_get_hw_info_pasid(self->device_pasid_id, + &max_pasid); + ASSERT_EQ(MOCK_PASID_WIDTH, max_pasid); + } } else { test_err_get_hw_info(ENOENT, self->device_id, &buffer_exact, sizeof(buffer_exact)); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 8fd6f4500090..e11ec4b121fc 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -666,7 +666,8 @@ TEST_FAIL_NTH(basic_fail_nth, device) &self->stdev_id, NULL, &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, + sizeof(info), NULL, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 27794b6f58fc..72f6636e5d90 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -758,7 +758,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) /* @data can be NULL */ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, - size_t data_len, uint32_t *capabilities) + size_t data_len, uint32_t *capabilities, + uint8_t *max_pasid) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { @@ -803,6 +804,9 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, assert(!info->flags); } + if (max_pasid) + *max_pasid = cmd.out_max_pasid_log2; + if (capabilities) *capabilities = cmd.out_capabilities; @@ -811,14 +815,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, #define test_cmd_get_hw_info(device_id, data, data_len) \ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL)) + data_len, NULL, NULL)) #define test_err_get_hw_info(_errno, device_id, data, data_len) \ EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL)) + data_len, NULL, NULL)) #define test_cmd_get_hw_capabilities(device_id, caps, mask) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ + 0, &caps, NULL)) + +#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ + 0, NULL, max_pasid)) static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) { From 41464a4628f3b15988bdc3dcd824c2e91064fc6f Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 24 Mar 2025 05:00:33 -0700 Subject: [PATCH 1782/2157] iommufd: Initialize the flags of vevent in iommufd_viommu_report_event() The vevent->header.flags is not initialized per allocation, hence the vevent read path may treat the vevent as lost_events_header wrongly. Use kzalloc() to alloc memory for new vevent. Fixes: e8e1ef9b77a7 ("iommufd/viommu: Add iommufd_viommu_report_event helper") Link: https://patch.msgid.link/r/20250324120034.5940-2-yi.l.liu@intel.com Signed-off-by: Yi Liu Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index a08ff0f37fc6..922cd1fe7ec2 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -102,7 +102,7 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, goto out_set_header; } - vevent = kmalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); + vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); if (!vevent) { rc = -ENOMEM; vevent = &veventq->lost_events_header; From 6fc85bbbeaeae39c61d230ce279c0b0d0952d3e3 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 24 Mar 2025 05:00:34 -0700 Subject: [PATCH 1783/2157] iommufd: Balance veventq->num_events inc/dec iommufd_veventq_fops_read() decrements veventq->num_events when a vevent is read out. However, the report path ony increments veventq->num_events for normal events. To be balanced, make the read path decrement num_events only for normal vevents. Fixes: e36ba5ab808e ("iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC") Link: https://patch.msgid.link/r/20250324120034.5940-3-yi.l.liu@intel.com Signed-off-by: Yi Liu Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/eventq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index 4c43ace8c725..f39cf0797347 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -385,7 +385,8 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, break; } spin_lock(&eventq->lock); - veventq->num_events--; + if (!vevent_for_lost_events_header(cur)) + veventq->num_events--; spin_unlock(&eventq->lock); done += cur->data_len; kfree(cur); From 3a2ffd3f3e1b6df4ed7b35f98565c1ad0fe54840 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 26 Mar 2025 22:28:46 -0700 Subject: [PATCH 1784/2157] iommu: Convert unreachable() to BUG() Bare unreachable() should be avoided as it generates undefined behavior, e.g. falling through to the next function. Use BUG() instead so the error is defined. Fixes the following warnings: drivers/iommu/dma-iommu.o: warning: objtool: iommu_dma_sw_msi+0x92: can't find jump dest instruction at .text+0x54d5 vmlinux.o: warning: objtool: iommu_dma_get_msi_page() falls through to next function __iommu_dma_unmap() Link: https://patch.msgid.link/r/0c801ae017ec078cacd39f8f0898fc7780535f85.1743053325.git.jpoimboe@kernel.org Reported-by: Randy Dunlap Closes: https://lore.kernel.org/314f8809-cd59-479b-97d7-49356bf1c8d1@infradead.org Reported-by: Paul E. McKenney Closes: https://lore.kernel.org/5dd1f35e-8ece-43b7-ad6d-86d02d2718f6@paulmck-laptop Fixes: 6aa63a4ec947 ("iommu: Sort out domain user data") Signed-off-by: Josh Poimboeuf Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2bd9f80a83fe..8cc5397d7dfc 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1762,7 +1762,7 @@ static size_t cookie_msi_granule(const struct iommu_domain *domain) case IOMMU_COOKIE_DMA_MSI: return PAGE_SIZE; default: - unreachable(); + BUG(); }; } @@ -1774,7 +1774,7 @@ static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) case IOMMU_COOKIE_DMA_MSI: return &domain->msi_cookie->msi_page_list; default: - unreachable(); + BUG(); }; } From 858c9c10c123b7b04bba12c689db675c18d48bda Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 28 Mar 2025 18:46:54 +0700 Subject: [PATCH 1785/2157] iommufd: Fix iommu_vevent_header tables markup Stephen Rothwell reports htmldocs warnings on iommufd_vevent_header tables: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: CRITICAL: Unexpected section title or transition. ------------------------------------------------------------------------- [docutils] WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -sphinx-version 8.1.3 ./include/uapi/linux/iommufd.h' processing failed with: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: (SEVERE/4) Unexpected section title or transition. ------------------------------------------------------------------------- These are because Sphinx confuses the tables for section headings. Fix the table markup to squash away above warnings. Fixes: e36ba5ab808e ("iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC") Link: https://patch.msgid.link/r/20250328114654.55840-1-bagasdotme@gmail.com Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250318213359.5dc56fd1@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e2c04e58a997..f29b6c44655e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1045,21 +1045,26 @@ enum iommu_veventq_flag { * [0, INT_MAX] where the following index of INT_MAX is 0 * * Each iommufd_vevent_header reports a sequence index of the following vEVENT: - * ------------------------------------------------------------------------- + * + * +----------------------+-------+----------------------+-------+---+-------+ * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | - * ------------------------------------------------------------------------- + * +----------------------+-------+----------------------+-------+---+-------+ + * * And this sequence index is expected to be monotonic to the sequence index of * the previous vEVENT. If two adjacent sequence indexes has a delta larger than * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: - * ------------------------------------------------------------------------- + * + * +-----+----------------------+-------+----------------------+-------+-----+ * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | - * ------------------------------------------------------------------------- + * +-----+----------------------+-------+----------------------+-------+-----+ + * * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header * would be added to the tail, and no data would follow this header: - * --------------------------------------------------------------------------- + * + * +--+----------------------+-------+-----------------------------------------+ * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | - * --------------------------------------------------------------------------- + * +--+----------------------+-------+-----------------------------------------+ */ struct iommufd_vevent_header { __u32 flags; @@ -1117,9 +1122,11 @@ struct iommu_vevent_arm_smmuv3 { * * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by * a type-specific data structure, in a normal case: - * ------------------------------------------------------------- - * || header0 | data0 | header1 | data1 | ... | headerN | dataN || - * ------------------------------------------------------------- + * + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | | + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to * struct iommufd_vevent_header). */ From 05026ea01e95ffdeb0e5ac8fb7fb1b551e3a8726 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Mar 2025 14:56:12 -0700 Subject: [PATCH 1786/2157] objtool, lkdtm: Obfuscate the do_nothing() pointer If execute_location()'s memcpy of do_nothing() gets inlined and unrolled by the compiler, it copies one word at a time: mov 0x0(%rip),%rax R_X86_64_PC32 .text+0x1374 mov %rax,0x38(%rbx) mov 0x0(%rip),%rax R_X86_64_PC32 .text+0x136c mov %rax,0x30(%rbx) ... Those .text references point to the middle of the function, causing objtool to complain about their lack of ENDBR. Prevent that by resolving the function pointer at runtime rather than build time. This fixes the following warning: drivers/misc/lkdtm/lkdtm.o: warning: objtool: execute_location+0x23: relocation to !ENDBR: .text+0x1378 Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Kees Cook Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Link: https://lore.kernel.org/r/30b9abffbddeb43c4f6320b1270fa9b4d74c54ed.1742852847.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503191453.uFfxQy5R-lkp@intel.com/ --- drivers/misc/lkdtm/perms.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 5b861dbff27e..6c24426104ba 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -28,6 +28,13 @@ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; +/* + * This is a pointer to do_nothing() which is initialized at runtime rather + * than build time to avoid objtool IBT validation warnings caused by an + * inlined unrolled memcpy() in execute_location(). + */ +static void __ro_after_init *do_nothing_ptr; + /* * This just returns to the caller. It is designed to be copied into * non-executable memory regions. @@ -65,13 +72,12 @@ static noinline __nocfi void execute_location(void *dst, bool write) { void (*func)(void); func_desc_t fdesc; - void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing_text); + pr_info("attempting ok execution at %px\n", do_nothing_ptr); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing_text, EXEC_SIZE); + memcpy(dst, do_nothing_ptr, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } @@ -267,6 +273,8 @@ static void lkdtm_ACCESS_NULL(void) void __init lkdtm_perms_init(void) { + do_nothing_ptr = dereference_function_descriptor(do_nothing); + /* Make sure we can write to __ro_after_init values during __init */ ro_after_init |= 0xAA; } From d9a595c3850ea4383628115df2bb533af3b29f4f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Mar 2025 18:30:37 -0700 Subject: [PATCH 1787/2157] objtool: Fix NULL printf() '%s' argument in builtin-check.c:save_argv() It's probably not the best idea to pass a string pointer to printf() right after confirming said pointer is NULL. Fix the typo and use argv[i] instead. Fixes: c5995abe1547 ("objtool: Improve error handling") Reported-by: Stephen Rothwell Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Tested-by: Stephen Rothwell Link: https://lore.kernel.org/r/a814ed8b08fb410be29498a20a5fbbb26e907ecf.1742952512.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/20250326103854.309e3c60@canb.auug.org.au --- tools/objtool/builtin-check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 2bdff910430e..e364ab6345d3 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -238,7 +238,7 @@ static void save_argv(int argc, const char **argv) for (int i = 0; i < argc; i++) { orig_argv[i] = strdup(argv[i]); if (!orig_argv[i]) { - WARN_GLIBC("strdup(%s)", orig_argv[i]); + WARN_GLIBC("strdup(%s)", argv[i]); exit(1); } }; From 69d41d6dafff0967565b971d950bd10443e4076c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Mar 2025 22:04:21 -0700 Subject: [PATCH 1788/2157] objtool: Fix segfault in ignore_unreachable_insn() Check 'prev_insn' before dereferencing it. Fixes: bd841d6154f5 ("objtool: Fix CONFIG_UBSAN_TRAP unreachable warnings") Reported-by: Arnd Bergmann Reported-by: Ingo Molnar Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/5df4ff89c9e4b9e788b77b0531234ffa7ba03e9e.1743136205.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/d86b4cc6-0b97-4095-8793-a7384410b8ab@app.fastmail.com Closes: https://lore.kernel.org/Z-V_rruKY0-36pqA@gmail.com --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3bf29923d5c0..29de1709ea00 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4037,7 +4037,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * It may also insert a UD2 after calling a __noreturn function. */ prev_insn = prev_insn_same_sec(file, insn); - if (prev_insn->dead_end && + if (prev_insn && prev_insn->dead_end && (insn->type == INSN_BUG || (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && insn->jump_dest->type == INSN_BUG))) From b5e2cc57f551a1a1e2c0ea36f77c1e26d3d13c35 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Mar 2025 22:04:22 -0700 Subject: [PATCH 1789/2157] objtool: Fix STACK_FRAME_NON_STANDARD for cold subfunctions The recent STACK_FRAME_NON_STANDARD refactoring forgot about .cold subfunctions. They must also be ignored. Fixes the following warning: drivers/gpu/drm/vmwgfx/vmwgfx_msg.o: warning: objtool: vmw_recv_msg.cold+0x0: unreachable instruction Fixes: c84301d706c5 ("objtool: Ignore entire functions rather than instructions") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/70a09ec0b0704398b2bbfb3153ce3d7cb8a381be.1743136205.git.jpoimboe@kernel.org --- tools/objtool/check.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 29de1709ea00..fff9d7a2947a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1014,6 +1014,8 @@ static int add_ignores(struct objtool_file *file) } func->ignore = true; + if (func->cfunc) + func->cfunc->ignore = true; } return 0; From ae958b12940bcd4ffa32c44684e4f2878bc5e140 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Mar 2025 22:04:23 -0700 Subject: [PATCH 1790/2157] objtool, drm/vmwgfx: Don't ignore vmw_send_msg() for ORC The following commit: 0b0d81e3b733 ("objtool, drm/vmwgfx: Fix "duplicate frame pointer save" warning") ... marked vmw_send_msg() STACK_FRAME_NON_STANDARD because it uses RBP in a non-standard way which violates frame pointer convention. That issue only affects the frame pointer unwinder. Remove the annotation for ORC. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/eff3102a7eeb77b4420fcb5e9d9cd9dd81d4514a.1743136205.git.jpoimboe@kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 1f15990d3934..1d9a42cbc88f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -289,7 +289,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg) return -EINVAL; } -STACK_FRAME_NON_STANDARD(vmw_send_msg); +STACK_FRAME_NON_STANDARD_FP(vmw_send_msg); /** From 7be11d34f660bfa6583f3d6e2032d5dcbff56081 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 28 Mar 2025 06:34:48 -0700 Subject: [PATCH 1791/2157] iommufd: Test attach before detaching pasid Check if the pasid has been attached before going further in the detach path. This fixes a crash found by syzkaller. Add a selftest as well. Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 668 Comm: repro Not tainted 6.14.0-next-20250325-eb4bc4b07f66 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org4 RIP: 0010:iommufd_hw_pagetable_detach+0x8a/0x4d0 Code: 00 00 00 44 89 ee 48 89 c7 48 89 75 c8 48 89 45 c0 e8 ca 55 17 02 48 89 c2 49 89 c4 48 b8 00 00 00b RSP: 0018:ffff888021b17b78 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff888014b5a000 RCX: ffff888021b17a64 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88801dad07fc RBP: ffff888021b17bc8 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: ffff88801dad0e58 R12: 0000000000000000 R13: 0000000000000001 R14: ffff888021b17e18 R15: ffff8880132d3008 FS: 00007fca52013600(0000) GS:ffff8880e3684000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200006c0 CR3: 00000000112d0005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: iommufd_device_detach+0x2a/0x2e0 iommufd_test+0x2f99/0x5cd0 iommufd_fops_ioctl+0x38e/0x520 __x64_sys_ioctl+0x1ba/0x220 x64_sys_call+0x122e/0x2150 do_syscall_64+0x6d/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e Link: https://patch.msgid.link/r/20250328133448.22052-1-yi.l.liu@intel.com Reported-by: Lai Yi Closes: https://lore.kernel.org/linux-iommu/Z+X0tzxhiaupJT7b@ly-workstation Fixes: c0e301b2978d ("iommufd/device: Add pasid_attach array to track per-PASID attach") Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 7 +++++++ tools/testing/selftests/iommu/iommufd.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 2307daad65c0..2111bad72c72 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -644,6 +644,11 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) mutex_lock(&igroup->lock); attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { + mutex_unlock(&igroup->lock); + return NULL; + } + hwpt = attach->hwpt; hwpt_paging = find_hwpt_paging(hwpt); @@ -1001,6 +1006,8 @@ void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) struct iommufd_hw_pagetable *hwpt; hwpt = iommufd_hw_pagetable_detach(idev, pasid); + if (!hwpt) + return; iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 7eb7ee149f2b..1a8e85afe9aa 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -3074,6 +3074,12 @@ TEST_F(iommufd_device_pasid, pasid_attach) uint32_t pasid = 100; uint32_t viommu_id; + /* + * Negative, detach pasid without attaching, this is not expected. + * But it should not result in failure anyway. + */ + test_cmd_pasid_detach(pasid); + /* Allocate two nested hwpts sharing one common parent hwpt */ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, IOMMU_HWPT_ALLOC_NEST_PARENT, From 8bde1033f9cfc1c08628255cc434c6cf39c9d9ba Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Fri, 28 Mar 2025 16:04:47 +0100 Subject: [PATCH 1792/2157] dm-integrity: fix non-constant-time tag verification When using dm-integrity in standalone mode with a keyed hmac algorithm, integrity tags are calculated and verified internally. Using plain memcmp to compare the stored and computed tags may leak the position of the first byte mismatch through side-channel analysis, allowing to brute-force expected tags in linear time (e.g., by counting single-stepping interrupts in confidential virtual machine environments). Co-developed-by: Luca Wilke Signed-off-by: Luca Wilke Signed-off-by: Jo Van Bulck Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-integrity.c | 45 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index f41e64f1dab2..516822007921 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -516,7 +517,7 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) dm_integrity_io_error(ic, "crypto_shash_digest", r); return r; } - if (memcmp(mac, actual_mac, mac_size)) { + if (crypto_memneq(mac, actual_mac, mac_size)) { dm_integrity_io_error(ic, "superblock mac", -EILSEQ); dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); return -EILSEQ; @@ -859,7 +860,7 @@ static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool if (likely(wr)) memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR); else { - if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) { + if (crypto_memneq(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) { dm_integrity_io_error(ic, "journal mac", -EILSEQ); dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0); } @@ -1401,10 +1402,9 @@ static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, unsigned int *metadata_offset, unsigned int total_size, int op) { -#define MAY_BE_FILLER 1 -#define MAY_BE_HASH 2 unsigned int hash_offset = 0; - unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); + unsigned char mismatch_hash = 0; + unsigned char mismatch_filler = !ic->discard; do { unsigned char *data, *dp; @@ -1425,7 +1425,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se if (op == TAG_READ) { memcpy(tag, dp, to_copy); } else if (op == TAG_WRITE) { - if (memcmp(dp, tag, to_copy)) { + if (crypto_memneq(dp, tag, to_copy)) { memcpy(dp, tag, to_copy); dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); } @@ -1433,29 +1433,30 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se /* e.g.: op == TAG_CMP */ if (likely(is_power_of_2(ic->tag_size))) { - if (unlikely(memcmp(dp, tag, to_copy))) - if (unlikely(!ic->discard) || - unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) { - goto thorough_test; - } + if (unlikely(crypto_memneq(dp, tag, to_copy))) + goto thorough_test; } else { unsigned int i, ts; thorough_test: ts = total_size; for (i = 0; i < to_copy; i++, ts--) { - if (unlikely(dp[i] != tag[i])) - may_be &= ~MAY_BE_HASH; - if (likely(dp[i] != DISCARD_FILLER)) - may_be &= ~MAY_BE_FILLER; + /* + * Warning: the control flow must not be + * dependent on match/mismatch of + * individual bytes. + */ + mismatch_hash |= dp[i] ^ tag[i]; + mismatch_filler |= dp[i] ^ DISCARD_FILLER; hash_offset++; if (unlikely(hash_offset == ic->tag_size)) { - if (unlikely(!may_be)) { + if (unlikely(mismatch_hash) && unlikely(mismatch_filler)) { dm_bufio_release(b); return ts; } hash_offset = 0; - may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); + mismatch_hash = 0; + mismatch_filler = !ic->discard; } } } @@ -1476,8 +1477,6 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se } while (unlikely(total_size)); return 0; -#undef MAY_BE_FILLER -#undef MAY_BE_HASH } struct flush_request { @@ -2076,7 +2075,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); - if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { + if (unlikely(crypto_memneq(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", logical_sector); dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum", @@ -2595,7 +2594,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w) bio_put(outgoing_bio); integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest); - if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) { + if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) { DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", ic->dev->bdev, dio->bio_details.bi_iter.bi_sector); atomic64_inc(&ic->number_of_mismatches); @@ -2634,7 +2633,7 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status char *mem = bvec_kmap_local(&bv); //memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT); integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest); - if (unlikely(memcmp(digest, dio->integrity_payload + pos, + if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) { kunmap_local(mem); dm_integrity_free_payload(dio); @@ -2911,7 +2910,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); - if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { + if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0); } From 2de510fccbca3d1906b55f4be5f1de83fa2424ef Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 28 Mar 2025 16:17:45 +0100 Subject: [PATCH 1793/2157] dm-verity: fix prefetch-vs-suspend race There's a possible race condition in dm-verity - the prefetch work item may race with suspend and it is possible that prefetch continues to run while the device is suspended. Fix this by calling flush_workqueue and dm_bufio_client_reset in the postsuspend hook. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-verity-target.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 40448a8c74b1..3c427f18a04b 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -836,6 +836,13 @@ static int verity_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +static void verity_postsuspend(struct dm_target *ti) +{ + struct dm_verity *v = ti->private; + flush_workqueue(v->verify_wq); + dm_bufio_client_reset(v->bufio); +} + /* * Status: V (valid) or C (corruption found) */ @@ -1806,6 +1813,7 @@ static struct target_type verity_target = { .ctr = verity_ctr, .dtr = verity_dtr, .map = verity_map, + .postsuspend = verity_postsuspend, .status = verity_status, .prepare_ioctl = verity_prepare_ioctl, .iterate_devices = verity_iterate_devices, From 9c565428788fb9b49066f94ab7b10efc686a0a4c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 28 Mar 2025 16:19:07 +0100 Subject: [PATCH 1794/2157] dm-ebs: fix prefetch-vs-suspend race There's a possible race condition in dm-ebs - dm bufio prefetch may be in progress while the device is suspended. Fix this by calling dm_bufio_client_reset in the postsuspend hook. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-ebs-target.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 18ae45dcbfb2..b19b0142a690 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -390,6 +390,12 @@ static int ebs_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +static void ebs_postsuspend(struct dm_target *ti) +{ + struct ebs_c *ec = ti->private; + dm_bufio_client_reset(ec->bufio); +} + static void ebs_status(struct dm_target *ti, status_type_t type, unsigned int status_flags, char *result, unsigned int maxlen) { @@ -447,6 +453,7 @@ static struct target_type ebs_target = { .ctr = ebs_ctr, .dtr = ebs_dtr, .map = ebs_map, + .postsuspend = ebs_postsuspend, .status = ebs_status, .io_hints = ebs_io_hints, .prepare_ioctl = ebs_prepare_ioctl, From 89f43e1ce6f60d4f44399059595ac47f7a90a393 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Fri, 21 Mar 2025 15:00:19 +0800 Subject: [PATCH 1795/2157] arm64: mm: Correct the update of max_pfn Hotplugged memory can be smaller than the original memory. For example, on my target: root@genericarmv8:~# cat /sys/kernel/debug/memblock/memory 0: 0x0000000064005000..0x0000000064023fff 0 NOMAP 1: 0x0000000064400000..0x00000000647fffff 0 NOMAP 2: 0x0000000068000000..0x000000006fffffff 0 DRV_MNG 3: 0x0000000088800000..0x0000000094ffefff 0 NONE 4: 0x0000000094fff000..0x0000000094ffffff 0 NOMAP max_pfn will affect read_page_owner. Therefore, it should first compare and then select the larger value for max_pfn. Fixes: 8fac67ca236b ("arm64: mm: update max_pfn after memory hotplug") Cc: # 6.1.x Signed-off-by: Zhenhua Huang Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250321070019.1271859-1-quic_zhenhuah@quicinc.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3c54dea1303f..eaaa9b36202b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1358,7 +1358,8 @@ int arch_add_memory(int nid, u64 start, u64 size, __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); else { - max_pfn = PFN_UP(start + size); + /* Address of hotplugged memory can be smaller */ + max_pfn = max(max_pfn, PFN_UP(start + size)); max_low_pfn = max_pfn; } From a13bfa4fe0d6949cea14718df2d1fe84c38cd113 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Wed, 26 Mar 2025 11:04:47 +0000 Subject: [PATCH 1796/2157] arm64: mops: Do not dereference src reg for a set operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The source register is not used for SET* and reading it can result in a UBSAN out-of-bounds array access error, specifically when the MOPS exception is taken from a SET* sequence with XZR (reg 31) as the source. Architecturally this is the only case where a src/dst/size field in the ESR can be reported as 31. Prior to 2de451a329cf662b the code in do_el0_mops() was benign as the use of pt_regs_read_reg() prevented the out-of-bounds access. Fixes: 2de451a329cf ("KVM: arm64: Add handler for MOPS exceptions") Cc: # 6.12.x Cc: Kristina Martsenko Cc: Will Deacon Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Keir Fraser Reviewed-by: Kristina Martšenko Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250326110448.3792396-1-keirf@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/traps.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index d780d1bd2eac..82cf1f879c61 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -109,10 +109,9 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); - unsigned long dst, src, size; + unsigned long dst, size; dst = regs->regs[dstreg]; - src = regs->regs[srcreg]; size = regs->regs[sizereg]; /* @@ -129,6 +128,7 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon } } else { /* CPY* instruction */ + unsigned long src = regs->regs[srcreg]; if (!(option_a ^ wrong_option)) { /* Format is from Option B */ if (regs->pstate & PSR_N_BIT) { From 0fff2aa96f6be6d33b584d73b16d3672fd30fd5c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 20 Mar 2025 14:34:05 -0400 Subject: [PATCH 1797/2157] arm64: mm: Drop dead code for pud special bit handling Keith Busch observed some incorrect macros defined in arm64 code [1]. It turns out the two lines should never be needed and won't be exposed to anyone, because aarch64 doesn't select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD, hence ARCH_SUPPORTS_PUD_PFNMAP is always N. The only archs that support THP PUDs so far are x86 and powerpc. Instead of fixing the lines (with no way to test it..), remove the two lines that are in reality dead code, to avoid confusing readers. Fixes tag is attached to reflect where the wrong macros were introduced, but explicitly not copying stable, because there's no real issue to be fixed. So it's only about removing the dead code so far. [1] https://lore.kernel.org/all/Z9tDjOk-JdV_fCY4@kbusch-mbp.dhcp.thefacebook.com/#t Cc: Alex Williamson Cc: Donald Dutile Cc: Will Deacon Fixes: 3e509c9b03f9 ("mm/arm64: support large pfn mappings") Reported-by: Keith Busch Signed-off-by: Peter Xu Reviewed-by: Donald Dutile Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250320183405.12659-1-peterx@redhat.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 84f05f781a70..d3b538be1500 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -633,11 +633,6 @@ static inline pud_t pud_mkhuge(pud_t pud) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP -#define pud_special(pte) pte_special(pud_pte(pud)) -#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) -#endif - #define pmd_pgprot pmd_pgprot static inline pgprot_t pmd_pgprot(pmd_t pmd) { From e18c09b204e81702ea63b9f1a81ab003b72e3174 Mon Sep 17 00:00:00 2001 From: Jinqian Yang Date: Tue, 25 Mar 2025 22:19:00 +0800 Subject: [PATCH 1798/2157] arm64: Add support for HIP09 Spectre-BHB mitigation The HIP09 processor is vulnerable to the Spectre-BHB (Branch History Buffer) attack, which can be exploited to leak information through branch prediction side channels. This commit adds the MIDR of HIP09 to the list for software mitigation. Signed-off-by: Jinqian Yang Link: https://lore.kernel.org/r/20250325141900.2057314-1-yangjinqian1@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 20284bf34722..3530887daddb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -132,6 +132,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -218,6 +219,7 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 0f51fd10b4b0..aaf6578c39ac 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -901,6 +901,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_HISI_HIP09), {}, }; static const struct midr_range spectre_bhb_k11_list[] = { From 14e41b16e8cb677bb440dca2edba8b041646c742 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Mar 2025 12:52:52 -0400 Subject: [PATCH 1799/2157] SUNRPC: Don't allow waiting for exiting tasks Once a task calls exit_signals() it can no longer be signalled. So do not allow it to do killable waits. Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9b45fbdc90ca..73bc39281ef5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -276,6 +276,8 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { + if (unlikely(current->flags & PF_EXITING)) + return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; From 8d3ca331026a7f9700d3747eed59a67b8f828cdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Mar 2025 13:19:18 -0400 Subject: [PATCH 1800/2157] NFS: Don't allow waiting for exiting tasks Once a task calls exit_signals() it can no longer be signalled. So do not allow it to do killable waits. Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ fs/nfs/internal.h | 5 +++++ fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 9 +++++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1aa67fca69b2..119e447758b9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { + if (unlikely(nfs_current_task_exiting())) + return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fae2c7ae4acc..2133b3c20bad 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -912,6 +912,11 @@ static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) } #endif +static inline bool nfs_current_task_exiting(void) +{ + return (current->flags & PF_EXITING) != 0; +} + static inline bool nfs_error_is_fatal(int err) { switch (err) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0c3bc98cd999..c1736dbb92b6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; - } while (!fatal_signal_pending(current)); + } while (!fatal_signal_pending(current) && !nfs_current_task_exiting()); return res; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 50be54e0f578..da97f87ecaa9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -446,6 +446,8 @@ static int nfs4_delay_killable(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(nfs4_update_delay(timeout)); if (!__fatal_signal_pending(current)) @@ -457,6 +459,8 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(nfs4_update_delay(timeout)); if (!signal_pending(current)) @@ -1777,7 +1781,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); - if (!fatal_signal_pending(current)) { + if (!fatal_signal_pending(current) && + !nfs_current_task_exiting()) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; else @@ -3581,7 +3586,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, write_sequnlock(&state->seqlock); trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current) || nfs_current_task_exiting()) status = -EINTR; else if (schedule_timeout(5*HZ) != 0) From 9e8f324bd44c1fe026b582b75213de4eccfa1163 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Mar 2025 19:20:53 -0400 Subject: [PATCH 1801/2157] NFSv4: Check for delegation validity in nfs_start_delegation_return_locked() Check that the delegation is still attached after taking the spin lock in nfs_start_delegation_return_locked(). Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 325ba0663a6d..8bdbc4dca89c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -307,7 +307,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi) if (delegation == NULL) goto out; spin_lock(&delegation->lock); - if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + if (delegation->inode && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); /* Refcount matched in nfs_end_delegation_return() */ ret = nfs_get_delegation(delegation); From a30d4ff8193ef768dbb524824c7aa07c5486a63a Mon Sep 17 00:00:00 2001 From: Nir Lichtman Date: Tue, 4 Feb 2025 05:47:41 +0000 Subject: [PATCH 1802/2157] kdb: remove usage of static environment buffer Problem: The set environment variable logic uses a static "heap" like buffer to store the values of the variables, and they are never freed, on top of that this is redundant since the kernel supplies allocation facilities which are even used also in this file. Solution: Remove the weird static buffer logic and use kmalloc instead, call kfree when overriding an existing variable. Signed-off-by: Nir Lichtman Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20250204054741.GB1219827@lichtman.org Signed-off-by: Daniel Thompson --- include/linux/kdb.h | 2 +- kernel/debug/kdb/kdb_main.c | 48 +++++++------------------------------ 2 files changed, 9 insertions(+), 41 deletions(-) diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 905a2e2f45f6..ecbf819deeca 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -104,7 +104,7 @@ extern int kdb_initial_cpu; #define KDB_NOENVVALUE (-6) #define KDB_NOTIMP (-7) #define KDB_ENVFULL (-8) -#define KDB_ENVBUFFULL (-9) +#define KDB_KMALLOCFAILED (-9) #define KDB_TOOMANYBPT (-10) #define KDB_TOOMANYDBREGS (-11) #define KDB_DUPBPT (-12) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 5f4be507d79f..641481b19ada 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -105,7 +105,7 @@ static kdbmsg_t kdbmsgs[] = { KDBMSG(NOENVVALUE, "Environment variable should have value"), KDBMSG(NOTIMP, "Command not implemented"), KDBMSG(ENVFULL, "Environment full"), - KDBMSG(ENVBUFFULL, "Environment buffer full"), + KDBMSG(KMALLOCFAILED, "Failed to allocate memory"), KDBMSG(TOOMANYBPT, "Too many breakpoints defined"), #ifdef CONFIG_CPU_XSCALE KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"), @@ -130,13 +130,9 @@ static const int __nkdb_err = ARRAY_SIZE(kdbmsgs); /* - * Initial environment. This is all kept static and local to - * this file. We don't want to rely on the memory allocation - * mechanisms in the kernel, so we use a very limited allocate-only - * heap for new and altered environment variables. The entire - * environment is limited to a fixed number of entries (add more - * to __env[] if required) and a fixed amount of heap (add more to - * KDB_ENVBUFSIZE if required). + * Initial environment. This is all kept static and local to this file. + * The entire environment is limited to a fixed number of entries + * (add more to __env[] if required) */ static char *__env[31] = { @@ -258,35 +254,6 @@ char *kdbgetenv(const char *match) return NULL; } -/* - * kdballocenv - This function is used to allocate bytes for - * environment entries. - * Parameters: - * bytes The number of bytes to allocate in the static buffer. - * Returns: - * A pointer to the allocated space in the buffer on success. - * NULL if bytes > size available in the envbuffer. - * Remarks: - * We use a static environment buffer (envbuffer) to hold the values - * of dynamically generated environment variables (see kdb_set). Buffer - * space once allocated is never free'd, so over time, the amount of space - * (currently 512 bytes) will be exhausted if env variables are changed - * frequently. - */ -static char *kdballocenv(size_t bytes) -{ -#define KDB_ENVBUFSIZE 512 - static char envbuffer[KDB_ENVBUFSIZE]; - static int envbufsize; - char *ep = NULL; - - if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) { - ep = &envbuffer[envbufsize]; - envbufsize += bytes; - } - return ep; -} - /* * kdbgetulenv - This function will return the value of an unsigned * long-valued environment variable. @@ -348,9 +315,9 @@ static int kdb_setenv(const char *var, const char *val) varlen = strlen(var); vallen = strlen(val); - ep = kdballocenv(varlen + vallen + 2); - if (ep == (char *)0) - return KDB_ENVBUFFULL; + ep = kmalloc(varlen + vallen + 2, GFP_KDB); + if (!ep) + return KDB_KMALLOCFAILED; sprintf(ep, "%s=%s", var, val); @@ -359,6 +326,7 @@ static int kdb_setenv(const char *var, const char *val) && ((strncmp(__env[i], var, varlen) == 0) && ((__env[i][varlen] == '\0') || (__env[i][varlen] == '=')))) { + kfree_const(__env[i]); __env[i] = ep; return 0; } From afdbe49276accb87a0c7414e75864c78289ece2f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 19 Mar 2025 17:33:39 +0100 Subject: [PATCH 1803/2157] kdb: Remove optional size arguments from strscpy() calls If the destination buffer has a fixed length, strscpy() automatically determines the size of the destination buffer using sizeof() if the argument is omitted. This makes the explicit sizeof() unnecessary. Furthermore, CMD_BUFLEN is equal to sizeof(kdb_prompt_str) and can also be removed. Remove them to shorten and simplify the code. No functional changes intended. Signed-off-by: Thorsten Blum Reviewed-by: Douglas Anderson Reviewed-by: Justin Stitt Link: https://lore.kernel.org/r/20250319163341.2123-2-thorsten.blum@linux.dev Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 6a77f1c779c4..9b11b10b120c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -334,7 +334,7 @@ static char *kdb_read(char *buffer, size_t bufsize) *cp = '\0'; p_tmp = strrchr(buffer, ' '); p_tmp = (p_tmp ? p_tmp + 1 : buffer); - strscpy(tmpbuffer, p_tmp, sizeof(tmpbuffer)); + strscpy(tmpbuffer, p_tmp); *cp = tmp; len = strlen(tmpbuffer); @@ -452,7 +452,7 @@ static char *kdb_read(char *buffer, size_t bufsize) char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) { if (prompt && kdb_prompt_str != prompt) - strscpy(kdb_prompt_str, prompt, CMD_BUFLEN); + strscpy(kdb_prompt_str, prompt); kdb_printf("%s", kdb_prompt_str); kdb_nextline = 1; /* Prompt and input resets line number */ return kdb_read(buffer, bufsize); From b5322b6ec06a6c58650f52abcd2492000396363b Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 20 Mar 2025 11:22:13 -0300 Subject: [PATCH 1804/2157] x86/uaccess: Improve performance by aligning writes to 8 bytes in copy_user_generic(), on non-FSRM/ERMS CPUs History of the performance regression: ====================================== Since the following series of user copy updates were merged upstream ~2 years ago via: a5624566431d ("Merge branch 'x86-rep-insns': x86 user copy clarifications") .. copy_user_generic() on x86_64 stopped doing alignment of the writes to the destination to a 8 byte boundary for the non FSRM case. Previously, this was done through the ALIGN_DESTINATION macro that was used in the now removed copy_user_generic_unrolled function. Turns out this change causes some loss of performance/throughput on some use cases and specific CPU/platforms without FSRM and ERMS. Lately I got two reports of performance/throughput issues after a RHEL 9 kernel pulled the same upstream series with updates to user copy functions. Both reports consisted of running specific networking/TCP related testing using iperf3. Partial upstream fix ==================== The first report was related to a Linux Bridge testing using VMs on a specific machine with an AMD CPU (EPYC 7402), and after a brief investigation it turned out that the later change via: ca96b162bfd2 ("x86: bring back rep movsq for user access on CPUs without ERMS") ... helped/fixed the performance issue. However, after the later commit/fix was applied, then I got another regression reported in a multistream TCP test on a 100Gbit mlx5 nic, also running on an AMD based platform (AMD EPYC 7302 CPU), again that was using iperf3 to run the test. That regression was after applying the later fix/commit, but only this didn't help in telling the whole history. Testing performed to pinpoint residual regression ================================================= So I narrowed down the second regression use case, but running it without traffic through a NIC, on localhost, in trying to narrow down CPU usage and not being limited by other factor like network bandwidth. I used another system also with an AMD CPU (AMD EPYC 7742). Basically, I run iperf3 in server and client mode in the same system, for example: - Start the server binding it to CPU core/thread 19: $ taskset -c 19 iperf3 -D -s -B 127.0.0.1 -p 12000 - Start the client always binding/running on CPU core/thread 17, using perf to get statistics: $ perf stat -o stat.txt taskset -c 17 iperf3 -c 127.0.0.1 -b 0/1000 -V \ -n 50G --repeating-payload -l 16384 -p 12000 --cport 12001 2>&1 \ > stat-19.txt For the client, always running/pinned to CPU 17. But for the iperf3 in server mode, I did test runs using CPUs 19, 21, 23 or not pinned to any specific CPU. So it basically consisted with four runs of the same commands, just changing the CPU which the server is pinned, or without pinning by removing the taskset call before the server command. The CPUs were chosen based on NUMA node they were on, this is the relevant output of lscpu on the system: $ lscpu ... Model name: AMD EPYC 7742 64-Core Processor ... Caches (sum of all): L1d: 2 MiB (64 instances) L1i: 2 MiB (64 instances) L2: 32 MiB (64 instances) L3: 256 MiB (16 instances) NUMA: NUMA node(s): 4 NUMA node0 CPU(s): 0,1,8,9,16,17,24,25,32,33,40,41,48,49,56,57,64,65,72,73,80,81,88,89,96,97,104,105,112,113,120,121 NUMA node1 CPU(s): 2,3,10,11,18,19,26,27,34,35,42,43,50,51,58,59,66,67,74,75,82,83,90,91,98,99,106,107,114,115,122,123 NUMA node2 CPU(s): 4,5,12,13,20,21,28,29,36,37,44,45,52,53,60,61,68,69,76,77,84,85,92,93,100,101,108,109,116,117,124,125 NUMA node3 CPU(s): 6,7,14,15,22,23,30,31,38,39,46,47,54,55,62,63,70,71,78,79,86,87,94,95,102,103,110,111,118,119,126,127 ... So for the server run, when picking a CPU, I chose CPUs to be not on the same node. The reason is with that I was able to get/measure relevant performance differences when changing the alignment of the writes to the destination in copy_user_generic. Testing shows up to +81% performance improvement under iperf3 ============================================================= Here's a summary of the iperf3 runs: # Vanilla upstream alignment: CPU RATE SYS TIME sender-receiver Server bind 19: 13.0Gbits/sec 28.371851000 33.233499566 86.9%-70.8% Server bind 21: 12.9Gbits/sec 28.283381000 33.586486621 85.8%-69.9% Server bind 23: 11.1Gbits/sec 33.660190000 39.012243176 87.7%-64.5% Server bind none: 18.9Gbits/sec 19.215339000 22.875117865 86.0%-80.5% # With the attached patch (aligning writes in non ERMS/FSRM case): CPU RATE SYS TIME sender-receiver Server bind 19: 20.8Gbits/sec 14.897284000 20.811101382 75.7%-89.0% Server bind 21: 20.4Gbits/sec 15.205055000 21.263165909 75.4%-89.7% Server bind 23: 20.2Gbits/sec 15.433801000 21.456175000 75.5%-89.8% Server bind none: 26.1Gbits/sec 12.534022000 16.632447315 79.8%-89.6% So I consistently got better results when aligning the write. The results above were run on 6.14.0-rc6/rc7 based kernels. The sys is sys time and then the total time to run/transfer 50G of data. The last field is the CPU usage of sender/receiver iperf3 process. It's also worth to note that each pair of iperf3 runs may get slightly different results on each run, but I always got consistent higher results with the write alignment for this specific test of running the processes on CPUs in different NUMA nodes. Linus Torvalds helped/provided this version of the patch. Initially I proposed a version which aligned writes for all cases in rep_movs_alternative, however it used two extra registers and thus Linus provided an enhanced version that only aligns the write on the large_movsq case, which is sufficient since the problem happens only on those AMD CPUs like ones mentioned above without ERMS/FSRM, and also doesn't require using extra registers. Also, I validated that aligning only on large_movsq case is really enough for getting the performance back. I also tested this patch on an old Intel based non-ERMS/FRMS system (with Xeon E5-2667 - Sandy Bridge based) and didn't get any problems: no performance enhancement but also no regression either, using the same iperf3 based benchmark. Also newer Intel processors after Sandy Bridge usually have ERMS and should not be affected by this change. [ mingo: Updated the changelog. ] Fixes: ca96b162bfd2 ("x86: bring back rep movsq for user access on CPUs without ERMS") Fixes: 034ff37d3407 ("x86: rewrite '__copy_user_nocache' function") Reported-by: Ondrej Lichtner Co-developed-by: Linus Torvalds Signed-off-by: Linus Torvalds Signed-off-by: Herton R. Krzesinski Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250320142213.2623518-1-herton@redhat.com --- arch/x86/lib/copy_user_64.S | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index aa8c341b2441..06296eb69fd4 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -77,6 +77,24 @@ SYM_FUNC_START(rep_movs_alternative) _ASM_EXTABLE_UA( 0b, 1b) .Llarge_movsq: + /* Do the first possibly unaligned word */ +0: movq (%rsi),%rax +1: movq %rax,(%rdi) + + _ASM_EXTABLE_UA( 0b, .Lcopy_user_tail) + _ASM_EXTABLE_UA( 1b, .Lcopy_user_tail) + + /* What would be the offset to the aligned destination? */ + leaq 8(%rdi),%rax + andq $-8,%rax + subq %rdi,%rax + + /* .. and update pointers and count to match */ + addq %rax,%rdi + addq %rax,%rsi + subq %rax,%rcx + + /* make %rcx contain the number of words, %rax the remainder */ movq %rcx,%rax shrq $3,%rcx andl $7,%eax From f710202b2a45addea3dcdcd862770ecbaf6597ef Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 18 Mar 2025 15:32:30 -0700 Subject: [PATCH 1805/2157] x86/tools: Drop duplicate unlikely() definition in insn_decoder_test.c After commit c104c16073b7 ("Kunit to check the longest symbol length"), there is a warning when building with clang because there is now a definition of unlikely from compiler.h in tools/include/linux, which conflicts with the one in the instruction decoder selftest: arch/x86/tools/insn_decoder_test.c:15:9: warning: 'unlikely' macro redefined [-Wmacro-redefined] Remove the second unlikely() definition, as it is no longer necessary, clearing up the warning. Fixes: c104c16073b7 ("Kunit to check the longest symbol length") Signed-off-by: Nathan Chancellor Signed-off-by: Ingo Molnar Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20250318-x86-decoder-test-fix-unlikely-redef-v1-1-74c84a7bf05b@kernel.org --- arch/x86/tools/insn_decoder_test.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index 6c2986d2ad11..08cd913cbd4e 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -12,8 +12,6 @@ #include #include -#define unlikely(cond) (cond) - #include #include #include From 04491732fc996305e1de80255d64ed6d1c472df5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Mar 2025 15:02:20 +0000 Subject: [PATCH 1806/2157] io_uring/net: account memory for zc sendmsg Account pinned pages for IORING_OP_SENDMSG_ZC, just as we for IORING_OP_SEND_ZC and net/ does for MSG_ZEROCOPY. Fixes: 493108d95f146 ("io_uring/net: zerocopy sendmsg") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4f00f67ca6ac8e8ed62343ae92b5816b1e0c9c4b.1743086313.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 8944eb679024..228b4f13d34c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1303,6 +1303,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *notif; + int ret; zc->done_io = 0; zc->retry = false; @@ -1355,7 +1356,16 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= REQ_F_IMPORT_BUFFER; return io_send_setup(req, sqe); } - return io_sendmsg_zc_setup(req, sqe); + ret = io_sendmsg_zc_setup(req, sqe); + if (unlikely(ret)) + return ret; + + if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) { + struct io_async_msghdr *iomsg = req->async_data; + + return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count); + } + return 0; } static int io_sg_from_iter_iovec(struct sk_buff *skb, From 8741d0737921ec1c03cf59aebf4d01400c2b461a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:10 +0800 Subject: [PATCH 1807/2157] ublk: make sure ubq->canceling is set when queue is frozen Now ublk driver depends on `ubq->canceling` for deciding if the request can be dispatched via uring_cmd & io_uring_cmd_complete_in_task(). Once ubq->canceling is set, the uring_cmd can be done via ublk_cancel_cmd() and io_uring_cmd_done(). So set ubq->canceling when queue is frozen, this way makes sure that the flag can be observed from ublk_queue_rq() reliably, and avoids use-after-free on uring_cmd. Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 41 +++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c060da409ed8..fbcb7c2ff851 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1446,18 +1446,28 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } } -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) +/* Must be called when queue is frozen */ +static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) { - struct gendisk *disk; + bool canceled; spin_lock(&ubq->cancel_lock); - if (ubq->canceling) { - spin_unlock(&ubq->cancel_lock); - return false; - } - ubq->canceling = true; + canceled = ubq->canceling; + if (!canceled) + ubq->canceling = true; spin_unlock(&ubq->cancel_lock); + return canceled; +} + +static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) +{ + bool was_canceled = ubq->canceling; + struct gendisk *disk; + + if (was_canceled) + return false; + spin_lock(&ub->lock); disk = ub->ub_disk; if (disk) @@ -1468,14 +1478,23 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) if (!disk) return false; - /* Now we are serialized with ublk_queue_rq() */ + /* + * Now we are serialized with ublk_queue_rq() + * + * Make sure that ubq->canceling is set when queue is frozen, + * because ublk_queue_rq() has to rely on this flag for avoiding to + * touch completed uring_cmd + */ blk_mq_quiesce_queue(disk->queue); - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); + was_canceled = ublk_mark_queue_canceling(ubq); + if (!was_canceled) { + /* abort queue is for making forward progress */ + ublk_abort_queue(ub, ubq); + } blk_mq_unquiesce_queue(disk->queue); put_device(disk_to_dev(disk)); - return true; + return !was_canceled; } static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, From 7e2fe01a69f6be3e284b38cfd2e4e0598a3b0a8f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:11 +0800 Subject: [PATCH 1808/2157] ublk: comment on ubq->canceling handling in ublk_queue_rq() In ublk_queue_rq(), ubq->canceling has to be handled after ->fail_io and ->force_abort are dealt with, otherwise the request may not be failed when deleting disk. Add comment on this usage. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fbcb7c2ff851..5b0c885dc38f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1310,6 +1310,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; + /* + * ->canceling has to be handled after ->force_abort and ->fail_io + * is dealt with, otherwise this request may not be failed in case + * of recovery, and cause hang when deleting disk + */ if (unlikely(ubq->canceling)) { __ublk_abort_rq(ubq, rq); return BLK_STS_OK; From 705b80841eda212df79a43371f5ccb3bcadbb893 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:12 +0800 Subject: [PATCH 1809/2157] ublk: remove two unused fields from 'struct ublk_queue' Remove two unused fields(`io_addr` & `max_io_sz`) from `struct ublk_queue`. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5b0c885dc38f..b60f4bd647a1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -143,8 +143,6 @@ struct ublk_queue { struct task_struct *ubq_daemon; char *io_cmd_buf; - unsigned long io_addr; /* mapped vm address */ - unsigned int max_io_sz; bool force_abort; bool timeout; bool canceling; From 1d781c0de08c0b35948ad4aaf609a4cc9995d9f6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:13 +0800 Subject: [PATCH 1810/2157] ublk: add helper of ublk_need_map_io() ublk_need_map_io() is more readable. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b60f4bd647a1..200504dd67a9 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -596,6 +596,11 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); } +static inline bool ublk_need_map_io(const struct ublk_queue *ubq) +{ + return !ublk_support_user_copy(ubq); +} + static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) { /* @@ -923,7 +928,7 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (ublk_support_user_copy(ubq)) + if (!ublk_need_map_io(ubq)) return rq_bytes; /* @@ -947,7 +952,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (ublk_support_user_copy(ubq)) + if (!ublk_need_map_io(ubq)) return rq_bytes; if (ublk_need_unmap_req(req)) { @@ -1867,7 +1872,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) goto out; - if (!ublk_support_user_copy(ubq)) { + if (ublk_need_map_io(ubq)) { /* * FETCH_RQ has to provide IO buffer if NEED GET * DATA is not enabled @@ -1889,7 +1894,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; - if (!ublk_support_user_copy(ubq)) { + if (ublk_need_map_io(ubq)) { /* * COMMIT_AND_FETCH_REQ has to provide IO buffer if * NEED GET DATA is not enabled or it is Read IO. From b460f328e257db6af0d127fc8a2437f64ad01d3a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:14 +0800 Subject: [PATCH 1811/2157] ublk: call io_uring_cmd_to_pdu to get uring_cmd pdu Call io_uring_cmd_to_pdu() to get uring_cmd pdu, and one big benefit is the automatic pdu size build check. Suggested-by: Uday Shankar Signed-off-by: Ming Lei Reviewed-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250327095123.179113-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 200504dd67a9..1e11816d0b90 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1040,7 +1040,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( struct io_uring_cmd *ioucmd) { - return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu; + return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu); } static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) From ebf695f129367ed4b26df6baec2ea7fc50c9e6f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:15 +0800 Subject: [PATCH 1812/2157] ublk: add segment parameter IO split is usually bad in io_uring world, since -EAGAIN is caused and IO handling may have to fallback to io-wq, this way does hurt performance. ublk starts to support zero copy recently, for avoiding unnecessary IO split, ublk driver's segment limit should be aligned with backend device's segment limit. Another reason is that io_buffer_register_bvec() needs to allocate bvecs, which number is aligned with ublk request segment number, so that big memory allocation can be avoided by setting reasonable max_segments limit. So add segment parameter for providing ublk server chance to align segment limit with backend, and keep it reasonable from implementation viewpoint. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 20 +++++++++++++++++++- include/uapi/linux/ublk_cmd.h | 25 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1e11816d0b90..a5bcf3aa9d8c 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -74,7 +74,7 @@ #define UBLK_PARAM_TYPE_ALL \ (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \ - UBLK_PARAM_TYPE_DMA_ALIGN) + UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT) struct ublk_rq_data { struct kref ref; @@ -580,6 +580,18 @@ static int ublk_validate_params(const struct ublk_device *ub) return -EINVAL; } + if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { + const struct ublk_param_segment *p = &ub->params.seg; + + if (!is_power_of_2(p->seg_boundary_mask + 1)) + return -EINVAL; + + if (p->seg_boundary_mask + 1 < UBLK_MIN_SEGMENT_SIZE) + return -EINVAL; + if (p->max_segment_size < UBLK_MIN_SEGMENT_SIZE) + return -EINVAL; + } + return 0; } @@ -2370,6 +2382,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) lim.dma_alignment = ub->params.dma.alignment; + if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { + lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask; + lim.max_segment_size = ub->params.seg.max_segment_size; + lim.max_segments = ub->params.seg.max_segments; + } + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 7255b36b5cf6..583b86681c93 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -410,6 +410,29 @@ struct ublk_param_dma_align { __u8 pad[4]; }; +#define UBLK_MIN_SEGMENT_SIZE 4096 +/* + * If any one of the three segment parameter is set as 0, the behavior is + * undefined. + */ +struct ublk_param_segment { + /* + * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has + * to be >= UBLK_MIN_SEGMENT_SIZE(4096) + */ + __u64 seg_boundary_mask; + + /* + * max_segment_size could be override by virt_boundary_mask, so be + * careful when setting both. + * + * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096) + */ + __u32 max_segment_size; + __u16 max_segments; + __u8 pad[2]; +}; + struct ublk_params { /* * Total length of parameters, userspace has to set 'len' for both @@ -423,6 +446,7 @@ struct ublk_params { #define UBLK_PARAM_TYPE_DEVT (1 << 2) #define UBLK_PARAM_TYPE_ZONED (1 << 3) #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4) +#define UBLK_PARAM_TYPE_SEGMENT (1 << 5) __u32 types; /* types of parameter included */ struct ublk_param_basic basic; @@ -430,6 +454,7 @@ struct ublk_params { struct ublk_param_devt devt; struct ublk_param_zoned zoned; struct ublk_param_dma_align dma; + struct ublk_param_segment seg; }; #endif From 17970209167d521da2f48d45a4242a57fd39d223 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:16 +0800 Subject: [PATCH 1813/2157] ublk: document zero copy feature Add words to explain how zero copy feature works, and why it has to be trusted for handling IO read command. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 1e0e7358e14a..74c57488dc9a 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -309,18 +309,35 @@ with specified IO tag in the command data: ``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the server, ublkdrv needs to copy the server buffer (pages) read to the IO request pages. -Future development -================== - Zero copy --------- -Zero copy is a generic requirement for nbd, fuse or similar drivers. A -problem [#xiaoguang]_ Xiaoguang mentioned is that pages mapped to userspace -can't be remapped any more in kernel with existing mm interfaces. This can -occurs when destining direct IO to ``/dev/ublkb*``. Also, he reported that -big requests (IO size >= 256 KB) may benefit a lot from zero copy. +ublk zero copy relies on io_uring's fixed kernel buffer, which provides +two APIs: `io_buffer_register_bvec()` and `io_buffer_unregister_bvec`. +ublk adds IO command of `UBLK_IO_REGISTER_IO_BUF` to call +`io_buffer_register_bvec()` for ublk server to register client request +buffer into io_uring buffer table, then ublk server can submit io_uring +IOs with the registered buffer index. IO command of `UBLK_IO_UNREGISTER_IO_BUF` +calls `io_buffer_unregister_bvec()` to unregister the buffer, which is +guaranteed to be live between calling `io_buffer_register_bvec()` and +`io_buffer_unregister_bvec()`. Any io_uring operation which supports this +kind of kernel buffer will grab one reference of the buffer until the +operation is completed. + +ublk server implementing zero copy or user copy has to be CAP_SYS_ADMIN and +be trusted, because it is ublk server's responsibility to make sure IO buffer +filled with data for handling read command, and ublk server has to return +correct result to ublk driver when handling READ command, and the result +has to match with how many bytes filled to the IO buffer. Otherwise, +uninitialized kernel IO buffer will be exposed to client application. + +ublk server needs to align the parameter of `struct ublk_param_dma_align` +with backend for zero copy to work correctly. + +For reaching best IO performance, ublk server should align its segment +parameter of `struct ublk_param_segment` with backend for avoiding +unnecessary IO split, which usually hurts io_uring performance. References ========== From d796cea7b9f33b6315362f504b15fcc26d678493 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:17 +0800 Subject: [PATCH 1814/2157] ublk: implement ->queue_rqs() Implement ->queue_rqs() for improving perf in case of MQ. In this way, we just need to call io_uring_cmd_complete_in_task() once for whole IO batch, then both io_uring and ublk server can get exact batch from ublk frontend. Follows IOPS improvement: - tests tools/testing/selftests/ublk/kublk add -t null -q 2 [-z] fio/t/io_uring -p0 /dev/ublkb0 - results: more than 10% IOPS boost observed Pass all ublk selftests, especially the io dispatch order test. Cc: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 131 +++++++++++++++++++++++++++++++++------ 1 file changed, 111 insertions(+), 20 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a5bcf3aa9d8c..f97919460515 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -81,6 +81,20 @@ struct ublk_rq_data { }; struct ublk_uring_cmd_pdu { + /* + * Store requests in same batch temporarily for queuing them to + * daemon context. + * + * It should have been stored to request payload, but we do want + * to avoid extra pre-allocation, and uring_cmd payload is always + * free for us + */ + struct request *req_list; + + /* + * The following two are valid in this cmd whole lifetime, and + * setup in ublk uring_cmd handler + */ struct ublk_queue *ubq; u16 tag; }; @@ -1170,14 +1184,12 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, blk_mq_end_request(rq, BLK_STS_IOERR); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, - unsigned int issue_flags) +static void ublk_dispatch_req(struct ublk_queue *ubq, + struct io_uring_cmd *cmd, + struct request *req, + unsigned int issue_flags) { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - struct ublk_queue *ubq = pdu->ubq; - int tag = pdu->tag; - struct request *req = blk_mq_tag_to_rq( - ubq->dev->tag_set.tags[ubq->q_id], tag); + int tag = req->tag; struct ublk_io *io = &ubq->ios[tag]; unsigned int mapped_bytes; @@ -1252,6 +1264,18 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_queue *ubq = pdu->ubq; + int tag = pdu->tag; + struct request *req = blk_mq_tag_to_rq( + ubq->dev->tag_set.tags[ubq->q_id], tag); + + ublk_dispatch_req(ubq, cmd, req, issue_flags); +} + static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { struct ublk_io *io = &ubq->ios[rq->tag]; @@ -1259,6 +1283,35 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); } +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct request *rq = pdu->req_list; + struct ublk_queue *ubq = rq->mq_hctx->driver_data; + struct request *next; + + while (rq) { + struct ublk_io *io = &ubq->ios[rq->tag]; + + next = rq->rq_next; + rq->rq_next = NULL; + ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); + rq = next; + } +} + +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) +{ + struct request *rq = rq_list_peek(l); + struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); + + pdu->req_list = rq; + rq_list_init(l); + io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); +} + static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; @@ -1297,21 +1350,12 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_RESET_TIMER; } -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) { - struct ublk_queue *ubq = hctx->driver_data; - struct request *rq = bd->rq; blk_status_t res; - if (unlikely(ubq->fail_io)) { + if (unlikely(ubq->fail_io)) return BLK_STS_TARGET; - } - - /* fill iod to slot in io cmd buffer */ - res = ublk_setup_iod(ubq, rq); - if (unlikely(res != BLK_STS_OK)) - return BLK_STS_IOERR; /* With recovery feature enabled, force_abort is set in * ublk_stop_dev() before calling del_gendisk(). We have to @@ -1325,6 +1369,29 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; + if (unlikely(ubq->canceling)) + return BLK_STS_IOERR; + + /* fill iod to slot in io cmd buffer */ + res = ublk_setup_iod(ubq, rq); + if (unlikely(res != BLK_STS_OK)) + return BLK_STS_IOERR; + + blk_mq_start_request(rq); + return BLK_STS_OK; +} + +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct ublk_queue *ubq = hctx->driver_data; + struct request *rq = bd->rq; + blk_status_t res; + + res = ublk_prep_req(ubq, rq); + if (res != BLK_STS_OK) + return res; + /* * ->canceling has to be handled after ->force_abort and ->fail_io * is dealt with, otherwise this request may not be failed in case @@ -1335,12 +1402,35 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - blk_mq_start_request(bd->rq); ublk_queue_cmd(ubq, rq); - return BLK_STS_OK; } +static void ublk_queue_rqs(struct rq_list *rqlist) +{ + struct rq_list requeue_list = { }; + struct rq_list submit_list = { }; + struct ublk_queue *ubq = NULL; + struct request *req; + + while ((req = rq_list_pop(rqlist))) { + struct ublk_queue *this_q = req->mq_hctx->driver_data; + + if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) + ublk_queue_cmd_list(ubq, &submit_list); + ubq = this_q; + + if (ublk_prep_req(ubq, req) == BLK_STS_OK) + rq_list_add_tail(&submit_list, req); + else + rq_list_add_tail(&requeue_list, req); + } + + if (ubq && !rq_list_empty(&submit_list)) + ublk_queue_cmd_list(ubq, &submit_list); + *rqlist = requeue_list; +} + static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -1353,6 +1443,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, + .queue_rqs = ublk_queue_rqs, .init_hctx = ublk_init_hctx, .timeout = ublk_timeout, }; From daabfb50a56b11a4f15d2bdbfae129e61c08c0ac Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:18 +0800 Subject: [PATCH 1815/2157] ublk: rename ublk_rq_task_work_cb as ublk_cmd_tw_cb The new name is aligned with ublk_cmd_list_tw_cb(), and looks more readable. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-10-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f97919460515..355a59c78539 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1264,8 +1264,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq, ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, - unsigned int issue_flags) +static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; @@ -1280,7 +1280,7 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { struct ublk_io *io = &ubq->ios[rq->tag]; - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); + io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_tw_cb); } static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, From 8c778614361f288ef552fd6a52a17460a45b2f4f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:19 +0800 Subject: [PATCH 1816/2157] selftests: ublk: add more tests for covering MQ Add test test_generic_02.sh for covering IO dispatch order in case of MQ. Especially we just support ->queue_rqs() which may affect IO dispatch order. Add test_loop_05.sh and test_stripe_03.sh for covering MQ. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-11-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 3 ++ tools/testing/selftests/ublk/test_common.sh | 6 +++ .../testing/selftests/ublk/test_generic_02.sh | 44 +++++++++++++++++++ tools/testing/selftests/ublk/test_loop_01.sh | 14 +++--- tools/testing/selftests/ublk/test_loop_03.sh | 14 +++--- tools/testing/selftests/ublk/test_loop_05.sh | 28 ++++++++++++ .../testing/selftests/ublk/test_stress_01.sh | 6 +-- .../testing/selftests/ublk/test_stress_02.sh | 6 +-- .../testing/selftests/ublk/test_stripe_01.sh | 14 +++--- .../testing/selftests/ublk/test_stripe_03.sh | 30 +++++++++++++ 10 files changed, 132 insertions(+), 33 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_02.sh create mode 100755 tools/testing/selftests/ublk/test_loop_05.sh create mode 100755 tools/testing/selftests/ublk/test_stripe_03.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7817afe29005..7a8c994de244 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -4,6 +4,7 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh +TEST_PROGS += test_generic_02.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -11,8 +12,10 @@ TEST_PROGS += test_loop_01.sh TEST_PROGS += test_loop_02.sh TEST_PROGS += test_loop_03.sh TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_loop_05.sh TEST_PROGS += test_stripe_01.sh TEST_PROGS += test_stripe_02.sh +TEST_PROGS += test_stripe_03.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 75f54ac6b1c4..a88b35943227 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,12 @@ _get_disk_dev_t() { echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_run_fio_verify_io() { + fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ + --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ + --verify_state_save=0 "$@" > /dev/null +} + _create_backfile() { local my_size=$1 local my_file diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh new file mode 100755 index 000000000000..3e80121e3bf5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_02" +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "sequential io order for MQ" + +dev_id=$(_add_ublk_dev -t null -q 2) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then + _cleanup_test "null" + exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk +fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait +if grep -q "io_out_of_order" "$UBLK_TMP"; then + cat "$UBLK_TMP" + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index c882d2a08e13..1ef8b6044777 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -6,6 +6,10 @@ TID="loop_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -14,15 +18,7 @@ dev_id=$(_add_ublk_dev -t loop "$backfile_0") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=16 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index 269c96787d7d..e9ca744de8b1 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -6,6 +6,10 @@ TID="loop_03" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify over zero copy" backfile_0=$(_create_backfile 256M) @@ -13,15 +17,7 @@ dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") _check_add_dev $TID $? "$backfile_0" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=64 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh new file mode 100755 index 000000000000..2e6e2e6978fc --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_05" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +backfile_0=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 7177f6c57bc5..a8be24532b24 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -27,20 +27,20 @@ ublk_io_and_remove() _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null +ublk_io_and_remove 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_remove 256M -t loop -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2a8e60579a06..2159e4cc8140 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -27,20 +27,20 @@ ublk_io_and_kill_daemon() _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null +ublk_io_and_kill_daemon 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index c01f3dc325ab..7e387ef656ea 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -6,6 +6,10 @@ TID="stripe_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "stripe" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -15,15 +19,7 @@ dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=32 \ - --rw=write \ - --size=512M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh new file mode 100755 index 000000000000..c1b34af36145 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_03" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE From c78ae7b71ed66a180708377b45042ef77efc840e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:20 +0800 Subject: [PATCH 1817/2157] selftests: ublk: add test for checking zero copy related parameter ublk zero copy usually requires to set dma and segment parameter correctly, so hard-code null target's dma & segment parameter in non-default value, and verify if they are setup correctly by ublk driver. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-12-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/null.c | 11 +++++++- .../testing/selftests/ublk/test_generic_03.sh | 28 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/ublk/test_generic_03.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7a8c994de244..d98680d64a2f 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -5,6 +5,7 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh +TEST_PROGS += test_generic_03.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 899875ff50fe..91fec3690d4b 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) dev->tgt.dev_size = dev_size; dev->tgt.params = (struct ublk_params) { - .types = UBLK_PARAM_TYPE_BASIC, + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | + UBLK_PARAM_TYPE_SEGMENT, .basic = { .logical_bs_shift = 9, .physical_bs_shift = 12, @@ -26,6 +27,14 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = info->max_io_buf_bytes >> 9, .dev_sectors = dev_size >> 9, }, + .dma = { + .alignment = 4095, + }, + .seg = { + .seg_boundary_mask = 4095, + .max_segment_size = 32 << 10, + .max_segments = 32, + }, }; if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh new file mode 100755 index 000000000000..b551aa76cb0d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_03.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_03" +ERR_CODE=0 + +_prep_test "null" "check dma & segment limits for zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +sysfs_path=/sys/block/ublkb"${dev_id}" +dma_align=$(cat "$sysfs_path"/queue/dma_alignment) +max_segments=$(cat "$sysfs_path"/queue/max_segments) +max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size) +if [ "$dma_align" != "4095" ]; then + ERR_CODE=255 +fi +if [ "$max_segments" != "32" ]; then + ERR_CODE=255 +fi +if [ "$max_segment_size" != "32768" ]; then + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE From dfbce8b798fb848a42706e2e544b78b3db22aaae Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:07 -0600 Subject: [PATCH 1818/2157] ublk: remove unused cmd argument to ublk_dispatch_req() ublk_dispatch_req() never uses its struct io_uring_cmd *cmd argument. Drop it so callers don't have to pass a value. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-2-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 355a59c78539..39efe443e235 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1185,7 +1185,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, } static void ublk_dispatch_req(struct ublk_queue *ubq, - struct io_uring_cmd *cmd, struct request *req, unsigned int issue_flags) { @@ -1273,7 +1272,7 @@ static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, struct request *req = blk_mq_tag_to_rq( ubq->dev->tag_set.tags[ubq->q_id], tag); - ublk_dispatch_req(ubq, cmd, req, issue_flags); + ublk_dispatch_req(ubq, req, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) @@ -1292,11 +1291,9 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, struct request *next; while (rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; - next = rq->rq_next; rq->rq_next = NULL; - ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); + ublk_dispatch_req(ubq, rq, issue_flags); rq = next; } } From 9d7fa99189709b80eb16094aad18f7e492b835de Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:08 -0600 Subject: [PATCH 1819/2157] ublk: skip 1 NULL check in ublk_cmd_list_tw_cb() loop ublk_cmd_list_tw_cb() is always performed on a non-empty request list. So don't check whether rq is NULL on the first iteration of the loop, just on subsequent iterations. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-3-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 39efe443e235..8b9780c0feab 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1290,12 +1290,12 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, struct ublk_queue *ubq = rq->mq_hctx->driver_data; struct request *next; - while (rq) { + do { next = rq->rq_next; rq->rq_next = NULL; ublk_dispatch_req(ubq, rq, issue_flags); rq = next; - } + } while (rq); } static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) From 6a87fc437a034e4be2a63d8dfd4d2985c6c574bc Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:09 -0600 Subject: [PATCH 1820/2157] ublk: get ubq from pdu in ublk_cmd_list_tw_cb() Save a few pointer dereferences by obtaining struct ublk_queue *ubq from the ublk_uring_cmd_pdu instead of the request's mq_hctx. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-4-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8b9780c0feab..9276d1fcc100 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1287,7 +1287,7 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct request *rq = pdu->req_list; - struct ublk_queue *ubq = rq->mq_hctx->driver_data; + struct ublk_queue *ubq = pdu->ubq; struct request *next; do { From 108d8aecaeeb52f5fbe98ac94da534954db1da44 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:10 -0600 Subject: [PATCH 1821/2157] ublk: avoid redundant io->cmd in ublk_queue_cmd_list() ublk_queue_cmd_list() loads io->cmd twice. The intervening stores prevent the compiler from combining the loads. Since struct ublk_io *io is only used to compute io->cmd, replace the variable with io->cmd. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-5-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 9276d1fcc100..23250471562a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1301,12 +1301,12 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) { struct request *rq = rq_list_peek(l); - struct ublk_io *io = &ubq->ios[rq->tag]; - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); + struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); pdu->req_list = rq; rq_list_init(l); - io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); + io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb); } static enum blk_eh_timer_return ublk_timeout(struct request *rq) From 00cfc05cf81f58b1bc2650e18228350a094b1f6d Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:11 -0600 Subject: [PATCH 1822/2157] ublk: store req in ublk_uring_cmd_pdu for ublk_cmd_tw_cb() Pass struct request *rq to ublk_cmd_tw_cb() through ublk_uring_cmd_pdu, mirroring how it works for ublk_cmd_list_tw_cb(). This saves some pointer dereferences, as well as the bounds check in blk_mq_tag_to_rq(). Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-6-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 23250471562a..466a23b89379 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -89,7 +89,10 @@ struct ublk_uring_cmd_pdu { * to avoid extra pre-allocation, and uring_cmd payload is always * free for us */ - struct request *req_list; + union { + struct request *req; + struct request *req_list; + }; /* * The following two are valid in this cmd whole lifetime, and @@ -1268,18 +1271,17 @@ static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - int tag = pdu->tag; - struct request *req = blk_mq_tag_to_rq( - ubq->dev->tag_set.tags[ubq->q_id], tag); - ublk_dispatch_req(ubq, req, issue_flags); + ublk_dispatch_req(ubq, pdu->req, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_tw_cb); + pdu->req = rq; + io_uring_cmd_complete_in_task(cmd, ublk_cmd_tw_cb); } static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, From a20b8631c8885cda45a331a151d29a83dfbfdefb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:54 +0000 Subject: [PATCH 1823/2157] io_uring/net: open code io_sendmsg_copy_hdr() io_sendmsg_setup() is trivial and io_sendmsg_copy_hdr() doesn't add any good abstraction, open code one into another. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/565318ce585665e88053663eeee5178d2c15692f.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 228b4f13d34c..34d103f2469d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -325,25 +325,6 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, return 0; } -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; - int ret; - - ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL); - if (unlikely(ret)) - return ret; - - if (!(req->flags & REQ_F_BUFFER_SELECT)) - ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen, - ITER_SOURCE); - /* save msg_control as sys_sendmsg() overwrites it */ - sr->msg_control = iomsg->msg.msg_control_user; - return ret; -} - void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) { struct io_async_msghdr *io = req->async_data; @@ -392,10 +373,19 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg = req->async_data; + struct user_msghdr msg; + int ret; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); + if (unlikely(ret)) + return ret; + /* save msg_control as sys_sendmsg() overwrites it */ + sr->msg_control = kmsg->msg.msg_control_user; - return io_sendmsg_copy_hdr(req, kmsg); + if (req->flags & REQ_F_BUFFER_SELECT) + return 0; + return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); } static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) From 5f364117db942c15980111f2e8ff6025c7e5893a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:55 +0000 Subject: [PATCH 1824/2157] io_uring/net: open code io_net_vec_assign() Get rid of io_net_vec_assign() by open coding it into its only caller. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/19191c34b5cfe1161f7eeefa6e785418ea9ad56d.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 34d103f2469d..68f87d7c74df 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -176,16 +176,6 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) return hdr; } -/* assign new iovec to kmsg, if we need to */ -static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, - struct iovec *iov) -{ - if (iov) { - req->flags |= REQ_F_NEED_CLEANUP; - io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs); - } -} - static inline void io_mshot_prep_retry(struct io_kiocb *req, struct io_async_msghdr *kmsg) { @@ -217,7 +207,11 @@ static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg &iomsg->msg.msg_iter, io_is_compat(req->ctx)); if (unlikely(ret < 0)) return ret; - io_net_vec_assign(req, iomsg, iov); + + if (iov) { + req->flags |= REQ_F_NEED_CLEANUP; + io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs); + } return 0; } From c55e2845dfa72e647ed8d9a7b4c6e11a8ed0fc1e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:56 +0000 Subject: [PATCH 1825/2157] io_uring/net: combine sendzc flags writes Save an instruction / trip to the cache and assign some of sendzc flags together. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c519d6f406776c3be3ef988a8339a88e45d1ffd9.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 68f87d7c74df..ef0faa07627f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1291,7 +1291,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->done_io = 0; zc->retry = false; - req->flags |= REQ_F_POLL_NO_LAZY; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) return -EINVAL; @@ -1305,7 +1304,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) notif->cqe.user_data = req->cqe.user_data; notif->cqe.res = 0; notif->cqe.flags = IORING_CQE_F_NOTIF; - req->flags |= REQ_F_NEED_CLEANUP; + req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY; zc->flags = READ_ONCE(sqe->ioprio); if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { From 63b16e4f0b90abad500ecb7bc7a625278febdc2c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:57 +0000 Subject: [PATCH 1826/2157] io_uring/net: unify sendmsg setup with zc io_sendmsg_zc_setup() duplicates parts of io_sendmsg_setup(), and the only difference between them is that the former support vectored registered buffers with nothing zerocopy specific. Merge them together, we want regular sendmsg to eventually support fixed buffers either way. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7e5ec40f9dc93355399dc6fa0cbc8b31f0b20ac5.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index ef0faa07627f..6d02c8822cc9 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -377,32 +377,16 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = kmsg->msg.msg_control_user; + if (sr->flags & IORING_RECVSEND_FIXED_BUF) { + kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; + return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, + msg.msg_iovlen); + } if (req->flags & REQ_F_BUFFER_SELECT) return 0; return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); } -static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg = req->async_data; - struct user_msghdr msg; - int ret; - - if (!(sr->flags & IORING_RECVSEND_FIXED_BUF)) - return io_sendmsg_setup(req, sqe); - - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - - ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); - if (unlikely(ret)) - return ret; - sr->msg_control = kmsg->msg.msg_control_user; - kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; - - return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, msg.msg_iovlen); -} - #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -1339,7 +1323,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= REQ_F_IMPORT_BUFFER; return io_send_setup(req, sqe); } - ret = io_sendmsg_zc_setup(req, sqe); + ret = io_sendmsg_setup(req, sqe); if (unlikely(ret)) return ret; From 49dbce5602dc50343c9794d0ddf05d1f6c9cb592 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:58 +0000 Subject: [PATCH 1827/2157] io_uring/net: clusterise send vs msghdr branches We have multiple branches at prep for send vs sendmsg handling, put them together so that the variant handling is more localised. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/33abf666d9ded74cba4da2f0d9fe58e88520dffe.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 6d02c8822cc9..9acd8d9f80b2 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -395,12 +395,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->done_io = 0; sr->retry = false; - - if (req->opcode != IORING_OP_SEND) { - if (sqe->addr2 || sqe->file_index) - return -EINVAL; - } - sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) @@ -426,6 +420,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -ENOMEM; if (req->opcode != IORING_OP_SENDMSG) return io_send_setup(req, sqe); + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; return io_sendmsg_setup(req, sqe); } @@ -1303,11 +1299,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) } } - if (req->opcode != IORING_OP_SEND_ZC) { - if (unlikely(sqe->addr2 || sqe->file_index)) - return -EINVAL; - } - zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY; req->buf_index = READ_ONCE(sqe->buf_index); @@ -1323,6 +1314,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= REQ_F_IMPORT_BUFFER; return io_send_setup(req, sqe); } + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; ret = io_sendmsg_setup(req, sqe); if (unlikely(ret)) return ret; From ad3f6cc40084f9adb1a53bf386d966073dc6a4e9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:59 +0000 Subject: [PATCH 1828/2157] io_uring/net: set sg_from_iter in advance In preparation to the next patch, set ->sg_from_iter callback at request prep time. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/5fe2972701df3bacdb3d760bce195fa640bee201.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 9acd8d9f80b2..749dd298c502 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -97,6 +97,11 @@ struct io_recvzc { struct io_zcrx_ifq *ifq; }; +static int io_sg_from_iter_iovec(struct sk_buff *skb, + struct iov_iter *from, size_t length); +static int io_sg_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length); + int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -1266,6 +1271,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_ring_ctx *ctx = req->ctx; + struct io_async_msghdr *iomsg; struct io_kiocb *notif; int ret; @@ -1308,8 +1314,15 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (io_is_compat(req->ctx)) zc->msg_flags |= MSG_CMSG_COMPAT; - if (unlikely(!io_msg_alloc_async(req))) + iomsg = io_msg_alloc_async(req); + if (unlikely(!iomsg)) return -ENOMEM; + + if (zc->flags & IORING_RECVSEND_FIXED_BUF) + iomsg->msg.sg_from_iter = io_sg_from_iter; + else + iomsg->msg.sg_from_iter = io_sg_from_iter_iovec; + if (req->opcode == IORING_OP_SEND_ZC) { req->flags |= REQ_F_IMPORT_BUFFER; return io_send_setup(req, sqe); @@ -1320,11 +1333,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ret)) return ret; - if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) { - struct io_async_msghdr *iomsg = req->async_data; - + if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count); - } return 0; } @@ -1391,7 +1401,6 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags) ITER_SOURCE, issue_flags); if (unlikely(ret)) return ret; - kmsg->msg.sg_from_iter = io_sg_from_iter; } else { ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); if (unlikely(ret)) @@ -1399,7 +1408,6 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags) ret = io_notif_account_mem(sr->notif, sr->len); if (unlikely(ret)) return ret; - kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; } return ret; @@ -1483,8 +1491,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) unsigned flags; int ret, min_ret = 0; - kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; - if (req->flags & REQ_F_IMPORT_BUFFER) { unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs; int ret; @@ -1493,7 +1499,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) &kmsg->vec, uvec_segs, issue_flags); if (unlikely(ret)) return ret; - kmsg->msg.sg_from_iter = io_sg_from_iter; req->flags &= ~REQ_F_IMPORT_BUFFER; } From fbe1a30c5d3e6f184ddd63deded6f30c3ecc4c3f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:11:00 +0000 Subject: [PATCH 1829/2157] io_uring/net: import zc ubuf earlier io_send_setup() already sets up the iterator for IORING_OP_SEND_ZC, we don't need repeating that at issue time. Move it all together with mem accounting at prep time, which is more consistent with how the non-zc version does that. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/eb54f007c493ad9f4ca89aa8e715baf30d83fb88.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 749dd298c502..eaa627eddb4a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1318,23 +1318,23 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(!iomsg)) return -ENOMEM; - if (zc->flags & IORING_RECVSEND_FIXED_BUF) - iomsg->msg.sg_from_iter = io_sg_from_iter; - else - iomsg->msg.sg_from_iter = io_sg_from_iter_iovec; - if (req->opcode == IORING_OP_SEND_ZC) { - req->flags |= REQ_F_IMPORT_BUFFER; - return io_send_setup(req, sqe); + if (zc->flags & IORING_RECVSEND_FIXED_BUF) + req->flags |= REQ_F_IMPORT_BUFFER; + ret = io_send_setup(req, sqe); + } else { + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; + ret = io_sendmsg_setup(req, sqe); } - if (unlikely(sqe->addr2 || sqe->file_index)) - return -EINVAL; - ret = io_sendmsg_setup(req, sqe); if (unlikely(ret)) return ret; - if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) + if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) { + iomsg->msg.sg_from_iter = io_sg_from_iter_iovec; return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count); + } + iomsg->msg.sg_from_iter = io_sg_from_iter; return 0; } @@ -1392,25 +1392,13 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg = req->async_data; - int ret; - if (sr->flags & IORING_RECVSEND_FIXED_BUF) { - sr->notif->buf_index = req->buf_index; - ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter, - (u64)(uintptr_t)sr->buf, sr->len, - ITER_SOURCE, issue_flags); - if (unlikely(ret)) - return ret; - } else { - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - ret = io_notif_account_mem(sr->notif, sr->len); - if (unlikely(ret)) - return ret; - } + WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF)); - return ret; + sr->notif->buf_index = req->buf_index; + return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter, + (u64)(uintptr_t)sr->buf, sr->len, + ITER_SOURCE, issue_flags); } int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) From b0e9570a6b19fb0e53090489838dc0de27795eb9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:11:49 +0000 Subject: [PATCH 1830/2157] io_uring/msg: rename io_double_lock_ctx() io_double_lock_ctx() doesn't lock both rings. Rename it to prevent any future confusion. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9e5defa000efd9b0f5e169cbb6bad4994d46ec5c.1743190078.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/msg_ring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 0bbcbbcdebfd..bea5a96587b7 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -38,8 +38,8 @@ static void io_double_unlock_ctx(struct io_ring_ctx *octx) mutex_unlock(&octx->uring_lock); } -static int io_double_lock_ctx(struct io_ring_ctx *octx, - unsigned int issue_flags) +static int io_lock_external_ctx(struct io_ring_ctx *octx, + unsigned int issue_flags) { /* * To ensure proper ordering between the two ctxs, we can only @@ -154,7 +154,7 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx, ret = -EOVERFLOW; if (target_ctx->flags & IORING_SETUP_IOPOLL) { - if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) + if (unlikely(io_lock_external_ctx(target_ctx, issue_flags))) return -EAGAIN; } if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) @@ -199,7 +199,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag struct file *src_file = msg->src_file; int ret; - if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) + if (unlikely(io_lock_external_ctx(target_ctx, issue_flags))) return -EAGAIN; ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd); From 9cc0bbdaba2a66ad90bc6ce45163b7745baffe98 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:11:50 +0000 Subject: [PATCH 1831/2157] io_uring/msg: initialise msg request opcode It's risky to have msg request opcode set to garbage, so at least initialise it to nop. Later we might want to add a user inaccessible opcode for such cases. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9afe650fcb348414a4529d89f52eb8969ba06efd.1743190078.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/msg_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index bea5a96587b7..6c51b942d020 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -93,6 +93,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, kmem_cache_free(req_cachep, req); return -EOWNERDEAD; } + req->opcode = IORING_OP_NOP; req->cqe.user_data = user_data; io_req_set_res(req, res, cflags); percpu_ref_get(&ctx->refs); From ea9106786e264483312b9b270fca1b506223338d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:11:51 +0000 Subject: [PATCH 1832/2157] io_uring: don't pass ctx to tw add remote helper Unlike earlier versions, io_msg_remote_post() creates a valid request with a proper context, so don't pass a context to io_req_task_work_add_remote() explicitly but derive it from the request. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/721f51cf34996d98b48f0bfd24ad40aa2730167e.1743190078.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 14 ++++++-------- io_uring/io_uring.h | 3 +-- io_uring/msg_ring.c | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3ba49c628337..e4484a03e033 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1141,10 +1141,9 @@ void tctx_task_work(struct callback_head *cb) WARN_ON_ONCE(ret); } -static inline void io_req_local_work_add(struct io_kiocb *req, - struct io_ring_ctx *ctx, - unsigned flags) +static void io_req_local_work_add(struct io_kiocb *req, unsigned flags) { + struct io_ring_ctx *ctx = req->ctx; unsigned nr_wait, nr_tw, nr_tw_prev; struct llist_node *head; @@ -1239,17 +1238,16 @@ static void io_req_normal_work_add(struct io_kiocb *req) void __io_req_task_work_add(struct io_kiocb *req, unsigned flags) { if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) - io_req_local_work_add(req, req->ctx, flags); + io_req_local_work_add(req, flags); else io_req_normal_work_add(req); } -void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx, - unsigned flags) +void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags) { - if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))) + if (WARN_ON_ONCE(!(req->ctx->flags & IORING_SETUP_DEFER_TASKRUN))) return; - io_req_local_work_add(req, ctx, flags); + __io_req_task_work_add(req, flags); } static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 87f883130286..e4050b2d0821 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -89,8 +89,7 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd, unsigned issue_flags); void __io_req_task_work_add(struct io_kiocb *req, unsigned flags); -void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx, - unsigned flags); +void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags); void io_req_task_queue(struct io_kiocb *req); void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw); void io_req_task_queue_fail(struct io_kiocb *req, int ret); diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 6c51b942d020..50a958e9c921 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -100,7 +100,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, req->ctx = ctx; req->tctx = NULL; req->io_task_work.func = io_msg_tw_complete; - io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); + io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE); return 0; } From 1dc1e0b9d694eb9016d3105ca4ba8bd90eba888a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Mar 2025 07:31:45 -0700 Subject: [PATCH 1833/2157] srcu: Make FORCE_NEED_SRCU_NMI_SAFE depend on RCU_EXPERT The FORCE_NEED_SRCU_NMI_SAFE is useful only for those wishing to test the SRCU code paths that accommodate architectures that do not have NMI-safe per-CPU operations, that is, those architectures that do not select the ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig option. As such, this is a specialized Kconfig option that is not intended for casual users. This commit therefore hides it behind the RCU_EXPERT Kconfig option. Given that this new FORCE_NEED_SRCU_NMI_SAFE Kconfig option has no effect unless the ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig option is also selected, it also depends on this Kconfig option. [ paulmck: Apply Geert Uytterhoeven feedback. ] [ boqun: Add the "Fixes" tag. ] Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdX6dy9_tmpLkpcnGzxyRbe6qSWYukcPp=H1GzZdyd3qBQ@mail.gmail.com/ Fixes: 536e8b9b80bc ("srcu: Add FORCE_NEED_SRCU_NMI_SAFE Kconfig for testing") Signed-off-by: Paul E. McKenney Reviewed-by: Geert Uytterhoeven Signed-off-by: Boqun Feng --- kernel/rcu/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index aa42de4d2768..4d9b21f69eaa 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -68,6 +68,8 @@ config TREE_SRCU config FORCE_NEED_SRCU_NMI_SAFE bool "Force selection of NEED_SRCU_NMI_SAFE" depends on !TINY_SRCU + depends on RCU_EXPERT + depends on ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select NEED_SRCU_NMI_SAFE default n help From 9a45714fc51321ea8f5e5567f70e06753a848f62 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 13:42:29 -0600 Subject: [PATCH 1834/2157] ublk: specify io_cmd_buf pointer type io_cmd_buf points to an array of ublksrv_io_desc structs but its type is char *. Indexing the array requires an explicit multiplication and cast. The compiler also can't check the pointer types. Change io_cmd_buf's type to struct ublksrv_io_desc * so it can be indexed directly and the compiler can type-check the code. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20250328194230.2726862-2-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 466a23b89379..2fd05c1bd30b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -158,7 +158,7 @@ struct ublk_queue { unsigned long flags; struct task_struct *ubq_daemon; - char *io_cmd_buf; + struct ublksrv_io_desc *io_cmd_buf; bool force_abort; bool timeout; @@ -706,11 +706,11 @@ static inline bool ublk_rq_has_data(const struct request *rq) static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag) { - return (struct ublksrv_io_desc *) - &(ubq->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); + return &ubq->io_cmd_buf[tag]; } -static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) +static inline struct ublksrv_io_desc * +ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) { return ublk_get_queue(ub, q_id)->io_cmd_buf; } From 25aaa81371e7db34ddb42c69ed4f6c5bc8de2afa Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 13:42:30 -0600 Subject: [PATCH 1835/2157] selftests: ublk: specify io_cmd_buf pointer type Matching the ublk driver, change the type of io_cmd_buf from char * to struct ublksrv_io_desc *. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20250328194230.2726862-3-csander@purestorage.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 2 +- tools/testing/selftests/ublk/kublk.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 05147b53c361..83756f97c26e 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -322,7 +322,7 @@ static int ublk_queue_init(struct ublk_queue *q) cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); - q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, + q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, MAP_SHARED | MAP_POPULATE, dev->fds[0], off); if (q->io_cmd_buf == MAP_FAILED) { ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index f31a5c4d4143..760ff8ffb810 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -128,7 +128,7 @@ struct ublk_queue { unsigned int io_inflight; struct ublk_dev *dev; const struct ublk_tgt_ops *tgt_ops; - char *io_cmd_buf; + struct ublksrv_io_desc *io_cmd_buf; struct io_uring ring; struct ublk_io ios[UBLK_QUEUE_DEPTH]; #define UBLKSRV_QUEUE_STOPPING (1U << 0) @@ -302,7 +302,7 @@ static inline void ublk_mark_io_done(struct ublk_io *io, int res) static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) { - return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); + return &q->io_cmd_buf[tag]; } static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) From 9764d5b0cd0ea4846fd46c7d0b4238ea122075a9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Mar 2025 09:32:00 +0800 Subject: [PATCH 1836/2157] Revert "crypto: testmgr - Add multibuffer hash testing" This reverts commit 8b54e6a8f4156ed43627f40300b0711dc977fbc1. The multibuffer tests has a number of bugs. For example, the SG lists for the filler requests weren't initialised properly, and it fails to take data-keyed algorithms such as poly1305 into account. More importantly, the chaining interface itself is under review. Revert this until the interface is fully settled. Reported-by: Manorit Chawdhry Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202503281658.7a078821-lkp@intel.com Signed-off-by: Herbert Xu --- crypto/testmgr.c | 157 ++++++++--------------------------------------- 1 file changed, 24 insertions(+), 133 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index cd0f6a778b62..2c7fc4c94c04 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -302,13 +302,6 @@ struct test_sg_division { * @key_offset_relative_to_alignmask: if true, add the algorithm's alignmask to * the @key_offset * @finalization_type: what finalization function to use for hashes - * @multibuffer: test with multibuffer - * @multibuffer_index: random number used to generate the message index to use - * for multibuffer. - * @multibuffer_uneven: test with multibuffer using uneven lengths - * @multibuffer_lens: random lengths to make chained request uneven - * @multibuffer_count: random number used to generate the num_msgs parameter - * for multibuffer * @nosimd: execute with SIMD disabled? Requires !CRYPTO_TFM_REQ_MAY_SLEEP. * This applies to the parts of the operation that aren't controlled * individually by @nosimd_setkey or @src_divs[].nosimd. @@ -328,11 +321,6 @@ struct testvec_config { enum finalization_type finalization_type; bool nosimd; bool nosimd_setkey; - bool multibuffer; - unsigned int multibuffer_index; - unsigned int multibuffer_count; - bool multibuffer_uneven; - unsigned int multibuffer_lens[MAX_MB_MSGS]; }; #define TESTVEC_CONFIG_NAMELEN 192 @@ -572,7 +560,6 @@ struct test_sglist { char *bufs[XBUFSIZE]; struct scatterlist sgl[XBUFSIZE]; struct scatterlist sgl_saved[XBUFSIZE]; - struct scatterlist full_sgl[XBUFSIZE]; struct scatterlist *sgl_ptr; unsigned int nents; }; @@ -686,11 +673,6 @@ static int build_test_sglist(struct test_sglist *tsgl, sg_mark_end(&tsgl->sgl[tsgl->nents - 1]); tsgl->sgl_ptr = tsgl->sgl; memcpy(tsgl->sgl_saved, tsgl->sgl, tsgl->nents * sizeof(tsgl->sgl[0])); - - sg_init_table(tsgl->full_sgl, XBUFSIZE); - for (i = 0; i < XBUFSIZE; i++) - sg_set_buf(tsgl->full_sgl, tsgl->bufs[i], PAGE_SIZE * 2); - return 0; } @@ -1167,27 +1149,6 @@ static void generate_random_testvec_config(struct rnd_state *rng, break; } - if (prandom_bool(rng)) { - int i; - - cfg->multibuffer = true; - cfg->multibuffer_count = prandom_u32_state(rng); - cfg->multibuffer_count %= MAX_MB_MSGS; - if (cfg->multibuffer_count++) { - cfg->multibuffer_index = prandom_u32_state(rng); - cfg->multibuffer_index %= cfg->multibuffer_count; - } - - cfg->multibuffer_uneven = prandom_bool(rng); - for (i = 0; i < MAX_MB_MSGS; i++) - cfg->multibuffer_lens[i] = - generate_random_length(rng, PAGE_SIZE * 2 * XBUFSIZE); - - p += scnprintf(p, end - p, " multibuffer(%d/%d%s)", - cfg->multibuffer_index, cfg->multibuffer_count, - cfg->multibuffer_uneven ? "/uneven" : ""); - } - if (!(cfg->req_flags & CRYPTO_TFM_REQ_MAY_SLEEP)) { if (prandom_bool(rng)) { cfg->nosimd = true; @@ -1492,7 +1453,6 @@ static int do_ahash_op(int (*op)(struct ahash_request *req), struct ahash_request *req, struct crypto_wait *wait, bool nosimd) { - struct ahash_request *r2; int err; if (nosimd) @@ -1503,15 +1463,7 @@ static int do_ahash_op(int (*op)(struct ahash_request *req), if (nosimd) crypto_reenable_simd_for_test(); - err = crypto_wait_req(err, wait); - if (err) - return err; - - list_for_each_entry(r2, &req->base.list, base.list) - if (r2->base.err) - return r2->base.err; - - return 0; + return crypto_wait_req(err, wait); } static int check_nonfinal_ahash_op(const char *op, int err, @@ -1532,65 +1484,20 @@ static int check_nonfinal_ahash_op(const char *op, int err, return 0; } -static void setup_ahash_multibuffer( - struct ahash_request *reqs[MAX_MB_MSGS], - const struct testvec_config *cfg, - struct test_sglist *tsgl) -{ - struct scatterlist *sg = tsgl->full_sgl; - static u8 trash[HASH_MAX_DIGESTSIZE]; - struct ahash_request *req = reqs[0]; - unsigned int num_msgs; - unsigned int msg_idx; - int i; - - if (!cfg->multibuffer) - return; - - num_msgs = cfg->multibuffer_count; - if (num_msgs == 1) - return; - - msg_idx = cfg->multibuffer_index; - for (i = 1; i < num_msgs; i++) { - struct ahash_request *r2 = reqs[i]; - unsigned int nbytes = req->nbytes; - - if (cfg->multibuffer_uneven) - nbytes = cfg->multibuffer_lens[i]; - - ahash_request_set_callback(r2, req->base.flags, NULL, NULL); - ahash_request_set_crypt(r2, sg, trash, nbytes); - ahash_request_chain(r2, req); - } - - if (msg_idx) { - reqs[msg_idx]->src = req->src; - reqs[msg_idx]->nbytes = req->nbytes; - reqs[msg_idx]->result = req->result; - req->src = sg; - if (cfg->multibuffer_uneven) - req->nbytes = cfg->multibuffer_lens[0]; - req->result = trash; - } -} - /* Test one hash test vector in one configuration, using the ahash API */ static int test_ahash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, - struct ahash_request *reqs[MAX_MB_MSGS], + struct ahash_request *req, struct test_sglist *tsgl, u8 *hashstate) { - struct ahash_request *req = reqs[0]; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); const unsigned int digestsize = crypto_ahash_digestsize(tfm); const unsigned int statesize = crypto_ahash_statesize(tfm); const char *driver = crypto_ahash_driver_name(tfm); const u32 req_flags = CRYPTO_TFM_REQ_MAY_BACKLOG | cfg->req_flags; const struct test_sg_division *divs[XBUFSIZE]; - struct ahash_request *reqi = req; DECLARE_CRYPTO_WAIT(wait); unsigned int i; struct scatterlist *pending_sgl; @@ -1598,9 +1505,6 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, u8 result[HASH_MAX_DIGESTSIZE + TESTMGR_POISON_LEN]; int err; - if (cfg->multibuffer) - reqi = reqs[cfg->multibuffer_index]; - /* Set the key, if specified */ if (vec->ksize) { err = do_setkey(crypto_ahash_setkey, tfm, vec->key, vec->ksize, @@ -1630,7 +1534,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, /* Do the actual hashing */ - testmgr_poison(reqi->__ctx, crypto_ahash_reqsize(tfm)); + testmgr_poison(req->__ctx, crypto_ahash_reqsize(tfm)); testmgr_poison(result, digestsize + TESTMGR_POISON_LEN); if (cfg->finalization_type == FINALIZATION_TYPE_DIGEST || @@ -1639,7 +1543,6 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); ahash_request_set_crypt(req, tsgl->sgl, result, vec->psize); - setup_ahash_multibuffer(reqs, cfg, tsgl); err = do_ahash_op(crypto_ahash_digest, req, &wait, cfg->nosimd); if (err) { if (err == vec->digest_error) @@ -1661,7 +1564,6 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); ahash_request_set_crypt(req, NULL, result, 0); - setup_ahash_multibuffer(reqs, cfg, tsgl); err = do_ahash_op(crypto_ahash_init, req, &wait, cfg->nosimd); err = check_nonfinal_ahash_op("init", err, result, digestsize, driver, vec_name, cfg); @@ -1678,7 +1580,6 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, crypto_req_done, &wait); ahash_request_set_crypt(req, pending_sgl, result, pending_len); - setup_ahash_multibuffer(reqs, cfg, tsgl); err = do_ahash_op(crypto_ahash_update, req, &wait, divs[i]->nosimd); err = check_nonfinal_ahash_op("update", err, @@ -1693,7 +1594,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, /* Test ->export() and ->import() */ testmgr_poison(hashstate + statesize, TESTMGR_POISON_LEN); - err = crypto_ahash_export(reqi, hashstate); + err = crypto_ahash_export(req, hashstate); err = check_nonfinal_ahash_op("export", err, result, digestsize, driver, vec_name, cfg); @@ -1706,8 +1607,8 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, return -EOVERFLOW; } - testmgr_poison(reqi->__ctx, crypto_ahash_reqsize(tfm)); - err = crypto_ahash_import(reqi, hashstate); + testmgr_poison(req->__ctx, crypto_ahash_reqsize(tfm)); + err = crypto_ahash_import(req, hashstate); err = check_nonfinal_ahash_op("import", err, result, digestsize, driver, vec_name, cfg); @@ -1721,7 +1622,6 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, ahash_request_set_callback(req, req_flags, crypto_req_done, &wait); ahash_request_set_crypt(req, pending_sgl, result, pending_len); - setup_ahash_multibuffer(reqs, cfg, tsgl); if (cfg->finalization_type == FINALIZATION_TYPE_FINAL) { /* finish with update() and final() */ err = do_ahash_op(crypto_ahash_update, req, &wait, cfg->nosimd); @@ -1753,7 +1653,7 @@ static int test_ahash_vec_cfg(const struct hash_testvec *vec, static int test_hash_vec_cfg(const struct hash_testvec *vec, const char *vec_name, const struct testvec_config *cfg, - struct ahash_request *reqs[MAX_MB_MSGS], + struct ahash_request *req, struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) @@ -1773,12 +1673,11 @@ static int test_hash_vec_cfg(const struct hash_testvec *vec, return err; } - return test_ahash_vec_cfg(vec, vec_name, cfg, reqs, tsgl, hashstate); + return test_ahash_vec_cfg(vec, vec_name, cfg, req, tsgl, hashstate); } static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, - struct ahash_request *reqs[MAX_MB_MSGS], - struct shash_desc *desc, + struct ahash_request *req, struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) { char vec_name[16]; @@ -1790,7 +1689,7 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, for (i = 0; i < ARRAY_SIZE(default_hash_testvec_configs); i++) { err = test_hash_vec_cfg(vec, vec_name, &default_hash_testvec_configs[i], - reqs, desc, tsgl, hashstate); + req, desc, tsgl, hashstate); if (err) return err; } @@ -1807,7 +1706,7 @@ static int test_hash_vec(const struct hash_testvec *vec, unsigned int vec_num, generate_random_testvec_config(&rng, &cfg, cfgname, sizeof(cfgname)); err = test_hash_vec_cfg(vec, vec_name, &cfg, - reqs, desc, tsgl, hashstate); + req, desc, tsgl, hashstate); if (err) return err; cond_resched(); @@ -1866,12 +1765,11 @@ static void generate_random_hash_testvec(struct rnd_state *rng, */ static int test_hash_vs_generic_impl(const char *generic_driver, unsigned int maxkeysize, - struct ahash_request *reqs[MAX_MB_MSGS], + struct ahash_request *req, struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) { - struct ahash_request *req = reqs[0]; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); const unsigned int digestsize = crypto_ahash_digestsize(tfm); const unsigned int blocksize = crypto_ahash_blocksize(tfm); @@ -1969,7 +1867,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver, sizeof(cfgname)); err = test_hash_vec_cfg(&vec, vec_name, cfg, - reqs, desc, tsgl, hashstate); + req, desc, tsgl, hashstate); if (err) goto out; cond_resched(); @@ -1987,7 +1885,7 @@ static int test_hash_vs_generic_impl(const char *generic_driver, #else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */ static int test_hash_vs_generic_impl(const char *generic_driver, unsigned int maxkeysize, - struct ahash_request *reqs[MAX_MB_MSGS], + struct ahash_request *req, struct shash_desc *desc, struct test_sglist *tsgl, u8 *hashstate) @@ -2034,8 +1932,8 @@ static int __alg_test_hash(const struct hash_testvec *vecs, u32 type, u32 mask, const char *generic_driver, unsigned int maxkeysize) { - struct ahash_request *reqs[MAX_MB_MSGS] = {}; struct crypto_ahash *atfm = NULL; + struct ahash_request *req = NULL; struct crypto_shash *stfm = NULL; struct shash_desc *desc = NULL; struct test_sglist *tsgl = NULL; @@ -2059,14 +1957,12 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } driver = crypto_ahash_driver_name(atfm); - for (i = 0; i < MAX_MB_MSGS; i++) { - reqs[i] = ahash_request_alloc(atfm, GFP_KERNEL); - if (!reqs[i]) { - pr_err("alg: hash: failed to allocate request for %s\n", - driver); - err = -ENOMEM; - goto out; - } + req = ahash_request_alloc(atfm, GFP_KERNEL); + if (!req) { + pr_err("alg: hash: failed to allocate request for %s\n", + driver); + err = -ENOMEM; + goto out; } /* @@ -2102,12 +1998,12 @@ static int __alg_test_hash(const struct hash_testvec *vecs, if (fips_enabled && vecs[i].fips_skip) continue; - err = test_hash_vec(&vecs[i], i, reqs, desc, tsgl, hashstate); + err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate); if (err) goto out; cond_resched(); } - err = test_hash_vs_generic_impl(generic_driver, maxkeysize, reqs, + err = test_hash_vs_generic_impl(generic_driver, maxkeysize, req, desc, tsgl, hashstate); out: kfree(hashstate); @@ -2117,12 +2013,7 @@ static int __alg_test_hash(const struct hash_testvec *vecs, } kfree(desc); crypto_free_shash(stfm); - if (reqs[0]) { - ahash_request_set_callback(reqs[0], 0, NULL, NULL); - for (i = 1; i < MAX_MB_MSGS && reqs[i]; i++) - ahash_request_chain(reqs[i], reqs[0]); - ahash_request_free(reqs[0]); - } + ahash_request_free(req); crypto_free_ahash(atfm); return err; } From 13c23cb4ed09466d73f1beae8956810b95add6ef Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Sun, 30 Mar 2025 16:30:20 +0800 Subject: [PATCH 1837/2157] rust: Fix enabling Rust and building with GCC for LoongArch This patch fixes a build issue on LoongArch when Rust is enabled and compiled with GCC by explicitly setting the bindgen target and skipping C flags that Clang doesn't support. Cc: stable@vger.kernel.org Acked-by: Miguel Ojeda Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- rust/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index ea3849eb78f6..2c57c624fe7d 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -232,7 +232,8 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -mfunction-return=thunk-extern -mrecord-mcount -mabi=lp64 \ -mindirect-branch-cs-prefix -mstack-protector-guard% -mtraceback=no \ -mno-pointers-to-nested-functions -mno-string \ - -mno-strict-align -mstrict-align \ + -mno-strict-align -mstrict-align -mdirect-extern-access \ + -mexplicit-relocs -mno-check-zero-division \ -fconserve-stack -falign-jumps=% -falign-loops=% \ -femit-struct-debug-baseonly -fno-ipa-cp-clone -fno-ipa-sra \ -fno-partial-inlining -fplugin-arg-arm_ssp_per_task_plugin-% \ @@ -246,6 +247,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu BINDGEN_TARGET_arm64 := aarch64-linux-gnu +BINDGEN_TARGET_loongarch := loongarch64-linux-gnusf BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) # All warnings are inhibited since GCC builds are very experimental, From 08dac3b83aac99ad0e07139d350079f63bb24095 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 30 Mar 2025 16:30:20 +0800 Subject: [PATCH 1838/2157] LoongArch: Always select HAVE_VIRT_CPU_ACCOUNTING_GEN Option HAVE_VIRT_CPU_ACCOUNTING_GEN is selected by default for 64bit system in kconfig file arch/Kconfig. There is another selection in file arch/loongarch/Kconfig if SMP is not selected. Indeed this option is SMP-safe so it brings out some misunderstanding for non-SMP case. Here always select option HAVE_VIRT_CPU_ACCOUNTING_GEN for future possible 32bit system (it is also 32bit-safe because we no longer use cputime_t). Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 15aaa2e6757e..c6d8a30a1215 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -175,7 +175,7 @@ config LOONGARCH select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ - select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP + select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING select IRQ_LOONGARCH_CPU select LOCK_MM_AND_FIND_VMA From 892a79634196d2729b81bb8e5b029d095704df63 Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sun, 30 Mar 2025 16:31:08 +0800 Subject: [PATCH 1839/2157] LoongArch: Enable UBSAN (Undefined Behavior Sanitizer) Select ARCH_HAS_UBSAN in order to allow the user to enable CONFIG_UBSAN and instrument the entire kernel for ubsan checks. Co-developed-by: Wentao Guan Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index c6d8a30a1215..5e009126eaa1 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -30,6 +30,7 @@ config LOONGARCH select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION From be216cbc1ddf99a51915414ce147311c0dfd50a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1840/2157] LoongArch: Fix help text of CMDLINE_EXTEND in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is the built-in command line appended to the bootloader command line, not the bootloader command line appended to the built-in command line. Fixes: fa96b57c1490 ("LoongArch: Add build infrastructure") Signed-off-by: 谢致邦 (XIE Zhibang) Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 5e009126eaa1..02fc28020ddd 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -386,8 +386,8 @@ config CMDLINE_BOOTLOADER config CMDLINE_EXTEND bool "Use built-in to extend bootloader kernel arguments" help - The command-line arguments provided during boot will be - appended to the built-in command line. This is useful in + The built-in command line will be appended to the command- + line arguments provided during boot. This is useful in cases where the provided arguments are insufficient and you don't want to or cannot modify them. From ec105cadff5d8c0a029a3dc1084cae46cf3f799d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1841/2157] LoongArch: Increase MAX_IO_PICS up to 8 Begin with Loongson-3C6000, the number of PCI host can be as many as 8 for multi-chip machines, and this number should be the same for I/O interrupt controllers. To support these machines we also increase the MAX_IO_PICS up to 8. Cc: stable@vger.kernel.org Tested-by: Mingcong Bai Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index a0ca84da8541..12bd15578c33 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -53,7 +53,7 @@ void spurious_interrupt(void); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); -#define MAX_IO_PICS 2 +#define MAX_IO_PICS 8 #define NR_IRQS (64 + NR_VECTORS * (NR_CPUS + MAX_IO_PICS)) struct acpi_vector_group { From 4103cfe9dcb88010ae4911d3ff417457d1b6a720 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1842/2157] LoongArch: Increase ARCH_DMA_MINALIGN up to 16 ARCH_DMA_MINALIGN is 1 by default, but some LoongArch-specific devices (such as APBDMA) require 16 bytes alignment. When the data buffer length is too small, the hardware may make an error writing cacheline. Thus, it is dangerous to allocate a small memory buffer for DMA. It's always safe to define ARCH_DMA_MINALIGN as L1_CACHE_BYTES but unnecessary (kmalloc() need small memory objects). Therefore, just increase it to 16. Cc: stable@vger.kernel.org Tested-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h index 1b6d09617199..aa622c754414 100644 --- a/arch/loongarch/include/asm/cache.h +++ b/arch/loongarch/include/asm/cache.h @@ -8,6 +8,8 @@ #define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define ARCH_DMA_MINALIGN (16) + #define __read_mostly __section(".data..read_mostly") #endif /* _ASM_CACHE_H */ From 2e3bc71e4f394ecf8f499d21923cf556b4bfa1e7 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1843/2157] LoongArch: Fix device node refcount leak in fdt_cpu_clk_init() Add missing of_node_put() to properly handle the reference count of the device node obtained from of_get_cpu_node(). Fixes: 44a01f1f726a ("LoongArch: Parsing CPU-related information from DTS") Signed-off-by: Miaoqian Lin Signed-off-by: Huacai Chen --- arch/loongarch/kernel/env.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 2f1f5b08638f..27144de5c5fe 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -68,6 +68,8 @@ static int __init fdt_cpu_clk_init(void) return -ENODEV; clk = of_clk_get(np, 0); + of_node_put(np); + if (IS_ERR(clk)) return -ENODEV; From 29c92a41c6d2879c1f62220fe4758dce191bb38f Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1844/2157] LoongArch: Rework the arch_kgdb_breakpoint() implementation The arch_kgdb_breakpoint() function defines the kgdb_breakinst symbol using inline assembly. 1. There's a potential issue where the compiler might inline arch_kgdb_breakpoint(), which would then define the kgdb_breakinst symbol multiple times, leading to a linker error. To prevent this, declare arch_kgdb_breakpoint() as noinline. Fix follow error with LLVM-19 *only* when LTO_CLANG_FULL: LD vmlinux.o ld.lld-19: error: ld-temp.o :3:1: symbol 'kgdb_breakinst' is already defined kgdb_breakinst: break 2 ^ 2. Remove "nop" in the inline assembly because it's meaningless for LoongArch here. 3. Add "STACK_FRAME_NON_STANDARD" for arch_kgdb_breakpoint() to avoid the objtool warning. Fixes: e14dd076964e ("LoongArch: Add basic KGDB & KDB support") Tested-by: Binbin Zhou Co-developed-by: Winston Wen Signed-off-by: Winston Wen Co-developed-by: Wentao Guan Signed-off-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kgdb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c index 445c452d72a7..7be5b4c0c900 100644 --- a/arch/loongarch/kernel/kgdb.c +++ b/arch/loongarch/kernel/kgdb.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -224,13 +225,13 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->csr_era = pc; } -void arch_kgdb_breakpoint(void) +noinline void arch_kgdb_breakpoint(void) { __asm__ __volatile__ ( \ ".globl kgdb_breakinst\n\t" \ - "nop\n" \ "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */ } +STACK_FRAME_NON_STANDARD(arch_kgdb_breakpoint); /* * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, From 7e2586991e36663c9bc48c828b83eab180ad30a9 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1845/2157] LoongArch: BPF: Fix off-by-one error in build_prologue() Vincent reported that running BPF progs with tailcalls on LoongArch causes kernel hard lockup. Debugging the issues shows that the JITed image missing a jirl instruction at the end of the epilogue. There are two passes in JIT compiling, the first pass set the flags and the second pass generates JIT code based on those flags. With BPF progs mixing bpf2bpf and tailcalls, build_prologue() generates N insns in the first pass and then generates N+1 insns in the second pass. This makes epilogue_offset off by one and we will jump to some unexpected insn and cause lockup. Fix this by inserting a nop insn. Cc: stable@vger.kernel.org Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls") Reported-by: Vincent Li Tested-by: Vincent Li Closes: https://lore.kernel.org/loongarch/CAK3+h2w6WESdBN3UCr3WKHByD7D6Q_Ve1EDAjotVrnx6Or_c8g@mail.gmail.com/ Closes: https://lore.kernel.org/bpf/CAK3+h2woEjG_N=-XzqEGaAeCmgu2eTCUc7p6bP4u8Q+DFHm-7g@mail.gmail.com/ Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 2 ++ arch/loongarch/net/bpf_jit.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index ea357a3edc09..2346c0b55043 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -142,6 +142,8 @@ static void build_prologue(struct jit_ctx *ctx) */ if (seen_tail_call(ctx) && seen_call(ctx)) move_reg(ctx, TCC_SAVED, REG_TCC); + else + emit_insn(ctx, nop); ctx->stack_size = stack_adjust; } diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index 68586338ecf8..f9c569f53949 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -27,6 +27,11 @@ struct jit_data { struct jit_ctx ctx; }; +static inline void emit_nop(union loongarch_instruction *insn) +{ + insn->word = INSN_NOP; +} + #define emit_insn(ctx, func, ...) \ do { \ if (ctx->image != NULL) { \ From 52266f1015a8b5aabec7d127f83d105f702b388e Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1846/2157] LoongArch: BPF: Use move_addr() for BPF_PSEUDO_FUNC Vincent reported that running XDP synproxy program on LoongArch results in the following error: JIT doesn't support bpf-to-bpf calls With dmesg: multi-func JIT bug 1391 != 1390 The root cause is that verifier will refill the imm with the correct addresses of bpf_calls for BPF_PSEUDO_FUNC instructions and then run the last pass of JIT. So we generate different JIT code for the same instruction in two passes (one for placeholder and the other for the real address). Let's use move_addr() instead. See commit 64f50f6575721ef0 ("LoongArch, bpf: Use 4 instructions for function address in JIT") for a similar fix. Cc: stable@vger.kernel.org Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls") Reported-by: Vincent Li Tested-by: Vincent Li Closes: https://lore.kernel.org/loongarch/CAK3+h2yfM9FTNiXvEQBkvtuoJrvzmN4c_NZsFXqEk4Cj1tsBNA@mail.gmail.com/T/#u Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 2346c0b55043..a06bf89fed67 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -932,7 +932,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext { const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; - move_imm(ctx, dst, imm64, is32); + if (bpf_pseudo_func(insn)) + move_addr(ctx, dst, imm64); + else + move_imm(ctx, dst, imm64, is32); return 1; } From 60f3caff1492e5b8616b9578c4bedb5c0a88ed14 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1847/2157] LoongArch: BPF: Don't override subprog's return value The verifier test `calls: div by 0 in subprog` triggers a panic at the ld.bu instruction. The ld.bu insn is trying to load byte from memory address returned by the subprog. The subprog actually set the correct address at the a5 register (dedicated register for BPF return values). But at commit 73c359d1d356 ("LoongArch: BPF: Sign-extend return values") we also sign extended a5 to the a0 register (return value in LoongArch). For function call insn, we later propagate the a0 register back to a5 register. This is right for native calls but wrong for bpf2bpf calls which expect zero-extended return value in a5 register. So only move a0 to a5 for native calls (i.e. non-BPF_PSEUDO_CALL). Cc: stable@vger.kernel.org Fixes: 73c359d1d356 ("LoongArch: BPF: Sign-extend return values") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index a06bf89fed67..fa1500d4aa3e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -907,7 +907,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext move_addr(ctx, t1, func_addr); emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); - move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + + if (insn->src_reg != BPF_PSEUDO_CALL) + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + break; /* tail call */ From c271c86a4c72c771b313fd9c3b06db61ab8ab8bf Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 30 Mar 2025 16:31:09 +0800 Subject: [PATCH 1848/2157] LoongArch: vDSO: Remove --hash-style=sysv Glibc added support for .gnu.hash in 2006 and .hash has been obsoleted far before the first LoongArch CPU was taped. Using --hash-style=sysv might imply unaddressed issues and confuse readers. Some architectures use an explicit --hash-style=both for vDSO here, but DT_GNU_HASH has already been supported by Glibc and Musl and become the de-facto standard of the distros when the first LoongArch CPU was taped. So DT_HASH seems just wasting storage space for LoongArch. Just drop the option and rely on the linker default, which is likely "gnu" (Arch, Debian, Gentoo, LFS) on all LoongArch distros (confirmed on Arch, Debian, Gentoo, and LFS; AOSC now defaults to "both" but it seems just an oversight). Following the logic of commit 48f6430505c0b049 ("arm64/vdso: Remove --hash-style=sysv"). Link: https://github.com/AOSC-Dev/aosc-os-abbs/pull/9796 Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/vdso/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index fdde1bcd4e26..cf6c41054396 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -36,8 +36,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ - --hash-style=sysv --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared --build-id -T # # Shared build commands. From a34ea549aacefeb01678320cff18a59ec095382d Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 30 Mar 2025 16:31:12 +0800 Subject: [PATCH 1849/2157] LoongArch: vDSO: Make use of the t8 register for vgetrandom-chacha Make use of the t8 register for vgetrandom-chacha, so we don't need to reuse a register and rematerialize a constant. I Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/vdso/vgetrandom-chacha.S | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S index c2733e6c3a8d..c4dd2bab8825 100644 --- a/arch/loongarch/vdso/vgetrandom-chacha.S +++ b/arch/loongarch/vdso/vgetrandom-chacha.S @@ -58,9 +58,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) #define copy0 t5 #define copy1 t6 #define copy2 t7 - -/* Reuse i as copy3 */ -#define copy3 i +#define copy3 t8 /* Packs to be used with OP_4REG */ #define line0 state0, state1, state2, state3 @@ -99,6 +97,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w copy0, 0x61707865 li.w copy1, 0x3320646e li.w copy2, 0x79622d32 + li.w copy3, 0x6b206574 ld.w cnt_lo, counter, 0 ld.w cnt_hi, counter, 4 @@ -108,7 +107,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) move state0, copy0 move state1, copy1 move state2, copy2 - li.w state3, 0x6b206574 + move state3, copy3 /* state[4,5,..,11] = key */ ld.w state4, key, 0 @@ -167,12 +166,6 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) addi.w i, i, -1 bnez i, .Lpermute - /* - * copy[3] = "expa", materialize it here because copy[3] shares the - * same register with i which just became dead. - */ - li.w copy3, 0x6b206574 - /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ OP_4REG add.w line0, copy st.w state0, output, 0 From 17ba839c3c6c95562f329340e67da432309dd0d4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 30 Mar 2025 16:31:16 +0800 Subject: [PATCH 1850/2157] LoongArch: Update Loongson-3 default config file 1. Drop RT_GROUP_SCHED; 2. Enable CGROUP_DMEM. 3. Enable FIRMWARE_EDID/DRM_LOAD_EDID_FIRMWARE. 4. Enable EDAC and EDAC_LOONGSON driver. 5. Enable some Realtek WiFi driver. Signed-off-by: Celeste Liu Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 73c77500ac46..7348b92a7be9 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -24,9 +24,9 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_DMEM=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y @@ -666,6 +666,10 @@ CONFIG_RTW88_8723DE=m CONFIG_RTW88_8723DU=m CONFIG_RTW88_8821CE=m CONFIG_RTW88_8821CU=m +CONFIG_RTW88_8821AU=m +CONFIG_RTW88_8812AU=m +CONFIG_RTW88_8814AE=m +CONFIG_RTW88_8814AU=m CONFIG_RTW89=m CONFIG_RTW89_8851BE=m CONFIG_RTW89_8852AE=m @@ -749,6 +753,7 @@ CONFIG_MEDIA_PCI_SUPPORT=y CONFIG_VIDEO_BT848=m CONFIG_DVB_BT8XX=m CONFIG_DRM=y +CONFIG_DRM_LOAD_EDID_FIRMWARE=y CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y CONFIG_DRM_AMDGPU=m @@ -762,6 +767,7 @@ CONFIG_DRM_LOONGSON=y CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y +CONFIG_FIRMWARE_EDID=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m # CONFIG_VGA_CONSOLE is not set @@ -844,6 +850,9 @@ CONFIG_TYPEC_TCPCI=m CONFIG_TYPEC_UCSI=m CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m +CONFIG_EDAC=y +# CONFIG_EDAC_LEGACY_SYSFS is not set +CONFIG_EDAC_LOONGSON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_RTC_DRV_LOONGSON=y From 85a063b8b281e144ed96463936fb4e6b3d4fe9e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 22:08:07 +0100 Subject: [PATCH 1851/2157] drm/i2c: tda998x: select CONFIG_DRM_KMS_HELPER This fails to build without the KMS helper functions: x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o: in function `tda998x_detect_work': tda998x_drv.c:(.text+0x4e6): undefined reference to `drm_kms_helper_hotplug_event' x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o: in function `tda998x_bind': tda998x_drv.c:(.text.unlikely+0x33): undefined reference to `drm_simple_encoder_init' x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o:(.rodata+0x584): undefined reference to `drm_atomic_helper_connector_reset' x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o:(.rodata+0x590): undefined reference to `drm_helper_probe_single_connector_modes' x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o:(.rodata+0x5a4): undefined reference to `drm_atomic_helper_connector_duplicate_state' x86_64-linux-ld: drivers/gpu/drm/bridge/tda998x_drv.o:(.rodata+0x5a8): undefined reference to `drm_atomic_helper_connector_destroy_state' Select the missing symbol and fix up the broken whitespace. Fixes: 325ba852d148 ("drm/i2c: move TDA998x driver under drivers/gpu/drm/bridge") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250324210824.3094660-1-arnd@kernel.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/Kconfig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index d20f1646dac2..09a1be234f71 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -91,12 +91,13 @@ config DRM_FSL_LDB Support for i.MX8MP DPI-to-LVDS on-SoC encoder. config DRM_I2C_NXP_TDA998X - tristate "NXP Semiconductors TDA998X HDMI encoder" - default m if DRM_TILCDC - select CEC_CORE if CEC_NOTIFIER - select SND_SOC_HDMI_CODEC if SND_SOC - help - Support for NXP Semiconductors TDA998X HDMI encoders. + tristate "NXP Semiconductors TDA998X HDMI encoder" + default m if DRM_TILCDC + select CEC_CORE if CEC_NOTIFIER + select DRM_KMS_HELPER + select SND_SOC_HDMI_CODEC if SND_SOC + help + Support for NXP Semiconductors TDA998X HDMI encoders. config DRM_ITE_IT6263 tristate "ITE IT6263 LVDS/HDMI bridge" From fcc0f16923621e670d5ccf486160e4a1b960b17f Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 21 Mar 2025 10:30:32 +0800 Subject: [PATCH 1852/2157] ASoC: SOF: Intel: Let SND_SOF_SOF_HDA_SDW_BPT select SND_HDA_EXT_CORE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_SND_HDA_EXT_CORE is required for CONFIG_SND_SOF_SOF_HDA_SDW_BPT. Fixes: 5d5cb86fb46e ("ASoC: SOF: Intel: hda-sdw-bpt: add helpers for SoundWire BPT DMA") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503170249.iPSBJSf5-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202503162042.2cNgaBmC-lkp@intel.com/ Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Mark Brown Acked-by: Bard Liao Link: https://lore.kernel.org/r/20250321023032.7420-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- sound/soc/sof/intel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index fae3598fd601..dc1d21de4ab7 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -345,6 +345,7 @@ endif ## SND_SOC_SOF_HDA_GENERIC config SND_SOF_SOF_HDA_SDW_BPT tristate + select SND_HDA_EXT_CORE help This option is not user-selectable but automagically handled by 'select' statements at a higher level. From e5182305a5199246dbcb4053299dcb1c8867b6ff Mon Sep 17 00:00:00 2001 From: Sharan Kumar M Date: Sat, 29 Mar 2025 21:11:06 +0530 Subject: [PATCH 1853/2157] ALSA: hda/realtek: Enable Mute LED on HP OMEN 16 Laptop xd000xx This patch adds the HP OMEN 16 Laptop xd000xx to enable mute led. it uses ALC245_FIXUP_HP_MUTE_LED_COEFBIT with a slight modification setting mute_led_coef.off to 0(it was set to 4 i guess in that function) which i referred to your previous patch disscusion https://bugzilla.kernel.org/show_bug.cgi?id=214735 . i am not sure whether i can modify the current working function so i added another version calling ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT. and both works for me. Tested on 6.13.4-arch1-1 to 6.14.0-arch1-1 Signed-off-by: Sharan Kumar M Cc: Link: https://patch.msgid.link/20250329154105.7618-2-sharweshraajan@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bbe22530e241..b69b659ae659 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4743,6 +4743,22 @@ static void alc245_fixup_hp_mute_led_coefbit(struct hda_codec *codec, } } +static void alc245_fixup_hp_mute_led_v1_coefbit(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef.idx = 0x0b; + spec->mute_led_coef.mask = 1 << 3; + spec->mute_led_coef.on = 1 << 3; + spec->mute_led_coef.off = 0; + snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); + } +} + /* turn on/off mic-mute LED per capture hook by coef bit */ static int coef_micmute_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) @@ -7911,6 +7927,7 @@ enum { ALC245_FIXUP_TAS2781_SPI_2, ALC287_FIXUP_YOGA7_14ARB7_I2C, ALC245_FIXUP_HP_MUTE_LED_COEFBIT, + ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT, ALC245_FIXUP_HP_X360_MUTE_LEDS, ALC287_FIXUP_THINKPAD_I2S_SPK, ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, @@ -10164,6 +10181,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_mute_led_coefbit, }, + [ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_mute_led_v1_coefbit, + }, [ALC245_FIXUP_HP_X360_MUTE_LEDS] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_mute_led_coefbit, @@ -10658,6 +10679,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8bb3, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bb4, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bdf, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), From 986da36806b1a45b8502f0cc23cfcc685c8f2a1c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Mar 2025 10:07:58 +0300 Subject: [PATCH 1854/2157] ALSA/hda: intel-sdw-acpi: Remove (explicitly) unused header The fwnode.h is not supposed to be used by the drivers as it has the definitions for the core parts for different device property provider implementations. Drop it. Note, that fwnode API for drivers is provided in property.h which is included here. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250331070758.3986134-1-andriy.shevchenko@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 49d3e0e30073..8686adaf4531 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include #include From 465e5486aa5e1cdedc910bc3487ca92c5e6d51c4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 21:30:06 +0200 Subject: [PATCH 1855/2157] i3c: master: Drop duplicate check before calling OF APIs OF APIs are usually NULL-aware and returns an error in case when device node is not present or supported. We already have a check for the returned value, no need to check for the parameter. Signed-off-by: Andy Shevchenko Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250321193044.457649-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index d5dc4180afbc..fd26c4bb8b34 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2276,7 +2276,7 @@ static int of_i3c_master_add_dev(struct i3c_master_controller *master, u32 reg[3]; int ret; - if (!master || !node) + if (!master) return -EINVAL; ret = of_property_read_u32_array(node, "reg", reg, ARRAY_SIZE(reg)); @@ -2369,14 +2369,10 @@ static u8 i3c_master_i2c_get_lvr(struct i2c_client *client) { /* Fall back to no spike filters and FM bus mode. */ u8 lvr = I3C_LVR_I2C_INDEX(2) | I3C_LVR_I2C_FM_MODE; + u32 reg[3]; - if (client->dev.of_node) { - u32 reg[3]; - - if (!of_property_read_u32_array(client->dev.of_node, "reg", - reg, ARRAY_SIZE(reg))) - lvr = reg[2]; - } + if (!of_property_read_u32_array(client->dev.of_node, "reg", reg, ARRAY_SIZE(reg))) + lvr = reg[2]; return lvr; } @@ -2486,7 +2482,7 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) struct i2c_adapter *adap = i3c_master_to_i2c_adapter(master); struct i2c_dev_desc *i2cdev; struct i2c_dev_boardinfo *i2cboardinfo; - int ret, id = -ENODEV; + int ret, id; adap->dev.parent = master->dev.parent; adap->owner = master->dev.parent->driver->owner; @@ -2497,9 +2493,7 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) adap->timeout = 1000; adap->retries = 3; - if (master->dev.of_node) - id = of_alias_get_id(master->dev.of_node, "i2c"); - + id = of_alias_get_id(master->dev.of_node, "i2c"); if (id >= 0) { adap->nr = id; ret = i2c_add_numbered_adapter(adap); From bd496a44f041da9ef3afe14d1d6193d460424e91 Mon Sep 17 00:00:00 2001 From: Manjunatha Venkatesh Date: Wed, 26 Mar 2025 18:00:46 +0530 Subject: [PATCH 1856/2157] i3c: Add NULL pointer check in i3c_master_queue_ibi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The I3C master driver may receive an IBI from a target device that has not been probed yet. In such cases, the master calls `i3c_master_queue_ibi()` to queue an IBI work task, leading to "Unable to handle kernel read from unreadable memory" and resulting in a kernel panic. Typical IBI handling flow: 1. The I3C master scans target devices and probes their respective drivers. 2. The target device driver calls `i3c_device_request_ibi()` to enable IBI and assigns `dev->ibi = ibi`. 3. The I3C master receives an IBI from the target device and calls `i3c_master_queue_ibi()` to queue the target device driver’s IBI handler task. However, since target device events are asynchronous to the I3C probe sequence, step 3 may occur before step 2, causing `dev->ibi` to be `NULL`, leading to a kernel panic. Add a NULL pointer check in `i3c_master_queue_ibi()` to prevent accessing an uninitialized `dev->ibi`, ensuring stability. Fixes: 3a379bbcea0af ("i3c: Add core I3C infrastructure") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/Z9gjGYudiYyl3bSe@lizhi-Precision-Tower-5810/ Signed-off-by: Manjunatha Venkatesh Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250326123047.2797946-1-manjunatha.venkatesh@nxp.com Signed-off-by: Alexandre Belloni --- drivers/i3c/master.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index fd26c4bb8b34..fd81871609d9 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2555,6 +2555,9 @@ static void i3c_master_unregister_i3c_devs(struct i3c_master_controller *master) */ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot) { + if (!dev->ibi || !slot) + return; + atomic_inc(&dev->ibi->pending_ibis); queue_work(dev->ibi->wq, &slot->work); } From 121df45b37a1016ee6828c2ca3ba825f3e18a8c1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 20 Mar 2025 13:25:38 +0100 Subject: [PATCH 1857/2157] s390/entry: Fix setting _CIF_MCCK_GUEST with lowcore relocation When lowcore relocation is enabled, the machine check handler doesn't use the lowcore address when setting _CIF_MCCK_GUEST. Fix this by adding the missing base register. Fixes: 0001b7bbc53a ("s390/entry: Make mchk_int_handler() ready for lowcore relocation") Reported-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index b0c2356697fd..dd291c9ad6a6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -468,7 +468,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 From 1f266fd704ef3be8a4b2a066edf25b75fd90a9c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 20 Mar 2025 14:49:09 +0100 Subject: [PATCH 1858/2157] s390/lowcore: Remove unused machine_flags The machine_flags member in struct lowcore is not used anymore. Remove it. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 4 +--- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 1a31f1f93ed3..e99e9c87b1ce 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -164,9 +164,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ - __u64 machine_flags; /* 0x03c8 */ - __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ __u32 return_lpswe; /* 0x0400 */ __u32 return_mcck_lpswe; /* 0x0404 */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b6686d63b754..f244c5560e7f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -414,7 +414,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f2f05c5277f4..f9908a41bfe8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -265,7 +265,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); From 8691abd3afaadd816a298503ec1a759df1305d2e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 20 Mar 2025 17:26:12 +0100 Subject: [PATCH 1859/2157] s390/pci: Fix zpci_bus_is_isolated_vf() for non-VFs For non-VFs, zpci_bus_is_isolated_vf() should return false because they aren't VFs. While zpci_iov_find_parent_pf() specifically checks if a function is a VF, it then simply returns that there is no parent. The simplistic check for a parent then leads to these functions being confused with isolated VFs and isolating them on their own domain even if sibling PFs should share the domain. Fix this by explicitly checking if a function is not a VF. Note also that at this point the case where RIDs are ignored is already handled and in this case all PCI functions get isolated by being detected in zpci_bus_is_multifunction_root(). Cc: stable@vger.kernel.org Fixes: 2844ddbd540f ("s390/pci: Fix handling of isolated VFs") Signed-off-by: Niklas Schnelle Reviewed-by: Halil Pasic Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 0e725039861f..4d45edb026f2 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -360,6 +360,9 @@ static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev { struct pci_dev *pdev; + if (!zdev->vfn) + return false; + pdev = zpci_iov_find_parent_pf(zbus, zdev); if (!pdev) return true; From d104937874216421f29dd54e6df93cbb994bc100 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 21 Mar 2025 13:22:12 +0100 Subject: [PATCH 1860/2157] s390/kvm: Split kvm_host header file In order to generate asm offsets into kvm_s390_sie_block linux/kvm_host.h is included in asm-offsets.c. This causes quite often header dependency problems, since linux/kvm_host.h pulls in a lot of other header files. Solve this problem and split out the hardware structure declarations into a separate header file. Include only the new header file into asm-offsets.c instead of linux/kvm_host.h. This is sufficient to generate the two asm offsets required for kvm (__SIE_PROG0C and __SIE_PROG20). Acked-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kvm_host.h | 339 +----------------------- arch/s390/include/asm/kvm_host_types.h | 348 +++++++++++++++++++++++++ arch/s390/kernel/asm-offsets.c | 2 +- 3 files changed, 350 insertions(+), 339 deletions(-) create mode 100644 arch/s390/include/asm/kvm_host_types.h diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 424f899d8163..cb89e54ada25 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,14 +20,13 @@ #include #include #include +#include #include #include #include #include #include -#define KVM_S390_BSCA_CPU_SLOTS 64 -#define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 #define KVM_INTERNAL_MEM_SLOTS 1 @@ -51,342 +50,6 @@ #define KVM_REQ_REFRESH_GUEST_PREFIX \ KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define SIGP_CTRL_C 0x80 -#define SIGP_CTRL_SCN_MASK 0x3f - -union bsca_sigp_ctrl { - __u8 value; - struct { - __u8 c : 1; - __u8 r : 1; - __u8 scn : 6; - }; -}; - -union esca_sigp_ctrl { - __u16 value; - struct { - __u8 c : 1; - __u8 reserved: 7; - __u8 scn; - }; -}; - -struct esca_entry { - union esca_sigp_ctrl sigp_ctrl; - __u16 reserved1[3]; - __u64 sda; - __u64 reserved2[6]; -}; - -struct bsca_entry { - __u8 reserved0; - union bsca_sigp_ctrl sigp_ctrl; - __u16 reserved[3]; - __u64 sda; - __u64 reserved2[2]; -}; - -union ipte_control { - unsigned long val; - struct { - unsigned long k : 1; - unsigned long kh : 31; - unsigned long kg : 32; - }; -}; - -/* - * Utility is defined as two bytes but having it four bytes wide - * generates more efficient code. Since the following bytes are - * reserved this makes no functional difference. - */ -union sca_utility { - __u32 val; - struct { - __u32 mtcr : 1; - __u32 : 31; - }; -}; - -struct bsca_block { - union ipte_control ipte_control; - __u64 reserved[5]; - __u64 mcn; - union sca_utility utility; - __u8 reserved2[4]; - struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; -}; - -struct esca_block { - union ipte_control ipte_control; - __u64 reserved1[6]; - union sca_utility utility; - __u8 reserved2[4]; - __u64 mcn[4]; - __u64 reserved3[20]; - struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; -}; - -/* - * This struct is used to store some machine check info from lowcore - * for machine checks that happen while the guest is running. - * This info in host's lowcore might be overwritten by a second machine - * check from host when host is in the machine check's high-level handling. - * The size is 24 bytes. - */ -struct mcck_volatile_info { - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 reserved; -}; - -#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ - CR0_MEASUREMENT_ALERT_SUBMASK) -#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ - CR14_EXTERNAL_DAMAGE_SUBMASK) - -#define SIDAD_SIZE_MASK 0xff -#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) -#define sida_size(sie_block) \ - ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) - -#define CPUSTAT_STOPPED 0x80000000 -#define CPUSTAT_WAIT 0x10000000 -#define CPUSTAT_ECALL_PEND 0x08000000 -#define CPUSTAT_STOP_INT 0x04000000 -#define CPUSTAT_IO_INT 0x02000000 -#define CPUSTAT_EXT_INT 0x01000000 -#define CPUSTAT_RUNNING 0x00800000 -#define CPUSTAT_RETAINED 0x00400000 -#define CPUSTAT_TIMING_SUB 0x00020000 -#define CPUSTAT_SIE_SUB 0x00010000 -#define CPUSTAT_RRF 0x00008000 -#define CPUSTAT_SLSV 0x00004000 -#define CPUSTAT_SLSR 0x00002000 -#define CPUSTAT_ZARCH 0x00000800 -#define CPUSTAT_MCDS 0x00000100 -#define CPUSTAT_KSS 0x00000200 -#define CPUSTAT_SM 0x00000080 -#define CPUSTAT_IBS 0x00000040 -#define CPUSTAT_GED2 0x00000010 -#define CPUSTAT_G 0x00000008 -#define CPUSTAT_GED 0x00000004 -#define CPUSTAT_J 0x00000002 -#define CPUSTAT_P 0x00000001 - -struct kvm_s390_sie_block { - atomic_t cpuflags; /* 0x0000 */ - __u32 : 1; /* 0x0004 */ - __u32 prefix : 18; - __u32 : 1; - __u32 ibc : 12; - __u8 reserved08[4]; /* 0x0008 */ -#define PROG_IN_SIE (1<<0) - __u32 prog0c; /* 0x000c */ - union { - __u8 reserved10[16]; /* 0x0010 */ - struct { - __u64 pv_handle_cpu; - __u64 pv_handle_config; - }; - }; -#define PROG_BLOCK_SIE (1<<0) -#define PROG_REQUEST (1<<1) - atomic_t prog20; /* 0x0020 */ - __u8 reserved24[4]; /* 0x0024 */ - __u64 cputm; /* 0x0028 */ - __u64 ckc; /* 0x0030 */ - __u64 epoch; /* 0x0038 */ - __u32 svcc; /* 0x0040 */ -#define LCTL_CR0 0x8000 -#define LCTL_CR6 0x0200 -#define LCTL_CR9 0x0040 -#define LCTL_CR10 0x0020 -#define LCTL_CR11 0x0010 -#define LCTL_CR14 0x0002 - __u16 lctl; /* 0x0044 */ - __s16 icpua; /* 0x0046 */ -#define ICTL_OPEREXC 0x80000000 -#define ICTL_PINT 0x20000000 -#define ICTL_LPSW 0x00400000 -#define ICTL_STCTL 0x00040000 -#define ICTL_ISKE 0x00004000 -#define ICTL_SSKE 0x00002000 -#define ICTL_RRBE 0x00001000 -#define ICTL_TPROT 0x00000200 - __u32 ictl; /* 0x0048 */ -#define ECA_CEI 0x80000000 -#define ECA_IB 0x40000000 -#define ECA_SIGPI 0x10000000 -#define ECA_MVPGI 0x01000000 -#define ECA_AIV 0x00200000 -#define ECA_VX 0x00020000 -#define ECA_PROTEXCI 0x00002000 -#define ECA_APIE 0x00000008 -#define ECA_SII 0x00000001 - __u32 eca; /* 0x004c */ -#define ICPT_INST 0x04 -#define ICPT_PROGI 0x08 -#define ICPT_INSTPROGI 0x0C -#define ICPT_EXTREQ 0x10 -#define ICPT_EXTINT 0x14 -#define ICPT_IOREQ 0x18 -#define ICPT_WAIT 0x1c -#define ICPT_VALIDITY 0x20 -#define ICPT_STOP 0x28 -#define ICPT_OPEREXC 0x2C -#define ICPT_PARTEXEC 0x38 -#define ICPT_IOINST 0x40 -#define ICPT_KSS 0x5c -#define ICPT_MCHKREQ 0x60 -#define ICPT_INT_ENABLE 0x64 -#define ICPT_PV_INSTR 0x68 -#define ICPT_PV_NOTIFY 0x6c -#define ICPT_PV_PREF 0x70 - __u8 icptcode; /* 0x0050 */ - __u8 icptstatus; /* 0x0051 */ - __u16 ihcpu; /* 0x0052 */ - __u8 reserved54; /* 0x0054 */ -#define IICTL_CODE_NONE 0x00 -#define IICTL_CODE_MCHK 0x01 -#define IICTL_CODE_EXT 0x02 -#define IICTL_CODE_IO 0x03 -#define IICTL_CODE_RESTART 0x04 -#define IICTL_CODE_SPECIFICATION 0x10 -#define IICTL_CODE_OPERAND 0x11 - __u8 iictl; /* 0x0055 */ - __u16 ipa; /* 0x0056 */ - __u32 ipb; /* 0x0058 */ - __u32 scaoh; /* 0x005c */ -#define FPF_BPBC 0x20 - __u8 fpf; /* 0x0060 */ -#define ECB_GS 0x40 -#define ECB_TE 0x10 -#define ECB_SPECI 0x08 -#define ECB_SRSI 0x04 -#define ECB_HOSTPROTINT 0x02 -#define ECB_PTF 0x01 - __u8 ecb; /* 0x0061 */ -#define ECB2_CMMA 0x80 -#define ECB2_IEP 0x20 -#define ECB2_PFMFI 0x08 -#define ECB2_ESCA 0x04 -#define ECB2_ZPCI_LSI 0x02 - __u8 ecb2; /* 0x0062 */ -#define ECB3_AISI 0x20 -#define ECB3_AISII 0x10 -#define ECB3_DEA 0x08 -#define ECB3_AES 0x04 -#define ECB3_RI 0x01 - __u8 ecb3; /* 0x0063 */ -#define ESCA_SCAOL_MASK ~0x3fU - __u32 scaol; /* 0x0064 */ - __u8 sdf; /* 0x0068 */ - __u8 epdx; /* 0x0069 */ - __u8 cpnc; /* 0x006a */ - __u8 reserved6b; /* 0x006b */ - __u32 todpr; /* 0x006c */ -#define GISA_FORMAT1 0x00000001 - __u32 gd; /* 0x0070 */ - __u8 reserved74[12]; /* 0x0074 */ - __u64 mso; /* 0x0080 */ - __u64 msl; /* 0x0088 */ - psw_t gpsw; /* 0x0090 */ - __u64 gg14; /* 0x00a0 */ - __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[8]; /* 0x00b0 */ -#define HPID_KVM 0x4 -#define HPID_VSIE 0x5 - __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[7]; /* 0x00b9 */ - union { - struct { - __u32 eiparams; /* 0x00c0 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ - }; - __u64 mcic; /* 0x00c0 */ - } __packed; - __u32 reservedc8; /* 0x00c8 */ - union { - struct { - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - }; - __u32 edc; /* 0x00cc */ - } __packed; - union { - struct { - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ - }; - __u64 faddr; /* 0x00d0 */ - } __packed; - __u64 peraddr; /* 0x00d8 */ - __u8 eai; /* 0x00e0 */ - __u8 peraid; /* 0x00e1 */ - __u8 oai; /* 0x00e2 */ - __u8 armid; /* 0x00e3 */ - __u8 reservede4[4]; /* 0x00e4 */ - union { - __u64 tecmc; /* 0x00e8 */ - struct { - __u16 subchannel_id; /* 0x00e8 */ - __u16 subchannel_nr; /* 0x00ea */ - __u32 io_int_parm; /* 0x00ec */ - __u32 io_int_word; /* 0x00f0 */ - }; - } __packed; - __u8 reservedf4[8]; /* 0x00f4 */ -#define CRYCB_FORMAT_MASK 0x00000003 -#define CRYCB_FORMAT0 0x00000000 -#define CRYCB_FORMAT1 0x00000001 -#define CRYCB_FORMAT2 0x00000003 - __u32 crycbd; /* 0x00fc */ - __u64 gcr[16]; /* 0x0100 */ - union { - __u64 gbea; /* 0x0180 */ - __u64 sidad; - }; - __u8 reserved188[8]; /* 0x0188 */ - __u64 sdnxo; /* 0x0190 */ - __u8 reserved198[8]; /* 0x0198 */ - __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[20]; /* 0x01a4 */ - __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[8]; /* 0x01c0 */ -#define ECD_HOSTREGMGMT 0x20000000 -#define ECD_MEF 0x08000000 -#define ECD_ETOKENF 0x02000000 -#define ECD_ECC 0x00200000 -#define ECD_HMAC 0x00004000 - __u32 ecd; /* 0x01c8 */ - __u8 reserved1cc[18]; /* 0x01cc */ - __u64 pp; /* 0x01de */ - __u8 reserved1e6[2]; /* 0x01e6 */ - __u64 itdba; /* 0x01e8 */ - __u64 riccbd; /* 0x01f0 */ - __u64 gvrd; /* 0x01f8 */ -} __packed __aligned(512); - -struct kvm_s390_itdb { - __u8 data[256]; -}; - -struct sie_page { - struct kvm_s390_sie_block sie_block; - struct mcck_volatile_info mcck_info; /* 0x0200 */ - __u8 reserved218[360]; /* 0x0218 */ - __u64 pv_grregs[16]; /* 0x0380 */ - __u8 reserved400[512]; /* 0x0400 */ - struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ -}; - struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; u64 exit_userspace; diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h new file mode 100644 index 000000000000..1394d3fb648f --- /dev/null +++ b/arch/s390/include/asm/kvm_host_types.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_KVM_HOST_TYPES_H +#define _ASM_KVM_HOST_TYPES_H + +#include +#include + +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 + +#define SIGP_CTRL_C 0x80 +#define SIGP_CTRL_SCN_MASK 0x3f + +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +}; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +}; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +}; + +struct bsca_entry { + __u8 reserved0; + union bsca_sigp_ctrl sigp_ctrl; + __u16 reserved[3]; + __u64 sda; + __u64 reserved2[2]; +}; + +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; + +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. + */ +union sca_utility { + __u32 val; + struct { + __u32 mtcr : 1; + __u32 : 31; + }; +}; + +struct bsca_block { + union ipte_control ipte_control; + __u64 reserved[5]; + __u64 mcn; + union sca_utility utility; + __u8 reserved2[4]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; +}; + +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[6]; + union sca_utility utility; + __u8 reserved2[4]; + __u64 mcn[4]; + __u64 reserved3[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +}; + +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + +#define SIDAD_SIZE_MASK 0xff +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) +#define sida_size(sie_block) \ + ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE (1<<0) +#define PROG_REQUEST (1<<1) + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 +#define ECD_HMAC 0x00004000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +struct kvm_s390_itdb { + __u8 data[256]; +}; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[360]; /* 0x0218 */ + __u64 pv_grregs[16]; /* 0x0380 */ + __u8 reserved400[512]; /* 0x0400 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +}; + +#endif /* _ASM_KVM_HOST_TYPES_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 49bb197c8c81..388db1af1a7d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -8,11 +8,11 @@ #define ASM_OFFSETS_C #include -#include #include #include #include #include +#include #include int main(void) From 5eeec5694514527e509028520b0d356eb58a2f50 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 21 Mar 2025 13:22:13 +0100 Subject: [PATCH 1861/2157] s390/asm-offsets: Include ftrace_regs.h instead of ftrace.h Reduce header dependencies by including ftrace_regs.h and ptrace.h, which does not include other header files, instead of ftrace.h which pulls in various other header files. This is sufficient for __FTRACE_REGS_PT_REGS and __FTRACE_REGS_SIZE. Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/asm-offsets.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 388db1af1a7d..6e02e9074bb0 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -11,9 +11,10 @@ #include #include #include -#include +#include #include #include +#include int main(void) { From b9be1bee2f271ed3c68e0bd3ec099951b656447b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 21 Mar 2025 13:22:14 +0100 Subject: [PATCH 1862/2157] s390/asm-offsets: Remove ASM_OFFSETS_C Remove ASM_OFFSETS_C which is used as guard in thread_info.h to decide if asm-offsets can be included or not. There is no reason to include asm-offsets.h in thread_info.h anymore. Remove the define and the not needed include. Explicitly include asm-offsets.h in all header files which require it, and where it used to be included implicitly via thread_info.h. This reduces header dependencies. Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/thread_info.h | 3 --- arch/s390/kernel/asm-offsets.c | 2 -- arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/early.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/mm/pfault.c | 1 + 6 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1ad5e82c2f65..91f569cae1ce 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -9,9 +9,6 @@ #define _ASM_THREAD_INFO_H #include -#ifndef ASM_OFFSETS_C -#include -#endif /* * General size of kernel stacks diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6e02e9074bb0..841e05f7fa7e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,8 +5,6 @@ * and format the required data. */ -#define ASM_OFFSETS_C - #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 911b95cd57e5..dd410962ecbe 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b6d3c7a6209d..54cf0923050f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 40edfde25f5b..b153a395f46d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index b01e73f5b9b8..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include From 3232f1c8086506c5728f46e5a0d59b860c303b8d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 21 Mar 2025 13:22:15 +0100 Subject: [PATCH 1863/2157] s390/processor: Use bitop functions for cpu flag helper functions Use bitop functions to implement cpu flag helper functions. This way it is guaranteed that bits cannot get lost if modified in different contexts on a cpu. E.g. if process context is interrupted in the middle of a read-modify-write sequence while modifying cpu flags, and within interrupt context cpu flags are also modified, bits can get lost. There is currently no code which is doing this, however upcoming code could potentially run into this problem. Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d09a92db95f7..6c8063cb8fe7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -62,33 +63,27 @@ static __always_inline struct pcpu *this_pcpu(void) static __always_inline void set_cpu_flag(int flag) { - this_pcpu()->flags |= (1UL << flag); + set_bit(flag, &this_pcpu()->flags); } static __always_inline void clear_cpu_flag(int flag) { - this_pcpu()->flags &= ~(1UL << flag); + clear_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_cpu_flag(int flag) { - return this_pcpu()->flags & (1UL << flag); + return test_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_set_cpu_flag(int flag) { - if (test_cpu_flag(flag)) - return true; - set_cpu_flag(flag); - return false; + return test_and_set_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_clear_cpu_flag(int flag) { - if (!test_cpu_flag(flag)) - return false; - clear_cpu_flag(flag); - return true; + return test_and_clear_bit(flag, &this_pcpu()->flags); } /* @@ -97,7 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); + return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) From 991a20173a1fbafd9fc0df0c7e17bb62d44a4deb Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 25 Mar 2025 19:02:45 +0100 Subject: [PATCH 1864/2157] s390: Fix linker error when -no-pie option is unavailable The kernel build may fail if the linker does not support -no-pie option, as it always included in LDFLAGS_vmlinux. Error log: s390-linux-ld: unable to disambiguate: -no-pie (did you mean --no-pie ?) Although the GNU linker defaults to -no-pie, the ability to explicitly specify this option was introduced in binutils 2.36. Hence, fix it by adding -no-pie to LDFLAGS_vmlinux only when it is available. Cc: stable@vger.kernel.org Fixes: 00cda11d3b2e ("s390: Compile kernel with -fPIC and link with -no-pie") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503220342.T3fElO9L-lkp@intel.com/ Suggested-by: Jens Remus Reviewed-by: Jens Remus Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5fae311203c2..fd3b70d9aab1 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -15,7 +15,7 @@ KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 KBUILD_CFLAGS += -fPIC -LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none +LDFLAGS_vmlinux := $(call ld-option,-no-pie) --emit-relocs --discard-none extra_tools := relocs aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ From 1018424ace7ed6dee9ddc36256162250017e4401 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Mar 2025 10:14:43 +0100 Subject: [PATCH 1865/2157] s390/smp: Add support for HOTPLUG_SMT Add support for HOTPLUG_SMT. With this the s390 specific "nosmt" kernel command line parameter handling is replaced with common code handling. This means that just specifying "nosmt" still enables smt from an architectural point of view, however only the primary (base) cpu can be set online. Enabling smt during runtime via /sys/devices/system/cpu/smt/control allows to set secondary cpus online. This way "nosmt" works like on other architectures where enabling and disabling smt during runtime is possible. If "nosmt=force" is specified smt is also still enabled from an architectural point of view, but there is no way to set secondary cpus online during runtime, also like on other architectures. In order to disable smt from architectural point of view, which was previously achieved with the s390 specific "nosmt" command line option, "smt=1" can be used. Tested-by: Mete Durlu Reviewed-by: Mete Durlu Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- arch/s390/Kconfig | 1 + arch/s390/include/asm/topology.h | 6 ++++++ arch/s390/kernel/smp.c | 8 +------- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5e351ac52cca..0d97f811d8b1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4236,10 +4236,10 @@ nosmp [SMP,EARLY] Tells an SMP kernel to act as a UP kernel, and disable the IO APIC. legacy for "maxcpus=0". - nosmt [KNL,MIPS,PPC,S390,EARLY] Disable symmetric multithreading (SMT). + nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,X86,PPC] Disable symmetric multithreading (SMT). + [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6412e39a795d..e220589dae11 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -239,6 +239,7 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE + select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI select KASAN_VMALLOC if KASAN diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cef06bffad80..44110847342a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -61,6 +61,12 @@ static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return smp_get_base_cpu(cpu) == cpu; +} +#define topology_is_primary_thread topology_is_primary_thread + #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f9908a41bfe8..63f41dfaba85 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -99,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -808,6 +801,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; From af6bfcd1698d822ab6a2d543b884b3eedc8c7d82 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Mar 2025 08:57:10 +0100 Subject: [PATCH 1866/2157] s390/mm: Dump fault info in case of low address protection fault In case of an unexpected low address protection fault in user mode dump fault info to make debugging a bit easier. At least the teid is valid, while dumping the page table is racy, since no lock is held. But it might still give some hints. Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 31a763e05287..da84ff6770de 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -376,6 +376,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* From 807c2743035446cf0484772a76e1c35ce27fd8e3 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 12 Mar 2025 11:32:18 +0100 Subject: [PATCH 1867/2157] s390/pci: Fix dev.dma_range_map missing sentinel element The fixed commit sets up dev.dma_range_map but missed that this is supposed to be an array of struct bus_dma_region with a sentinel element with the size field set to 0 at the end. This would lead to overruns in e.g. dma_range_map_min(). It could also result in wrong translations instead of DMA_MAPPING_ERROR in translate_phys_to_dma() if the paddr were to not fit in the aperture. Fix this by using the dma_direct_set_offset() helper which creates a sentinel for us. Fixes: d236843a6964 ("s390/pci: store DMA offset in bus_dma_region") Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20250312-fix_dma_map_alloc-v2-1-530108d9de21@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_bus.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 4d45edb026f2..81bdb54ad5e3 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -287,23 +287,21 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) static void pci_dma_range_setup(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); - struct bus_dma_region *map; - u64 aligned_end; + u64 aligned_end, size; + dma_addr_t dma_start; + int ret; - map = kzalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return; - - map->cpu_start = 0; - map->dma_start = PAGE_ALIGN(zdev->start_dma); + dma_start = PAGE_ALIGN(zdev->start_dma); aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); - if (aligned_end >= map->dma_start) - map->size = aligned_end - map->dma_start; + if (aligned_end >= dma_start) + size = aligned_end - dma_start; else - map->size = 0; - WARN_ON_ONCE(map->size == 0); + size = 0; + WARN_ON_ONCE(size == 0); - pdev->dev.dma_range_map = map; + ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size); + if (ret) + pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev)); } void pcibios_bus_add_device(struct pci_dev *pdev) From a3c3c66670cee11eb13aa43905904bf29cb92d32 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 26 Mar 2025 08:20:03 +0000 Subject: [PATCH 1868/2157] perf/core: Fix child_total_time_enabled accounting bug at task exit The perf events code fails to account for total_time_enabled of inactive events. Here is a failure case for accounting total_time_enabled for CPU PMU events: sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 2s ... armv8_pmuv3_0/event=0x08/: 1138698008 2289429840 2174835740 armv8_pmuv3_1/event=0x08/: 1826791390 1950025700 847648440 ` ` ` ` ` > total_time_running with child ` > total_time_enabled with child > count with child Performance counter stats for 'stress-ng --pthread=2 -t 2s': 1,138,698,008 armv8_pmuv3_0/event=0x08/ (94.99%) 1,826,791,390 armv8_pmuv3_1/event=0x08/ (43.47%) The two events above are opened on two different CPU PMUs, for example, each event is opened for a cluster in an Arm big.LITTLE system, they will never run on the same CPU. In theory, the total enabled time should be same for both events, as two events are opened and closed together. As the result show, the two events' total enabled time including child event is different (2289429840 vs 1950025700). This is because child events are not accounted properly if a event is INACTIVE state when the task exits: perf_event_exit_event() `> perf_remove_from_context() `> __perf_remove_from_context() `> perf_child_detach() -> Accumulate child_total_time_enabled `> list_del_event() -> Update child event's time The problem is the time accumulation happens prior to child event's time updating. Thus, it misses to account the last period's time when the event exits. The perf core layer follows the rule that timekeeping is tied to state change. To address the issue, make __perf_remove_from_context() handle the task exit case by passing 'DETACH_EXIT' to it and invoke perf_event_state() for state alongside with accounting the time. Then, perf_child_detach() populates the time into the parent's time metrics. After this patch, the bug is fixed: sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 10s ... armv8_pmuv3_0/event=0x08/: 15396770398 32157963940 21898169000 armv8_pmuv3_1/event=0x08/: 22428964974 32157963940 10259794940 Performance counter stats for 'stress-ng --pthread=2 -t 10s': 15,396,770,398 armv8_pmuv3_0/event=0x08/ (68.10%) 22,428,964,974 armv8_pmuv3_1/event=0x08/ (31.90%) [ mingo: Clarified the changelog. ] Fixes: ef54c1a476aef ("perf: Rework perf_event_exit_event()") Suggested-by: Peter Zijlstra Signed-off-by: Yeoreum Yun Signed-off-by: Ingo Molnar Tested-by: Leo Yan Link: https://lore.kernel.org/r/20250326082003.1630986-1-yeoreum.yun@arm.com --- kernel/events/core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0bb21659e252..128db74e9eab 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2451,6 +2451,7 @@ ctx_time_update_event(struct perf_event_context *ctx, struct perf_event *event) #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL #define DETACH_DEAD 0x04UL +#define DETACH_EXIT 0x08UL /* * Cross CPU call to remove a performance event @@ -2465,6 +2466,7 @@ __perf_remove_from_context(struct perf_event *event, void *info) { struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx; + enum perf_event_state state = PERF_EVENT_STATE_OFF; unsigned long flags = (unsigned long)info; ctx_time_update(cpuctx, ctx); @@ -2473,16 +2475,19 @@ __perf_remove_from_context(struct perf_event *event, * Ensure event_sched_out() switches to OFF, at the very least * this avoids raising perf_pending_task() at this time. */ - if (flags & DETACH_DEAD) + if (flags & DETACH_EXIT) + state = PERF_EVENT_STATE_EXIT; + if (flags & DETACH_DEAD) { event->pending_disable = 1; + state = PERF_EVENT_STATE_DEAD; + } event_sched_out(event, ctx); + perf_event_set_state(event, min(event->state, state)); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); - if (flags & DETACH_DEAD) - event->state = PERF_EVENT_STATE_DEAD; if (!pmu_ctx->nr_events) { pmu_ctx->rotate_necessary = 0; @@ -13731,12 +13736,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) mutex_lock(&parent_event->child_mutex); } - perf_remove_from_context(event, detach_flags); - - raw_spin_lock_irq(&ctx->lock); - if (event->state > PERF_EVENT_STATE_EXIT) - perf_event_set_state(event, PERF_EVENT_STATE_EXIT); - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, detach_flags | DETACH_EXIT); /* * Child events can be freed. From 7d783d9074cb1d54179ca03df514fe4b0bbae5ab Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 31 Mar 2025 10:06:23 +0300 Subject: [PATCH 1869/2157] ASoC: SOF: hda/ptl: Move mic privacy change notification sending to a work IPC message cannot be sent from the irq thread directly as the message will not receive the reply (interrupts are disabled) and it will time out - the reply is going to be received right after the we leave the irq thread. This is a different case compared to the delayed IPC messages due to DSP busy state. Add support for sending the mic privacy change notification to the firmware from a work instead of the process callback. The work needs to be canceled if there is a chance that it might be running on module remove or before system/runtime suspend. Fixes: 4a43c3241ec3 ("ASoC: SOF: Intel: ptl: Add support for mic privacy") Signed-off-by: Peter Ujfalusi Reviewed-by: Guennadi Liakhovetski Reviewed-by: Liam Girdwood Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250331070623.5985-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dsp.c | 8 ++++++++ sound/soc/sof/intel/hda.c | 4 ++++ sound/soc/sof/intel/hda.h | 8 ++++++++ sound/soc/sof/intel/ptl.c | 33 +++++++++++++++++++++++++++++---- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index ccf8eefdca70..f64e8a6a9a33 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -991,6 +991,10 @@ int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev) if (!sdev->dspless_mode_selected) { /* cancel any attempt for DSP D0I3 */ cancel_delayed_work_sync(&hda->d0i3_work); + + /* Cancel the microphone privacy work if mic privacy is active */ + if (hda->mic_privacy.active) + cancel_work_sync(&hda->mic_privacy.work); } /* stop hda controller and power dsp off */ @@ -1017,6 +1021,10 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) if (!sdev->dspless_mode_selected) { /* cancel any attempt for DSP D0I3 */ cancel_delayed_work_sync(&hda->d0i3_work); + + /* Cancel the microphone privacy work if mic privacy is active */ + if (hda->mic_privacy.active) + cancel_work_sync(&hda->mic_privacy.work); } if (target_state == SOF_DSP_PM_D0) { diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 6b1ada566476..b34e5fdf10f1 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -968,6 +968,10 @@ void hda_dsp_remove(struct snd_sof_dev *sdev) if (sdev->dspless_mode_selected) goto skip_disable_dsp; + /* Cancel the microphone privacy work if mic privacy is active */ + if (hda->mic_privacy.active) + cancel_work_sync(&hda->mic_privacy.work); + /* no need to check for error as the DSP will be disabled anyway */ if (chip && chip->power_down_dsp) chip->power_down_dsp(sdev); diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 76154627fc17..108cad04879e 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -487,6 +487,11 @@ enum sof_hda_D0_substate { SOF_HDA_DSP_PM_D0I3, /* low power D0 substate */ }; +struct sof_ace3_mic_privacy { + bool active; + struct work_struct work; +}; + /* represents DSP HDA controller frontend - i.e. host facing control */ struct sof_intel_hda_dev { bool imrboot_supported; @@ -542,6 +547,9 @@ struct sof_intel_hda_dev { /* Intel NHLT information */ struct nhlt_acpi_table *nhlt; + /* work queue for mic privacy state change notification sending */ + struct sof_ace3_mic_privacy mic_privacy; + /* * Pointing to the IPC message if immediate sending was not possible * because the downlink communication channel was BUSY at the time. diff --git a/sound/soc/sof/intel/ptl.c b/sound/soc/sof/intel/ptl.c index 8fa4bdceedd9..aa0b772178bc 100644 --- a/sound/soc/sof/intel/ptl.c +++ b/sound/soc/sof/intel/ptl.c @@ -27,22 +27,44 @@ static bool sof_ptl_check_mic_privacy_irq(struct snd_sof_dev *sdev, bool alt, return hdac_bus_eml_is_mic_privacy_changed(sof_to_bus(sdev), alt, elid); } +static void sof_ptl_mic_privacy_work(struct work_struct *work) +{ + struct sof_intel_hda_dev *hdev = container_of(work, + struct sof_intel_hda_dev, + mic_privacy.work); + struct hdac_bus *bus = &hdev->hbus.core; + struct snd_sof_dev *sdev = dev_get_drvdata(bus->dev); + bool state; + + /* + * The microphone privacy state is only available via Soundwire shim + * in PTL + * The work is only scheduled on change. + */ + state = hdac_bus_eml_get_mic_privacy_state(bus, 1, + AZX_REG_ML_LEPTR_ID_SDW); + sof_ipc4_mic_privacy_state_change(sdev, state); +} + static void sof_ptl_process_mic_privacy(struct snd_sof_dev *sdev, bool alt, int elid) { - bool state; + struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata; if (!alt || elid != AZX_REG_ML_LEPTR_ID_SDW) return; - state = hdac_bus_eml_get_mic_privacy_state(sof_to_bus(sdev), alt, elid); - - sof_ipc4_mic_privacy_state_change(sdev, state); + /* + * Schedule the work to read the microphone privacy state and send IPC + * message about the new state to the firmware + */ + schedule_work(&hdev->mic_privacy.work); } static void sof_ptl_set_mic_privacy(struct snd_sof_dev *sdev, struct sof_ipc4_intel_mic_privacy_cap *caps) { + struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata; u32 micpvcp; if (!caps || !caps->capabilities_length) @@ -58,6 +80,9 @@ static void sof_ptl_set_mic_privacy(struct snd_sof_dev *sdev, hdac_bus_eml_set_mic_privacy_mask(sof_to_bus(sdev), true, AZX_REG_ML_LEPTR_ID_SDW, PTL_MICPVCP_GET_SDW_MASK(micpvcp)); + + INIT_WORK(&hdev->mic_privacy.work, sof_ptl_mic_privacy_work); + hdev->mic_privacy.active = true; } int sof_ptl_set_ops(struct snd_sof_dev *sdev, struct snd_sof_dsp_ops *dsp_ops) From 9e9b893404d43894d69a18dd2fc8fcf1c36abb7e Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Fri, 28 Mar 2025 15:30:39 +0100 Subject: [PATCH 1870/2157] ACPI: processor: idle: Return an error if both P_LVL{2,3} idle states are invalid Prior to commit 496121c02127 ("ACPI: processor: idle: Allow probing on platforms with one ACPI C-state"), the acpi_idle driver wouldn't load on systems without a valid C-State at least as deep as C2. The behavior was desirable for guests on hypervisors such as VMWare ESXi, which by default don't have the _CST ACPI method, and set the C2 and C3 latencies to 101 and 1001 microseconds respectively via the FADT, to signify they're unsupported. Since the above change though, these virtualized deployments end up loading acpi_idle, and thus entering the default C1 C-State set by acpi_processor_get_power_info_default(); this is undesirable for a system that's communicating to the OS it doesn't want C-States (missing _CST, and invalid C2/C3 in FADT). Make acpi_processor_get_power_info_fadt() return -ENODEV in that case, so that acpi_processor_get_cstate_info() exits early and doesn't set pr->flags.power = 1. Fixes: 496121c02127 ("ACPI: processor: idle: Allow probing on platforms with one ACPI C-state") Signed-off-by: Giovanni Gherdovich Reviewed-by: Zhang Rui Link: https://patch.msgid.link/20250328143040.9348-1-ggherdovich@suse.cz [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 586cc7d1d8aa..b181f7fc2090 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -268,6 +268,10 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) ACPI_CX_DESC_LEN, "ACPI P_LVL3 IOPORT 0x%x", pr->power.states[ACPI_STATE_C3].address); + if (!pr->power.states[ACPI_STATE_C2].address && + !pr->power.states[ACPI_STATE_C3].address) + return -ENODEV; + return 0; } From 4f1eaabb4b66a1f7473f584e14e15b2ac19dfaf3 Mon Sep 17 00:00:00 2001 From: Jim Liu Date: Thu, 27 Mar 2025 14:29:42 +0800 Subject: [PATCH 1871/2157] net: phy: broadcom: Correct BCM5221 PHY model detection Correct detect condition is applied to the entire 5221 family of PHYs. Fixes: 3abbd0699b67 ("net: phy: broadcom: add support for BCM5221 phy") Signed-off-by: Jim Liu Reviewed-by: Michal Swiatkowski Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 13e43fee1906..9b1de54fd483 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -859,7 +859,7 @@ static int brcm_fet_config_init(struct phy_device *phydev) return reg; /* Unmask events we are interested in and mask interrupts globally. */ - if (phydev->phy_id == PHY_ID_BCM5221) + if (phydev->drv->phy_id == PHY_ID_BCM5221) reg = MII_BRCM_FET_IR_ENABLE | MII_BRCM_FET_IR_MASK; else @@ -888,7 +888,7 @@ static int brcm_fet_config_init(struct phy_device *phydev) return err; } - if (phydev->phy_id != PHY_ID_BCM5221) { + if (phydev->drv->phy_id != PHY_ID_BCM5221) { /* Set the LED mode */ reg = __phy_read(phydev, MII_BRCM_FET_SHDW_AUXMODE4); if (reg < 0) { @@ -1009,7 +1009,7 @@ static int brcm_fet_suspend(struct phy_device *phydev) return err; } - if (phydev->phy_id == PHY_ID_BCM5221) + if (phydev->drv->phy_id == PHY_ID_BCM5221) /* Force Low Power Mode with clock enabled */ reg = BCM5221_SHDW_AM4_EN_CLK_LPM | BCM5221_SHDW_AM4_FORCE_LPM; else From 09098e62e4be8f0755e58d6078aaf27cbd9a3a8d Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Tue, 25 Mar 2025 18:29:31 +0100 Subject: [PATCH 1872/2157] fuse: {io-uring} Fix a possible req cancellation race task-A (application) might be in request_wait_answer and try to remove the request when it has FR_PENDING set. task-B (a fuse-server io-uring task) might handle this request with FUSE_IO_URING_CMD_COMMIT_AND_FETCH, when fetching the next request and accessed the req from the pending list in fuse_uring_ent_assign_req(). That code path was not protected by fiq->lock and so might race with task-A. For scaling reasons we better don't use fiq->lock, but add a handler to remove canceled requests from the queue. This also removes usage of fiq->lock from fuse_uring_add_req_to_ring_ent() altogether, as it was there just to protect against this race and incomplete. Also added is a comment why FR_PENDING is not cleared. Fixes: c090c8abae4b ("fuse: Add io-uring sqe commit and fetch support") Cc: # v6.14 Reported-by: Joanne Koong Closes: https://lore.kernel.org/all/CAJnrk1ZgHNb78dz-yfNTpxmW7wtT88A=m-zF0ZoLXKLUHRjNTw@mail.gmail.com/ Signed-off-by: Bernd Schubert Reviewed-by: Joanne Koong Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 34 +++++++++++++++++++++++++--------- fs/fuse/dev_uring.c | 15 +++++++++++---- fs/fuse/dev_uring_i.h | 6 ++++++ fs/fuse/fuse_dev_i.h | 1 + fs/fuse/fuse_i.h | 3 +++ 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2c3a4d09e500..2645cd8accfd 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -407,6 +407,24 @@ static int queue_interrupt(struct fuse_req *req) return 0; } +bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock) +{ + spin_lock(lock); + if (test_bit(FR_PENDING, &req->flags)) { + /* + * FR_PENDING does not get cleared as the request will end + * up in destruction anyway. + */ + list_del(&req->list); + spin_unlock(lock); + __fuse_put_request(req); + req->out.h.error = -EINTR; + return true; + } + spin_unlock(lock); + return false; +} + static void request_wait_answer(struct fuse_req *req) { struct fuse_conn *fc = req->fm->fc; @@ -428,22 +446,20 @@ static void request_wait_answer(struct fuse_req *req) } if (!test_bit(FR_FORCE, &req->flags)) { + bool removed; + /* Only fatal signals may interrupt this */ err = wait_event_killable(req->waitq, test_bit(FR_FINISHED, &req->flags)); if (!err) return; - spin_lock(&fiq->lock); - /* Request is not yet in userspace, bail out */ - if (test_bit(FR_PENDING, &req->flags)) { - list_del(&req->list); - spin_unlock(&fiq->lock); - __fuse_put_request(req); - req->out.h.error = -EINTR; + if (test_bit(FR_URING, &req->flags)) + removed = fuse_uring_remove_pending_req(req); + else + removed = fuse_remove_pending_req(req, &fiq->lock); + if (removed) return; - } - spin_unlock(&fiq->lock); } /* diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index ebd2931b4f2a..add7273c8dc4 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -726,8 +726,6 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent, struct fuse_req *req) { struct fuse_ring_queue *queue = ent->queue; - struct fuse_conn *fc = req->fm->fc; - struct fuse_iqueue *fiq = &fc->iq; lockdep_assert_held(&queue->lock); @@ -737,9 +735,7 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent, ent->state); } - spin_lock(&fiq->lock); clear_bit(FR_PENDING, &req->flags); - spin_unlock(&fiq->lock); ent->fuse_req = req; ent->state = FRRS_FUSE_REQ; list_move(&ent->list, &queue->ent_w_req_queue); @@ -1238,6 +1234,8 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) if (unlikely(queue->stopped)) goto err_unlock; + set_bit(FR_URING, &req->flags); + req->ring_queue = queue; ent = list_first_entry_or_null(&queue->ent_avail_queue, struct fuse_ring_ent, list); if (ent) @@ -1276,6 +1274,8 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req) return false; } + set_bit(FR_URING, &req->flags); + req->ring_queue = queue; list_add_tail(&req->list, &queue->fuse_req_bg_queue); ent = list_first_entry_or_null(&queue->ent_avail_queue, @@ -1306,6 +1306,13 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req) return true; } +bool fuse_uring_remove_pending_req(struct fuse_req *req) +{ + struct fuse_ring_queue *queue = req->ring_queue; + + return fuse_remove_pending_req(req, &queue->lock); +} + static const struct fuse_iqueue_ops fuse_io_uring_ops = { /* should be send over io-uring as enhancement */ .send_forget = fuse_dev_queue_forget, diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index 2102b3d0c1ae..e5b39a92b7ca 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -142,6 +142,7 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring); int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_uring_queue_bq_req(struct fuse_req *req); +bool fuse_uring_remove_pending_req(struct fuse_req *req); static inline void fuse_uring_abort(struct fuse_conn *fc) { @@ -200,6 +201,11 @@ static inline bool fuse_uring_ready(struct fuse_conn *fc) return false; } +static inline bool fuse_uring_remove_pending_req(struct fuse_req *req) +{ + return false; +} + #endif /* CONFIG_FUSE_IO_URING */ #endif /* _FS_FUSE_DEV_URING_I_H */ diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 3b2bfe1248d3..2481da3388c5 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -61,6 +61,7 @@ int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget); void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); +bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock); #endif diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fee96fe7887b..2086dac7243b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -378,6 +378,7 @@ struct fuse_io_priv { * FR_FINISHED: request is finished * FR_PRIVATE: request is on private list * FR_ASYNC: request is asynchronous + * FR_URING: request is handled through fuse-io-uring */ enum fuse_req_flag { FR_ISREPLY, @@ -392,6 +393,7 @@ enum fuse_req_flag { FR_FINISHED, FR_PRIVATE, FR_ASYNC, + FR_URING, }; /** @@ -441,6 +443,7 @@ struct fuse_req { #ifdef CONFIG_FUSE_IO_URING void *ring_entry; + void *ring_queue; #endif }; From 841c7b812c038661e4f659d1b9c9a366c6d24b71 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 7 Feb 2025 13:35:02 +0000 Subject: [PATCH 1873/2157] fuse: removed unused function fuse_uring_create() from header Function fuse_uring_create() is used only from dev_uring.c and does not need to be exposed in the header file. Furthermore, it has the wrong signature. While there, also remove the 'struct fuse_ring' forward declaration. Signed-off-by: Luis Henriques Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- fs/fuse/dev_uring_i.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index e5b39a92b7ca..d87d0a55cb5f 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -173,12 +173,6 @@ static inline bool fuse_uring_ready(struct fuse_conn *fc) #else /* CONFIG_FUSE_IO_URING */ -struct fuse_ring; - -static inline void fuse_uring_create(struct fuse_conn *fc) -{ -} - static inline void fuse_uring_destruct(struct fuse_conn *fc) { } From 8344213571b2ac8caf013cfd3b37bc3467c3a893 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 14 Feb 2025 09:17:53 +0800 Subject: [PATCH 1874/2157] fuse: Return EPERM rather than ENOSYS from link() link() is documented to return EPERM when a filesystem doesn't support the operation, return that instead. Link: https://github.com/libfuse/libfuse/issues/925 Signed-off-by: Matt Johnston Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3805f9b06c9d..1717ae0d0864 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1137,6 +1137,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, else if (err == -EINTR) fuse_invalidate_attr(inode); + if (err == -ENOSYS) + err = -EPERM; return err; } From eef36cf6a7016cb5353d4b0a9dbdfbd52c4bd973 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 14 Feb 2025 11:00:53 +0100 Subject: [PATCH 1875/2157] fuse: optmize missing FUSE_LINK support If filesystem doesn't support FUSE_LINK (i.e. returns -ENOSYS), then remember this and next time return immediately, without incurring the overhead of a round trip to the server. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 9 ++++++++- fs/fuse/fuse_i.h | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1717ae0d0864..83c56ce6ad20 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1123,6 +1123,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); + if (fm->fc->no_link) + goto out; + memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); args.opcode = FUSE_LINK; @@ -1138,7 +1141,11 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, fuse_invalidate_attr(inode); if (err == -ENOSYS) - err = -EPERM; + fm->fc->no_link = 1; +out: + if (fm->fc->no_link) + return -EPERM; + return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2086dac7243b..6f00b177a434 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -870,6 +870,9 @@ struct fuse_conn { /* Use pages instead of pointer for kernel I/O */ unsigned int use_pages_for_kvec_io:1; + /* Is link not implemented by fs? */ + unsigned int no_link:1; + /* Use io_uring for communication */ unsigned int io_uring; From 0f6439f61a6e2ddc92b98362c6d1afc210f56a90 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 22 Jan 2025 13:55:27 -0800 Subject: [PATCH 1876/2157] fuse: add kernel-enforced timeout option for requests There are situations where fuse servers can become unresponsive or stuck, for example if the server is deadlocked. Currently, there's no good way to detect if a server is stuck and needs to be killed manually. This commit adds an option for enforcing a timeout (in seconds) for requests where if the timeout elapses without the server responding to the request, the connection will be automatically aborted. Please note that these timeouts are not 100% precise. For example, the request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the requested timeout due to internal implementation, in order to mitigate overhead. [SzM: Bump the API version number] Signed-off-by: Joanne Koong Reviewed-by: Jeff Layton Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 101 ++++++++++++++++++++++++++++++++++++++ fs/fuse/dev_uring.c | 27 ++++++++++ fs/fuse/dev_uring_i.h | 6 +++ fs/fuse/fuse_dev_i.h | 3 ++ fs/fuse/fuse_i.h | 17 +++++++ fs/fuse/inode.c | 17 ++++++- include/uapi/linux/fuse.h | 12 +++-- 7 files changed, 179 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2645cd8accfd..f48afffcb2a5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -32,6 +32,103 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; +/* Frequency (in seconds) of request timeout checks, if opted into */ +#define FUSE_TIMEOUT_TIMER_FREQ 15 + +const unsigned long fuse_timeout_timer_freq = + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); + +bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list) +{ + struct fuse_req *req; + + req = list_first_entry_or_null(list, struct fuse_req, list); + if (!req) + return false; + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); +} + +bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing) +{ + int i; + + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) + if (fuse_request_expired(fc, &processing[i])) + return true; + + return false; +} + +/* + * Check if any requests aren't being completed by the time the request timeout + * elapses. To do so, we: + * - check the fiq pending list + * - check the bg queue + * - check the fpq io and processing lists + * + * To make this fast, we only check against the head request on each list since + * these are generally queued in order of creation time (eg newer requests get + * queued to the tail). We might miss a few edge cases (eg requests transitioning + * between lists, re-sent requests at the head of the pending list having a + * later creation time than other requests on that list, etc.) but that is fine + * since if the request never gets fulfilled, it will eventually be caught. + */ +void fuse_check_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, + timeout.work); + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_dev *fud; + struct fuse_pqueue *fpq; + bool expired = false; + + if (!atomic_read(&fc->num_waiting)) + goto out; + + spin_lock(&fiq->lock); + expired = fuse_request_expired(fc, &fiq->pending); + spin_unlock(&fiq->lock); + if (expired) + goto abort_conn; + + spin_lock(&fc->bg_lock); + expired = fuse_request_expired(fc, &fc->bg_queue); + spin_unlock(&fc->bg_lock); + if (expired) + goto abort_conn; + + spin_lock(&fc->lock); + if (!fc->connected) { + spin_unlock(&fc->lock); + return; + } + list_for_each_entry(fud, &fc->devices, entry) { + fpq = &fud->pq; + spin_lock(&fpq->lock); + if (fuse_request_expired(fc, &fpq->io) || + fuse_fpq_processing_expired(fc, fpq->processing)) { + spin_unlock(&fpq->lock); + spin_unlock(&fc->lock); + goto abort_conn; + } + + spin_unlock(&fpq->lock); + } + spin_unlock(&fc->lock); + + if (fuse_uring_request_expired(fc)) + goto abort_conn; + +out: + queue_delayed_work(system_wq, &fc->timeout.work, + fuse_timeout_timer_freq); + return; + +abort_conn: + fuse_abort_conn(fc); +} + static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) { INIT_LIST_HEAD(&req->list); @@ -40,6 +137,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) refcount_set(&req->count, 1); __set_bit(FR_PENDING, &req->flags); req->fm = fm; + req->create_time = jiffies; } static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) @@ -2291,6 +2389,9 @@ void fuse_abort_conn(struct fuse_conn *fc) LIST_HEAD(to_end); unsigned int i; + if (fc->timeout.req_timeout) + cancel_delayed_work(&fc->timeout.work); + /* Background queuing checks fc->connected under bg_lock */ spin_lock(&fc->bg_lock); fc->connected = 0; diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index add7273c8dc4..03062ee69b70 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -140,6 +140,33 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring) } } +bool fuse_uring_request_expired(struct fuse_conn *fc) +{ + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + int qid; + + if (!ring) + return false; + + for (qid = 0; qid < ring->nr_queues; qid++) { + queue = READ_ONCE(ring->queues[qid]); + if (!queue) + continue; + + spin_lock(&queue->lock); + if (fuse_request_expired(fc, &queue->fuse_req_queue) || + fuse_request_expired(fc, &queue->fuse_req_bg_queue) || + fuse_fpq_processing_expired(fc, queue->fpq.processing)) { + spin_unlock(&queue->lock); + return true; + } + spin_unlock(&queue->lock); + } + + return false; +} + void fuse_uring_destruct(struct fuse_conn *fc) { struct fuse_ring *ring = fc->ring; diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index d87d0a55cb5f..51a563922ce1 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -143,6 +143,7 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_uring_queue_bq_req(struct fuse_req *req); bool fuse_uring_remove_pending_req(struct fuse_req *req); +bool fuse_uring_request_expired(struct fuse_conn *fc); static inline void fuse_uring_abort(struct fuse_conn *fc) { @@ -200,6 +201,11 @@ static inline bool fuse_uring_remove_pending_req(struct fuse_req *req) return false; } +static inline bool fuse_uring_request_expired(struct fuse_conn *fc) +{ + return false; +} + #endif /* CONFIG_FUSE_IO_URING */ #endif /* _FS_FUSE_DEV_URING_I_H */ diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 2481da3388c5..b3c2e32254ba 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -63,5 +63,8 @@ void fuse_dev_queue_forget(struct fuse_iqueue *fiq, void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock); +bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list); +bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing); + #endif diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6f00b177a434..519c93c3256e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -44,6 +44,9 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 +/** Frequency (in jiffies) of request timeout checks, if opted into */ +extern const unsigned long fuse_timeout_timer_freq; + /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; @@ -445,6 +448,8 @@ struct fuse_req { void *ring_entry; void *ring_queue; #endif + /** When (in jiffies) the request was created */ + unsigned long create_time; }; struct fuse_iqueue; @@ -941,6 +946,15 @@ struct fuse_conn { /** uring connection information*/ struct fuse_ring *ring; #endif + + /** Only used if the connection opts into request timeouts */ + struct { + /* Worker for checking if any requests have timed out */ + struct delayed_work work; + + /* Request timeout (in jiffies). 0 = no timeout */ + unsigned int req_timeout; + } timeout; }; /* @@ -1222,6 +1236,9 @@ void fuse_request_end(struct fuse_req *req); void fuse_abort_conn(struct fuse_conn *fc); void fuse_wait_aborted(struct fuse_conn *fc); +/* Check if any requests timed out */ +void fuse_check_timeout(struct work_struct *work); + /** * Invalidate inode attributes */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e9db2cb8c150..79ebeb60015c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -979,6 +979,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = fuse_max_pages_limit; + fc->timeout.req_timeout = 0; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_files_init(fc); @@ -1007,6 +1008,8 @@ void fuse_conn_put(struct fuse_conn *fc) if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); + if (fc->timeout.req_timeout) + cancel_delayed_work_sync(&fc->timeout.work); if (fiq->ops->release) fiq->ops->release(fiq); put_pid_ns(fc->pid_ns); @@ -1257,6 +1260,14 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) spin_unlock(&fc->bg_lock); } +static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) +{ + fc->timeout.req_timeout = secs_to_jiffies(timeout); + INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); + queue_delayed_work(system_wq, &fc->timeout.work, + fuse_timeout_timer_freq); +} + struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; @@ -1392,6 +1403,9 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, } if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) fc->io_uring = 1; + + if ((flags & FUSE_REQUEST_TIMEOUT) && arg->request_timeout) + set_request_timeout(fc, arg->request_timeout); } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1439,7 +1453,8 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | - FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP; + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP | + FUSE_REQUEST_TIMEOUT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5e0eb41d967e..5ec43ecbceb7 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -229,6 +229,9 @@ * - FUSE_URING_IN_OUT_HEADER_SZ * - FUSE_URING_OP_IN_OUT_SZ * - enum fuse_uring_cmd + * + * 7.43 + * - add FUSE_REQUEST_TIMEOUT */ #ifndef _LINUX_FUSE_H @@ -264,7 +267,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 42 +#define FUSE_KERNEL_MINOR_VERSION 43 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -435,6 +438,8 @@ struct fuse_file_lock { * of the request ID indicates resend requests * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts * FUSE_OVER_IO_URING: Indicate that client supports io-uring + * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests. + * init_out.request_timeout contains the timeout (in secs) */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -477,11 +482,11 @@ struct fuse_file_lock { #define FUSE_PASSTHROUGH (1ULL << 37) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) - /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_ALLOW_IDMAP (1ULL << 40) #define FUSE_OVER_IO_URING (1ULL << 41) +#define FUSE_REQUEST_TIMEOUT (1ULL << 42) /** * CUSE INIT request/reply flags @@ -909,7 +914,8 @@ struct fuse_init_out { uint16_t map_alignment; uint32_t flags2; uint32_t max_stack_depth; - uint32_t unused[6]; + uint16_t request_timeout; + uint16_t unused[11]; }; #define CUSE_INIT_INFO_MAX 4096 From 9b17cb59a7db983a967c3658fe9a2f250f588bbd Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 22 Jan 2025 13:55:28 -0800 Subject: [PATCH 1877/2157] fuse: add default_request_timeout and max_request_timeout sysctls Introduce two new sysctls, "default_request_timeout" and "max_request_timeout". These control how long (in seconds) a server can take to reply to a request. If the server does not reply by the timeout, then the connection will be aborted. The upper bound on these sysctl values is 65535. "default_request_timeout" sets the default timeout if no timeout is specified by the fuse server on mount. 0 (default) indicates no default timeout should be enforced. If the server did specify a timeout, then default_request_timeout will be ignored. "max_request_timeout" sets the max amount of time the server may take to reply to a request. 0 (default) indicates no maximum timeout. If max_request_timeout is set and the fuse server attempts to set a timeout greater than max_request_timeout, the system will use max_request_timeout as the timeout. Similarly, if default_request_timeout is greater than max_request_timeout, the system will use max_request_timeout as the timeout. If the server does not request a timeout and default_request_timeout is set to 0 but max_request_timeout is set, then the timeout will be max_request_timeout. Please note that these timeouts are not 100% precise. The request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max timeout due to how it's internally implemented. $ sysctl -a | grep fuse.default_request_timeout fs.fuse.default_request_timeout = 0 $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout 65535 $ sysctl -a | grep fuse.default_request_timeout fs.fuse.default_request_timeout = 65535 $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout 0 $ sysctl -a | grep fuse.default_request_timeout fs.fuse.default_request_timeout = 0 [Luis Henriques: Limit the timeout to the range [FUSE_TIMEOUT_TIMER_FREQ, fuse_max_req_timeout]] Signed-off-by: Joanne Koong Reviewed-by: Bernd Schubert Reviewed-by: Josef Bacik Reviewed-by: Sergey Senozhatsky Reviewed-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Miklos Szeredi --- Documentation/admin-guide/sysctl/fs.rst | 25 +++++++++++++++++++++ fs/fuse/dev.c | 3 --- fs/fuse/fuse_i.h | 13 +++++++++++ fs/fuse/inode.c | 30 +++++++++++++++++++++++-- fs/fuse/sysctl.c | 24 ++++++++++++++++++++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index 08e89e031714..6c54718c9d04 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -347,3 +347,28 @@ filesystems: ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for setting/getting the maximum number of pages that can be used for servicing requests in FUSE. + +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for +setting/getting the default timeout (in seconds) for a fuse server to +reply to a kernel-issued request in the event where the server did not +specify a timeout at mount. If the server set a timeout, +then default_request_timeout will be ignored. The default +"default_request_timeout" is set to 0. 0 indicates no default timeout. +The maximum value that can be set is 65535. + +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for +setting/getting the maximum timeout (in seconds) for a fuse server to +reply to a kernel-issued request. A value greater than 0 automatically opts +the server into a timeout that will be set to at most "max_request_timeout", +even if the server did not specify a timeout and default_request_timeout is +set to 0. If max_request_timeout is greater than 0 and the server set a timeout +greater than max_request_timeout or default_request_timeout is set to a value +greater than max_request_timeout, the system will use max_request_timeout as the +timeout. 0 indicates no max request timeout. The maximum value that can be set +is 65535. + +For timeouts, if the server does not respond to the request by the time +the set timeout elapses, then the connection to the fuse server will be aborted. +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but +the timeout may kick in after 70 seconds). The upper margin of error for the +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f48afffcb2a5..4e4c2bcabdca 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -32,9 +32,6 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; -/* Frequency (in seconds) of request timeout checks, if opted into */ -#define FUSE_TIMEOUT_TIMER_FREQ 15 - const unsigned long fuse_timeout_timer_freq = secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 519c93c3256e..0bd1deabb289 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -44,11 +44,24 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 +/* Frequency (in seconds) of request timeout checks, if opted into */ +#define FUSE_TIMEOUT_TIMER_FREQ 15 + /** Frequency (in jiffies) of request timeout checks, if opted into */ extern const unsigned long fuse_timeout_timer_freq; /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; +/* + * Default timeout (in seconds) for the server to reply to a request + * before the connection is aborted, if no timeout was specified on mount. + */ +extern unsigned int fuse_default_req_timeout; +/* + * Max timeout (in seconds) for the server to reply to a request before + * the connection is aborted. + */ +extern unsigned int fuse_max_req_timeout; /** List of active connections */ extern struct list_head fuse_conn_list; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79ebeb60015c..6e2b89fbcbb5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned int fuse_max_pages_limit = 256; +/* default is no timeout */ +unsigned int fuse_default_req_timeout; +unsigned int fuse_max_req_timeout; unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, @@ -1268,6 +1271,26 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) fuse_timeout_timer_freq); } +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) +{ + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) + return; + + if (!timeout) + timeout = fuse_default_req_timeout; + + if (fuse_max_req_timeout) { + if (timeout) + timeout = min(fuse_max_req_timeout, timeout); + else + timeout = fuse_max_req_timeout; + } + + timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); + + set_request_timeout(fc, timeout); +} + struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; @@ -1286,6 +1309,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, ok = false; else { unsigned long ra_pages; + unsigned int timeout = 0; process_init_limits(fc, arg); @@ -1404,14 +1428,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) fc->io_uring = 1; - if ((flags & FUSE_REQUEST_TIMEOUT) && arg->request_timeout) - set_request_timeout(fc, arg->request_timeout); + if (flags & FUSE_REQUEST_TIMEOUT) + timeout = arg->request_timeout; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } + init_server_timeout(fc, timeout); + fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c index 63fb1e5bee30..e2d921abcb88 100644 --- a/fs/fuse/sysctl.c +++ b/fs/fuse/sysctl.c @@ -13,6 +13,12 @@ static struct ctl_table_header *fuse_table_header; /* Bound by fuse_init_out max_pages, which is a u16 */ static unsigned int sysctl_fuse_max_pages_limit = 65535; +/* + * fuse_init_out request timeouts are u16. + * This goes up to ~18 hours, which is plenty for a timeout. + */ +static unsigned int sysctl_fuse_req_timeout_limit = 65535; + static const struct ctl_table fuse_sysctl_table[] = { { .procname = "max_pages_limit", @@ -23,6 +29,24 @@ static const struct ctl_table fuse_sysctl_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &sysctl_fuse_max_pages_limit, }, + { + .procname = "default_request_timeout", + .data = &fuse_default_req_timeout, + .maxlen = sizeof(fuse_default_req_timeout), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &sysctl_fuse_req_timeout_limit, + }, + { + .procname = "max_request_timeout", + .data = &fuse_max_req_timeout, + .maxlen = sizeof(fuse_max_req_timeout), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &sysctl_fuse_req_timeout_limit, + }, }; int fuse_sysctl_register(void) From 2412085da370836945c2daa61c5cee38dd979e0d Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Mon, 16 Dec 2024 22:14:06 +0100 Subject: [PATCH 1878/2157] fuse: Allocate only namelen buf memory in fuse_notify_ fuse_notify_inval_entry and fuse_notify_delete were using fixed allocations of FUSE_NAME_MAX to hold the file name. Often that large buffers are not needed as file names might be smaller, so this uses the actual file name size to do the allocation. Signed-off-by: Bernd Schubert Reviewed-by: Jingbo Xu Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4e4c2bcabdca..45d15db38787 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1644,14 +1644,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_inval_entry_out outarg; - int err = -ENOMEM; - char *buf; + int err; + char *buf = NULL; struct qstr name; - buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); - if (!buf) - goto err; - err = -EINVAL; if (size < sizeof(outarg)) goto err; @@ -1668,6 +1664,11 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, if (size != sizeof(outarg) + outarg.namelen + 1) goto err; + err = -ENOMEM; + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + goto err; + name.name = buf; name.len = outarg.namelen; err = fuse_copy_one(cs, buf, outarg.namelen + 1); @@ -1692,14 +1693,10 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_delete_out outarg; - int err = -ENOMEM; - char *buf; + int err; + char *buf = NULL; struct qstr name; - buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); - if (!buf) - goto err; - err = -EINVAL; if (size < sizeof(outarg)) goto err; @@ -1716,6 +1713,11 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, if (size != sizeof(outarg) + outarg.namelen + 1) goto err; + err = -ENOMEM; + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + goto err; + name.name = buf; name.len = outarg.namelen; err = fuse_copy_one(cs, buf, outarg.namelen + 1); From 27992ef80770d61a57f6c3a551735b08cefdffa3 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Mon, 16 Dec 2024 22:14:07 +0100 Subject: [PATCH 1879/2157] fuse: Increase FUSE_NAME_MAX to PATH_MAX Our file system has a translation capability for S3-to-posix. The current value of 1kiB is enough to cover S3 keys, but does not allow encoding of %xx escape characters. The limit is increased to (PATH_MAX - 1), as we need 3 x 1024 and that is close to PATH_MAX (4kB) already. -1 is used as the terminating null is not included in the length calculation. Testing large file names was hard with libfuse/example file systems, so I created a new memfs that does not have a 255 file name length limitation. https://github.com/libfuse/libfuse/pull/1077 The connection is initialized with FUSE_NAME_LOW_MAX, which is set to the previous value of FUSE_NAME_MAX of 1024. With FUSE_MIN_READ_BUFFER of 8192 that is enough for two file names + fuse headers. When FUSE_INIT reply sets max_pages to a value > 1 we know that fuse daemon supports request buffers of at least 2 pages (+ header) and can therefore hold 2 x PATH_MAX file names - operations like rename or link that need two file names are no issue then. Signed-off-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 4 ++-- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 11 +++++++++-- fs/fuse/inode.c | 8 ++++++++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 45d15db38787..696ab0403120 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1657,7 +1657,7 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, goto err; err = -ENAMETOOLONG; - if (outarg.namelen > FUSE_NAME_MAX) + if (outarg.namelen > fc->name_max) goto err; err = -EINVAL; @@ -1706,7 +1706,7 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, goto err; err = -ENAMETOOLONG; - if (outarg.namelen > FUSE_NAME_MAX) + if (outarg.namelen > fc->name_max) goto err; err = -EINVAL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 83c56ce6ad20..493c5b096b4a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -370,7 +370,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name *inode = NULL; err = -ENAMETOOLONG; - if (name->len > FUSE_NAME_MAX) + if (name->len > fm->fc->name_max) goto out; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0bd1deabb289..d56d4fd956db 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -38,8 +38,12 @@ /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN -/** It could be as large as PATH_MAX, but would that have any uses? */ -#define FUSE_NAME_MAX 1024 +/** Maximum length of a filename, not including terminating null */ + +/* maximum, small enough for FUSE_MIN_READ_BUFFER*/ +#define FUSE_NAME_LOW_MAX 1024 +/* maximum, but needs a request buffer > FUSE_MIN_READ_BUFFER */ +#define FUSE_NAME_MAX (PATH_MAX - 1) /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 @@ -924,6 +928,9 @@ struct fuse_conn { /** Version counter for evict inode */ atomic64_t evict_ctr; + /* maximum file name length */ + u32 name_max; + /** Called on final put */ void (*release)(struct fuse_conn *); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6e2b89fbcbb5..fd48e8d37f2e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -982,6 +982,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = fuse_max_pages_limit; + fc->name_max = FUSE_NAME_LOW_MAX; fc->timeout.req_timeout = 0; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) @@ -1373,6 +1374,13 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->max_pages = min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); + + /* + * PATH_MAX file names might need two pages for + * ops like rename + */ + if (fc->max_pages > 1) + fc->name_max = FUSE_NAME_MAX; } if (IS_ENABLED(CONFIG_FUSE_DAX)) { if (flags & FUSE_MAP_ALIGNMENT && From 1dfe2a220e9cd85861a853b00d8620222b960c1f Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 17 Mar 2025 17:30:28 -0700 Subject: [PATCH 1880/2157] fuse: fix uring race condition for null dereference of fc There is a race condition leading to a kernel crash from a null dereference when attemping to access fc->lock in fuse_uring_create_queue(). fc may be NULL in the case where another thread is creating the uring in fuse_uring_create() and has set fc->ring but has not yet set ring->fc when fuse_uring_create_queue() reads ring->fc. There is another race condition as well where in fuse_uring_register(), ring->nr_queues may still be 0 and not yet set to the new value when we compare qid against it. This fix sets fc->ring only after ring->fc and ring->nr_queues have been set, which guarantees now that ring->fc is a proper pointer when any queues are created and ring->nr_queues reflects the right number of queues if ring is not NULL. We must use smp_store_release() and smp_load_acquire() semantics to ensure the ordering will remain correct where fc->ring is assigned only after ring->fc and ring->nr_queues have been assigned. Signed-off-by: Joanne Koong Fixes: 24fe962c86f5 ("fuse: {io-uring} Handle SQEs - register commands") Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- fs/fuse/dev_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 03062ee69b70..e11786e92250 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -235,11 +235,11 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) init_waitqueue_head(&ring->stop_waitq); - fc->ring = ring; ring->nr_queues = nr_queues; ring->fc = fc; ring->max_payload_sz = max_payload_size; atomic_set(&ring->queue_refs, 0); + smp_store_release(&fc->ring, ring); spin_unlock(&fc->lock); return ring; @@ -1064,7 +1064,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, unsigned int issue_flags, struct fuse_conn *fc) { const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = smp_load_acquire(&fc->ring); struct fuse_ring_queue *queue; struct fuse_ring_ent *ent; int err; From 2d066800a4276340a97acc75c148892eb6f8781a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 17 Mar 2025 17:41:52 -0700 Subject: [PATCH 1881/2157] fuse: remove unneeded atomic set in uring creation When the ring is allocated, it is kzalloc-ed. ring->queue_refs will already be initialized to 0 by default. It does not need to be atomically set to 0. Signed-off-by: Joanne Koong Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- fs/fuse/dev_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index e11786e92250..accdce2977c5 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -238,7 +238,6 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) ring->nr_queues = nr_queues; ring->fc = fc; ring->max_payload_sz = max_payload_size; - atomic_set(&ring->queue_refs, 0); smp_store_release(&fc->ring, ring); spin_unlock(&fc->lock); From f28a71bc979392234cc110cd1e6787fb5b432116 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 31 Mar 2025 07:06:22 -0600 Subject: [PATCH 1882/2157] Documentation: ublk: remove dead footnote A previous commit removed the use of this footnote, delete it. Reported-by: Stephen Rothwell Fixes: 3fdf2ec7da1c ("Documentation: ublk: Drop Stefan Hajnoczi's message footnote") Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 74c57488dc9a..854f823b46c2 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -349,5 +349,3 @@ References .. [#userspace_nbdublk] https://gitlab.com/rwmjones/libnbd/-/tree/nbdublk .. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README - -.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ From 697b2876ac037545ba2761e2ffe9a5c2af6424e6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 08:54:00 +0100 Subject: [PATCH 1883/2157] io_uring: add req flag invariant build assertion We're caching some of file related request flags in a tricky way, put a build check to make sure flags don't get reshuffled. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9877577b83c25dd78224a8274f799187e7ec7639.1743407551.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index e4484a03e033..a4065e3d13d0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1643,6 +1643,8 @@ io_req_flags_t io_file_get_flags(struct file *file) { io_req_flags_t res = 0; + BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); + if (S_ISREG(file_inode(file)->i_mode)) res |= REQ_F_ISREG; if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) From 487a0710f87e7fa1f260a1b2213b77f0496cea43 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 08:55:32 +0100 Subject: [PATCH 1884/2157] io_uring: make zcrx depend on CONFIG_IO_URING Reflect in the kconfig that zcrx requires io_uring compiled. Fixes: 6f377873cb239 ("io_uring/zcrx: add interface queue and refill queue") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8047135a344e79dbd04ee36a7a69cc260aabc2ca.1743356260.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/Kconfig b/io_uring/Kconfig index 9e2a4beba1ef..4b949c42c0bf 100644 --- a/io_uring/Kconfig +++ b/io_uring/Kconfig @@ -5,6 +5,7 @@ config IO_URING_ZCRX def_bool y + depends on IO_URING depends on PAGE_POOL depends on INET depends on NET_RX_BUSY_POLL From 296e16961817e8e5f574661febc608ac0c0c0108 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 08:55:47 +0100 Subject: [PATCH 1885/2157] io_uring: hide caches sqes from drivers There is now an io_uring private part of cmd async_data, move saved sqe into it. Drivers are accessing it via struct io_uring_cmd::cmd. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ecbe078dd57acefdbc4366d083327086c0879378.1743357121.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 1 - io_uring/uring_cmd.c | 4 ++-- io_uring/uring_cmd.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index e6723fa95160..0634a3de1782 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -21,7 +21,6 @@ struct io_uring_cmd { struct io_uring_cmd_data { void *op_data; - struct io_uring_sqe sqes[2]; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index f2cfc371f3d0..89cee2af0ec1 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -205,8 +205,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, * that it doesn't read in per-op data, play it safe and ensure that * any SQE data is stable beyond prep. This can later get relaxed. */ - memcpy(ac->data.sqes, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = ac->data.sqes; + memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = ac->sqes; return 0; } diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 14e525255854..b04686b6b5d2 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -6,6 +6,7 @@ struct io_async_cmd { struct io_uring_cmd_data data; struct iou_vec vec; + struct io_uring_sqe sqes[2]; }; int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); From ed344511c584479ce2130d7e01a9a1e638850b0c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 08:55:11 +0100 Subject: [PATCH 1886/2157] io_uring: cleanup {g,s]etsockopt sqe reading Add a local variable for the sqe pointer to avoid repetition. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8dbac0f9acda2d3842534eeb7ce10d9276b021ae.1743357108.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 89cee2af0ec1..a9ea7d29cdd9 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -307,17 +307,18 @@ static inline int io_uring_cmd_getsockopt(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { + const struct io_uring_sqe *sqe = cmd->sqe; bool compat = !!(issue_flags & IO_URING_F_COMPAT); int optlen, optname, level, err; void __user *optval; - level = READ_ONCE(cmd->sqe->level); + level = READ_ONCE(sqe->level); if (level != SOL_SOCKET) return -EOPNOTSUPP; - optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval)); - optname = READ_ONCE(cmd->sqe->optname); - optlen = READ_ONCE(cmd->sqe->optlen); + optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); + optname = READ_ONCE(sqe->optname); + optlen = READ_ONCE(sqe->optlen); err = do_sock_getsockopt(sock, compat, level, optname, USER_SOCKPTR(optval), @@ -333,15 +334,16 @@ static inline int io_uring_cmd_setsockopt(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { + const struct io_uring_sqe *sqe = cmd->sqe; bool compat = !!(issue_flags & IO_URING_F_COMPAT); int optname, optlen, level; void __user *optval; sockptr_t optval_s; - optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval)); - optname = READ_ONCE(cmd->sqe->optname); - optlen = READ_ONCE(cmd->sqe->optlen); - level = READ_ONCE(cmd->sqe->level); + optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); + optname = READ_ONCE(sqe->optname); + optlen = READ_ONCE(sqe->optlen); + level = READ_ONCE(sqe->level); optval_s = USER_SOCKPTR(optval); return do_sock_setsockopt(sock, compat, level, optname, optval_s, From 3d4a4411aa8bbc3653ff22a1ff0432eb93d22ae0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 14 Mar 2025 17:47:56 +0000 Subject: [PATCH 1887/2157] ASoC: q6apm-dai: schedule all available frames to avoid dsp under-runs With the existing code, we are only setting up one period at a time, in a ping-pong buffer style. This triggers lot of underruns in the dsp leading to jitter noise during audio playback. Fix this by scheduling all available periods, this will ensure that the dsp has enough buffer feed and ultimatley fixing the underruns and audio distortion. Fixes: 9b4fe0f1cd79 ("ASoC: qdsp6: audioreach: add q6apm-dai support") Cc: stable@vger.kernel.org Reported-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Tested-by: Krzysztof Kozlowski Tested-by: Johan Hovold Link: https://patch.msgid.link/20250314174800.10142-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index c9404b5934c7..9d8e8e37c6de 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -70,6 +70,7 @@ struct q6apm_dai_rtd { unsigned int bytes_received; unsigned int copied_total; uint16_t bits_per_sample; + snd_pcm_uframes_t queue_ptr; bool next_track; enum stream_state state; struct q6apm_graph *graph; @@ -134,8 +135,6 @@ static void event_handler(uint32_t opcode, uint32_t token, void *payload, void * prtd->pos += prtd->pcm_count; spin_unlock_irqrestore(&prtd->lock, flags); snd_pcm_period_elapsed(substream); - if (prtd->state == Q6APM_STREAM_RUNNING) - q6apm_write_async(prtd->graph, prtd->pcm_count, 0, 0, 0); break; case APM_CLIENT_EVENT_DATA_READ_DONE: @@ -294,6 +293,27 @@ static int q6apm_dai_prepare(struct snd_soc_component *component, return 0; } +static int q6apm_dai_ack(struct snd_soc_component *component, struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct q6apm_dai_rtd *prtd = runtime->private_data; + int i, ret = 0, avail_periods; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + avail_periods = (runtime->control->appl_ptr - prtd->queue_ptr)/runtime->period_size; + for (i = 0; i < avail_periods; i++) { + ret = q6apm_write_async(prtd->graph, prtd->pcm_count, 0, 0, NO_TIMESTAMP); + if (ret < 0) { + dev_err(component->dev, "Error queuing playback buffer %d\n", ret); + return ret; + } + prtd->queue_ptr += runtime->period_size; + } + } + + return ret; +} + static int q6apm_dai_trigger(struct snd_soc_component *component, struct snd_pcm_substream *substream, int cmd) { @@ -305,9 +325,6 @@ static int q6apm_dai_trigger(struct snd_soc_component *component, case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - /* start writing buffers for playback only as we already queued capture buffers */ - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - ret = q6apm_write_async(prtd->graph, prtd->pcm_count, 0, 0, 0); break; case SNDRV_PCM_TRIGGER_STOP: /* TODO support be handled via SoftPause Module */ @@ -836,6 +853,7 @@ static const struct snd_soc_component_driver q6apm_fe_dai_component = { .hw_params = q6apm_dai_hw_params, .pointer = q6apm_dai_pointer, .trigger = q6apm_dai_trigger, + .ack = q6apm_dai_ack, .compress_ops = &q6apm_dai_compress_ops, .use_dai_pcm_id = true, }; From 0badb5432fd525a00db5630c459b635e9d47f445 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 14 Mar 2025 17:47:57 +0000 Subject: [PATCH 1888/2157] ASoC: q6apm: add q6apm_get_hw_pointer helper Implement an helper function in q6apm to be able to read the current hardware pointer for both read and write buffers. This should help q6apm-dai to get the hardware pointer consistently without it doing manual calculation, which could go wrong in some race conditions. Fixes: 9b4fe0f1cd79 ("ASoC: qdsp6: audioreach: add q6apm-dai support") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Tested-by: Krzysztof Kozlowski Tested-by: Johan Hovold Link: https://patch.msgid.link/20250314174800.10142-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm.c | 18 +++++++++++++++++- sound/soc/qcom/qdsp6/q6apm.h | 3 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index 11e252a70f69..b4ffa0f0b188 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c @@ -494,6 +494,19 @@ int q6apm_read(struct q6apm_graph *graph) } EXPORT_SYMBOL_GPL(q6apm_read); +int q6apm_get_hw_pointer(struct q6apm_graph *graph, int dir) +{ + struct audioreach_graph_data *data; + + if (dir == SNDRV_PCM_STREAM_PLAYBACK) + data = &graph->rx_data; + else + data = &graph->tx_data; + + return (int)atomic_read(&data->hw_ptr); +} +EXPORT_SYMBOL_GPL(q6apm_get_hw_pointer); + static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op) { struct data_cmd_rsp_rd_sh_mem_ep_data_buffer_done_v2 *rd_done; @@ -520,7 +533,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op) done = data->payload; phys = graph->rx_data.buf[token].phys; mutex_unlock(&graph->lock); - + /* token numbering starts at 0 */ + atomic_set(&graph->rx_data.hw_ptr, token + 1); if (lower_32_bits(phys) == done->buf_addr_lsw && upper_32_bits(phys) == done->buf_addr_msw) { graph->result.opcode = hdr->opcode; @@ -553,6 +567,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op) rd_done = data->payload; phys = graph->tx_data.buf[hdr->token].phys; mutex_unlock(&graph->lock); + /* token numbering starts at 0 */ + atomic_set(&graph->tx_data.hw_ptr, hdr->token + 1); if (upper_32_bits(phys) == rd_done->buf_addr_msw && lower_32_bits(phys) == rd_done->buf_addr_lsw) { diff --git a/sound/soc/qcom/qdsp6/q6apm.h b/sound/soc/qcom/qdsp6/q6apm.h index c248c8d2b1ab..7ce08b401e31 100644 --- a/sound/soc/qcom/qdsp6/q6apm.h +++ b/sound/soc/qcom/qdsp6/q6apm.h @@ -2,6 +2,7 @@ #ifndef __Q6APM_H__ #define __Q6APM_H__ #include +#include #include #include #include @@ -77,6 +78,7 @@ struct audioreach_graph_data { uint32_t num_periods; uint32_t dsp_buf; uint32_t mem_map_handle; + atomic_t hw_ptr; }; struct audioreach_graph { @@ -150,4 +152,5 @@ int q6apm_enable_compress_module(struct device *dev, struct q6apm_graph *graph, int q6apm_remove_initial_silence(struct device *dev, struct q6apm_graph *graph, uint32_t samples); int q6apm_remove_trailing_silence(struct device *dev, struct q6apm_graph *graph, uint32_t samples); int q6apm_set_real_module_id(struct device *dev, struct q6apm_graph *graph, uint32_t codec_id); +int q6apm_get_hw_pointer(struct q6apm_graph *graph, int dir); #endif /* __APM_GRAPH_ */ From 3107019501842c27334554ba9d6583b1f200f61f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 14 Mar 2025 17:47:59 +0000 Subject: [PATCH 1889/2157] ASoC: qdsp6: q6apm-dai: set 10 ms period and buffer alignment. DSP expects the periods to be aligned to fragment sizes, currently setting up to hw constriants on periods bytes is not going to work correctly as we can endup with periods sizes aligned to 32 bytes however not aligned to fragment size. Update the constriants to use fragment size, and also set at step of 10ms for period size to accommodate DSP requirements of 10ms latency. Fixes: 9b4fe0f1cd79 ("ASoC: qdsp6: audioreach: add q6apm-dai support") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Tested-by: Johan Hovold Link: https://patch.msgid.link/20250314174800.10142-5-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index 9d8e8e37c6de..d2952d4ac646 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -394,13 +394,14 @@ static int q6apm_dai_open(struct snd_soc_component *component, } } - ret = snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 32); + /* setup 10ms latency to accommodate DSP restrictions */ + ret = snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 480); if (ret < 0) { dev_err(dev, "constraint for period bytes step ret = %d\n", ret); goto err; } - ret = snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 32); + ret = snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 480); if (ret < 0) { dev_err(dev, "constraint for buffer bytes step ret = %d\n", ret); goto err; From 5d01ed9b9939b4c726be74db291a982bc984c584 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 14 Mar 2025 17:48:00 +0000 Subject: [PATCH 1890/2157] ASoC: qdsp6: q6apm-dai: fix capture pipeline overruns. Period sizes less than 6k for capture path triggers overruns in the dsp capture pipeline. Change the period size and number of periods to value which DSP is happy with. Fixes: 9b4fe0f1cd79 ("ASoC: qdsp6: audioreach: add q6apm-dai support") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Tested-by: Krzysztof Kozlowski Tested-by: Johan Hovold Link: https://patch.msgid.link/20250314174800.10142-6-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index d2952d4ac646..237c4c8ce9c9 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -24,8 +24,8 @@ #define PLAYBACK_MIN_PERIOD_SIZE 128 #define CAPTURE_MIN_NUM_PERIODS 2 #define CAPTURE_MAX_NUM_PERIODS 8 -#define CAPTURE_MAX_PERIOD_SIZE 4096 -#define CAPTURE_MIN_PERIOD_SIZE 320 +#define CAPTURE_MAX_PERIOD_SIZE 65536 +#define CAPTURE_MIN_PERIOD_SIZE 6144 #define BUFFER_BYTES_MAX (PLAYBACK_MAX_NUM_PERIODS * PLAYBACK_MAX_PERIOD_SIZE) #define BUFFER_BYTES_MIN (PLAYBACK_MIN_NUM_PERIODS * PLAYBACK_MIN_PERIOD_SIZE) #define COMPR_PLAYBACK_MAX_FRAGMENT_SIZE (128 * 1024) From a93dad6f4e6a04a5943f6ee5686585f24abf7063 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 14 Mar 2025 17:47:58 +0000 Subject: [PATCH 1891/2157] ASoC: q6apm-dai: make use of q6apm_get_hw_pointer With the existing code, the buffer position is only reset in pointer callback, which leaves the possiblity of it going over the size of buffer size and reporting incorrect position to userspace. Without this patch, its possible to see errors like: snd-x1e80100 sound: invalid position: pcmC0D0p:0, pos = 12288, buffer size = 12288, period size = 1536 snd-x1e80100 sound: invalid position: pcmC0D0p:0, pos = 12288, buffer size = 12288, period size = 1536 Fixes: 9b4fe0f1cd791 ("ASoC: qdsp6: audioreach: add q6apm-dai support") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Tested-by: Krzysztof Kozlowski Tested-by: Johan Hovold Link: https://patch.msgid.link/20250314174800.10142-4-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index 237c4c8ce9c9..2cd522108221 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -64,7 +64,6 @@ struct q6apm_dai_rtd { phys_addr_t phys; unsigned int pcm_size; unsigned int pcm_count; - unsigned int pos; /* Buffer position */ unsigned int periods; unsigned int bytes_sent; unsigned int bytes_received; @@ -124,23 +123,16 @@ static void event_handler(uint32_t opcode, uint32_t token, void *payload, void * { struct q6apm_dai_rtd *prtd = priv; struct snd_pcm_substream *substream = prtd->substream; - unsigned long flags; switch (opcode) { case APM_CLIENT_EVENT_CMD_EOS_DONE: prtd->state = Q6APM_STREAM_STOPPED; break; case APM_CLIENT_EVENT_DATA_WRITE_DONE: - spin_lock_irqsave(&prtd->lock, flags); - prtd->pos += prtd->pcm_count; - spin_unlock_irqrestore(&prtd->lock, flags); snd_pcm_period_elapsed(substream); break; case APM_CLIENT_EVENT_DATA_READ_DONE: - spin_lock_irqsave(&prtd->lock, flags); - prtd->pos += prtd->pcm_count; - spin_unlock_irqrestore(&prtd->lock, flags); snd_pcm_period_elapsed(substream); if (prtd->state == Q6APM_STREAM_RUNNING) q6apm_read(prtd->graph); @@ -247,7 +239,6 @@ static int q6apm_dai_prepare(struct snd_soc_component *component, } prtd->pcm_count = snd_pcm_lib_period_bytes(substream); - prtd->pos = 0; /* rate and channels are sent to audio driver */ ret = q6apm_graph_media_format_shmem(prtd->graph, &cfg); if (ret < 0) { @@ -446,16 +437,12 @@ static snd_pcm_uframes_t q6apm_dai_pointer(struct snd_soc_component *component, struct snd_pcm_runtime *runtime = substream->runtime; struct q6apm_dai_rtd *prtd = runtime->private_data; snd_pcm_uframes_t ptr; - unsigned long flags; - spin_lock_irqsave(&prtd->lock, flags); - if (prtd->pos == prtd->pcm_size) - prtd->pos = 0; + ptr = q6apm_get_hw_pointer(prtd->graph, substream->stream) * runtime->period_size; + if (ptr) + return ptr - 1; - ptr = bytes_to_frames(runtime, prtd->pos); - spin_unlock_irqrestore(&prtd->lock, flags); - - return ptr; + return 0; } static int q6apm_dai_hw_params(struct snd_soc_component *component, @@ -670,8 +657,6 @@ static int q6apm_dai_compr_set_params(struct snd_soc_component *component, prtd->pcm_size = runtime->fragments * runtime->fragment_size; prtd->bits_per_sample = 16; - prtd->pos = 0; - if (prtd->next_track != true) { memcpy(&prtd->codec, codec, sizeof(*codec)); From ebca08fef88febdb0a898cefa7c99b9e25b3a984 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 28 Mar 2025 22:08:56 +0100 Subject: [PATCH 1892/2157] ACPI: video: Handle fetching EDID as ACPI_TYPE_PACKAGE The _DDC method should return a buffer, or an integer in case of an error. But some Lenovo laptops incorrectly return EDID as buffer in ACPI package. Calling _DDC generates this ACPI Warning: ACPI Warning: \_SB.PCI0.GP17.VGA.LCD._DDC: Return type mismatch - \ found Package, expected Integer/Buffer (20240827/nspredef-254) Use the first element of the package to get the EDID buffer. The DSDT: Name (AUOP, Package (0x01) { Buffer (0x80) { ... } }) ... Method (_DDC, 1, NotSerialized) // _DDC: Display Data Current { If ((PAID == AUID)) { Return (AUOP) /* \_SB_.PCI0.GP17.VGA_.LCD_.AUOP */ } ElseIf ((PAID == IVID)) { Return (IVOP) /* \_SB_.PCI0.GP17.VGA_.LCD_.IVOP */ } ElseIf ((PAID == BOID)) { Return (BOEP) /* \_SB_.PCI0.GP17.VGA_.LCD_.BOEP */ } ElseIf ((PAID == SAID)) { Return (SUNG) /* \_SB_.PCI0.GP17.VGA_.LCD_.SUNG */ } Return (Zero) } Link: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/Apx_B_Video_Extensions/output-device-specific-methods.html#ddc-return-the-edid-for-this-device Cc: stable@vger.kernel.org Fixes: c6a837088bed ("drm/amd/display: Fetch the EDID from _DDC if available for eDP") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4085 Signed-off-by: Gergo Koteles Reviewed-by: Hans de Goede Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/61c3df83ab73aba0bc7a941a443cd7faf4cf7fb0.1743195250.git.soyer@irl.hu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index efdadc74e3f4..103f29661576 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -649,6 +649,13 @@ acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length obj = buffer.pointer; + /* + * Some buggy implementations incorrectly return the EDID buffer in an ACPI package. + * In this case, extract the buffer from the package. + */ + if (obj && obj->type == ACPI_TYPE_PACKAGE && obj->package.count == 1) + obj = &obj->package.elements[0]; + if (obj && obj->type == ACPI_TYPE_BUFFER) { *edid = kmemdup(obj->buffer.pointer, obj->buffer.length, GFP_KERNEL); ret = *edid ? obj->buffer.length : -ENOMEM; @@ -658,7 +665,7 @@ acpi_video_device_EDID(struct acpi_video_device *device, void **edid, int length ret = -EFAULT; } - kfree(obj); + kfree(buffer.pointer); return ret; } From 1ebd4944266e86a7ce274f197847f5a6399651e8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 22 Mar 2025 08:45:49 +0100 Subject: [PATCH 1893/2157] ASoC: codecs: rt5665: Fix some error handling paths in rt5665_probe() Should an error occur after a successful regulator_bulk_enable() call, regulator_bulk_disable() should be called, as already done in the remove function. Instead of adding an error handling path in the probe, switch from devm_regulator_bulk_get() to devm_regulator_bulk_get_enable() and simplify the remove function and some other places accordingly. Finally, add a missing const when defining rt5665_supply_names to please checkpatch and constify a few bytes. Fixes: 33ada14a26c8 ("ASoC: add rt5665 codec driver") Signed-off-by: Christophe JAILLET Link: https://patch.msgid.link/e3c2aa1b2fdfa646752d94f4af968630c0d58248.1742629525.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index e0d1991cffdb..bcb6d7c6f301 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -31,9 +31,7 @@ #include "rl6231.h" #include "rt5665.h" -#define RT5665_NUM_SUPPLIES 3 - -static const char *rt5665_supply_names[RT5665_NUM_SUPPLIES] = { +static const char * const rt5665_supply_names[] = { "AVDD", "MICVDD", "VBAT", @@ -46,7 +44,6 @@ struct rt5665_priv { struct gpio_desc *gpiod_ldo1_en; struct gpio_desc *gpiod_reset; struct snd_soc_jack *hs_jack; - struct regulator_bulk_data supplies[RT5665_NUM_SUPPLIES]; struct delayed_work jack_detect_work; struct delayed_work calibrate_work; struct delayed_work jd_check_work; @@ -4471,8 +4468,6 @@ static void rt5665_remove(struct snd_soc_component *component) struct rt5665_priv *rt5665 = snd_soc_component_get_drvdata(component); regmap_write(rt5665->regmap, RT5665_RESET, 0); - - regulator_bulk_disable(ARRAY_SIZE(rt5665->supplies), rt5665->supplies); } #ifdef CONFIG_PM @@ -4758,7 +4753,7 @@ static int rt5665_i2c_probe(struct i2c_client *i2c) { struct rt5665_platform_data *pdata = dev_get_platdata(&i2c->dev); struct rt5665_priv *rt5665; - int i, ret; + int ret; unsigned int val; rt5665 = devm_kzalloc(&i2c->dev, sizeof(struct rt5665_priv), @@ -4774,24 +4769,13 @@ static int rt5665_i2c_probe(struct i2c_client *i2c) else rt5665_parse_dt(rt5665, &i2c->dev); - for (i = 0; i < ARRAY_SIZE(rt5665->supplies); i++) - rt5665->supplies[i].supply = rt5665_supply_names[i]; - - ret = devm_regulator_bulk_get(&i2c->dev, ARRAY_SIZE(rt5665->supplies), - rt5665->supplies); + ret = devm_regulator_bulk_get_enable(&i2c->dev, ARRAY_SIZE(rt5665_supply_names), + rt5665_supply_names); if (ret != 0) { dev_err(&i2c->dev, "Failed to request supplies: %d\n", ret); return ret; } - ret = regulator_bulk_enable(ARRAY_SIZE(rt5665->supplies), - rt5665->supplies); - if (ret != 0) { - dev_err(&i2c->dev, "Failed to enable supplies: %d\n", ret); - return ret; - } - - rt5665->gpiod_ldo1_en = devm_gpiod_get_optional(&i2c->dev, "realtek,ldo1-en", GPIOD_OUT_HIGH); From 7ba0847fa1c22e7801cebfe5f7b75aee4fae317e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 08:33:32 -0700 Subject: [PATCH 1894/2157] spi: cadence: Fix out-of-bounds array access in cdns_mrvl_xspi_setup_clock() If requested_clk > 128, cdns_mrvl_xspi_setup_clock() iterates over the entire cdns_mrvl_xspi_clk_div_list array without breaking out early, causing 'i' to go beyond the array bounds. Fix that by stopping the loop when it gets to the last entry, clamping the clock to the minimum 6.25 MHz. Fixes the following warning with an UBSAN kernel: vmlinux.o: warning: objtool: cdns_mrvl_xspi_setup_clock: unexpected end of section .text.cdns_mrvl_xspi_setup_clock Fixes: 26d34fdc4971 ("spi: cadence: Add clock configuration for Marvell xSPI overlay") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503282236.UhfRsF3B-lkp@intel.com/ Link: https://lore.kernel.org/r/gs2ooxfkblnee6cc5yfcxh7nu4wvoqnuv4lrllkhccxgcac2jg@7snmwd73jkhs Signed-off-by: Josh Poimboeuf Link: https://patch.msgid.link/h6bef6wof6zpjfp3jbhrkigqsnykdfy6j4qmmvb6gsabhianhj@k57a7hwpa3bj Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-xspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c index aed98ab14334..6dcba0e0ddaa 100644 --- a/drivers/spi/spi-cadence-xspi.c +++ b/drivers/spi/spi-cadence-xspi.c @@ -432,7 +432,7 @@ static bool cdns_mrvl_xspi_setup_clock(struct cdns_xspi_dev *cdns_xspi, u32 clk_reg; bool update_clk = false; - while (i < ARRAY_SIZE(cdns_mrvl_xspi_clk_div_list)) { + while (i < (ARRAY_SIZE(cdns_mrvl_xspi_clk_div_list) - 1)) { clk_val = MRVL_XSPI_CLOCK_DIVIDED( cdns_mrvl_xspi_clk_div_list[i]); if (clk_val <= requested_clk) From a1fbe0a12178a006b04a7fa528457f9901d6c6d0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 19:40:21 +0100 Subject: [PATCH 1895/2157] io_uring/rsrc: check size when importing reg buffer We're relying on callers to verify the IO size, do it inside of io_import_fixed() instead. It's safer, easier to deal with, and more consistent as now it's done close to the iter init site. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f9c2c75ec4d356a0c61289073f68d98e8a9db190.1743446271.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 3f195e24777e..59b4317b04a7 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1016,6 +1016,8 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, /* not inside the mapped region */ if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len))) return -EFAULT; + if (unlikely(len > MAX_RW_COUNT)) + return -EFAULT; if (!(imu->dir & (1 << ddir))) return -EFAULT; From 81ed18015d65f111ddbc88599c48338a5e1927d0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:17:58 +0100 Subject: [PATCH 1896/2157] io_uring/net: avoid import_ubuf for regvec send With registered buffers we set up iterators in helpers like io_import_fixed(), and there is no need for a import_ubuf() before that. It was fine as we used real pointers for offset calculation, but that's not the case anymore since introduction of ublk kernel buffers. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9b2de1a50844f848f62c8de609b494971033a6b9.1743437358.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index eaa627eddb4a..24040bc3916a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -359,6 +359,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) kmsg->msg.msg_name = &kmsg->addr; kmsg->msg.msg_namelen = addr_len; } + if (sr->flags & IORING_RECVSEND_FIXED_BUF) + return 0; if (!io_do_buffer_select(req)) { ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); From 95c18b7ccdd1b2e6704651b66565339ada318ba2 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 31 Mar 2025 11:45:24 -0700 Subject: [PATCH 1897/2157] riscv: Add norvc after .option arch in runtime const .option arch clobbers .option norvc. Prevent gas from emitting compressed instructions in the runtime const alternative blocks by setting .option norvc after .option arch. This issue starts appearing on gcc 15, which adds zca to the march. Reported by: Klara Modin Signed-off-by: Charlie Jenkins Fixes: a44fb5722199 ("riscv: Add runtime constant support") Closes: https://lore.kernel.org/all/cc8f3525-20b7-445b-877b-2add28a160a2@gmail.com/ Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250331-fix_runtime_const_norvc-v1-1-89bc62687ab8@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/runtime-const.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index ea2e49c7149c..1ba0985a7ffe 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -56,6 +56,7 @@ #define RISCV_RUNTIME_CONST_64_ZBA \ ".option push\n\t" \ ".option arch,+zba\n\t" \ + ".option norvc\n\t" \ "slli %[__tmp],%[__tmp],32\n\t" \ "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ "nop\n\t" \ @@ -65,6 +66,7 @@ #define RISCV_RUNTIME_CONST_64_ZBKB \ ".option push\n\t" \ ".option arch,+zbkb\n\t" \ + ".option norvc\n\t" \ "pack %[__ret],%[__ret],%[__tmp]\n\t" \ "nop\n\t" \ "nop\n\t" \ From f540876f4eea82295f3af72f786aae51b7378fb2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 30 Mar 2025 21:15:57 -0400 Subject: [PATCH 1898/2157] bcachefs: Fix striping behaviour For striping across devices, we maintain "clocks", and we advance them by the inverse of "how much free space this device has left", so that we round robin biased in favor of devices with more free space. This code was originally trying to do EWMA-ish stuff when originally written, ~10 years ago, and was never properly cleaned up when it was realized that an EWMA is not the right approach here. That left a bug, when we rescale to keep all the clocks in the correct range and prevent overflow. It was assumed that we'd always be allocated from the device with the smallest clock hand, but that's actually not correct: with the target options, allocations will be first tried from a subset of devices, and then the entire filesystem if that fails. Thus, the rescale from the first allocation - allocating from a subset of devices - can pick the wrong rescale value and cause the rest of the clocks to go to 0, losing information. This resuls in incorrect striping behaviour when the desired number of replicas doesn't fit on the foreground target. Link: https://www.reddit.com/r/bcachefs/comments/1jn3t26/replica_allocation_not_evenly_distributed_among/ Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 60 +++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index da0d72928b5b..1a25a8a4ae09 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -606,8 +606,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { - return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - - (stripe->next_alloc[l] < stripe->next_alloc[r])); + return cmp_int(stripe->next_alloc[l], stripe->next_alloc[r]); } #define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) @@ -626,25 +625,62 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, return ret; } +static const u64 stripe_clock_hand_rescale = 1ULL << 62; /* trigger rescale at */ +static const u64 stripe_clock_hand_max = 1ULL << 56; /* max after rescale */ +static const u64 stripe_clock_hand_inv = 1ULL << 52; /* max increment, if a device is empty */ + +static noinline void bch2_stripe_state_rescale(struct dev_stripe_state *stripe) +{ + /* + * Avoid underflowing clock hands if at all possible, if clock hands go + * to 0 then we lose information - clock hands can be in a wide range if + * we have devices we rarely try to allocate from, if we generally + * allocate from a specified target but only sometimes have to fall back + * to the whole filesystem. + */ + u64 scale_max = U64_MAX; /* maximum we can subtract without underflow */ + u64 scale_min = 0; /* minumum we must subtract to avoid overflow */ + + for (u64 *v = stripe->next_alloc; + v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) { + if (*v) + scale_max = min(scale_max, *v); + if (*v > stripe_clock_hand_max) + scale_min = max(scale_min, *v - stripe_clock_hand_max); + } + + u64 scale = max(scale_min, scale_max); + + for (u64 *v = stripe->next_alloc; + v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) + *v = *v < scale ? 0 : *v - scale; +} + static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca, struct dev_stripe_state *stripe, struct bch_dev_usage *usage) { + /* + * Stripe state has a per device clock hand: we allocate from the device + * with the smallest clock hand. + * + * When we allocate, we don't do a simple increment; we add the inverse + * of the device's free space. This results in round robin behavior that + * biases in favor of the device(s) with more free space. + */ + u64 *v = stripe->next_alloc + ca->dev_idx; u64 free_space = __dev_buckets_available(ca, *usage, BCH_WATERMARK_normal); u64 free_space_inv = free_space - ? div64_u64(1ULL << 48, free_space) - : 1ULL << 48; - u64 scale = *v / 4; + ? div64_u64(stripe_clock_hand_inv, free_space) + : stripe_clock_hand_inv; - if (*v + free_space_inv >= *v) - *v += free_space_inv; - else - *v = U64_MAX; + /* Saturating add, avoid overflow: */ + u64 sum = *v + free_space_inv; + *v = sum >= *v ? sum : U64_MAX; - for (v = stripe->next_alloc; - v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) - *v = *v < scale ? 0 : *v - scale; + if (unlikely(*v > stripe_clock_hand_rescale)) + bch2_stripe_state_rescale(stripe); } void bch2_dev_stripe_increment(struct bch_dev *ca, From 7f10fde38f0ac242760e36394e731d25f27e6063 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 14:32:31 -0400 Subject: [PATCH 1899/2157] bcachefs: Fix field spanning write warning Struct with embedded VLA... memcpy: detected field-spanning write (size 8) of single field "&gc->r.e" at fs/bcachefs/ec.c:465 (size 3) WARNING: CPU: 1 PID: 936 at fs/bcachefs/ec.c:465 bch2_trigger_stripe+0x706/0x730 Modules linked in: CPU: 1 UID: 0 PID: 936 Comm: mount.bcachefs Not tainted 6.14.0-rc6-ktest-00236-gefb0b5c62dbc #55 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:bch2_trigger_stripe+0x706/0x730 Code: b4 00 01 b9 03 00 00 00 48 89 fb 48 c7 c7 33 54 da 81 48 89 d6 49 89 d6 48 c7 c2 c3 36 db 81 e8 60 54 c5 ff 48 89 df 4c 89 f2 <0f> 0b e9 5c fd ff ff e8 fe 5e 4e 00 bf 10 00 00 00 48 c7 c6 ff ff RSP: 0018:ffff88817081f680 EFLAGS: 00010246 RAX: f8fe7dd1c56b5600 RBX: ffff888101265368 RCX: 0000000000000027 RDX: 0000000000000008 RSI: 00000000fffbffff RDI: ffff888101265368 RBP: 0000000000000000 R08: 000000000003ffff R09: ffff88817f1fe000 R10: 00000000000bfffd R11: 0000000000000004 R12: ffff8881012652c0 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88817081f6c9 FS: 00007fc428bc7c80(0000) GS:ffff888179280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd3ee4a038 CR3: 000000010a9bc000 CR4: 0000000000750eb0 PKRU: 55555554 Call Trace: ? __warn+0xce/0x1b0 ? bch2_trigger_stripe+0x706/0x730 ? report_bug+0x11b/0x1a0 ? bch2_trigger_stripe+0x706/0x730 ? handle_bug+0x5e/0x90 ? exc_invalid_op+0x1a/0x50 ? asm_exc_invalid_op+0x1a/0x20 ? bch2_trigger_stripe+0x706/0x730 bch2_gc_mark_key+0x2cf/0x430 bch2_check_allocations+0x1a64/0x1ed0 ? vsnprintf+0x1ad/0x420 ? bch2_check_allocations+0x191f/0x1ed0 bch2_run_recovery_passes+0x13b/0x2b0 bch2_fs_recovery+0x9b7/0x1290 ? __bch2_print+0xb2/0xf0 ? bch2_printbuf_exit+0x1e/0x30 ? print_mount_opts+0x153/0x180 bch2_fs_start+0x274/0x3b0 bch2_fs_get_tree+0x516/0x6e0 vfs_get_tree+0x21/0xa0 do_new_mount+0x153/0x350 __x64_sys_mount+0x16c/0x1f0 do_syscall_64+0x6c/0x140 ? arch_exit_to_user_mode_prepare+0x9/0x40 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/ec.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 0903311cc71e..297adb996751 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -707,7 +707,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, struct disk_accounting_pos acc; memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; - memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); + unsafe_memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e), "VLA"); gc_stripe_unlock(m); acc.replicas.data_type = data_type; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 6faeda7ad03d..012386036a3e 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -462,7 +462,8 @@ int bch2_trigger_stripe(struct btree_trans *trans, return ret; if (gc) - memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas)); + unsafe_memcpy(&gc->r.e, &acc.replicas, + replicas_entry_bytes(&acc.replicas), "VLA"); } if (old_s) { From de399658588931506e1b3f4cc63cb3c9134a692e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 16:19:04 -0400 Subject: [PATCH 1900/2157] bcachefs: Fix null ptr deref in bch2_write_endio() This was previously hard to hit since it requires racing with device removal, but splitting up io_ref uncovered it. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 07b55839768e..0503ac1952cd 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -697,12 +697,19 @@ static void bch2_write_endio(struct bio *bio) bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, wbio->submit_time, !bio->bi_status); - if (bio->bi_status) { - bch_err_inum_offset_ratelimited(ca, - op->pos.inode, - wbio->inode_offset << 9, - "data write error: %s", - bch2_blk_status_to_str(bio->bi_status)); + if (unlikely(bio->bi_status)) { + if (ca) + bch_err_inum_offset_ratelimited(ca, + op->pos.inode, + wbio->inode_offset << 9, + "data write error: %s", + bch2_blk_status_to_str(bio->bi_status)); + else + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + wbio->inode_offset << 9, + "data write error: %s", + bch2_blk_status_to_str(bio->bi_status)); set_bit(wbio->dev, op->failed.d); op->flags |= BCH_WRITE_io_error; } From fe135955bed2cad5c1e5797bb8320af06fe085f7 Mon Sep 17 00:00:00 2001 From: PavithraUdayakumar-adi Date: Mon, 17 Feb 2025 18:17:16 +0530 Subject: [PATCH 1901/2157] dt-bindings: rtc: max31335: Add max31331 support Added DT compatible string for MAX31331. Acked-by: Krzysztof Kozlowski Signed-off-by: PavithraUdayakumar-adi Link: https://lore.kernel.org/r/20250217-add_support_max31331_fix_8-v1-1-16ebcfc02336@analog.com Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/adi,max31335.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/adi,max31335.yaml b/Documentation/devicetree/bindings/rtc/adi,max31335.yaml index 0125cf6727cc..bce7558d0d87 100644 --- a/Documentation/devicetree/bindings/rtc/adi,max31335.yaml +++ b/Documentation/devicetree/bindings/rtc/adi,max31335.yaml @@ -18,7 +18,9 @@ allOf: properties: compatible: - const: adi,max31335 + enum: + - adi,max31331 + - adi,max31335 reg: maxItems: 1 From a4193578631b7c55eae31f52cef6b0f09203fd17 Mon Sep 17 00:00:00 2001 From: PavithraUdayakumar-adi Date: Mon, 17 Feb 2025 18:17:17 +0530 Subject: [PATCH 1902/2157] rtc: max31335: Add driver support for max31331 MAX31331 is an ultra-low-power, I2C Real-Time Clock RTC. Signed-off-by: PavithraUdayakumar-adi Link: https://lore.kernel.org/r/20250217-add_support_max31331_fix_8-v1-2-16ebcfc02336@analog.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-max31335.c | 165 +++++++++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 43 deletions(-) diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c index 3fbcf5f6b92f..a7bb37aaab9e 100644 --- a/drivers/rtc/rtc-max31335.c +++ b/drivers/rtc/rtc-max31335.c @@ -184,31 +184,91 @@ #define MAX31335_RAM_SIZE 32 #define MAX31335_TIME_SIZE 0x07 +/* MAX31331 Register Map */ +#define MAX31331_RTC_CONFIG2 0x04 + #define clk_hw_to_max31335(_hw) container_of(_hw, struct max31335_data, clkout) +/* Supported Maxim RTC */ +enum max_rtc_ids { + ID_MAX31331, + ID_MAX31335, + MAX_RTC_ID_NR +}; + +struct chip_desc { + u8 sec_reg; + u8 alarm1_sec_reg; + + u8 int_en_reg; + u8 int_status_reg; + + u8 ram_reg; + u8 ram_size; + + u8 temp_reg; + + u8 trickle_reg; + + u8 clkout_reg; + + enum max_rtc_ids id; +}; + struct max31335_data { struct regmap *regmap; struct rtc_device *rtc; struct clk_hw clkout; + struct clk *clkin; + const struct chip_desc *chip; + int irq; }; static const int max31335_clkout_freq[] = { 1, 64, 1024, 32768 }; +static const struct chip_desc chip[MAX_RTC_ID_NR] = { + [ID_MAX31331] = { + .id = ID_MAX31331, + .int_en_reg = 0x01, + .int_status_reg = 0x00, + .sec_reg = 0x08, + .alarm1_sec_reg = 0x0F, + .ram_reg = 0x20, + .ram_size = 32, + .trickle_reg = 0x1B, + .clkout_reg = 0x04, + }, + [ID_MAX31335] = { + .id = ID_MAX31335, + .int_en_reg = 0x01, + .int_status_reg = 0x00, + .sec_reg = 0x0A, + .alarm1_sec_reg = 0x11, + .ram_reg = 0x40, + .ram_size = 32, + .temp_reg = 0x35, + .trickle_reg = 0x1D, + .clkout_reg = 0x06, + }, +}; + static const u16 max31335_trickle_resistors[] = {3000, 6000, 11000}; static bool max31335_volatile_reg(struct device *dev, unsigned int reg) { + struct max31335_data *max31335 = dev_get_drvdata(dev); + const struct chip_desc *chip = max31335->chip; + /* time keeping registers */ - if (reg >= MAX31335_SECONDS && - reg < MAX31335_SECONDS + MAX31335_TIME_SIZE) + if (reg >= chip->sec_reg && reg < chip->sec_reg + MAX31335_TIME_SIZE) return true; /* interrupt status register */ - if (reg == MAX31335_STATUS1) + if (reg == chip->int_status_reg) return true; - /* temperature registers */ - if (reg == MAX31335_TEMP_DATA_MSB || reg == MAX31335_TEMP_DATA_LSB) + /* temperature registers if valid */ + if (chip->temp_reg && (reg == chip->temp_reg || reg == chip->temp_reg + 1)) return true; return false; @@ -227,7 +287,7 @@ static int max31335_read_time(struct device *dev, struct rtc_time *tm) u8 date[7]; int ret; - ret = regmap_bulk_read(max31335->regmap, MAX31335_SECONDS, date, + ret = regmap_bulk_read(max31335->regmap, max31335->chip->sec_reg, date, sizeof(date)); if (ret) return ret; @@ -262,7 +322,7 @@ static int max31335_set_time(struct device *dev, struct rtc_time *tm) if (tm->tm_year >= 200) date[5] |= FIELD_PREP(MAX31335_MONTH_CENTURY, 1); - return regmap_bulk_write(max31335->regmap, MAX31335_SECONDS, date, + return regmap_bulk_write(max31335->regmap, max31335->chip->sec_reg, date, sizeof(date)); } @@ -273,7 +333,7 @@ static int max31335_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct rtc_time time; u8 regs[6]; - ret = regmap_bulk_read(max31335->regmap, MAX31335_ALM1_SEC, regs, + ret = regmap_bulk_read(max31335->regmap, max31335->chip->alarm1_sec_reg, regs, sizeof(regs)); if (ret) return ret; @@ -292,11 +352,11 @@ static int max31335_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (time.tm_year >= 200) alrm->time.tm_year += 100; - ret = regmap_read(max31335->regmap, MAX31335_INT_EN1, &ctrl); + ret = regmap_read(max31335->regmap, max31335->chip->int_en_reg, &ctrl); if (ret) return ret; - ret = regmap_read(max31335->regmap, MAX31335_STATUS1, &status); + ret = regmap_read(max31335->regmap, max31335->chip->int_status_reg, &status); if (ret) return ret; @@ -320,18 +380,18 @@ static int max31335_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) regs[4] = bin2bcd(alrm->time.tm_mon + 1); regs[5] = bin2bcd(alrm->time.tm_year % 100); - ret = regmap_bulk_write(max31335->regmap, MAX31335_ALM1_SEC, + ret = regmap_bulk_write(max31335->regmap, max31335->chip->alarm1_sec_reg, regs, sizeof(regs)); if (ret) return ret; reg = FIELD_PREP(MAX31335_INT_EN1_A1IE, alrm->enabled); - ret = regmap_update_bits(max31335->regmap, MAX31335_INT_EN1, + ret = regmap_update_bits(max31335->regmap, max31335->chip->int_en_reg, MAX31335_INT_EN1_A1IE, reg); if (ret) return ret; - ret = regmap_update_bits(max31335->regmap, MAX31335_STATUS1, + ret = regmap_update_bits(max31335->regmap, max31335->chip->int_status_reg, MAX31335_STATUS1_A1F, 0); return 0; @@ -341,23 +401,33 @@ static int max31335_alarm_irq_enable(struct device *dev, unsigned int enabled) { struct max31335_data *max31335 = dev_get_drvdata(dev); - return regmap_update_bits(max31335->regmap, MAX31335_INT_EN1, + return regmap_update_bits(max31335->regmap, max31335->chip->int_en_reg, MAX31335_INT_EN1_A1IE, enabled); } static irqreturn_t max31335_handle_irq(int irq, void *dev_id) { struct max31335_data *max31335 = dev_id; - bool status; - int ret; + struct mutex *lock = &max31335->rtc->ops_lock; + int ret, status; - ret = regmap_update_bits_check(max31335->regmap, MAX31335_STATUS1, - MAX31335_STATUS1_A1F, 0, &status); + mutex_lock(lock); + + ret = regmap_read(max31335->regmap, max31335->chip->int_status_reg, &status); if (ret) - return IRQ_HANDLED; + goto exit; + + if (FIELD_GET(MAX31335_STATUS1_A1F, status)) { + ret = regmap_update_bits(max31335->regmap, max31335->chip->int_status_reg, + MAX31335_STATUS1_A1F, 0); + if (ret) + goto exit; - if (status) rtc_update_irq(max31335->rtc, 1, RTC_AF | RTC_IRQF); + } + +exit: + mutex_unlock(lock); return IRQ_HANDLED; } @@ -404,7 +474,7 @@ static int max31335_trickle_charger_setup(struct device *dev, i = i + trickle_cfg; - return regmap_write(max31335->regmap, MAX31335_TRICKLE_REG, + return regmap_write(max31335->regmap, max31335->chip->trickle_reg, FIELD_PREP(MAX31335_TRICKLE_REG_TRICKLE, i) | FIELD_PREP(MAX31335_TRICKLE_REG_EN_TRICKLE, chargeable)); @@ -418,7 +488,7 @@ static unsigned long max31335_clkout_recalc_rate(struct clk_hw *hw, unsigned int reg; int ret; - ret = regmap_read(max31335->regmap, MAX31335_RTC_CONFIG2, ®); + ret = regmap_read(max31335->regmap, max31335->chip->clkout_reg, ®); if (ret) return 0; @@ -449,23 +519,23 @@ static int max31335_clkout_set_rate(struct clk_hw *hw, unsigned long rate, ARRAY_SIZE(max31335_clkout_freq)); freq_mask = __roundup_pow_of_two(ARRAY_SIZE(max31335_clkout_freq)) - 1; - return regmap_update_bits(max31335->regmap, MAX31335_RTC_CONFIG2, - freq_mask, index); + return regmap_update_bits(max31335->regmap, max31335->chip->clkout_reg, + freq_mask, index); } static int max31335_clkout_enable(struct clk_hw *hw) { struct max31335_data *max31335 = clk_hw_to_max31335(hw); - return regmap_set_bits(max31335->regmap, MAX31335_RTC_CONFIG2, - MAX31335_RTC_CONFIG2_ENCLKO); + return regmap_set_bits(max31335->regmap, max31335->chip->clkout_reg, + MAX31335_RTC_CONFIG2_ENCLKO); } static void max31335_clkout_disable(struct clk_hw *hw) { struct max31335_data *max31335 = clk_hw_to_max31335(hw); - regmap_clear_bits(max31335->regmap, MAX31335_RTC_CONFIG2, + regmap_clear_bits(max31335->regmap, max31335->chip->clkout_reg, MAX31335_RTC_CONFIG2_ENCLKO); } @@ -475,7 +545,7 @@ static int max31335_clkout_is_enabled(struct clk_hw *hw) unsigned int reg; int ret; - ret = regmap_read(max31335->regmap, MAX31335_RTC_CONFIG2, ®); + ret = regmap_read(max31335->regmap, max31335->chip->clkout_reg, ®); if (ret) return ret; @@ -500,7 +570,7 @@ static int max31335_nvmem_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct max31335_data *max31335 = priv; - unsigned int reg = MAX31335_TS0_SEC_1_128 + offset; + unsigned int reg = max31335->chip->ram_reg + offset; return regmap_bulk_read(max31335->regmap, reg, val, bytes); } @@ -509,7 +579,7 @@ static int max31335_nvmem_reg_write(void *priv, unsigned int offset, void *val, size_t bytes) { struct max31335_data *max31335 = priv; - unsigned int reg = MAX31335_TS0_SEC_1_128 + offset; + unsigned int reg = max31335->chip->ram_reg + offset; return regmap_bulk_write(max31335->regmap, reg, val, bytes); } @@ -533,7 +603,7 @@ static int max31335_read_temp(struct device *dev, enum hwmon_sensor_types type, if (type != hwmon_temp || attr != hwmon_temp_input) return -EOPNOTSUPP; - ret = regmap_bulk_read(max31335->regmap, MAX31335_TEMP_DATA_MSB, + ret = regmap_bulk_read(max31335->regmap, max31335->chip->temp_reg, reg, 2); if (ret) return ret; @@ -577,8 +647,8 @@ static int max31335_clkout_register(struct device *dev) int ret; if (!device_property_present(dev, "#clock-cells")) - return regmap_clear_bits(max31335->regmap, MAX31335_RTC_CONFIG2, - MAX31335_RTC_CONFIG2_ENCLKO); + return regmap_clear_bits(max31335->regmap, max31335->chip->clkout_reg, + MAX31335_RTC_CONFIG2_ENCLKO); max31335->clkout.init = &max31335_clk_init; @@ -605,6 +675,7 @@ static int max31335_probe(struct i2c_client *client) #if IS_REACHABLE(HWMON) struct device *hwmon; #endif + const struct chip_desc *match; int ret; max31335 = devm_kzalloc(&client->dev, sizeof(*max31335), GFP_KERNEL); @@ -616,7 +687,10 @@ static int max31335_probe(struct i2c_client *client) return PTR_ERR(max31335->regmap); i2c_set_clientdata(client, max31335); - + match = i2c_get_match_data(client); + if (!match) + return -ENODEV; + max31335->chip = match; max31335->rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(max31335->rtc)) return PTR_ERR(max31335->rtc); @@ -639,6 +713,8 @@ static int max31335_probe(struct i2c_client *client) dev_warn(&client->dev, "unable to request IRQ, alarm max31335 disabled\n"); client->irq = 0; + } else { + max31335->irq = client->irq; } } @@ -652,13 +728,13 @@ static int max31335_probe(struct i2c_client *client) "cannot register rtc nvmem\n"); #if IS_REACHABLE(HWMON) - hwmon = devm_hwmon_device_register_with_info(&client->dev, client->name, - max31335, - &max31335_chip_info, - NULL); - if (IS_ERR(hwmon)) - return dev_err_probe(&client->dev, PTR_ERR(hwmon), - "cannot register hwmon device\n"); + if (max31335->chip->temp_reg) { + hwmon = devm_hwmon_device_register_with_info(&client->dev, client->name, max31335, + &max31335_chip_info, NULL); + if (IS_ERR(hwmon)) + return dev_err_probe(&client->dev, PTR_ERR(hwmon), + "cannot register hwmon device\n"); + } #endif ret = max31335_trickle_charger_setup(&client->dev, max31335); @@ -669,14 +745,16 @@ static int max31335_probe(struct i2c_client *client) } static const struct i2c_device_id max31335_id[] = { - { "max31335" }, + { "max31331", (kernel_ulong_t)&chip[ID_MAX31331] }, + { "max31335", (kernel_ulong_t)&chip[ID_MAX31335] }, { } }; MODULE_DEVICE_TABLE(i2c, max31335_id); static const struct of_device_id max31335_of_match[] = { - { .compatible = "adi,max31335" }, + { .compatible = "adi,max31331", .data = &chip[ID_MAX31331] }, + { .compatible = "adi,max31335", .data = &chip[ID_MAX31335] }, { } }; @@ -693,5 +771,6 @@ static struct i2c_driver max31335_driver = { module_i2c_driver(max31335_driver); MODULE_AUTHOR("Antoniu Miclaus "); +MODULE_AUTHOR("Saket Kumar Purwar "); MODULE_DESCRIPTION("MAX31335 RTC driver"); MODULE_LICENSE("GPL"); From 7220e8f4d4eec0b2f682eef45e2d36c092738413 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2025 14:44:39 +0000 Subject: [PATCH 1903/2157] net: lapbether: use netdev_lockdep_set_classes() helper drivers/net/wan/lapbether.c uses stacked devices. Like similar drivers, it must use netdev_lockdep_set_classes() to avoid LOCKDEP splats. This is similar to commit 9bfc9d65a1dc ("hamradio: use netdev_lockdep_set_classes() helper") Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Reported-by: syzbot+377b71db585c9c705f8e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/67cd611c.050a0220.14db68.0073.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250327144439.2463509-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/lapbether.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 56326f38fe8a..995a7207bdf8 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -39,6 +39,7 @@ #include #include +#include #include static const u8 bcast_addr[6] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; @@ -366,6 +367,7 @@ static const struct net_device_ops lapbeth_netdev_ops = { static void lapbeth_setup(struct net_device *dev) { + netdev_lockdep_set_classes(dev); dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; From e514d77334a63f1dcb9a3b47d5aee8f51d66cb1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Mar 2025 15:23:13 -0700 Subject: [PATCH 1904/2157] selftests: drv-net: replace the rpath helper with Path objects The single letter + "path" helpers do not have many fans (see Link). Use a Path object with a better name. test_dir is the replacement for rpath(), net_lib_dir is a new path of the $ksft/net/lib directory. The Path() class overloads the "/" operator and can be cast to string automatically, so to get a path to a file tests can do: path = env.test_dir / "binary" Link: https://lore.kernel.org/CA+FuTSemTNVZ5MxXkq8T9P=DYm=nSXcJnL7CJBPZNAT_9UFisQ@mail.gmail.com Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250327222315.1098596-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hds.py | 2 +- .../testing/selftests/drivers/net/hw/csum.py | 2 +- tools/testing/selftests/drivers/net/hw/irq.py | 2 +- .../selftests/drivers/net/lib/py/env.py | 21 +++++++------------ tools/testing/selftests/drivers/net/queues.py | 4 ++-- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7cc74faed743..8b7f6acad15f 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -20,7 +20,7 @@ def _get_hds_mode(cfg, netnl) -> str: def _xdp_onoff(cfg): - prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index 701aca1361e0..cd23af875317 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -88,7 +88,7 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = cfg.rpath("../../../net/lib/csum") + cfg.bin_local = cfg.net_lib_dir / "csum" cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py index 42ab98370245..d772a18d8a1b 100755 --- a/tools/testing/selftests/drivers/net/hw/irq.py +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -69,7 +69,7 @@ def check_reconfig_queues(cfg) -> None: def check_reconfig_xdp(cfg) -> None: def reconfig(cfg) -> None: ip(f"link set dev %s xdp obj %s sec xdp" % - (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o"))) + (cfg.ifname, cfg.test_dir / "xdp_dummy.bpf.o")) ip(f"link set dev %s xdp off" % cfg.ifname) _check_reconfig(cfg, reconfig) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index fd4d674e6c72..ad5ff645183a 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -13,23 +13,18 @@ from .remote import Remote class NetDrvEnvBase: """ Base class for a NIC / host envirnoments + + Attributes: + test_dir: Path to the source directory of the test + net_lib_dir: Path to the net/lib directory """ def __init__(self, src_path): - self.src_path = src_path + self.src_path = Path(src_path) + self.test_dir = self.src_path.parent.resolve() + self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve() + self.env = self._load_env_file() - def rpath(self, path): - """ - Get an absolute path to a file based on a path relative to the directory - containing the test which constructed env. - - For example, if the test.py is in the same directory as - a binary (built from helper.c), the test can use env.rpath("helper") - to get the absolute path to the binary - """ - src_dir = Path(self.src_path).parent.resolve() - return (src_dir / path).as_posix() - def _load_env_file(self): env = os.environ.copy() diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index cae923f84f69..06abd3f233e1 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'): def check_xsk(cfg, nl, xdp_queue_id=0) -> None: # Probe for support - xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False) + xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False) if xdp.ret == 255: raise KsftSkipEx('AF_XDP unsupported') elif xdp.ret > 0: raise KsftFailEx('unable to create AF_XDP socket') - with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}', + with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', ksft_wait=3): rx = tx = False From c231e12ecd45fcb34ff3b52d6557d614ba49b699 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Mar 2025 15:23:14 -0700 Subject: [PATCH 1905/2157] selftests: net: use the dummy bpf from net/lib Commit 29b036be1b0b ("selftests: drv-net: test XDP, HDS auto and the ioctl path") added an sample XDP_PASS prog in net/lib, so that we can reuse it in various sub-directories. Delete the old sample and use the one from the lib in existing tests. Acked-by: Stanislav Fomichev Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250327222315.1098596-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/irq.py | 2 +- .../selftests/drivers/net/hw/xdp_dummy.bpf.c | 13 ------------- tools/testing/selftests/net/udpgro_bench.sh | 2 +- tools/testing/selftests/net/udpgro_frglist.sh | 2 +- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- tools/testing/selftests/net/veth.sh | 2 +- tools/testing/selftests/net/xdp_dummy.bpf.c | 13 ------------- 7 files changed, 5 insertions(+), 31 deletions(-) delete mode 100644 tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c delete mode 100644 tools/testing/selftests/net/xdp_dummy.bpf.c diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py index d772a18d8a1b..0699d6a8b4e2 100755 --- a/tools/testing/selftests/drivers/net/hw/irq.py +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -69,7 +69,7 @@ def check_reconfig_queues(cfg) -> None: def check_reconfig_xdp(cfg) -> None: def reconfig(cfg) -> None: ip(f"link set dev %s xdp obj %s sec xdp" % - (cfg.ifname, cfg.test_dir / "xdp_dummy.bpf.o")) + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) ip(f"link set dev %s xdp off" % cfg.ifname) _check_reconfig(cfg, reconfig) diff --git a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c deleted file mode 100644 index d988b2e0cee8..000000000000 --- a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#define KBUILD_MODNAME "xdp_dummy" -#include -#include - -SEC("xdp") -int xdp_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index c51ea90a1395..815fad8c53a8 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 17404f49cdb6..5f3d1a110d11 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 550d8eb3e224..f22f6c66997e 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -3,7 +3,7 @@ source net_helper.sh -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 6bb7dfaa30b6..9709dd067c72 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c deleted file mode 100644 index d988b2e0cee8..000000000000 --- a/tools/testing/selftests/net/xdp_dummy.bpf.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#define KBUILD_MODNAME "xdp_dummy" -#include -#include - -SEC("xdp") -int xdp_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; From 88dec030dfcd72fa4322181eb64db06c514f33b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Mar 2025 15:23:15 -0700 Subject: [PATCH 1906/2157] selftests: net: use Path helpers in ping Now that net and net-next have converged we can use the Path helpers in the ping test without conflicts. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250327222315.1098596-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 93120e86e102..4b6822866066 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -56,8 +56,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: return def _set_xdp_generic_sb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +65,7 @@ def _set_xdp_generic_sb_on(cfg) -> None: time.sleep(10) def _set_xdp_generic_mb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) @@ -77,8 +75,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: time.sleep(10) def _set_xdp_native_sb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -95,8 +92,7 @@ def _set_xdp_native_sb_on(cfg) -> None: time.sleep(10) def _set_xdp_native_mb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) try: @@ -109,8 +105,7 @@ def _set_xdp_native_mb_on(cfg) -> None: time.sleep(10) def _set_xdp_offload_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) try: cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) From 42f342387841891bbbd15e25d33eb510a0cf7a9a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Mar 2025 06:22:37 +0000 Subject: [PATCH 1907/2157] net: fix use-after-free in the netdev_nl_sock_priv_destroy() In the netdev_nl_sock_priv_destroy(), an instance lock is acquired before calling net_devmem_unbind_dmabuf(), then releasing an instance lock(netdev_unlock(binding->dev)). However, a binding is freed in the net_devmem_unbind_dmabuf(). So using a binding after net_devmem_unbind_dmabuf() occurs UAF. To fix this UAF, it needs to use temporary variable. Fixes: ba6f418fbf64 ("net: bubble up taking netdev instance lock to callers of net_devmem_unbind_dmabuf()") Signed-off-by: Taehee Yoo Reviewed-by: Jakub Kicinski Reviewed-by: Mina Almasry Reviewed-by: Xuan Zhuo Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250328062237.3746875-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fd1cfa9707dc..3afeaa8c5dc5 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -951,12 +951,14 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; + struct net_device *dev; mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { - netdev_lock(binding->dev); + dev = binding->dev; + netdev_lock(dev); net_devmem_unbind_dmabuf(binding); - netdev_unlock(binding->dev); + netdev_unlock(dev); } mutex_unlock(&priv->lock); } From 0fdba88a211508984eb5df62008c29688692b134 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Thu, 27 Mar 2025 14:44:41 +0530 Subject: [PATCH 1908/2157] octeontx2-af: Fix mbox INTR handler when num VFs > 64 When number of RVU VFs > 64, the vfs value passed to "rvu_queue_work" function is incorrect. Due to which mbox workqueue entries for VFs 0 to 63 never gets added to workqueue. Fixes: 9bdc47a6e328 ("octeontx2-af: Mbox communication support btw AF and it's VFs") Signed-off-by: Geetha sowjanya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250327091441.1284-1-gakula@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index cd0d7b7774f1..6575c422635b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2634,7 +2634,7 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), intr); rvu_queue_work(&rvu->afvf_wq_info, 64, vfs, intr); - vfs -= 64; + vfs = 64; } intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(0)); From 323d6db6dc7decb06f2545efb9496259ddacd4f4 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Thu, 27 Mar 2025 15:10:54 +0530 Subject: [PATCH 1909/2157] octeontx2-af: Free NIX_AF_INT_VEC_GEN irq Due to the incorrect initial vector number in rvu_nix_unregister_interrupts(), NIX_AF_INT_VEC_GEN is not geeting free. Fix the vector number to include NIX_AF_INT_VEC_GEN irq. Fixes: 5ed66306eab6 ("octeontx2-af: Add devlink health reporters for NIX") Signed-off-by: Geetha sowjanya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250327094054.2312-1-gakula@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index dab4deca893f..27c3a2daaaa9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -207,7 +207,7 @@ static void rvu_nix_unregister_interrupts(struct rvu *rvu) rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU] = false; } - for (i = NIX_AF_INT_VEC_AF_ERR; i < NIX_AF_INT_VEC_CNT; i++) + for (i = NIX_AF_INT_VEC_GEN; i < NIX_AF_INT_VEC_CNT; i++) if (rvu->irq_allocated[offs + i]) { free_irq(pci_irq_vector(rvu->pdev, offs + i), rvu_dl); rvu->irq_allocated[offs + i] = false; From 443041deb5ef6a1289a99ed95015ec7442f141dc Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Fri, 28 Mar 2025 15:27:16 +0100 Subject: [PATCH 1910/2157] mptcp: fix NULL pointer in can_accept_new_subflow When testing valkey benchmark tool with MPTCP, the kernel panics in 'mptcp_can_accept_new_subflow' because subflow_req->msk is NULL. Call trace: mptcp_can_accept_new_subflow (./net/mptcp/subflow.c:63 (discriminator 4)) (P) subflow_syn_recv_sock (./net/mptcp/subflow.c:854) tcp_check_req (./net/ipv4/tcp_minisocks.c:863) tcp_v4_rcv (./net/ipv4/tcp_ipv4.c:2268) ip_protocol_deliver_rcu (./net/ipv4/ip_input.c:207) ip_local_deliver_finish (./net/ipv4/ip_input.c:234) ip_local_deliver (./net/ipv4/ip_input.c:254) ip_rcv_finish (./net/ipv4/ip_input.c:449) ... According to the debug log, the same req received two SYN-ACK in a very short time, very likely because the client retransmits the syn ack due to multiple reasons. Even if the packets are transmitted with a relevant time interval, they can be processed by the server on different CPUs concurrently). The 'subflow_req->msk' ownership is transferred to the subflow the first, and there will be a risk of a null pointer dereference here. This patch fixes this issue by moving the 'subflow_req->msk' under the `own_req == true` conditional. Note that the !msk check in subflow_hmac_valid() can be dropped, because the same check already exists under the own_req mpj branch where the code has been moved to. Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-1-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index efe8d86496db..409bd415ef1d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -754,8 +754,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req = mptcp_subflow_rsk(req); msk = subflow_req->msk; - if (!msk) - return false; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), @@ -850,12 +848,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) || - !subflow_hmac_valid(req, &mp_opt) || - !mptcp_can_accept_new_subflow(subflow_req->msk)) { - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK)) fallback = true; - } } create_child: @@ -905,6 +899,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto dispose_child; } + if (!subflow_hmac_valid(req, &mp_opt) || + !mptcp_can_accept_new_subflow(subflow_req->msk)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; From 7335d4ac812917c16e04958775826d12d481c92d Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Fri, 28 Mar 2025 15:27:17 +0100 Subject: [PATCH 1911/2157] selftests: mptcp: fix incorrect fd checks in main_loop Fix a bug where the code was checking the wrong file descriptors when opening the input files. The code was checking 'fd' instead of 'fd_in', which could lead to incorrect error handling. Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Fixes: ca7ae8916043 ("selftests: mptcp: mptfo Initiator/Listener") Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Cong Liu Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-2-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d240d02fa443..893dc36b12f6 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1270,7 +1270,7 @@ int main_loop(void) if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1293,7 +1293,7 @@ int main_loop(void) if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } From c183165f87a486d5879f782c05a23c179c3794ab Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 28 Mar 2025 15:27:18 +0100 Subject: [PATCH 1912/2157] selftests: mptcp: close fd_in before returning in main_loop The file descriptor 'fd_in' is opened when cfg_input is configured, but not closed in main_loop(), this patch fixes it. Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Co-developed-by: Cong Liu Signed-off-by: Cong Liu Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-3-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 893dc36b12f6..c83a8b47bbdf 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1299,7 +1299,7 @@ int main_loop(void) ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { shutdown(fd, SHUT_WR); @@ -1320,7 +1320,10 @@ int main_loop(void) close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) From b44a4c28228fc50b0af05b5d15b44c2172f112a0 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 28 Mar 2025 15:27:19 +0100 Subject: [PATCH 1913/2157] selftests: mptcp: ignore mptcp_diag binary A new binary is now generated by the MPTCP selftests: mptcp_diag. Like the other binaries from this directory, there is no need to track this in Git, it should then be ignored. Fixes: 00f5e338cf7e ("selftests: mptcp: Add a tool to get specific msk_info") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-4-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl From 9e3267cf02c240065fddfbe1a58cdb99d0b00531 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 28 Mar 2025 09:47:42 -0700 Subject: [PATCH 1914/2157] eth: gve: add missing netdev locks on reset and shutdown paths All the misc entry points end up calling into either gve_open() or gve_close(), they take rtnl_lock today but since the recent instance locking changes should also take the instance lock. Found by code inspection and untested. Fixes: cae03e5bdd9e ("net: hold netdev instance lock during queue operations") Acked-by: Stanislav Fomichev Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20250328164742.1268069-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index cb2f9978f45e..f9a73c956861 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2077,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } @@ -2714,6 +2716,7 @@ static void gve_shutdown(struct pci_dev *pdev) bool was_up = netif_running(priv->dev); rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2721,6 +2724,7 @@ static void gve_shutdown(struct pci_dev *pdev) /* If the dev wasn't up or close worked, finish tearing down */ gve_teardown_priv_resources(priv); } + netdev_unlock(netdev); rtnl_unlock(); } From dd07df9ff3d148aee87fcbab99ff14f0727752f4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 28 Mar 2025 10:42:16 -0700 Subject: [PATCH 1915/2157] bnxt_en: bring back rtnl lock in bnxt_shutdown Taehee reports missing rtnl from bnxt_shutdown path: inetdev_event (./include/linux/inetdevice.h:256 net/ipv4/devinet.c:1585) notifier_call_chain (kernel/notifier.c:85) __dev_close_many (net/core/dev.c:1732 (discriminator 3)) kernel/locking/mutex.c:713 kernel/locking/mutex.c:732) dev_close_many (net/core/dev.c:1786) netif_close (./include/linux/list.h:124 ./include/linux/list.h:215 bnxt_shutdown (drivers/net/ethernet/broadcom/bnxt/bnxt.c:16707) bnxt_en pci_device_shutdown (drivers/pci/pci-driver.c:511) device_shutdown (drivers/base/core.c:4820) kernel_restart (kernel/reboot.c:271 kernel/reboot.c:285) Bring back the rtnl lock. Link: https://lore.kernel.org/netdev/CAMArcTV4P8PFsc6O2tSgzRno050DzafgqkLA2b7t=Fv_SY=brw@mail.gmail.com/ Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reported-by: Taehee Yoo Signed-off-by: Stanislav Fomichev Tested-by: Taehee Yoo Tested-by: Breno Leitao Link: https://patch.msgid.link/20250328174216.3513079-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 934ba9425857..1a70605fad38 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16698,6 +16698,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (!dev) return; + rtnl_lock(); netdev_lock(dev); bp = netdev_priv(dev); if (!bp) @@ -16717,6 +16718,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) shutdown_exit: netdev_unlock(dev); + rtnl_unlock(); } #ifdef CONFIG_PM_SLEEP From f278b6d5bb465c7fd66f3d103812947e55b376ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Mar 2025 07:59:46 +0000 Subject: [PATCH 1916/2157] Revert "tcp: avoid atomic operations on sk->sk_rmem_alloc" This reverts commit 0de2a5c4b824da2205658ebebb99a55c43cdf60f. I forgot that a TCP socket could receive messages in its error queue. sock_queue_err_skb() can be called without socket lock being held, and changes sk->sk_rmem_alloc. The fact that skbs in error queue are limited by sk->sk_rcvbuf means that error messages can be dropped if socket receive queues are full, which is an orthogonal issue. In future kernels, we could use a separate sk->sk_error_mem_alloc counter specifically for the error queue. Fixes: 0de2a5c4b824 ("tcp: avoid atomic operations on sk->sk_rmem_alloc") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250331075946.31960-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 15 --------------- net/ipv4/tcp.c | 18 ++---------------- net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_input.c | 6 +++--- 4 files changed, 6 insertions(+), 35 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index df04dc09c519..4450c384ef17 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,7 +779,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); -void tcp_sock_rfree(struct sk_buff *skb); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, @@ -2899,18 +2898,4 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const void *saddr, const void *daddr, int family, int dif, int sdif); -/* version of skb_set_owner_r() avoiding one atomic_add() */ -static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = tcp_sock_rfree; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) + skb->truesize); - - sk_forward_alloc_add(sk, -skb->truesize); -} - #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ea8de00f669d..6edc441b3702 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1525,25 +1525,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } -/* private version of sock_rfree() avoiding one atomic_sub() */ -void tcp_sock_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - unsigned int len = skb->truesize; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) - len); - - sk_forward_alloc_add(sk, len); - sk_mem_reclaim(sk); -} - static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == tcp_sock_rfree)) { - tcp_sock_rfree(skb); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ca40665145c6..1a6b1bc54245 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e1f952fbac48..a35018e2d0ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5171,7 +5171,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (tcp_is_sack(tp)) tcp_grow_window(sk, skb, false); skb_condense(skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } } @@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { tcp_add_receive_queue(sk, skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } return eaten; } @@ -5504,7 +5504,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, __skb_queue_before(list, skb, nskb); else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ - tcp_skb_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. */ From 2510859475d7f46ed7940db0853f3342bf1b65ee Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Mon, 31 Mar 2025 11:22:49 +0300 Subject: [PATCH 1917/2157] cifs: fix integer overflow in match_server() The echo_interval is not limited in any way during mounting, which makes it possible to write a large number to it. This can cause an overflow when multiplying ctx->echo_interval by HZ in match_server(). Add constraints for echo_interval to smb3_fs_context_parse_param(). Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: adfeb3e00e8e1 ("cifs: Make echo interval tunable") Cc: stable@vger.kernel.org Signed-off-by: Roman Smirnov Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index bdb762d398af..9c3ded0cf006 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1383,6 +1383,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->closetimeo = HZ * result.uint_32; break; case Opt_echo_interval: + if (result.uint_32 < SMB_ECHO_INTERVAL_MIN || + result.uint_32 > SMB_ECHO_INTERVAL_MAX) { + cifs_errorf(fc, "echo interval is out of bounds\n"); + goto cifs_parse_mount_err; + } ctx->echo_interval = result.uint_32; break; case Opt_snapshot: From be5d361e3083a469385eff34b46ad58eb97b1e38 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Mon, 31 Mar 2025 11:22:50 +0300 Subject: [PATCH 1918/2157] cifs: remove unreachable code in cifs_get_tcp_session() echo_interval is checked at mount time, the code has become unreachable. Signed-off-by: Roman Smirnov Signed-off-by: Steve French --- fs/smb/client/connect.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d7bad2c3af37..0721e557f2e0 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1731,12 +1731,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, */ tcp_ses->tcpStatus = CifsNew; ++tcp_ses->srv_count; + tcp_ses->echo_interval = ctx->echo_interval * HZ; - if (ctx->echo_interval >= SMB_ECHO_INTERVAL_MIN && - ctx->echo_interval <= SMB_ECHO_INTERVAL_MAX) - tcp_ses->echo_interval = ctx->echo_interval * HZ; - else - tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ; if (tcp_ses->rdma) { #ifndef CONFIG_CIFS_SMB_DIRECT cifs_dbg(VFS, "CONFIG_CIFS_SMB_DIRECT is not enabled\n"); From a091d9711bdee46a76fa14fad31cb261a6dad74a Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 31 Mar 2025 21:33:13 +0800 Subject: [PATCH 1919/2157] smb:client: smb: client: Add reverse mapping from tcon to superblocks Currently, when a SMB connection is reset and renegotiated with the server, there's no way to update all related mount points with new negotiated sizes. This is because while superblocks (cifs_sb_info) maintain references to tree connections (tcon) through tcon_link structures, there is no reverse mapping from a tcon back to all the superblocks using it. This patch adds a bidirectional relationship between tcon and cifs_sb_info structures by: 1. Adding a cifs_sb_list to tcon structure with appropriate locking 2. Adding tcon_sb_link to cifs_sb_info to join the list 3. Managing the list entries during mount and umount operations The bidirectional relationship enables future functionality to locate and update all superblocks connected to a specific tree connection, such as: - Updating negotiated parameters after reconnection - Efficiently notifying all affected mounts of capability changes This is the first part of a series to improve connection resilience by keeping all mount parameters in sync with server capabilities after reconnection. Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/cifs_fs_sb.h | 1 + fs/smb/client/cifsglob.h | 3 ++- fs/smb/client/connect.c | 15 +++++++++++++++ fs/smb/client/misc.c | 2 ++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h index 651759192280..5e8d163cb5f8 100644 --- a/fs/smb/client/cifs_fs_sb.h +++ b/fs/smb/client/cifs_fs_sb.h @@ -49,6 +49,7 @@ struct cifs_sb_info { struct rb_root tlink_tree; + struct list_head tcon_sb_link; spinlock_t tlink_tree_lock; struct tcon_link *master_tlink; struct nls_table *local_nls; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 6ae170a2a042..2cb352c16c1a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1321,7 +1321,8 @@ struct cifs_tcon { #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; - /* BB add field for back pointer to sb struct(s)? */ + struct list_head cifs_sb_list; + spinlock_t sb_list_lock; #ifdef CONFIG_CIFS_DFS_UPCALL struct delayed_work dfs_cache_work; struct list_head dfs_ses_list; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 0721e557f2e0..2349597d5bfc 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3477,6 +3477,7 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) struct smb3_fs_context *ctx = cifs_sb->ctx; INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); + INIT_LIST_HEAD(&cifs_sb->tcon_sb_link); spin_lock_init(&cifs_sb->tlink_tree_lock); cifs_sb->tlink_tree = RB_ROOT; @@ -3709,6 +3710,10 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, tlink_rb_insert(&cifs_sb->tlink_tree, tlink); spin_unlock(&cifs_sb->tlink_tree_lock); + spin_lock(&tcon->sb_list_lock); + list_add(&cifs_sb->tcon_sb_link, &tcon->cifs_sb_list); + spin_unlock(&tcon->sb_list_lock); + queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks, TLINK_IDLE_EXPIRE); return 0; @@ -4050,9 +4055,19 @@ cifs_umount(struct cifs_sb_info *cifs_sb) struct rb_root *root = &cifs_sb->tlink_tree; struct rb_node *node; struct tcon_link *tlink; + struct cifs_tcon *tcon = NULL; cancel_delayed_work_sync(&cifs_sb->prune_tlinks); + if (cifs_sb->master_tlink) { + tcon = cifs_sb->master_tlink->tl_tcon; + if (tcon) { + spin_lock(&tcon->sb_list_lock); + list_del_init(&cifs_sb->tcon_sb_link); + spin_unlock(&tcon->sb_list_lock); + } + } + spin_lock(&cifs_sb->tlink_tree_lock); while ((node = rb_first(root))) { tlink = rb_entry(node, struct tcon_link, tl_rbnode); diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index b328dc5c7988..7b6ed9b23e71 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -137,8 +137,10 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) spin_lock_init(&ret_buf->tc_lock); INIT_LIST_HEAD(&ret_buf->openFileList); INIT_LIST_HEAD(&ret_buf->tcon_list); + INIT_LIST_HEAD(&ret_buf->cifs_sb_list); spin_lock_init(&ret_buf->open_file_lock); spin_lock_init(&ret_buf->stat_lock); + spin_lock_init(&ret_buf->sb_list_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); From 287906b20035a04a234d1a3c64f760a5678387be Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 31 Mar 2025 21:33:14 +0800 Subject: [PATCH 1920/2157] smb: client: Store original IO parameters and prevent zero IO sizes During mount option processing and negotiation with the server, the original user-specified rsize/wsize values were being modified directly. This makes it impossible to recover these values after a connection reset, leading to potential degraded performance after reconnection. The other problem is that When negotiating read and write sizes, there are cases where the negotiated values might calculate to zero, especially during reconnection when server->max_read or server->max_write might be reset. In general, these values come from the negotiation response. According to MS-SMB2 specification, these values should be at least 65536 bytes. This patch improves IO parameter handling: 1. Adds vol_rsize and vol_wsize fields to store the original user-specified values separately from the negotiated values 2. Uses got_rsize/got_wsize flags to determine if values were user-specified rather than checking for non-zero values, which is more reliable 3. Adds a prevent_zero_iosize() helper function to ensure IO sizes are never negotiated down to zero, which could happen in edge cases like when server->max_read/write is zero The changes make the CIFS client more resilient to unusual server responses and reconnection scenarios, preventing potential failures when IO sizes are calculated to be zero. Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 ++ fs/smb/client/fs_context.h | 3 +++ fs/smb/client/smb1ops.c | 6 +++--- fs/smb/client/smb2ops.c | 27 +++++++++++++++++++-------- fs/smb/common/smb2pdu.h | 3 +++ 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 9c3ded0cf006..ed543325c518 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1333,6 +1333,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_rsize: ctx->rsize = result.uint_32; ctx->got_rsize = true; + ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: ctx->wsize = result.uint_32; @@ -1348,6 +1349,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->wsize, PAGE_SIZE); } } + ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) { diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 42c6b66c2c1a..23491401dac5 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -280,6 +280,9 @@ struct smb3_fs_context { bool use_client_guid:1; /* reuse existing guid for multichannel */ u8 client_guid[SMB2_CLIENT_GUID_SIZE]; + /* User-specified original r/wsize value */ + unsigned int vol_rsize; + unsigned int vol_wsize; unsigned int bsize; unsigned int rasize; unsigned int rsize; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 8701484805cd..06b28da60a2d 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -444,8 +444,8 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) unsigned int wsize; /* start with specified wsize, or default */ - if (ctx->wsize) - wsize = ctx->wsize; + if (ctx->got_wsize) + wsize = ctx->vol_wsize; else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) wsize = CIFS_DEFAULT_IOSIZE; else @@ -497,7 +497,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) else defsize = server->maxBuf - sizeof(READ_RSP); - rsize = ctx->rsize ? ctx->rsize : defsize; + rsize = ctx->got_rsize ? ctx->vol_rsize : defsize; /* * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a700e5921961..98643a546c68 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -470,6 +470,17 @@ smb2_negotiate(const unsigned int xid, return rc; } +static inline unsigned int +prevent_zero_iosize(unsigned int size, const char *type) +{ + if (size == 0) { + cifs_dbg(VFS, "SMB: Zero %ssize calculated, using minimum value %u\n", + type, CIFS_MIN_DEFAULT_IOSIZE); + return CIFS_MIN_DEFAULT_IOSIZE; + } + return size; +} + static unsigned int smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { @@ -477,12 +488,12 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) unsigned int wsize; /* start with specified wsize, or default */ - wsize = ctx->wsize ? ctx->wsize : CIFS_DEFAULT_IOSIZE; + wsize = ctx->got_wsize ? ctx->vol_wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); - return wsize; + return prevent_zero_iosize(wsize, "w"); } static unsigned int @@ -492,7 +503,7 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) unsigned int wsize; /* start with specified wsize, or default */ - wsize = ctx->wsize ? ctx->wsize : SMB3_DEFAULT_IOSIZE; + wsize = ctx->got_wsize ? ctx->vol_wsize : SMB3_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { @@ -514,7 +525,7 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); - return wsize; + return prevent_zero_iosize(wsize, "w"); } static unsigned int @@ -524,13 +535,13 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) unsigned int rsize; /* start with specified rsize, or default */ - rsize = ctx->rsize ? ctx->rsize : CIFS_DEFAULT_IOSIZE; + rsize = ctx->got_rsize ? ctx->vol_rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); - return rsize; + return prevent_zero_iosize(rsize, "r"); } static unsigned int @@ -540,7 +551,7 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) unsigned int rsize; /* start with specified rsize, or default */ - rsize = ctx->rsize ? ctx->rsize : SMB3_DEFAULT_IOSIZE; + rsize = ctx->got_rsize ? ctx->vol_rsize : SMB3_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { @@ -563,7 +574,7 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); - return rsize; + return prevent_zero_iosize(rsize, "r"); } /* diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c7a0efda4403..764dca80c15c 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -95,6 +95,9 @@ */ #define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024) +/* According to MS-SMB2 specification The minimum recommended value is 65536.*/ +#define CIFS_MIN_DEFAULT_IOSIZE (65536) + /* * SMB2 Header Definition * From 764da2fff399756d09b02db7fa7bd05e57928cc0 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 31 Mar 2025 21:33:15 +0800 Subject: [PATCH 1921/2157] smb: client: Update IO sizes after reconnection When a SMB connection is reset and reconnected, the negotiated IO parameters (rsize/wsize) can become out of sync with the server's current capabilities. This can lead to suboptimal performance or even IO failures if the server's limits have changed. This patch implements automatic IO size renegotiation: 1. Adds cifs_renegotiate_iosize() function to update all superblocks associated with a tree connection 2. Updates each mount's rsize/wsize based on current server capabilities 3. Calls this function after successful tree connection reconnection With this change, all mount points will automatically maintain optimal and reliable IO parameters after network disruptions, using the bidirectional mapping added in previous patches. This completes the series improving connection resilience by keeping mount parameters synchronized with server capabilities. Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4f69a1825e42..81e05db8e4d5 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -43,6 +43,7 @@ #endif #include "cached_dir.h" #include "compress.h" +#include "fs_context.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -4089,6 +4090,24 @@ smb2_echo_callback(struct mid_q_entry *mid) add_credits(server, &credits, CIFS_ECHO_OP); } +static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, + struct cifs_tcon *tcon) +{ + struct cifs_sb_info *cifs_sb; + + if (server == NULL || tcon == NULL) + return; + + spin_lock(&tcon->sb_list_lock); + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tcon, cifs_sb->ctx); + cifs_sb->ctx->wsize = + server->ops->negotiate_wsize(tcon, cifs_sb->ctx); + } + spin_unlock(&tcon->sb_list_lock); +} + void smb2_reconnect_server(struct work_struct *work) { struct TCP_Server_Info *server = container_of(work, @@ -4174,9 +4193,10 @@ void smb2_reconnect_server(struct work_struct *work) list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true); - if (!rc) + if (!rc) { + cifs_renegotiate_iosize(server, tcon); cifs_reopen_persistent_handles(tcon); - else + } else resched = true; list_del_init(&tcon->rlist); if (tcon->ipc) From fa4cdb8cbca7d6cb6aa13e4d8d83d1103f6345db Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 27 Mar 2025 21:22:51 +0900 Subject: [PATCH 1922/2157] ksmbd: fix session use-after-free in multichannel connection There is a race condition between session setup and ksmbd_sessions_deregister. The session can be freed before the connection is added to channel list of session. This patch check reference count of session before freeing it. Cc: stable@vger.kernel.org Reported-by: Sean Heelan Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/auth.c | 4 ++-- fs/smb/server/mgmt/user_session.c | 14 ++++++++------ fs/smb/server/smb2pdu.c | 7 ++++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 00b31cf86462..83caa3849749 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -1016,9 +1016,9 @@ static int ksmbd_get_encryption_key(struct ksmbd_work *work, __u64 ses_id, ses_enc_key = enc ? sess->smb3encryptionkey : sess->smb3decryptionkey; - if (enc) - ksmbd_user_session_get(sess); memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); + if (!enc) + ksmbd_user_session_put(sess); return 0; } diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 53d308f331af..7690f0187dac 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -181,7 +181,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) down_write(&sessions_table_lock); down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { - if (atomic_read(&sess->refcnt) == 0 && + if (atomic_read(&sess->refcnt) <= 1 && (sess->state != SMB2_SESSION_VALID || time_after(jiffies, sess->last_active + SMB2_SESSION_TIMEOUT))) { @@ -233,7 +233,8 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) down_write(&conn->session_lock); xa_erase(&conn->sessions, sess->id); up_write(&conn->session_lock); - ksmbd_session_destroy(sess); + if (atomic_dec_and_test(&sess->refcnt)) + ksmbd_session_destroy(sess); } } } @@ -252,7 +253,8 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) if (xa_empty(&sess->ksmbd_chann_list)) { xa_erase(&conn->sessions, sess->id); hash_del(&sess->hlist); - ksmbd_session_destroy(sess); + if (atomic_dec_and_test(&sess->refcnt)) + ksmbd_session_destroy(sess); } } up_write(&conn->session_lock); @@ -328,8 +330,8 @@ void ksmbd_user_session_put(struct ksmbd_session *sess) if (atomic_read(&sess->refcnt) <= 0) WARN_ON(1); - else - atomic_dec(&sess->refcnt); + else if (atomic_dec_and_test(&sess->refcnt)) + ksmbd_session_destroy(sess); } struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn, @@ -436,7 +438,7 @@ static struct ksmbd_session *__session_create(int protocol) xa_init(&sess->rpc_handle_list); sess->sequence_number = 1; rwlock_init(&sess->tree_conns_lock); - atomic_set(&sess->refcnt, 1); + atomic_set(&sess->refcnt, 2); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 4ddf4300371b..f3ca33d3b69a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2235,13 +2235,14 @@ int smb2_session_logoff(struct ksmbd_work *work) return -ENOENT; } - ksmbd_destroy_file_table(&sess->file_table); down_write(&conn->session_lock); sess->state = SMB2_SESSION_EXPIRED; up_write(&conn->session_lock); - ksmbd_free_user(sess->user); - sess->user = NULL; + if (sess->user) { + ksmbd_free_user(sess->user); + sess->user = NULL; + } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE); rsp->StructureSize = cpu_to_le16(4); From beff0bc9d69bc8e733f9bca28e2d3df5b3e10e42 Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Sat, 29 Mar 2025 06:58:15 +0000 Subject: [PATCH 1923/2157] ksmbd: fix overflow in dacloffset bounds check The dacloffset field was originally typed as int and used in an unchecked addition, which could overflow and bypass the existing bounds check in both smb_check_perm_dacl() and smb_inherit_dacl(). This could result in out-of-bounds memory access and a kernel crash when dereferencing the DACL pointer. This patch converts dacloffset to unsigned int and uses check_add_overflow() to validate access to the DACL. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 49b128698670..2693ea2d67aa 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1026,7 +1026,9 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct dentry *parent = path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; - int rc = 0, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + int rc = 0, pntsd_type, pntsd_size, acl_len, aces_size; + unsigned int dacloffset; + size_t dacl_struct_end; u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); @@ -1035,8 +1037,11 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, parent, &parent_pntsd); if (pntsd_size <= 0) return -ENOENT; + dacloffset = le32_to_cpu(parent_pntsd->dacloffset); - if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) { + if (!dacloffset || + check_add_overflow(dacloffset, sizeof(struct smb_acl), &dacl_struct_end) || + dacl_struct_end > (size_t)pntsd_size) { rc = -EINVAL; goto free_parent_pntsd; } @@ -1240,7 +1245,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, struct smb_ntsd *pntsd = NULL; struct smb_acl *pdacl; struct posix_acl *posix_acls; - int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset; + int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size; + unsigned int dacl_offset; + size_t dacl_struct_end; struct smb_sid sid; int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE); struct smb_ace *ace; @@ -1259,7 +1266,8 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, dacl_offset = le32_to_cpu(pntsd->dacloffset); if (!dacl_offset || - (dacl_offset + sizeof(struct smb_acl) > pntsd_size)) + check_add_overflow(dacl_offset, sizeof(struct smb_acl), &dacl_struct_end) || + dacl_struct_end > (size_t)pntsd_size) goto err_out; pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); From bf21e29d78cd2c2371023953d9c82dfef82ebb36 Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Sat, 29 Mar 2025 16:06:01 +0000 Subject: [PATCH 1924/2157] ksmbd: validate zero num_subauth before sub_auth is accessed Access psid->sub_auth[psid->num_subauth - 1] without checking if num_subauth is non-zero leads to an out-of-bounds read. This patch adds a validation step to ensure num_subauth != 0 before sub_auth is accessed. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 2693ea2d67aa..5aa7a66334d9 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -270,6 +270,11 @@ static int sid_to_id(struct mnt_idmap *idmap, return -EIO; } + if (psid->num_subauth == 0) { + pr_err("%s: zero subauthorities!\n", __func__); + return -EIO; + } + if (sidtype == SIDOWNER) { kuid_t uid; uid_t id; From f1350c2c74e62afbc77048ed35718f42b3baba91 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Apr 2025 01:40:19 -0400 Subject: [PATCH 1925/2157] bcachefs: fix ref leak in btree_node_read_all_replicas Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 1d94a2bf706d..871f3f46a0c2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1609,6 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio) struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); + percpu_ref_put(&ca->io_ref); } ra->err[rb->idx] = bio->bi_status; From bf782ada459efde8fe9a488cf30a40d32caf787f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 14 Oct 2024 13:51:21 +0200 Subject: [PATCH 1926/2157] cifs: Add a new xattr system.smb3_ntsd_sacl for getting or setting SACLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to SACL part of SMB security descriptor is granted by SACL privilege which by default is accessible only for local administrator. But it can be granted to any other user by local GPO or AD. SACL access is not granted by DACL permissions and therefore is it possible that some user would not have access to DACLs of some file, but would have access to SACLs of all files. So it means that for accessing SACLs (either getting or setting) in some cases requires not touching or asking for DACLs. Currently Linux SMB client does not allow to get or set SACLs without touching DACLs. Which means that user without DACL access is not able to get or set SACLs even if it has access to SACLs. Fix this problem by introducing a new xattr "system.smb3_ntsd_sacl" for accessing only SACLs part of the security descriptor (therefore without DACLs and OWNER/GROUP). Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/xattr.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index 7d49f38f01f3..95b8269851f3 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -31,6 +31,7 @@ * secure, replaced by SMB2 (then even more highly secure SMB3) many years ago */ #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */ +#define SMB3_XATTR_CIFS_NTSD_SACL "system.smb3_ntsd_sacl" /* SACL only */ #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */ #define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */ #define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */ @@ -38,6 +39,7 @@ /* BB need to add server (Samba e.g) support for security and trusted prefix */ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT, + XATTR_CIFS_NTSD_SACL, XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL }; static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon, @@ -160,6 +162,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: + case XATTR_CIFS_NTSD_SACL: case XATTR_CIFS_NTSD: case XATTR_CIFS_NTSD_FULL: { struct smb_ntsd *pacl; @@ -187,6 +190,9 @@ static int cifs_xattr_set(const struct xattr_handler *handler, CIFS_ACL_GROUP | CIFS_ACL_DACL); break; + case XATTR_CIFS_NTSD_SACL: + aclflags = CIFS_ACL_SACL; + break; case XATTR_CIFS_ACL: default: aclflags = CIFS_ACL_DACL; @@ -308,6 +314,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: + case XATTR_CIFS_NTSD_SACL: case XATTR_CIFS_NTSD: case XATTR_CIFS_NTSD_FULL: { /* @@ -327,6 +334,9 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_CIFS_NTSD: extra_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO; break; + case XATTR_CIFS_NTSD_SACL: + extra_info = SACL_SECINFO; + break; case XATTR_CIFS_ACL: default: extra_info = DACL_SECINFO; @@ -448,6 +458,13 @@ static const struct xattr_handler smb3_acl_xattr_handler = { .set = cifs_xattr_set, }; +static const struct xattr_handler smb3_ntsd_sacl_xattr_handler = { + .name = SMB3_XATTR_CIFS_NTSD_SACL, + .flags = XATTR_CIFS_NTSD_SACL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + static const struct xattr_handler cifs_cifs_ntsd_xattr_handler = { .name = CIFS_XATTR_CIFS_NTSD, .flags = XATTR_CIFS_NTSD, @@ -493,6 +510,7 @@ const struct xattr_handler * const cifs_xattr_handlers[] = { &cifs_os2_xattr_handler, &cifs_cifs_acl_xattr_handler, &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */ + &smb3_ntsd_sacl_xattr_handler, &cifs_cifs_ntsd_xattr_handler, &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_cifs_ntsd_full_xattr_handler, From b1a37df6ba2f13be341130b9fe10649ef6a42e9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 14 Oct 2024 13:56:26 +0200 Subject: [PATCH 1927/2157] cifs: Add a new xattr system.smb3_ntsd_owner for getting or setting owner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changing owner is controlled by DACL permission WRITE_OWNER. Changing DACL itself is controlled by DACL permisssion WRITE_DAC. Owner of the file has implicit WRITE_DAC permission even when it is not explicitly granted for owner by DACL. Reading DACL or owner is controlled only by one permission READ_CONTROL. WRITE_OWNER permission can be bypassed by the SeTakeOwnershipPrivilege, which is by default available for local administrators. So if the local administrator wants to access some file to which does not have access, it is required to first change owner to ourself and then change DACL permissions. Currently Linux SMB client does not support this because client does not provide a way to change owner without touching DACL permissions. Fix this problem by introducing a new xattr "system.smb3_ntsd_owner" for setting/changing only owner part of the security descriptor. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/xattr.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index 95b8269851f3..b88fa04f5792 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -32,6 +32,7 @@ */ #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */ #define SMB3_XATTR_CIFS_NTSD_SACL "system.smb3_ntsd_sacl" /* SACL only */ +#define SMB3_XATTR_CIFS_NTSD_OWNER "system.smb3_ntsd_owner" /* owner only */ #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */ #define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */ #define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */ @@ -39,7 +40,7 @@ /* BB need to add server (Samba e.g) support for security and trusted prefix */ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT, - XATTR_CIFS_NTSD_SACL, + XATTR_CIFS_NTSD_SACL, XATTR_CIFS_NTSD_OWNER, XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL }; static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon, @@ -163,6 +164,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, case XATTR_CIFS_ACL: case XATTR_CIFS_NTSD_SACL: + case XATTR_CIFS_NTSD_OWNER: case XATTR_CIFS_NTSD: case XATTR_CIFS_NTSD_FULL: { struct smb_ntsd *pacl; @@ -190,6 +192,10 @@ static int cifs_xattr_set(const struct xattr_handler *handler, CIFS_ACL_GROUP | CIFS_ACL_DACL); break; + case XATTR_CIFS_NTSD_OWNER: + aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_GROUP); + break; case XATTR_CIFS_NTSD_SACL: aclflags = CIFS_ACL_SACL; break; @@ -315,6 +321,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_CIFS_ACL: case XATTR_CIFS_NTSD_SACL: + case XATTR_CIFS_NTSD_OWNER: case XATTR_CIFS_NTSD: case XATTR_CIFS_NTSD_FULL: { /* @@ -334,6 +341,9 @@ static int cifs_xattr_get(const struct xattr_handler *handler, case XATTR_CIFS_NTSD: extra_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO; break; + case XATTR_CIFS_NTSD_OWNER: + extra_info = OWNER_SECINFO | GROUP_SECINFO; + break; case XATTR_CIFS_NTSD_SACL: extra_info = SACL_SECINFO; break; @@ -465,6 +475,13 @@ static const struct xattr_handler smb3_ntsd_sacl_xattr_handler = { .set = cifs_xattr_set, }; +static const struct xattr_handler smb3_ntsd_owner_xattr_handler = { + .name = SMB3_XATTR_CIFS_NTSD_OWNER, + .flags = XATTR_CIFS_NTSD_OWNER, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + static const struct xattr_handler cifs_cifs_ntsd_xattr_handler = { .name = CIFS_XATTR_CIFS_NTSD, .flags = XATTR_CIFS_NTSD, @@ -511,6 +528,7 @@ const struct xattr_handler * const cifs_xattr_handlers[] = { &cifs_cifs_acl_xattr_handler, &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */ &smb3_ntsd_sacl_xattr_handler, + &smb3_ntsd_owner_xattr_handler, &cifs_cifs_ntsd_xattr_handler, &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_cifs_ntsd_full_xattr_handler, From 7d14dd683b1b00451fecfdfc86d2d6539bd8a21e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 27 Oct 2024 12:13:12 +0100 Subject: [PATCH 1928/2157] cifs: Allow to disable or force initialization of NetBIOS session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently SMB client always tries to initialize NetBIOS session when the server port is 139. This is useful for default cases, but nowadays when using non-standard routing or testing between VMs, it is common that servers are listening on non-standard ports. So add a new mount option -o nbsessinit and -o nonbsessinit which either forces initialization or disables initialization regardless of server port number. This allows Linux SMB client to connect to older SMB1 server listening on non-standard port, which requires initialization of NetBIOS session, by using additional mount options -o port= and -o nbsessinit. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/connect.c | 11 ++++++++++- fs/smb/client/fs_context.c | 14 +++++++++++++- fs/smb/client/fs_context.h | 2 ++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 2cb352c16c1a..9b0c142c832c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -714,6 +714,7 @@ struct TCP_Server_Info { spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ + int rfc1001_sessinit; /* whether to estasblish netbios session */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; struct smb_version_operations *ops; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 2349597d5bfc..7e2208bd0de7 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1701,6 +1701,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); memcpy(tcp_ses->server_RFC1001_name, ctx->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); + tcp_ses->rfc1001_sessinit = ctx->rfc1001_sessinit; tcp_ses->session_estab = false; tcp_ses->sequence_number = 0; tcp_ses->channel_sequence_num = 0; /* only tracked for primary channel */ @@ -3328,7 +3329,15 @@ generic_ip_connect(struct TCP_Server_Info *server) return rc; } trace_smb3_connect_done(server->hostname, server->conn_id, &server->dstaddr); - if (sport == htons(RFC1001_PORT)) + + /* + * Establish RFC1001 NetBIOS session when it was explicitly requested + * by mount option -o nbsessinit, or when connecting to default RFC1001 + * server port (139) and it was not explicitly disabled by mount option + * -o nonbsessinit. + */ + if (server->rfc1001_sessinit == 1 || + (server->rfc1001_sessinit == -1 && sport == htons(RFC1001_PORT))) rc = ip_rfc1001_connect(server); return rc; diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index ed543325c518..2980941b9667 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -135,6 +135,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag("witness", Opt_witness), fsparam_flag_no("nativesocket", Opt_nativesocket), fsparam_flag_no("unicode", Opt_unicode), + fsparam_flag_no("nbsessinit", Opt_nbsessinit), /* Mount options which take uid or gid */ fsparam_uid("backupuid", Opt_backupuid), @@ -968,6 +969,10 @@ static int smb3_verify_reconfigure_ctx(struct fs_context *fc, cifs_errorf(fc, "can not change unicode during remount\n"); return -EINVAL; } + if (new_ctx->rfc1001_sessinit != old_ctx->rfc1001_sessinit) { + cifs_errorf(fc, "can not change nbsessinit during remount\n"); + return -EINVAL; + } return 0; } @@ -1609,6 +1614,10 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, if (i == RFC1001_NAME_LEN && param->string[i] != 0) pr_warn("server netbiosname longer than 15 truncated\n"); break; + case Opt_nbsessinit: + ctx->rfc1001_sessinit = !result.negated; + cifs_dbg(FYI, "rfc1001_sessinit set to %d\n", ctx->rfc1001_sessinit); + break; case Opt_ver: /* version of mount userspace tools, not dialect */ /* If interface changes in mount.cifs bump to new ver */ @@ -1896,13 +1905,16 @@ int smb3_init_fs_context(struct fs_context *fc) memset(ctx->source_rfc1001_name, 0x20, RFC1001_NAME_LEN); for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++) ctx->source_rfc1001_name[i] = toupper(nodename[i]); - ctx->source_rfc1001_name[RFC1001_NAME_LEN] = 0; + /* * null target name indicates to use *SMBSERVR default called name * if we end up sending RFC1001 session initialize */ ctx->target_rfc1001_name[0] = 0; + + ctx->rfc1001_sessinit = -1; /* autodetect based on port number */ + ctx->cred_uid = current_uid(); ctx->linux_uid = current_uid(); ctx->linux_gid = current_gid(); diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 23491401dac5..d1d29249bcdb 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -174,6 +174,7 @@ enum cifs_param { Opt_iocharset, Opt_netbiosname, Opt_servern, + Opt_nbsessinit, Opt_ver, Opt_vers, Opt_sec, @@ -216,6 +217,7 @@ struct smb3_fs_context { char *iocharset; /* local code page for mapping to and from Unicode */ char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ + int rfc1001_sessinit; kuid_t cred_uid; kuid_t linux_uid; kgid_t linux_gid; From 665e18794804f8b42b6ae5d436a154342f62e288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 28 Oct 2024 18:46:40 +0100 Subject: [PATCH 1929/2157] cifs: Improve handling of NetBIOS packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now all NetBIOS session logic is handled in ip_rfc1001_connect() function, so cleanup is_smb_response() function which contains generic handling of incoming SMB packets. Note that function is_smb_response() is not used directly or indirectly (e.g. over cifs_demultiplex_thread() by ip_rfc1001_connect() function. Except the Negative Session Response and the Session Keep Alive packet, the cifs_demultiplex_thread() should not receive any NetBIOS session packets. And Session Keep Alive packet may be received only when the NetBIOS session was established by ip_rfc1001_connect() function. So treat any such packet as error and schedule reconnect. Negative Session Response packet is returned from Windows SMB server (from Windows 98 and also from Windows Server 2022) if client sent over port 139 SMB negotiate request without previously establishing a NetBIOS session. The common scenario is that Negative Session Response packet is returned for the SMB negotiate packet, which is the first one which SMB client sends (if it is not establishing a NetBIOS session). Note that server port 139 may be forwarded and mapped between virtual machines to different number. And Linux SMB client do not call function ip_rfc1001_connect() when prot is not 139. So nowadays when using port mapping or port forwarding between VMs, it is not so uncommon to see this error. Currently the logic on Negative Session Response packet changes server port to 445 and force reconnection. But this logic does not work when using non-standard port numbers and also does not help if the server on specified port is requiring establishing a NetBIOS session. Fix this Negative Session Response logic and instead of changing server port (on which server does not have to listen), force reconnection with establishing a NetBIOS session. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 3 + fs/smb/client/connect.c | 140 +++++++++++++++++++++++++++++++++----- fs/smb/client/transport.c | 3 + 3 files changed, 128 insertions(+), 18 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 9b0c142c832c..07c4688ec4c9 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -715,6 +715,7 @@ struct TCP_Server_Info { __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ int rfc1001_sessinit; /* whether to estasblish netbios session */ + bool with_rfc1001; /* if netbios session is used */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; struct smb_version_operations *ops; @@ -1720,6 +1721,7 @@ struct mid_q_entry { void *resp_buf; /* pointer to received SMB header */ unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ + int mid_rc; /* rc for MID_RC */ unsigned int mid_flags; __le16 command; /* smb command code */ unsigned int optype; /* operation type */ @@ -1882,6 +1884,7 @@ static inline bool is_replayable_error(int error) #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 #define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ +#define MID_RC 0x80 /* mid_rc contains custom rc */ /* Flags */ #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 7e2208bd0de7..69274d6ed2e1 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -371,7 +371,7 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num * */ static int __cifs_reconnect(struct TCP_Server_Info *server, - bool mark_smb_session) + bool mark_smb_session, bool once) { int rc = 0; @@ -399,6 +399,9 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, if (rc) { cifs_server_unlock(server); cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc); + /* If was asked to reconnect only once, do not try it more times */ + if (once) + break; msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); @@ -564,19 +567,33 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) return rc; } -int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) +static int +_cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session, bool once) { if (!server->leaf_fullpath) - return __cifs_reconnect(server, mark_smb_session); + return __cifs_reconnect(server, mark_smb_session, once); return reconnect_dfs_server(server); } #else -int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) +static int +_cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session, bool once) { - return __cifs_reconnect(server, mark_smb_session); + return __cifs_reconnect(server, mark_smb_session, once); } #endif +int +cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) +{ + return _cifs_reconnect(server, mark_smb_session, false); +} + +static int +cifs_reconnect_once(struct TCP_Server_Info *server) +{ + return _cifs_reconnect(server, true, true); +} + static void cifs_echo_request(struct work_struct *work) { @@ -803,26 +820,110 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) /* Regular SMB response */ return true; case RFC1002_SESSION_KEEP_ALIVE: + /* + * RFC 1002 session keep alive can sent by the server only when + * we established a RFC 1002 session. But Samba servers send + * RFC 1002 session keep alive also over port 445 on which + * RFC 1002 session is not established. + */ cifs_dbg(FYI, "RFC 1002 session keep alive\n"); break; case RFC1002_POSITIVE_SESSION_RESPONSE: - cifs_dbg(FYI, "RFC 1002 positive session response\n"); + /* + * RFC 1002 positive session response cannot be returned + * for SMB request. RFC 1002 session response is handled + * exclusively in ip_rfc1001_connect() function. + */ + cifs_server_dbg(VFS, "RFC 1002 positive session response (unexpected)\n"); + cifs_reconnect(server, true); break; case RFC1002_NEGATIVE_SESSION_RESPONSE: /* * We get this from Windows 98 instead of an error on - * SMB negprot response. + * SMB negprot response, when we have not established + * RFC 1002 session (which means ip_rfc1001_connect() + * was skipped). Note that same still happens with + * Windows Server 2022 when connecting via port 139. + * So for this case when mount option -o nonbsessinit + * was not specified, try to reconnect with establishing + * RFC 1002 session. If new socket establishment with + * RFC 1002 session was successful then return to the + * mid's caller -EAGAIN, so it can retry the request. */ - cifs_dbg(FYI, "RFC 1002 negative session response\n"); - /* give server a second to clean up */ - msleep(1000); - /* - * Always try 445 first on reconnect since we get NACK - * on some if we ever connected to port 139 (the NACK - * is since we do not begin with RFC1001 session - * initialize frame). - */ - cifs_set_port((struct sockaddr *)&server->dstaddr, CIFS_PORT); + if (!cifs_rdma_enabled(server) && + server->tcpStatus == CifsInNegotiate && + !server->with_rfc1001 && + server->rfc1001_sessinit != 0) { + int rc, mid_rc; + struct mid_q_entry *mid, *nmid; + LIST_HEAD(dispose_list); + + cifs_dbg(FYI, "RFC 1002 negative session response during SMB Negotiate, retrying with NetBIOS session\n"); + + /* + * Before reconnect, delete all pending mids for this + * server, so reconnect would not signal connection + * aborted error to mid's callbacks. Note that for this + * server there should be exactly one pending mid + * corresponding to SMB1/SMB2 Negotiate packet. + */ + spin_lock(&server->mid_lock); + list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { + kref_get(&mid->refcount); + list_move(&mid->qhead, &dispose_list); + mid->mid_flags |= MID_DELETED; + } + spin_unlock(&server->mid_lock); + + /* Now try to reconnect once with NetBIOS session. */ + server->with_rfc1001 = true; + rc = cifs_reconnect_once(server); + + /* + * If reconnect was successful then indicate -EAGAIN + * to mid's caller. If reconnect failed with -EAGAIN + * then mask it as -EHOSTDOWN, so mid's caller would + * know that it failed. + */ + if (rc == 0) + mid_rc = -EAGAIN; + else if (rc == -EAGAIN) + mid_rc = -EHOSTDOWN; + else + mid_rc = rc; + + /* + * After reconnect (either successful or unsuccessful) + * deliver reconnect status to mid's caller via mid's + * callback. Use MID_RC state which indicates that the + * return code should be read from mid_rc member. + */ + list_for_each_entry_safe(mid, nmid, &dispose_list, qhead) { + list_del_init(&mid->qhead); + mid->mid_rc = mid_rc; + mid->mid_state = MID_RC; + mid->callback(mid); + release_mid(mid); + } + + /* + * If reconnect failed then wait two seconds. In most + * cases we were been called from the mount context and + * delivered failure to mid's callback will stop this + * receiver task thread and fails the mount process. + * So wait two seconds to prevent another reconnect + * in this task thread, which would be useless as the + * mount context will fail at all. + */ + if (rc != 0) + msleep(2000); + } else { + cifs_server_dbg(VFS, "RFC 1002 negative session response (unexpected)\n"); + cifs_reconnect(server, true); + } + break; + case RFC1002_RETARGET_SESSION_RESPONSE: + cifs_server_dbg(VFS, "RFC 1002 retarget session response (unexpected)\n"); cifs_reconnect(server, true); break; default: @@ -1702,6 +1803,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, memcpy(tcp_ses->server_RFC1001_name, ctx->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); tcp_ses->rfc1001_sessinit = ctx->rfc1001_sessinit; + tcp_ses->with_rfc1001 = false; tcp_ses->session_estab = false; tcp_ses->sequence_number = 0; tcp_ses->channel_sequence_num = 0; /* only tracked for primary channel */ @@ -3218,6 +3320,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) return -EIO; } + server->with_rfc1001 = true; return 0; } @@ -3336,7 +3439,8 @@ generic_ip_connect(struct TCP_Server_Info *server) * server port (139) and it was not explicitly disabled by mount option * -o nonbsessinit. */ - if (server->rfc1001_sessinit == 1 || + if (server->with_rfc1001 || + server->rfc1001_sessinit == 1 || (server->rfc1001_sessinit == -1 && sport == htons(RFC1001_PORT))) rc = ip_rfc1001_connect(server); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 03434dbe9374..266af17aa7d9 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -894,6 +894,9 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) case MID_SHUTDOWN: rc = -EHOSTDOWN; break; + case MID_RC: + rc = mid->mid_rc; + break; default: if (!(mid->mid_flags & MID_DELETED)) { list_del_init(&mid->qhead); From e94e882a6d69525c07589222cf3a6ff57ad12b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 2 Nov 2024 20:06:50 +0100 Subject: [PATCH 1930/2157] cifs: Fix negotiate retry functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SMB negotiate retry functionality in cifs_negotiate() is currently broken and does not work when doing socket reconnect. Caller of this function, which is cifs_negotiate_protocol() requires that tcpStatus after successful execution of negotiate callback stay in CifsInNegotiate. But if the CIFSSMBNegotiate() called from cifs_negotiate() fails due to connection issues then tcpStatus is changed as so repeated CIFSSMBNegotiate() call does not help. Fix this problem by moving retrying code from negotiate callback (which is either cifs_negotiate() or smb2_negotiate()) to cifs_negotiate_protocol() which is caller of those callbacks. This allows to properly handle and implement correct transistions between tcpStatus states as function cifs_negotiate_protocol() already handles it. With this change, cifs_negotiate_protocol() now handles also -EAGAIN error set by the RFC1002_NEGATIVE_SESSION_RESPONSE processing after reconnecting with NetBIOS session. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/connect.c | 10 ++++++++++ fs/smb/client/smb1ops.c | 7 ------- fs/smb/client/smb2ops.c | 3 --- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 69274d6ed2e1..f298e86a3c1f 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4202,11 +4202,13 @@ int cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server) { + bool in_retry = false; int rc = 0; if (!server->ops->need_neg || !server->ops->negotiate) return -ENOSYS; +retry: /* only send once per connect */ spin_lock(&server->srv_lock); if (server->tcpStatus != CifsGood && @@ -4226,6 +4228,14 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, spin_unlock(&server->srv_lock); rc = server->ops->negotiate(xid, ses, server); + if (rc == -EAGAIN) { + /* Allow one retry attempt */ + if (!in_retry) { + in_retry = true; + goto retry; + } + rc = -EHOSTDOWN; + } if (rc == 0) { spin_lock(&server->srv_lock); if (server->tcpStatus == CifsInNegotiate) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 06b28da60a2d..ad89f60207ab 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -426,13 +426,6 @@ cifs_negotiate(const unsigned int xid, { int rc; rc = CIFSSMBNegotiate(xid, ses, server); - if (rc == -EAGAIN) { - /* retry only once on 1st time connection */ - set_credits(server, 1); - rc = CIFSSMBNegotiate(xid, ses, server); - if (rc == -EAGAIN) - rc = -EHOSTDOWN; - } return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 98643a546c68..374d65cc8123 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -464,9 +464,6 @@ smb2_negotiate(const unsigned int xid, server->CurrentMid = 0; spin_unlock(&server->mid_lock); rc = SMB2_negotiate(xid, ses, server); - /* BB we probably don't need to retry with modern servers */ - if (rc == -EAGAIN) - rc = -EHOSTDOWN; return rc; } From 6aa9f1c9cd09c1c39a35da4fe5f43446ec18ce1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 17 Nov 2024 11:50:18 +0100 Subject: [PATCH 1931/2157] cifs: Fix access_flags_to_smbopen_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting access_flags to SMBOPEN mode, check for all possible access flags, not only GENERIC_READ and GENERIC_WRITE flags. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifssmb.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 29dcb88392e5..60cb264a01e5 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1041,15 +1041,31 @@ static __u16 convert_disposition(int disposition) static int access_flags_to_smbopen_mode(const int access_flags) { - int masked_flags = access_flags & (GENERIC_READ | GENERIC_WRITE); + /* + * SYSTEM_SECURITY grants both read and write access to SACL, treat is as read/write. + * MAXIMUM_ALLOWED grants as many access as possible, so treat it as read/write too. + * SYNCHRONIZE as is does not grant any specific access, so do not check its mask. + * If only SYNCHRONIZE bit is specified then fallback to read access. + */ + bool with_write_flags = access_flags & (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | + FILE_DELETE_CHILD | FILE_WRITE_ATTRIBUTES | DELETE | + WRITE_DAC | WRITE_OWNER | SYSTEM_SECURITY | + MAXIMUM_ALLOWED | GENERIC_WRITE | GENERIC_ALL); + bool with_read_flags = access_flags & (FILE_READ_DATA | FILE_READ_EA | FILE_EXECUTE | + FILE_READ_ATTRIBUTES | READ_CONTROL | + SYSTEM_SECURITY | MAXIMUM_ALLOWED | GENERIC_ALL | + GENERIC_EXECUTE | GENERIC_READ); + bool with_execute_flags = access_flags & (FILE_EXECUTE | MAXIMUM_ALLOWED | GENERIC_ALL | + GENERIC_EXECUTE); - if (masked_flags == GENERIC_READ) - return SMBOPEN_READ; - else if (masked_flags == GENERIC_WRITE) + if (with_write_flags && with_read_flags) + return SMBOPEN_READWRITE; + else if (with_write_flags) return SMBOPEN_WRITE; - - /* just go for read/write */ - return SMBOPEN_READWRITE; + else if (with_execute_flags) + return SMBOPEN_EXECUTE; + else + return SMBOPEN_READ; } int From 4236ac9fe5b8b42756070d4abfb76fed718e87c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 28 Dec 2024 21:09:54 +0100 Subject: [PATCH 1932/2157] cifs: Fix querying and creating MF symlinks over SMB1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old SMB1 servers without CAP_NT_SMBS do not support CIFS_open() function and instead SMBLegacyOpen() needs to be used. This logic is already handled in cifs_open_file() function, which is server->ops->open callback function. So for querying and creating MF symlinks use open callback function instead of CIFS_open() function directly. This change fixes querying and creating new MF symlinks on Windows 98. Currently cifs_query_mf_symlink() is not able to detect MF symlink and cifs_create_mf_symlink() is failing with EIO error. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/link.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index a88253668286..769752ad2c5c 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -258,7 +258,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_io_parms io_parms = {0}; int buf_type = CIFS_NO_BUFFER; - FILE_ALL_INFO file_info; + struct cifs_open_info_data query_data; oparms = (struct cifs_open_parms) { .tcon = tcon, @@ -270,11 +270,11 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, .fid = &fid, }; - rc = CIFS_open(xid, &oparms, &oplock, &file_info); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &query_data); if (rc) return rc; - if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { + if (query_data.fi.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { rc = -ENOENT; /* it's not a symlink */ goto out; @@ -313,7 +313,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, .fid = &fid, }; - rc = CIFS_open(xid, &oparms, &oplock, NULL); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, NULL); if (rc) return rc; From a3313375e88e0075b9048eba15e5eb4dbf93f60e Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Mon, 3 Mar 2025 16:34:24 +0800 Subject: [PATCH 1933/2157] riscv: print hartid on bringup Firmware randomly releases cores, so CPU numbers don't linearly map to hartids. When the system has an exception, we care more about hartids. Adding "dyndbg="file smpboot.c +p" loglevel=8" to the cmdline can output the hartid. Signed-off-by: Yunhui Cui Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250303083424.14309-1-cuiyunhui@bytedance.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/smp.c | 2 ++ arch/riscv/kernel/smpboot.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index d58b5e751286..e650dec44817 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { cpuid_to_hartid_map(0) = boot_cpu_hartid; + + pr_info("Booting Linux on hartid %lu\n", boot_cpu_hartid); } static DEFINE_PER_CPU_READ_MOSTLY(int, ipi_dummy_dev); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index e36d20205bd7..601a321e0f17 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -231,6 +231,10 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); + + pr_debug("CPU%u: Booted secondary hartid %lu\n", curr_cpuid, + cpuid_to_hartid_map(curr_cpuid)); + set_cpu_online(curr_cpuid, true); /* From 83d78ac677b9fdd8ea763507c6fe02d6bf415f3a Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 17 Mar 2025 08:25:51 +0100 Subject: [PATCH 1934/2157] riscv: Fix hugetlb retrieval of number of ptes in case of !present pte Ryan sent a fix [1] for arm64 that applies to riscv too: in some hugetlb functions, we must not use the pte value to get the size of a mapping because the pte may not be present. So use the already present size parameter for huge_pte_clear() and the newly introduced size parameter for huge_ptep_get_and_clear(). And make sure to gather A/D bits only on present ptes. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Link: https://lore.kernel.org/all/20250217140419.1702389-1-ryan.roberts@arm.com/ [1] Link: https://lore.kernel.org/r/20250317072551.572169-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/mm/hugetlbpage.c | 74 ++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index b4a78a4b35cf..375dd96bb4a0 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -148,22 +148,25 @@ unsigned long hugetlb_mask_last_page(struct hstate *h) static pte_t get_clear_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - unsigned long pte_num) + unsigned long ncontig) { - pte_t orig_pte = ptep_get(ptep); - unsigned long i; + pte_t pte, tmp_pte; + bool present; - for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { - pte_t pte = ptep_get_and_clear(mm, addr, ptep); - - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte = ptep_get_and_clear(mm, addr, ptep); + present = pte_present(pte); + while (--ncontig) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_get_and_clear(mm, addr, ptep); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -212,6 +215,26 @@ static void clear_flush(struct mm_struct *mm, flush_tlb_range(&vma, saddr, addr); } +static int num_contig_ptes_from_size(unsigned long sz, size_t *pgsize) +{ + unsigned long hugepage_shift; + + if (sz >= PGDIR_SIZE) + hugepage_shift = PGDIR_SHIFT; + else if (sz >= P4D_SIZE) + hugepage_shift = P4D_SHIFT; + else if (sz >= PUD_SIZE) + hugepage_shift = PUD_SHIFT; + else if (sz >= PMD_SIZE) + hugepage_shift = PMD_SHIFT; + else + hugepage_shift = PAGE_SHIFT; + + *pgsize = 1 << hugepage_shift; + + return sz >> hugepage_shift; +} + /* * When dealing with NAPOT mappings, the privileged specification indicates that * "if an update needs to be made, the OS generally should first mark all of the @@ -226,22 +249,10 @@ void set_huge_pte_at(struct mm_struct *mm, pte_t pte, unsigned long sz) { - unsigned long hugepage_shift, pgsize; + size_t pgsize; int i, pte_num; - if (sz >= PGDIR_SIZE) - hugepage_shift = PGDIR_SHIFT; - else if (sz >= P4D_SIZE) - hugepage_shift = P4D_SHIFT; - else if (sz >= PUD_SIZE) - hugepage_shift = PUD_SHIFT; - else if (sz >= PMD_SIZE) - hugepage_shift = PMD_SHIFT; - else - hugepage_shift = PAGE_SHIFT; - - pte_num = sz >> hugepage_shift; - pgsize = 1 << hugepage_shift; + pte_num = num_contig_ptes_from_size(sz, &pgsize); if (!pte_present(pte)) { for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) @@ -295,13 +306,14 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { + size_t pgsize; pte_t orig_pte = ptep_get(ptep); int pte_num; if (!pte_napot(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); - pte_num = napot_pte_num(napot_cont_order(orig_pte)); + pte_num = num_contig_ptes_from_size(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pte_num); } @@ -351,6 +363,7 @@ void huge_pte_clear(struct mm_struct *mm, pte_t *ptep, unsigned long sz) { + size_t pgsize; pte_t pte = ptep_get(ptep); int i, pte_num; @@ -359,8 +372,9 @@ void huge_pte_clear(struct mm_struct *mm, return; } - pte_num = napot_pte_num(napot_cont_order(pte)); - for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + pte_num = num_contig_ptes_from_size(sz, &pgsize); + + for (i = 0; i < pte_num; i++, addr += pgsize, ptep++) pte_clear(mm, addr, ptep); } From 79ba5c1c7767a539f42c6f6db46961b0bec2bc03 Mon Sep 17 00:00:00 2001 From: Ignacio Encinas Date: Thu, 6 Mar 2025 20:49:27 +0100 Subject: [PATCH 1935/2157] selftests: riscv: fix v_exec_initval_nolibc.c Vector registers are zero initialized by the kernel. Stop accepting "all ones" as a clean value. Note that this was not working as expected given that value == 0xff can be assumed to be always false by the compiler as value's range is [-128, 127]. Both GCC (-Wtype-limits) and clang (-Wtautological-constant-out-of-range-compare) warn about this. Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Signed-off-by: Ignacio Encinas Link: https://lore.kernel.org/r/20250306-fix-v_exec_initval_nolibc-v2-1-97f9dc8a7faf@iencinas.com Signed-off-by: Alexandre Ghiti --- .../selftests/riscv/vector/v_exec_initval_nolibc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c index 35c0812e32de..4dde05e45a04 100644 --- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -6,7 +6,7 @@ * the values. To further ensure consistency, this file is compiled without * libc and without auto-vectorization. * - * To be "clean" all values must be either all ones or all zeroes. + * To be "clean" all values must be all zeroes. */ #define __stringify_1(x...) #x @@ -14,9 +14,8 @@ int main(int argc, char **argv) { - char prev_value = 0, value; + char value = 0; unsigned long vl; - int first = 1; if (argc > 2 && strcmp(argv[2], "x")) asm volatile ( @@ -44,14 +43,11 @@ int main(int argc, char **argv) "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ ".option pop\n\t" \ : "=r" (value)); \ - if (first) { \ - first = 0; \ - } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + if (value != 0x00) { \ printf("Register " __stringify(register) \ " values not clean! value: %u\n", value); \ exit(-1); \ } \ - prev_value = value; \ } \ }) From 55c78035a1a8dfb05f1472018ce2a651701adb7d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:36 -0700 Subject: [PATCH 1936/2157] objtool: Silence more KCOV warnings, part 2 Similar to GCOV, KCOV can leave behind dead code and undefined behavior. Warnings related to those should be ignored. The previous commit: 6b023c784204 ("objtool: Silence more KCOV warnings") ... only did so for CONFIG_CGOV_KERNEL. Also do it for CONFIG_KCOV, but for real this time. Fixes the following warning: vmlinux.o: warning: objtool: synaptics_report_mt_data: unexpected end of section .text.synaptics_report_mt_data Fixes: 6b023c784204 ("objtool: Silence more KCOV warnings") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/a44ba16e194bcbc52c1cef3d3cd9051a62622723.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503282236.UhfRsF3B-lkp@intel.com/ --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index b93597420daf..4d543054f723 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -275,7 +275,7 @@ objtool-args-$(CONFIG_MITIGATION_SLS) += --sls objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess -objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable +objtool-args-$(or $(CONFIG_GCOV_KERNEL),$(CONFIG_KCOV)) += --no-unreachable objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror From 0d7597749f5a3ac67851d3836635d084df15fb66 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:37 -0700 Subject: [PATCH 1937/2157] objtool: Ignore end-of-section jumps for KCOV/GCOV When KCOV or GCOV is enabled, dead code can be left behind, in which case objtool silences unreachable and undefined behavior (fallthrough) warnings. Fallthrough warnings, and their variant "end of section" warnings, were silenced with the following commit: 6b023c784204 ("objtool: Silence more KCOV warnings") Another variant of a fallthrough warning is a jump to the end of a function. If that function happens to be at the end of a section, the jump destination doesn't actually exist. Normally that would be a fatal objtool error, but for KCOV/GCOV it's just another undefined behavior fallthrough. Silence it like the others. Fixes the following warning: drivers/iommu/dma-iommu.o: warning: objtool: iommu_dma_sw_msi+0x92: can't find jump dest instruction at .text+0x54d5 Fixes: 6b023c784204 ("objtool: Silence more KCOV warnings") Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/08fbe7d7e1e20612206f1df253077b94f178d93e.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/314f8809-cd59-479b-97d7-49356bf1c8d1@infradead.org/ --- tools/objtool/check.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index fff9d7a2947a..e6c4eefe295b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1488,6 +1488,8 @@ static int add_jump_destinations(struct objtool_file *file) int ret; for_each_insn(file, insn) { + struct symbol *func = insn_func(insn); + if (insn->jump_dest) { /* * handle_group_alt() may have previously set @@ -1513,7 +1515,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->return_thunk) { add_return_call(file, insn, true); continue; - } else if (insn_func(insn)) { + } else if (func) { /* * External sibling call or internal sibling call with * STT_FUNC reloc. @@ -1548,6 +1550,15 @@ static int add_jump_destinations(struct objtool_file *file) continue; } + /* + * GCOV/KCOV dead code can jump to the end of the + * function/section. + */ + if (file->ignore_unreachables && func && + dest_sec == insn->sec && + dest_off == func->offset + func->len) + continue; + WARN_INSN(insn, "can't find jump dest instruction at %s+0x%lx", dest_sec->name, dest_off); return -1; @@ -1574,8 +1585,7 @@ static int add_jump_destinations(struct objtool_file *file) /* * Cross-function jump. */ - if (insn_func(insn) && insn_func(jump_dest) && - insn_func(insn) != insn_func(jump_dest)) { + if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) { /* * For GCC 8+, create parent/child links for any cold @@ -1592,10 +1602,10 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn_func(insn)->name, ".cold") && + if (!strstr(func->name, ".cold") && strstr(insn_func(jump_dest)->name, ".cold")) { - insn_func(insn)->cfunc = insn_func(jump_dest); - insn_func(jump_dest)->pfunc = insn_func(insn); + func->cfunc = insn_func(jump_dest); + insn_func(jump_dest)->pfunc = func; } } From 28093cfef5dd62f4cbd537f2bdf6f0bf85309c45 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Wed, 26 Mar 2025 05:14:46 +0000 Subject: [PATCH 1938/2157] riscv/kexec_file: Handle R_RISCV_64 in purgatory relocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 58ff537109ac ("riscv: Omit optimized string routines when using KASAN") introduced calls to EXPORT_SYMBOL() in assembly string routines, which result in R_RISCV_64 relocations against .export_symbol section. As these rountines are reused by RISC-V purgatory and our relocator doesn't recognize these relocations, this fails kexec-file-load with dmesg like [ 11.344251] kexec_image: Unknown rela relocation: 2 [ 11.345972] kexec_image: Error loading purgatory ret=-8 Let's support R_RISCV_64 relocation to fix kexec on 64-bit RISC-V. 32-bit variant isn't covered since KEXEC_FILE and KEXEC_PURGATORY isn't available. Fixes: 58ff537109ac ("riscv: Omit optimized string routines when using KASAN") Signed-off-by: Yao Zi Tested-by: Björn Töpel Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250326051445.55131-2-ziyao@disroot.org Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/elf_kexec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 3c37661801f9..e783a72d051f 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -468,6 +468,9 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, case R_RISCV_ALIGN: case R_RISCV_RELAX: break; + case R_RISCV_64: + *(u64 *)loc = val; + break; default: pr_err("Unknown rela relocation: %d\n", r_type); return -ENOEXEC; From 188d90f817e13b66e03e110eb6f82e8f5f0d654b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:38 -0700 Subject: [PATCH 1939/2157] objtool: Append "()" to function name in "unexpected end of section" warning Append with "()" to clarify it's a function. Before: vmlinux.o: warning: objtool: cdns_mrvl_xspi_setup_clock: unexpected end of section .text.cdns_mrvl_xspi_setup_clock After: vmlinux.o: warning: objtool: cdns_mrvl_xspi_setup_clock(): unexpected end of section .text.cdns_mrvl_xspi_setup_clock Fixes: c5995abe1547 ("objtool: Improve error handling") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/692e1e0d0b15a71bd35c6b4b87f3c75cd5a57358.1743481539.git.jpoimboe@kernel.org --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e6c4eefe295b..bd0c78bfe90c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3761,7 +3761,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; WARN("%s%sunexpected end of section %s", - func ? func->name : "", func ? ": " : "", + func ? func->name : "", func ? "(): " : "", sec->name); return 1; } From 3f7023171df43641a8a8a1c9a12124501e589010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 28 Mar 2025 09:53:11 +0100 Subject: [PATCH 1940/2157] riscv/purgatory: 4B align purgatory_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a crashkernel is launched on RISC-V, the entry to purgatory is done by trapping via the stvec CSR. From riscv_kexec_norelocate(): | ... | /* | * Switch to physical addressing | * This will also trigger a jump to CSR_STVEC | * which in this case is the address of the new | * kernel. | */ | csrw CSR_STVEC, a2 | csrw CSR_SATP, zero stvec requires that the address is 4B aligned, which was not the case, e.g.: | Loaded purgatory at 0xffffc000 | kexec_file: kexec_file_load: type:1, start:0xffffd232 head:0x4 flags:0x6 The address 0xffffd232 not 4B aligned. Correct by adding proper function alignment. With this change, crashkernels loaded with kexec-file will be able to properly enter the purgatory. Fixes: 736e30af583fb ("RISC-V: Add purgatory") Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250328085313.1193815-1-bjorn@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/purgatory/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0e6ca6d5ae4b..c5db2f072c34 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -12,6 +12,7 @@ .text +.align 2 SYM_CODE_START(purgatory_start) lla sp, .Lstack From c5610071a69d1c94c70e681874298b4fc6942098 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:39 -0700 Subject: [PATCH 1941/2157] Revert "objtool: Increase per-function WARN_FUNC() rate limit" This reverts commit 0a7fb6f07e3ad497d31ae9a2082d2cacab43d54a. The "skipping duplicate warnings" warning is technically not an actual warning, which can cause confusion. This feature isn't all that useful anyway. It's exceedingly rare for a function to have more than one unrelated warning. Suggested-by: Ingo Molnar Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/e5abe5e858acf1a9207a5dfa0f37d17ac9dca872.1743481539.git.jpoimboe@kernel.org --- tools/objtool/check.c | 4 ++-- tools/objtool/include/objtool/elf.h | 2 +- tools/objtool/include/objtool/warn.h | 14 +++----------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bd0c78bfe90c..c8b3c8e7090c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3545,7 +3545,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, WARN("%s() falls through to next function %s()", func->name, insn_func(insn)->name); - func->warnings++; + func->warned = 1; return 1; } @@ -4576,7 +4576,7 @@ static void disas_warned_funcs(struct objtool_file *file) char *funcs = NULL, *tmp; for_each_sym(file, sym) { - if (sym->warnings) { + if (sym->warned) { if (!funcs) { funcs = malloc(strlen(sym->name) + 1); if (!funcs) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index eba04392c6fd..c7c4e87ebe88 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -65,11 +65,11 @@ struct symbol { u8 return_thunk : 1; u8 fentry : 1; u8 profiling_func : 1; + u8 warned : 1; u8 embedded_insn : 1; u8 local_label : 1; u8 frame_pointer : 1; u8 ignore : 1; - u8 warnings : 2; struct list_head pv_target; struct reloc *relocs; }; diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index b29ac144e4f5..e3ad9b2caf87 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -57,22 +57,14 @@ static inline char *offstr(struct section *sec, unsigned long offset) free(_str); \ }) -#define WARN_LIMIT 2 - #define WARN_INSN(insn, format, ...) \ ({ \ struct instruction *_insn = (insn); \ - BUILD_BUG_ON(WARN_LIMIT > 2); \ - if (!_insn->sym || _insn->sym->warnings < WARN_LIMIT) { \ + if (!_insn->sym || !_insn->sym->warned) \ WARN_FUNC(format, _insn->sec, _insn->offset, \ ##__VA_ARGS__); \ - if (_insn->sym) \ - _insn->sym->warnings++; \ - } else if (_insn->sym && _insn->sym->warnings == WARN_LIMIT) { \ - WARN_FUNC("skipping duplicate warning(s)", \ - _insn->sec, _insn->offset); \ - _insn->sym->warnings++; \ - } \ + if (_insn->sym) \ + _insn->sym->warned = 1; \ }) #define BT_INSN(insn, format, ...) \ From 0b10177114d1e434af850b377cf5e6620dd1d525 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:40 -0700 Subject: [PATCH 1942/2157] objtool: Always fail on fatal errors Objtool writes several object annotations which are used to enable critical kernel runtime functionalities like static calls and retpoline/rethunk patching. In the rare case where it fails to read or write an object, the annotations don't get written, causing runtime code patching to fail and code to become corrupted. Due to the catastrophic nature of such warnings, convert them to errors which fail the build regardless of CONFIG_OBJTOOL_WERROR. Reported-by: Chaitanya Kumar Borah Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/7d35684ca61eac56eb2424f300ca43c5d257b170.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/SJ1PR11MB61295789E25C2F5197EFF2F6B9A72@SJ1PR11MB6129.namprd11.prod.outlook.com --- tools/objtool/check.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c8b3c8e7090c..cde669923b72 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4753,6 +4753,9 @@ int check(struct objtool_file *file) if (!ret && !warnings) return 0; + if (opts.werror && warnings) + ret = 1; + if (opts.verbose) { if (opts.werror && warnings) WARN("%d warning(s) upgraded to errors", warnings); @@ -4760,15 +4763,5 @@ int check(struct objtool_file *file) disas_warned_funcs(file); } - /* - * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual - * errors. - * - * Note that even fatal errors don't yet actually return an error - * without CONFIG_OBJTOOL_WERROR. That will be fixed soon-ish. - */ - if (opts.werror) - return 1; - - return 0; + return ret; } From 3e7be635937d19b91bab70695328214a3d789d51 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:41 -0700 Subject: [PATCH 1943/2157] objtool: Change "warning:" to "error: " for fatal errors This is similar to GCC's behavior and makes it more obvious why the build failed. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/0ea76f4b0e7a370711ed9f75fd0792bb5979c2bf.1743481539.git.jpoimboe@kernel.org --- tools/objtool/arch/loongarch/decode.c | 14 ++- tools/objtool/arch/loongarch/orc.c | 8 +- tools/objtool/arch/x86/decode.c | 15 ++- tools/objtool/arch/x86/orc.c | 6 +- tools/objtool/builtin-check.c | 30 +++--- tools/objtool/check.c | 127 +++++++++++----------- tools/objtool/elf.c | 150 ++++++++++++-------------- tools/objtool/include/objtool/warn.h | 51 ++++++--- tools/objtool/objtool.c | 4 +- tools/objtool/orc_dump.c | 30 +++--- tools/objtool/special.c | 13 +-- 11 files changed, 226 insertions(+), 222 deletions(-) diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c index 02e490555966..b6fdc68053cc 100644 --- a/tools/objtool/arch/loongarch/decode.c +++ b/tools/objtool/arch/loongarch/decode.c @@ -63,7 +63,7 @@ static bool is_loongarch(const struct elf *elf) if (elf->ehdr.e_machine == EM_LOONGARCH) return true; - WARN("unexpected ELF machine type %d", elf->ehdr.e_machine); + ERROR("unexpected ELF machine type %d", elf->ehdr.e_machine); return false; } @@ -327,8 +327,10 @@ const char *arch_nop_insn(int len) { static u32 nop; - if (len != LOONGARCH_INSN_SIZE) - WARN("invalid NOP size: %d\n", len); + if (len != LOONGARCH_INSN_SIZE) { + ERROR("invalid NOP size: %d\n", len); + return NULL; + } nop = LOONGARCH_INSN_NOP; @@ -339,8 +341,10 @@ const char *arch_ret_insn(int len) { static u32 ret; - if (len != LOONGARCH_INSN_SIZE) - WARN("invalid RET size: %d\n", len); + if (len != LOONGARCH_INSN_SIZE) { + ERROR("invalid RET size: %d\n", len); + return NULL; + } emit_jirl((union loongarch_instruction *)&ret, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); diff --git a/tools/objtool/arch/loongarch/orc.c b/tools/objtool/arch/loongarch/orc.c index 873536d009d9..b58c5ff443c9 100644 --- a/tools/objtool/arch/loongarch/orc.c +++ b/tools/objtool/arch/loongarch/orc.c @@ -41,7 +41,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->type = ORC_TYPE_REGS_PARTIAL; break; default: - WARN_INSN(insn, "unknown unwind hint type %d", cfi->type); + ERROR_INSN(insn, "unknown unwind hint type %d", cfi->type); return -1; } @@ -55,7 +55,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->sp_reg = ORC_REG_FP; break; default: - WARN_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base); + ERROR_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base); return -1; } @@ -72,7 +72,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->fp_reg = ORC_REG_FP; break; default: - WARN_INSN(insn, "unknown FP base reg %d", fp->base); + ERROR_INSN(insn, "unknown FP base reg %d", fp->base); return -1; } @@ -89,7 +89,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->ra_reg = ORC_REG_FP; break; default: - WARN_INSN(insn, "unknown RA base reg %d", ra->base); + ERROR_INSN(insn, "unknown RA base reg %d", ra->base); return -1; } diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 7567c893f45e..33d861c04ebd 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -36,7 +36,7 @@ static int is_x86_64(const struct elf *elf) case EM_386: return 0; default: - WARN("unexpected ELF machine type %d", elf->ehdr.e_machine); + ERROR("unexpected ELF machine type %d", elf->ehdr.e_machine); return -1; } } @@ -173,7 +173,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen, x86_64 ? INSN_MODE_64 : INSN_MODE_32); if (ret < 0) { - WARN("can't decode instruction at %s:0x%lx", sec->name, offset); + ERROR("can't decode instruction at %s:0x%lx", sec->name, offset); return -1; } @@ -321,7 +321,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; default: - /* WARN ? */ + /* ERROR ? */ break; } @@ -561,8 +561,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (ins.prefixes.nbytes == 1 && ins.prefixes.bytes[0] == 0xf2) { /* ENQCMD cannot be used in the kernel. */ - WARN("ENQCMD instruction at %s:%lx", sec->name, - offset); + WARN("ENQCMD instruction at %s:%lx", sec->name, offset); } } else if (op2 == 0xa0 || op2 == 0xa8) { @@ -646,7 +645,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (disp->sym->type == STT_SECTION) func = find_symbol_by_offset(disp->sym->sec, reloc_addend(disp)); if (!func) { - WARN("no func for pv_ops[]"); + ERROR("no func for pv_ops[]"); return -1; } @@ -776,7 +775,7 @@ const char *arch_nop_insn(int len) }; if (len < 1 || len > 5) { - WARN("invalid NOP size: %d\n", len); + ERROR("invalid NOP size: %d\n", len); return NULL; } @@ -796,7 +795,7 @@ const char *arch_ret_insn(int len) }; if (len < 1 || len > 5) { - WARN("invalid RET size: %d\n", len); + ERROR("invalid RET size: %d\n", len); return NULL; } diff --git a/tools/objtool/arch/x86/orc.c b/tools/objtool/arch/x86/orc.c index b6cd943e87f9..7176b9ec5b05 100644 --- a/tools/objtool/arch/x86/orc.c +++ b/tools/objtool/arch/x86/orc.c @@ -40,7 +40,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->type = ORC_TYPE_REGS_PARTIAL; break; default: - WARN_INSN(insn, "unknown unwind hint type %d", cfi->type); + ERROR_INSN(insn, "unknown unwind hint type %d", cfi->type); return -1; } @@ -72,7 +72,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->sp_reg = ORC_REG_DX; break; default: - WARN_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base); + ERROR_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base); return -1; } @@ -87,7 +87,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct orc->bp_reg = ORC_REG_BP; break; default: - WARN_INSN(insn, "unknown BP base reg %d", bp->base); + ERROR_INSN(insn, "unknown BP base reg %d", bp->base); return -1; } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index e364ab6345d3..80239843e9f0 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -139,22 +139,22 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]) static bool opts_valid(void) { if (opts.mnop && !opts.mcount) { - WARN("--mnop requires --mcount"); + ERROR("--mnop requires --mcount"); return false; } if (opts.noinstr && !opts.link) { - WARN("--noinstr requires --link"); + ERROR("--noinstr requires --link"); return false; } if (opts.ibt && !opts.link) { - WARN("--ibt requires --link"); + ERROR("--ibt requires --link"); return false; } if (opts.unret && !opts.link) { - WARN("--unret requires --link"); + ERROR("--unret requires --link"); return false; } @@ -171,7 +171,7 @@ static bool opts_valid(void) opts.static_call || opts.uaccess) { if (opts.dump_orc) { - WARN("--dump can't be combined with other actions"); + ERROR("--dump can't be combined with other actions"); return false; } @@ -181,7 +181,7 @@ static bool opts_valid(void) if (opts.dump_orc) return true; - WARN("At least one action required"); + ERROR("At least one action required"); return false; } @@ -194,30 +194,30 @@ static int copy_file(const char *src, const char *dst) src_fd = open(src, O_RDONLY); if (src_fd == -1) { - WARN("can't open %s for reading: %s", src, strerror(errno)); + ERROR("can't open %s for reading: %s", src, strerror(errno)); return 1; } dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400); if (dst_fd == -1) { - WARN("can't open %s for writing: %s", dst, strerror(errno)); + ERROR("can't open %s for writing: %s", dst, strerror(errno)); return 1; } if (fstat(src_fd, &stat) == -1) { - WARN_GLIBC("fstat"); + ERROR_GLIBC("fstat"); return 1; } if (fchmod(dst_fd, stat.st_mode) == -1) { - WARN_GLIBC("fchmod"); + ERROR_GLIBC("fchmod"); return 1; } for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) { copied = sendfile(dst_fd, src_fd, &offset, to_copy); if (copied == -1) { - WARN_GLIBC("sendfile"); + ERROR_GLIBC("sendfile"); return 1; } } @@ -231,14 +231,14 @@ static void save_argv(int argc, const char **argv) { orig_argv = calloc(argc, sizeof(char *)); if (!orig_argv) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); exit(1); } for (int i = 0; i < argc; i++) { orig_argv[i] = strdup(argv[i]); if (!orig_argv[i]) { - WARN_GLIBC("strdup(%s)", argv[i]); + ERROR_GLIBC("strdup(%s)", argv[i]); exit(1); } }; @@ -257,7 +257,7 @@ void print_args(void) */ backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1); if (!backup) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); goto print; } @@ -319,7 +319,7 @@ int objtool_run(int argc, const char **argv) return 1; if (!opts.link && has_multiple_files(file->elf)) { - WARN("Linked object requires --link"); + ERROR("Linked object requires --link"); return 1; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index cde669923b72..ff83be1aab1d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -348,7 +348,7 @@ static struct cfi_state *cfi_alloc(void) { struct cfi_state *cfi = calloc(1, sizeof(struct cfi_state)); if (!cfi) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); exit(1); } nr_cfi++; @@ -404,7 +404,7 @@ static void *cfi_hash_alloc(unsigned long size) PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (cfi_hash == (void *)-1L) { - WARN_GLIBC("mmap fail cfi_hash"); + ERROR_GLIBC("mmap fail cfi_hash"); cfi_hash = NULL; } else if (opts.stats) { printf("cfi_bits: %d\n", cfi_bits); @@ -460,7 +460,7 @@ static int decode_instructions(struct objtool_file *file) if (!insns || idx == INSN_CHUNK_MAX) { insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE); if (!insns) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } idx = 0; @@ -495,8 +495,6 @@ static int decode_instructions(struct objtool_file *file) nr_insns++; } -// printf("%s: last chunk used: %d\n", sec->name, (int)idx); - sec_for_each_sym(sec, func) { if (func->type != STT_NOTYPE && func->type != STT_FUNC) continue; @@ -505,8 +503,7 @@ static int decode_instructions(struct objtool_file *file) /* Heuristic: likely an "end" symbol */ if (func->type == STT_NOTYPE) continue; - WARN("%s(): STT_FUNC at end of section", - func->name); + ERROR("%s(): STT_FUNC at end of section", func->name); return -1; } @@ -514,8 +511,7 @@ static int decode_instructions(struct objtool_file *file) continue; if (!find_insn(file, sec, func->offset)) { - WARN("%s(): can't find starting instruction", - func->name); + ERROR("%s(): can't find starting instruction", func->name); return -1; } @@ -569,9 +565,8 @@ static int add_pv_ops(struct objtool_file *file, const char *symname) func = find_symbol_by_offset(reloc->sym->sec, reloc_addend(reloc)); if (!func) { - WARN_FUNC("can't find func at %s[%d]", - reloc->sym->sec, reloc_addend(reloc), - symname, idx); + ERROR_FUNC(reloc->sym->sec, reloc_addend(reloc), + "can't find func at %s[%d]", symname, idx); return -1; } @@ -614,7 +609,7 @@ static int init_pv_ops(struct objtool_file *file) nr = sym->len / sizeof(unsigned long); file->pv_ops = calloc(sizeof(struct pv_state), nr); if (!file->pv_ops) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -673,12 +668,12 @@ static int create_static_call_sections(struct objtool_file *file) /* find key symbol */ key_name = strdup(insn_call_dest(insn)->name); if (!key_name) { - WARN_GLIBC("strdup"); + ERROR_GLIBC("strdup"); return -1; } if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, STATIC_CALL_TRAMP_PREFIX_LEN)) { - WARN("static_call: trampoline name malformed: %s", key_name); + ERROR("static_call: trampoline name malformed: %s", key_name); return -1; } tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; @@ -687,7 +682,7 @@ static int create_static_call_sections(struct objtool_file *file) key_sym = find_symbol_by_name(file->elf, tmp); if (!key_sym) { if (!opts.module) { - WARN("static_call: can't find static_call_key symbol: %s", tmp); + ERROR("static_call: can't find static_call_key symbol: %s", tmp); return -1; } @@ -833,8 +828,8 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) insn->offset == sym->offset && (!strcmp(sym->name, "init_module") || !strcmp(sym->name, "cleanup_module"))) { - WARN("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead", - sym->name); + ERROR("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead", + sym->name); return -1; } @@ -1008,8 +1003,8 @@ static int add_ignores(struct objtool_file *file) break; default: - WARN("unexpected relocation symbol type in %s: %d", - rsec->name, reloc->sym->type); + ERROR("unexpected relocation symbol type in %s: %d", + rsec->name, reloc->sym->type); return -1; } @@ -1559,8 +1554,8 @@ static int add_jump_destinations(struct objtool_file *file) dest_off == func->offset + func->len) continue; - WARN_INSN(insn, "can't find jump dest instruction at %s+0x%lx", - dest_sec->name, dest_off); + ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx", + dest_sec->name, dest_off); return -1; } @@ -1666,12 +1661,12 @@ static int add_call_destinations(struct objtool_file *file) continue; if (!insn_call_dest(insn)) { - WARN_INSN(insn, "unannotated intra-function call"); + ERROR_INSN(insn, "unannotated intra-function call"); return -1; } if (func && insn_call_dest(insn)->type != STT_FUNC) { - WARN_INSN(insn, "unsupported call to non-function"); + ERROR_INSN(insn, "unsupported call to non-function"); return -1; } @@ -1679,8 +1674,8 @@ static int add_call_destinations(struct objtool_file *file) dest_off = arch_dest_reloc_offset(reloc_addend(reloc)); dest = find_call_destination(reloc->sym->sec, dest_off); if (!dest) { - WARN_INSN(insn, "can't find call dest symbol at %s+0x%lx", - reloc->sym->sec->name, dest_off); + ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx", + reloc->sym->sec->name, dest_off); return -1; } @@ -1722,13 +1717,13 @@ static int handle_group_alt(struct objtool_file *file, orig_alt_group = calloc(1, sizeof(*orig_alt_group)); if (!orig_alt_group) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } orig_alt_group->cfi = calloc(special_alt->orig_len, sizeof(struct cfi_state *)); if (!orig_alt_group->cfi) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -1748,18 +1743,18 @@ static int handle_group_alt(struct objtool_file *file, } else { if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len - orig_alt_group->first_insn->offset != special_alt->orig_len) { - WARN_INSN(orig_insn, "weirdly overlapping alternative! %ld != %d", - orig_alt_group->last_insn->offset + - orig_alt_group->last_insn->len - - orig_alt_group->first_insn->offset, - special_alt->orig_len); + ERROR_INSN(orig_insn, "weirdly overlapping alternative! %ld != %d", + orig_alt_group->last_insn->offset + + orig_alt_group->last_insn->len - + orig_alt_group->first_insn->offset, + special_alt->orig_len); return -1; } } new_alt_group = calloc(1, sizeof(*new_alt_group)); if (!new_alt_group) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -1773,7 +1768,7 @@ static int handle_group_alt(struct objtool_file *file, */ nop = calloc(1, sizeof(*nop)); if (!nop) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } memset(nop, 0, sizeof(*nop)); @@ -1815,7 +1810,7 @@ static int handle_group_alt(struct objtool_file *file, if (alt_reloc && arch_pc_relative_reloc(alt_reloc) && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { - WARN_INSN(insn, "unsupported relocation in alternatives section"); + ERROR_INSN(insn, "unsupported relocation in alternatives section"); return -1; } @@ -1829,15 +1824,15 @@ static int handle_group_alt(struct objtool_file *file, if (dest_off == special_alt->new_off + special_alt->new_len) { insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn); if (!insn->jump_dest) { - WARN_INSN(insn, "can't find alternative jump destination"); + ERROR_INSN(insn, "can't find alternative jump destination"); return -1; } } } if (!last_new_insn) { - WARN_FUNC("can't find last new alternative instruction", - special_alt->new_sec, special_alt->new_off); + ERROR_FUNC(special_alt->new_sec, special_alt->new_off, + "can't find last new alternative instruction"); return -1; } @@ -1864,7 +1859,7 @@ static int handle_jump_alt(struct objtool_file *file, if (orig_insn->type != INSN_JUMP_UNCONDITIONAL && orig_insn->type != INSN_NOP) { - WARN_INSN(orig_insn, "unsupported instruction at jump label"); + ERROR_INSN(orig_insn, "unsupported instruction at jump label"); return -1; } @@ -1923,8 +1918,8 @@ static int add_special_section_alts(struct objtool_file *file) orig_insn = find_insn(file, special_alt->orig_sec, special_alt->orig_off); if (!orig_insn) { - WARN_FUNC("special: can't find orig instruction", - special_alt->orig_sec, special_alt->orig_off); + ERROR_FUNC(special_alt->orig_sec, special_alt->orig_off, + "special: can't find orig instruction"); return -1; } @@ -1933,16 +1928,15 @@ static int add_special_section_alts(struct objtool_file *file) new_insn = find_insn(file, special_alt->new_sec, special_alt->new_off); if (!new_insn) { - WARN_FUNC("special: can't find new instruction", - special_alt->new_sec, - special_alt->new_off); + ERROR_FUNC(special_alt->new_sec, special_alt->new_off, + "special: can't find new instruction"); return -1; } } if (special_alt->group) { if (!special_alt->orig_len) { - WARN_INSN(orig_insn, "empty alternative entry"); + ERROR_INSN(orig_insn, "empty alternative entry"); continue; } @@ -1960,7 +1954,7 @@ static int add_special_section_alts(struct objtool_file *file) alt = calloc(1, sizeof(*alt)); if (!alt) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -2037,7 +2031,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn) alt = calloc(1, sizeof(*alt)); if (!alt) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -2049,7 +2043,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn) } if (!prev_offset) { - WARN_INSN(insn, "can't find switch jump table"); + ERROR_INSN(insn, "can't find switch jump table"); return -1; } @@ -2207,12 +2201,12 @@ static int read_unwind_hints(struct objtool_file *file) return 0; if (!sec->rsec) { - WARN("missing .rela.discard.unwind_hints section"); + ERROR("missing .rela.discard.unwind_hints section"); return -1; } if (sec->sh.sh_size % sizeof(struct unwind_hint)) { - WARN("struct unwind_hint size mismatch"); + ERROR("struct unwind_hint size mismatch"); return -1; } @@ -2223,7 +2217,7 @@ static int read_unwind_hints(struct objtool_file *file) reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint)); if (!reloc) { - WARN("can't find reloc for unwind_hints[%d]", i); + ERROR("can't find reloc for unwind_hints[%d]", i); return -1; } @@ -2232,13 +2226,13 @@ static int read_unwind_hints(struct objtool_file *file) } else if (reloc->sym->local_label) { offset = reloc->sym->offset; } else { - WARN("unexpected relocation symbol type in %s", sec->rsec->name); + ERROR("unexpected relocation symbol type in %s", sec->rsec->name); return -1; } insn = find_insn(file, reloc->sym->sec, offset); if (!insn) { - WARN("can't find insn for unwind_hints[%d]", i); + ERROR("can't find insn for unwind_hints[%d]", i); return -1; } @@ -2265,7 +2259,7 @@ static int read_unwind_hints(struct objtool_file *file) if (sym && sym->bind == STB_GLOBAL) { if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) { - WARN_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR"); + ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR"); return -1; } } @@ -2280,7 +2274,7 @@ static int read_unwind_hints(struct objtool_file *file) cfi = *(insn->cfi); if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { - WARN_INSN(insn, "unsupported unwind_hint sp base reg %d", hint->sp_reg); + ERROR_INSN(insn, "unsupported unwind_hint sp base reg %d", hint->sp_reg); return -1; } @@ -2326,7 +2320,7 @@ static int read_annotate(struct objtool_file *file, insn = find_insn(file, reloc->sym->sec, offset); if (!insn) { - WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); + ERROR("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; } @@ -2369,7 +2363,7 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio return 0; if (insn->type != INSN_CALL) { - WARN_INSN(insn, "intra_function_call not a direct call"); + ERROR_INSN(insn, "intra_function_call not a direct call"); return -1; } @@ -2383,8 +2377,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio dest_off = arch_jump_destination(insn); insn->jump_dest = find_insn(file, insn->sec, dest_off); if (!insn->jump_dest) { - WARN_INSN(insn, "can't find call dest at %s+0x%lx", - insn->sec->name, dest_off); + ERROR_INSN(insn, "can't find call dest at %s+0x%lx", + insn->sec->name, dest_off); return -1; } @@ -2403,7 +2397,7 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->type != INSN_CALL_DYNAMIC && insn->type != INSN_RETURN && insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); + ERROR_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); return -1; } @@ -2435,7 +2429,7 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi break; default: - WARN_INSN(insn, "Unknown annotation type: %d", type); + ERROR_INSN(insn, "Unknown annotation type: %d", type); return -1; } @@ -4393,9 +4387,8 @@ static int validate_ibt_data_reloc(struct objtool_file *file, if (dest->noendbr) return 0; - WARN_FUNC("data relocation to !ENDBR: %s", - reloc->sec->base, reloc_offset(reloc), - offstr(dest->sec, dest->offset)); + WARN_FUNC(reloc->sec->base, reloc_offset(reloc), + "data relocation to !ENDBR: %s", offstr(dest->sec, dest->offset)); return 1; } @@ -4580,14 +4573,14 @@ static void disas_warned_funcs(struct objtool_file *file) if (!funcs) { funcs = malloc(strlen(sym->name) + 1); if (!funcs) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return; } strcpy(funcs, sym->name); } else { tmp = malloc(strlen(funcs) + strlen(sym->name) + 2); if (!tmp) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return; } sprintf(tmp, "%s %s", funcs, sym->name); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index b352a78b596c..727a3a4fd9d7 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -72,17 +72,17 @@ static inline void __elf_hash_del(struct elf_hash_node *node, obj; \ obj = elf_list_entry(obj->member.next, typeof(*(obj)), member)) -#define elf_alloc_hash(name, size) \ -({ \ - __elf_bits(name) = max(10, ilog2(size)); \ +#define elf_alloc_hash(name, size) \ +({ \ + __elf_bits(name) = max(10, ilog2(size)); \ __elf_table(name) = mmap(NULL, sizeof(struct elf_hash_node *) << __elf_bits(name), \ - PROT_READ|PROT_WRITE, \ - MAP_PRIVATE|MAP_ANON, -1, 0); \ - if (__elf_table(name) == (void *)-1L) { \ - WARN("mmap fail " #name); \ - __elf_table(name) = NULL; \ - } \ - __elf_table(name); \ + PROT_READ|PROT_WRITE, \ + MAP_PRIVATE|MAP_ANON, -1, 0); \ + if (__elf_table(name) == (void *)-1L) { \ + ERROR_GLIBC("mmap fail " #name); \ + __elf_table(name) = NULL; \ + } \ + __elf_table(name); \ }) static inline unsigned long __sym_start(struct symbol *s) @@ -316,12 +316,12 @@ static int read_sections(struct elf *elf) int i; if (elf_getshdrnum(elf->elf, §ions_nr)) { - WARN_ELF("elf_getshdrnum"); + ERROR_ELF("elf_getshdrnum"); return -1; } if (elf_getshdrstrndx(elf->elf, &shstrndx)) { - WARN_ELF("elf_getshdrstrndx"); + ERROR_ELF("elf_getshdrstrndx"); return -1; } @@ -331,7 +331,7 @@ static int read_sections(struct elf *elf) elf->section_data = calloc(sections_nr, sizeof(*sec)); if (!elf->section_data) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } for (i = 0; i < sections_nr; i++) { @@ -341,33 +341,32 @@ static int read_sections(struct elf *elf) s = elf_getscn(elf->elf, i); if (!s) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } sec->idx = elf_ndxscn(s); if (!gelf_getshdr(s, &sec->sh)) { - WARN_ELF("gelf_getshdr"); + ERROR_ELF("gelf_getshdr"); return -1; } sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); if (!sec->name) { - WARN_ELF("elf_strptr"); + ERROR_ELF("elf_strptr"); return -1; } if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) { sec->data = elf_getdata(s, NULL); if (!sec->data) { - WARN_ELF("elf_getdata"); + ERROR_ELF("elf_getdata"); return -1; } if (sec->data->d_off != 0 || sec->data->d_size != sec->sh.sh_size) { - WARN("unexpected data attributes for %s", - sec->name); + ERROR("unexpected data attributes for %s", sec->name); return -1; } } @@ -387,7 +386,7 @@ static int read_sections(struct elf *elf) /* sanity check, one more call to elf_nextscn() should return NULL */ if (elf_nextscn(elf->elf, s)) { - WARN("section entry mismatch"); + ERROR("section entry mismatch"); return -1; } @@ -467,7 +466,7 @@ static int read_symbols(struct elf *elf) elf->symbol_data = calloc(symbols_nr, sizeof(*sym)); if (!elf->symbol_data) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } for (i = 0; i < symbols_nr; i++) { @@ -477,14 +476,14 @@ static int read_symbols(struct elf *elf) if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym, &shndx)) { - WARN_ELF("gelf_getsymshndx"); + ERROR_ELF("gelf_getsymshndx"); goto err; } sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, sym->sym.st_name); if (!sym->name) { - WARN_ELF("elf_strptr"); + ERROR_ELF("elf_strptr"); goto err; } @@ -496,8 +495,7 @@ static int read_symbols(struct elf *elf) sym->sec = find_section_by_index(elf, shndx); if (!sym->sec) { - WARN("couldn't find section for symbol %s", - sym->name); + ERROR("couldn't find section for symbol %s", sym->name); goto err; } if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { @@ -536,8 +534,7 @@ static int read_symbols(struct elf *elf) pnamelen = coldstr - sym->name; pname = strndup(sym->name, pnamelen); if (!pname) { - WARN("%s(): failed to allocate memory", - sym->name); + ERROR("%s(): failed to allocate memory", sym->name); return -1; } @@ -545,8 +542,7 @@ static int read_symbols(struct elf *elf) free(pname); if (!pfunc) { - WARN("%s(): can't find parent function", - sym->name); + ERROR("%s(): can't find parent function", sym->name); return -1; } @@ -613,14 +609,14 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, s = elf_getscn(elf->elf, symtab->idx); if (!s) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } if (symtab_shndx) { t = elf_getscn(elf->elf, symtab_shndx->idx); if (!t) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } } @@ -643,7 +639,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, if (idx) { /* we don't do holes in symbol tables */ - WARN("index out of range"); + ERROR("index out of range"); return -1; } @@ -654,7 +650,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, buf = calloc(num, entsize); if (!buf) { - WARN("malloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -669,7 +665,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, if (t) { buf = calloc(num, sizeof(Elf32_Word)); if (!buf) { - WARN("malloc"); + ERROR_GLIBC("calloc"); return -1; } @@ -687,7 +683,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, /* empty blocks should not happen */ if (!symtab_data->d_size) { - WARN("zero size data"); + ERROR("zero size data"); return -1; } @@ -702,7 +698,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, /* something went side-ways */ if (idx < 0) { - WARN("negative index"); + ERROR("negative index"); return -1; } @@ -714,13 +710,13 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } else { sym->sym.st_shndx = SHN_XINDEX; if (!shndx_data) { - WARN("no .symtab_shndx"); + ERROR("no .symtab_shndx"); return -1; } } if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) { - WARN_ELF("gelf_update_symshndx"); + ERROR_ELF("gelf_update_symshndx"); return -1; } @@ -738,7 +734,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) if (symtab) { symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); } else { - WARN("no .symtab"); + ERROR("no .symtab"); return NULL; } @@ -760,7 +756,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) old->idx = new_idx; if (elf_update_symbol(elf, symtab, symtab_shndx, old)) { - WARN("elf_update_symbol move"); + ERROR("elf_update_symbol move"); return NULL; } @@ -778,7 +774,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) non_local: sym->idx = new_idx; if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { - WARN("elf_update_symbol"); + ERROR("elf_update_symbol"); return NULL; } @@ -799,7 +795,7 @@ elf_create_section_symbol(struct elf *elf, struct section *sec) struct symbol *sym = calloc(1, sizeof(*sym)); if (!sym) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } @@ -829,7 +825,7 @@ elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size) char *name = malloc(namelen); if (!sym || !name) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } @@ -858,16 +854,16 @@ static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec, struct reloc *reloc, empty = { 0 }; if (reloc_idx >= sec_num_entries(rsec)) { - WARN("%s: bad reloc_idx %u for %s with %d relocs", - __func__, reloc_idx, rsec->name, sec_num_entries(rsec)); + ERROR("%s: bad reloc_idx %u for %s with %d relocs", + __func__, reloc_idx, rsec->name, sec_num_entries(rsec)); return NULL; } reloc = &rsec->relocs[reloc_idx]; if (memcmp(reloc, &empty, sizeof(empty))) { - WARN("%s: %s: reloc %d already initialized!", - __func__, rsec->name, reloc_idx); + ERROR("%s: %s: reloc %d already initialized!", + __func__, rsec->name, reloc_idx); return NULL; } @@ -896,8 +892,7 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec, int addend = insn_off; if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) { - WARN("bad call to %s() for data symbol %s", - __func__, sym->name); + ERROR("bad call to %s() for data symbol %s", __func__, sym->name); return NULL; } @@ -926,8 +921,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec, s64 addend) { if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) { - WARN("bad call to %s() for text symbol %s", - __func__, sym->name); + ERROR("bad call to %s() for text symbol %s", __func__, sym->name); return NULL; } @@ -953,8 +947,7 @@ static int read_relocs(struct elf *elf) rsec->base = find_section_by_index(elf, rsec->sh.sh_info); if (!rsec->base) { - WARN("can't find base section for reloc section %s", - rsec->name); + ERROR("can't find base section for reloc section %s", rsec->name); return -1; } @@ -963,7 +956,7 @@ static int read_relocs(struct elf *elf) nr_reloc = 0; rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc)); if (!rsec->relocs) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return -1; } for (i = 0; i < sec_num_entries(rsec); i++) { @@ -973,8 +966,7 @@ static int read_relocs(struct elf *elf) symndx = reloc_sym(reloc); reloc->sym = sym = find_symbol_by_index(elf, symndx); if (!reloc->sym) { - WARN("can't find reloc entry symbol %d for %s", - symndx, rsec->name); + ERROR("can't find reloc entry symbol %d for %s", symndx, rsec->name); return -1; } @@ -1005,7 +997,7 @@ struct elf *elf_open_read(const char *name, int flags) elf = malloc(sizeof(*elf)); if (!elf) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } memset(elf, 0, sizeof(*elf)); @@ -1028,12 +1020,12 @@ struct elf *elf_open_read(const char *name, int flags) elf->elf = elf_begin(elf->fd, cmd, NULL); if (!elf->elf) { - WARN_ELF("elf_begin"); + ERROR_ELF("elf_begin"); goto err; } if (!gelf_getehdr(elf->elf, &elf->ehdr)) { - WARN_ELF("gelf_getehdr"); + ERROR_ELF("gelf_getehdr"); goto err; } @@ -1062,19 +1054,19 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) if (!strtab) strtab = find_section_by_name(elf, ".strtab"); if (!strtab) { - WARN("can't find .strtab section"); + ERROR("can't find .strtab section"); return -1; } s = elf_getscn(elf->elf, strtab->idx); if (!s) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } data = elf_newdata(s); if (!data) { - WARN_ELF("elf_newdata"); + ERROR_ELF("elf_newdata"); return -1; } @@ -1099,7 +1091,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec = malloc(sizeof(*sec)); if (!sec) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } memset(sec, 0, sizeof(*sec)); @@ -1108,13 +1100,13 @@ struct section *elf_create_section(struct elf *elf, const char *name, s = elf_newscn(elf->elf); if (!s) { - WARN_ELF("elf_newscn"); + ERROR_ELF("elf_newscn"); return NULL; } sec->name = strdup(name); if (!sec->name) { - WARN_GLIBC("strdup"); + ERROR_GLIBC("strdup"); return NULL; } @@ -1122,7 +1114,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->data = elf_newdata(s); if (!sec->data) { - WARN_ELF("elf_newdata"); + ERROR_ELF("elf_newdata"); return NULL; } @@ -1132,14 +1124,14 @@ struct section *elf_create_section(struct elf *elf, const char *name, if (size) { sec->data->d_buf = malloc(size); if (!sec->data->d_buf) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } memset(sec->data->d_buf, 0, size); } if (!gelf_getshdr(s, &sec->sh)) { - WARN_ELF("gelf_getshdr"); + ERROR_ELF("gelf_getshdr"); return NULL; } @@ -1154,7 +1146,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, if (!shstrtab) shstrtab = find_section_by_name(elf, ".strtab"); if (!shstrtab) { - WARN("can't find .shstrtab or .strtab section"); + ERROR("can't find .shstrtab or .strtab section"); return NULL; } sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); @@ -1179,7 +1171,7 @@ static struct section *elf_create_rela_section(struct elf *elf, rsec_name = malloc(strlen(sec->name) + strlen(".rela") + 1); if (!rsec_name) { - WARN_GLIBC("malloc"); + ERROR_GLIBC("malloc"); return NULL; } strcpy(rsec_name, ".rela"); @@ -1199,7 +1191,7 @@ static struct section *elf_create_rela_section(struct elf *elf, rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc)); if (!rsec->relocs) { - WARN_GLIBC("calloc"); + ERROR_GLIBC("calloc"); return NULL; } @@ -1232,7 +1224,7 @@ int elf_write_insn(struct elf *elf, struct section *sec, Elf_Data *data = sec->data; if (data->d_type != ELF_T_BYTE || data->d_off) { - WARN("write to unexpected data for section: %s", sec->name); + ERROR("write to unexpected data for section: %s", sec->name); return -1; } @@ -1261,7 +1253,7 @@ static int elf_truncate_section(struct elf *elf, struct section *sec) s = elf_getscn(elf->elf, sec->idx); if (!s) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } @@ -1271,7 +1263,7 @@ static int elf_truncate_section(struct elf *elf, struct section *sec) if (!data) { if (size) { - WARN("end of section data but non-zero size left\n"); + ERROR("end of section data but non-zero size left\n"); return -1; } return 0; @@ -1279,12 +1271,12 @@ static int elf_truncate_section(struct elf *elf, struct section *sec) if (truncated) { /* when we remove symbols */ - WARN("truncated; but more data\n"); + ERROR("truncated; but more data\n"); return -1; } if (!data->d_size) { - WARN("zero size data"); + ERROR("zero size data"); return -1; } @@ -1310,13 +1302,13 @@ int elf_write(struct elf *elf) if (sec_changed(sec)) { s = elf_getscn(elf->elf, sec->idx); if (!s) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } /* Note this also flags the section dirty */ if (!gelf_update_shdr(s, &sec->sh)) { - WARN_ELF("gelf_update_shdr"); + ERROR_ELF("gelf_update_shdr"); return -1; } @@ -1329,7 +1321,7 @@ int elf_write(struct elf *elf) /* Write all changes to the file. */ if (elf_update(elf->elf, ELF_C_WRITE) < 0) { - WARN_ELF("elf_update"); + ERROR_ELF("elf_update"); return -1; } diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index e3ad9b2caf87..cb8fe846d9dd 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -42,26 +42,43 @@ static inline char *offstr(struct section *sec, unsigned long offset) return str; } -#define WARN(format, ...) \ - fprintf(stderr, \ - "%s%s%s: objtool: " format "\n", \ - objname ?: "", \ - objname ? ": " : "", \ - opts.werror ? "error" : "warning", \ +#define ___WARN(severity, extra, format, ...) \ + fprintf(stderr, \ + "%s%s%s: objtool" extra ": " format "\n", \ + objname ?: "", \ + objname ? ": " : "", \ + severity, \ ##__VA_ARGS__) -#define WARN_FUNC(format, sec, offset, ...) \ -({ \ - char *_str = offstr(sec, offset); \ - WARN("%s: " format, _str, ##__VA_ARGS__); \ - free(_str); \ +#define __WARN(severity, format, ...) \ + ___WARN(severity, "", format, ##__VA_ARGS__) + +#define __WARN_LINE(severity, format, ...) \ + ___WARN(severity, " [%s:%d]", format, __FILE__, __LINE__, ##__VA_ARGS__) + +#define __WARN_ELF(severity, format, ...) \ + __WARN_LINE(severity, "%s: " format " failed: %s", __func__, ##__VA_ARGS__, elf_errmsg(-1)) + +#define __WARN_GLIBC(severity, format, ...) \ + __WARN_LINE(severity, "%s: " format " failed: %s", __func__, ##__VA_ARGS__, strerror(errno)) + +#define __WARN_FUNC(severity, sec, offset, format, ...) \ +({ \ + char *_str = offstr(sec, offset); \ + __WARN(severity, "%s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ }) +#define WARN_STR (opts.werror ? "error" : "warning") + +#define WARN(format, ...) __WARN(WARN_STR, format, ##__VA_ARGS__) +#define WARN_FUNC(sec, offset, format, ...) __WARN_FUNC(WARN_STR, sec, offset, format, ##__VA_ARGS__) + #define WARN_INSN(insn, format, ...) \ ({ \ struct instruction *_insn = (insn); \ if (!_insn->sym || !_insn->sym->warned) \ - WARN_FUNC(format, _insn->sec, _insn->offset, \ + WARN_FUNC(_insn->sec, _insn->offset, format, \ ##__VA_ARGS__); \ if (_insn->sym) \ _insn->sym->warned = 1; \ @@ -77,10 +94,12 @@ static inline char *offstr(struct section *sec, unsigned long offset) } \ }) -#define WARN_ELF(format, ...) \ - WARN("%s: " format " failed: %s", __func__, ##__VA_ARGS__, elf_errmsg(-1)) +#define ERROR_STR "error" -#define WARN_GLIBC(format, ...) \ - WARN("%s: " format " failed: %s", __func__, ##__VA_ARGS__, strerror(errno)) +#define ERROR(format, ...) __WARN(ERROR_STR, format, ##__VA_ARGS__) +#define ERROR_ELF(format, ...) __WARN_ELF(ERROR_STR, format, ##__VA_ARGS__) +#define ERROR_GLIBC(format, ...) __WARN_GLIBC(ERROR_STR, format, ##__VA_ARGS__) +#define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__) +#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) #endif /* _WARN_H */ diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index e4b49c534e4d..5c8b974ad0f9 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -23,7 +23,7 @@ static struct objtool_file file; struct objtool_file *objtool_open_read(const char *filename) { if (file.elf) { - WARN("won't handle more than one file at a time"); + ERROR("won't handle more than one file at a time"); return NULL; } @@ -50,7 +50,7 @@ int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) return 0; if (!f->pv_ops) { - WARN("paravirt confusion"); + ERROR("paravirt confusion"); return -1; } diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 05ef0e297837..1dd9fc18fe62 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -36,47 +36,47 @@ int orc_dump(const char *filename) elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (!elf) { - WARN_ELF("elf_begin"); + ERROR_ELF("elf_begin"); return -1; } if (!elf64_getehdr(elf)) { - WARN_ELF("elf64_getehdr"); + ERROR_ELF("elf64_getehdr"); return -1; } memcpy(&dummy_elf.ehdr, elf64_getehdr(elf), sizeof(dummy_elf.ehdr)); if (elf_getshdrnum(elf, &nr_sections)) { - WARN_ELF("elf_getshdrnum"); + ERROR_ELF("elf_getshdrnum"); return -1; } if (elf_getshdrstrndx(elf, &shstrtab_idx)) { - WARN_ELF("elf_getshdrstrndx"); + ERROR_ELF("elf_getshdrstrndx"); return -1; } for (i = 0; i < nr_sections; i++) { scn = elf_getscn(elf, i); if (!scn) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } if (!gelf_getshdr(scn, &sh)) { - WARN_ELF("gelf_getshdr"); + ERROR_ELF("gelf_getshdr"); return -1; } name = elf_strptr(elf, shstrtab_idx, sh.sh_name); if (!name) { - WARN_ELF("elf_strptr"); + ERROR_ELF("elf_strptr"); return -1; } data = elf_getdata(scn, NULL); if (!data) { - WARN_ELF("elf_getdata"); + ERROR_ELF("elf_getdata"); return -1; } @@ -99,7 +99,7 @@ int orc_dump(const char *filename) return 0; if (orc_size % sizeof(*orc) != 0) { - WARN("bad .orc_unwind section size"); + ERROR("bad .orc_unwind section size"); return -1; } @@ -107,36 +107,36 @@ int orc_dump(const char *filename) for (i = 0; i < nr_entries; i++) { if (rela_orc_ip) { if (!gelf_getrela(rela_orc_ip, i, &rela)) { - WARN_ELF("gelf_getrela"); + ERROR_ELF("gelf_getrela"); return -1; } if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) { - WARN_ELF("gelf_getsym"); + ERROR_ELF("gelf_getsym"); return -1; } if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) { scn = elf_getscn(elf, sym.st_shndx); if (!scn) { - WARN_ELF("elf_getscn"); + ERROR_ELF("elf_getscn"); return -1; } if (!gelf_getshdr(scn, &sh)) { - WARN_ELF("gelf_getshdr"); + ERROR_ELF("gelf_getshdr"); return -1; } name = elf_strptr(elf, shstrtab_idx, sh.sh_name); if (!name) { - WARN_ELF("elf_strptr"); + ERROR_ELF("elf_strptr"); return -1; } } else { name = elf_strptr(elf, strtab_idx, sym.st_name); if (!name) { - WARN_ELF("elf_strptr"); + ERROR_ELF("elf_strptr"); return -1; } } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 6cd7b1be5331..c80fed8a840e 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -86,7 +86,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); if (!orig_reloc) { - WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + ERROR_FUNC(sec, offset + entry->orig, "can't find orig reloc"); return -1; } @@ -97,8 +97,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); if (!new_reloc) { - WARN_FUNC("can't find new reloc", - sec, offset + entry->new); + ERROR_FUNC(sec, offset + entry->new, "can't find new reloc"); return -1; } @@ -114,8 +113,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key); if (!key_reloc) { - WARN_FUNC("can't find key reloc", - sec, offset + entry->key); + ERROR_FUNC(sec, offset + entry->key, "can't find key reloc"); return -1; } alt->key_addend = reloc_addend(key_reloc); @@ -145,8 +143,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts) continue; if (sec->sh.sh_size % entry->size != 0) { - WARN("%s size not a multiple of %d", - sec->name, entry->size); + ERROR("%s size not a multiple of %d", sec->name, entry->size); return -1; } @@ -155,7 +152,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts) for (idx = 0; idx < nr_entries; idx++) { alt = malloc(sizeof(*alt)); if (!alt) { - WARN("malloc failed"); + ERROR_GLIBC("malloc failed"); return -1; } memset(alt, 0, sizeof(*alt)); From e77956e4e5c11218e60a1fe8cdbccd02476f2e56 Mon Sep 17 00:00:00 2001 From: David Laight Date: Mon, 31 Mar 2025 21:26:42 -0700 Subject: [PATCH 1944/2157] objtool: Fix verbose disassembly if CROSS_COMPILE isn't set In verbose mode, when printing the disassembly of affected functions, if CROSS_COMPILE isn't set, the objdump command string gets prefixed with "(null)". Somehow this worked before. Maybe some versions of glibc return an empty string instead of NULL. Fix it regardless. [ jpoimboe: Rewrite commit log. ] Fixes: ca653464dd097 ("objtool: Add verbose option for disassembling affected functions") Signed-off-by: David Laight Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250215142321.14081-1-david.laight.linux@gmail.com Link: https://lore.kernel.org/r/b931a4786bc0127aa4c94e8b35ed617dcbd3d3da.1743481539.git.jpoimboe@kernel.org --- tools/objtool/check.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ff83be1aab1d..4a1f6c3169b3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4523,6 +4523,8 @@ static void disas_funcs(const char *funcs) char *cmd; cross_compile = getenv("CROSS_COMPILE"); + if (!cross_compile) + cross_compile = ""; objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '" "BEGIN { split(_funcs, funcs); }" From 8a2f20ac8e14c1dd29d3214016a27e244f266b39 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Mar 2025 12:54:22 +0100 Subject: [PATCH 1945/2157] riscv: Make sure toolchain supports zba before using zba instructions Old toolchain like gcc 8.5.0 does not support zba, so we must check that the toolchain supports this extension before using it in the kernel. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503281836.8pntHm6I-lkp@intel.com/ Link: https://lore.kernel.org/r/20250328115422.253670-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/Kconfig | 8 ++++++++ arch/riscv/include/asm/runtime-const.h | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0d8def968a7e..ae6303f15b28 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -735,6 +735,14 @@ config TOOLCHAIN_HAS_VECTOR_CRYPTO def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb) depends on AS_HAS_OPTION_ARCH +config TOOLCHAIN_HAS_ZBA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_HAS_OPTION_ARCH + config RISCV_ISA_ZBA bool "Zba extension support for bit manipulation instructions" default y diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index ea2e49c7149c..c07d049fdd5d 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -77,7 +77,8 @@ ".long 1b - .\n\t" \ ".popsection" \ -#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB) +#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) \ + && defined(CONFIG_RISCV_ISA_ZBKB) #define runtime_const_ptr(sym) \ ({ \ typeof(sym) __ret, __tmp; \ @@ -93,7 +94,7 @@ : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ __ret; \ }) -#elif defined(CONFIG_RISCV_ISA_ZBA) +#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) #define runtime_const_ptr(sym) \ ({ \ typeof(sym) __ret, __tmp; \ From 09f37f2d7b21ff35b8b533f9ab8cfad2fe8f72f6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:44 -0700 Subject: [PATCH 1946/2157] sched/smt: Always inline sched_smt_active() sched_smt_active() can be called from noinstr code, so it should always be inlined. The CONFIG_SCHED_SMT version already has __always_inline. Do the same for its !CONFIG_SCHED_SMT counterpart. Fixes the following warning: vmlinux.o: error: objtool: intel_idle_ibrs+0x13: call to sched_smt_active() leaves .noinstr.text section Fixes: 321a874a7ef8 ("sched/smt: Expose sched_smt_present static key") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/1d03907b0a247cf7fb5c1d518de378864f603060.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/r/202503311434.lyw2Tveh-lkp@intel.com/ --- include/linux/sched/smt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index fb1e295e7e63..166b19af956f 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -12,7 +12,7 @@ static __always_inline bool sched_smt_active(void) return static_branch_likely(&sched_smt_present); } #else -static inline bool sched_smt_active(void) { return false; } +static __always_inline bool sched_smt_active(void) { return false; } #endif void arch_smt_update(void); From 6ee928185aeb0ff085c73ae2ee163d436eba8352 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 31 Mar 2025 11:45:24 -0700 Subject: [PATCH 1947/2157] riscv: Add norvc after .option arch in runtime const .option arch clobbers .option norvc. Prevent gas from emitting compressed instructions in the runtime const alternative blocks by setting .option norvc after .option arch. This issue starts appearing on gcc 15, which adds zca to the march. Reported by: Klara Modin Signed-off-by: Charlie Jenkins Fixes: a44fb5722199 ("riscv: Add runtime constant support") Closes: https://lore.kernel.org/all/cc8f3525-20b7-445b-877b-2add28a160a2@gmail.com/ Tested-by: Klara Modin Link: https://lore.kernel.org/r/20250331-fix_runtime_const_norvc-v1-1-89bc62687ab8@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/runtime-const.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index c07d049fdd5d..451fd76b8811 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -56,6 +56,7 @@ #define RISCV_RUNTIME_CONST_64_ZBA \ ".option push\n\t" \ ".option arch,+zba\n\t" \ + ".option norvc\n\t" \ "slli %[__tmp],%[__tmp],32\n\t" \ "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ "nop\n\t" \ @@ -65,6 +66,7 @@ #define RISCV_RUNTIME_CONST_64_ZBKB \ ".option push\n\t" \ ".option arch,+zbkb\n\t" \ + ".option norvc\n\t" \ "pack %[__ret],%[__ret],%[__tmp]\n\t" \ "nop\n\t" \ "nop\n\t" \ From 9ac50f7311dc8b39e355582f14c1e82da47a8196 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:45 -0700 Subject: [PATCH 1948/2157] context_tracking: Always inline ct_{nmi,irq}_{enter,exit}() Thanks to CONFIG_DEBUG_SECTION_MISMATCH, empty functions can be generated out of line. These can be called from noinstr code, so make sure they're always inlined. Fixes the following warnings: vmlinux.o: warning: objtool: irqentry_nmi_enter+0xa2: call to ct_nmi_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_nmi_exit+0x16: call to ct_nmi_exit() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_exit+0x78: call to ct_irq_exit() leaves .noinstr.text section Fixes: 6f0e6c1598b1 ("context_tracking: Take IRQ eqs entrypoints over RCU") Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Paul E. McKenney Cc: Linus Torvalds Link: https://lore.kernel.org/r/8509bce3f536bcd4ae7af3a2cf6930d48c5e631a.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/d1eca076-fdde-484a-b33e-70e0d167c36d@infradead.org --- include/linux/context_tracking_irq.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h index c50b5670c4a5..197916ee91a4 100644 --- a/include/linux/context_tracking_irq.h +++ b/include/linux/context_tracking_irq.h @@ -10,12 +10,12 @@ void ct_irq_exit_irqson(void); void ct_nmi_enter(void); void ct_nmi_exit(void); #else -static inline void ct_irq_enter(void) { } -static inline void ct_irq_exit(void) { } +static __always_inline void ct_irq_enter(void) { } +static __always_inline void ct_irq_exit(void) { } static inline void ct_irq_enter_irqson(void) { } static inline void ct_irq_exit_irqson(void) { } -static inline void ct_nmi_enter(void) { } -static inline void ct_nmi_exit(void) { } +static __always_inline void ct_nmi_enter(void) { } +static __always_inline void ct_nmi_exit(void) { } #endif #endif From 6309a5c43b0dc629851f25b2e5ef8beff61d08e5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:46 -0700 Subject: [PATCH 1949/2157] rcu-tasks: Always inline rcu_irq_work_resched() Thanks to CONFIG_DEBUG_SECTION_MISMATCH, empty functions can be generated out of line. rcu_irq_work_resched() can be called from noinstr code, so make sure it's always inlined. Fixes: 564506495ca9 ("rcu/context-tracking: Move deferred nocb resched to context tracking") Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Paul E. McKenney Cc: Linus Torvalds Link: https://lore.kernel.org/r/e84f15f013c07e4c410d972e75620c53b62c1b3e.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/d1eca076-fdde-484a-b33e-70e0d167c36d@infradead.org --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f8159f8a7d73..120536f4c6eb 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -132,7 +132,7 @@ static inline void rcu_sysrq_end(void) { } #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else -static inline void rcu_irq_work_resched(void) { } +static __always_inline void rcu_irq_work_resched(void) { } #endif #ifdef CONFIG_RCU_NOCB_CPU From 7c977393b8277ed319e92e4b598b26598c9d30c0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 31 Mar 2025 21:26:43 -0700 Subject: [PATCH 1950/2157] objtool/loongarch: Add unwind hints in prepare_frametrace() If 'regs' points to a local stack variable, prepare_frametrace() stores all registers to the stack. This confuses objtool as it expects them to be restored from the stack later. The stores don't affect stack tracing, so use unwind hints to hide them from objtool. Fixes the following warnings: arch/loongarch/kernel/traps.o: warning: objtool: show_stack+0xe0: stack state mismatch: reg1[22]=-1+0 reg2[22]=-2-160 arch/loongarch/kernel/traps.o: warning: objtool: show_stack+0xe0: stack state mismatch: reg1[23]=-1+0 reg2[23]=-2-152 Fixes: cb8a2ef0848c ("LoongArch: Add ORC stack unwinder support") Reported-by: kernel test robot Tested-by: Tiezhu Yang Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/270cadd8040dda74db2307f23497bb68e65db98d.1743481539.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202503280703.OARM8SrY-lkp@intel.com/ --- arch/loongarch/include/asm/stacktrace.h | 3 +++ arch/loongarch/include/asm/unwind_hints.h | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h index f23adb15f418..fc8b64773794 100644 --- a/arch/loongarch/include/asm/stacktrace.h +++ b/arch/loongarch/include/asm/stacktrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #include enum stack_type { @@ -43,6 +44,7 @@ int get_stack_info(unsigned long stack, struct task_struct *task, struct stack_i static __always_inline void prepare_frametrace(struct pt_regs *regs) { __asm__ __volatile__( + UNWIND_HINT_SAVE /* Save $ra */ STORE_ONE_REG(1) /* Use $ra to save PC */ @@ -80,6 +82,7 @@ static __always_inline void prepare_frametrace(struct pt_regs *regs) STORE_ONE_REG(29) STORE_ONE_REG(30) STORE_ONE_REG(31) + UNWIND_HINT_RESTORE : "=m" (regs->csr_era) : "r" (regs->regs) : "memory"); diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index a01086ad9dde..2c68bc72736c 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -23,6 +23,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#endif /* __ASSEMBLY__ */ +#else /* !__ASSEMBLY__ */ + +#define UNWIND_HINT_SAVE \ + UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) + +#define UNWIND_HINT_RESTORE \ + UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) + +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ From 7170130e4c72ce0caa0cb42a1627c635cc262821 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 1 Apr 2025 11:07:52 +1100 Subject: [PATCH 1951/2157] x86/mm/init: Handle the special case of device private pages in add_pages(), to not increase max_pfn and trigger dma_addressing_limited() bounce buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Bert Karwatzki reported, the following recent commit causes a performance regression on AMD iGPU and dGPU systems: 7ffb791423c7 ("x86/kaslr: Reduce KASLR entropy on most x86 systems") It exposed a bug with nokaslr and zone device interaction. The root cause of the bug is that, the GPU driver registers a zone device private memory region. When KASLR is disabled or the above commit is applied, the direct_map_physmem_end is set to much higher than 10 TiB typically to the 64TiB address. When zone device private memory is added to the system via add_pages(), it bumps up the max_pfn to the same value. This causes dma_addressing_limited() to return true, since the device cannot address memory all the way up to max_pfn. This caused a regression for games played on the iGPU, as it resulted in the DMA32 zone being used for GPU allocations. Fix this by not bumping up max_pfn on x86 systems, when pgmap is passed into add_pages(). The presence of pgmap is used to determine if device private memory is being added via add_pages(). More details: devm_request_mem_region() and request_free_mem_region() request for device private memory. iomem_resource is passed as the base resource with start and end parameters. iomem_resource's end depends on several factors, including the platform and virtualization. On x86 for example on bare metal, this value is set to boot_cpu_data.x86_phys_bits. boot_cpu_data.x86_phys_bits can change depending on support for MKTME. By default it is set to the same as log2(direct_map_physmem_end) which is 46 to 52 bits depending on the number of levels in the page table. The allocation routines used iomem_resource's end and direct_map_physmem_end to figure out where to allocate the region. [ arch/powerpc is also impacted by this problem, but this patch does not fix the issue for PowerPC. ] Testing: 1. Tested on a virtual machine with test_hmm for zone device inseration 2. A previous version of this patch was tested by Bert, please see: https://lore.kernel.org/lkml/d87680bab997fdc9fb4e638983132af235d9a03a.camel@web.de/ [ mingo: Clarified the comments and the changelog. ] Reported-by: Bert Karwatzki Tested-by: Bert Karwatzki Fixes: 7ffb791423c7 ("x86/kaslr: Reduce KASLR entropy on most x86 systems") Signed-off-by: Balbir Singh Signed-off-by: Ingo Molnar Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Andrew Morton Cc: Christoph Hellwig Cc: Pierre-Eric Pelloux-Prayer Cc: Alex Deucher Cc: Christian König Cc: David Airlie Cc: Simona Vetter Link: https://lore.kernel.org/r/20250401000752.249348-1-balbirs@nvidia.com --- arch/x86/mm/init_64.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 519aa53114fa..821a0b53b21c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -959,9 +959,18 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); - /* update max_pfn, max_low_pfn and high_memory */ - update_end_of_memory_vars(start_pfn << PAGE_SHIFT, - nr_pages << PAGE_SHIFT); + /* + * Special case: add_pages() is called by memremap_pages() for adding device + * private pages. Do not bump up max_pfn in the device private path, + * because max_pfn changes affect dma_addressing_limited(). + * + * dma_addressing_limited() returning true when max_pfn is the device's + * addressable memory can force device drivers to use bounce buffers + * and impact their performance negatively: + */ + if (!params->pgmap) + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); return ret; } From 2b7cbd98495f6ee4cd6422fe77828a19e9edf87f Mon Sep 17 00:00:00 2001 From: Lukas Stockmann Date: Mon, 20 Jan 2025 10:34:49 +0100 Subject: [PATCH 1952/2157] rtc: pcf85063: do a SW reset if POR failed Power-on Reset has a documented issue in PCF85063, refer to its datasheet, section "Software reset": "There is a low probability that some devices will have corruption of the registers after the automatic power-on reset if the device is powered up with a residual VDD level. It is required that the VDD starts at zero volts at power up or upon power cycling to ensure that there is no corruption of the registers. If this is not possible, a reset must be initiated after power-up (i.e. when power is stable) with the software reset command" Trigger SW reset if there is an indication that POR has failed. Link: https://www.nxp.com/docs/en/data-sheet/PCF85063A.pdf Signed-off-by: Lukas Stockmann Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20250120093451.30778-1-alexander.sverdlin@siemens.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 905986c61655..73848f764559 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -35,6 +35,7 @@ #define PCF85063_REG_CTRL1_CAP_SEL BIT(0) #define PCF85063_REG_CTRL1_STOP BIT(5) #define PCF85063_REG_CTRL1_EXT_TEST BIT(7) +#define PCF85063_REG_CTRL1_SWR 0x58 #define PCF85063_REG_CTRL2 0x01 #define PCF85063_CTRL2_AF BIT(6) @@ -589,7 +590,7 @@ static int pcf85063_probe(struct i2c_client *client) i2c_set_clientdata(client, pcf85063); - err = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL1, &tmp); + err = regmap_read(pcf85063->regmap, PCF85063_REG_SC, &tmp); if (err) { dev_err(&client->dev, "RTC chip is not present\n"); return err; @@ -599,6 +600,22 @@ static int pcf85063_probe(struct i2c_client *client) if (IS_ERR(pcf85063->rtc)) return PTR_ERR(pcf85063->rtc); + /* + * If a Power loss is detected, SW reset the device. + * From PCF85063A datasheet: + * There is a low probability that some devices will have corruption + * of the registers after the automatic power-on reset... + */ + if (tmp & PCF85063_REG_SC_OS) { + dev_warn(&client->dev, + "POR issue detected, sending a SW reset\n"); + err = regmap_write(pcf85063->regmap, PCF85063_REG_CTRL1, + PCF85063_REG_CTRL1_SWR); + if (err < 0) + dev_warn(&client->dev, + "SW reset failed, trying to continue\n"); + } + err = pcf85063_load_capacitance(pcf85063, client->dev.of_node, config->force_cap_7000 ? 7000 : 0); if (err < 0) From e255612b5ed9f179abe8196df7c2ba09dd227900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 9 Dec 2024 20:44:23 +0100 Subject: [PATCH 1953/2157] cifs: Add fallback for SMB2 CREATE without FILE_READ_ATTRIBUTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some operations, like WRITE, does not require FILE_READ_ATTRIBUTES access. So when FILE_READ_ATTRIBUTES is not explicitly requested for smb2_open_file() then first try to do SMB2 CREATE with FILE_READ_ATTRIBUTES access (like it was before) and then fallback to SMB2 CREATE without FILE_READ_ATTRIBUTES access (less common case). This change allows to complete WRITE operation to a file when it does not grant FILE_READ_ATTRIBUTES permission and its parent directory does not grant READ_DATA permission (parent directory READ_DATA is implicit grant of child FILE_READ_ATTRIBUTES permission). Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb2file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index d609a20fb98a..2d726e9b950c 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -152,16 +152,25 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 int err_buftype = CIFS_NO_BUFFER; struct cifs_fid *fid = oparms->fid; struct network_resiliency_req nr_ioctl_req; + bool retry_without_read_attributes = false; smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); if (smb2_path == NULL) return -ENOMEM; - oparms->desired_access |= FILE_READ_ATTRIBUTES; + if (!(oparms->desired_access & FILE_READ_ATTRIBUTES)) { + oparms->desired_access |= FILE_READ_ATTRIBUTES; + retry_without_read_attributes = true; + } smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov, &err_buftype); + if (rc == -EACCES && retry_without_read_attributes) { + oparms->desired_access &= ~FILE_READ_ATTRIBUTES; + rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov, + &err_buftype); + } if (rc && data) { struct smb2_hdr *hdr = err_iov.iov_base; From b07687edee99b9e53465fbd7f24406616f67070e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 5 Oct 2024 14:59:21 +0200 Subject: [PATCH 1954/2157] cifs: Improve SMB2+ stat() to work also without FILE_READ_ATTRIBUTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If SMB2_OP_QUERY_INFO (called when POSIX extensions are not used) failed with STATUS_ACCESS_DENIED then it means that caller does not have permission to open the path with FILE_READ_ATTRIBUTES access and therefore cannot issue SMB2_OP_QUERY_INFO command. This will result in the -EACCES error from stat() sycall. There is an alternative way how to query limited information about path but still suitable for stat() syscall. SMB2 OPEN/CREATE operation returns in its successful response subset of query information. So try to open the path without FILE_READ_ATTRIBUTES but with MAXIMUM_ALLOWED access which will grant the maximum possible access to the file and the response will contain required query information for stat() syscall. This will improve smb2_query_path_info() to query also files which do not grant FILE_READ_ATTRIBUTES access to caller. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb2file.c | 3 +- fs/smb/client/smb2glob.h | 1 + fs/smb/client/smb2inode.c | 67 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 2d726e9b950c..5d60410460d2 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -158,7 +158,8 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 if (smb2_path == NULL) return -ENOMEM; - if (!(oparms->desired_access & FILE_READ_ATTRIBUTES)) { + if (!(oparms->desired_access & FILE_READ_ATTRIBUTES) && + !(oparms->desired_access & MAXIMUM_ALLOWED)) { oparms->desired_access |= FILE_READ_ATTRIBUTES; retry_without_read_attributes = true; } diff --git a/fs/smb/client/smb2glob.h b/fs/smb/client/smb2glob.h index 2466e6155136..224495322a05 100644 --- a/fs/smb/client/smb2glob.h +++ b/fs/smb/client/smb2glob.h @@ -38,6 +38,7 @@ enum smb2_compound_ops { SMB2_OP_SET_REPARSE, SMB2_OP_GET_REPARSE, SMB2_OP_QUERY_WSL_EA, + SMB2_OP_OPEN_QUERY, }; /* Used when constructing chained read requests. */ diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index e9fd3e204a6f..57d9bfbadd97 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -176,6 +176,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct kvec *out_iov, int *out_buftype, struct dentry *dentry) { + struct smb2_create_rsp *create_rsp = NULL; struct smb2_query_info_rsp *qi_rsp = NULL; struct smb2_compound_vars *vars = NULL; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -265,7 +266,13 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, num_rqst++; rc = 0; - for (i = 0; i < num_cmds; i++) { + i = 0; + + /* Skip the leading explicit OPEN operation */ + if (num_cmds > 0 && cmds[0] == SMB2_OP_OPEN_QUERY) + i++; + + for (; i < num_cmds; i++) { /* Operation */ switch (cmds[i]) { case SMB2_OP_QUERY_INFO: @@ -640,6 +647,27 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, } tmp_rc = rc; + + if (rc == 0 && num_cmds > 0 && cmds[0] == SMB2_OP_OPEN_QUERY) { + create_rsp = rsp_iov[0].iov_base; + idata = in_iov[0].iov_base; + idata->fi.CreationTime = create_rsp->CreationTime; + idata->fi.LastAccessTime = create_rsp->LastAccessTime; + idata->fi.LastWriteTime = create_rsp->LastWriteTime; + idata->fi.ChangeTime = create_rsp->ChangeTime; + idata->fi.Attributes = create_rsp->FileAttributes; + idata->fi.AllocationSize = create_rsp->AllocationSize; + idata->fi.EndOfFile = create_rsp->EndofFile; + if (le32_to_cpu(idata->fi.NumberOfLinks) == 0) + idata->fi.NumberOfLinks = cpu_to_le32(1); /* dummy value */ + idata->fi.DeletePending = 0; + idata->fi.Directory = !!(le32_to_cpu(create_rsp->FileAttributes) & ATTR_DIRECTORY); + + /* smb2_parse_contexts() fills idata->fi.IndexNumber */ + rc = smb2_parse_contexts(server, &rsp_iov[0], &oparms->fid->epoch, + oparms->fid->lease_key, &oplock, &idata->fi, NULL); + } + for (i = 0; i < num_cmds; i++) { char *buf = rsp_iov[i + i].iov_base; @@ -978,6 +1006,43 @@ int smb2_query_path_info(const unsigned int xid, case 0: rc = parse_create_response(data, cifs_sb, full_path, &out_iov[0]); break; + case -EACCES: + /* + * If SMB2_OP_QUERY_INFO (called when POSIX extensions are not used) failed with + * STATUS_ACCESS_DENIED then it means that caller does not have permission to + * open the path with FILE_READ_ATTRIBUTES access and therefore cannot issue + * SMB2_OP_QUERY_INFO command. + * + * There is an alternative way how to query limited information about path but still + * suitable for stat() syscall. SMB2 OPEN/CREATE operation returns in its successful + * response subset of query information. + * + * So try to open the path without FILE_READ_ATTRIBUTES but with MAXIMUM_ALLOWED + * access which will grant the maximum possible access to the file and the response + * will contain required query information for stat() syscall. + */ + + if (tcon->posix_extensions) + break; + + num_cmds = 1; + cmds[0] = SMB2_OP_OPEN_QUERY; + in_iov[0].iov_base = data; + in_iov[0].iov_len = sizeof(*data); + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, MAXIMUM_ALLOWED, + FILE_OPEN, create_options, ACL_NO_MODE); + free_rsp_iov(out_iov, out_buftype, ARRAY_SIZE(out_iov)); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, + &oparms, in_iov, cmds, num_cmds, + cfile, out_iov, out_buftype, NULL); + + hdr = out_iov[0].iov_base; + if (!hdr || out_buftype[0] == CIFS_NO_BUFFER) + goto out; + + if (!rc) + rc = parse_create_response(data, cifs_sb, full_path, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba From e97aec7889543663202e24ec51e1e2f9cb236472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 22 Dec 2024 17:58:21 +0100 Subject: [PATCH 1955/2157] cifs: Do not add FILE_READ_ATTRIBUTES when using GENERIC_READ/EXECUTE/ALL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Individual bits GENERIC_READ, GENERIC_EXECUTE and GENERIC_ALL have meaning which includes also access right for FILE_READ_ATTRIBUTES. So specifying FILE_READ_ATTRIBUTES bit together with one of those GENERIC (except GENERIC_WRITE) does not do anything. This change prevents calling additional (fallback) code and sending more requests without FILE_READ_ATTRIBUTES when the primary request fails on -EACCES, as it is not needed at all. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb2file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 5d60410460d2..a7f629238830 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -158,7 +158,16 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 if (smb2_path == NULL) return -ENOMEM; + /* + * GENERIC_READ, GENERIC_EXECUTE, GENERIC_ALL and MAXIMUM_ALLOWED + * contains also FILE_READ_ATTRIBUTES access right. So do not append + * FILE_READ_ATTRIBUTES when not needed and prevent calling code path + * for retry_without_read_attributes. + */ if (!(oparms->desired_access & FILE_READ_ATTRIBUTES) && + !(oparms->desired_access & GENERIC_READ) && + !(oparms->desired_access & GENERIC_EXECUTE) && + !(oparms->desired_access & GENERIC_ALL) && !(oparms->desired_access & MAXIMUM_ALLOWED)) { oparms->desired_access |= FILE_READ_ATTRIBUTES; retry_without_read_attributes = true; From 119e90a3a64d9dd48bcf4c2d4fc13ba6bb7d940d Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Tue, 4 Mar 2025 09:14:52 +0100 Subject: [PATCH 1956/2157] rtc: pcf85063: replace dev_err+return with return dev_err_probe Replace the dev_err plus return combo with return dev_err_probe() this actually communicates the error type when it occurs and helps debugging hardware issues. Signed-off-by: Maud Spierings Link: https://lore.kernel.org/r/20250304-rtc_dev_err_probe-v1-1-9dcc042ad17e@gocontroll.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 73848f764559..4fa5c4ecdd5a 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -591,10 +591,8 @@ static int pcf85063_probe(struct i2c_client *client) i2c_set_clientdata(client, pcf85063); err = regmap_read(pcf85063->regmap, PCF85063_REG_SC, &tmp); - if (err) { - dev_err(&client->dev, "RTC chip is not present\n"); - return err; - } + if (err) + return dev_err_probe(&client->dev, err, "RTC chip is not present\n"); pcf85063->rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(pcf85063->rtc)) From c2004b6efb1c891b80bef186771a3668fc25f6b3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 3 Mar 2025 23:36:37 +0100 Subject: [PATCH 1957/2157] rtc: mt6397: drop unused defines RTC_NUM_YEARS has never been used, the other defines are not used since commit 34bbdc12d04e ("rtc: mt6359: Add RTC hardware range and add support for start-year") Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250303223637.1135362-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/mfd/mt6397/rtc.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 068ae1c0f0e8..27883af44f87 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -60,11 +60,6 @@ #define RTC_PDN2 0x002e #define RTC_PDN2_PWRON_ALARM BIT(4) -#define RTC_MIN_YEAR 1968 -#define RTC_BASE_YEAR 1900 -#define RTC_NUM_YEARS 128 -#define RTC_MIN_YEAR_OFFSET (RTC_MIN_YEAR - RTC_BASE_YEAR) - #define MTK_RTC_POLL_DELAY_US 10 #define MTK_RTC_POLL_TIMEOUT (jiffies_to_usecs(HZ)) From 7b98c1c8e2ab79122f9d00697a6df0ddee6999de Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Sun, 30 Mar 2025 12:39:16 -0300 Subject: [PATCH 1958/2157] platform/x86: thinkpad_acpi: Fix NULL pointer dereferences while probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some subdrivers make use of the global reference tpacpi_pdev during initialization, which is called from the platform driver's probe. However, after the commit 38b9ab80db31 ("platform/x86: thinkpad_acpi: Move subdriver initialization to tpacpi_pdriver's probe.") this variable is only properly initialized *after* probing and this can result in a NULL pointer dereference. In order to fix this without reverting the commit, register the platform bundle in two steps, first create and initialize tpacpi_pdev, then register the driver synchronously with platform_driver_probe(). This way the benefits of commit 38b9ab80db31 are preserved. Additionally, the commit 43fc63a1e8f6 ("platform/x86: thinkpad_acpi: Move HWMON initialization to tpacpi_hwmon_pdriver's probe") introduced a similar problem, however tpacpi_sensors_pdev is only used once inside the probe, so replace the global reference with the one given by the probe. Reported-by: Damian Tometzki Closes: https://lore.kernel.org/r/CAL=B37kdL1orSQZD2A3skDOevRXBzF__cJJgY_GFh9LZO3FMsw@mail.gmail.com/ Fixes: 38b9ab80db31 ("platform/x86: thinkpad_acpi: Move subdriver initialization to tpacpi_pdriver's probe.") Fixes: 43fc63a1e8f6 ("platform/x86: thinkpad_acpi: Move HWMON initialization to tpacpi_hwmon_pdriver's probe") Tested-by: Damian Tometzki Tested-by: Gene C Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250330-thinkpad-fix-v1-1-4906b3fe6b74@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 0384cf311878..a17efb68664c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -367,6 +367,7 @@ static struct { u32 beep_needs_two_args:1; u32 mixer_no_level_control:1; u32 battery_force_primary:1; + u32 platform_drv_registered:1; u32 hotkey_poll_active:1; u32 has_adaptive_kbd:1; u32 kbd_lang:1; @@ -11820,10 +11821,10 @@ static void thinkpad_acpi_module_exit(void) platform_device_unregister(tpacpi_sensors_pdev); } - if (tpacpi_pdev) { + if (tp_features.platform_drv_registered) platform_driver_unregister(&tpacpi_pdriver); + if (tpacpi_pdev) platform_device_unregister(tpacpi_pdev); - } if (proc_dir) remove_proc_entry(TPACPI_PROC_DIR, acpi_root_dir); @@ -11893,9 +11894,8 @@ static int __init tpacpi_pdriver_probe(struct platform_device *pdev) static int __init tpacpi_hwmon_pdriver_probe(struct platform_device *pdev) { - tpacpi_hwmon = devm_hwmon_device_register_with_groups( - &tpacpi_sensors_pdev->dev, TPACPI_NAME, NULL, tpacpi_hwmon_groups); - + tpacpi_hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, TPACPI_NAME, + NULL, tpacpi_hwmon_groups); if (IS_ERR(tpacpi_hwmon)) pr_err("unable to register hwmon device\n"); @@ -11965,16 +11965,24 @@ static int __init thinkpad_acpi_module_init(void) tp_features.quirks = dmi_id->driver_data; /* Device initialization */ - tpacpi_pdev = platform_create_bundle(&tpacpi_pdriver, tpacpi_pdriver_probe, - NULL, 0, NULL, 0); + tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, PLATFORM_DEVID_NONE, + NULL, 0); if (IS_ERR(tpacpi_pdev)) { ret = PTR_ERR(tpacpi_pdev); tpacpi_pdev = NULL; - pr_err("unable to register platform device/driver bundle\n"); + pr_err("unable to register platform device\n"); thinkpad_acpi_module_exit(); return ret; } + ret = platform_driver_probe(&tpacpi_pdriver, tpacpi_pdriver_probe); + if (ret) { + pr_err("unable to register main platform driver\n"); + thinkpad_acpi_module_exit(); + return ret; + } + tp_features.platform_drv_registered = 1; + tpacpi_sensors_pdev = platform_create_bundle(&tpacpi_hwmon_pdriver, tpacpi_hwmon_pdriver_probe, NULL, 0, NULL, 0); From 2b9f84e7dc863afd63357b867cea246aeedda036 Mon Sep 17 00:00:00 2001 From: Eduard Christian Dumitrescu Date: Mon, 24 Mar 2025 11:24:42 -0400 Subject: [PATCH 1959/2157] platform/x86: thinkpad_acpi: disable ACPI fan access for T495* and E560 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T495, T495s, and E560 laptops have the FANG+FANW ACPI methods (therefore fang_handle and fanw_handle are not NULL) but they do not actually work, which results in a "No such device or address" error. The DSDT table code for the FANG+FANW methods doesn't seem to do anything special regarding the fan being secondary. The bug was introduced in commit 57d0557dfa49 ("platform/x86: thinkpad_acpi: Add Thinkpad Edge E531 fan support"), which added a new fan control method via the FANG+FANW ACPI methods. Add a quirk for T495, T495s, and E560 to avoid the FANG+FANW methods. Fan access and control is restored after forcing the legacy non-ACPI fan control method by setting both fang_handle and fanw_handle to NULL. Reported-by: Vlastimil Holer Fixes: 57d0557dfa49 ("platform/x86: thinkpad_acpi: Add Thinkpad Edge E531 fan support") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219643 Cc: stable@vger.kernel.org Tested-by: Alireza Elikahi Reviewed-by: Kurt Borja Signed-off-by: Eduard Christian Dumitrescu Co-developed-by: Seyediman Seyedarab Signed-off-by: Seyediman Seyedarab Link: https://lore.kernel.org/r/20250324152442.106113-1-ImanDevel@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a17efb68664c..5790095c175e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8794,6 +8794,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ #define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ #define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */ +#define TPACPI_FAN_NOACPI 0x0080 /* Don't use ACPI methods even if detected */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8824,6 +8825,9 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */ + TPACPI_Q_LNV3('R', '0', '0', TPACPI_FAN_NOACPI),/* E560 */ + TPACPI_Q_LNV3('R', '1', '2', TPACPI_FAN_NOACPI),/* T495 */ + TPACPI_Q_LNV3('R', '1', '3', TPACPI_FAN_NOACPI),/* T495s */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8875,6 +8879,13 @@ static int __init fan_init(struct ibm_init_struct *iibm) tp_features.fan_ctrl_status_undef = 1; } + if (quirks & TPACPI_FAN_NOACPI) { + /* E560, T495, T495s */ + pr_info("Ignoring buggy ACPI fan access method\n"); + fang_handle = NULL; + fanw_handle = NULL; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; From 9462e74c5c983cce34019bfb27f734552bebe59f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 28 Mar 2025 15:47:49 -0700 Subject: [PATCH 1960/2157] platform/x86: ISST: Correct command storage data length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After resume/online turbo limit ratio (TRL) is restored partially if the admin explicitly changed TRL from user space. A hash table is used to store SST mail box and MSR settings when modified to restore those settings after resume or online. This uses a struct isst_cmd field "data" to store these settings. This is a 64 bit field. But isst_store_new_cmd() is only assigning as u32. This results in truncation of 32 bits. Change the argument to u64 from u32. Fixes: f607874f35cb ("platform/x86: ISST: Restore state on resume") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250328224749.2691272-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index dbcd3087aaa4..31239a93dd71 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -84,7 +84,7 @@ static DECLARE_HASHTABLE(isst_hash, 8); static DEFINE_MUTEX(isst_hash_lock); static int isst_store_new_cmd(int cmd, u32 cpu, int mbox_cmd_type, u32 param, - u32 data) + u64 data) { struct isst_cmd *sst_cmd; From 566d3a52b8f618d22664171633d7106a630f46b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 31 Mar 2025 20:29:47 +0200 Subject: [PATCH 1961/2157] MAINTAINERS: consistently use my dedicated email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I use a dedicated address for kernel development. Unfortunately at some point I used another address and later copied it around to other places. Consistently use the dedicated address everywhere. As the old address does in fact work, an update to mailmap is not necessary. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250331-email-correction-v1-1-4c0e92862202@weissschuh.net Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 8 ++++---- drivers/platform/x86/gigabyte-wmi.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d5dfb9186962..957f70ee85ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5506,12 +5506,12 @@ F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml F: sound/soc/codecs/cros_ec_codec.* CHROMEOS EC CHARGE CONTROL -M: Thomas Weißschuh +M: Thomas Weißschuh S: Maintained F: drivers/power/supply/cros_charge-control.c CHROMEOS EC HARDWARE MONITORING -M: Thomas Weißschuh +M: Thomas Weißschuh L: chrome-platform@lists.linux.dev L: linux-hwmon@vger.kernel.org S: Maintained @@ -5519,7 +5519,7 @@ F: Documentation/hwmon/cros_ec_hwmon.rst F: drivers/hwmon/cros_ec_hwmon.c CHROMEOS EC LED DRIVER -M: Thomas Weißschuh +M: Thomas Weißschuh S: Maintained F: drivers/leds/leds-cros_ec.c @@ -9992,7 +9992,7 @@ F: Documentation/hwmon/gigabyte_waterforce.rst F: drivers/hwmon/gigabyte_waterforce.c GIGABYTE WMI DRIVER -M: Thomas Weißschuh +M: Thomas Weißschuh L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/gigabyte-wmi.c diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index f6ba88baee4d..f42c85607a6b 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Copyright (C) 2021 Thomas Weißschuh + * Copyright (C) 2021 Thomas Weißschuh */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -159,6 +159,6 @@ static struct wmi_driver gigabyte_wmi_driver = { module_wmi_driver(gigabyte_wmi_driver); MODULE_DEVICE_TABLE(wmi, gigabyte_wmi_id_table); -MODULE_AUTHOR("Thomas Weißschuh "); +MODULE_AUTHOR("Thomas Weißschuh "); MODULE_DESCRIPTION("Gigabyte WMI temperature driver"); MODULE_LICENSE("GPL"); From 0cd73ab4df45ed9a78051cfb96a6de48d421852f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 18 Feb 2025 11:15:47 +0100 Subject: [PATCH 1962/2157] selftest: rtc: skip some tests if the alarm only supports minutes There are alarms which have only minute-granularity. The RTC core already has a flag to describe them. Use this flag to skip tests which require the alarm to support seconds. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250218101548.6514-1-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- tools/testing/selftests/rtc/rtctest.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index e103097d0b5b..be175c0e6ae3 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -29,6 +29,7 @@ enum rtc_alarm_state { RTC_ALARM_UNKNOWN, RTC_ALARM_ENABLED, RTC_ALARM_DISABLED, + RTC_ALARM_RES_MINUTE, }; FIXTURE(rtc) { @@ -88,7 +89,7 @@ static void nanosleep_with_retries(long ns) } } -static enum rtc_alarm_state get_rtc_alarm_state(int fd) +static enum rtc_alarm_state get_rtc_alarm_state(int fd, int need_seconds) { struct rtc_param param = { 0 }; int rc; @@ -103,6 +104,10 @@ static enum rtc_alarm_state get_rtc_alarm_state(int fd) if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) return RTC_ALARM_DISABLED; + /* Check if alarm has desired granularity */ + if (need_seconds && (param.uvalue & _BITUL(RTC_FEATURE_ALARM_RES_MINUTE))) + return RTC_ALARM_RES_MINUTE; + return RTC_ALARM_ENABLED; } @@ -227,9 +232,11 @@ TEST_F(rtc, alarm_alm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -295,9 +302,11 @@ TEST_F(rtc, alarm_wkalm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -357,7 +366,7 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); @@ -425,7 +434,7 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); From c28f31deeacda307acfee2f18c0ad904e5123aac Mon Sep 17 00:00:00 2001 From: Angelos Oikonomopoulos Date: Tue, 1 Apr 2025 10:51:50 +0200 Subject: [PATCH 1963/2157] arm64: Don't call NULL in do_compat_alignment_fixup() do_alignment_t32_to_handler() only fixes up alignment faults for specific instructions; it returns NULL otherwise (e.g. LDREX). When that's the case, signal to the caller that it needs to proceed with the regular alignment fault handling (i.e. SIGBUS). Without this patch, the kernel panics: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000006 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000800164aa000 [0000000000000000] pgd=0800081fdbd22003, p4d=0800081fdbd22003, pud=08000815d51c6003, pmd=0000000000000000 Internal error: Oops: 0000000086000006 [#1] SMP Modules linked in: cfg80211 rfkill xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack_netlink nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xfrm_user xfrm_algo xt_addrtype nft_compat br_netfilter veth nvme_fa> libcrc32c crc32c_generic raid0 multipath linear dm_mod dax raid1 md_mod xhci_pci nvme xhci_hcd nvme_core t10_pi usbcore igb crc64_rocksoft crc64 crc_t10dif crct10dif_generic crct10dif_ce crct10dif_common usb_common i2c_algo_bit i2c> CPU: 2 PID: 3932954 Comm: WPEWebProcess Not tainted 6.1.0-31-arm64 #1 Debian 6.1.128-1 Hardware name: GIGABYTE MP32-AR1-00/MP32-AR1-00, BIOS F18v (SCP: 1.08.20211002) 12/01/2021 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : do_compat_alignment_fixup+0xd8/0x3dc sp : ffff80000f973dd0 x29: ffff80000f973dd0 x28: ffff081b42526180 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 x23: 0000000000000004 x22: 0000000000000000 x21: 0000000000000001 x20: 00000000e8551f00 x19: ffff80000f973eb0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffaebc949bc488 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : 0000000000400000 x4 : 0000fffffffffffe x3 : 0000000000000000 x2 : ffff80000f973eb0 x1 : 00000000e8551f00 x0 : 0000000000000001 Call trace: 0x0 do_alignment_fault+0x40/0x50 do_mem_abort+0x4c/0xa0 el0_da+0x48/0xf0 el0t_32_sync_handler+0x110/0x140 el0t_32_sync+0x190/0x194 Code: bad PC value ---[ end trace 0000000000000000 ]--- Signed-off-by: Angelos Oikonomopoulos Fixes: 3fc24ef32d3b ("arm64: compat: Implement misalignment fixups for multiword loads") Cc: # 6.1.x Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250401085150.148313-1-angelos@igalia.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/compat_alignment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c index deff21bfa680..b68e1d328d4c 100644 --- a/arch/arm64/kernel/compat_alignment.c +++ b/arch/arm64/kernel/compat_alignment.c @@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs) return 1; } + if (!handler) + return 1; type = handler(addr, instr, regs); if (type == TYPE_ERROR || type == TYPE_FAULT) From e3e68311ead15d8be61e8e1a8d2f0d1773a7ba9c Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Tue, 1 Apr 2025 10:13:47 +0530 Subject: [PATCH 1964/2157] block: remove unused nseg parameter We are no longer using nr_segs, after blk_mq_attempt_bio_merge was moved out of blk_mq_get_new_request. Signed-off-by: Nitesh Shetty Link: https://lore.kernel.org/r/20250401044348.15588-1-nj.shetty@samsung.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ae8494d88897..0cfd1a149f64 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q, static struct request *blk_mq_get_new_requests(struct request_queue *q, struct blk_plug *plug, - struct bio *bio, - unsigned int nsegs) + struct bio *bio) { struct blk_mq_alloc_data data = { .q = q, @@ -3125,7 +3124,7 @@ void blk_mq_submit_bio(struct bio *bio) if (rq) { blk_mq_use_cached_rq(rq, plug, bio); } else { - rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); + rq = blk_mq_get_new_requests(q, plug, bio); if (unlikely(!rq)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); From 424dfcd441f035769890e6d1faec2081458627b9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 20 Mar 2025 11:33:54 +0100 Subject: [PATCH 1965/2157] rtc: remove 'setdate' test program The tool is not embedded in the testing framework. 'rtc' from rtc-tools serves as a much better programming example. No need to carry this tool in the kernel tree. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250320103433.11673-1-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni --- tools/testing/selftests/rtc/.gitignore | 1 - tools/testing/selftests/rtc/Makefile | 2 - tools/testing/selftests/rtc/setdate.c | 77 -------------------------- 3 files changed, 80 deletions(-) delete mode 100644 tools/testing/selftests/rtc/setdate.c diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore index fb2d533aa575..a2afe7994e85 100644 --- a/tools/testing/selftests/rtc/.gitignore +++ b/tools/testing/selftests/rtc/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only rtctest -setdate diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 9dbb395c5c79..547c244a2ca5 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -4,8 +4,6 @@ LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest -TEST_GEN_PROGS_EXTENDED = setdate - TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c deleted file mode 100644 index b303890b3de2..000000000000 --- a/tools/testing/selftests/rtc/setdate.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Real Time Clock Driver Test - * by: Benjamin Gaignard (benjamin.gaignard@linaro.org) - * - * To build - * gcc rtctest_setdate.c -o rtctest_setdate - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const char default_time[] = "00:00:00"; - -int main(int argc, char **argv) -{ - int fd, retval; - struct rtc_time new, current; - const char *rtc, *date; - const char *time = default_time; - - switch (argc) { - case 4: - time = argv[3]; - /* FALLTHROUGH */ - case 3: - date = argv[2]; - rtc = argv[1]; - break; - default: - fprintf(stderr, "usage: rtctest_setdate [HH:MM:SS]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - if (fd == -1) { - perror(rtc); - exit(errno); - } - - sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year); - new.tm_mon -= 1; - new.tm_year -= 1900; - sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec); - - fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n", - new.tm_mday, new.tm_mon + 1, new.tm_year + 1900, - new.tm_hour, new.tm_min, new.tm_sec); - - /* Write the new date in RTC */ - retval = ioctl(fd, RTC_SET_TIME, &new); - if (retval == -1) { - perror("RTC_SET_TIME ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_RD_TIME, ¤t); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - current.tm_mday, current.tm_mon + 1, current.tm_year + 1900, - current.tm_hour, current.tm_min, current.tm_sec); - - close(fd); - return 0; -} From 3cb2a2f7eebbb0752a834708e720a914e61841a1 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 1 Apr 2025 15:47:47 +0200 Subject: [PATCH 1966/2157] spi: cadence-qspi: revert "Improve spi memory performance" During the v6.14-rc cycles, there has been an issue with syscons which prevented TI chipid controller to probe, itself preventing the only DMA engine on AM62A with the memcpy capability to probe, which is needed for the SPI controller to work in its most efficient configuration. The SPI controller on AM62A can be used in DAC and INDAC mode, which are some kind of direct mapping vs. CPU-controlled SPI operations, respectively. However, because of hardware constraints (some kind of request line not being driven), INDAC mode cannot leverage DMA without risking to underflow the SPI FIFO. This mode costs a lot of CPU cycles. On the other side however, DAC mode can be used with and without DMA support, but in practice, DMA transfers are way more efficient because of the burst capabilities that the CPU does not have. As a result, in terms of read throughput, using a Winbond chip in 1-8-8 SDR mode, we get: - 3.5MiB/s in DAC mode without DMA - 9.0MiB/s in INDAC mode (CPU more busy) - a fluctuating 9 to 12MiB/s in DAC mode with DMA (a constant 14.5MiB/s without CPUfreq) The reason for the patch that is being reverted is that because of the syscon issue, we were using a very un-efficient DAC configuration (no DMA), but since: commit 5728c92ae112 ("mfd: syscon: Restore device_node_to_regmap() for non-syscon nodes") the probing of the DMA controller has been fixed, and the performances are back to normal, so we can safely revert this commit. This is a revert of: commit cce2200dacd6 ("spi: cadence-qspi: Improve spi memory performance") Suggested-by: Vignesh Raghavendra Signed-off-by: Miquel Raynal Link: https://patch.msgid.link/20250401134748.242846-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 559fbdfbd9f7..c90462783b3f 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2073,7 +2073,7 @@ static const struct cqspi_driver_platdata k2g_qspi = { static const struct cqspi_driver_platdata am654_ospi = { .hwcaps_mask = CQSPI_SUPPORTS_OCTAL | CQSPI_SUPPORTS_QUAD, - .quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_NEEDS_WR_DELAY, + .quirks = CQSPI_NEEDS_WR_DELAY, }; static const struct cqspi_driver_platdata intel_lgm_qspi = { From d33d729afcc8ad2148d99f9bc499b33fd0c0d73b Mon Sep 17 00:00:00 2001 From: Anthony Krowiak Date: Tue, 11 Mar 2025 06:32:57 -0400 Subject: [PATCH 1967/2157] s390/vfio-ap: Fix no AP queue sharing allowed message written to kernel log An erroneous message is written to the kernel log when either of the following actions are taken by a user: 1. Assign an adapter or domain to a vfio_ap mediated device via its sysfs assign_adapter or assign_domain attributes that would result in one or more AP queues being assigned that are already assigned to a different mediated device. Sharing of queues between mdevs is not allowed. 2. Reserve an adapter or domain for the host device driver via the AP bus driver's sysfs apmask or aqmask attribute that would result in providing host access to an AP queue that is in use by a vfio_ap mediated device. Reserving a queue for a host driver that is in use by an mdev is not allowed. In both cases, the assignment will return an error; however, a message like the following is written to the kernel log: vfio_ap_mdev e1839397-51a0-4e3c-91e0-c3b9c3d3047d: Userspace may not re-assign queue 00.0028 already assigned to \ e1839397-51a0-4e3c-91e0-c3b9c3d3047d Notice the mdev reporting the error is the same as the mdev identified in the message as the one to which the queue is being assigned. It is perfectly okay to assign a queue to an mdev to which it is already assigned; the assignment is simply ignored by the vfio_ap device driver. This patch logs more descriptive and accurate messages for both 1 and 2 above to the kernel log: Example for 1: vfio_ap_mdev 0fe903a0-a323-44db-9daf-134c68627d61: Userspace may not assign queue 00.0033 to mdev: already assigned to \ 62177883-f1bb-47f0-914d-32a22e3a8804 Example for 2: vfio_ap_mdev 62177883-f1bb-47f0-914d-32a22e3a8804: Can not reserve queue 00.0033 for host driver: in use by mdev Signed-off-by: Anthony Krowiak Link: https://lore.kernel.org/r/20250311103304.1539188-1-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 72 ++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index bc8669b5c304..766557547f83 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -873,48 +873,66 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev) vfio_put_device(&matrix_mdev->vdev); } -#define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \ - "already assigned to %s" +#define MDEV_SHARING_ERR "Userspace may not assign queue %02lx.%04lx to mdev: already assigned to %s" -static void vfio_ap_mdev_log_sharing_err(struct ap_matrix_mdev *matrix_mdev, - unsigned long *apm, - unsigned long *aqm) +#define MDEV_IN_USE_ERR "Can not reserve queue %02lx.%04lx for host driver: in use by mdev" + +static void vfio_ap_mdev_log_sharing_err(struct ap_matrix_mdev *assignee, + struct ap_matrix_mdev *assigned_to, + unsigned long *apm, unsigned long *aqm) { unsigned long apid, apqi; - const struct device *dev = mdev_dev(matrix_mdev->mdev); - const char *mdev_name = dev_name(dev); - for_each_set_bit_inv(apid, apm, AP_DEVICES) + for_each_set_bit_inv(apid, apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + dev_warn(mdev_dev(assignee->mdev), MDEV_SHARING_ERR, + apid, apqi, dev_name(mdev_dev(assigned_to->mdev))); + } + } +} + +static void vfio_ap_mdev_log_in_use_err(struct ap_matrix_mdev *assignee, + unsigned long *apm, unsigned long *aqm) +{ + unsigned long apid, apqi; + + for_each_set_bit_inv(apid, apm, AP_DEVICES) { for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) - dev_warn(dev, MDEV_SHARING_ERR, apid, apqi, mdev_name); + dev_warn(mdev_dev(assignee->mdev), MDEV_IN_USE_ERR, apid, apqi); + } } /** * vfio_ap_mdev_verify_no_sharing - verify APQNs are not shared by matrix mdevs * + * @assignee: the matrix mdev to which @mdev_apm and @mdev_aqm are being + * assigned; or, NULL if this function was called by the AP bus + * driver in_use callback to verify none of the APQNs being reserved + * for the host device driver are in use by a vfio_ap mediated device * @mdev_apm: mask indicating the APIDs of the APQNs to be verified * @mdev_aqm: mask indicating the APQIs of the APQNs to be verified * - * Verifies that each APQN derived from the Cartesian product of a bitmap of - * AP adapter IDs and AP queue indexes is not configured for any matrix - * mediated device. AP queue sharing is not allowed. + * Verifies that each APQN derived from the Cartesian product of APIDs + * represented by the bits set in @mdev_apm and the APQIs of the bits set in + * @mdev_aqm is not assigned to a mediated device other than the mdev to which + * the APQN is being assigned (@assignee). AP queue sharing is not allowed. * * Return: 0 if the APQNs are not shared; otherwise return -EADDRINUSE. */ -static int vfio_ap_mdev_verify_no_sharing(unsigned long *mdev_apm, +static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *assignee, + unsigned long *mdev_apm, unsigned long *mdev_aqm) { - struct ap_matrix_mdev *matrix_mdev; + struct ap_matrix_mdev *assigned_to; DECLARE_BITMAP(apm, AP_DEVICES); DECLARE_BITMAP(aqm, AP_DOMAINS); - list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) { + list_for_each_entry(assigned_to, &matrix_dev->mdev_list, node) { /* - * If the input apm and aqm are fields of the matrix_mdev - * object, then move on to the next matrix_mdev. + * If the mdev to which the mdev_apm and mdev_aqm is being + * assigned is the same as the mdev being verified */ - if (mdev_apm == matrix_mdev->matrix.apm && - mdev_aqm == matrix_mdev->matrix.aqm) + if (assignee == assigned_to) continue; memset(apm, 0, sizeof(apm)); @@ -924,15 +942,16 @@ static int vfio_ap_mdev_verify_no_sharing(unsigned long *mdev_apm, * We work on full longs, as we can only exclude the leftover * bits in non-inverse order. The leftover is all zeros. */ - if (!bitmap_and(apm, mdev_apm, matrix_mdev->matrix.apm, - AP_DEVICES)) + if (!bitmap_and(apm, mdev_apm, assigned_to->matrix.apm, AP_DEVICES)) continue; - if (!bitmap_and(aqm, mdev_aqm, matrix_mdev->matrix.aqm, - AP_DOMAINS)) + if (!bitmap_and(aqm, mdev_aqm, assigned_to->matrix.aqm, AP_DOMAINS)) continue; - vfio_ap_mdev_log_sharing_err(matrix_mdev, apm, aqm); + if (assignee) + vfio_ap_mdev_log_sharing_err(assignee, assigned_to, apm, aqm); + else + vfio_ap_mdev_log_in_use_err(assigned_to, apm, aqm); return -EADDRINUSE; } @@ -961,7 +980,8 @@ static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev) matrix_mdev->matrix.aqm)) return -EADDRNOTAVAIL; - return vfio_ap_mdev_verify_no_sharing(matrix_mdev->matrix.apm, + return vfio_ap_mdev_verify_no_sharing(matrix_mdev, + matrix_mdev->matrix.apm, matrix_mdev->matrix.aqm); } @@ -2516,7 +2536,7 @@ int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm) mutex_lock(&matrix_dev->guests_lock); mutex_lock(&matrix_dev->mdevs_lock); - ret = vfio_ap_mdev_verify_no_sharing(apm, aqm); + ret = vfio_ap_mdev_verify_no_sharing(NULL, apm, aqm); mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&matrix_dev->guests_lock); From ea7789c1541084a3dae65ffd36778348dd98f61b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Mar 2025 17:22:18 +0900 Subject: [PATCH 1968/2157] nvmet: pci-epf: Keep completion queues mapped Instead of mapping and unmapping the completion queues memory to the host PCI address space whenever nvmet_pci_epf_cq_work() is called, map a completion queue to the host PCI address space when the completion queue is created with nvmet_pci_epf_create_cq() and unmap it when the completion queue is deleted with nvmet_pci_epf_delete_cq(). This removes the completion queue mapping/unmapping from nvmet_pci_epf_cq_work() and significantly increases performance. For a single job 4K random read QD=1 workload, the IOPS is increased from 23 KIOPS to 25 KIOPS. Some significant throughput increasde for high queue depth and large IOs workloads can also be seen. Since the functions nvmet_pci_epf_map_queue() and nvmet_pci_epf_unmap_queue() are called respectively only from nvmet_pci_epf_create_cq() and nvmet_pci_epf_delete_cq(), these functions are removed and open-coded in their respective call sites. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 63 ++++++++++++++--------------------- 1 file changed, 25 insertions(+), 38 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index b54b3fdbe389..51c27b32248d 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata; struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid]; u16 status; + int ret; if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; @@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, if (status != NVME_SC_SUCCESS) goto err; + /* + * Map the CQ PCI address space and since PCI endpoint controllers may + * return a partial mapping, check that the mapping is large enough. + */ + ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size, + &cq->pci_map); + if (ret) { + dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n", + cq->qid, ret); + goto err_internal; + } + + if (cq->pci_map.pci_size < cq->pci_size) { + dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n", + cq->qid); + goto err_unmap_queue; + } + set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", @@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, return NVME_SC_SUCCESS; +err_unmap_queue: + nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); +err_internal: + status = NVME_SC_INTERNAL | NVME_STATUS_DNR; err: if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); @@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid) cancel_delayed_work_sync(&cq->work); nvmet_pci_epf_drain_queue(cq); nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); + nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); return NVME_SC_SUCCESS; } @@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl) ctrl->cq = NULL; } -static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl, - struct nvmet_pci_epf_queue *queue) -{ - struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf; - int ret; - - ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr, - queue->pci_size, &queue->pci_map); - if (ret) { - dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n", - queue->qid, ret); - return ret; - } - - if (queue->pci_map.pci_size < queue->pci_size) { - dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n", - queue->qid); - nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map); - return -ENOMEM; - } - - return 0; -} - -static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl, - struct nvmet_pci_epf_queue *queue) -{ - nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map); -} - static void nvmet_pci_epf_exec_iod_work(struct work_struct *work) { struct nvmet_pci_epf_iod *iod = @@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) struct nvme_completion *cqe; struct nvmet_pci_epf_iod *iod; unsigned long flags; - int ret, n = 0; - - ret = nvmet_pci_epf_map_queue(ctrl, cq); - if (ret) - goto again; + int ret = 0, n = 0; while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) { @@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) n++; } - nvmet_pci_epf_unmap_queue(ctrl, cq); - /* * We do not support precise IRQ coalescing time (100ns units as per * NVMe specifications). So if we have posted completion entries without @@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) if (n) nvmet_pci_epf_raise_irq(ctrl, cq, true); -again: if (ret < 0) queue_delayed_work(system_highpri_wq, &cq->work, NVMET_PCI_EPF_CQ_RETRY_INTERVAL); From eada75467fca0b016b9b22212637c07216135c20 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 09:46:45 -0600 Subject: [PATCH 1969/2157] nvme/ioctl: don't warn on vectorized uring_cmd with fixed buffer The vectorized io_uring NVMe passthru opcodes don't yet support fixed buffers. But since userspace can trigger this condition based on the io_uring SQE parameters, it shouldn't cause a kernel warning. Signed-off-by: Caleb Sander Mateos Reviewed-by: Jens Axboe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Fixes: 23fd22e55b76 ("nvme: wire up fixed buffer support for nvme passthrough") Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index ecf136489044..f81748096841 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -142,7 +142,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { + if (flags & NVME_IOCTL_VEC) { ret = -EINVAL; goto out; } From cd683de63e1d7ce3c2bb9c285f7885e96c98af15 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 09:46:46 -0600 Subject: [PATCH 1970/2157] nvme/ioctl: move blk_mq_free_request() out of nvme_map_user_request() The callers of nvme_map_user_request() (nvme_submit_user_cmd() and nvme_uring_cmd_io()) allocate the request, so have them free it if nvme_map_user_request() fails. Signed-off-by: Caleb Sander Mateos Reviewed-by: Jens Axboe Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index f81748096841..809910da8c6e 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -129,10 +129,9 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (!nvme_ctrl_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { - if (!supports_metadata) { - ret = -EINVAL; - goto out; - } + if (!supports_metadata) + return -EINVAL; + if (!nvme_ctrl_meta_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked metadata buffer\n"); @@ -142,15 +141,14 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (flags & NVME_IOCTL_VEC) { - ret = -EINVAL; - goto out; - } + if (flags & NVME_IOCTL_VEC) + return -EINVAL; + ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd, iou_issue_flags); if (ret < 0) - goto out; + return ret; ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); } else { ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), @@ -159,7 +157,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, } if (ret) - goto out; + return ret; bio = req->bio; if (bdev) @@ -176,8 +174,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, out_unmap: if (bio) blk_rq_unmap_user(bio); -out: - blk_mq_free_request(req); return ret; } @@ -202,7 +198,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, meta_len, NULL, flags, 0); if (ret) - return ret; + goto out_free_req; } bio = req->bio; @@ -218,7 +214,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (effects) nvme_passthru_end(ctrl, ns, effects, cmd, ret); + return ret; +out_free_req: + blk_mq_free_request(req); return ret; } @@ -521,7 +520,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, d.data_len, nvme_to_user_ptr(d.metadata), d.metadata_len, ioucmd, vec, issue_flags); if (ret) - return ret; + goto out_free_req; } /* to free bio on completion, as req->bio will be null at that time */ @@ -531,6 +530,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, req->end_io = nvme_uring_cmd_end_io; blk_execute_rq_nowait(req, false); return -EIOCBQUEUED; + +out_free_req: + blk_mq_free_request(req); + return ret; } static bool is_ctrl_ioctl(unsigned int cmd) From 38808af53c6e72935d895182d69d63a5149da2ab Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 09:46:47 -0600 Subject: [PATCH 1971/2157] nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io() nvme_map_user_request() is called from both nvme_submit_user_cmd() and nvme_uring_cmd_io(). But the ioucmd branch is only applicable to nvme_uring_cmd_io(). Move it to nvme_uring_cmd_io() and just pass the resulting iov_iter to nvme_map_user_request(). For NVMe passthru operations with fixed buffers, the fixed buffer lookup happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can return -EAGAIN first from nvme_alloc_user_request() if all tags in the tag set are in use. This ordering difference is observable when using UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF to unregister the fixed buffer and the NVMe passthru goes async, the fixed buffer lookup will fail because it happens after the unregister. Userspace should not depend on the order in which io_uring issues SQEs submitted in parallel, but it may try submitting the SQEs together and fall back on a slow path if the fixed buffer lookup fails. To make the fast path more likely, do the import before nvme_alloc_user_request(). Signed-off-by: Caleb Sander Mateos Reviewed-by: Jens Axboe Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 45 +++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 809910da8c6e..ca86d3bf7ea4 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, static int nvme_map_user_request(struct request *req, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - struct io_uring_cmd *ioucmd, unsigned int flags, - unsigned int iou_issue_flags) + struct iov_iter *iter, unsigned int flags) { struct request_queue *q = req->q; struct nvme_ns *ns = q->queuedata; @@ -137,24 +136,12 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, "using unchecked metadata buffer\n"); } - if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { - struct iov_iter iter; - - /* fixedbufs is only for non-vectored io */ - if (flags & NVME_IOCTL_VEC) - return -EINVAL; - - ret = io_uring_cmd_import_fixed(ubuffer, bufflen, - rq_data_dir(req), &iter, ioucmd, - iou_issue_flags); - if (ret < 0) - return ret; - ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); - } else { + if (iter) + ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); + else ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, 0, rq_data_dir(req)); - } if (ret) return ret; @@ -196,7 +183,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, req->timeout = timeout; if (ubuffer && bufflen) { ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, - meta_len, NULL, flags, 0); + meta_len, NULL, flags); if (ret) goto out_free_req; } @@ -468,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct request_queue *q = ns ? ns->queue : ctrl->admin_q; struct nvme_uring_data d; struct nvme_command c; + struct iov_iter iter; + struct iov_iter *map_iter = NULL; struct request *req; blk_opf_t rq_flags = REQ_ALLOC_CACHE; blk_mq_req_flags_t blk_flags = 0; @@ -503,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, d.metadata_len = READ_ONCE(cmd->metadata_len); d.timeout_ms = READ_ONCE(cmd->timeout_ms); + if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) { + /* fixedbufs is only for non-vectored io */ + if (vec) + return -EINVAL; + + ret = io_uring_cmd_import_fixed(d.addr, d.data_len, + nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd, + issue_flags); + if (ret < 0) + return ret; + + map_iter = &iter; + } + if (issue_flags & IO_URING_F_NONBLOCK) { rq_flags |= REQ_NOWAIT; blk_flags = BLK_MQ_REQ_NOWAIT; @@ -516,9 +519,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; if (d.data_len) { - ret = nvme_map_user_request(req, d.addr, - d.data_len, nvme_to_user_ptr(d.metadata), - d.metadata_len, ioucmd, vec, issue_flags); + ret = nvme_map_user_request(req, d.addr, d.data_len, + nvme_to_user_ptr(d.metadata), d.metadata_len, + map_iter, vec); if (ret) goto out_free_req; } From 47f8cc9e32c37343e18992c16ed2c7a9ac9ccd63 Mon Sep 17 00:00:00 2001 From: John Meneghini Date: Sat, 22 Mar 2025 19:28:48 -0400 Subject: [PATCH 1972/2157] nvme: update the multipath warning in nvme_init_ns_head The new NVME_MULTIPATH_PARAM config option requires updates to the warning message in nvme_init_ns_head(). Signed-off-by: John Meneghini Tested-by: John Meneghini Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 777db89fdaa7..5ffc8f23a174 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) "Found shared namespace %d, but multipathing not supported.\n", info->nsid); dev_warn_once(ctrl->device, - "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n"); + "Shared namespace support requires core_nvme.multipath=Y.\n"); } } From 32c928142c4f2db5f9c11aa1fcc4de49d3f27fcd Mon Sep 17 00:00:00 2001 From: John Meneghini Date: Sat, 22 Mar 2025 19:28:46 -0400 Subject: [PATCH 1973/2157] nvme-multipath: change the NVME_MULTIPATH config option Fix up the NVME_MULTIPATH config description so that it accurately describes what it does. Signed-off-by: John Meneghini Tested-by: John Meneghini Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 10e453b2436e..d47dfa80fb95 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -18,10 +18,15 @@ config NVME_MULTIPATH bool "NVMe multipath support" depends on NVME_CORE help - This option enables support for multipath access to NVMe - subsystems. If this option is enabled only a single - /dev/nvmeXnY device will show up for each NVMe namespace, - even if it is accessible through multiple controllers. + This option controls support for multipath access to NVMe + subsystems. If this option is enabled support for NVMe multipath + access is included in the kernel. If this option is disabled support + for NVMe multipath access is excluded from the kernel. When this + option is disabled each controller/namespace receives its + own /dev/nvmeXnY device entry and NVMe multipath access is + not supported. + + If unsure, say Y. config NVME_VERBOSE_ERRORS bool "NVMe verbose error reporting" From 288ff0d10beb069355036355d5f7612579dc869c Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 31 Mar 2025 18:28:59 +0200 Subject: [PATCH 1974/2157] nvme-pci: skip nvme_write_sq_db on empty rqlist nvme_submit_cmds() should check the rqlist before calling nvme_write_sq_db(); if the list is empty, it must return immediately. Fixes: beadf0088501 ("nvme-pci: reverse request order in nvme_queue_rqs") Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2883d17ee1eb..b178d52eac1b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist) { struct request *req; + if (rq_list_empty(rqlist)) + return; + spin_lock(&nvmeq->sq_lock); while ((req = rq_list_pop(rqlist))) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); From 93d34608fd162f725172e780b1c60cc93a920719 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Tue, 1 Apr 2025 22:25:10 +0800 Subject: [PATCH 1975/2157] ASoC: imx-card: Add NULL check in imx_card_probe() devm_kasprintf() returns NULL when memory allocation fails. Currently, imx_card_probe() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: aa736700f42f ("ASoC: imx-card: Add imx-card machine driver") Signed-off-by: Henry Martin Reviewed-by: Frank Li Link: https://patch.msgid.link/20250401142510.29900-1-bsdhenrymartin@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 905294682996..3686d468506b 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -772,6 +772,8 @@ static int imx_card_probe(struct platform_device *pdev) data->dapm_routes[i].sink = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%d %s", i + 1, "Playback"); + if (!data->dapm_routes[i].sink) + return -ENOMEM; data->dapm_routes[i].source = "CPU-Playback"; } } @@ -789,6 +791,8 @@ static int imx_card_probe(struct platform_device *pdev) data->dapm_routes[i].source = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%d %s", i + 1, "Capture"); + if (!data->dapm_routes[i].source) + return -ENOMEM; data->dapm_routes[i].sink = "CPU-Capture"; } } From 9e4e249018d208678888bdf22f6b652728106528 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:39:08 +0100 Subject: [PATCH 1976/2157] cpufreq: Reference count policy in cpufreq_update_limits() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since acpi_processor_notify() can be called before registering a cpufreq driver or even in cases when a cpufreq driver is not registered at all, cpufreq_update_limits() needs to check if a cpufreq driver is present and prevent it from being unregistered. For this purpose, make it call cpufreq_cpu_get() to obtain a cpufreq policy pointer for the given CPU and reference count the corresponding policy object, if present. Fixes: 5a25e3f7cc53 ("cpufreq: intel_pstate: Driver-specific handling of _PPC updates") Closes: https://lore.kernel.org/linux-acpi/Z-ShAR59cTow0KcR@mail-itl Reported-by: Marek Marczykowski-Górecki Cc: All applicable Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Link: https://patch.msgid.link/1928789.tdWV9SEqCh@rjwysocki.net --- drivers/cpufreq/cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0cf5a320bb5e..3841c9da6cac 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2809,6 +2809,12 @@ EXPORT_SYMBOL(cpufreq_update_policy); */ void cpufreq_update_limits(unsigned int cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy); + + policy = cpufreq_cpu_get(cpu); + if (!policy) + return; + if (cpufreq_driver->update_limits) cpufreq_driver->update_limits(cpu); else From d0ebf4c7eb91fe73981d5250b50e9d22db8fb946 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 25 Dec 2024 17:50:10 +0000 Subject: [PATCH 1977/2157] x86/platform/iosf_mbi: Remove unused iosf_mbi_unregister_pmic_bus_access_notifier() The last use of iosf_mbi_unregister_pmic_bus_access_notifier() was removed in 2017 by: a5266db4d314 ("drm/i915: Acquire PUNIT->PMIC bus for intel_uncore_forcewake_reset()") Remove it. (Note that the '_unlocked' version is still used.) Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Ingo Molnar Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Simona Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/r/20241225175010.91783-1-linux@treblig.org --- arch/x86/include/asm/iosf_mbi.h | 7 ------- arch/x86/platform/intel/iosf_mbi.c | 13 ------------- drivers/gpu/drm/i915/i915_iosf_mbi.h | 6 ------ 3 files changed, 26 deletions(-) diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index af7541c11821..8ace6559d399 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -167,13 +167,6 @@ void iosf_mbi_unblock_punit_i2c_access(void); */ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); -/** - * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier - * - * @nb: notifier_block to unregister - */ -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); - /** * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus * notifier, unlocked diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index c81cea208c2c..40ae94db20d8 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -422,19 +422,6 @@ int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( } EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked); -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) -{ - int ret; - - /* Wait for the bus to go inactive before unregistering */ - iosf_mbi_punit_acquire(); - ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb); - iosf_mbi_punit_release(); - - return ret; -} -EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier); - void iosf_mbi_assert_punit_acquired(void) { WARN_ON(iosf_mbi_pmic_punit_access_count == 0); diff --git a/drivers/gpu/drm/i915/i915_iosf_mbi.h b/drivers/gpu/drm/i915/i915_iosf_mbi.h index 8f81b7603d37..317075d0da4e 100644 --- a/drivers/gpu/drm/i915/i915_iosf_mbi.h +++ b/drivers/gpu/drm/i915/i915_iosf_mbi.h @@ -31,12 +31,6 @@ iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(struct notifier_block *nb) { return 0; } - -static inline -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) -{ - return 0; -} #endif #endif /* __I915_IOSF_MBI_H__ */ From fcfd94d6967a98e88b834c9fd81e73c5f04d83dc Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 1 Apr 2025 12:53:55 -0700 Subject: [PATCH 1978/2157] io_uring/zcrx: return early from io_zcrx_recv_skb if readlen is 0 When readlen is set for a recvzc request, tcp_read_sock() will call io_zcrx_recv_skb() one final time with len == desc->count == 0. This is caused by the !desc->count check happening too late. The offset + 1 != skb->len happens earlier and causes the while loop to continue. Fix this in io_zcrx_recv_skb() instead of tcp_read_sock(). Return early if len is 0 i.e. the read is done. Fixes: 6699ec9a23f8 ("io_uring/zcrx: add a read limit to recvzc requests") Signed-off-by: David Wei Link: https://lore.kernel.org/r/20250401195355.1613813-1-dw@davidwei.uk Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 9c95b5b6ec4e..80d4a6f71d29 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -818,6 +818,14 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, int ret = 0; len = min_t(size_t, len, desc->count); + /* + * __tcp_read_sock() always calls io_zcrx_recv_skb one last time, even + * if desc->count is already 0. This is caused by the if (offset + 1 != + * skb->len) check. Return early in this case to break out of + * __tcp_read_sock(). + */ + if (!len) + return 0; if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT)) return -EAGAIN; From 212120a164d59fd534148d315f13db3d296efb0f Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Tue, 1 Apr 2025 12:58:23 +0100 Subject: [PATCH 1979/2157] Documentation/EDAC: Fix warning document isn't included in any toctree Fix the build (htmldocs) warning: Documentation/edac/index.rst: WARNING: document isn't included in any toctree. Fixes: db99ea5f2c03 ("EDAC: Add support for EDAC device features control") Closes: https://lore.kernel.org/all/20250228185102.15842f8b@canb.auug.org.au/ Reported-by: Stephen Rothwell Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250401115823.573-1-shiju.jose@huawei.com --- Documentation/subsystem-apis.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst index b52ad5b969d4..ff4fe8c936c8 100644 --- a/Documentation/subsystem-apis.rst +++ b/Documentation/subsystem-apis.rst @@ -71,6 +71,7 @@ Other subsystems accounting/index cpu-freq/index + edac/index fpga/index i2c/index iio/index From e5f1e8af9c9e151ecd665f6d2e36fb25fec3b110 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 1 Apr 2025 00:57:27 -0700 Subject: [PATCH 1980/2157] x86/fred: Fix system hang during S4 resume with FRED enabled Upon a wakeup from S4, the restore kernel starts and initializes the FRED MSRs as needed from its perspective. It then loads a hibernation image, including the image kernel, and attempts to load image pages directly into their original page frames used before hibernation unless those frames are currently in use. Once all pages are moved to their original locations, it jumps to a "trampoline" page in the image kernel. At this point, the image kernel takes control, but the FRED MSRs still contain values set by the restore kernel, which may differ from those set by the image kernel before hibernation. Therefore, the image kernel must ensure the FRED MSRs have the same values as before hibernation. Since these values depend only on the location of the kernel text and data, they can be recomputed from scratch. Reported-by: Xi Pardee Reported-by: Todd Brandt Tested-by: Todd Brandt Suggested-by: H. Peter Anvin (Intel) Signed-off-by: Xin Li (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Rafael J. Wysocki Reviewed-by: H. Peter Anvin (Intel) Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250401075728.3626147-1-xin@zytor.com --- arch/x86/power/cpu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 63230ff8cf4f..08e76a5ca155 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -231,6 +232,19 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) */ #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); + + /* + * Reinitialize FRED to ensure the FRED MSRs contain the same values + * as before hibernation. + * + * Note, the setup of FRED RSPs requires access to percpu data + * structures. Therefore, FRED reinitialization can only occur after + * the percpu access pointer (i.e., MSR_GS_BASE) is restored. + */ + if (ctxt->cr4 & X86_CR4_FRED) { + cpu_init_fred_exceptions(); + cpu_init_fred_rsps(); + } #else loadsegment(fs, __KERNEL_PERCPU); #endif From 53c959295bc3cccc1d9eec6711a5fcdd28602fc5 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 1 Apr 2025 14:49:08 -0600 Subject: [PATCH 1981/2157] selftests: ublk: kublk: use ioctl-encoded opcodes There are a couple of places in the kublk selftests ublk server which use the legacy ublk opcodes. These operations fail (with -EOPNOTSUPP) on a kernel compiled without CONFIG_BLKDEV_UBLK_LEGACY_OPCODES set. We could easily require it to be set as a prerequisite for these selftests, but since new applications should not be using the legacy opcodes, use the ioctl-encoded opcodes everywhere in kublk. Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250401-ublk_selftests-v1-1-98129c9bc8bb@purestorage.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 83756f97c26e..d39f166c9dc3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -99,7 +99,7 @@ static int __ublk_ctrl_cmd(struct ublk_dev *dev, static int ublk_ctrl_stop_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { - .cmd_op = UBLK_CMD_STOP_DEV, + .cmd_op = UBLK_U_CMD_STOP_DEV, }; return __ublk_ctrl_cmd(dev, &data); @@ -169,7 +169,7 @@ static int ublk_ctrl_get_params(struct ublk_dev *dev, struct ublk_params *params) { struct ublk_ctrl_cmd_data data = { - .cmd_op = UBLK_CMD_GET_PARAMS, + .cmd_op = UBLK_U_CMD_GET_PARAMS, .flags = CTRL_CMD_HAS_BUF, .addr = (__u64)params, .len = sizeof(*params), From f8554f512b8a1ecaa354fd7b1dd63906759e7335 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 1 Apr 2025 14:49:09 -0600 Subject: [PATCH 1982/2157] selftests: ublk: kublk: fix an error log line When doing io_uring operations using liburing, errno is not used to indicate errors, so the %m format specifier does not provide any relevant information for failed io_uring commands. Fix a log line emitted on get_params failure to translate the error code returned in the cqe->res field instead. Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250401-ublk_selftests-v1-2-98129c9bc8bb@purestorage.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index d39f166c9dc3..91c282bc7674 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -215,7 +215,7 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) ret = ublk_ctrl_get_params(dev, &p); if (ret < 0) { - ublk_err("failed to get params %m\n"); + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); return; } From fe4cdc2c4e248f48de23bc778870fd71e772a274 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 12 Mar 2025 10:51:31 -0400 Subject: [PATCH 1983/2157] mm/userfaultfd: fix release hang over concurrent GUP This patch should fix a possible userfaultfd release() hang during concurrent GUP. This problem was initially reported by Dimitris Siakavaras in July 2023 [1] in a firecracker use case. Firecracker has a separate process handling page faults remotely, and when the process releases the userfaultfd it can race with a concurrent GUP from KVM trying to fault in a guest page during the secondary MMU page fault process. A similar problem was reported recently again by Jinjiang Tu in March 2025 [2], even though the race happened this time with a mlockall() operation, which does GUP in a similar fashion. In 2017, commit 656710a60e36 ("userfaultfd: non-cooperative: closing the uffd without triggering SIGBUS") was trying to fix this issue. AFAIU, that fixes well the fault paths but may not work yet for GUP. In GUP, the issue is NOPAGE will be almost treated the same as "page fault resolved" in faultin_page(), then the GUP will follow page again, seeing page missing, and it'll keep going into a live lock situation as reported. This change makes core mm return RETRY instead of NOPAGE for both the GUP and fault paths, proactively releasing the mmap read lock. This should guarantee the other release thread make progress on taking the write lock and avoid the live lock even for GUP. When at it, rearrange the comments to make sure it's uptodate. [1] https://lore.kernel.org/r/79375b71-db2e-3e66-346b-254c90d915e2@cslab.ece.ntua.gr [2] https://lore.kernel.org/r/20250307072133.3522652-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250312145131.1143062-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Mike Rapoport (IBM) Cc: Axel Rasmussen Cc: Jinjiang Tu Cc: Dimitris Siakavaras Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 97c4d71115d8..d80f94346199 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -395,32 +395,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY)) goto out; - /* - * If it's already released don't get it. This avoids to loop - * in __get_user_pages if userfaultfd_release waits on the - * caller of handle_userfault to release the mmap_lock. - */ - if (unlikely(READ_ONCE(ctx->released))) { - /* - * Don't return VM_FAULT_SIGBUS in this case, so a non - * cooperative manager can close the uffd after the - * last UFFDIO_COPY, without risking to trigger an - * involuntary SIGBUS if the process was starting the - * userfaultfd while the userfaultfd was still armed - * (but after the last UFFDIO_COPY). If the uffd - * wasn't already closed when the userfault reached - * this point, that would normally be solved by - * userfaultfd_must_wait returning 'false'. - * - * If we were to return VM_FAULT_SIGBUS here, the non - * cooperative manager would be instead forced to - * always call UFFDIO_UNREGISTER before it can safely - * close the uffd. - */ - ret = VM_FAULT_NOPAGE; - goto out; - } - /* * Check that we can return VM_FAULT_RETRY. * @@ -457,6 +431,31 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) goto out; + if (unlikely(READ_ONCE(ctx->released))) { + /* + * If a concurrent release is detected, do not return + * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always + * return VM_FAULT_RETRY with lock released proactively. + * + * If we were to return VM_FAULT_SIGBUS here, the non + * cooperative manager would be instead forced to + * always call UFFDIO_UNREGISTER before it can safely + * close the uffd, to avoid involuntary SIGBUS triggered. + * + * If we were to return VM_FAULT_NOPAGE, it would work for + * the fault path, in which the lock will be released + * later. However for GUP, faultin_page() does nothing + * special on NOPAGE, so GUP would spin retrying without + * releasing the mmap read lock, causing possible livelock. + * + * Here only VM_FAULT_RETRY would make sure the mmap lock + * be released immediately, so that the thread concurrently + * releasing the userfault would always make progress. + */ + release_fault_lock(vmf); + goto out; + } + /* take the reference before dropping the mmap_lock */ userfaultfd_ctx_get(ctx); From b98072af60a7ce19214c80f10a7daab924c69182 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 8 Jan 2025 00:48:21 -0700 Subject: [PATCH 1984/2157] mm/hugetlb_vmemmap: fix memory loads ordering Using x86_64 as an example, for a 32KB struct page[] area describing a 2MB hugeTLB, HVO reduces the area to 4KB by the following steps: 1. Split the (r/w vmemmap) PMD mapping the area into 512 (r/w) PTEs; 2. For the 8 PTEs mapping the area, remap PTE 1-7 to the page mapped by PTE 0, and at the same time change the permission from r/w to r/o; 3. Free the pages PTE 1-7 used to map, hence the reduction from 32KB to 4KB. However, the following race can happen due to improperly memory loads ordering: CPU 1 (HVO) CPU 2 (speculative PFN walker) page_ref_freeze() synchronize_rcu() rcu_read_lock() page_is_fake_head() is false vmemmap_remap_pte() XXX: struct page[] becomes r/o page_ref_unfreeze() page_ref_count() is not zero atomic_add_unless(&page->_refcount) XXX: try to modify r/o struct page[] Specifically, page_is_fake_head() must be ordered after page_ref_count() on CPU 2 so that it can only return true for this case, to avoid the later attempt to modify r/o struct page[]. This patch adds the missing memory barrier and makes the tests on page_is_fake_head() and page_ref_count() done in the proper order. Link: https://lkml.kernel.org/r/20250108074822.722696-1-yuzhao@google.com Fixes: bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers") Signed-off-by: Yu Zhao Reported-by: Will Deacon Closes: https://lore.kernel.org/20241128142028.GA3506@willie-the-truck/ Reviewed-by: David Hildenbrand Reviewed-by: Muchun Song Acked-by: Will Deacon Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/page_ref.h | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5bd9492a66ee..e6a21b62dcce 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -226,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page } return page; } + +static __always_inline bool page_count_writable(const struct page *page, int u) +{ + if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) + return true; + + /* + * The refcount check is ordered before the fake-head check to prevent + * the following race: + * CPU 1 (HVO) CPU 2 (speculative PFN walker) + * + * page_ref_freeze() + * synchronize_rcu() + * rcu_read_lock() + * page_is_fake_head() is false + * vmemmap_remap_pte() + * XXX: struct page[] becomes r/o + * + * page_ref_unfreeze() + * page_ref_count() is not zero + * + * atomic_add_unless(&page->_refcount) + * XXX: try to modify r/o struct page[] + * + * The refcount check also prevents modification attempts to other (r/o) + * tail pages that are not fake heads. + */ + if (atomic_read_acquire(&page->_refcount) == u) + return false; + + return page_fixed_fake_head(page) == page; +} #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } + +static inline bool page_count_writable(const struct page *page, int u) +{ + return true; +} #endif static __always_inline int page_is_fake_head(const struct page *page) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 8c236c651d1d..544150d1d5fd 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u) rcu_read_lock(); /* avoid writing to the vmemmap area being remapped */ - if (!page_is_fake_head(page) && page_ref_count(page) != u) + if (page_count_writable(page, u)) ret = atomic_add_unless(&page->_refcount, nr, u); rcu_read_unlock(); From a0a9f2180b90c7d5f7c85a77b359856f675cf760 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 22 Jan 2025 14:11:51 +0800 Subject: [PATCH 1985/2157] mm: page_isolation: avoid calling folio_hstate() without hugetlb_lock I found a NULL pointer dereference as followed: BUG: kernel NULL pointer dereference, address: 0000000000000028 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 5 UID: 0 PID: 5964 Comm: sh Kdump: loaded Not tainted 6.13.0-dirty #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1. RIP: 0010:has_unmovable_pages+0x184/0x360 ... Call Trace: set_migratetype_isolate+0xd1/0x180 start_isolate_page_range+0xd2/0x170 alloc_contig_range_noprof+0x101/0x660 alloc_contig_pages_noprof+0x238/0x290 alloc_gigantic_folio.isra.0+0xb6/0x1f0 only_alloc_fresh_hugetlb_folio.isra.0+0xf/0x60 alloc_pool_huge_folio+0x80/0xf0 set_max_huge_pages+0x211/0x490 __nr_hugepages_store_common+0x5f/0xe0 nr_hugepages_store+0x77/0x80 kernfs_fop_write_iter+0x118/0x200 vfs_write+0x23c/0x3f0 ksys_write+0x62/0xe0 do_syscall_64+0x5b/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e As has_unmovable_pages() call folio_hstate() without hugetlb_lock, there is a race to free the HugeTLB page between PageHuge() and folio_hstate(). There is no need to add hugetlb_lock here as the HugeTLB page can be freed in lot of places. So it's enough to unfold folio_hstate() and add a check to avoid NULL pointer dereference for hugepage_migration_supported(). Link: https://lkml.kernel.org/r/20250122061151.578768-1-liushixin2@huawei.com Fixes: 464c7ffbcb16 ("mm/hugetlb: filter out hugetlb pages if HUGEPAGE migration is not supported.") Signed-off-by: Liu Shixin Acked-by: David Hildenbrand Acked-by: Zi Yan Reviewed-by: Oscar Salvador Cc: Johannes Weiner Cc: Kefeng Wang Cc: Kirill A. Shuemov Cc: Muchun Song Cc: Nanyong Sun Signed-off-by: Andrew Morton --- mm/page_isolation.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index a051a29e95ad..b2fc5266e3d2 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -83,7 +83,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e unsigned int skip_pages; if (PageHuge(page)) { - if (!hugepage_migration_supported(folio_hstate(folio))) + struct hstate *h; + + /* + * The huge page may be freed so can not + * use folio_hstate() directly. + */ + h = size_to_hstate(folio_size(folio)); + if (h && !hugepage_migration_supported(h)) return page; } else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) { return page; From 1ca77ff1837249701053a7fcbdedabc41f4ae67c Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Wed, 19 Mar 2025 06:00:30 +0000 Subject: [PATCH 1986/2157] mm/hugetlb: move hugetlb_sysctl_init() to the __init section hugetlb_sysctl_init() is only invoked once by an __init function and is merely a wrapper around another __init function so there is not reason to keep it. Fixes the following warning when toning down some GCC inline options: WARNING: modpost: vmlinux: section mismatch in reference: hugetlb_sysctl_init+0x1b (section: .text) -> __register_sysctl_init (section: .init.text) Link: https://lkml.kernel.org/r/20250319060041.2737320-1-marc.herbert@linux.intel.com Signed-off-by: Marc Herbert Reviewed-by: Anshuman Khandual Reviewed-by: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6fccfe6d046c..39f92aad7bd1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5179,7 +5179,7 @@ static const struct ctl_table hugetlb_table[] = { }, }; -static void hugetlb_sysctl_init(void) +static void __init hugetlb_sysctl_init(void) { register_sysctl_init("vm", hugetlb_table); } From c11bcbc0a517acf69282c8225059b2a8ac5fe628 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 26 Feb 2025 18:56:25 +0000 Subject: [PATCH 1987/2157] mm: zswap: fix crypto_free_acomp() deadlock in zswap_cpu_comp_dead() Currently, zswap_cpu_comp_dead() calls crypto_free_acomp() while holding the per-CPU acomp_ctx mutex. crypto_free_acomp() then holds scomp_lock (through crypto_exit_scomp_ops_async()). On the other hand, crypto_alloc_acomp_node() holds the scomp_lock (through crypto_scomp_init_tfm()), and then allocates memory. If the allocation results in reclaim, we may attempt to hold the per-CPU acomp_ctx mutex. The above dependencies can cause an ABBA deadlock. For example in the following scenario: (1) Task A running on CPU #1: crypto_alloc_acomp_node() Holds scomp_lock Enters reclaim Reads per_cpu_ptr(pool->acomp_ctx, 1) (2) Task A is descheduled (3) CPU #1 goes offline zswap_cpu_comp_dead(CPU #1) Holds per_cpu_ptr(pool->acomp_ctx, 1)) Calls crypto_free_acomp() Waits for scomp_lock (4) Task A running on CPU #2: Waits for per_cpu_ptr(pool->acomp_ctx, 1) // Read on CPU #1 DEADLOCK Since there is no requirement to call crypto_free_acomp() with the per-CPU acomp_ctx mutex held in zswap_cpu_comp_dead(), move it after the mutex is unlocked. Also move the acomp_request_free() and kfree() calls for consistency and to avoid any potential sublte locking dependencies in the future. With this, only setting acomp_ctx fields to NULL occurs with the mutex held. This is similar to how zswap_cpu_comp_prepare() only initializes acomp_ctx fields with the mutex held, after performing all allocations before holding the mutex. Opportunistically, move the NULL check on acomp_ctx so that it takes place before the mutex dereference. Link: https://lkml.kernel.org/r/20250226185625.2672936-1-yosry.ahmed@linux.dev Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug") Signed-off-by: Herbert Xu Co-developed-by: Herbert Xu Signed-off-by: Yosry Ahmed Reported-by: syzbot+1a517ccfcbc6a7ab0f82@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67bcea51.050a0220.bbfd1.0096.GAE@google.com/ Acked-by: Herbert Xu Reviewed-by: Chengming Zhou Reviewed-by: Nhat Pham Tested-by: Nhat Pham Cc: David S. Miller Cc: Eric Biggers Cc: Johannes Weiner Cc: Chris Murphy Cc: Signed-off-by: Andrew Morton --- mm/zswap.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 0dcc54eab58b..204fb59da33c 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -883,18 +883,32 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); + struct acomp_req *req; + struct crypto_acomp *acomp; + u8 *buffer; + + if (IS_ERR_OR_NULL(acomp_ctx)) + return 0; mutex_lock(&acomp_ctx->mutex); - if (!IS_ERR_OR_NULL(acomp_ctx)) { - if (!IS_ERR_OR_NULL(acomp_ctx->req)) - acomp_request_free(acomp_ctx->req); - acomp_ctx->req = NULL; - if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) - crypto_free_acomp(acomp_ctx->acomp); - kfree(acomp_ctx->buffer); - } + req = acomp_ctx->req; + acomp = acomp_ctx->acomp; + buffer = acomp_ctx->buffer; + acomp_ctx->req = NULL; + acomp_ctx->acomp = NULL; + acomp_ctx->buffer = NULL; mutex_unlock(&acomp_ctx->mutex); + /* + * Do the actual freeing after releasing the mutex to avoid subtle + * locking dependencies causing deadlocks. + */ + if (!IS_ERR_OR_NULL(req)) + acomp_request_free(req); + if (!IS_ERR_OR_NULL(acomp)) + crypto_free_acomp(acomp); + kfree(buffer); + return 0; } From 36eed5400805b294f1df39b0e3ebc5b7971b3c16 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 30 Mar 2025 17:20:48 +0100 Subject: [PATCH 1988/2157] mm/mremap: do not set vrm->vma NULL immediately prior to checking it This seems rather unwise. If we cannot merge, extend, then we need to recall the original VMA to see if we need to uncharge. If we do need to, do so. Link: https://lkml.kernel.org/r/b2fb6b9c-376d-4e9b-905e-26d847fd3865@lucifer.local Fixes: d5c8aec0542e ("mm/mremap: initial refactor of move_vma()") Signed-off-by: Lorenzo Stoakes Reported-=by: "Lai, Yi" Closes: https://lore.kernel.org/linux-mm/Z+lcvEIHMLiKVR1i@ly-workstation/ Cc: Liam R. Howlett Cc: Vlastimil Babka Cc: Harry Yoo Signed-off-by: Andrew Morton --- mm/mremap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 0865387531ed..7db9da609c84 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1561,11 +1561,12 @@ static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm) * adjacent to the expanded vma and otherwise * compatible. */ - vma = vrm->vma = vma_merge_extend(&vmi, vma, vrm->delta); + vma = vma_merge_extend(&vmi, vma, vrm->delta); if (!vma) { vrm_uncharge(vrm); return -ENOMEM; } + vrm->vma = vma; vrm_stat_account(vrm, vrm->delta); From 7a95a05f15d570e6087fea59280fe267fe809100 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 22 Mar 2025 19:21:45 -0400 Subject: [PATCH 1989/2157] mm: page_alloc: fix defrag_mode's retry & OOM path Brendan points out that defrag_mode doesn't properly clear ALLOC_NOFRAGMENT on its last-ditch attempt to allocate. But looking closer, the problem is actually more severe: it doesn't actually *check* whether it's already retried, and keeps looping. This means the OOM path is never taken, and the thread can loop indefinitely. This is verified with an intentional OOM test on defrag_mode=1, which results in the machine hanging. After this patch, it triggers the OOM kill reliably and recovers. Clear ALLOC_NOFRAGMENT properly, and only retry once. Link: https://lkml.kernel.org/r/20250401041231.GA2117727@cmpxchg.org Fixes: e3aa7df331bc ("mm: page_alloc: defrag_mode") Signed-off-by: Johannes Weiner Reported-by: Brendan Jackman Signed-off-by: Andrew Morton --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f51aa6051a99..37d111184eee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4604,8 +4604,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto retry; /* Reclaim/compaction failed to prevent the fallback */ - if (defrag_mode) { - alloc_flags &= ALLOC_NOFRAGMENT; + if (defrag_mode && (alloc_flags & ALLOC_NOFRAGMENT)) { + alloc_flags &= ~ALLOC_NOFRAGMENT; goto retry; } From 7fa46cdfffd29459f9ebf6ed891a4c721db06a33 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Tue, 18 Mar 2025 10:59:26 +0900 Subject: [PATCH 1990/2157] mm/kasan: use SLAB_NO_MERGE flag instead of an empty constructor Use SLAB_NO_MERGE flag to prevent merging instead of providing an empty constructor. Using an empty constructor in this manner is an abuse of slab interface. The SLAB_NO_MERGE flag should be used with caution, but in this case, it is acceptable as the cache is intended solely for debugging purposes. No functional changes intended. Link: https://lkml.kernel.org/r/20250318015926.1629748-1-harry.yoo@oracle.com Signed-off-by: Harry Yoo Reviewed-by: Alexander Potapenko Reviewed-by: Andrey Konovalov Acked-by: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 59d673400085..3ea317837c2d 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1073,14 +1073,11 @@ static void kmem_cache_rcu_uaf(struct kunit *test) kmem_cache_destroy(cache); } -static void empty_cache_ctor(void *object) { } - static void kmem_cache_double_destroy(struct kunit *test) { struct kmem_cache *cache; - /* Provide a constructor to prevent cache merging. */ - cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor); + cache = kmem_cache_create("test_cache", 200, 0, SLAB_NO_MERGE, NULL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); kmem_cache_destroy(cache); KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache)); From 7f29070f4c8599dfe7582415ef162474588fd462 Mon Sep 17 00:00:00 2001 From: Taotao Chen Date: Thu, 20 Mar 2025 18:44:00 +0800 Subject: [PATCH 1991/2157] mm/damon/core: simplify control flow in damon_register_ops() The function logic is not complex, so using goto is unnecessary. Replace it with a straightforward if-else to simplify control flow and improve readability. Link: https://lkml.kernel.org/r/Z9vxcPCw8tDsjKw1@OneApple Signed-off-by: Taotao Chen Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index fc1eba3da419..f0c1676f0599 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -76,14 +76,13 @@ int damon_register_ops(struct damon_operations *ops) if (ops->id >= NR_DAMON_OPS) return -EINVAL; + mutex_lock(&damon_ops_lock); /* Fail for already registered ops */ - if (__damon_is_registered_ops(ops->id)) { + if (__damon_is_registered_ops(ops->id)) err = -EINVAL; - goto out; - } - damon_registered_ops[ops->id] = *ops; -out: + else + damon_registered_ops[ops->id] = *ops; mutex_unlock(&damon_ops_lock); return err; } From bd145bdd26c6845b5403d47b3b094bb5d020c6ef Mon Sep 17 00:00:00 2001 From: Ye Liu Date: Thu, 20 Mar 2025 14:33:46 +0800 Subject: [PATCH 1992/2157] mm/page_alloc: replace flag check with PageHWPoison() in check_new_page_bad() This patch replaces the direct check for the __PG_HWPOISON flag with the PageHWPoison() macro, improving code readability and maintaining consistency with other parts of the memory management code. Link: https://lkml.kernel.org/r/20250320063346.489030-1-ye.liu@linux.dev Signed-off-by: Ye Liu Reviewed-by: Sidhartha Kumar Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 37d111184eee..e892a55b471c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1593,7 +1593,7 @@ static __always_inline void page_del_and_expand(struct zone *zone, static void check_new_page_bad(struct page *page) { - if (unlikely(page->flags & __PG_HWPOISON)) { + if (unlikely(PageHWPoison(page))) { /* Don't complain about hwpoisoned pages */ if (PageBuddy(page)) __ClearPageBuddy(page); From 4a0cb631447fdcfb870a0b56950272cf25c0a6ee Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 21 Mar 2025 20:21:24 -0400 Subject: [PATCH 1993/2157] MAINTAINERS: add peterx as userfaultfd reviewer Add an entry for userfaultfd and make myself a reviewer of it, just in case it helps people manage the cc list. I named it MEMORY USERFAULTFD, could be a bad name, but then it can be together with the MEMORY* entries when everything is in alphabetic order, which is definitely a benefit. The line may not change much on how I'd work with userfaultfd; I think I'll do the same as before.. But maybe it still, more or less, adds some responsibility on top, indeed. [akpm@linux-foundation.org: add include/linux/userfaultfd_k.h, per Mike] [akpm@linux-foundation.org: fix misordering] Link: https://lkml.kernel.org/r/20250322002124.131736-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Suggested-by: Andrew Morton Acked-by: Liam R. Howlett Acked-by: Lorenzo Stoakes Cc: Mike Rapoport Signed-off-by: Andrew Morton --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b1fe0b766cec..3ef706b4b01a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15517,6 +15517,18 @@ F: mm/vma.h F: mm/vma_internal.h F: tools/testing/vma/ +MEMORY USERFAULTFD +M: Andrew Morton +R: Peter Xu +S: Maintained +F: Documentation/admin-guide/mm/userfaultfd.rst +F: fs/userfaultfd.c +F: include/asm-generic/pgtable_uffd.h +F: include/linux/userfaultfd_k.h +F: include/uapi/linux/userfaultfd.h +F: mm/userfaultfd.c +F: tools/testing/selftests/mm/uffd-*.[ch] + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger From 2ebc3b68ac400444d8cc2ec1f4460c37aa7d28da Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 25 Mar 2025 13:49:27 +0200 Subject: [PATCH 1994/2157] mm/mm_init: init holes in the end of the memory map for FLATMEM Patch series "mm: fixes for fallouts from mem_init() cleanup". These are the fixes for fallouts from mem_init() cleanup reported by Nathan Chancellor and kbuild. The details are in the commit messages. This patch (of 2): Kernel test robot reports the following crash on 32-bit system with FLATMEM and DEBUG_VM_PGFLAGS enabled: [ 0.478822][ T0] kernel BUG at include/linux/page-flags.h:536! [ 0.479312][ T0] Oops: invalid opcode: 0000 [#1] PREEMPT SMP [ 0.479768][ T0] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.14.0-rc6-00357-g8268af309d07 #1 [ 0.480470][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 0.481260][ T0] EIP: reserve_bootmem_region (include/linux/page-flags.h:536) [ 0.481683][ T0] Code: 5d c3 01 f1 89 c8 ba e1 38 f4 c3 e8 1e 37 8e fc 0f 0b b8 90 e2 62 c4 e8 e2 05 5e fc 01 f1 89 c8 ba be 85 f7 c3 e8 04 37 8e fc <0f> 0b b8 80 e2 62 c4 e8 c8 05 5e fc 55 89 e5 53 57 56 83 ec 10 89 [ 0.483177][ T0] EAX: 00000000 EBX: c425df50 ECX: 00000000 EDX: 00000000 [ 0.483712][ T0] ESI: 017ffc00 EDI: ffffffff EBP: c425df34 ESP: c425df2c [ 0.484248][ T0] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00210046 [ 0.484846][ T0] CR0: 80050033 CR2: 00000000 CR3: 04b48000 CR4: 00000090 [ 0.485376][ T0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 [ 0.485907][ T0] DR6: fffe0ff0 DR7: 00000400 [ 0.486253][ T0] Call Trace: [ 0.486494][ T0] ? __die_body (arch/x86/kernel/dumpstack.c:478) [ 0.486822][ T0] ? die (arch/x86/kernel/dumpstack.c:?) [ 0.487099][ T0] ? do_trap (arch/x86/kernel/traps.c:? arch/x86/kernel/traps.c:197) [ 0.487409][ T0] ? do_error_trap (arch/x86/kernel/traps.c:217) [ 0.487752][ T0] ? reserve_bootmem_region (include/linux/page-flags.h:536) [ 0.488153][ T0] ? exc_overflow (arch/x86/kernel/traps.c:301) [ 0.488490][ T0] ? handle_invalid_op (arch/x86/kernel/traps.c:254) [ 0.488869][ T0] ? reserve_bootmem_region (include/linux/page-flags.h:536) [ 0.489271][ T0] ? exc_invalid_op (arch/x86/kernel/traps.c:316) [ 0.489619][ T0] ? handle_exception (arch/x86/entry/entry_32.S:1055) [ 0.489996][ T0] ? exc_overflow (arch/x86/kernel/traps.c:301) [ 0.490332][ T0] ? reserve_bootmem_region (include/linux/page-flags.h:536) [ 0.490733][ T0] ? exc_overflow (arch/x86/kernel/traps.c:301) [ 0.491068][ T0] ? reserve_bootmem_region (include/linux/page-flags.h:536) [ 0.491470][ T0] memmap_init_reserved_pages (mm/memblock.c:2203) [ 0.491887][ T0] free_low_memory_core_early (mm/memblock.c:?) [ 0.492302][ T0] memblock_free_all (mm/memblock.c:2272 include/linux/atomic/atomic-arch-fallback.h:546 include/linux/atomic/atomic-long.h:123 include/linux/atomic/atomic-instrumented.h:3261 include/linux/mm.h:67 mm/memblock.c:2273) [ 0.492659][ T0] mem_init (arch/x86/mm/init_32.c:735) [ 0.492952][ T0] mm_core_init (mm/mm_init.c:2730) [ 0.493271][ T0] start_kernel (init/main.c:958) [ 0.493604][ T0] i386_start_kernel (arch/x86/kernel/head32.c:79) [ 0.493969][ T0] startup_32_smp (arch/x86/kernel/head_32.S:292) The crash happens because after commit 8268af309d07 ("arch, mm: set max_mapnr when allocating memory map for FLATMEM") max_mapnr is rounded up to MAX_ORDER_NR_PAGES and the pages in the end of the memory map are passing pfn_valid() check in reserve_bootmem_region(). Make sure that that pages in the end of the memory map are initialized, just like the pages in the end of the last section for SPARSEMEM. Link: https://lkml.kernel.org/r/20250325114928.1791109-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250325114928.1791109-2-rppt@kernel.org Fixes: 8268af309d07 ("arch, mm: set max_mapnr when allocating memory map for FLATMEM") Signed-off-by: Mike Rapoport (Microsoft) Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202503241424.d16223ec-lkp@intel.com Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Nathan Chancellor Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- mm/mm_init.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index a38a1909b407..84f14fa12d0d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -984,19 +984,19 @@ static void __init memmap_init(void) } } -#ifdef CONFIG_SPARSEMEM /* * Initialize the memory map for hole in the range [memory_end, - * section_end]. + * section_end] for SPARSEMEM and in the range [memory_end, memmap_end] + * for FLATMEM. * Append the pages in this hole to the highest zone in the last * node. - * The call to init_unavailable_range() is outside the ifdef to - * silence the compiler warining about zone_id set but not used; - * for FLATMEM it is a nop anyway */ +#ifdef CONFIG_SPARSEMEM end_pfn = round_up(end_pfn, PAGES_PER_SECTION); - if (hole_pfn < end_pfn) +#else + end_pfn = round_up(end_pfn, MAX_ORDER_NR_PAGES); #endif + if (hole_pfn < end_pfn) init_unavailable_range(hole_pfn, end_pfn, zone_id, nid); } From 7790c9c9265eed6d1ae1f03e688b880f409e835d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 25 Mar 2025 13:49:28 +0200 Subject: [PATCH 1995/2157] memblock: don't release high memory to page allocator when HIGHMEM is off Nathan Chancellor reports the following crash on a MIPS system with CONFIG_HIGHMEM=n: Linux version 6.14.0-rc6-00359-g6faea3422e3b (nathan@ax162) (mips-linux-gcc (GCC) 14.2.0, GNU ld (GNU Binutils) 2.42) #1 SMP Fri Mar 21 08:12:02 MST 2025 earlycon: uart8250 at I/O port 0x3f8 (options '38400n8') printk: legacy bootconsole [uart8250] enabled Config serial console: console=ttyS0,38400n8r CPU0 revision is: 00019300 (MIPS 24Kc) FPU revision is: 00739300 MIPS: machine is mti,malta Software DMA cache coherency enabled Initial ramdisk at: 0x8fad0000 (5360128 bytes) OF: reserved mem: Reserved memory: No reserved-memory node in the DT Primary instruction cache 2kB, VIPT, 2-way, linesize 16 bytes. Primary data cache 2kB, 2-way, VIPT, no aliases, linesize 16 bytes Zone ranges: DMA [mem 0x0000000000000000-0x0000000000ffffff] Normal [mem 0x0000000001000000-0x000000001fffffff] Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000000000000-0x000000000fffffff] node 0: [mem 0x0000000090000000-0x000000009fffffff] Initmem setup node 0 [mem 0x0000000000000000-0x000000009fffffff] On node 0, zone Normal: 16384 pages in unavailable ranges random: crng init done percpu: Embedded 3 pages/cpu s18832 r8192 d22128 u49152 Kernel command line: rd_start=0xffffffff8fad0000 rd_size=5360128 console=ttyS0,38400n8r printk: log buffer data + meta data: 32768 + 102400 = 135168 bytes Dentry cache hash table entries: 65536 (order: 4, 262144 bytes, linear) Inode-cache hash table entries: 32768 (order: 3, 131072 bytes, linear) Writing ErrCtl register=00000000 Readback ErrCtl register=00000000 Built 1 zonelists, mobility grouping on. Total pages: 16384 mem auto-init: stack:all(zero), heap alloc:off, heap free:off Unhandled kernel unaligned access[#1]: CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.14.0-rc6-00359-g6faea3422e3b #1 Hardware name: mti,malta $ 0 : 00000000 00000001 81cb0880 00129027 $ 4 : 00000001 0000000a 00000002 00129026 $ 8 : ffffdfff 80101e00 00000002 00000000 $12 : 81c9c224 81c63e68 00000002 00000000 $16 : 805b1e00 00025800 81cb0880 00000002 $20 : 00000000 81c63e64 0000000a 81f10000 $24 : 81c63e64 81c63e60 $28 : 81c60000 81c63de0 00000001 81cc9d20 Hi : 00000000 Lo : 00000000 epc : 814a227c __free_pages_ok+0x144/0x3c0 ra : 81cc9d20 memblock_free_all+0x1d4/0x27c Status: 10000002 KERNEL EXL Cause : 00800410 (ExcCode 04) BadVA : 00129026 PrId : 00019300 (MIPS 24Kc) Modules linked in: Process swapper (pid: 0, threadinfo=(ptrval), task=(ptrval), tls=00000000) Stack : 81f10000 805a9e00 81c80000 00000000 00000002 814aa240 000003ff 00000400 00000000 81f10000 81c9c224 00003b1f 81c80000 81c63e60 81ca0000 81c63e64 81f10000 0000000a 0000001f 81cc9d20 81f10000 81cc96d8 00000000 81c80000 81c9c224 81c63e60 81c63e64 00000000 81f10000 00024000 00028000 00025c00 90000000 a0000000 00000002 00000017 00000000 00000000 81f10000 81f10000 ... Call Trace: [<814a227c>] __free_pages_ok+0x144/0x3c0 [<81cc9d20>] memblock_free_all+0x1d4/0x27c [<81cc6764>] mm_core_init+0x100/0x138 [<81cb4ba4>] start_kernel+0x4a0/0x6e4 Code: 1080ffd5 02003825 2467ffff <8ce30000> 7c630500 1060ffd4 00000000 8ce30000 7c630180 The crash happens because commit 6faea3422e3b ("arch, mm: streamline HIGHMEM freeing") too eagerly frees high memory to the page allocator even when HIGHMEM is disabled. Make sure that when CONFIG_HIGHMEM=n the high memory is not released to the page allocator. Link: https://lore.kernel.org/all/20250323190647.GA1009914@ax162 Link: https://lkml.kernel.org/r/20250325114928.1791109-3-rppt@kernel.org Reported-by: Nathan Chancellor Fixes: 6faea3422e3b ("arch, mm: streamline HIGHMEM freeing") Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Nathan Chancellor Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- mm/memblock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 284154445409..0a53db4d9f7b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2167,6 +2167,9 @@ static unsigned long __init __free_memory_core(phys_addr_t start, unsigned long start_pfn = PFN_UP(start); unsigned long end_pfn = PFN_DOWN(end); + if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn) + end_pfn = max_low_pfn; + if (start_pfn >= end_pfn) return 0; From 983e760bcdb6f41ff3cf59e70e32a529537d6ef2 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 27 Mar 2025 19:48:13 +0800 Subject: [PATCH 1996/2157] selftest/mm: va_high_addr_switch: add ppc64 support check Add PPC64 Radix MMU support to the va_high_addr_switch.sh by introducing check_supported_ppc64(). The function verifies: - 5-level paging (PGTABLE_LEVELS >= 5) enable in kernel config - Radix MMU (required for PPC64 5-level translation) - HugePages availability (needed for some tests) If any check fails, the test is skipped (ksft_skip). This ensures compatibility with Power9/Power10 systems running in Radix MMU mode. Avoid failures on 4-level paging system: # mmap(NULL, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(LOW_ADDR, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_HUGETLB) again: 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(-1, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(-1, MAP_HUGETLB) again: 0xffffffffffffffff - FAILED # mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB): 0xffffffffffffffff - FAILED Link: https://lkml.kernel.org/r/20250327114813.25980-1-liwang@redhat.com Signed-off-by: Li Wang Cc: Anshuman Khandual Cc: Dev Jain Cc: Kirill A. Shuemov Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/mm/va_high_addr_switch.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 2c725773cd79..1f92e8caceac 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -41,6 +41,31 @@ check_supported_x86_64() fi } +check_supported_ppc64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + if [[ "${pg_table_levels}" -lt 5 ]]; then + echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + exit $ksft_skip + fi + + local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') + if [[ "$mmu_support" != "radix" ]]; then + echo "$0: System does not use Radix MMU, required for 5-level paging" + exit $ksft_skip + fi + + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) + if [[ "${hugepages_total}" -eq 0 ]]; then + echo "$0: HugePages are not enabled, required for some tests" + exit $ksft_skip + fi +} + check_test_requirements() { # The test supports x86_64 and powerpc64. We currently have no useful @@ -50,6 +75,9 @@ check_test_requirements() "x86_64") check_supported_x86_64 ;; + "ppc64le"|"ppc64") + check_supported_ppc64 + ;; *) return 0 ;; From 59aa44d1ee5c73a230ad11a9c3610631ea49982b Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 26 Mar 2025 23:55:38 +0200 Subject: [PATCH 1997/2157] MAINTAINERS: fixup USERFAULTFD entry Patch series "MAINTAINERS: add my isub-entries to MM part." Following discussion at LSF/MM/BPF I'm adding execmem, secretmem and numa memblocks sub-entries for MEMORY MANAGEMENT in MAINTAINERS. This patch (of 4): Change title to "MEMORY MANAGEMENT - USERFAULTFD" and make it sub-topic in memory management and add missing include/linux/userfaultfd_k.h and mailing list Link: https://lkml.kernel.org/r/20250326215541.1809379-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250326215541.1809379-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Michal Hocko Acked-by: Oscar Salvador Acked-by: Vlastimil Babka Cc: Dan Willaims Cc: "Mike Rapoport (IBM)" Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3ef706b4b01a..d143d3f7ca62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15497,6 +15497,19 @@ F: tools/mm/ F: tools/testing/selftests/mm/ N: include/linux/page[-_]* +MEMORY MANAGEMENT - USERFAULTFD +M: Andrew Morton +R: Peter Xu +L: linux-mm@kvack.org +S: Maintained +F: Documentation/admin-guide/mm/userfaultfd.rst +F: fs/userfaultfd.c +F: include/asm-generic/pgtable_uffd.h +F: include/linux/userfaultfd_k.h +F: include/uapi/linux/userfaultfd.h +F: mm/userfaultfd.c +F: tools/testing/selftests/mm/uffd-*.[ch] + MEMORY MAPPING M: Andrew Morton M: Liam R. Howlett @@ -15517,18 +15530,6 @@ F: mm/vma.h F: mm/vma_internal.h F: tools/testing/vma/ -MEMORY USERFAULTFD -M: Andrew Morton -R: Peter Xu -S: Maintained -F: Documentation/admin-guide/mm/userfaultfd.rst -F: fs/userfaultfd.c -F: include/asm-generic/pgtable_uffd.h -F: include/linux/userfaultfd_k.h -F: include/uapi/linux/userfaultfd.h -F: mm/userfaultfd.c -F: tools/testing/selftests/mm/uffd-*.[ch] - MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger From 8871b533ef995ddf0fe45cdfe08665d3edd84777 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 26 Mar 2025 23:55:39 +0200 Subject: [PATCH 1998/2157] MAINTAINERS: mm: add entry for execmem Link: https://lkml.kernel.org/r/20250326215541.1809379-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Michal Hocko Acked-by: Oscar Salvador Acked-by: Vlastimil Babka Cc: Dan Willaims Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d143d3f7ca62..c6c5b78da2c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15497,6 +15497,14 @@ F: tools/mm/ F: tools/testing/selftests/mm/ N: include/linux/page[-_]* +MEMORY MANAGEMENT - EXECMEM +M: Andrew Morton +M: Mike Rapoport +L: linux-mm@kvack.org +S: Maintained +F: include/linux/execmem.h +F: mm/execmem.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu From 6985850f3e0bfd36e027ea2c0ec08b5deb5b371a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 26 Mar 2025 23:55:40 +0200 Subject: [PATCH 1999/2157] MAINTAINERS: mm: add entry for numa memblocks and numa emulation Link: https://lkml.kernel.org/r/20250326215541.1809379-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Michal Hocko Acked-by: Oscar Salvador Acked-by: Vlastimil Babka Cc: Dan Willaims Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c6c5b78da2c4..4166bcaa2230 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15505,6 +15505,16 @@ S: Maintained F: include/linux/execmem.h F: mm/execmem.c +MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION +M: Andrew Morton +M: Mike Rapoport +L: linux-mm@kvack.org +S: Maintained +F: include/linux/numa_memblks.h +F: mm/numa.c +F: mm/numa_emulation.c +F: mm/numa_memblks.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu From 38c5ecaaddd09e6e80028b132266e375b22e9b4b Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 26 Mar 2025 23:55:41 +0200 Subject: [PATCH 2000/2157] MAINTAINERS: mm: add entry for secretmem Link: https://lkml.kernel.org/r/20250326215541.1809379-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Michal Hocko Acked-by: Oscar Salvador Acked-by: Vlastimil Babka Cc: Dan Willaims Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4166bcaa2230..193d7e216d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15515,6 +15515,14 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - SECRETMEM +M: Andrew Morton +M: Mike Rapoport +L: linux-mm@kvack.org +S: Maintained +F: include/linux/secretmem.h +F: mm/secretmem.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu From 9342bc134ae73a0b3fddec17075ccf75781a3a70 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Mon, 24 Mar 2025 21:17:50 +0800 Subject: [PATCH 2001/2157] mm/memory_hotplug: fix call folio_test_large with tail page in do_migrate_range We triggered the below BUG: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x2 pfn:0x240402 head: order:9 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x1ffffe0000000040(head|node=1|zone=3|lastcpupid=0x1ffff) page_type: f4(hugetlb) page dumped because: VM_BUG_ON_PAGE(page->compound_head & 1) ------------[ cut here ]------------ kernel BUG at ./include/linux/page-flags.h:310! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 7 UID: 0 PID: 166 Comm: sh Not tainted 6.14.0-rc7-dirty #374 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : const_folio_flags+0x3c/0x58 lr : const_folio_flags+0x3c/0x58 Call trace: const_folio_flags+0x3c/0x58 (P) do_migrate_range+0x164/0x720 offline_pages+0x63c/0x6fc memory_subsys_offline+0x190/0x1f4 device_offline+0xc0/0x13c state_store+0x90/0xd8 dev_attr_store+0x18/0x2c sysfs_kf_write+0x44/0x54 kernfs_fop_write_iter+0x120/0x1cc vfs_write+0x240/0x378 ksys_write+0x70/0x108 __arm64_sys_write+0x1c/0x28 invoke_syscall+0x48/0x10c el0_svc_common.constprop.0+0x40/0xe0 When allocating a hugetlb folio, between the folio is taken from buddy and prep_compound_page() is called, start_isolate_page_range() and do_migrate_range() is called. When do_migrate_range() scans the head page of the hugetlb folio, the compound_head field isn't set, so scans the tail page next. And at this time, the compound_head field of tail page is set, folio_test_large() is called by tail page, thus triggers VM_BUG_ON(). To fix it, get folio refcount before calling folio_test_large(). Link: https://lkml.kernel.org/r/20250324131750.1551884-1-tujinjiang@huawei.com Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration") Fixes: b62b51d2d159 ("mm: memory_hotplug: remove head variable in do_migrate_range()") Signed-off-by: Jinjiang Tu Acked-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Kefeng Wang Cc: Nanyong Sun Cc: Naoya Horiguchi Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 75401866fb76..8305483de38b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1813,21 +1813,15 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) page = pfn_to_page(pfn); folio = page_folio(page); - /* - * No reference or lock is held on the folio, so it might - * be modified concurrently (e.g. split). As such, - * folio_nr_pages() may read garbage. This is fine as the outer - * loop will revisit the split folio later. - */ - if (folio_test_large(folio)) - pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; - if (!folio_try_get(folio)) continue; if (unlikely(page_folio(page) != folio)) goto put_folio; + if (folio_test_large(folio)) + pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; + if (folio_contain_hwpoisoned_page(folio)) { if (WARN_ON(folio_test_lru(folio))) folio_isolate_lru(folio); From 1b3d3e9f4a32b06ca73b084aa526a352318c74ac Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 28 Mar 2025 01:01:36 +0000 Subject: [PATCH 2002/2157] microblaze/mm: put mm_cmdline_setup() in .init.text section As reported by lkp, there is a section mismatch of mm_cmdline_setup() and memblock. The reason is we don't specify the section of mm_cmdline_setup() and gcc put it into .text.unlikely. As mm_cmdline_setup() is only used in mmu_init(), which is in .init.text section, put mm_cmdline_setup() into it too. Link: https://lkml.kernel.org/r/20250328010136.13139-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503241259.kJV3U7Xj-lkp@intel.com/ Reviewed-by: Oscar Salvador Cc: Masahiro Yamada Cc: Michal Simek Cc: Wei Yang Signed-off-by: Andrew Morton --- arch/microblaze/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 65f0d1fb8a2a..31d475cdb1c5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -118,7 +118,7 @@ int page_is_ram(unsigned long pfn) /* * Check for command-line options that affect what MMU_init will do. */ -static void mm_cmdline_setup(void) +static void __init mm_cmdline_setup(void) { unsigned long maxmem = 0; char *p = cmd_line; From f21bb37afbba0878c8d417cd861e43d014119845 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:51 +0800 Subject: [PATCH 2003/2157] mm: pgtable: make generic tlb_remove_table() use struct ptdesc Patch series "remove tlb_remove_page_ptdesc()", v2. As suggested by Peter Zijlstra below [1], this series aims to remove tlb_remove_page_ptdesc(). : Fundamentally tlb_remove_page() is about removing *pages* as from a PTE, : there should not be a page-table anywhere near here *ever*. : : Yes, some architectures use tlb_remove_page() for page-tables too, but : that is more or less an implementation detail that can be fixed. After this series, all architectures use tlb_remove_table() or tlb_remove_ptdesc() to remove the page table pages. In the future, once all architectures using tlb_remove_table() have also converted to using struct ptdesc (eg. powerpc), it may be possible to use only tlb_remove_ptdesc(). [1] https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/ This patch (of 6): Now only arm will call tlb_remove_ptdesc()/tlb_remove_table() when CONFIG_MMU_GATHER_TABLE_FREE is disabled. In this case, the type of the table parameter is actually struct ptdesc * instead of struct page *. Since struct ptdesc still overlaps with struct page and has not been separated from it, forcing the table parameter to struct page * will not cause any problems at this time. But this is definitely incorrect and needs to be fixed. So just like the generic __tlb_remove_table(), let generic tlb_remove_table() use struct ptdesc by default when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Link: https://lkml.kernel.org/r/cover.1740454179.git.zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/5be8c3ab7bd68510bf0db4cf84010f4dfe372917.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Kevin Brodsky Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index d1adfba8387e..e27d24dd8d5e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -227,10 +227,10 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page); */ static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) { - struct page *page = (struct page *)table; + struct ptdesc *ptdesc = (struct ptdesc *)table; - pagetable_dtor(page_ptdesc(page)); - tlb_remove_page(tlb, page); + pagetable_dtor(ptdesc); + tlb_remove_page(tlb, ptdesc_page(ptdesc)); } #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ From 1a03c275a3ad4e47d479a63037b72f2b305f0c13 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:52 +0800 Subject: [PATCH 2004/2157] mm: pgtable: change pt parameter of tlb_remove_ptdesc() to struct ptdesc* All callers of tlb_remove_ptdesc() pass it a pointer of struct ptdesc, so let's change the pt parameter from void * to struct ptdesc * to perform a type safety check. Link: https://lkml.kernel.org/r/60bb44299cf2d731df6592e446e7f694054d0dbe.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Originally-by: Peter Zijlstra (Intel) Reviewed-by: Kevin Brodsky Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e27d24dd8d5e..bf845019af36 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -493,7 +493,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { tlb_remove_table(tlb, pt); } From e3ecf7c7d0829a00a4fb02531338ab9e7e75ea0d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:53 +0800 Subject: [PATCH 2005/2157] mm: pgtable: convert some architectures to use tlb_remove_ptdesc() Now, the nine architectures of csky, hexagon, loongarch, m68k, mips, nios2, openrisc, sh and um do not select CONFIG_MMU_GATHER_RCU_TABLE_FREE, and just call pagetable_dtor() + tlb_remove_page_ptdesc() (the wrapper of tlb_remove_page()). This is the same as the implementation of tlb_remove_{ptdesc|table}() under !CONFIG_MMU_GATHER_TABLE_FREE, so convert these architectures to use tlb_remove_ptdesc(). The ultimate goal is to make the architecture only use tlb_remove_ptdesc() or tlb_remove_table() for page table pages. [zhengqi.arch@bytedance.com: v2] Link: https://lkml.kernel.org/r/20250303072603.45423-1-zhengqi.arch@bytedance.com [akpm@linux-foundation.org: remove trailing semi in arch/loongarch/include/asm/pgalloc.h] Link: https://lkml.kernel.org/r/19db3e8673b67bad2f1df1ab37f1c89d99eacfea.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Suggested-by: Peter Zijlstra (Intel) Reviewed-by: Kevin Brodsky Acked-by: Geert Uytterhoeven [m68k] Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- arch/csky/include/asm/pgalloc.h | 7 ++----- arch/hexagon/include/asm/pgalloc.h | 7 ++----- arch/loongarch/include/asm/pgalloc.h | 7 ++----- arch/m68k/include/asm/sun3_pgalloc.h | 7 ++----- arch/mips/include/asm/pgalloc.h | 7 ++----- arch/nios2/include/asm/pgalloc.h | 7 ++----- arch/openrisc/include/asm/pgalloc.h | 7 ++----- arch/sh/include/asm/pgalloc.h | 7 ++----- arch/um/include/asm/pgalloc.h | 21 ++++++--------------- 9 files changed, 22 insertions(+), 55 deletions(-) diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index bf8400c28b5a..11055c574968 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -61,11 +61,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) extern void pagetable_init(void); extern void mmu_init(unsigned long min_pfn, unsigned long max_pfn); diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 1ee5f5f157ca..937a11ef4c33 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -87,10 +87,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, max_kernel_seg = pmdindex; } -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor((page_ptdesc(pte))); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 7211dff8c969..b58f587f0f0a 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -55,11 +55,8 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) return pte; } -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 80afc3a18724..1e21c758b774 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -17,11 +17,8 @@ extern const char bad_pmd_string[]; -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 26c7a6ede983..bbca420c96d3 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -48,11 +48,8 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(void *addr); extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 12a536b7bfbd..db122b093a8b 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -28,10 +28,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ - do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ - } while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif /* _ASM_NIOS2_PGALLOC_H */ diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 3372f4e6ab4b..3f110931d8f6 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -64,10 +64,7 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm) extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 96d938fdf224..6fe7123d38fa 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -32,10 +32,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif /* __ASM_SH_PGALLOC_H */ diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index f0af23c3aeb2..826ec44b58cd 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -25,27 +25,18 @@ */ extern pgd_t *pgd_alloc(struct mm_struct *); -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #if CONFIG_PGTABLE_LEVELS > 2 -#define __pmd_free_tlb(tlb, pmd, address) \ -do { \ - pagetable_dtor(virt_to_ptdesc(pmd)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ -} while (0) +#define __pmd_free_tlb(tlb, pmd, address) \ + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pmd)) #if CONFIG_PGTABLE_LEVELS > 3 -#define __pud_free_tlb(tlb, pud, address) \ -do { \ - pagetable_dtor(virt_to_ptdesc(pud)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ -} while (0) +#define __pud_free_tlb(tlb, pud, address) \ + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pud)) #endif #endif From 4239c198e8410b2b5a2b638daf8777e6407d9fc7 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:54 +0800 Subject: [PATCH 2006/2157] riscv: pgtable: unconditionally use tlb_remove_ptdesc() To support fast gup, the commit 69be3fb111e7 ("riscv: enable MMU_GATHER_RCU_TABLE_FREE for SMP && MMU") did the following: 1) use tlb_remove_page_ptdesc() for those platforms which use IPI to perform TLB shootdown 2) use tlb_remove_ptdesc() for those platforms which use SBI to perform TLB shootdown The tlb_remove_page_ptdesc() is the wrapper of the tlb_remove_page(). By design, the tlb_remove_page() should be used to remove a normal page from a page table entry, and should not be used for page table pages. The tlb_remove_ptdesc() is the wrapper of the tlb_remove_table(), which is designed specifically for freeing page table pages. If the CONFIG_MMU_GATHER_TABLE_FREE is enabled, the tlb_remove_table() will use semi RCU to free page table pages, that is: - batch table freeing: asynchronous free by RCU - single table freeing: IPI + synchronous free If the CONFIG_MMU_GATHER_TABLE_FREE is disabled, the tlb_remove_table() will fall back to pagetable_dtor() + tlb_remove_page(). For case 1), since we need to perform TLB shootdown before freeing the page table page, the local_irq_save() in fast gup can block the freeing and protect the fast gup page walker. Therefore we can ensure safety by just using tlb_remove_page_ptdesc(). In addition, we can also the tlb_remove_ptdesc()/tlb_remove_table() to achieve it, and it doesn't matter whether CONFIG_MMU_GATHER_RCU_TABLE_FREE is selected. And in theory, the performance of freeing pages asynchronously via RCU will not be lower than synchronous free. For case 2), since local_irq_save() only disable S-privilege IPI irq but not M-privilege's, which is used by the SBI implementation to perform TLB shootdown, so we must select CONFIG_MMU_GATHER_RCU_TABLE_FREE and use tlb_remove_ptdesc() to ensure safety. The riscv selects this config for SMP && MMU, the CONFIG_RISCV_SBI is dependent on MMU. Therefore, only the UP system may have the situation where CONFIG_MMU_GATHER_RCU_TABLE_FREE is disabled but CONFIG_RISCV_SBI is enabled. But there is no freeing vs fast gup race in the UP system. So, in summary, we can use tlb_remove_ptdesc() to support fast gup in all cases, and this interface is specifically designed for page table pages. So let's use it unconditionally. Link: https://lkml.kernel.org/r/9025595e895515515c95e48db54b29afa489c41d.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Suggested-by: Peter Zijlstra (Intel) Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Kevin Brodsky Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- arch/riscv/include/asm/pgalloc.h | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 3e2aebea6312..770ce18a7328 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,24 +15,6 @@ #define __HAVE_ARCH_PUD_FREE #include -/* - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h - * for more details. - */ -static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) -{ - if (riscv_use_sbi_for_rfence()) { - tlb_remove_ptdesc(tlb, pt); - } else { - pagetable_dtor(pt); - tlb_remove_page_ptdesc(tlb, pt); - } -} - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -108,14 +90,14 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long addr) { if (pgtable_l4_enabled) - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { if (pgtable_l5_enabled) - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -143,7 +125,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -151,7 +133,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #endif /* CONFIG_MMU */ From f1fdec956f63f7cafc1706e5d907bc9c4d241083 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:55 +0800 Subject: [PATCH 2007/2157] x86: pgtable: convert to use tlb_remove_ptdesc() The x86 has already been converted to use struct ptdesc, so convert it to use tlb_remove_ptdesc() instead of tlb_remove_table(). Link: https://lkml.kernel.org/r/36ad56b7e06fa4b17fb23c4fc650e8e0d72bb3cd.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Kevin Brodsky Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- arch/x86/mm/pgtable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index cec321fb74f2..a05fcddfc811 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -20,7 +20,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { paravirt_release_pte(page_to_pfn(pte)); - tlb_remove_table(tlb, page_ptdesc(pte)); + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -34,21 +34,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - tlb_remove_table(tlb, virt_to_ptdesc(pmd)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - tlb_remove_table(tlb, virt_to_ptdesc(pud)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); - tlb_remove_table(tlb, virt_to_ptdesc(p4d)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ From 02d9e1a2048e47d39733f0ced71ce8e8fee3e56d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 25 Feb 2025 11:45:56 +0800 Subject: [PATCH 2008/2157] mm: pgtable: remove tlb_remove_page_ptdesc() The tlb_remove_ptdesc()/tlb_remove_table() is specially designed for page table pages, and now all architectures have been converted to use it to remove page table pages. So let's remove tlb_remove_page_ptdesc(), it currently has no users and should not be used for page table pages. Link: https://lkml.kernel.org/r/3df04c8494339073b71be4acb2d92e108ecd1b60.1740454179.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Suggested-by: Peter Zijlstra (Intel) Reviewed-by: Kevin Brodsky Cc: Alexandre Ghiti Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Hildenbrand Cc: Hugh Dickens Cc: Jann Horn Cc: Matthew Wilcow (Oracle) Cc: "Mike Rapoport (IBM)" Cc: Muchun Song Cc: Nicholas Piggin Cc: Rik van Riel Cc: Vishal Moola (Oracle) Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index bf845019af36..88a42973fa47 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -498,12 +498,6 @@ static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) tlb_remove_table(tlb, pt); } -/* Like tlb_remove_ptdesc, but for page-like page directories. */ -static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) -{ - tlb_remove_page(tlb, ptdesc_page(pt)); -} - static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { From 5796d3967c0956734bd1249f76989ca80da0225b Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:05 +0000 Subject: [PATCH 2009/2157] mseal sysmap: kernel config and header change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mseal system mappings", v9. As discussed during mseal() upstream process [1], mseal() protects the VMAs of a given virtual memory range against modifications, such as the read/write (RW) and no-execute (NX) bits. For complete descriptions of memory sealing, please see mseal.rst [2]. The mseal() is useful to mitigate memory corruption issues where a corrupted pointer is passed to a memory management system. For example, such an attacker primitive can break control-flow integrity guarantees since read-only memory that is supposed to be trusted can become writable or .text pages can get remapped. The system mappings are readonly only, memory sealing can protect them from ever changing to writable or unmmap/remapped as different attributes. System mappings such as vdso, vvar, vvar_vclock, vectors (arm compat-mode), sigpage (arm compat-mode), are created by the kernel during program initialization, and could be sealed after creation. Unlike the aforementioned mappings, the uprobe mapping is not established during program startup. However, its lifetime is the same as the process's lifetime [3]. It could be sealed from creation. The vsyscall on x86-64 uses a special address (0xffffffffff600000), which is outside the mm managed range. This means mprotect, munmap, and mremap won't work on the vsyscall. Since sealing doesn't enhance the vsyscall's security, it is skipped in this patch. If we ever seal the vsyscall, it is probably only for decorative purpose, i.e. showing the 'sl' flag in the /proc/pid/smaps. For this patch, it is ignored. It is important to note that the CHECKPOINT_RESTORE feature (CRIU) may alter the system mappings during restore operations. UML(User Mode Linux) and gVisor, rr are also known to change the vdso/vvar mappings. Consequently, this feature cannot be universally enabled across all systems. As such, CONFIG_MSEAL_SYSTEM_MAPPINGS is disabled by default. To support mseal of system mappings, architectures must define CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS and update their special mappings calls to pass mseal flag. Additionally, architectures must confirm they do not unmap/remap system mappings during the process lifetime. The existence of this flag for an architecture implies that it does not require the remapping of thest system mappings during process lifetime, so sealing these mappings is safe from a kernel perspective. This version covers x86-64 and arm64 archiecture as minimum viable feature. While no specific CPU hardware features are required for enable this feature on an archiecture, memory sealing requires a 64-bit kernel. Other architectures can choose whether or not to adopt this feature. Currently, I'm not aware of any instances in the kernel code that actively munmap/mremap a system mapping without a request from userspace. The PPC does call munmap when _install_special_mapping fails for vdso; however, it's uncertain if this will ever fail for PPC - this needs to be investigated by PPC in the future [4]. The UML kernel can add this support when KUnit tests require it [5]. In this version, we've improved the handling of system mapping sealing from previous versions, instead of modifying the _install_special_mapping function itself, which would affect all architectures, we now call _install_special_mapping with a sealing flag only within the specific architecture that requires it. This targeted approach offers two key advantages: 1) It limits the code change's impact to the necessary architectures, and 2) It aligns with the software architecture by keeping the core memory management within the mm layer, while delegating the decision of sealing system mappings to the individual architecture, which is particularly relevant since 32-bit architectures never require sealing. Prior to this patch series, we explored sealing special mappings from userspace using glibc's dynamic linker. This approach revealed several issues: - The PT_LOAD header may report an incorrect length for vdso, (smaller than its actual size). The dynamic linker, which relies on PT_LOAD information to determine mapping size, would then split and partially seal the vdso mapping. Since each architecture has its own vdso/vvar code, fixing this in the kernel would require going through each archiecture. Our initial goal was to enable sealing readonly mappings, e.g. .text, across all architectures, sealing vdso from kernel since creation appears to be simpler than sealing vdso at glibc. - The [vvar] mapping header only contains address information, not length information. Similar issues might exist for other special mappings. - Mappings like uprobe are not covered by the dynamic linker, and there is no effective solution for them. This feature's security enhancements will benefit ChromeOS, Android, and other high security systems. Testing: This feature was tested on ChromeOS and Android for both x86-64 and ARM64. - Enable sealing and verify vdso/vvar, sigpage, vector are sealed properly, i.e. "sl" shown in the smaps for those mappings, and mremap is blocked. - Passing various automation tests (e.g. pre-checkin) on ChromeOS and Android to ensure the sealing doesn't affect the functionality of Chromebook and Android phone. I also tested the feature on Ubuntu on x86-64: - With config disabled, vdso/vvar is not sealed, - with config enabled, vdso/vvar is sealed, and booting up Ubuntu is OK, normal operations such as browsing the web, open/edit doc are OK. Link: https://lore.kernel.org/all/20240415163527.626541-1-jeffxu@chromium.org/ [1] Link: Documentation/userspace-api/mseal.rst [2] Link: https://lore.kernel.org/all/CABi2SkU9BRUnqf70-nksuMCQ+yyiWjo3fM4XkRkL-NrCZxYAyg@mail.gmail.com/ [3] Link: https://lore.kernel.org/all/CABi2SkV6JJwJeviDLsq9N4ONvQ=EFANsiWkgiEOjyT9TQSt+HA@mail.gmail.com/ [4] Link: https://lore.kernel.org/all/202502251035.239B85A93@keescook/ [5] This patch (of 7): Provide infrastructure to mseal system mappings. Establish two kernel configs (CONFIG_MSEAL_SYSTEM_MAPPINGS, ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS) and VM_SEALED_SYSMAP macro for future patches. Link: https://lkml.kernel.org/r/20250305021711.3867874-1-jeffxu@google.com Link: https://lkml.kernel.org/r/20250305021711.3867874-2-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Kees Cook Reviewed-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 10 ++++++++++ init/Kconfig | 22 ++++++++++++++++++++++ security/Kconfig | 21 +++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 32ba0e33422b..778f5de6a12e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4236,4 +4236,14 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + +/* + * mseal of userspace process's system mappings. + */ +#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS +#define VM_SEALED_SYSMAP VM_SEALED +#else +#define VM_SEALED_SYSMAP VM_NONE +#endif + #endif /* _LINUX_MM_H */ diff --git a/init/Kconfig b/init/Kconfig index 681f38ee68db..18717967fc8c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1888,6 +1888,28 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS config ARCH_HAS_MEMBARRIER_SYNC_CORE bool +config ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS + bool + help + Control MSEAL_SYSTEM_MAPPINGS access based on architecture. + + A 64-bit kernel is required for the memory sealing feature. + No specific hardware features from the CPU are needed. + + To enable this feature, the architecture needs to update their + special mappings calls to include the sealing flag and confirm + that it doesn't unmap/remap system mappings during the life + time of the process. The existence of this flag for an architecture + implies that it does not require the remapping of the system + mappings during process lifetime, so sealing these mappings is safe + from a kernel perspective. + + After the architecture enables this, a distribution can set + CONFIG_MSEAL_SYSTEM_MAPPING to manage access to the feature. + + For complete descriptions of memory sealing, please see + Documentation/userspace-api/mseal.rst + config HAVE_PERF_EVENTS bool help diff --git a/security/Kconfig b/security/Kconfig index 536061cf33a9..4816fc74f81e 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -51,6 +51,27 @@ config PROC_MEM_NO_FORCE endchoice +config MSEAL_SYSTEM_MAPPINGS + bool "mseal system mappings" + depends on 64BIT + depends on ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS + depends on !CHECKPOINT_RESTORE + help + Apply mseal on system mappings. + The system mappings includes vdso, vvar, vvar_vclock, + vectors (arm compat-mode), sigpage (arm compat-mode), uprobes. + + A 64-bit kernel is required for the memory sealing feature. + No specific hardware features from the CPU are needed. + + WARNING: This feature breaks programs which rely on relocating + or unmapping system mappings. Known broken software at the time + of writing includes CHECKPOINT_RESTORE, UML, gVisor, rr. Therefore + this config can't be enabled universally. + + For complete descriptions of memory sealing, please see + Documentation/userspace-api/mseal.rst + config SECURITY bool "Enable different security models" depends on SYSFS From 7b0141daf34c5d9b3c665e609d293e37cc692734 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:06 +0000 Subject: [PATCH 2010/2157] selftests: x86: test_mremap_vdso: skip if vdso is msealed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add code to detect if the vdso is memory sealed, skip the test if it is. Link: https://lkml.kernel.org/r/20250305021711.3867874-3-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Kees Cook Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- .../testing/selftests/x86/test_mremap_vdso.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index d53959e03593..94bee6e0c813 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -55,13 +56,55 @@ static int try_to_remap(void *vdso_addr, unsigned long size) } +#define VDSO_NAME "[vdso]" +#define VMFLAGS "VmFlags:" +#define MSEAL_FLAGS "sl" +#define MAX_LINE_LEN 512 + +bool vdso_sealed(FILE *maps) +{ + char line[MAX_LINE_LEN]; + bool has_vdso = false; + + while (fgets(line, sizeof(line), maps)) { + if (strstr(line, VDSO_NAME)) + has_vdso = true; + + if (has_vdso && !strncmp(line, VMFLAGS, strlen(VMFLAGS))) { + if (strstr(line, MSEAL_FLAGS)) + return true; + + return false; + } + } + + return false; +} + int main(int argc, char **argv, char **envp) { pid_t child; + FILE *maps; ksft_print_header(); ksft_set_plan(1); + maps = fopen("/proc/self/smaps", "r"); + if (!maps) { + ksft_test_result_skip( + "Could not open /proc/self/smaps, errno=%d\n", + errno); + + return 0; + } + + if (vdso_sealed(maps)) { + ksft_test_result_skip("vdso is sealed\n"); + return 0; + } + + fclose(maps); + child = fork(); if (child == -1) ksft_exit_fail_msg("failed to fork (%d): %m\n", errno); From 1d6fad7b844cffc85024a362fdcc3bef696dfe2e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Mar 2025 13:33:25 +0100 Subject: [PATCH 2011/2157] mseal sysmap: generic vdso vvar mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of the generic vdso data storage the VM_SEALED_SYSMAP vm flag must be moved from the architecture specific _install_special_mapping() call [1] [2] which maps the vvar mapping to generic code. [1] https://lkml.kernel.org/r/20250305021711.3867874-4-jeffxu@google.com [2] https://lkml.kernel.org/r/20250305021711.3867874-5-jeffxu@google.com Link: https://lkml.kernel.org/r/20250311123326.2686682-2-hca@linux.ibm.com Signed-off-by: Heiko Carstens Reviewed-by: Lorenzo Stoakes Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Jeff Xu Cc: Liam Howlett Cc: Sven Schnelle Cc: Thomas Weißschuh Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/vdso/datastore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index c715e217ec65..3693c6caf2c4 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -99,7 +99,8 @@ const struct vm_special_mapping vdso_vvar_mapping = { struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr) { return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, - VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | VM_PFNMAP, + VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | + VM_PFNMAP | VM_SEALED_SYSMAP, &vdso_vvar_mapping); } From 3049def198481f1d7dfe29c79658e2a0e297a565 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:07 +0000 Subject: [PATCH 2012/2157] mseal sysmap: enable x86-64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide support for CONFIG_MSEAL_SYSTEM_MAPPINGS on x86-64, covering the vdso, vvar, vvar_vclock. Production release testing passes on Android and Chrome OS. Link: https://lkml.kernel.org/r/20250305021711.3867874-4-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Kees Cook Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- arch/x86/Kconfig | 1 + arch/x86/entry/vdso/vma.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9395ec37bb64..1502fd0c3c06 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_PTDUMP + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 9518bf1ddf35..adb299d3b6a1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -162,7 +162,8 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) text_start, image->size, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_SEALED_SYSMAP, &vdso_mapping); if (IS_ERR(vma)) { @@ -181,7 +182,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) VDSO_VCLOCK_PAGES_START(addr), VDSO_NR_VCLOCK_PAGES * PAGE_SIZE, VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, + VM_PFNMAP|VM_SEALED_SYSMAP, &vvar_vclock_mapping); if (IS_ERR(vma)) { From 0061b6e162adaaedb84093cd6908ddf8c85d5b47 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:08 +0000 Subject: [PATCH 2013/2157] mseal sysmap: enable arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide support for CONFIG_MSEAL_SYSTEM_MAPPINGS on arm64, covering the vdso, vvar, and compat-mode vectors and sigpage mappings. Production release testing passes on Android and Chrome OS. Link: https://lkml.kernel.org/r/20250305021711.3867874-5-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Kees Cook Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/vdso.c | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 748c34dc953c..a182295e6f08 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -38,6 +38,7 @@ config ARM64 select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 887ac0b05961..78ddf6bdecad 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -130,7 +130,8 @@ static int __setup_additional_pages(enum vdso_abi abi, mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC|gp_flags| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_SEALED_SYSMAP, vdso_info[abi].cm); if (IS_ERR(ret)) goto up_fail; @@ -256,7 +257,8 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) */ ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC, + VM_MAYREAD | VM_MAYEXEC | + VM_SEALED_SYSMAP, &aarch32_vdso_maps[AA32_MAP_VECTORS]); return PTR_ERR_OR_ZERO(ret); @@ -279,7 +281,8 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) */ ret = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | - VM_MAYWRITE | VM_MAYEXEC, + VM_MAYWRITE | VM_MAYEXEC | + VM_SEALED_SYSMAP, &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); if (IS_ERR(ret)) goto out; From 3d38922abff330ec2ec8d0d6d38b647d121a0be9 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:09 +0000 Subject: [PATCH 2014/2157] mseal sysmap: uprobe mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide support to mseal the uprobe mapping. Unlike other system mappings, the uprobe mapping is not established during program startup. However, its lifetime is the same as the process's lifetime. It could be sealed from creation. Test was done with perf tool, and observe the uprobe mapping is sealed. Link: https://lkml.kernel.org/r/20250305021711.3867874-6-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Oleg Nesterov Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Kees Cook Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2746791ce1e2..615b4e6d22c7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1703,7 +1703,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) } vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, - VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, + VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO| + VM_SEALED_SYSMAP, &xol_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); From a8c15bb4008cb79dc6095a6f6e67441e36fb4e99 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:10 +0000 Subject: [PATCH 2015/2157] mseal sysmap: update mseal.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update memory sealing documentation to include details about system mappings. Link: https://lkml.kernel.org/r/20250305021711.3867874-7-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Kees Cook Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- Documentation/userspace-api/mseal.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst index 41102f74c5e2..56aee46a9307 100644 --- a/Documentation/userspace-api/mseal.rst +++ b/Documentation/userspace-api/mseal.rst @@ -130,6 +130,26 @@ Use cases - Chrome browser: protect some security sensitive data structures. +- System mappings: + The system mappings are created by the kernel and includes vdso, vvar, + vvar_vclock, vectors (arm compat-mode), sigpage (arm compat-mode), uprobes. + + Those system mappings are readonly only or execute only, memory sealing can + protect them from ever changing to writable or unmmap/remapped as different + attributes. This is useful to mitigate memory corruption issues where a + corrupted pointer is passed to a memory management system. + + If supported by an architecture (CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS), + the CONFIG_MSEAL_SYSTEM_MAPPINGS seals all system mappings of this + architecture. + + The following architectures currently support this feature: x86-64 and arm64. + + WARNING: This feature breaks programs which rely on relocating + or unmapping system mappings. Known broken software at the time + of writing includes CHECKPOINT_RESTORE, UML, gVisor, rr. Therefore + this config can't be enabled universally. + When not to use mseal ===================== Applications can apply sealing to any virtual memory region from userspace, From b481341e4cfbc89bbb87cd0a24abef29ebfb49c7 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Wed, 5 Mar 2025 02:17:11 +0000 Subject: [PATCH 2016/2157] selftest: test system mappings are sealed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sysmap_is_sealed.c to test system mappings are sealed. Note: CONFIG_MSEAL_SYSTEM_MAPPINGS must be set, as indicated in config file. Link: https://lkml.kernel.org/r/20250305021711.3867874-8-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Lorenzo Stoakes Reviewed-by: Kees Cook Cc: Adhemerval Zanella Cc: Alexander Mikhalitsyn Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Anna-Maria Behnsen Cc: Ard Biesheuvel Cc: Benjamin Berg Cc: Christoph Hellwig Cc: Dave Hansen Cc: David Rientjes Cc: David S. Miller Cc: Elliot Hughes Cc: Florian Faineli Cc: Greg Ungerer Cc: Guenter Roeck Cc: Heiko Carstens Cc: Helge Deller Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jann Horn Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Jorge Lucangeli Obes Cc: Liam R. Howlett Cc: Linus Waleij Cc: Mark Rutland Cc: Matthew Wilcow (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Miguel Ojeda Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Pedro Falcato Cc: Peter Xu Cc: Randy Dunlap Cc: Stephen Röttger Cc: Thomas Weißschuh Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/Makefile | 1 + .../mseal_system_mappings/.gitignore | 2 + .../selftests/mseal_system_mappings/Makefile | 6 + .../selftests/mseal_system_mappings/config | 1 + .../mseal_system_mappings/sysmap_is_sealed.c | 119 ++++++++++++++++++ 5 files changed, 129 insertions(+) create mode 100644 tools/testing/selftests/mseal_system_mappings/.gitignore create mode 100644 tools/testing/selftests/mseal_system_mappings/Makefile create mode 100644 tools/testing/selftests/mseal_system_mappings/config create mode 100644 tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2694344274bf..c77c8c8e3d9b 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -62,6 +62,7 @@ TARGETS += mount TARGETS += mount_setattr TARGETS += move_mount_set_group TARGETS += mqueue +TARGETS += mseal_system_mappings TARGETS += nci TARGETS += net TARGETS += net/af_unix diff --git a/tools/testing/selftests/mseal_system_mappings/.gitignore b/tools/testing/selftests/mseal_system_mappings/.gitignore new file mode 100644 index 000000000000..319c497a595e --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +sysmap_is_sealed diff --git a/tools/testing/selftests/mseal_system_mappings/Makefile b/tools/testing/selftests/mseal_system_mappings/Makefile new file mode 100644 index 000000000000..2b4504e2f52f --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) + +TEST_GEN_PROGS := sysmap_is_sealed + +include ../lib.mk diff --git a/tools/testing/selftests/mseal_system_mappings/config b/tools/testing/selftests/mseal_system_mappings/config new file mode 100644 index 000000000000..675cb9f37b86 --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/config @@ -0,0 +1 @@ +CONFIG_MSEAL_SYSTEM_MAPPINGS=y diff --git a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c new file mode 100644 index 000000000000..0d2af30c3bf5 --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * test system mappings are sealed when + * KCONFIG_MSEAL_SYSTEM_MAPPINGS=y + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +#define VMFLAGS "VmFlags:" +#define MSEAL_FLAGS "sl" +#define MAX_LINE_LEN 512 + +bool has_mapping(char *name, FILE *maps) +{ + char line[MAX_LINE_LEN]; + + while (fgets(line, sizeof(line), maps)) { + if (strstr(line, name)) + return true; + } + + return false; +} + +bool mapping_is_sealed(char *name, FILE *maps) +{ + char line[MAX_LINE_LEN]; + + while (fgets(line, sizeof(line), maps)) { + if (!strncmp(line, VMFLAGS, strlen(VMFLAGS))) { + if (strstr(line, MSEAL_FLAGS)) + return true; + + return false; + } + } + + return false; +} + +FIXTURE(basic) { + FILE *maps; +}; + +FIXTURE_SETUP(basic) +{ + self->maps = fopen("/proc/self/smaps", "r"); + if (!self->maps) + SKIP(return, "Could not open /proc/self/smap, errno=%d", + errno); +}; + +FIXTURE_TEARDOWN(basic) +{ + if (self->maps) + fclose(self->maps); +}; + +FIXTURE_VARIANT(basic) +{ + char *name; + bool sealed; +}; + +FIXTURE_VARIANT_ADD(basic, vdso) { + .name = "[vdso]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vvar) { + .name = "[vvar]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vvar_vclock) { + .name = "[vvar_vclock]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, sigpage) { + .name = "[sigpage]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vectors) { + .name = "[vectors]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, uprobes) { + .name = "[uprobes]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, stack) { + .name = "[stack]", + .sealed = false, +}; + +TEST_F(basic, check_sealed) +{ + if (!has_mapping(variant->name, self->maps)) { + SKIP(return, "could not find the mapping, %s", + variant->name); + } + + EXPECT_EQ(variant->sealed, + mapping_is_sealed(variant->name, self->maps)); +}; + +TEST_HARNESS_MAIN From 24e3f9fbbd5d93afb41af495c008e76a9005dd06 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Mar 2025 13:33:26 +0100 Subject: [PATCH 2017/2157] mseal sysmap: enable s390 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide support for CONFIG_MSEAL_SYSTEM_MAPPINGS on s390, covering the vdso. [hca@linux.ibm.com: update supported architectures] Link: https://lkml.kernel.org/r/20250317131917.1332402-1-hca@linux.ibm.com Link: https://lkml.kernel.org/r/20250311123326.2686682-3-hca@linux.ibm.com Signed-off-by: Heiko Carstens Reviewed-by: Lorenzo Stoakes Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Jeff Xu Cc: Liam Howlett Cc: Sven Schnelle Cc: Thomas Weißschuh Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- Documentation/userspace-api/mseal.rst | 3 ++- arch/s390/Kconfig | 1 + arch/s390/kernel/vdso.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst index 56aee46a9307..1dabfc29be0d 100644 --- a/Documentation/userspace-api/mseal.rst +++ b/Documentation/userspace-api/mseal.rst @@ -143,7 +143,8 @@ Use cases the CONFIG_MSEAL_SYSTEM_MAPPINGS seals all system mappings of this architecture. - The following architectures currently support this feature: x86-64 and arm64. + The following architectures currently support this feature: x86-64, arm64, + and s390. WARNING: This feature breaks programs which rely on relocating or unmapping system mappings. Known broken software at the time diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c809c486d136..b8fa367c1fc9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -137,6 +137,7 @@ config S390 select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 70c8f9ad13cd..430feb1a5013 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -80,7 +80,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { From e20706d5385b10a6f6a2fe5ad6b1333dad2d1416 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Fri, 21 Mar 2025 03:26:27 +0000 Subject: [PATCH 2018/2157] mseal sysmap: add arch-support txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Documentation/features/core/mseal_sys_mappings/arch-support.txt N/A: the arch is 32bits only and mseal is not supported in 32 bits, therefore N/A (until mseal is available in 32 bits kernel). [jeffxu@chromium.org: update to v3] Link: https://lkml.kernel.org/r/20250324151537.1106542-2-jeffxu@google.com Link: https://lkml.kernel.org/r/20250321032627.4147562-2-jeffxu@google.com Signed-off-by: Jeff Xu Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Eric Dumaze Cc: Geert Uytterhoeven Cc: guoweikang Cc: Heiko Carstens Cc: Kevin Brodsky Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Meghana Malladi Cc: Qi Zheng Cc: Sven Schnelle Cc: Thomas Weißschuh Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- .../core/mseal_sys_mappings/arch-support.txt | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Documentation/features/core/mseal_sys_mappings/arch-support.txt diff --git a/Documentation/features/core/mseal_sys_mappings/arch-support.txt b/Documentation/features/core/mseal_sys_mappings/arch-support.txt new file mode 100644 index 000000000000..c6cab9760d57 --- /dev/null +++ b/Documentation/features/core/mseal_sys_mappings/arch-support.txt @@ -0,0 +1,30 @@ +# +# Feature name: mseal-system-mappings +# Kconfig: ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS +# description: arch supports mseal system mappings +# + ----------------------- + | arch |status| + ----------------------- + | alpha: | TODO | + | arc: | N/A | + | arm: | N/A | + | arm64: | ok | + | csky: | N/A | + | hexagon: | N/A | + | loongarch: | TODO | + | m68k: | N/A | + | microblaze: | N/A | + | mips: | TODO | + | nios2: | N/A | + | openrisc: | N/A | + | parisc: | TODO | + | powerpc: | TODO | + | riscv: | TODO | + | s390: | ok | + | sh: | N/A | + | sparc: | TODO | + | um: | TODO | + | x86: | ok | + | xtensa: | N/A | + ----------------------- From c8b6d5dd348939c7becbe595ac2ffe63ffd55cd0 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Wed, 26 Mar 2025 21:13:55 +0100 Subject: [PATCH 2019/2157] mailmap: add an entry for Nicolas Schier Add mapping to linux.dev address as mail usage at work is limited and my mail provider for private mails is suffering from its own domain name block lists. Link: https://lkml.kernel.org/r/20250326-mailmap-add-entry-for-nicolas-v1-1-3c26579a7fdf@fjasle.eu Signed-off-by: Nicolas Schier Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index c85576e4695f..4f7cd8e23177 100644 --- a/.mailmap +++ b/.mailmap @@ -549,6 +549,8 @@ Nicolas Pitre Nicolas Pitre Nicolas Saenz Julienne Nicolas Saenz Julienne +Nicolas Schier +Nicolas Schier Niklas Söderlund Nikolay Aleksandrov Nikolay Aleksandrov From e2a33a2a3258794891cdd6ca4b1318da6594d157 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Mar 2025 11:26:06 -0400 Subject: [PATCH 2020/2157] lib/sort.c: add _nonatomic() variants with cond_resched() bcachefs calls sort() during recovery to sort all keys it found in the journal, and this may be very large - gigabytes on large machines. This has been causing "task blocked" warnings, so needs a cond_resched(). [kent.overstreet@linux.dev: fix kerneldoc] Link: https://lkml.kernel.org/r/cgsr5a447pxqomc4gvznsp5yroqmif4omd7o5lsr2swifjhoic@yzjjrx2bvrq7 Link: https://lkml.kernel.org/r/20250326152606.2594920-1-kent.overstreet@linux.dev Signed-off-by: Kent Overstreet Cc: Kuan-Wei Chiu Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/sort.h | 11 +++++ lib/sort.c | 110 +++++++++++++++++++++++++++++++------------ 2 files changed, 90 insertions(+), 31 deletions(-) diff --git a/include/linux/sort.h b/include/linux/sort.h index e163287ac6c1..8e5603b10941 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -13,4 +13,15 @@ void sort(void *base, size_t num, size_t size, cmp_func_t cmp_func, swap_func_t swap_func); +/* Versions that periodically call cond_resched(): */ + +void sort_r_nonatomic(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv); + +void sort_nonatomic(void *base, size_t num, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func); + #endif diff --git a/lib/sort.c b/lib/sort.c index 8e73dc55476b..52363995ccc5 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -186,36 +186,13 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) return i / 2; } -/** - * sort_r - sort an array of elements - * @base: pointer to data to sort - * @num: number of elements - * @size: size of each element - * @cmp_func: pointer to comparison function - * @swap_func: pointer to swap function or NULL - * @priv: third argument passed to comparison function - * - * This function does a heapsort on the given array. You may provide - * a swap_func function if you need to do something more than a memory - * copy (e.g. fix up pointers or auxiliary data), but the built-in swap - * avoids a slow retpoline and so is significantly faster. - * - * The comparison function must adhere to specific mathematical - * properties to ensure correct and stable sorting: - * - Antisymmetry: cmp_func(a, b) must return the opposite sign of - * cmp_func(b, a). - * - Transitivity: if cmp_func(a, b) <= 0 and cmp_func(b, c) <= 0, then - * cmp_func(a, c) <= 0. - * - * Sorting time is O(n log n) both on average and worst-case. While - * quicksort is slightly faster on average, it suffers from exploitable - * O(n*n) worst-case behavior and extra memory requirements that make - * it less suitable for kernel use. - */ -void sort_r(void *base, size_t num, size_t size, - cmp_r_func_t cmp_func, - swap_r_func_t swap_func, - const void *priv) +#include + +static void __sort_r(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv, + bool may_schedule) { /* pre-scale counters for performance */ size_t n = num * size, a = (num/2) * size; @@ -286,6 +263,9 @@ void sort_r(void *base, size_t num, size_t size, b = parent(b, lsbit, size); do_swap(base + b, base + c, size, swap_func, priv); } + + if (may_schedule) + cond_resched(); } n -= size; @@ -293,8 +273,63 @@ void sort_r(void *base, size_t num, size_t size, if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0) do_swap(base, base + size, size, swap_func, priv); } + +/** + * sort_r - sort an array of elements + * @base: pointer to data to sort + * @num: number of elements + * @size: size of each element + * @cmp_func: pointer to comparison function + * @swap_func: pointer to swap function or NULL + * @priv: third argument passed to comparison function + * + * This function does a heapsort on the given array. You may provide + * a swap_func function if you need to do something more than a memory + * copy (e.g. fix up pointers or auxiliary data), but the built-in swap + * avoids a slow retpoline and so is significantly faster. + * + * The comparison function must adhere to specific mathematical + * properties to ensure correct and stable sorting: + * - Antisymmetry: cmp_func(a, b) must return the opposite sign of + * cmp_func(b, a). + * - Transitivity: if cmp_func(a, b) <= 0 and cmp_func(b, c) <= 0, then + * cmp_func(a, c) <= 0. + * + * Sorting time is O(n log n) both on average and worst-case. While + * quicksort is slightly faster on average, it suffers from exploitable + * O(n*n) worst-case behavior and extra memory requirements that make + * it less suitable for kernel use. + */ +void sort_r(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv) +{ + __sort_r(base, num, size, cmp_func, swap_func, priv, false); +} EXPORT_SYMBOL(sort_r); +/** + * sort_r_nonatomic - sort an array of elements, with cond_resched + * @base: pointer to data to sort + * @num: number of elements + * @size: size of each element + * @cmp_func: pointer to comparison function + * @swap_func: pointer to swap function or NULL + * @priv: third argument passed to comparison function + * + * Same as sort_r, but preferred for larger arrays as it does a periodic + * cond_resched(). + */ +void sort_r_nonatomic(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv) +{ + __sort_r(base, num, size, cmp_func, swap_func, priv, true); +} +EXPORT_SYMBOL(sort_r_nonatomic); + void sort(void *base, size_t num, size_t size, cmp_func_t cmp_func, swap_func_t swap_func) @@ -304,6 +339,19 @@ void sort(void *base, size_t num, size_t size, .swap = swap_func, }; - return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); + return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, false); } EXPORT_SYMBOL(sort); + +void sort_nonatomic(void *base, size_t num, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func) +{ + struct wrapper w = { + .cmp = cmp_func, + .swap = swap_func, + }; + + return __sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w, true); +} +EXPORT_SYMBOL(sort_nonatomic); From 8b46fdaea819a679da176b879e7b0674a1161a5e Mon Sep 17 00:00:00 2001 From: T Pratham Date: Wed, 19 Mar 2025 16:44:38 +0530 Subject: [PATCH 2021/2157] lib: scatterlist: fix sg_split_phys to preserve original scatterlist offsets The split_sg_phys function was incorrectly setting the offsets of all scatterlist entries (except the first) to 0. Only the first scatterlist entry's offset and length needs to be modified to account for the skip. Setting the rest entries' offsets to 0 could lead to incorrect data access. I am using this function in a crypto driver that I'm currently developing (not yet sent to mailing list). During testing, it was observed that the output scatterlists (except the first one) contained incorrect garbage data. I narrowed this issue down to the call of sg_split(). Upon debugging inside this function, I found that this resetting of offset is the cause of the problem, causing the subsequent scatterlists to point to incorrect memory locations in a page. By removing this code, I am obtaining expected data in all the split output scatterlists. Thus, this was indeed causing observable runtime effects! This patch removes the offending code, ensuring that the page offsets in the input scatterlist are preserved in the output scatterlist. Link: https://lkml.kernel.org/r/20250319111437.1969903-1-t-pratham@ti.com Fixes: f8bcbe62acd0 ("lib: scatterlist: add sg splitting function") Signed-off-by: T Pratham Cc: Robert Jarzmik Cc: Jens Axboe Cc: Kamlesh Gurudasani Cc: Praneeth Bajjuri Cc: Vignesh Raghavendra Cc: Signed-off-by: Andrew Morton --- lib/sg_split.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/sg_split.c b/lib/sg_split.c index 60a0babebf2e..0f89aab5c671 100644 --- a/lib/sg_split.c +++ b/lib/sg_split.c @@ -88,8 +88,6 @@ static void sg_split_phys(struct sg_splitter *splitters, const int nb_splits) if (!j) { out_sg->offset += split->skip_sg0; out_sg->length -= split->skip_sg0; - } else { - out_sg->offset = 0; } sg_dma_address(out_sg) = 0; sg_dma_len(out_sg) = 0; From d6691010523fe1016f482a1e1defcc6289eeea48 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Apr 2025 15:42:38 -0700 Subject: [PATCH 2022/2157] spi: bcm2835: Do not call gpiod_put() on invalid descriptor If we are unable to lookup the chip-select GPIO, the error path will call bcm2835_spi_cleanup() which unconditionally calls gpiod_put() on the cs->gpio variable which we just determined was invalid. Fixes: 21f252cd29f0 ("spi: bcm2835: reduce the abuse of the GPIO API") Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20250401224238.2854256-1-florian.fainelli@broadcom.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 0d1aa6592484..06a81727d74d 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1162,7 +1162,8 @@ static void bcm2835_spi_cleanup(struct spi_device *spi) sizeof(u32), DMA_TO_DEVICE); - gpiod_put(bs->cs_gpio); + if (!IS_ERR(bs->cs_gpio)) + gpiod_put(bs->cs_gpio); spi_set_csgpiod(spi, 0, NULL); kfree(target); From c8b5b7c5da7d0c31c9b7190b4a7bba5281fc4780 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 2 Apr 2025 09:11:23 +0900 Subject: [PATCH 2023/2157] ksmbd: fix null pointer dereference in alloc_preauth_hash() The Client send malformed smb2 negotiate request. ksmbd return error response. Subsequently, the client can send smb2 session setup even thought conn->preauth_info is not allocated. This patch add KSMBD_SESS_NEED_SETUP status of connection to ignore session setup request if smb2 negotiate phase is not complete. Cc: stable@vger.kernel.org Tested-by: Steve French Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-26505 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.h | 11 +++++++++++ fs/smb/server/mgmt/user_session.c | 4 ++-- fs/smb/server/smb2pdu.c | 14 +++++++++++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 91c2318639e7..14620e147dda 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -27,6 +27,7 @@ enum { KSMBD_SESS_EXITING, KSMBD_SESS_NEED_RECONNECT, KSMBD_SESS_NEED_NEGOTIATE, + KSMBD_SESS_NEED_SETUP, KSMBD_SESS_RELEASING }; @@ -187,6 +188,11 @@ static inline bool ksmbd_conn_need_negotiate(struct ksmbd_conn *conn) return READ_ONCE(conn->status) == KSMBD_SESS_NEED_NEGOTIATE; } +static inline bool ksmbd_conn_need_setup(struct ksmbd_conn *conn) +{ + return READ_ONCE(conn->status) == KSMBD_SESS_NEED_SETUP; +} + static inline bool ksmbd_conn_need_reconnect(struct ksmbd_conn *conn) { return READ_ONCE(conn->status) == KSMBD_SESS_NEED_RECONNECT; @@ -217,6 +223,11 @@ static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_conn *conn) WRITE_ONCE(conn->status, KSMBD_SESS_NEED_NEGOTIATE); } +static inline void ksmbd_conn_set_need_setup(struct ksmbd_conn *conn) +{ + WRITE_ONCE(conn->status, KSMBD_SESS_NEED_SETUP); +} + static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_conn *conn) { WRITE_ONCE(conn->status, KSMBD_SESS_NEED_RECONNECT); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 7690f0187dac..3f45f28f6f0f 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -374,13 +374,13 @@ void destroy_previous_session(struct ksmbd_conn *conn, ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); err = ksmbd_conn_wait_idle_sess_id(conn, id); if (err) { - ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_SETUP); goto out; } ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; - ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_SETUP); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f3ca33d3b69a..d24d95d15d87 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1249,7 +1249,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work) } conn->srv_sec_mode = le16_to_cpu(rsp->SecurityMode); - ksmbd_conn_set_need_negotiate(conn); + ksmbd_conn_set_need_setup(conn); err_out: ksmbd_conn_unlock(conn); @@ -1271,6 +1271,9 @@ static int alloc_preauth_hash(struct ksmbd_session *sess, if (sess->Preauth_HashValue) return 0; + if (!conn->preauth_info) + return -ENOMEM; + sess->Preauth_HashValue = kmemdup(conn->preauth_info->Preauth_HashValue, PREAUTH_HASHVALUE_SIZE, KSMBD_DEFAULT_GFP); if (!sess->Preauth_HashValue) @@ -1674,6 +1677,11 @@ int smb2_sess_setup(struct ksmbd_work *work) ksmbd_debug(SMB, "Received smb2 session setup request\n"); + if (!ksmbd_conn_need_setup(conn) && !ksmbd_conn_good(conn)) { + work->send_no_response = 1; + return rc; + } + WORK_BUFFERS(work, req, rsp); rsp->StructureSize = cpu_to_le16(9); @@ -1909,7 +1917,7 @@ int smb2_sess_setup(struct ksmbd_work *work) if (try_delay) { ksmbd_conn_set_need_reconnect(conn); ssleep(5); - ksmbd_conn_set_need_negotiate(conn); + ksmbd_conn_set_need_setup(conn); } } smb2_set_err_rsp(work); @@ -2243,7 +2251,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_free_user(sess->user); sess->user = NULL; } - ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE); + ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP); rsp->StructureSize = cpu_to_le16(4); err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); From 22c7f77247a84d27b785ec5b706f673421ab269d Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 1 Apr 2025 16:50:08 +0800 Subject: [PATCH 2024/2157] ALSA: hda/realtek - Support mute led function for HP platform This patch was integrated CS Amp and support mute led function for HP platform. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/2c960ab58b4d4090ad4ee075f8cfdffd@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 41 +++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b69b659ae659..eec3ea1a7e08 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7590,6 +7590,24 @@ static void alc245_fixup_hp_spectre_x360_16_aa0xxx(struct hda_codec *codec, alc245_fixup_hp_gpio_led(codec, fix, action); } +static void alc245_fixup_hp_zbook_firefly_g12a(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + static const hda_nid_t conn[] = { 0x02 }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.auto_mute_via_amp = 1; + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + } + + cs35l41_fixup_i2c_two(codec, fix, action); + alc245_fixup_hp_mute_led_coefbit(codec, fix, action); + alc285_fixup_hp_coef_micmute_led(codec, fix, action); +} + /* * ALC287 PCM hooks */ @@ -7938,6 +7956,7 @@ enum { ALC256_FIXUP_HEADPHONE_AMP_VOL, ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX, ALC245_FIXUP_HP_SPECTRE_X360_16_AA0XXX, + ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A, ALC285_FIXUP_ASUS_GA403U, ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC, ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1, @@ -10233,6 +10252,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_spectre_x360_16_aa0xxx, }, + [ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_zbook_firefly_g12a, + }, [ALC285_FIXUP_ASUS_GA403U] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_asus_ga403u, @@ -10773,15 +10796,15 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e12, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e13, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8e14, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e15, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e16, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e17, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e18, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8e14, "HP ZBook Firefly 14 G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e15, "HP ZBook Firefly 14 G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e16, "HP ZBook Firefly 14 G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e17, "HP ZBook Firefly 14 G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e18, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), From 8983dc1b66c0e1928a263b8af0bb06f6cb9229c4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 2 Apr 2025 09:42:07 +0200 Subject: [PATCH 2025/2157] ALSA: hda/realtek: Fix built-in mic on another ASUS VivoBook model There is another VivoBook model which built-in mic got broken recently by the fix of the pin sort. Apply the correct quirk ALC256_FIXUP_ASUS_MIC_NO_PRESENCE to this model for addressing the regression, too. Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Closes: https://lore.kernel.org/Z95s5T6OXFPjRnKf@eldamar.lan Link: https://patch.msgid.link/20250402074208.7347-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index eec3ea1a7e08..79004bc8107b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10889,6 +10889,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x1043, 0x1c80, "ASUS VivoBook TP401", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), From e19c1272c80a5ecce387c1b0c3b995f4edf9c525 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Apr 2025 16:36:03 -0700 Subject: [PATCH 2026/2157] spi: bcm2835: Restore native CS probing when pinctrl-bcm2835 is absent The lookup table forces the use of the "pinctrl-bcm2835" GPIO chip provider and essentially assumes that there is going to be such a provider, and if not, we will fail to set-up the SPI device. While this is true on Raspberry Pi based systems (2835/36/37, 2711, 2712), this is not true on 7712/77122 Broadcom STB systems which use the SPI driver, but not the GPIO driver. There used to be an early check: chip = gpiochip_find("pinctrl-bcm2835", chip_match_name); if (!chip) return 0; which would accomplish that nicely, bring something similar back by checking for the compatible strings matched by the pinctrl-bcm2835.c driver, if there is no Device Tree node matching those compatible strings, then we won't find any GPIO provider registered by the "pinctrl-bcm2835" driver. Fixes: 21f252cd29f0 ("spi: bcm2835: reduce the abuse of the GPIO API") Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20250401233603.2938955-1-florian.fainelli@broadcom.com Acked-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 06a81727d74d..77de5a07639a 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1226,7 +1226,12 @@ static int bcm2835_spi_setup(struct spi_device *spi) struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); struct bcm2835_spidev *target = spi_get_ctldata(spi); struct gpiod_lookup_table *lookup __free(kfree) = NULL; - int ret; + const char *pinctrl_compats[] = { + "brcm,bcm2835-gpio", + "brcm,bcm2711-gpio", + "brcm,bcm7211-gpio", + }; + int ret, i; u32 cs; if (!target) { @@ -1291,6 +1296,14 @@ static int bcm2835_spi_setup(struct spi_device *spi) goto err_cleanup; } + for (i = 0; i < ARRAY_SIZE(pinctrl_compats); i++) { + if (of_find_compatible_node(NULL, NULL, pinctrl_compats[i])) + break; + } + + if (i == ARRAY_SIZE(pinctrl_compats)) + return 0; + /* * TODO: The code below is a slightly better alternative to the utter * abuse of the GPIO API that I found here before. It creates a From 892c4e465c360d07f529bc3668fde7cbd4ea6b32 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Apr 2025 23:09:52 +1100 Subject: [PATCH 2027/2157] docs: Fix references to IBM CAPI (cxl) removal version The IBM CAPI (cxl) driver was removed in 6.15, not 6.14. Signed-off-by: Michael Ellerman --- Documentation/ABI/removed/sysfs-class-cxl | 2 +- Documentation/userspace-api/ioctl/ioctl-number.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/removed/sysfs-class-cxl b/Documentation/ABI/removed/sysfs-class-cxl index 15756a49eba2..266c413b96e8 100644 --- a/Documentation/ABI/removed/sysfs-class-cxl +++ b/Documentation/ABI/removed/sysfs-class-cxl @@ -1,4 +1,4 @@ -The cxl driver was removed in 6.14. +The cxl driver was removed in 6.15. Please note that attributes that are shared between devices are stored in the directory pointed to by the symlink device/. diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 0ed32a70c346..949eadb1ca16 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -375,9 +375,9 @@ Code Seq# Include File Comments 0xB8 all uapi/linux/mshv.h Microsoft Hyper-V /dev/mshv driver 0xC0 00-0F linux/usb/iowarrior.h -0xCA 00-0F uapi/misc/cxl.h Dead since 6.14 +0xCA 00-0F uapi/misc/cxl.h Dead since 6.15 0xCA 10-2F uapi/misc/ocxl.h -0xCA 80-BF uapi/scsi/cxlflash_ioctl.h Dead since 6.14 +0xCA 80-BF uapi/scsi/cxlflash_ioctl.h Dead since 6.15 0xCB 00-1F CBM serial IEC bus in development: 0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver From 8622b20f23ed165f48b8ca61503a107d17f8d585 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Mar 2025 21:51:50 +0800 Subject: [PATCH 2028/2157] io_uring: add validate_fixed_range() for validate fixed buffer Add helper of validate_fixed_range() for validating fixed buffer range. Signed-off-by: Ming Lei Reviewed-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250325135155.935398-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 59b4317b04a7..1e56cd3f55d1 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1002,15 +1002,11 @@ int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index, } EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec); -static int io_import_fixed(int ddir, struct iov_iter *iter, - struct io_mapped_ubuf *imu, - u64 buf_addr, size_t len) +static int validate_fixed_range(u64 buf_addr, size_t len, + const struct io_mapped_ubuf *imu) { u64 buf_end; - size_t offset; - if (WARN_ON_ONCE(!imu)) - return -EFAULT; if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) return -EFAULT; /* not inside the mapped region */ @@ -1018,6 +1014,21 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, return -EFAULT; if (unlikely(len > MAX_RW_COUNT)) return -EFAULT; + return 0; +} + +static int io_import_fixed(int ddir, struct iov_iter *iter, + struct io_mapped_ubuf *imu, + u64 buf_addr, size_t len) +{ + size_t offset; + int ret; + + if (WARN_ON_ONCE(!imu)) + return -EFAULT; + ret = validate_fixed_range(buf_addr, len, imu); + if (unlikely(ret)) + return ret; if (!(imu->dir & (1 << ddir))) return -EFAULT; @@ -1307,12 +1318,12 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base; struct bio_vec *src_bvec; size_t offset; - u64 buf_end; + int ret; + + ret = validate_fixed_range(buf_addr, iov_len, imu); + if (unlikely(ret)) + return ret; - if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end))) - return -EFAULT; - if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len))) - return -EFAULT; if (unlikely(!iov_len)) return -EFAULT; if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) From 149974fdb8e186a1c72b87cee806373624a8a375 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Mar 2025 21:51:51 +0800 Subject: [PATCH 2029/2157] block: add for_each_mp_bvec() Add helper of for_each_mp_bvec() for io_uring to import fixed kernel buffer. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250325135155.935398-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/bvec.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ba8f52d48b94..204b22a99c4b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -184,6 +184,12 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv, ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) +#define for_each_mp_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) + /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ { \ From 1045afae4b8892dab99d320b17bff3b9c1f407d8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Mar 2025 21:51:52 +0800 Subject: [PATCH 2030/2157] io_uring: support vectored kernel fixed buffer io_uring has supported fixed kernel buffer via io_buffer_register_bvec() and io_buffer_unregister_bvec(). The vectored fixed buffer has been ready, so it is natural to support fixed kernel buffer, one use case is ublk. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250325135155.935398-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 1e56cd3f55d1..5e64a8bb30a4 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1362,6 +1362,82 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs, return max_segs; } +static int io_vec_fill_kern_bvec(int ddir, struct iov_iter *iter, + struct io_mapped_ubuf *imu, + struct iovec *iovec, unsigned nr_iovs, + struct iou_vec *vec) +{ + const struct bio_vec *src_bvec = imu->bvec; + struct bio_vec *res_bvec = vec->bvec; + unsigned res_idx = 0; + size_t total_len = 0; + unsigned iov_idx; + + for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) { + size_t offset = (size_t)(uintptr_t)iovec[iov_idx].iov_base; + size_t iov_len = iovec[iov_idx].iov_len; + struct bvec_iter bi = { + .bi_size = offset + iov_len, + }; + struct bio_vec bv; + + bvec_iter_advance(src_bvec, &bi, offset); + for_each_mp_bvec(bv, src_bvec, bi, bi) + res_bvec[res_idx++] = bv; + total_len += iov_len; + } + iov_iter_bvec(iter, ddir, res_bvec, res_idx, total_len); + return 0; +} + +static int iov_kern_bvec_size(const struct iovec *iov, + const struct io_mapped_ubuf *imu, + unsigned int *nr_seg) +{ + size_t offset = (size_t)(uintptr_t)iov->iov_base; + const struct bio_vec *bvec = imu->bvec; + int start = 0, i = 0; + size_t off = 0; + int ret; + + ret = validate_fixed_range(offset, iov->iov_len, imu); + if (unlikely(ret)) + return ret; + + for (i = 0; off < offset + iov->iov_len && i < imu->nr_bvecs; + off += bvec[i].bv_len, i++) { + if (offset >= off && offset < off + bvec[i].bv_len) + start = i; + } + *nr_seg = i - start; + return 0; +} + +static int io_kern_bvec_size(struct iovec *iov, unsigned nr_iovs, + struct io_mapped_ubuf *imu, unsigned *nr_segs) +{ + unsigned max_segs = 0; + size_t total_len = 0; + unsigned i; + int ret; + + *nr_segs = 0; + for (i = 0; i < nr_iovs; i++) { + if (unlikely(!iov[i].iov_len)) + return -EFAULT; + if (unlikely(check_add_overflow(total_len, iov[i].iov_len, + &total_len))) + return -EOVERFLOW; + ret = iov_kern_bvec_size(&iov[i], imu, &max_segs); + if (unlikely(ret)) + return ret; + *nr_segs += max_segs; + } + if (total_len > MAX_RW_COUNT) + return -EINVAL; + return 0; +} + int io_import_reg_vec(int ddir, struct iov_iter *iter, struct io_kiocb *req, struct iou_vec *vec, unsigned nr_iovs, unsigned issue_flags) @@ -1376,14 +1452,20 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter, if (!node) return -EFAULT; imu = node->buf; - if (imu->is_kbuf) - return -EOPNOTSUPP; if (!(imu->dir & (1 << ddir))) return -EFAULT; iovec_off = vec->nr - nr_iovs; iov = vec->iovec + iovec_off; - nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + + if (imu->is_kbuf) { + int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs); + + if (unlikely(ret)) + return ret; + } else { + nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + } if (sizeof(struct bio_vec) > sizeof(struct iovec)) { size_t bvec_bytes; @@ -1410,6 +1492,9 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter, req->flags |= REQ_F_NEED_CLEANUP; } + if (imu->is_kbuf) + return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec); + return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec); } From 57ed58c1325690ff6a46da776e9b42b14a7c37b1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Mar 2025 21:51:53 +0800 Subject: [PATCH 2031/2157] selftests: ublk: enable zero copy for stripe target Use io_uring vectored fixed kernel buffer for handling stripe IO. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250325135155.935398-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/stripe.c | 69 ++++++++++++++++++++------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index d98680d64a2f..c7781efea0f3 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -17,6 +17,7 @@ TEST_PROGS += test_loop_05.sh TEST_PROGS += test_stripe_01.sh TEST_PROGS += test_stripe_02.sh TEST_PROGS += test_stripe_03.sh +TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 98c564b12f3c..179731c3dd6f 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -111,43 +111,67 @@ static void calculate_stripe_array(const struct stripe_conf *conf, } } -static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod) +static inline enum io_uring_op stripe_to_uring_op( + const struct ublksrv_io_desc *iod, int zc) { unsigned ublk_op = ublksrv_get_op(iod); if (ublk_op == UBLK_IO_OP_READ) - return IORING_OP_READV; + return zc ? IORING_OP_READV_FIXED : IORING_OP_READV; else if (ublk_op == UBLK_IO_OP_WRITE) - return IORING_OP_WRITEV; + return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV; assert(0); } static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); - enum io_uring_op op = stripe_to_uring_op(iod); + int zc = !!(ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc); struct io_uring_sqe *sqe[NR_STRIPE]; struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); - int i; + int i, extra = zc ? 2 : 0; io->private_data = s; calculate_stripe_array(conf, iod, s); - ublk_queue_alloc_sqes(q, sqe, s->nr); - for (i = 0; i < s->nr; i++) { - struct stripe *t = &s->s[i]; + ublk_queue_alloc_sqes(q, sqe, s->nr + extra); + + if (zc) { + io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); + } + + for (i = zc; i < s->nr + extra - zc; i++) { + struct stripe *t = &s->s[i - zc]; io_uring_prep_rw(op, sqe[i], t->seq + 1, (void *)t->vec, t->nr_vec, t->start << 9); - io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (zc) { + sqe[i]->buf_index = tag; + io_uring_sqe_set_flags(sqe[i], + IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); + } else { + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + } /* bit63 marks us as tgt io */ - sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1); + sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); } - return s->nr; + if (zc) { + struct io_uring_sqe *unreg = sqe[s->nr + 1]; + + io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag); + unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1); + } + + /* register buffer is skip_success */ + return s->nr + zc; } static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) @@ -208,19 +232,27 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag, struct ublk_io *io = ublk_get_io(q, tag); int res = cqe->res; - if (res < 0) { + if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { if (!io->result) io->result = res; - ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); + if (res < 0) + ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); } + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + /* fail short READ/WRITE simply */ if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { unsigned seq = user_data_to_tgt_data(cqe->user_data); struct stripe_array *s = io->private_data; - if (res < s->s[seq].vec->iov_len) + if (res < s->s[seq].nr_sects << 9) { io->result = -EIO; + ublk_err("%s: short rw op %u res %d exp %u tag %u\n", + __func__, op, res, s->s[seq].vec->iov_len, tag); + } } if (ublk_completed_tgt_io(q, tag)) { @@ -253,7 +285,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; - int ret, i; + int ret, i, mul = 1; if ((chunk_size & (chunk_size - 1)) || !chunk_size) { ublk_err("invalid chunk size %u\n", chunk_size); @@ -295,8 +327,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) dev->tgt.dev_size = bytes; p.basic.dev_sectors = bytes >> 9; dev->tgt.params = p; - dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files; - dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files; + + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + mul = 2; + dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; + dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); From b81ff11c21af688fc8435aad1e5c1463beff8c2d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 1 Apr 2025 11:36:01 -0400 Subject: [PATCH 2032/2157] ftrace: Have tracing function args depend on PROBE_EVENTS_BTF_ARGS The option PROBE_EVENTS_BTF_ARGS enables the functions btf_find_func_proto() and btf_get_func_param() which are used by the function argument tracing code. The option FUNCTION_TRACE_ARGS was dependent on the same configs that PROBE_EVENTS_BTF_ARGS was dependent on, but it was also dependent on PROBE_EVENTS_BTF_ARGS. In fact, if PROBE_EVENTS_BTF_ARGS is supported then FUNCTION_TRACE_ARGS is supported. Just make FUNCTION_TRACE_ARGS depend on PROBE_EVENTS_BTF_ARGS. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Link: https://lore.kernel.org/20250401113601.17fa1129@gandalf.local.home Fixes: 533c20b062d7c ("ftrace: Add print_function_args()") Closes: https://lore.kernel.org/all/DB9PR08MB75820599801BAD118D123D7D93AD2@DB9PR08MB7582.eurprd08.prod.outlook.com/ Reported-by: Christian Loehle Tested-by: Christian Loehle Tested-by: Leon Romanovsky Signed-off-by: Steven Rostedt (Google) --- kernel/trace/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 033fba0633cf..a3f35c7d83b6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -265,8 +265,7 @@ config FUNCTION_GRAPH_RETADDR config FUNCTION_TRACE_ARGS bool - depends on HAVE_FUNCTION_ARG_ACCESS_API - depends on DEBUG_INFO_BTF + depends on PROBE_EVENTS_BTF_ARGS default y help If supported with function argument access API and BTF, then From 2c9ee74a6d6166d0f31f00841cde16a2915fffcb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 1 Apr 2025 12:45:25 -0400 Subject: [PATCH 2033/2157] tracing: Free module_delta on freeing of persistent ring buffer If a persistent ring buffer is created, a "module_delta" array is also allocated to hold the module deltas of loaded modules that match modules in the scratch area. If this buffer gets freed, the module_delta array is not freed and causes a memory leak. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250401124525.1f9ac02a@gandalf.local.home Fixes: 35a380ddbc65 ("tracing: Show last module text symbols in the stacktrace") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 103b193875b3..de6d7f0e6206 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9609,6 +9609,7 @@ static void free_trace_buffers(struct trace_array *tr) return; free_trace_buffer(&tr->array_buffer); + kfree(tr->module_delta); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); From 42ea22e754ba4f2b86f8760ca27f6f71da2d982c Mon Sep 17 00:00:00 2001 From: zhoumin Date: Tue, 1 Apr 2025 01:00:34 +0800 Subject: [PATCH 2034/2157] ftrace: Add cond_resched() to ftrace_graph_set_hash() When the kernel contains a large number of functions that can be traced, the loop in ftrace_graph_set_hash() may take a lot of time to execute. This may trigger the softlockup watchdog. Add cond_resched() within the loop to allow the kernel to remain responsive even when processing a large number of functions. This matches the cond_resched() that is used in other locations of the code that iterates over all functions that can be traced. Cc: stable@vger.kernel.org Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables") Link: https://lore.kernel.org/tencent_3E06CE338692017B5809534B9C5C03DA7705@qq.com Signed-off-by: zhoumin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 92015de6203d..1a48aedb5255 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6855,6 +6855,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) } } } + cond_resched(); } while_for_each_ftrace_rec(); return fail ? -EINVAL : 0; From ea8d7647f9ddf1f81e2027ed305299797299aa03 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Mar 2025 19:53:11 -0400 Subject: [PATCH 2035/2157] tracing: Verify event formats that have "%*p.." The trace event verifier checks the formats of trace events to make sure that they do not point at memory that is not in the trace event itself or in data that will never be freed. If an event references data that was allocated when the event triggered and that same data is freed before the event is read, then the kernel can crash by reading freed memory. The verifier runs at boot up (or module load) and scans the print formats of the events and checks their arguments to make sure that dereferenced pointers are safe. If the format uses "%*p.." the verifier will ignore it, and that could be dangerous. Cover this case as well. Also add to the sample code a use case of "%*pbl". Link: https://lore.kernel.org/all/bcba4d76-2c3f-4d11-baf0-02905db953dd@oracle.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") Link: https://lore.kernel.org/20250327195311.2d89ec66@gandalf.local.home Reported-by: Libo Chen Reviewed-by: Libo Chen Tested-by: Libo Chen Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 7 +++++++ samples/trace_events/trace-events-sample.h | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8638b7f7ff85..069e92856bda 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -470,6 +470,7 @@ static void test_event_printk(struct trace_event_call *call) case '%': continue; case 'p': + do_pointer: /* Find dereferencing fields */ switch (fmt[i + 1]) { case 'B': case 'R': case 'r': @@ -498,6 +499,12 @@ static void test_event_printk(struct trace_event_call *call) continue; if (fmt[i + j] == '*') { star = true; + /* Handle %*pbl case */ + if (!j && fmt[i + 1] == 'p') { + arg++; + i++; + goto do_pointer; + } continue; } if ((fmt[i + j] == 's')) { diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 999f78d380ae..1a05fc153353 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -319,7 +319,8 @@ TRACE_EVENT(foo_bar, __assign_cpumask(cpum, cpumask_bits(mask)); ), - TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s [%d] %*pbl", + __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. This includes: @@ -370,7 +371,10 @@ TRACE_EVENT(foo_bar, __get_str(str), __get_str(lstr), __get_bitmask(cpus), __get_cpumask(cpum), - __get_str(vstr)) + __get_str(vstr), + __get_dynamic_array_len(cpus), + __get_dynamic_array_len(cpus), + __get_dynamic_array(cpus)) ); /* From 023f124a64174c47e18340ded7e2a39b96eb9523 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 2 Apr 2025 03:15:35 +0200 Subject: [PATCH 2036/2157] scripts/sorttable: Fix endianness handling in build-time mcount sort Kernel cross-compilation with BUILDTIME_MCOUNT_SORT produces zeroed mcount values if the build-host endianness does not match the ELF file endianness. The mcount values array is converted from ELF file endianness to build-host endianness during initialization in fill_relocs()/fill_addrs(). Avoid extra conversion of these values during weak-function zeroing; otherwise, they do not match nm-parsed addresses and all mcount values are zeroed out. Cc: Masami Hiramatsu Cc: Catalin Marinas Cc: Nathan Chancellor Cc: Heiko Carstens Cc: Alexander Gordeev Link: https://lore.kernel.org/patch.git-dca31444b0f1.your-ad-here.call-01743554658-ext-8692@work.hours Fixes: ef378c3b8233 ("scripts/sorttable: Zero out weak functions in mcount_loc table") Reported-by: Ilya Leoshkevich Reported-by: Ihor Solodrai Closes: https://lore.kernel.org/all/your-ad-here.call-01743522822-ext-4975@work.hours/ Signed-off-by: Vasily Gorbik Signed-off-by: Steven Rostedt (Google) --- scripts/sorttable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 7b4b3714b1af..deed676bfe38 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -857,7 +857,7 @@ static void *sort_mcount_loc(void *arg) for (void *ptr = vals; ptr < vals + size; ptr += long_size) { uint64_t key; - key = long_size == 4 ? r((uint32_t *)ptr) : r8((uint64_t *)ptr); + key = long_size == 4 ? *(uint32_t *)ptr : *(uint64_t *)ptr; if (!find_func(key)) { if (long_size == 4) *(uint32_t *)ptr = 0; From fc0585c7faa9fffa0ecdd6e2466e3293cd3239ac Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Wed, 2 Apr 2025 09:13:52 +0200 Subject: [PATCH 2037/2157] rv: Fix missing unlock on double nested monitors return path RV doesn't support nested monitors having children monitors themselves and exits with the EINVAL code. However, it returns without unlocking the rv_interface_lock. Unlock the lock before returning from the initialisation function. Cc: Masami Hiramatsu Link: https://lore.kernel.org/20250402071351.19864-2-gmonaco@redhat.com Fixes: cb85c660fcd4 ("rv: Add option for nested monitors and include sched") Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202503310200.UBXGitB4-lkp@intel.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/rv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 50344aa9f7f9..968c5c3b0246 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -809,7 +809,8 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent) if (p && rv_is_nested_monitor(p)) { pr_info("Parent monitor %s is already nested, cannot nest further\n", parent->name); - return -EINVAL; + retval = -EINVAL; + goto out_unlock; } r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL); From 8e5419d6542fdf2dca9a0acdef2b8255f0e4ba69 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Apr 2025 14:02:40 +0300 Subject: [PATCH 2038/2157] nfs: Add missing release on error in nfs_lock_and_join_requests() Call nfs_release_request() on this error path before returning. Fixes: c3f2235782c3 ("nfs: fold nfs_folio_find_and_lock_request into nfs_lock_and_join_requests") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/3aaaa3d5-1c8a-41e4-98c7-717801ddd171@stanley.mountain Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index aa3d8bea3ec0..23df8b214474 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -579,8 +579,10 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) while (!nfs_lock_request(head)) { ret = nfs_wait_on_request(head); - if (ret < 0) + if (ret < 0) { + nfs_release_request(head); return ERR_PTR(ret); + } } /* Ensure that nobody removed the request before we locked it */ From dcffc3b1ae3251d796a25c673f614e3099ca83d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 23:11:08 -0400 Subject: [PATCH 2039/2157] bcachefs: Split up bch_dev.io_ref We now have separate per device io_refs for read and write access. This fixes a device removal bug where the discard workers were still running while we're removing alloc info for that device. It's also a bit of hardening; we no longer allow writes to devices that are read-only. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 14 +++--- fs/bcachefs/backpointers.c | 2 +- fs/bcachefs/bcachefs.h | 4 +- fs/bcachefs/btree_io.c | 14 ++++-- fs/bcachefs/btree_node_scan.c | 8 ++-- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/chardev.c | 2 +- fs/bcachefs/debug.c | 4 +- fs/bcachefs/disk_groups.c | 4 +- fs/bcachefs/ec.c | 11 +++-- fs/bcachefs/fs-io.c | 4 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/io_read.c | 6 +-- fs/bcachefs/io_write.c | 9 +++- fs/bcachefs/journal.c | 6 +-- fs/bcachefs/journal_io.c | 8 ++-- fs/bcachefs/sb-members.h | 23 ++++----- fs/bcachefs/super-io.c | 21 ++++++--- fs/bcachefs/super.c | 85 ++++++++++++++++++++++++---------- 19 files changed, 142 insertions(+), 87 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c12ca7538e4f..1a467bb74a47 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1950,7 +1950,7 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); bch2_write_ref_put(c, BCH_WRITE_REF_discard); } @@ -1967,7 +1967,7 @@ void bch2_dev_do_discards(struct bch_dev *ca) if (queue_work(c->write_ref_wq, &ca->discard_work)) return; - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); put_write_ref: bch2_write_ref_put(c, BCH_WRITE_REF_discard); } @@ -2045,7 +2045,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); } @@ -2065,7 +2065,7 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) return; - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); put_ref: bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); } @@ -2256,7 +2256,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) bch2_trans_iter_exit(trans, &iter); err: bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); bch2_bkey_buf_exit(&last_flushed, c); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } @@ -2274,7 +2274,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca) if (queue_work(c->write_ref_wq, &ca->invalidate_work)) return; - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); put_ref: bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } @@ -2506,7 +2506,7 @@ void bch2_recalc_capacity(struct bch_fs *c) bch2_set_ra_pages(c, ra_pages); - for_each_rw_member(c, ca) { + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { u64 dev_reserve = 0; /* diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 21d1d86d5008..5280dc2d1e3e 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -462,7 +462,7 @@ static int check_extent_checksum(struct btree_trans *trans, if (bio) bio_put(bio); kvfree(data_buf); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f52311017aee..21da4167a3ae 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -524,8 +524,8 @@ struct bch_dev { struct percpu_ref ref; #endif struct completion ref_completion; - struct percpu_ref io_ref; - struct completion io_ref_completion; + struct percpu_ref io_ref[2]; + struct completion io_ref_completion[2]; struct bch_fs *fs; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 871f3f46a0c2..17218699f65d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1353,7 +1353,7 @@ static void btree_node_read_work(struct work_struct *work) "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); rb->have_ioref = false; bch2_mark_io_failure(&failed, &rb->pick, false); @@ -1609,7 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio) struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } ra->err[rb->idx] = bio->bi_status; @@ -1928,7 +1928,7 @@ static void btree_node_scrub_work(struct work_struct *work) printbuf_exit(&err); bch2_bkey_buf_exit(&scrub->key, c);; btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); - percpu_ref_put(&scrub->ca->io_ref); + percpu_ref_put(&scrub->ca->io_ref[READ]); kfree(scrub); bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); } @@ -1997,7 +1997,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans, return 0; err_free: btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); err: bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); return ret; @@ -2159,8 +2159,12 @@ static void btree_node_write_endio(struct bio *bio) spin_unlock_irqrestore(&c->btree_write_error_lock, flags); } + /* + * XXX: we should be using io_ref[WRITE], but we aren't retrying failed + * btree writes yet (due to device removal/ro): + */ if (wbio->have_ioref) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); if (parent) { bio_put(bio); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 25d54b77cdc2..8c9fdb7263fe 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p) err: bio_put(bio); free_page((unsigned long) buf); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); closure_put(w->cl); kfree(w); return 0; @@ -291,7 +291,7 @@ static int read_btree_nodes(struct find_btree_nodes *f) struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL); if (!w) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); ret = -ENOMEM; goto err; } @@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f) struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name); ret = PTR_ERR_OR_ZERO(t); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); kfree(w); bch_err_msg(c, ret, "starting kthread"); break; } closure_get(&cl); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); wake_up_process(t); } err: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 297adb996751..cd4f5de82566 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1132,7 +1132,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, for_each_online_member(c, ca) { int ret = bch2_trans_mark_dev_sb(c, ca, flags); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ret; } } diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 584f4a3eb670..c9d1585eec21 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -615,7 +615,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, for_each_online_member(c, ca) if (ca->dev == dev) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ca->dev_idx; } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 788af88f6979..5a8bc7013512 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -57,7 +57,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, submit_bio_wait(bio); bio_put(bio); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); @@ -297,7 +297,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, if (bio) bio_put(bio); kvfree(n_ondisk); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } #ifdef CONFIG_DEBUG_FS diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 5df8de0b8c02..1186280b29e9 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -555,9 +555,9 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) ? rcu_dereference(c->devs[t.dev]) : NULL; - if (ca && percpu_ref_tryget(&ca->io_ref)) { + if (ca && percpu_ref_tryget(&ca->io_ref[READ])) { prt_printf(out, "/dev/%s", ca->name); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } else if (ca) { prt_printf(out, "offline device %u", t.dev); } else { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 012386036a3e..1618272a606d 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,6 +105,7 @@ struct ec_bio { struct bch_dev *ca; struct ec_stripe_buf *buf; size_t idx; + int rw; u64 submit_time; struct bio bio; }; @@ -704,6 +705,7 @@ static void ec_block_endio(struct bio *bio) struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx]; struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; + int rw = ec_bio->rw; bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); @@ -725,7 +727,7 @@ static void ec_block_endio(struct bio *bio) } bio_put(&ec_bio->bio); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[rw]); closure_put(cl); } @@ -776,6 +778,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ec_bio->ca = ca; ec_bio->buf = buf; ec_bio->idx = idx; + ec_bio->rw = rw; ec_bio->submit_time = local_clock(); ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); @@ -785,14 +788,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); closure_get(cl); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[rw]); submit_bio(&ec_bio->bio); offset += b; } - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[rw]); } static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, @@ -1265,7 +1268,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, ob->sectors_free, GFP_KERNEL, 0); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); if (ret) s->err = ret; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c80ed3a54e70..42709ebe4d93 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -48,7 +48,7 @@ static void nocow_flush_endio(struct bio *_bio) struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); closure_put(bio->cl); - percpu_ref_put(&bio->ca->io_ref); + percpu_ref_put(&bio->ca->io_ref[WRITE]); bio_put(&bio->bio); } @@ -71,7 +71,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { rcu_read_lock(); ca = rcu_dereference(c->devs[dev]); - if (ca && !percpu_ref_tryget(&ca->io_ref)) + if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE])) ca = NULL; rcu_read_unlock(); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index bb303791322a..217e2a7cef20 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2237,7 +2237,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) /* XXX: create an anonymous device for multi device filesystems */ sb->s_bdev = bdev; sb->s_dev = bdev->bd_dev; - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); break; } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fd01e67b3e84..066670a47886 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -394,7 +394,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) if (rbio->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } if (rbio->split) { @@ -1003,7 +1003,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, unlikely(dev_ptr_stale(ca, &pick.ptr))) { read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); bch2_mark_io_failure(failed, &pick, false); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); goto retry_pick; } @@ -1036,7 +1036,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, */ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { if (ca) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); rbio->ret = -BCH_ERR_data_read_buffer_too_small; goto out_read_done; } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 0503ac1952cd..4f6a574cf23b 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -445,6 +445,11 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, BUG_ON(c->opts.nochanges); bkey_for_each_ptr(ptrs, ptr) { + /* + * XXX: btree writes should be using io_ref[WRITE], but we + * aren't retrying failed btree writes yet (due to device + * removal/ro): + */ struct bch_dev *ca = nocow ? bch2_dev_have_ref(c, ptr->dev) : bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE); @@ -722,7 +727,7 @@ static void bch2_write_endio(struct bio *bio) } if (wbio->have_ioref) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); if (wbio->bounce) bch2_bio_free_pages_pool(c, bio); @@ -1421,7 +1426,7 @@ static void bch2_nocow_write(struct bch_write_op *op) return; err_get_ioref: darray_for_each(buckets, i) - percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref); + percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]); /* Fall back to COW path: */ goto out; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8a36d5536668..11f104f436e3 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1315,7 +1315,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) int ret = bch2_dev_journal_alloc(ca, true); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ret; } } @@ -1461,11 +1461,9 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->reservations.idx = journal_cur_seq(j); c->last_bucket_seq_cleanup = journal_cur_seq(j); - - bch2_journal_space_available(j); spin_unlock(&j->lock); - return bch2_journal_reclaim_start(j); + return 0; } /* init/exit: */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2debc213e47c..1b7961f4f609 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1218,7 +1218,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device) out: bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); kvfree(buf.data); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); closure_return(cl); return; err: @@ -1253,7 +1253,7 @@ int bch2_journal_read(struct bch_fs *c, if ((ca->mi.state == BCH_MEMBER_STATE_rw || ca->mi.state == BCH_MEMBER_STATE_ro) && - percpu_ref_tryget(&ca->io_ref)) + percpu_ref_tryget(&ca->io_ref[READ])) closure_call(&ca->journal.read, bch2_journal_read_device, system_unbound_wq, @@ -1768,7 +1768,7 @@ static void journal_write_endio(struct bio *bio) } closure_put(&w->io); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); } static CLOSURE_CALLBACK(journal_write_submit) @@ -1843,7 +1843,7 @@ static CLOSURE_CALLBACK(journal_write_preflush) if (w->separate_flush) { for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[WRITE]); struct journal_device *ja = &ca->journal; struct bio *bio = &ja->bio[w->idx]->bio; diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 38261638a611..06bb41a3f360 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -20,7 +20,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); static inline bool bch2_dev_is_online(struct bch_dev *ca) { - return !percpu_ref_is_zero(&ca->io_ref); + return !percpu_ref_is_zero(&ca->io_ref[READ]); } static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned); @@ -156,33 +156,34 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, - unsigned state_mask) + unsigned state_mask, + int rw) { rcu_read_lock(); if (ca) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[rw]); while ((ca = __bch2_next_dev(c, ca, NULL)) && (!((1 << ca->mi.state) & state_mask) || - !percpu_ref_tryget(&ca->io_ref))) + !percpu_ref_tryget(&ca->io_ref[rw]))) ; rcu_read_unlock(); return ca; } -#define __for_each_online_member(_c, _ca, state_mask) \ +#define __for_each_online_member(_c, _ca, state_mask, rw) \ for (struct bch_dev *_ca = NULL; \ - (_ca = bch2_get_next_online_dev(_c, _ca, state_mask));) + (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));) #define for_each_online_member(c, ca) \ - __for_each_online_member(c, ca, ~0) + __for_each_online_member(c, ca, ~0, READ) #define for_each_rw_member(c, ca) \ - __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw)) + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE) #define for_each_readable_member(c, ca) \ - __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro)) + __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ) static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev) { @@ -287,7 +288,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, rcu_read_lock(); struct bch_dev *ca = bch2_dev_rcu(c, dev); - if (ca && !percpu_ref_tryget(&ca->io_ref)) + if (ca && !percpu_ref_tryget(&ca->io_ref[rw])) ca = NULL; rcu_read_unlock(); @@ -297,7 +298,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, return ca; if (ca) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[rw]); return NULL; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 572b06bfa0b8..e27422b6d9c6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -248,7 +248,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, struct bch_sb_handle *dev_sb = &ca->disk_sb; if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return NULL; } } @@ -945,7 +945,7 @@ static void write_super_endio(struct bio *bio) } closure_put(&ca->fs->sb_write); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } static void read_back_super(struct bch_fs *c, struct bch_dev *ca) @@ -963,7 +963,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); closure_bio_submit(bio, &c->sb_write); } @@ -989,7 +989,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); closure_bio_submit(bio, &c->sb_write); } @@ -1014,13 +1014,20 @@ int bch2_write_super(struct bch_fs *c) closure_init_stack(cl); memset(&sb_written, 0, sizeof(sb_written)); + /* + * Note: we do writes to RO devices here, and we might want to change + * that in the future. + * + * For now, we expect to be able to call write_super() when we're not + * yet RW: + */ for_each_online_member(c, ca) { ret = darray_push(&online_devices, ca); if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); goto out; } - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); } /* Make sure we're using the new magic numbers: */ @@ -1186,7 +1193,7 @@ int bch2_write_super(struct bch_fs *c) /* Make new options visible after they're persistent: */ bch2_sb_update(c); darray_for_each(online_devices, ca) - percpu_ref_put(&(*ca)->io_ref); + percpu_ref_put(&(*ca)->io_ref[READ]); darray_exit(&online_devices); printbuf_exit(&err); return ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 20208f3c5d8b..a58edde43bee 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -185,6 +185,7 @@ static void bch2_dev_unlink(struct bch_dev *); static void bch2_dev_free(struct bch_dev *); static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); +static void bch2_dev_io_ref_stop(struct bch_dev *, int); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); struct bch_fs *bch2_dev_to_fs(dev_t dev) @@ -294,8 +295,10 @@ static void __bch2_fs_read_only(struct bch_fs *c) /* * After stopping journal: */ - for_each_member_device(c, ca) + for_each_member_device(c, ca) { + bch2_dev_io_ref_stop(ca, WRITE); bch2_dev_allocator_remove(c, ca); + } } #ifndef BCH_WRITE_REF_DEBUG @@ -465,10 +468,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (ret) goto err; - ret = bch2_fs_mark_dirty(c); - if (ret) - goto err; - clear_bit(BCH_FS_clean_shutdown, &c->flags); /* @@ -480,10 +479,24 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) set_bit(JOURNAL_need_flush_write, &c->journal.flags); set_bit(JOURNAL_running, &c->journal.flags); - for_each_rw_member(c, ca) + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { bch2_dev_allocator_add(c, ca); + percpu_ref_reinit(&ca->io_ref[WRITE]); + } bch2_recalc_capacity(c); + ret = bch2_fs_mark_dirty(c); + if (ret) + goto err; + + spin_lock(&c->journal.lock); + bch2_journal_space_available(&c->journal); + spin_unlock(&c->journal.lock); + + ret = bch2_journal_reclaim_start(&c->journal); + if (ret) + goto err; + set_bit(BCH_FS_rw, &c->flags); set_bit(BCH_FS_was_rw, &c->flags); @@ -495,11 +508,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) atomic_long_inc(&c->writes[i]); } #endif - - ret = bch2_journal_reclaim_start(&c->journal); - if (ret) - goto err; - if (!early) { ret = bch2_fs_read_write_late(c); if (ret) @@ -675,6 +683,7 @@ void bch2_fs_free(struct bch_fs *c) if (ca) { EBUG_ON(atomic_long_read(&ca->ref) != 1); + bch2_dev_io_ref_stop(ca, READ); bch2_free_super(&ca->disk_sb); bch2_dev_free(ca); } @@ -1199,6 +1208,15 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, /* Device startup/shutdown: */ +static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw) +{ + if (!percpu_ref_is_zero(&ca->io_ref[rw])) { + reinit_completion(&ca->io_ref_completion[rw]); + percpu_ref_kill(&ca->io_ref[rw]); + wait_for_completion(&ca->io_ref_completion[rw]); + } +} + static void bch2_dev_release(struct kobject *kobj) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); @@ -1208,6 +1226,9 @@ static void bch2_dev_release(struct kobject *kobj) static void bch2_dev_free(struct bch_dev *ca) { + WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE])); + WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ])); + cancel_work_sync(&ca->io_error_work); bch2_dev_unlink(ca); @@ -1226,7 +1247,8 @@ static void bch2_dev_free(struct bch_dev *ca) bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); - percpu_ref_exit(&ca->io_ref); + percpu_ref_exit(&ca->io_ref[WRITE]); + percpu_ref_exit(&ca->io_ref[READ]); #ifndef CONFIG_BCACHEFS_DEBUG percpu_ref_exit(&ca->ref); #endif @@ -1238,14 +1260,12 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) lockdep_assert_held(&c->state_lock); - if (percpu_ref_is_zero(&ca->io_ref)) + if (percpu_ref_is_zero(&ca->io_ref[READ])) return; __bch2_dev_read_only(c, ca); - reinit_completion(&ca->io_ref_completion); - percpu_ref_kill(&ca->io_ref); - wait_for_completion(&ca->io_ref_completion); + bch2_dev_io_ref_stop(ca, READ); bch2_dev_unlink(ca); @@ -1262,11 +1282,18 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref) } #endif -static void bch2_dev_io_ref_complete(struct percpu_ref *ref) +static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref) { - struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); + struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]); - complete(&ca->io_ref_completion); + complete(&ca->io_ref_completion[READ]); +} + +static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref) +{ + struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]); + + complete(&ca->io_ref_completion[WRITE]); } static void bch2_dev_unlink(struct bch_dev *ca) @@ -1330,7 +1357,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, kobject_init(&ca->kobj, &bch2_dev_ktype); init_completion(&ca->ref_completion); - init_completion(&ca->io_ref_completion); + init_completion(&ca->io_ref_completion[READ]); + init_completion(&ca->io_ref_completion[WRITE]); INIT_WORK(&ca->io_error_work, bch2_io_error_work); @@ -1356,7 +1384,9 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, bch2_dev_allocator_background_init(ca); - if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, + if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || bch2_dev_buckets_alloc(c, ca) || @@ -1419,7 +1449,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) return -BCH_ERR_device_size_too_small; } - BUG_ON(!percpu_ref_is_zero(&ca->io_ref)); + BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ])); + BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE])); ret = bch2_dev_journal_init(ca, sb->sb); if (ret) @@ -1438,7 +1469,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) ca->dev = ca->disk_sb.bdev->bd_dev; - percpu_ref_reinit(&ca->io_ref); + percpu_ref_reinit(&ca->io_ref[READ]); return 0; } @@ -1568,6 +1599,8 @@ static bool bch2_fs_may_start(struct bch_fs *c) static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { + bch2_dev_io_ref_stop(ca, WRITE); + /* * The allocator thread itself allocates btree nodes, so stop it first: */ @@ -1584,6 +1617,10 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + + if (percpu_ref_is_zero(&ca->io_ref[WRITE])) + percpu_ref_reinit(&ca->io_ref[WRITE]); + bch2_dev_do_discards(ca); } @@ -1731,7 +1768,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; err: if (ca->mi.state == BCH_MEMBER_STATE_rw && - !percpu_ref_is_zero(&ca->io_ref)) + !percpu_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); return ret; From 1c8f4587d239837b2556a76a11706c987c43909a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 21 Mar 2025 17:03:12 -0400 Subject: [PATCH 2040/2157] bcachefs: do_trace_key_cache_fill() Reducing stack frame usage; this moves the printbuf out of the main stack frame. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index edce59433375..f4d2e2cc2d53 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -287,6 +287,19 @@ static int btree_key_cache_create(struct btree_trans *trans, return ret; } +static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans, + struct btree_path *ck_path, + struct bkey_s_c k) +{ + struct printbuf buf = PRINTBUF; + + bch2_bpos_to_text(&buf, ck_path->pos); + prt_char(&buf, ' '); + bch2_bkey_val_to_text(&buf, trans->c, k); + trace_key_cache_fill(trans, buf.buf); + printbuf_exit(&buf); +} + static noinline int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, unsigned flags) @@ -320,15 +333,8 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, if (ret) goto err; - if (trace_key_cache_fill_enabled()) { - struct printbuf buf = PRINTBUF; - - bch2_bpos_to_text(&buf, ck_path->pos); - prt_char(&buf, ' '); - bch2_bkey_val_to_text(&buf, trans->c, k); - trace_key_cache_fill(trans, buf.buf); - printbuf_exit(&buf); - } + if (trace_key_cache_fill_enabled()) + do_trace_key_cache_fill(trans, ck_path, k); out: /* We're not likely to need this iterator again: */ bch2_set_btree_iter_dontneed(&iter); From 9180ad2e161b7030e0af78fd2266cbcefe81e652 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 21 Mar 2025 15:18:51 -0400 Subject: [PATCH 2041/2157] bcachefs: Kill btree_iter.trans This was planned to be done ages ago, now finally completed; there are places where we have quite a few btree_trans objects on the stack, so this reduces stack usage somewhat. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 4 +- fs/bcachefs/alloc_background.c | 78 ++++++------ fs/bcachefs/alloc_foreground.c | 8 +- fs/bcachefs/backpointers.c | 10 +- fs/bcachefs/btree_gc.c | 4 +- fs/bcachefs/btree_io.c | 3 +- fs/bcachefs/btree_iter.c | 188 ++++++++++++++-------------- fs/bcachefs/btree_iter.h | 122 +++++++++--------- fs/bcachefs/btree_key_cache.c | 8 +- fs/bcachefs/btree_types.h | 1 - fs/bcachefs/btree_update.c | 26 ++-- fs/bcachefs/btree_update_interior.c | 12 +- fs/bcachefs/btree_write_buffer.c | 10 +- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/data_update.c | 8 +- fs/bcachefs/dirent.c | 16 +-- fs/bcachefs/disk_accounting.c | 4 +- fs/bcachefs/ec.c | 4 +- fs/bcachefs/extent_update.c | 6 +- fs/bcachefs/fs-io-buffered.c | 6 +- fs/bcachefs/fs-io.c | 10 +- fs/bcachefs/fs.c | 22 ++-- fs/bcachefs/fsck.c | 30 ++--- fs/bcachefs/inode.c | 18 +-- fs/bcachefs/io_misc.c | 18 +-- fs/bcachefs/io_read.c | 8 +- fs/bcachefs/io_write.c | 12 +- fs/bcachefs/migrate.c | 4 +- fs/bcachefs/move.c | 14 +-- fs/bcachefs/namei.c | 38 +++--- fs/bcachefs/quota.c | 2 +- fs/bcachefs/rebalance.c | 12 +- fs/bcachefs/recovery.c | 6 +- fs/bcachefs/reflink.c | 23 ++-- fs/bcachefs/snapshot.c | 10 +- fs/bcachefs/str_hash.c | 2 +- fs/bcachefs/str_hash.h | 8 +- fs/bcachefs/subvolume.c | 4 +- fs/bcachefs/subvolume.h | 14 +-- fs/bcachefs/tests.c | 30 ++--- fs/bcachefs/xattr.c | 2 +- 41 files changed, 405 insertions(+), 402 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 99487727ae64..d03adc36100e 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -273,7 +273,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct posix_acl *acl = NULL; if (rcu) @@ -344,7 +344,7 @@ int bch2_set_acl(struct mnt_idmap *idmap, { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl; umode_t mode; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1a467bb74a47..6b6c2521c11f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -610,7 +610,7 @@ int bch2_alloc_read(struct bch_fs *c) * bch2_check_alloc_key() which runs later: */ if (!ca) { - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); + bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); continue; } @@ -631,17 +631,17 @@ int bch2_alloc_read(struct bch_fs *c) * bch2_check_alloc_key() which runs later: */ if (!ca) { - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); + bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); continue; } if (k.k->p.offset < ca->mi.first_bucket) { - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket)); + bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket)); continue; } if (k.k->p.offset >= ca->mi.nbuckets) { - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); + bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); continue; } @@ -1039,9 +1039,10 @@ int bch2_trigger_alloc(struct btree_trans *trans, * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for * extents style btrees, but works on non-extents btrees: */ -static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) +static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end, struct bkey *hole) { - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); if (bkey_err(k)) return k; @@ -1052,9 +1053,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos struct btree_iter iter2; struct bpos next; - bch2_trans_copy_iter(&iter2, iter); + bch2_trans_copy_iter(trans, &iter2, iter); - struct btree_path *path = btree_iter_path(iter->trans, iter); + struct btree_path *path = btree_iter_path(trans, iter); if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); @@ -1064,9 +1065,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos * btree node min/max is a closed interval, upto takes a half * open interval: */ - k = bch2_btree_iter_peek_max(&iter2, end); + k = bch2_btree_iter_peek_max(trans, &iter2, end); next = iter2.pos; - bch2_trans_iter_exit(iter->trans, &iter2); + bch2_trans_iter_exit(trans, &iter2); BUG_ON(next.offset >= iter->pos.offset + U32_MAX); @@ -1107,13 +1108,14 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck return *ca != NULL; } -static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, - struct bch_dev **ca, struct bkey *hole) +static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_dev **ca, struct bkey *hole) { - struct bch_fs *c = iter->trans->c; + struct bch_fs *c = trans->c; struct bkey_s_c k; again: - k = bch2_get_key_or_hole(iter, POS_MAX, hole); + k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole); if (bkey_err(k)) return k; @@ -1126,7 +1128,7 @@ static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, if (!next_bucket(c, ca, &hole_start)) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, hole_start); + bch2_btree_iter_set_pos(trans, iter, hole_start); goto again; } @@ -1167,8 +1169,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, a = bch2_alloc_to_v4(alloc_k, &a_convert); - bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); - k = bch2_btree_iter_peek_slot(discard_iter); + bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p); + k = bch2_btree_iter_peek_slot(trans, discard_iter); ret = bkey_err(k); if (ret) goto err; @@ -1181,8 +1183,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, goto err; } - bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); - k = bch2_btree_iter_peek_slot(freespace_iter); + bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); + k = bch2_btree_iter_peek_slot(trans, freespace_iter); ret = bkey_err(k); if (ret) goto err; @@ -1195,8 +1197,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, goto err; } - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); - k = bch2_btree_iter_peek_slot(bucket_gens_iter); + bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); + k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); ret = bkey_err(k); if (ret) goto err; @@ -1249,9 +1251,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, if (!ca->mi.freespace_initialized) return 0; - bch2_btree_iter_set_pos(freespace_iter, start); + bch2_btree_iter_set_pos(trans, freespace_iter, start); - k = bch2_btree_iter_peek_slot(freespace_iter); + k = bch2_btree_iter_peek_slot(trans, freespace_iter); ret = bkey_err(k); if (ret) goto err; @@ -1300,9 +1302,9 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, unsigned i, gens_offset, gens_end_offset; int ret; - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); + bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); - k = bch2_btree_iter_peek_slot(bucket_gens_iter); + k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); ret = bkey_err(k); if (ret) goto err; @@ -1435,7 +1437,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite *gen = a->gen; out: fsck_err: - bch2_set_btree_iter_dontneed(&alloc_iter); + bch2_set_btree_iter_dontneed(trans, &alloc_iter); bch2_trans_iter_exit(trans, &alloc_iter); printbuf_exit(&buf); return ret; @@ -1572,7 +1574,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_begin(trans); - k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole); + k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole); ret = bkey_err(k); if (ret) goto bkey_err; @@ -1610,7 +1612,7 @@ int bch2_check_alloc_info(struct bch_fs *c) if (ret) goto bkey_err; - bch2_btree_iter_set_pos(&iter, next); + bch2_btree_iter_set_pos(trans, &iter, next); bkey_err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -1638,7 +1640,7 @@ int bch2_check_alloc_info(struct bch_fs *c) BTREE_ITER_prefetch); while (1) { bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(trans, &iter); if (!k.k) break; @@ -1657,7 +1659,7 @@ int bch2_check_alloc_info(struct bch_fs *c) break; } - bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); + bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos)); } bch2_trans_iter_exit(trans, &iter); if (ret) @@ -1685,7 +1687,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, struct printbuf buf = PRINTBUF; int ret; - alloc_k = bch2_btree_iter_peek(alloc_iter); + alloc_k = bch2_btree_iter_peek(trans, alloc_iter); if (!alloc_k.k) return 0; @@ -1826,7 +1828,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bpos pos = need_discard_iter->pos; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_s_c k; struct bkey_i_alloc_v4 *a; struct printbuf buf = PRINTBUF; @@ -2199,9 +2201,9 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter { struct bkey_s_c k; again: - k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); + k = bch2_btree_iter_peek_max(trans, iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); if (!k.k && !*wrapped) { - bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); + bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0)); *wrapped = true; goto again; } @@ -2251,7 +2253,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) if (ret) break; - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } bch2_trans_iter_exit(trans, &iter); err: @@ -2321,7 +2323,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, break; } - k = bch2_get_key_or_hole(&iter, end, &hole); + k = bch2_get_key_or_hole(trans, &iter, end, &hole); ret = bkey_err(k); if (ret) goto bkey_err; @@ -2340,7 +2342,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, if (ret) goto bkey_err; - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } else { struct bkey_i *freespace; @@ -2360,7 +2362,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, if (ret) goto bkey_err; - bch2_btree_iter_set_pos(&iter, k.k->p); + bch2_btree_iter_set_pos(trans, &iter, k.k->p); } bkey_err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 1a25a8a4ae09..d188bb531e2b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -327,7 +327,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, bucket = sector_to_bucket(ca, round_up(bucket_to_sector(ca, bucket) + 1, 1ULL << ca->mi.btree_bitmap_shift)); - bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, bucket)); + bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, bucket)); s->buckets_seen++; s->skipped_mi_btree_bitmap++; continue; @@ -355,7 +355,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, watermark, s, cl) : NULL; next: - bch2_set_btree_iter_dontneed(&citer); + bch2_set_btree_iter_dontneed(trans, &citer); bch2_trans_iter_exit(trans, &citer); if (ob) break; @@ -417,7 +417,7 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, 1ULL << ca->mi.btree_bitmap_shift)); alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56)); - bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); + bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, alloc_cursor)); s->skipped_mi_btree_bitmap++; goto next; } @@ -426,7 +426,7 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, if (ob) { if (!IS_ERR(ob)) *dev_alloc_cursor = iter.pos.offset; - bch2_set_btree_iter_dontneed(&iter); + bch2_set_btree_iter_dontneed(trans, &iter); break; } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 5280dc2d1e3e..dc1cd8de18ac 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -252,7 +252,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 0, bp.v->level, iter_flags); - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); if (bkey_err(k)) { bch2_trans_iter_exit(trans, iter); return k; @@ -293,7 +293,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 0, bp.v->level - 1, 0); - struct btree *b = bch2_btree_iter_peek_node(iter); + struct btree *b = bch2_btree_iter_peek_node(trans, iter); if (IS_ERR_OR_NULL(b)) goto err; @@ -321,7 +321,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st return 0; struct bch_fs *c = trans->c; - struct btree_iter alloc_iter = { NULL }; + struct btree_iter alloc_iter = {}; struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret = 0; @@ -650,7 +650,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, retry: bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); - b = bch2_btree_iter_peek_node(&iter); + b = bch2_btree_iter_peek_node(trans, &iter); ret = PTR_ERR_OR_ZERO(b); if (ret) goto err; @@ -934,7 +934,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); int ret = PTR_ERR_OR_ZERO(b); if (ret) goto err; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2025d408979c..7b98ba2dec64 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -691,7 +691,7 @@ static int bch2_gc_btree(struct btree_trans *trans, struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, bch2_btree_id_root(c, btree)->b->c.level, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); ret = PTR_ERR_OR_ZERO(b); if (ret) goto err_root; @@ -1199,7 +1199,7 @@ int bch2_gc_gens(struct bch_fs *c) BCH_TRANS_COMMIT_no_enospc, ({ ca = bch2_dev_iterate(c, ca, k.k->p.inode); if (!ca) { - bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); + bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); continue; } bch2_alloc_write_oldest_gen(trans, ca, &iter, k); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 17218699f65d..ac1f029a7eb2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1909,7 +1909,8 @@ static void btree_node_scrub_work(struct work_struct *work) scrub->key.k->k.p, 0, scrub->level - 1, 0); struct btree *b; - int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter))); + int ret = lockrestart_do(trans, + PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter))); if (ret) goto err; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index a9c110b846b5..e34e9598ef25 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -244,10 +244,8 @@ void bch2_trans_verify_paths(struct btree_trans *trans) bch2_btree_path_verify(trans, path); } -static void bch2_btree_iter_verify(struct btree_iter *iter) +static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; - BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached); BUG_ON((iter->flags & BTREE_ITER_is_extents) && @@ -276,9 +274,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) bkey_gt(iter->pos, iter->k.p))); } -static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) +static int bch2_btree_iter_verify_ret(struct btree_trans *trans, + struct btree_iter *iter, struct bkey_s_c k) { - struct btree_trans *trans = iter->trans; struct btree_iter copy; struct bkey_s_c prev; int ret = 0; @@ -299,7 +297,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k bch2_trans_iter_init(trans, ©, iter->btree_id, iter->pos, BTREE_ITER_nopreserve| BTREE_ITER_all_snapshots); - prev = bch2_btree_iter_prev(©); + prev = bch2_btree_iter_prev(trans, ©); if (!prev.k) goto out; @@ -365,9 +363,11 @@ static inline void bch2_btree_path_verify_level(struct btree_trans *trans, struct btree_path *path, unsigned l) {} static inline void bch2_btree_path_verify(struct btree_trans *trans, struct btree_path *path) {} -static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} +static inline void bch2_btree_iter_verify(struct btree_trans *trans, + struct btree_iter *iter) {} static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} -static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; } +static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) { return 0; } #endif @@ -1855,10 +1855,8 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * return (struct bkey_s_c) { u, NULL }; } -void bch2_set_btree_iter_dontneed(struct btree_iter *iter) +void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; - if (!iter->path || trans->restarted) return; @@ -1870,17 +1868,14 @@ void bch2_set_btree_iter_dontneed(struct btree_iter *iter) /* Btree iterators: */ int __must_check -__bch2_btree_iter_traverse(struct btree_iter *iter) +__bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) { - return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + return bch2_btree_path_traverse(trans, iter->path, iter->flags); } int __must_check -bch2_btree_iter_traverse(struct btree_iter *iter) +bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; - int ret; - bch2_trans_verify_not_unlocked_or_in_restart(trans); iter->path = bch2_btree_path_set_pos(trans, iter->path, @@ -1888,7 +1883,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) iter->flags & BTREE_ITER_intent, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) return ret; @@ -1900,14 +1895,14 @@ bch2_btree_iter_traverse(struct btree_iter *iter) /* Iterate across nodes (leaf and interior nodes) */ -struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) +struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans, + struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; struct btree *b = NULL; int ret; EBUG_ON(trans->paths[iter->path].cached); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) @@ -1929,7 +1924,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter)); out: bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); return b; err: @@ -1938,26 +1933,26 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) } /* Only kept for -tools */ -struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) +struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *trans, + struct btree_iter *iter) { struct btree *b; - while (b = bch2_btree_iter_peek_node(iter), + while (b = bch2_btree_iter_peek_node(trans, iter), bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) - bch2_trans_begin(iter->trans); + bch2_trans_begin(trans); return b; } -struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) +struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; struct btree *b = NULL; int ret; EBUG_ON(trans->paths[iter->path].cached); bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) @@ -2024,7 +2019,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) EBUG_ON(btree_iter_path(trans, iter)->uptodate); out: bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); return b; err: @@ -2034,7 +2029,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) /* Iterate across keys (in leaf nodes only) */ -inline bool bch2_btree_iter_advance(struct btree_iter *iter) +inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter *iter) { struct bpos pos = iter->k.p; bool ret = !(iter->flags & BTREE_ITER_all_snapshots @@ -2043,11 +2038,11 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter) if (ret && !(iter->flags & BTREE_ITER_is_extents)) pos = bkey_successor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); + bch2_btree_iter_set_pos(trans, iter, pos); return ret; } -inline bool bch2_btree_iter_rewind(struct btree_iter *iter) +inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter *iter) { struct bpos pos = bkey_start_pos(&iter->k); bool ret = !(iter->flags & BTREE_ITER_all_snapshots @@ -2056,7 +2051,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) if (ret && !(iter->flags & BTREE_ITER_is_extents)) pos = bkey_predecessor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); + bch2_btree_iter_set_pos(trans, iter, pos); return ret; } @@ -2183,9 +2178,9 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans, * bkey_s_c_null: */ static noinline -struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btree_iter *iter, + struct bpos pos) { - struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; struct bkey u; struct bkey_s_c k; @@ -2231,14 +2226,14 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos return k; } -static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) +static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key) { - struct btree_trans *trans = iter->trans; struct bkey_s_c k, k2; int ret; EBUG_ON(btree_iter_path(trans, iter)->cached); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); while (1) { iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, @@ -2248,7 +2243,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); + bch2_btree_iter_set_pos(trans, iter, iter->pos); k = bkey_s_c_err(ret); break; } @@ -2258,7 +2253,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp if (unlikely(!l->b)) { /* No btree nodes at requested level: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); + bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); k = bkey_s_c_null; break; } @@ -2269,10 +2264,10 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && k.k && - (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { + (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) { k = k2; if (bkey_err(k)) { - bch2_btree_iter_set_pos(iter, iter->pos); + bch2_btree_iter_set_pos(trans, iter, iter->pos); break; } } @@ -2305,27 +2300,28 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp search_key = bpos_successor(l->b->key.k.p); } else { /* End of btree: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); + bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); k = bkey_s_c_null; break; } } - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); return k; } /** * bch2_btree_iter_peek_max() - returns first key greater than or equal to * iterator's current position + * @trans: btree transaction object * @iter: iterator to peek from * @end: search limit: returns keys less than or equal to @end * * Returns: key if found, or an error extractable with bkey_err(). */ -struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) +struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end) { - struct btree_trans *trans = iter->trans; struct bpos search_key = btree_iter_search_key(iter); struct bkey_s_c k; struct bpos iter_pos = iter->pos; @@ -2348,7 +2344,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en } while (1) { - k = __bch2_btree_iter_peek(iter, search_key); + k = __bch2_btree_iter_peek(trans, iter, search_key); if (unlikely(!k.k)) goto end; if (unlikely(bkey_err(k))) @@ -2462,9 +2458,9 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en if (!(iter->flags & BTREE_ITER_all_snapshots)) iter->pos.snapshot = iter->snapshot; - ret = bch2_btree_iter_verify_ret(iter, k); + ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) { - bch2_btree_iter_set_pos(iter, iter->pos); + bch2_btree_iter_set_pos(trans, iter, iter->pos); k = bkey_s_c_err(ret); } @@ -2472,7 +2468,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en return k; end: - bch2_btree_iter_set_pos(iter, end); + bch2_btree_iter_set_pos(trans, iter, end); k = bkey_s_c_null; goto out_no_locked; } @@ -2480,24 +2476,25 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en /** * bch2_btree_iter_next() - returns first key greater than iterator's current * position + * @trans: btree transaction object * @iter: iterator to peek from * * Returns: key if found, or an error extractable with bkey_err(). */ -struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_next(struct btree_trans *trans, struct btree_iter *iter) { - if (!bch2_btree_iter_advance(iter)) + if (!bch2_btree_iter_advance(trans, iter)) return bkey_s_c_null; - return bch2_btree_iter_peek(iter); + return bch2_btree_iter_peek(trans, iter); } -static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) +static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key) { - struct btree_trans *trans = iter->trans; struct bkey_s_c k, k2; - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); while (1) { iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, @@ -2507,7 +2504,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); + bch2_btree_iter_set_pos(trans, iter, iter->pos); k = bkey_s_c_err(ret); break; } @@ -2517,7 +2514,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru if (unlikely(!l->b)) { /* No btree nodes at requested level: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); + bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); k = bkey_s_c_null; break; } @@ -2533,10 +2530,10 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && k.k && - (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { + (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) { k = k2; if (bkey_err(k2)) { - bch2_btree_iter_set_pos(iter, iter->pos); + bch2_btree_iter_set_pos(trans, iter, iter->pos); break; } } @@ -2557,25 +2554,27 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru search_key = bpos_predecessor(path->l[0].b->data->min_key); } else { /* Start of btree: */ - bch2_btree_iter_set_pos(iter, POS_MIN); + bch2_btree_iter_set_pos(trans, iter, POS_MIN); k = bkey_s_c_null; break; } } - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); return k; } /** * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to * iterator's current position + * @trans: btree transaction object * @iter: iterator to peek from * @end: search limit: returns keys greater than or equal to @end * * Returns: key if found, or an error extractable with bkey_err(). */ -struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) +struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end) { if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && !bkey_eq(iter->pos, POS_MAX)) { @@ -2587,7 +2586,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp * real visible extents - easiest to just use peek_slot() (which * internally uses peek() for extents) */ - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); if (bkey_err(k)) return k; @@ -2597,7 +2596,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp return k; } - struct btree_trans *trans = iter->trans; struct bpos search_key = iter->pos; struct bkey_s_c k; btree_path_idx_t saved_path = 0; @@ -2613,7 +2611,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp } while (1) { - k = __bch2_btree_iter_peek_prev(iter, search_key); + k = __bch2_btree_iter_peek_prev(trans, iter, search_key); if (unlikely(!k.k)) goto end; if (unlikely(bkey_err(k))) @@ -2704,10 +2702,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent); bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); return k; end: - bch2_btree_iter_set_pos(iter, end); + bch2_btree_iter_set_pos(trans, iter, end); k = bkey_s_c_null; goto out_no_locked; } @@ -2715,27 +2713,27 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp /** * bch2_btree_iter_prev() - returns first key less than iterator's current * position + * @trans: btree transaction object * @iter: iterator to peek from * * Returns: key if found, or an error extractable with bkey_err(). */ -struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_iter *iter) { - if (!bch2_btree_iter_rewind(iter)) + if (!bch2_btree_iter_rewind(trans, iter)) return bkey_s_c_null; - return bch2_btree_iter_peek_prev(iter); + return bch2_btree_iter_peek_prev(trans, iter); } -struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter) { - struct btree_trans *trans = iter->trans; struct bpos search_key; struct bkey_s_c k; int ret; bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); bch2_btree_iter_verify_entry_exit(iter); EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); @@ -2751,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->pos.inode == KEY_INODE_MAX) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); + bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); } search_key = btree_iter_search_key(iter); @@ -2785,7 +2783,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) goto out; if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && - (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { + (k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) { if (!bkey_err(k)) iter->k = *k.k; /* We're not returning a key from iter->path: */ @@ -2812,8 +2810,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->flags & BTREE_ITER_intent) { struct btree_iter iter2; - bch2_trans_copy_iter(&iter2, iter); - k = bch2_btree_iter_peek_max(&iter2, end); + bch2_trans_copy_iter(trans, &iter2, iter); + k = bch2_btree_iter_peek_max(trans, &iter2, end); if (k.k && !bkey_err(k)) { swap(iter->key_cache_path, iter2.key_cache_path); @@ -2824,9 +2822,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } else { struct bpos pos = iter->pos; - k = bch2_btree_iter_peek_max(iter, end); + k = bch2_btree_iter_peek_max(trans, iter, end); if (unlikely(bkey_err(k))) - bch2_btree_iter_set_pos(iter, pos); + bch2_btree_iter_set_pos(trans, iter, pos); else iter->pos = pos; } @@ -2857,39 +2855,39 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter)); out_no_locked: bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); - ret = bch2_btree_iter_verify_ret(iter, k); + bch2_btree_iter_verify(trans, iter); + ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) return bkey_s_c_err(ret); return k; } -struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *trans, struct btree_iter *iter) { - if (!bch2_btree_iter_advance(iter)) + if (!bch2_btree_iter_advance(trans, iter)) return bkey_s_c_null; - return bch2_btree_iter_peek_slot(iter); + return bch2_btree_iter_peek_slot(trans, iter); } -struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *trans, struct btree_iter *iter) { - if (!bch2_btree_iter_rewind(iter)) + if (!bch2_btree_iter_rewind(trans, iter)) return bkey_s_c_null; - return bch2_btree_iter_peek_slot(iter); + return bch2_btree_iter_peek_slot(trans, iter); } /* Obsolete, but still used by rust wrapper in -tools */ -struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *trans, struct btree_iter *iter) { struct bkey_s_c k; - while (btree_trans_too_many_iters(iter->trans) || - (k = bch2_btree_iter_peek_type(iter, iter->flags), + while (btree_trans_too_many_iters(trans) || + (k = bch2_btree_iter_peek_type(trans, iter, iter->flags), bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) - bch2_trans_begin(iter->trans); + bch2_trans_begin(trans); return k; } @@ -3035,7 +3033,6 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) iter->path = 0; iter->update_path = 0; iter->key_cache_path = 0; - iter->trans = NULL; } void bch2_trans_iter_init_outlined(struct btree_trans *trans, @@ -3075,10 +3072,9 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, BUG_ON(iter->min_depth != depth); } -void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) +void bch2_trans_copy_iter(struct btree_trans *trans, + struct btree_iter *dst, struct btree_iter *src) { - struct btree_trans *trans = src->trans; - *dst = *src; #ifdef TRACK_PATH_ALLOCATED dst->ip_allocated = _RET_IP_; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index e6f51a3b8187..9d2cccf5d21a 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -393,36 +393,37 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct void bch2_trans_node_drop(struct btree_trans *trans, struct btree *); void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); -int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); -int __must_check bch2_btree_iter_traverse(struct btree_iter *); +int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *); +int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *); -struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); -struct btree *bch2_btree_iter_next_node(struct btree_iter *); +struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *); +struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *); +struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos); -struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos); +struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *); -static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans, + struct btree_iter *iter) { - return bch2_btree_iter_peek_max(iter, SPOS_MAX); + return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX); } -struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos); +struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos); -static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) +static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter) { - return bch2_btree_iter_peek_prev_min(iter, POS_MIN); + return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN); } -struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *); +struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *); +struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *); -bool bch2_btree_iter_advance(struct btree_iter *); -bool bch2_btree_iter_rewind(struct btree_iter *); +bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *); +bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *); static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) { @@ -433,10 +434,9 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo iter->k.size = 0; } -static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) +static inline void bch2_btree_iter_set_pos(struct btree_trans *trans, + struct btree_iter *iter, struct bpos new_pos) { - struct btree_trans *trans = iter->trans; - if (unlikely(iter->update_path)) bch2_path_put(trans, iter->update_path, iter->flags & BTREE_ITER_intent); @@ -454,13 +454,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it iter->pos = bkey_start_pos(&iter->k); } -static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot) +static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans, + struct btree_iter *iter, u32 snapshot) { struct bpos pos = iter->pos; iter->snapshot = snapshot; pos.snapshot = snapshot; - bch2_btree_iter_set_pos(iter, pos); + bch2_btree_iter_set_pos(trans, iter, pos); } void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); @@ -502,7 +503,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, unsigned flags, unsigned long ip) { - iter->trans = trans; iter->update_path = 0; iter->key_cache_path = 0; iter->btree_id = btree_id; @@ -539,9 +539,9 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *, enum btree_id, struct bpos, unsigned, unsigned, unsigned); -void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); +void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *); -void bch2_set_btree_iter_dontneed(struct btree_iter *); +void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *); void *__bch2_trans_kmalloc(struct btree_trans *, size_t); @@ -588,7 +588,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, struct bkey_s_c k; bch2_trans_iter_init(trans, iter, btree_id, pos, flags); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(trans, iter); if (!bkey_err(k) && type && k.k->type != type) k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch); @@ -658,14 +658,14 @@ u32 bch2_trans_begin(struct btree_trans *); int _ret3 = 0; \ do { \ _ret3 = lockrestart_do((_trans), ({ \ - struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\ if (!_b) \ break; \ \ PTR_ERR_OR_ZERO(_b) ?: (_do); \ })) ?: \ lockrestart_do((_trans), \ - PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\ } while (!_ret3); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ @@ -677,31 +677,34 @@ u32 bch2_trans_begin(struct btree_trans *); __for_each_btree_node(_trans, _iter, _btree_id, _start, \ 0, 0, _flags, _b, _do) -static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, +static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans, + struct btree_iter *iter, unsigned flags) { - return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) : - bch2_btree_iter_peek_prev(iter); + return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) : + bch2_btree_iter_peek_prev(trans, iter); } -static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, +static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans, + struct btree_iter *iter, unsigned flags) { - return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) : - bch2_btree_iter_peek(iter); + return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) : + bch2_btree_iter_peek(trans, iter); } -static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter, - struct bpos end, - unsigned flags) +static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos end, + unsigned flags) { if (!(flags & BTREE_ITER_slots)) - return bch2_btree_iter_peek_max(iter, end); + return bch2_btree_iter_peek_max(trans, iter, end); if (bkey_gt(iter->pos, end)) return bkey_s_c_null; - return bch2_btree_iter_peek_slot(iter); + return bch2_btree_iter_peek_slot(trans, iter); } int __bch2_btree_trans_too_many_iters(struct btree_trans *); @@ -768,14 +771,14 @@ transaction_restart: \ \ do { \ _ret3 = lockrestart_do(_trans, ({ \ - (_k) = bch2_btree_iter_peek_max_type(&(_iter), \ + (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), \ _end, (_flags)); \ if (!(_k).k) \ break; \ \ bkey_err(_k) ?: (_do); \ })); \ - } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \ + } while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret3; \ @@ -813,14 +816,14 @@ transaction_restart: \ \ do { \ _ret3 = lockrestart_do(_trans, ({ \ - (_k) = bch2_btree_iter_peek_prev_type(&(_iter), \ + (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), \ (_flags)); \ if (!(_k).k) \ break; \ \ bkey_err(_k) ?: (_do); \ })); \ - } while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \ + } while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter))); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret3; \ @@ -850,37 +853,38 @@ transaction_restart: \ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) -struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *, + struct btree_iter *); #define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ _start, _end, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ - (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\ + (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\ !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) + bch2_btree_iter_advance(_trans, &(_iter))) -#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\ +#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\ for (; \ - (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ + (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags), \ !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) + bch2_btree_iter_advance(_trans, &(_iter))) #define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\ SPOS_MAX, _flags, _k, _ret) -#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ - _start, _flags, _k, _ret) \ - for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - (_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags), \ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_rewind(&(_iter))) +#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ + _start, _flags, _k, _ret) \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags), \ + !((_ret) = bkey_err(_k)) && (_k).k; \ + bch2_btree_iter_rewind(_trans, &(_iter))) -#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ - for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) +#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret) \ + for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret) /* * This should not be used in a fastpath, without first trying _do in diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f4d2e2cc2d53..2b186584a291 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -319,7 +319,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, BTREE_ITER_key_cache_fill| BTREE_ITER_cached_nofill); iter.flags &= ~BTREE_ITER_with_journal; - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; @@ -337,7 +337,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, do_trace_key_cache_fill(trans, ck_path, k); out: /* We're not likely to need this iterator again: */ - bch2_set_btree_iter_dontneed(&iter); + bch2_set_btree_iter_dontneed(trans, &iter); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -418,7 +418,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_ITER_intent); b_iter.flags &= ~BTREE_ITER_with_key_cache; - ret = bch2_btree_iter_traverse(&c_iter); + ret = bch2_btree_iter_traverse(trans, &c_iter); if (ret) goto out; @@ -450,7 +450,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, !test_bit(JOURNAL_space_low, &c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; - struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); + struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter); ret = bkey_err(btree_k); if (ret) goto err; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 77578da2d23f..023c472dc9ee 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -367,7 +367,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path) * @nodes_intent_locked - bitmask indicating which locks are intent locks */ struct btree_iter { - struct btree_trans *trans; btree_path_idx_t path; btree_path_idx_t update_path; btree_path_idx_t key_cache_path; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index c05394f56424..1e6b7836cc01 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -126,7 +126,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, struct bpos new_pos) { struct bch_fs *c = trans->c; - struct btree_iter old_iter, new_iter = { NULL }; + struct btree_iter old_iter, new_iter = {}; struct bkey_s_c old_k, new_k; snapshot_id_list s; struct bkey_i *update; @@ -140,7 +140,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, bch2_trans_iter_init(trans, &old_iter, id, old_pos, BTREE_ITER_not_extents| BTREE_ITER_all_snapshots); - while ((old_k = bch2_btree_iter_prev(&old_iter)).k && + while ((old_k = bch2_btree_iter_prev(trans, &old_iter)).k && !(ret = bkey_err(old_k)) && bkey_eq(old_pos, old_k.k->p)) { struct bpos whiteout_pos = @@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, BTREE_ITER_intent| BTREE_ITER_with_updates| BTREE_ITER_not_extents); - k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -322,8 +322,8 @@ static int bch2_trans_update_extent(struct btree_trans *trans, if (done) goto out; next: - bch2_btree_iter_advance(&iter); - k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + bch2_btree_iter_advance(trans, &iter); + k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -592,13 +592,13 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, enum btree_id btree, struct bpos end) { bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); + struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter); int ret = bkey_err(k); if (ret) goto err; - bch2_btree_iter_advance(iter); - k = bch2_btree_iter_peek_slot(iter); + bch2_btree_iter_advance(trans, iter); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) goto err; @@ -634,7 +634,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans, BTREE_ITER_cached| BTREE_ITER_not_extents| BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, k, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -646,7 +646,7 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, struct btree_iter iter; bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), BTREE_ITER_intent|flags); - int ret = bch2_btree_iter_traverse(&iter) ?: + int ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, k, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -695,7 +695,7 @@ int bch2_btree_delete(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_cached| BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_delete_at(trans, &iter, update_flags); bch2_trans_iter_exit(trans, &iter); @@ -713,7 +713,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, int ret = 0; bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); - while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { + while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) { struct disk_reservation disk_res = bch2_disk_reservation_init(trans->c, 0); struct bkey_i delete; @@ -808,7 +808,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, struct btree_iter iter; bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(&iter) ?: + int ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_bit_mod_iter(trans, &iter, set); bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index bf7e1dac7f46..55fbeeb8eaaa 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2147,7 +2147,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p, BTREE_MAX_DEPTH, b->c.level, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(iter); + int ret = bch2_btree_iter_traverse(trans, iter); if (ret) goto err; @@ -2239,7 +2239,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, BTREE_MAX_DEPTH, level, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); int ret = PTR_ERR_OR_ZERO(b); if (ret) goto out; @@ -2262,7 +2262,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, /* Traverse one depth lower to get a pointer to the node itself: */ struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); int ret = PTR_ERR_OR_ZERO(b); if (ret) goto err; @@ -2406,7 +2406,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = trans->c; - struct btree_iter iter2 = { NULL }; + struct btree_iter iter2 = {}; struct btree *parent; int ret; @@ -2430,7 +2430,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, parent = btree_node_parent(btree_iter_path(trans, iter), b); if (parent) { - bch2_trans_copy_iter(&iter2, iter); + bch2_trans_copy_iter(trans, &iter2, iter); iter2.path = bch2_btree_path_make_mut(trans, iter2.path, iter2.flags & BTREE_ITER_intent, @@ -2444,7 +2444,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, trans->paths_sorted = false; - ret = bch2_btree_iter_traverse(&iter2) ?: + ret = bch2_btree_iter_traverse(trans, &iter2) ?: bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun); if (ret) goto err; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 2c09d19dd621..adbe576ec77e 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -144,7 +144,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -208,7 +208,7 @@ btree_write_buffered_insert(struct btree_trans *trans, trans->journal_res.seq = wb->journal_seq; - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, &wb->k, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter); @@ -285,7 +285,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_write_buffer *wb = &c->btree_write_buffer; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0; bool write_locked = false; bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); @@ -368,7 +368,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) write_locked = false; ret = lockrestart_do(trans, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_foreground_maybe_merge(trans, iter.path, 0, BCH_WATERMARK_reclaim| BCH_TRANS_COMMIT_journal_reclaim| @@ -385,7 +385,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) BTREE_ITER_intent|BTREE_ITER_all_snapshots); } - bch2_btree_iter_set_pos(&iter, k->k.k.p); + bch2_btree_iter_set_pos(trans, &iter, k->k.k.p); btree_iter_path(trans, &iter)->preserve = false; bool accounting_accumulated = false; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index cd4f5de82566..a1fc462ea0de 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -365,7 +365,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, BTREE_ITER_intent|BTREE_ITER_all_snapshots); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node| BTREE_TRIGGER_norun); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index fe400dfc5d76..de02ebf847ec 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -216,7 +216,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, bch2_trans_begin(trans); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; @@ -398,7 +398,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, BCH_TRANS_COMMIT_no_enospc| m->data_opts.btree_insert_flags); if (!ret) { - bch2_btree_iter_set_pos(&iter, next_pos); + bch2_btree_iter_set_pos(trans, &iter, next_pos); this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size); if (trace_io_move_finish_enabled()) @@ -426,7 +426,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, count_event(c, io_move_fail); - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); goto next; } out: @@ -497,7 +497,7 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos, BTREE_ITER_slots); ret = lockrestart_do(trans, ({ - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); bkey_err(k); })); bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d7f9f79318a2..bf53a029f356 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -417,8 +417,8 @@ int bch2_dirent_rename(struct btree_trans *trans, enum bch_rename_mode mode) { struct qstr src_name_lookup, dst_name_lookup; - struct btree_iter src_iter = { NULL }; - struct btree_iter dst_iter = { NULL }; + struct btree_iter src_iter = {}; + struct btree_iter dst_iter = {}; struct bkey_s_c old_src, old_dst = bkey_s_c_null; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos dst_pos = @@ -586,16 +586,16 @@ int bch2_dirent_rename(struct btree_trans *trans, } if (delete_src) { - bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot); - ret = bch2_btree_iter_traverse(&src_iter) ?: + bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot); + ret = bch2_btree_iter_traverse(trans, &src_iter) ?: bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node); if (ret) goto out; } if (delete_dst) { - bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot); - ret = bch2_btree_iter_traverse(&dst_iter) ?: + bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot); + ret = bch2_btree_iter_traverse(trans, &dst_iter) ?: bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_internal_snapshot_node); if (ret) goto out; @@ -642,7 +642,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct qstr *name, subvol_inum *inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; int ret = lockrestart_do(trans, bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); @@ -771,7 +771,7 @@ int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_hash_delete_at(trans, bch2_dirent_hash_desc, &dir_hash_info, &iter, BTREE_UPDATE_internal_snapshot_node); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index a59f6c12529b..b007319b72e9 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -739,7 +739,7 @@ int bch2_accounting_read(struct bch_fs *c) struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); continue; } @@ -930,7 +930,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); continue; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 1618272a606d..a396865e8b17 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1840,7 +1840,7 @@ static int __get_existing_stripe(struct btree_trans *trans, ret = 1; } out: - bch2_set_btree_iter_dontneed(&iter); + bch2_set_btree_iter_dontneed(trans, &iter); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1953,7 +1953,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st if (bkey_gt(k.k->p, POS(0, U32_MAX))) { if (start_pos.offset) { start_pos = min_pos; - bch2_btree_iter_set_pos(&iter, start_pos); + bch2_btree_iter_set_pos(trans, &iter, start_pos); continue; } diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 6aac579a692a..6bb42985306e 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -112,7 +112,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, unsigned nr_iters = 0; int ret; - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -126,9 +126,9 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret < 0) return ret; - bch2_trans_copy_iter(©, iter); + bch2_trans_copy_iter(trans, ©, iter); - for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) { + for_each_btree_key_max_continue_norestart(trans, copy, insert->k.p, 0, k, ret) { unsigned offset = 0; if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index a03e2c780cba..19d4599918dc 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -183,12 +183,12 @@ static void bchfs_read(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, rbio->bio.bi_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 42709ebe4d93..65c2c33d253d 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -636,9 +636,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if (ret) goto bkey_err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); if ((ret = bkey_err(k))) goto bkey_err; @@ -649,13 +649,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, /* already reserved */ if (bkey_extent_is_reservation(k) && bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } if (bkey_extent_is_data(k.k) && !(mode & FALLOC_FL_ZERO_RANGE)) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } @@ -676,7 +676,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if (ret) goto bkey_err; } - bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); + bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start)); if (ret) goto bkey_err; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 217e2a7cef20..5796844fbaa0 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -88,7 +88,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, void *p, unsigned fields) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; retry: @@ -1075,7 +1075,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; struct btree_trans *trans; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; kuid_t kuid; @@ -1330,9 +1330,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (ret) continue; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_max(&iter, end); + k = bch2_btree_iter_peek_max(trans, &iter, end); ret = bkey_err(k); if (ret) continue; @@ -1342,7 +1342,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } @@ -1380,7 +1380,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_copy(prev.k, cur.k); have_extent = true; - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, iter.pos.offset + sectors)); } bch2_trans_iter_exit(trans, &iter); @@ -1697,17 +1697,17 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter1, snapshot); - bch2_btree_iter_set_snapshot(&iter2, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter1, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter2, snapshot); ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); if (ret) goto err; if (inode_u.bi_dir == dir->ei_inode.bi_inum) { - bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); + bch2_btree_iter_set_pos(trans, &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); - k = bch2_btree_iter_peek_slot(&iter1); + k = bch2_btree_iter_peek_slot(trans, &iter1); ret = bkey_err(k); if (ret) goto err; @@ -1731,7 +1731,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child * File with multiple hardlinks and our backref is to the wrong * directory - linear search: */ - for_each_btree_key_continue_norestart(iter2, 0, k, ret) { + for_each_btree_key_continue_norestart(trans, iter2, 0, k, ret) { if (k.k->p.inode > dir->ei_inode.bi_inum) break; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 52320295dcf6..18308f3d64a1 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -186,7 +186,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, { struct bch_fs *c = trans->c; struct qstr lostfound_str = QSTR("lost+found"); - struct btree_iter lostfound_iter = { NULL }; + struct btree_iter lostfound_iter = {}; u64 inum = 0; unsigned d_type = 0; int ret; @@ -295,8 +295,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, if (ret) goto err; - bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); - ret = bch2_btree_iter_traverse(&lostfound_iter); + bch2_btree_iter_set_snapshot(trans, &lostfound_iter, snapshot); + ret = bch2_btree_iter_traverse(trans, &lostfound_iter); if (ret) goto err; @@ -544,7 +544,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub new_inode.bi_subvol = subvolid; int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: - bch2_btree_iter_traverse(&inode_iter) ?: + bch2_btree_iter_traverse(trans, &inode_iter) ?: bch2_inode_write(trans, &inode_iter, &new_inode); bch2_trans_iter_exit(trans, &inode_iter); if (ret) @@ -609,7 +609,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 struct btree_iter iter = {}; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); - struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); + struct bkey_s_c k = bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum, 0)); bch2_trans_iter_exit(trans, &iter); int ret = bkey_err(k); if (ret) @@ -1557,7 +1557,7 @@ static int overlapping_extents_found(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - struct btree_iter iter1, iter2 = { NULL }; + struct btree_iter iter1, iter2 = {}; struct bkey_s_c k1, k2; int ret; @@ -1566,7 +1566,7 @@ static int overlapping_extents_found(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter1, btree, pos1, BTREE_ITER_all_snapshots| BTREE_ITER_not_extents); - k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); + k1 = bch2_btree_iter_peek_max(trans, &iter1, POS(pos1.inode, U64_MAX)); ret = bkey_err(k1); if (ret) goto err; @@ -1586,12 +1586,12 @@ static int overlapping_extents_found(struct btree_trans *trans, goto err; } - bch2_trans_copy_iter(&iter2, &iter1); + bch2_trans_copy_iter(trans, &iter2, &iter1); while (1) { - bch2_btree_iter_advance(&iter2); + bch2_btree_iter_advance(trans, &iter2); - k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); + k2 = bch2_btree_iter_peek_max(trans, &iter2, POS(pos1.inode, U64_MAX)); ret = bkey_err(k2); if (ret) goto err; @@ -1791,9 +1791,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { struct btree_iter iter2; - bch2_trans_copy_iter(&iter2, iter); - bch2_btree_iter_set_snapshot(&iter2, i->snapshot); - ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_trans_copy_iter(trans, &iter2, iter); + bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot); + ret = bch2_btree_iter_traverse(trans, &iter2) ?: bch2_btree_delete_at(trans, &iter2, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter2); @@ -2185,7 +2185,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, BTREE_ID_dirents, SPOS(k.k->p.inode, k.k->p.offset, *i), BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&delete_iter) ?: + ret = bch2_btree_iter_traverse(trans, &delete_iter) ?: bch2_hash_delete_at(trans, bch2_dirent_hash_desc, hash_info, &delete_iter, @@ -2412,7 +2412,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_iter_exit(trans, &parent_iter); bch2_trans_iter_init(trans, &parent_iter, BTREE_ID_subvolumes, POS(0, parent), 0); - k = bch2_btree_iter_peek_slot(&parent_iter); + k = bch2_btree_iter_peek_slot(trans, &parent_iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 80051073f613..b51d98cf8a80 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -940,7 +940,7 @@ int bch2_inode_create(struct btree_trans *trans, BTREE_ITER_intent); struct bkey_s_c k; again: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(trans, iter)).k && !(ret = bkey_err(k)) && bkey_lt(k.k->p, POS(0, max))) { if (pos < iter->pos.offset) @@ -951,7 +951,7 @@ int bch2_inode_create(struct btree_trans *trans, * we've found just one: */ pos = iter->pos.offset + 1; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); } if (!ret && pos < max) @@ -967,12 +967,12 @@ int bch2_inode_create(struct btree_trans *trans, /* Retry from start */ pos = start = min; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); le32_add_cpu(&cursor->v.gen, 1); goto again; found_slot: - bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); - k = bch2_btree_iter_peek_slot(iter); + bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot)); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) { bch2_trans_iter_exit(trans, iter); @@ -1009,9 +1009,9 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_max(&iter, end); + k = bch2_btree_iter_peek_max(trans, &iter, end); ret = bkey_err(k); if (ret) goto err; @@ -1042,7 +1042,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_s_c k; u32 snapshot; int ret; @@ -1207,7 +1207,7 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; struct bkey_s_c k; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 6b842c8d21be..cc07729a4b62 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -43,7 +43,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, bch2_bkey_buf_init(&new); closure_init_stack(&cl); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) return ret; @@ -164,12 +164,12 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, if (ret) continue; - bch2_btree_iter_set_snapshot(iter, snapshot); + bch2_btree_iter_set_snapshot(trans, iter, snapshot); /* * peek_max() doesn't have ideal semantics for extents: */ - k = bch2_btree_iter_peek_max(iter, end_pos); + k = bch2_btree_iter_peek_max(trans, iter, end_pos); if (!k.k) break; @@ -230,7 +230,7 @@ static int truncate_set_isize(struct btree_trans *trans, u64 new_i_size, bool warn) { - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; @@ -399,7 +399,7 @@ case LOGGED_OP_FINSERT_start: if (ret) goto err; } else { - bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -425,12 +425,12 @@ case LOGGED_OP_FINSERT_shift_extents: if (ret) goto btree_err; - bch2_btree_iter_set_snapshot(&iter, snapshot); - bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + bch2_btree_iter_set_pos(trans, &iter, SPOS(inum.inum, pos, snapshot)); k = insert - ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) - : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); + ? bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum.inum, 0)) + : bch2_btree_iter_peek_max(trans, &iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 066670a47886..417bb0c7bbfa 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -909,7 +909,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, prt_printf(&buf, "memory gen: %u", gen); - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(trans, &iter))); if (!ret) { prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k); @@ -1285,12 +1285,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, bvec_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 4f6a574cf23b..a418fa62f09d 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -168,9 +168,9 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, *i_sectors_delta = 0; *disk_sectors_delta = 0; - bch2_trans_copy_iter(&iter, extent_iter); + bch2_trans_copy_iter(trans, &iter, extent_iter); - for_each_btree_key_max_continue_norestart(iter, + for_each_btree_key_max_continue_norestart(trans, iter, new->k.p, BTREE_ITER_slots, old, ret) { s64 sectors = min(new->k.p.offset, old.k->p.offset) - max(bkey_start_offset(&new->k), @@ -292,7 +292,7 @@ int bch2_extent_update(struct btree_trans *trans, * path already traversed at iter->pos because * bch2_trans_extent_update() will use it to attempt extent merging */ - ret = __bch2_btree_iter_traverse(iter); + ret = __bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -337,7 +337,7 @@ int bch2_extent_update(struct btree_trans *trans, if (i_sectors_delta_total) *i_sectors_delta_total += i_sectors_delta; - bch2_btree_iter_set_pos(iter, next_pos); + bch2_btree_iter_set_pos(trans, iter, next_pos); return 0; } @@ -1305,7 +1305,7 @@ static void bch2_nocow_write(struct bch_write_op *op) if (ret) break; - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) break; @@ -1389,7 +1389,7 @@ static void bch2_nocow_write(struct bch_write_op *op) bch2_keylist_push(&op->insert_keys); if (op->flags & BCH_WRITE_submitted) break; - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } out: bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 57ad662871ba..90dcf80bd64a 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -130,7 +130,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, retry: ret = 0; while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && + (b = bch2_btree_iter_peek_node(trans, &iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { bch2_progress_update_iter(trans, progress, &iter, "dropping metadata"); @@ -154,7 +154,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, if (ret) break; next: - bch2_btree_iter_next_node(&iter); + bch2_btree_iter_next_node(trans, &iter); } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 5d41260e10da..fc396b9fa754 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -545,7 +545,7 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans * BTREE_ID_reflink, reflink_pos, BTREE_ITER_not_extents); - struct bkey_s_c k = bch2_btree_iter_peek(iter); + struct bkey_s_c k = bch2_btree_iter_peek(trans, iter); if (!k.k || bkey_err(k)) { bch2_trans_iter_exit(trans, iter); return k; @@ -603,7 +603,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(trans, &iter); if (!k.k) break; @@ -681,7 +681,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } bch2_trans_iter_exit(trans, &reflink_iter); @@ -794,7 +794,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&bp_iter); + k = bch2_btree_iter_peek(trans, &bp_iter); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -876,7 +876,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (ctxt->stats) atomic64_add(sectors, &ctxt->stats->sectors_seen); next: - bch2_btree_iter_advance(&bp_iter); + bch2_btree_iter_advance(trans, &bp_iter); } err: bch2_trans_iter_exit(trans, &bp_iter); @@ -991,7 +991,7 @@ static int bch2_move_btree(struct bch_fs *c, retry: ret = 0; while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && + (b = bch2_btree_iter_peek_node(trans, &iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { if (kthread && kthread_should_stop()) break; @@ -1011,7 +1011,7 @@ static int bch2_move_btree(struct bch_fs *c, if (ret) break; next: - bch2_btree_iter_next_node(&iter); + bch2_btree_iter_next_node(trans, &iter); } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index ee7251709fb9..0d65ea96f7a2 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -28,8 +28,8 @@ int bch2_create_trans(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_iter dir_iter = { NULL }; - struct btree_iter inode_iter = { NULL }; + struct btree_iter dir_iter = {}; + struct btree_iter inode_iter = {}; subvol_inum new_inum = dir; u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); @@ -127,8 +127,8 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot); - ret = bch2_btree_iter_traverse(&dir_iter); + bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot); + ret = bch2_btree_iter_traverse(trans, &dir_iter); if (ret) goto err; } @@ -177,9 +177,9 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_depth = dir_u->bi_depth + 1; inode_iter.flags &= ~BTREE_ITER_all_snapshots; - bch2_btree_iter_set_snapshot(&inode_iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot); - ret = bch2_btree_iter_traverse(&inode_iter) ?: + ret = bch2_btree_iter_traverse(trans, &inode_iter) ?: bch2_inode_write(trans, &inode_iter, new_inode); err: bch2_trans_iter_exit(trans, &inode_iter); @@ -193,8 +193,8 @@ int bch2_link_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dir_iter = { NULL }; - struct btree_iter inode_iter = { NULL }; + struct btree_iter dir_iter = {}; + struct btree_iter inode_iter = {}; struct bch_hash_info dir_hash; u64 now = bch2_current_time(c); u64 dir_offset = 0; @@ -253,9 +253,9 @@ int bch2_unlink_trans(struct btree_trans *trans, bool deleting_subvol) { struct bch_fs *c = trans->c; - struct btree_iter dir_iter = { NULL }; - struct btree_iter dirent_iter = { NULL }; - struct btree_iter inode_iter = { NULL }; + struct btree_iter dir_iter = {}; + struct btree_iter dirent_iter = {}; + struct btree_iter inode_iter = {}; struct bch_hash_info dir_hash; subvol_inum inum; u64 now = bch2_current_time(c); @@ -301,7 +301,7 @@ int bch2_unlink_trans(struct btree_trans *trans, if (ret) goto err; - k = bch2_btree_iter_peek_slot(&dirent_iter); + k = bch2_btree_iter_peek_slot(trans, &dirent_iter); ret = bkey_err(k); if (ret) goto err; @@ -310,8 +310,8 @@ int bch2_unlink_trans(struct btree_trans *trans, * If we're deleting a subvolume, we need to really delete the * dirent, not just emit a whiteout in the current snapshot: */ - bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot); - ret = bch2_btree_iter_traverse(&dirent_iter); + bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot); + ret = bch2_btree_iter_traverse(trans, &dirent_iter); if (ret) goto err; } else { @@ -390,10 +390,10 @@ int bch2_rename_trans(struct btree_trans *trans, enum bch_rename_mode mode) { struct bch_fs *c = trans->c; - struct btree_iter src_dir_iter = { NULL }; - struct btree_iter dst_dir_iter = { NULL }; - struct btree_iter src_inode_iter = { NULL }; - struct btree_iter dst_inode_iter = { NULL }; + struct btree_iter src_dir_iter = {}; + struct btree_iter dst_dir_iter = {}; + struct btree_iter src_inode_iter = {}; + struct btree_iter dst_inode_iter = {}; struct bch_hash_info src_hash, dst_hash; subvol_inum src_inum, dst_inum; u64 src_offset, dst_offset; @@ -666,7 +666,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - struct btree_iter bp_iter = { NULL }; + struct btree_iter bp_iter = {}; int ret = 0; if (inode_points_to_dirent(target, d)) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 8b857fc33244..3d4755d73af7 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -516,7 +516,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, KEY_TYPE_QUOTA_NOCHECK); advance: - bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); + bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); return 0; } diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index b9bde04b66c0..c63fa53f30d2 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -233,7 +233,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; @@ -281,7 +281,7 @@ static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; @@ -301,7 +301,7 @@ static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, struct btree_iter *work_iter) { return !kthread_should_stop() - ? bch2_btree_iter_peek(work_iter) + ? bch2_btree_iter_peek(trans, work_iter) : bkey_s_c_null; } @@ -335,7 +335,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, work_pos, BTREE_ITER_all_snapshots); - struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter); if (bkey_err(k)) return k; @@ -511,7 +511,7 @@ static int do_rebalance(struct moving_context *ctxt) struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct bch_fs_rebalance *r = &c->rebalance; - struct btree_iter rebalance_work_iter, extent_iter = { NULL }; + struct btree_iter rebalance_work_iter, extent_iter = {}; struct bkey_s_c k; int ret = 0; @@ -552,7 +552,7 @@ static int do_rebalance(struct moving_context *ctxt) if (ret) break; - bch2_btree_iter_advance(&rebalance_work_iter); + bch2_btree_iter_advance(trans, &rebalance_work_iter); } bch2_trans_iter_exit(trans, &extent_iter); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 266c5770c824..79fd18a5a07c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -198,7 +198,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, k->level, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(&iter); + int ret = bch2_btree_iter_traverse(trans, &iter); if (ret) goto out; @@ -261,7 +261,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, k->level, iter_flags); - ret = bch2_btree_iter_traverse(&iter); + ret = bch2_btree_iter_traverse(trans, &iter); if (ret) goto out; @@ -270,7 +270,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans, bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; goto out; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index ee23f1f93acc..710178e3da4c 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -495,7 +495,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bool reflink_p_may_update_opts_field) { struct bch_fs *c = trans->c; - struct btree_iter reflink_iter = { NULL }; + struct btree_iter reflink_iter = {}; struct bkey_s_c k; struct bkey_i *r_v; struct bkey_i_reflink_p *r_p; @@ -507,7 +507,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, BTREE_ITER_intent); - k = bch2_btree_iter_peek_prev(&reflink_iter); + k = bch2_btree_iter_peek_prev(trans, &reflink_iter); ret = bkey_err(k); if (ret) goto err; @@ -569,12 +569,13 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, return ret; } -static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) +static struct bkey_s_c get_next_src(struct btree_trans *trans, + struct btree_iter *iter, struct bpos end) { struct bkey_s_c k; int ret; - for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) { + for_each_btree_key_max_continue_norestart(trans, *iter, end, 0, k, ret) { if (bkey_extent_is_unwritten(k)) continue; @@ -583,7 +584,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) } if (bkey_ge(iter->pos, end)) - bch2_btree_iter_set_pos(iter, end); + bch2_btree_iter_set_pos(trans, iter, end); return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } @@ -647,27 +648,27 @@ s64 bch2_remap_range(struct bch_fs *c, if (ret) continue; - bch2_btree_iter_set_snapshot(&src_iter, src_snapshot); + bch2_btree_iter_set_snapshot(trans, &src_iter, src_snapshot); ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol, &dst_snapshot); if (ret) continue; - bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot); + bch2_btree_iter_set_snapshot(trans, &dst_iter, dst_snapshot); if (dst_inum.inum < src_inum.inum) { /* Avoid some lock cycle transaction restarts */ - ret = bch2_btree_iter_traverse(&dst_iter); + ret = bch2_btree_iter_traverse(trans, &dst_iter); if (ret) continue; } dst_done = dst_iter.pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); - bch2_btree_iter_set_pos(&src_iter, src_want); + bch2_btree_iter_set_pos(trans, &src_iter, src_want); - src_k = get_next_src(&src_iter, src_end); + src_k = get_next_src(trans, &src_iter, src_end); ret = bkey_err(src_k); if (ret) continue; @@ -738,7 +739,7 @@ s64 bch2_remap_range(struct bch_fs *c, do { struct bch_inode_unpacked inode_u; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; bch2_trans_begin(trans); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 0c65065b08ec..c8536eb36060 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1074,9 +1074,9 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) { struct bch_fs *c = trans->c; - struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; - struct btree_iter c_iter = (struct btree_iter) { NULL }; - struct btree_iter tree_iter = (struct btree_iter) { NULL }; + struct btree_iter iter, p_iter = {}; + struct btree_iter c_iter = {}; + struct btree_iter tree_iter = {}; struct bkey_s_c_snapshot s; u32 parent_id, child_id; unsigned i; @@ -1193,13 +1193,13 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_intent); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(trans, &iter); ret = bkey_err(k); if (ret) goto err; for (i = 0; i < nr_snapids; i++) { - k = bch2_btree_iter_prev_slot(&iter); + k = bch2_btree_iter_prev_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index 602afca2f5ef..a90bf7b8a2b4 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -195,7 +195,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, struct btree_iter *k_iter, struct bkey_s_c hash_k) { struct bch_fs *c = trans->c; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c k; int ret = 0; diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 575ad1e03904..09a354a26c3b 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -231,11 +231,11 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, struct bkey_s_c k; int ret; - bch2_trans_copy_iter(&iter, start); + bch2_trans_copy_iter(trans, &iter, start); - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); - for_each_btree_key_continue_norestart(iter, BTREE_ITER_slots, k, ret) { + for_each_btree_key_continue_norestart(trans, iter, BTREE_ITER_slots, k, ret) { if (k.k->type != desc.key_type && k.k->type != KEY_TYPE_hash_whiteout) break; @@ -280,7 +280,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, } if (!slot.path && !(flags & STR_HASH_must_replace)) - bch2_trans_copy_iter(&slot, iter); + bch2_trans_copy_iter(trans, &slot, iter); if (k.k->type != KEY_TYPE_hash_whiteout) goto not_found; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index cd0d8e5e44e7..5537283d0bea 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -275,7 +275,7 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0); - struct bkey_s_c k = bch2_btree_iter_peek(&iter); + struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter); bch2_trans_iter_exit(trans, &iter); return bkey_err(k) ?: k.k && k.k->p.inode == subvol @@ -574,7 +574,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, bool ro) { struct bch_fs *c = trans->c; - struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; + struct btree_iter dst_iter, src_iter = {}; struct bkey_i_subvolume *new_subvol = NULL; struct bkey_i_subvolume *src_subvol = NULL; u32 parent = 0, new_nodes[2], snapshot_subvols[2]; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 910f6196700e..f640c1e3d639 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -33,16 +33,16 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32); int bch2_subvol_is_ro(struct bch_fs *, u32); static inline struct bkey_s_c -bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end, - u32 subvolid, unsigned flags) +bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end, u32 subvolid, unsigned flags) { u32 snapshot; - int ret = bch2_subvolume_get_snapshot(iter->trans, subvolid, &snapshot); + int ret = bch2_subvolume_get_snapshot(trans, subvolid, &snapshot); if (ret) return bkey_s_c_err(ret); - bch2_btree_iter_set_snapshot(iter, snapshot); - return bch2_btree_iter_peek_max_type(iter, end, flags); + bch2_btree_iter_set_snapshot(trans, iter, snapshot); + return bch2_btree_iter_peek_max_type(trans, iter, end, flags); } #define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ @@ -53,14 +53,14 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos \ do { \ _ret3 = lockrestart_do(_trans, ({ \ - (_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter), \ + (_k) = bch2_btree_iter_peek_in_subvolume_max_type(trans, &(_iter),\ _end, _subvolid, (_flags)); \ if (!(_k).k) \ break; \ \ bkey_err(_k) ?: (_do); \ })); \ - } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \ + } while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret3; \ diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 6c6469814637..c265b102267a 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -43,7 +43,7 @@ static int test_delete(struct bch_fs *c, u64 nr) BTREE_ITER_intent); ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, &k.k_i, 0)); bch_err_msg(c, ret, "update error"); if (ret) @@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr) pr_info("deleting once"); ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_delete_at(trans, &iter, 0)); bch_err_msg(c, ret, "delete error (first)"); if (ret) @@ -59,7 +59,7 @@ static int test_delete(struct bch_fs *c, u64 nr) pr_info("deleting twice"); ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_delete_at(trans, &iter, 0)); bch_err_msg(c, ret, "delete error (second)"); if (ret) @@ -84,7 +84,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) BTREE_ITER_intent); ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, &k.k_i, 0)); bch_err_msg(c, ret, "update error"); if (ret) @@ -94,7 +94,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_journal_flush_all_pins(&c->journal); ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_delete_at(trans, &iter, 0)); bch_err_msg(c, ret, "delete error"); if (ret) @@ -349,10 +349,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr) bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); BUG_ON(k.k); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); BUG_ON(k.k); bch2_trans_iter_exit(trans, &iter); @@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), 0); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); BUG_ON(k.k); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); BUG_ON(k.k); bch2_trans_iter_exit(trans, &iter); @@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) trans = bch2_trans_get(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); BUG_ON(k.k->p.snapshot != U32_MAX); @@ -602,9 +602,9 @@ static int rand_lookup(struct bch_fs *c, u64 nr) SPOS(0, 0, U32_MAX), 0); for (i = 0; i < nr; i++) { - bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); + bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX)); - lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter))); ret = bkey_err(k); if (ret) break; @@ -623,9 +623,9 @@ static int rand_mixed_trans(struct btree_trans *trans, struct bkey_s_c k; int ret; - bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX)); + bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, U32_MAX)); - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(trans, iter); ret = bkey_err(k); bch_err_msg(trans->c, ret, "lookup error"); if (ret) @@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, BTREE_ITER_intent); - k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)); + k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f9667b944c0d..651da52b2cbc 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -168,7 +168,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, int type, int flags) { struct bch_fs *c = trans->c; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; int ret; ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?: From 955ba7b5ea0395e9e3377f0ffc14f5dc4a099284 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 16:09:39 -0400 Subject: [PATCH 2042/2157] bcachefs: bch_dev_usage_full All the fastpaths that need device usage don't need the sector totals or fragmentation, just bucket counts. Split bch_dev_usage up into two different versions, the normal one with just bucket counts. This is also a stack usage improvement, since we have a bch_dev_usage on the stack in the allocation path. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 6 +++--- fs/bcachefs/alloc_foreground.c | 11 ++++++----- fs/bcachefs/bcachefs.h | 3 ++- fs/bcachefs/buckets.c | 10 ++++++++-- fs/bcachefs/buckets.h | 21 +++++++++++++++------ fs/bcachefs/buckets_types.h | 5 +++++ fs/bcachefs/chardev.c | 12 ++++++------ fs/bcachefs/movinggc.c | 8 ++++++-- 8 files changed, 51 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index c556ccaffe89..34b3d6ac4fbb 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -321,11 +321,11 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, { u64 want_free = ca->mi.nbuckets >> 7; u64 free = max_t(s64, 0, - u.d[BCH_DATA_free].buckets - + u.d[BCH_DATA_need_discard].buckets + u.buckets[BCH_DATA_free] + + u.buckets[BCH_DATA_need_discard] - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe)); - return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); + return clamp_t(s64, want_free - free, 0, u.buckets[BCH_DATA_cached]); } void bch2_dev_do_invalidates(struct bch_dev *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index d188bb531e2b..7c930ef77380 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -469,7 +469,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, prt_printf(&buf, "watermark\t%s\n", bch2_watermarks[watermark]); prt_printf(&buf, "data type\t%s\n", __bch2_data_types[data_type]); prt_printf(&buf, "blocking\t%u\n", cl != NULL); - prt_printf(&buf, "free\t%llu\n", usage->d[BCH_DATA_free].buckets); + prt_printf(&buf, "free\t%llu\n", usage->buckets[BCH_DATA_free]); prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(ca, *usage, watermark)); prt_printf(&buf, "copygc_wait\t%lu/%lli\n", bch2_copygc_wait_amount(c), @@ -524,10 +524,10 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, bch2_dev_usage_read_fast(ca, usage); avail = dev_buckets_free(ca, *usage, watermark); - if (usage->d[BCH_DATA_need_discard].buckets > avail) + if (usage->buckets[BCH_DATA_need_discard] > avail) bch2_dev_do_discards(ca); - if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) + if (usage->buckets[BCH_DATA_need_gc_gens] > avail) bch2_gc_gens_async(c); if (should_invalidate_buckets(ca, *usage)) @@ -1669,7 +1669,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) { struct bch_fs *c = ca->fs; - struct bch_dev_usage stats = bch2_dev_usage_read(ca); + struct bch_dev_usage_full stats = bch2_dev_usage_full_read(ca); unsigned nr[BCH_DATA_NR]; memset(nr, 0, sizeof(nr)); @@ -1692,7 +1692,8 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); prt_printf(out, "open buckets\t%i\r\n", ca->nr_open_buckets); - prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); + prt_printf(out, "buckets to invalidate\t%llu\r\n", + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))); } static noinline void bch2_print_allocator_stuck(struct bch_fs *c) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 21da4167a3ae..5d9f208a1bb7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -562,7 +562,8 @@ struct bch_dev { unsigned long *bucket_backpointer_mismatches; unsigned long *bucket_backpointer_empty; - struct bch_dev_usage __percpu *usage; + struct bch_dev_usage_full __percpu + *usage; /* Allocator: */ u64 alloc_cursor[3]; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a1fc462ea0de..fea61e60a9ee 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -29,6 +29,12 @@ #include void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) +{ + for (unsigned i = 0; i < BCH_DATA_NR; i++) + usage->buckets[i] = percpu_u64_get(&ca->usage->d[i].buckets); +} + +void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) { memset(usage, 0, sizeof(*usage)); acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); @@ -75,7 +81,7 @@ bch2_fs_usage_read_short(struct bch_fs *c) void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev *ca, - struct bch_dev_usage *usage) + struct bch_dev_usage_full *usage) { if (out->nr_tabstops < 5) { printbuf_tabstops_reset(out); @@ -1331,7 +1337,7 @@ void bch2_dev_buckets_free(struct bch_dev *ca) int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) { - ca->usage = alloc_percpu(struct bch_dev_usage); + ca->usage = alloc_percpu(struct bch_dev_usage_full); if (!ca->usage) return -BCH_ERR_ENOMEM_usage_init; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index c5363256e363..1c38b165f48b 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -172,7 +172,16 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); +void bch2_dev_usage_full_read_fast(struct bch_dev *, struct bch_dev_usage_full *); +static inline struct bch_dev_usage_full bch2_dev_usage_full_read(struct bch_dev *ca) +{ + struct bch_dev_usage_full ret; + + bch2_dev_usage_full_read_fast(ca, &ret); + return ret; +} + +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage_full *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { @@ -207,7 +216,7 @@ static inline u64 dev_buckets_free(struct bch_dev *ca, enum bch_watermark watermark) { return max_t(s64, 0, - usage.d[BCH_DATA_free].buckets - + usage.buckets[BCH_DATA_free]- ca->nr_open_buckets - bch2_dev_buckets_reserved(ca, watermark)); } @@ -217,10 +226,10 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca, enum bch_watermark watermark) { return max_t(s64, 0, - usage.d[BCH_DATA_free].buckets - + usage.d[BCH_DATA_cached].buckets - + usage.d[BCH_DATA_need_gc_gens].buckets - + usage.d[BCH_DATA_need_discard].buckets + usage.buckets[BCH_DATA_free] + + usage.buckets[BCH_DATA_cached] + + usage.buckets[BCH_DATA_need_gc_gens] + + usage.buckets[BCH_DATA_need_discard] - ca->nr_open_buckets - bch2_dev_buckets_reserved(ca, watermark)); } diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 900b8680c8b5..0aed2500ade3 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -54,7 +54,12 @@ struct bucket_gens { u8 b[] __counted_by(nbuckets); }; +/* Only info on bucket countns: */ struct bch_dev_usage { + u64 buckets[BCH_DATA_NR]; +}; + +struct bch_dev_usage_full { struct bch_dev_usage_type { u64 buckets; u64 sectors; /* _compressed_ sectors: */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index c9d1585eec21..5891b3a1e61c 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -350,8 +350,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, if (ctx->arg.op == BCH_DATA_OP_scrub) { struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); if (ca) { - struct bch_dev_usage u; - bch2_dev_usage_read_fast(ca, &u); + struct bch_dev_usage_full u; + bch2_dev_usage_full_read_fast(ca, &u); for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) if (ctx->arg.scrub.data_types & BIT(i)) e.p.sectors_total += u.d[i].sectors; @@ -473,7 +473,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; - struct bch_dev_usage src; + struct bch_dev_usage_full src; struct bch_dev *ca; unsigned i; @@ -493,7 +493,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_read(ca); + src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; @@ -514,7 +514,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_ioctl_dev_usage_v2 __user *user_arg) { struct bch_ioctl_dev_usage_v2 arg; - struct bch_dev_usage src; + struct bch_dev_usage_full src; struct bch_dev *ca; int ret = 0; @@ -534,7 +534,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_read(ca); + src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 5126c870ce5b..159410c50861 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -280,7 +280,11 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) s64 wait = S64_MAX, fragmented_allowed, fragmented; for_each_rw_member(c, ca) { - struct bch_dev_usage usage = bch2_dev_usage_read(ca); + struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca); + struct bch_dev_usage usage; + + for (unsigned i = 0; i < BCH_DATA_NR; i++) + usage.buckets[i] = usage_full.d[i].buckets; fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * ca->mi.bucket_size) >> 1); @@ -288,7 +292,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) for (unsigned i = 0; i < BCH_DATA_NR; i++) if (data_type_movable(i)) - fragmented += usage.d[i].fragmented; + fragmented += usage_full.d[i].fragmented; wait = min(wait, max(0LL, fragmented_allowed - fragmented)); } From a07c43e6c2ff4cbdba7abf8d533b2faf19fa2287 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 20:26:48 -0700 Subject: [PATCH 2043/2157] bcachefs: add missing selection of XARRAY_MULTI When CONFIG_XARRAY_MULTI is not set, reading from a bcachefs file hits the 'BUG_ON(order > 0);' in xas_set_order(), because it tries to insert a large folio in the page cache. Fix this by making bcachefs select XARRAY_MULTI. Fixes: be212d86b19c ("bcachefs: bs > ps support") Signed-off-by: Eric Biggers Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index c9798750202d..bf1c94e51dd0 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -26,6 +26,7 @@ config BCACHEFS_FS select SRCU select SYMBOLIC_ERRNAME select MIN_HEAP + select XARRAY_MULTI help The bcachefs filesystem - a modern, copy on write filesystem, with support for multiple devices, compression, checksumming, etc. From c44a14f216f45d8bf1634b52854a699d7090f1e8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Apr 2025 10:49:04 -0400 Subject: [PATCH 2044/2157] tracing: Enforce the persistent ring buffer to be page aligned Enforce that the address and the size of the memory used by the persistent ring buffer is page aligned. Also update the documentation to reflect this requirement. Link: https://lore.kernel.org/all/CAHk-=whUOfVucfJRt7E0AH+GV41ELmS4wJqxHDnui6Giddfkzw@mail.gmail.com/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Jann Horn Link: https://lore.kernel.org/20250402144953.412882844@goodmis.org Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ Documentation/trace/debugging.rst | 2 ++ kernel/trace/trace.c | 10 ++++++++++ 3 files changed, 14 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8..71861643ef14 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7241,6 +7241,8 @@ This is just one of many ways that can clear memory. Make sure your system keeps the content of memory across reboots before relying on this option. + NB: Both the mapped address and size must be page aligned for the architecture. + See also Documentation/trace/debugging.rst diff --git a/Documentation/trace/debugging.rst b/Documentation/trace/debugging.rst index 54fb16239d70..d54bc500af80 100644 --- a/Documentation/trace/debugging.rst +++ b/Documentation/trace/debugging.rst @@ -136,6 +136,8 @@ kernel, so only the same kernel is guaranteed to work if the mapping is preserved. Switching to a different kernel version may find a different layout and mark the buffer as invalid. +NB: Both the mapped address and size must be page aligned for the architecture. + Using trace_printk() in the boot instance ----------------------------------------- By default, the content of trace_printk() goes into the top level tracing diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 14c38fcd6f9e..96129985e81c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10774,6 +10774,16 @@ __init static void enable_instances(void) } if (start) { + /* Start and size must be page aligned */ + if (start & ~PAGE_MASK) { + pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); + continue; + } + if (size & ~PAGE_MASK) { + pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); + continue; + } + addr = map_pages(start, size); if (addr) { pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", From 34ea8fa084dd96a2e130ec871ade9ed3003f7eea Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Apr 2025 10:49:05 -0400 Subject: [PATCH 2045/2157] tracing: Have reserve_mem use phys_to_virt() and separate from memmap buffer The reserve_mem kernel command line option may pass back a physical address, but the memory is still part of the normal memory just like using memblock_alloc() would be. This means that the physical memory returned by the reserve_mem command line option can be converted directly to virtual memory by simply using phys_to_virt(). When freeing the buffer there's no need to call vunmap() anymore as the memory allocated by reserve_mem is freed by the call to reserve_mem_release_by_name(). Because the persistent ring buffer can also be allocated via the memmap option, which *is* different than normal memory as it cannot be added back to the buddy system, it must be treated differently. It still needs to be virtually mapped to have access to it. It also can not be freed nor can it ever be memory mapped to user space. Create a new trace_array flag called TRACE_ARRAY_FL_MEMMAP which gets set if the buffer is created by the memmap option, and this will prevent the buffer from being memory mapped by user space. Also increment the ref count for memmap'ed buffers so that they can never be freed. Link: https://lore.kernel.org/all/Z-wFszhJ_9o4dc8O@kernel.org/ Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Vlastimil Babka Cc: Jann Horn Link: https://lore.kernel.org/20250402144953.583750106@goodmis.org Suggested-by: Mike Rapoport Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 23 ++++++++++++++++------- kernel/trace/trace.h | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 96129985e81c..e58b72e61573 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8492,6 +8492,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) struct trace_iterator *iter = &info->iter; int ret = 0; + /* A memmap'ed buffer is not supported for user space mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) + return -ENODEV; + /* Currently the boot mapped buffer is not supported for mmap */ if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) return -ENODEV; @@ -9600,9 +9604,6 @@ static void free_trace_buffers(struct trace_array *tr) #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); #endif - - if (tr->range_addr_start) - vunmap((void *)tr->range_addr_start); } static void init_trace_flags_index(struct trace_array *tr) @@ -10696,6 +10697,7 @@ static inline void do_allocate_snapshot(const char *name) { } __init static void enable_instances(void) { struct trace_array *tr; + bool memmap_area = false; char *curr_str; char *name; char *str; @@ -10764,6 +10766,7 @@ __init static void enable_instances(void) name); continue; } + memmap_area = true; } else if (tok) { if (!reserve_mem_find_by_name(tok, &start, &size)) { start = 0; @@ -10784,7 +10787,10 @@ __init static void enable_instances(void) continue; } - addr = map_pages(start, size); + if (memmap_area) + addr = map_pages(start, size); + else + addr = (unsigned long)phys_to_virt(start); if (addr) { pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", name, &start, (unsigned long)size); @@ -10811,10 +10817,13 @@ __init static void enable_instances(void) update_printk_trace(tr); /* - * If start is set, then this is a mapped buffer, and - * cannot be deleted by user space, so keep the reference - * to it. + * memmap'd buffers can not be freed. */ + if (memmap_area) { + tr->flags |= TRACE_ARRAY_FL_MEMMAP; + tr->ref++; + } + if (start) { tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; tr->range_name = no_free_ptr(rname); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ab7c7a1930cc..9d9dcfad6269 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -446,6 +446,7 @@ enum { TRACE_ARRAY_FL_BOOT = BIT(1), TRACE_ARRAY_FL_LAST_BOOT = BIT(2), TRACE_ARRAY_FL_MOD_INIT = BIT(3), + TRACE_ARRAY_FL_MEMMAP = BIT(4), }; #ifdef CONFIG_MODULES From 394f3f02de5311ea976dd8046304194d22329bbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Apr 2025 10:49:06 -0400 Subject: [PATCH 2046/2157] tracing: Use vmap_page_range() to map memmap ring buffer The code to map the physical memory retrieved by memmap currently allocates an array of pages to cover the physical memory and then calls vmap() to map it to a virtual address. Instead of using this temporary array of struct page descriptors, simply use vmap_page_range() that can directly map the contiguous physical memory to a virtual address. Link: https://lore.kernel.org/all/CAHk-=whUOfVucfJRt7E0AH+GV41ELmS4wJqxHDnui6Giddfkzw@mail.gmail.com/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Jann Horn Link: https://lore.kernel.org/20250402144953.754618481@goodmis.org Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e58b72e61573..f6531cd3176b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -50,6 +50,7 @@ #include #include #include +#include /* vmap_page_range() */ #include /* COMMAND_LINE_SIZE */ @@ -9796,29 +9797,27 @@ static int instance_mkdir(const char *name) return ret; } -static u64 map_pages(u64 start, u64 size) +static u64 map_pages(unsigned long start, unsigned long size) { - struct page **pages; - phys_addr_t page_start; - unsigned int page_count; - unsigned int i; - void *vaddr; + unsigned long vmap_start, vmap_end; + struct vm_struct *area; + int ret; - page_count = DIV_ROUND_UP(size, PAGE_SIZE); - - page_start = start; - pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); - if (!pages) + area = get_vm_area(size, VM_IOREMAP); + if (!area) return 0; - for (i = 0; i < page_count; i++) { - phys_addr_t addr = page_start + i * PAGE_SIZE; - pages[i] = pfn_to_page(addr >> PAGE_SHIFT); - } - vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); - kfree(pages); + vmap_start = (unsigned long) area->addr; + vmap_end = vmap_start + size; - return (u64)(unsigned long)vaddr; + ret = vmap_page_range(vmap_start, vmap_end, + start, pgprot_nx(PAGE_KERNEL)); + if (ret < 0) { + free_vm_area(area); + return 0; + } + + return (u64)vmap_start; } /** From e4d4b8670c44cdd22212cab3c576e2d317efa67c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Apr 2025 10:49:07 -0400 Subject: [PATCH 2047/2157] ring-buffer: Use flush_kernel_vmap_range() over flush_dcache_folio() Some architectures do not have data cache coherency between user and kernel space. For these architectures, the cache needs to be flushed on both the kernel and user addresses so that user space can see the updates the kernel has made. Instead of using flush_dcache_folio() and playing with virt_to_folio() within the call to that function, use flush_kernel_vmap_range() which takes the virtual address and does the work for those architectures that need it. This also fixes a bug where the flush of the reader page only flushed one page. If the sub-buffer order is 1 or more, where the sub-buffer size would be greater than a page, it would miss the rest of the sub-buffer content, as the "reader page" is not just a page, but the size of a sub-buffer. Link: https://lore.kernel.org/all/CAG48ez3w0my4Rwttbc5tEbNsme6tc0mrSN95thjXUFaJ3aQ6SA@mail.gmail.com/ Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Vlastimil Babka Cc: Mike Rapoport Link: https://lore.kernel.org/20250402144953.920792197@goodmis.org Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions"); Suggested-by: Jann Horn Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f25966b3a1fc..d4b0f7b55cce 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6016,7 +6016,7 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer) meta->read = cpu_buffer->read; /* Some archs do not have data cache coherency between kernel and user-space */ - flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page)); + flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE); } static void @@ -7319,7 +7319,8 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) out: /* Some archs do not have data cache coherency between kernel and user-space */ - flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page)); + flush_kernel_vmap_range(cpu_buffer->reader_page->page, + buffer->subbuf_size + BUF_PAGE_HDR_SIZE); rb_update_meta_page(cpu_buffer); From dddeeaa16ce9d163ccf3b681715512d338afa541 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 5 Mar 2025 18:09:00 +0000 Subject: [PATCH 2048/2157] igc: Fix XSK queue NAPI ID mapping In commit b65969856d4f ("igc: Link queues to NAPI instances"), the XSK queues were incorrectly unmapped from their NAPI instances. After discussion on the mailing list and the introduction of a test to codify the expected behavior, we can see that the unmapping causes the check_xsk test to fail: NETIF=enp86s0 ./tools/testing/selftests/drivers/net/queues.py [...] # Check| ksft_eq(q.get('xsk', None), {}, # Check failed None != {} xsk attr on queue we configured not ok 4 queues.check_xsk After this commit, the test passes: ok 4 queues.check_xsk Note that the test itself is only in net-next, so I tested this change by applying it to my local net-next tree, booting, and running the test. Cc: stable@vger.kernel.org Fixes: b65969856d4f ("igc: Link queues to NAPI instances") Signed-off-by: Joe Damato Reviewed-by: Gerhard Engleder Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 -- drivers/net/ethernet/intel/igc/igc_main.c | 4 ++-- drivers/net/ethernet/intel/igc/igc_xdp.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index cd1d7b6c1782..c35cc5cb1185 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -337,8 +337,6 @@ struct igc_adapter { struct igc_led_classdev *leds; }; -void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx, - struct napi_struct *napi); void igc_up(struct igc_adapter *adapter); void igc_down(struct igc_adapter *adapter); int igc_open(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 491d942cefca..27c99ff59ed4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5022,8 +5022,8 @@ static int igc_sw_init(struct igc_adapter *adapter) return 0; } -void igc_set_queue_napi(struct igc_adapter *adapter, int vector, - struct napi_struct *napi) +static void igc_set_queue_napi(struct igc_adapter *adapter, int vector, + struct napi_struct *napi) { struct igc_q_vector *q_vector = adapter->q_vector[vector]; diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index c538e6b18aad..9eb47b4beb06 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -97,7 +97,6 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter, napi_disable(napi); } - igc_set_queue_napi(adapter, queue_id, NULL); set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags); @@ -147,7 +146,6 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id) xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR); clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags); - igc_set_queue_napi(adapter, queue_id, napi); if (needs_reset) { napi_enable(napi); From d931cf9b38da0f533cacfe51c863a9912e67822f Mon Sep 17 00:00:00 2001 From: Zdenek Bouska Date: Wed, 19 Mar 2025 15:18:48 +0100 Subject: [PATCH 2049/2157] igc: Fix TX drops in XDP ZC Fixes TX frame drops in AF_XDP zero copy mode when budget < 4. xsk_tx_peek_desc() consumed TX frame and it was ignored because of low budget. Not even AF_XDP completion was done for dropped frames. It can be reproduced on i226 by sending 100000x 60 B frames with launch time set to minimal IPG (672 ns between starts of frames) on 1Gbit/s. Always 1026 frames are not sent and are missing a completion. Fixes: 9acf59a752d4c ("igc: Enable TX via AF_XDP zero-copy") Signed-off-by: Zdenek Bouska Reviewed-by: Song Yoong Siang Reviewed-by: Florian Bezdeka Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 27c99ff59ed4..156d123c0e21 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3042,7 +3042,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) * descriptors. Therefore, to be safe, we always ensure we have at least * 4 descriptors available. */ - while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) { + while (budget >= 4 && xsk_tx_peek_desc(pool, &xdp_desc)) { struct igc_metadata_request meta_req; struct xsk_tx_metadata *meta = NULL; struct igc_tx_buffer *bi; From efaaf344bc2917cbfa5997633bc18a05d3aed27f Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Thu, 13 Mar 2025 16:05:56 +0200 Subject: [PATCH 2050/2157] e1000e: change k1 configuration on MTP and later platforms Starting from Meteor Lake, the Kumeran interface between the integrated MAC and the I219 PHY works at a different frequency. This causes sporadic MDI errors when accessing the PHY, and in rare circumstances could lead to packet corruption. To overcome this, introduce minor changes to the Kumeran idle state (K1) parameters during device initialization. Hardware reset reverts this configuration, therefore it needs to be applied in a few places. Fixes: cc23f4f0b6b9 ("e1000e: Add support for Meteor Lake") Signed-off-by: Vitaly Lifshits Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/defines.h | 3 + drivers/net/ethernet/intel/e1000e/ich8lan.c | 80 +++++++++++++++++++-- drivers/net/ethernet/intel/e1000e/ich8lan.h | 4 ++ 3 files changed, 82 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 5e2cfa73f889..8294a7c4f122 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -803,4 +803,7 @@ /* SerDes Control */ #define E1000_GEN_POLL_TIMEOUT 640 +#define E1000_FEXTNVM12_PHYPD_CTRL_MASK 0x00C00000 +#define E1000_FEXTNVM12_PHYPD_CTRL_P1 0x00800000 + #endif /* _E1000_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 2f9655cf5dd9..364378133526 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -285,6 +285,45 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) } } +/** + * e1000_reconfigure_k1_exit_timeout - reconfigure K1 exit timeout to + * align to MTP and later platform requirements. + * @hw: pointer to the HW structure + * + * Context: PHY semaphore must be held by caller. + * Return: 0 on success, negative on failure + */ +static s32 e1000_reconfigure_k1_exit_timeout(struct e1000_hw *hw) +{ + u16 phy_timeout; + u32 fextnvm12; + s32 ret_val; + + if (hw->mac.type < e1000_pch_mtp) + return 0; + + /* Change Kumeran K1 power down state from P0s to P1 */ + fextnvm12 = er32(FEXTNVM12); + fextnvm12 &= ~E1000_FEXTNVM12_PHYPD_CTRL_MASK; + fextnvm12 |= E1000_FEXTNVM12_PHYPD_CTRL_P1; + ew32(FEXTNVM12, fextnvm12); + + /* Wait for the interface the settle */ + usleep_range(1000, 1100); + + /* Change K1 exit timeout */ + ret_val = e1e_rphy_locked(hw, I217_PHY_TIMEOUTS_REG, + &phy_timeout); + if (ret_val) + return ret_val; + + phy_timeout &= ~I217_PHY_TIMEOUTS_K1_EXIT_TO_MASK; + phy_timeout |= 0xF00; + + return e1e_wphy_locked(hw, I217_PHY_TIMEOUTS_REG, + phy_timeout); +} + /** * e1000_init_phy_workarounds_pchlan - PHY initialization workarounds * @hw: pointer to the HW structure @@ -327,15 +366,22 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) * LANPHYPC Value bit to force the interconnect to PCIe mode. */ switch (hw->mac.type) { + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + case e1000_pch_nvp: + /* At this point the PHY might be inaccessible so don't + * propagate the failure + */ + if (e1000_reconfigure_k1_exit_timeout(hw)) + e_dbg("Failed to reconfigure K1 exit timeout\n"); + + fallthrough; case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: - case e1000_pch_nvp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -419,8 +465,20 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) * the PHY is in. */ ret_val = hw->phy.ops.check_reset_block(hw); - if (ret_val) + if (ret_val) { e_err("ME blocked access to PHY after reset\n"); + goto out; + } + + if (hw->mac.type >= e1000_pch_mtp) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) { + e_err("Failed to reconfigure K1 exit timeout\n"); + goto out; + } + ret_val = e1000_reconfigure_k1_exit_timeout(hw); + hw->phy.ops.release(hw); + } } out: @@ -4888,6 +4946,18 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) u16 i; e1000_initialize_hw_bits_ich8lan(hw); + if (hw->mac.type >= e1000_pch_mtp) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_reconfigure_k1_exit_timeout(hw); + hw->phy.ops.release(hw); + if (ret_val) { + e_dbg("Error failed to reconfigure K1 exit timeout\n"); + return ret_val; + } + } /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 2504b11c3169..5feb589a9b5f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -219,6 +219,10 @@ #define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28) #define I217_PLL_CLOCK_GATE_MASK 0x07FF +/* PHY Timeouts */ +#define I217_PHY_TIMEOUTS_REG PHY_REG(770, 21) +#define I217_PHY_TIMEOUTS_K1_EXIT_TO_MASK 0x0FC0 + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ From 40206599beec98cfeb01913ee417f015e3f6190c Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Fri, 21 Feb 2025 16:49:17 +0100 Subject: [PATCH 2051/2157] ixgbe: fix media type detection for E610 device The commit 23c0e5a16bcc ("ixgbe: Add link management support for E610 device") introduced incorrect media type detection for E610 device. It reproduces when advertised speed is modified after driver reload. Clear the previous outdated PHY type high value. Reproduction steps: modprobe ixgbe ethtool -s eth0 advertise 0x1000000000000 modprobe -r ixgbe modprobe ixgbe ethtool -s eth0 advertise 0x1000000000000 Result before the fix: netlink error: link settings update failed netlink error: Invalid argument Result after the fix: No output error Fixes: 23c0e5a16bcc ("ixgbe: Add link management support for E610 device") Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Signed-off-by: Piotr Kwapulinski Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index cb07ecd8937d..00935747c8c5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -1453,9 +1453,11 @@ enum ixgbe_media_type ixgbe_get_media_type_e610(struct ixgbe_hw *hw) hw->link.link_info.phy_type_low = 0; } else { highest_bit = fls64(le64_to_cpu(pcaps.phy_type_low)); - if (highest_bit) + if (highest_bit) { hw->link.link_info.phy_type_low = BIT_ULL(highest_bit - 1); + hw->link.link_info.phy_type_high = 0; + } } } From 4c9106f4906a85f6b13542d862e423bcdc118cc3 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Mon, 17 Mar 2025 22:42:02 -0700 Subject: [PATCH 2052/2157] idpf: fix adapter NULL pointer dereference on reboot With SRIOV enabled, idpf ends up calling into idpf_remove() twice. First via idpf_shutdown() and then again when idpf_remove() calls into sriov_disable(), because the VF devices use the idpf driver, hence the same remove routine. When that happens, it is possible for the adapter to be NULL from the first call to idpf_remove(), leading to a NULL pointer dereference. echo 1 > /sys/class/net//device/sriov_numvfs reboot BUG: kernel NULL pointer dereference, address: 0000000000000020 ... RIP: 0010:idpf_remove+0x22/0x1f0 [idpf] ... ? idpf_remove+0x22/0x1f0 [idpf] ? idpf_remove+0x1e4/0x1f0 [idpf] pci_device_remove+0x3f/0xb0 device_release_driver_internal+0x19f/0x200 pci_stop_bus_device+0x6d/0x90 pci_stop_and_remove_bus_device+0x12/0x20 pci_iov_remove_virtfn+0xbe/0x120 sriov_disable+0x34/0xe0 idpf_sriov_configure+0x58/0x140 [idpf] idpf_remove+0x1b9/0x1f0 [idpf] idpf_shutdown+0x12/0x30 [idpf] pci_device_shutdown+0x35/0x60 device_shutdown+0x156/0x200 ... Replace the direct idpf_remove() call in idpf_shutdown() with idpf_vc_core_deinit() and idpf_deinit_dflt_mbx(), which perform the bulk of the cleanup, such as stopping the init task, freeing IRQs, destroying the vports and freeing the mailbox. This avoids the calls to sriov_disable() in addition to a small netdev cleanup, and destroying workqueues, which don't seem to be required on shutdown. Reported-by: Yuying Ma Fixes: e850efed5e15 ("idpf: add module register and probe functionality") Reviewed-by: Madhu Chittim Signed-off-by: Emil Tantilov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index b6c515d14cbf..bec4a02c5373 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -87,7 +87,11 @@ static void idpf_remove(struct pci_dev *pdev) */ static void idpf_shutdown(struct pci_dev *pdev) { - idpf_remove(pdev); + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + + cancel_delayed_work_sync(&adapter->vc_event_task); + idpf_vc_core_deinit(adapter); + idpf_deinit_dflt_mbx(adapter); if (system_state == SYSTEM_POWER_OFF) pci_set_power_state(pdev, PCI_D3hot); From 2985dae1e521ee1464130902415f5863ea05dc34 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 31 Mar 2025 20:23:36 -0700 Subject: [PATCH 2053/2157] mm/page_alloc: Fix try_alloc_pages Fix an obvious bug. try_alloc_pages() should set_page_refcounted. [ Not so obvious: it was probably correct at the time it was written but was at some point then rebased on top of v6.14-rc1. And at that point there was a semantic conflict with commit efabfe1420f5 ("mm/page_alloc: move set_page_refcounted() to callers of get_page_from_freelist()") and became buggy. - Linus ] Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Vlastimil BAbka Reviewed-by: Harry Yoo Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f51aa6051a99..5b173c2da641 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7385,6 +7385,9 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order) /* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */ + if (page) + set_page_refcounted(page); + if (memcg_kmem_online() && page && unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) { free_pages_nolock(page, order); From 2bac648dab395be0ad0d55b9c2ae7723e71e233e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Apr 2025 09:08:33 -1000 Subject: [PATCH 2054/2157] tools/sched_ext: Sync with scx repo Synchronize with https://github.com/sched-ext/scx at dc44584874f0 ("kernel: Synchronize with kernel tools/sched_ext"). - READ/WRITE_ONCE() is made more proper and READA_ONCE_ARENA() is dropped. - scale_by_task_weight[_inverse]() helpers added. - Enum defs expanded to cover more and new enums. - Don't trigger fatal error when some enums are missing from kernel BTF. Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 85 +++++++++++++------ .../sched_ext/include/scx/enum_defs.autogen.h | 3 + .../sched_ext/include/scx/enums.autogen.bpf.h | 24 ++++++ tools/sched_ext/include/scx/enums.autogen.h | 8 ++ tools/sched_ext/include/scx/enums.h | 3 +- 5 files changed, 94 insertions(+), 29 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index dc4333d23189..8787048c6762 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -586,36 +586,48 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s } } -#define READ_ONCE(x) \ -({ \ - union { typeof(x) __val; char __c[1]; } __u = \ - { .__c = { 0 } }; \ - __read_once_size(&(x), __u.__c, sizeof(x)); \ - __u.__val; \ +/* + * __unqual_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged, + * + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' + * is not type-compatible with 'signed char', and we define a separate case. + * + * This is copied verbatim from kernel's include/linux/compiler_types.h, but + * with default expression (for pointers) changed from (x) to (typeof(x)0). + * + * This is because LLVM has a bug where for lvalue (x), it does not get rid of + * an extra address_space qualifier, but does in case of rvalue (typeof(x)0). + * Hence, for pointers, we need to create an rvalue expression to get the + * desired type. See https://github.com/llvm/llvm-project/issues/53400. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type : (unsigned type)0, signed type : (signed type)0 + +#define __unqual_typeof(x) \ + typeof(_Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (typeof(x))0)) + +#define READ_ONCE(x) \ +({ \ + union { __unqual_typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \ + __u.__val; \ }) -#define WRITE_ONCE(x, val) \ -({ \ - union { typeof(x) __val; char __c[1]; } __u = \ - { .__val = (val) }; \ - __write_once_size(&(x), __u.__c, sizeof(x)); \ - __u.__val; \ -}) - -#define READ_ONCE_ARENA(type, x) \ -({ \ - union { type __val; char __c[1]; } __u = \ - { .__c = { 0 } }; \ - __read_once_size((void *)&(x), __u.__c, sizeof(x)); \ - __u.__val; \ -}) - -#define WRITE_ONCE_ARENA(type, x, val) \ -({ \ - union { type __val; char __c[1]; } __u = \ - { .__val = (val) }; \ - __write_once_size((void *)&(x), __u.__c, sizeof(x)); \ - __u.__val; \ +#define WRITE_ONCE(x, val) \ +({ \ + union { __unqual_typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \ + __u.__val; \ }) /* @@ -648,6 +660,23 @@ static inline u32 log2_u64(u64 v) return log2_u32(v) + 1; } +/* + * Return a value proportionally scaled to the task's weight. + */ +static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value) +{ + return (value * p->scx.weight) / 100; +} + +/* + * Return a value inversely proportional to the task's weight. + */ +static inline u64 scale_by_task_weight_inverse(const struct task_struct *p, u64 value) +{ + return value * 100 / p->scx.weight; +} + + #include "compat.bpf.h" #include "enums.bpf.h" diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h index 6e6c45f14fe1..c2c33df9292c 100644 --- a/tools/sched_ext/include/scx/enum_defs.autogen.h +++ b/tools/sched_ext/include/scx/enum_defs.autogen.h @@ -88,6 +88,8 @@ #define HAVE_SCX_OPS_ENQ_LAST #define HAVE_SCX_OPS_ENQ_EXITING #define HAVE_SCX_OPS_SWITCH_PARTIAL +#define HAVE_SCX_OPS_ENQ_MIGRATION_DISABLED +#define HAVE_SCX_OPS_ALLOW_QUEUED_WAKEUP #define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT #define HAVE_SCX_OPS_ALL_FLAGS #define HAVE_SCX_OPSS_NONE @@ -104,6 +106,7 @@ #define HAVE_SCX_RQ_BAL_PENDING #define HAVE_SCX_RQ_BAL_KEEP #define HAVE_SCX_RQ_BYPASSING +#define HAVE_SCX_RQ_CLK_VALID #define HAVE_SCX_RQ_IN_WAKEUP #define HAVE_SCX_RQ_IN_BALANCE #define HAVE_SCX_TASK_NONE diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h index 0e941a0d6f88..2f8002bcc19a 100644 --- a/tools/sched_ext/include/scx/enums.autogen.bpf.h +++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h @@ -13,6 +13,30 @@ const volatile u64 __SCX_SLICE_DFL __weak; const volatile u64 __SCX_SLICE_INF __weak; #define SCX_SLICE_INF __SCX_SLICE_INF +const volatile u64 __SCX_RQ_ONLINE __weak; +#define SCX_RQ_ONLINE __SCX_RQ_ONLINE + +const volatile u64 __SCX_RQ_CAN_STOP_TICK __weak; +#define SCX_RQ_CAN_STOP_TICK __SCX_RQ_CAN_STOP_TICK + +const volatile u64 __SCX_RQ_BAL_PENDING __weak; +#define SCX_RQ_BAL_PENDING __SCX_RQ_BAL_PENDING + +const volatile u64 __SCX_RQ_BAL_KEEP __weak; +#define SCX_RQ_BAL_KEEP __SCX_RQ_BAL_KEEP + +const volatile u64 __SCX_RQ_BYPASSING __weak; +#define SCX_RQ_BYPASSING __SCX_RQ_BYPASSING + +const volatile u64 __SCX_RQ_CLK_VALID __weak; +#define SCX_RQ_CLK_VALID __SCX_RQ_CLK_VALID + +const volatile u64 __SCX_RQ_IN_WAKEUP __weak; +#define SCX_RQ_IN_WAKEUP __SCX_RQ_IN_WAKEUP + +const volatile u64 __SCX_RQ_IN_BALANCE __weak; +#define SCX_RQ_IN_BALANCE __SCX_RQ_IN_BALANCE + const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak; #define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h index 88137a140e72..fedec938584b 100644 --- a/tools/sched_ext/include/scx/enums.autogen.h +++ b/tools/sched_ext/include/scx/enums.autogen.h @@ -8,6 +8,14 @@ SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \ SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \ SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_ONLINE); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CAN_STOP_TICK); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_PENDING); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_KEEP); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BYPASSING); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CLK_VALID); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_WAKEUP); \ + SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_BALANCE); \ SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \ SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \ SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \ diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h index 34cbebe974b7..8e7c91575f0b 100644 --- a/tools/sched_ext/include/scx/enums.h +++ b/tools/sched_ext/include/scx/enums.h @@ -14,7 +14,8 @@ static inline void __ENUM_set(u64 *val, char *type, char *name) bool res; res = __COMPAT_read_enum(type, name, val); - SCX_BUG_ON(!res, "enum not found(%s)", name); + if (!res) + *val = 0; } #define SCX_ENUM_SET(skel, type, name) do { \ From 9d74da1177c800eb3d51c13f9821b7b0683845a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 21 Mar 2025 23:24:20 +0100 Subject: [PATCH 2055/2157] netfilter: nft_set_hash: GC reaps elements with conncount for dynamic sets only conncount has its own GC handler which determines when to reap stale elements, this is convenient for dynamic sets. However, this also reaps non-dynamic sets with static configurations coming from control plane. Always run connlimit gc handler but honor feedback to reap element if this set is dynamic. Fixes: 290180e2448c ("netfilter: nf_tables: add connlimit support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 8bfac4185ac7..abb0c8ec6371 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -309,7 +309,8 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, nft_setelem_expr_foreach(expr, elem_expr, size) { if (expr->ops->gc && - expr->ops->gc(read_pnet(&set->net), expr)) + expr->ops->gc(read_pnet(&set->net), expr) && + set->flags & NFT_SET_EVAL) return true; } From 688c15017d5cd5aac882400782e7213d40dc3556 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 1 Apr 2025 14:36:47 +0200 Subject: [PATCH 2056/2157] netfilter: nf_tables: don't unregister hook when table is dormant When nf_tables_updchain encounters an error, hook registration needs to be rolled back. This should only be done if the hook has been registered, which won't happen when the table is flagged as dormant (inactive). Just move the assignment into the registration block. Reported-by: syzbot+53ed3a6440173ddbf499@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=53ed3a6440173ddbf499 Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c2df81b7e950..a133e1c175ce 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2839,11 +2839,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = nft_netdev_register_hooks(ctx->net, &hook.list); if (err < 0) goto err_hooks; + + unregister = true; } } - unregister = true; - if (nla[NFTA_CHAIN_COUNTERS]) { if (!nft_is_base_chain(chain)) { err = -EOPNOTSUPP; From 078aabd567de3d63d37d7673f714e309d369e6e2 Mon Sep 17 00:00:00 2001 From: Debin Zhu Date: Tue, 1 Apr 2025 20:40:18 +0800 Subject: [PATCH 2057/2157] netlabel: Fix NULL pointer exception caused by CALIPSO on IPv4 sockets When calling netlbl_conn_setattr(), addr->sa_family is used to determine the function behavior. If sk is an IPv4 socket, but the connect function is called with an IPv6 address, the function calipso_sock_setattr() is triggered. Inside this function, the following code is executed: sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; Since sk is an IPv4 socket, pinet6 is NULL, leading to a null pointer dereference. This patch fixes the issue by checking if inet6_sk(sk) returns a NULL pointer before accessing pinet6. Signed-off-by: Debin Zhu Signed-off-by: Bitao Ouyang <1985755126@qq.com> Acked-by: Paul Moore Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.") Link: https://patch.msgid.link/20250401124018.4763-1-mowenroot@163.com Signed-off-by: Jakub Kicinski --- net/ipv6/calipso.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index dbcea9fee626..62618a058b8f 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk, struct ipv6_opt_hdr *hop; int opt_len, len, ret_val = -ENOMSG, offset; unsigned char *opt; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + if (!pinfo) + return -EAFNOSUPPORT; + + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; @@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk, { int ret_val; struct ipv6_opt_hdr *old, *new; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + if (!pinfo) + return -EAFNOSUPPORT; + + txopts = txopt_get(pinfo); old = NULL; if (txopts) old = txopts->hopopt; @@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk, static void calipso_sock_delattr(struct sock *sk) { struct ipv6_opt_hdr *new_hop; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + if (!pinfo) + return; + + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; From ce8fe975fd99b49c29c42e50f2441ba53112b2e8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Mar 2025 15:25:35 -0700 Subject: [PATCH 2058/2157] net_sched: skbprio: Remove overly strict queue assertions In the current implementation, skbprio enqueue/dequeue contains an assertion that fails under certain conditions when SKBPRIO is used as a child qdisc under TBF with specific parameters. The failure occurs because TBF sometimes peeks at packets in the child qdisc without actually dequeuing them when tokens are unavailable. This peek operation creates a discrepancy between the parent and child qdisc queue length counters. When TBF later receives a high-priority packet, SKBPRIO's queue length may show a different value than what's reflected in its internal priority queue tracking, triggering the assertion. The fix removes this overly strict assertions in SKBPRIO, they are not necessary at all. Reported-by: syzbot+a3422a19b05ea96bee18@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a3422a19b05ea96bee18 Fixes: aea5f654e6b7 ("net/sched: add skbprio scheduler") Cc: Nishanth Devarajan Signed-off-by: Cong Wang Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250329222536.696204-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_skbprio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 20ff7386b74b..f485f62ab721 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -123,8 +123,6 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check to update highest and lowest priorities. */ if (skb_queue_empty(lp_qdisc)) { if (q->lowest_prio == q->highest_prio) { - /* The incoming packet is the only packet in queue. */ - BUG_ON(sch->q.qlen != 1); q->lowest_prio = prio; q->highest_prio = prio; } else { @@ -156,7 +154,6 @@ static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) /* Update highest priority field. */ if (skb_queue_empty(hpq)) { if (q->lowest_prio == q->highest_prio) { - BUG_ON(sch->q.qlen); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } else { From 076c700988938e02d51c018095d33b339cdb7ffd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Mar 2025 15:25:36 -0700 Subject: [PATCH 2059/2157] selftests: tc-testing: Add TBF with SKBPRIO queue length corner case test Add a test case to validate the interaction between TBF and SKBPRIO queueing disciplines, specifically targeting queue length accounting corner cases. This test complements the fix for the queue length accounting issue in the SKBPRIO qdisc. This is still best-effort, as timing and manipulating enqueue and dequeue from user-space is very hard. Cc: Pedro Tammela Signed-off-by: Cong Wang Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250329222536.696204-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9044ac054167..25454fd95537 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -126,5 +126,37 @@ "$TC qdisc del dev $DUMMY root handle 1: drr", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "c024", + "name": "Test TBF with SKBPRIO - catch qlen corner cases", + "category": [ + "qdisc", + "tbf", + "skbprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root tbf rate 100bit burst 2000 limit 1000", + "$TC qdisc add dev $DUMMY parent 1: handle 10: skbprio limit 1", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.5" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc skbprio'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + } ] From 10206302af856791fbcc27a33ed3c3eb09b2793d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Mar 2025 09:15:32 +0000 Subject: [PATCH 2060/2157] sctp: add mutual exclusion in proc_sctp_do_udp_port() We must serialize calls to sctp_udp_sock_stop() and sctp_udp_sock_start() or risk a crash as syzbot reported: Oops: general protection fault, probably for non-canonical address 0xdffffc000000000d: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 1 UID: 0 PID: 6551 Comm: syz.1.44 Not tainted 6.14.0-syzkaller-g7f2ff7b62617 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 RIP: 0010:kernel_sock_shutdown+0x47/0x70 net/socket.c:3653 Call Trace: udp_tunnel_sock_release+0x68/0x80 net/ipv4/udp_tunnel_core.c:181 sctp_udp_sock_stop+0x71/0x160 net/sctp/protocol.c:930 proc_sctp_do_udp_port+0x264/0x450 net/sctp/sysctl.c:553 proc_sys_call_handler+0x3d0/0x5b0 fs/proc/proc_sysctl.c:601 iter_file_splice_write+0x91c/0x1150 fs/splice.c:738 do_splice_from fs/splice.c:935 [inline] direct_splice_actor+0x18f/0x6c0 fs/splice.c:1158 splice_direct_to_actor+0x342/0xa30 fs/splice.c:1102 do_splice_direct_actor fs/splice.c:1201 [inline] do_splice_direct+0x174/0x240 fs/splice.c:1227 do_sendfile+0xafd/0xe50 fs/read_write.c:1368 __do_sys_sendfile64 fs/read_write.c:1429 [inline] __se_sys_sendfile64 fs/read_write.c:1415 [inline] __x64_sys_sendfile64+0x1d8/0x220 fs/read_write.c:1415 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] Fixes: 046c052b475e ("sctp: enable udp tunneling socks") Reported-by: syzbot+fae49d997eb56fa7c74d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67ea5c01.050a0220.1547ec.012b.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Acked-by: Xin Long Link: https://patch.msgid.link/20250331091532.224982-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/sysctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 8e1e97be4df7..ee3eac338a9d 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -525,6 +525,8 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, return ret; } +static DEFINE_MUTEX(sctp_sysctl_mutex); + static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -549,6 +551,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, if (new_value > max || new_value < min) return -EINVAL; + mutex_lock(&sctp_sysctl_mutex); net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { @@ -561,6 +564,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); + mutex_unlock(&sctp_sysctl_mutex); } return ret; From 57b290d97c6150774bf929117ca737a26d8fc33d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Mar 2025 08:52:53 +0200 Subject: [PATCH 2061/2157] net: airoha: Fix qid report in airoha_tc_get_htb_get_leaf_queue() Fix the following kernel warning deleting HTB offloaded leafs and/or root HTB qdisc in airoha_eth driver properly reporting qid in airoha_tc_get_htb_get_leaf_queue routine. $tc qdisc replace dev eth1 root handle 10: htb offload $tc class add dev eth1 arent 10: classid 10:4 htb rate 100mbit ceil 100mbit $tc qdisc replace dev eth1 parent 10:4 handle 4: ets bands 8 \ quanta 1514 3028 4542 6056 7570 9084 10598 12112 $tc qdisc del dev eth1 root [ 55.827864] ------------[ cut here ]------------ [ 55.832493] WARNING: CPU: 3 PID: 2678 at 0xffffffc0798695a4 [ 55.956510] CPU: 3 PID: 2678 Comm: tc Tainted: G O 6.6.71 #0 [ 55.963557] Hardware name: Airoha AN7581 Evaluation Board (DT) [ 55.969383] pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 55.976344] pc : 0xffffffc0798695a4 [ 55.979851] lr : 0xffffffc079869a20 [ 55.983358] sp : ffffffc0850536a0 [ 55.986665] x29: ffffffc0850536a0 x28: 0000000000000024 x27: 0000000000000001 [ 55.993800] x26: 0000000000000000 x25: ffffff8008b19000 x24: ffffff800222e800 [ 56.000935] x23: 0000000000000001 x22: 0000000000000000 x21: ffffff8008b19000 [ 56.008071] x20: ffffff8002225800 x19: ffffff800379d000 x18: 0000000000000000 [ 56.015206] x17: ffffffbf9ea59000 x16: ffffffc080018000 x15: 0000000000000000 [ 56.022342] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000001 [ 56.029478] x11: ffffffc081471008 x10: ffffffc081575a98 x9 : 0000000000000000 [ 56.036614] x8 : ffffffc08167fd40 x7 : ffffffc08069e104 x6 : ffffff8007f86000 [ 56.043748] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000001 [ 56.050884] x2 : 0000000000000000 x1 : 0000000000000250 x0 : ffffff800222c000 [ 56.058020] Call trace: [ 56.060459] 0xffffffc0798695a4 [ 56.063618] 0xffffffc079869a20 [ 56.066777] __qdisc_destroy+0x40/0xa0 [ 56.070528] qdisc_put+0x54/0x6c [ 56.073748] qdisc_graft+0x41c/0x648 [ 56.077324] tc_get_qdisc+0x168/0x2f8 [ 56.080978] rtnetlink_rcv_msg+0x230/0x330 [ 56.085076] netlink_rcv_skb+0x5c/0x128 [ 56.088913] rtnetlink_rcv+0x14/0x1c [ 56.092490] netlink_unicast+0x1e0/0x2c8 [ 56.096413] netlink_sendmsg+0x198/0x3c8 [ 56.100337] ____sys_sendmsg+0x1c4/0x274 [ 56.104261] ___sys_sendmsg+0x7c/0xc0 [ 56.107924] __sys_sendmsg+0x44/0x98 [ 56.111492] __arm64_sys_sendmsg+0x20/0x28 [ 56.115580] invoke_syscall.constprop.0+0x58/0xfc [ 56.120285] do_el0_svc+0x3c/0xbc [ 56.123592] el0_svc+0x18/0x4c [ 56.126647] el0t_64_sync_handler+0x118/0x124 [ 56.131005] el0t_64_sync+0x150/0x154 [ 56.134660] ---[ end trace 0000000000000000 ]--- Fixes: ef1ca9271313b ("net: airoha: Add sched HTB offload support") Signed-off-by: Lorenzo Bianconi Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250331-airoha-htb-qdisc-offload-del-fix-v1-1-4ea429c2c968@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c0a642568ac1..20a96cafc748 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2358,7 +2358,7 @@ static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port, return -EINVAL; } - opt->qid = channel; + opt->qid = AIROHA_NUM_TX_RING + channel; return 0; } From 367579274f60cb23c570ae5348966ab51e1509a4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Mar 2025 18:17:31 +0200 Subject: [PATCH 2062/2157] net: airoha: Fix ETS priomap validation ETS Qdisc schedules SP bands in a priority order assigning band-0 the highest priority (band-0 > band-1 > .. > band-n) while EN7581 arranges SP bands in a priority order assigning band-7 the highest priority (band-7 > band-6, .. > band-n). Fix priomap check in airoha_qdma_set_tx_ets_sched routine in order to align ETS Qdisc and airoha_eth driver SP priority ordering. Fixes: b56e4d660a96 ("net: airoha: Enforce ETS Qdisc priomap") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20250331-airoha-ets-validate-priomap-v1-1-60a524488672@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 20a96cafc748..69e523dd4186 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2028,7 +2028,7 @@ static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, struct tc_ets_qopt_offload_replace_params *p = &opt->replace_params; enum tx_sched_mode mode = TC_SCH_SP; u16 w[AIROHA_NUM_QOS_QUEUES] = {}; - int i, nstrict = 0, nwrr, qidx; + int i, nstrict = 0; if (p->bands > AIROHA_NUM_QOS_QUEUES) return -EINVAL; @@ -2046,17 +2046,17 @@ static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, * lowest priorities with respect to SP ones. * e.g: WRR0, WRR1, .., WRRm, SP0, SP1, .., SPn */ - nwrr = p->bands - nstrict; - qidx = nstrict && nwrr ? nstrict : 0; - for (i = 1; i <= p->bands; i++) { - if (p->priomap[i % AIROHA_NUM_QOS_QUEUES] != qidx) + for (i = 0; i < nstrict; i++) { + if (p->priomap[p->bands - i - 1] != i) return -EINVAL; - - qidx = i == nwrr ? 0 : qidx + 1; } - for (i = 0; i < nwrr; i++) + for (i = 0; i < p->bands - nstrict; i++) { + if (p->priomap[i] != nstrict + i) + return -EINVAL; + w[i] = p->weights[nstrict + i]; + } if (!nstrict) mode = TC_SCH_WRR8; From d996e412b2dfc079bd44bff5b3bc743fdb6d7c90 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 31 Mar 2025 07:28:14 -0700 Subject: [PATCH 2063/2157] bpf: add missing ops lock around dev_xdp_attach_link Syzkaller points out that create_link path doesn't grab ops lock, add it. Reported-by: syzbot+08936936fe8132f91f1a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/67e6b3e8.050a0220.2f068f.0079.GAE@google.com/ Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250331142814.1887506-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index be17e0660144..5d20ff226d5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10284,7 +10284,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) goto unlock; } + netdev_lock_ops(dev); err = dev_xdp_attach_link(dev, &extack, link); + netdev_unlock_ops(dev); rtnl_unlock(); if (err) { From 5bbcb5902e1c7193b5ffee13251ec92878aae0e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Mar 2025 17:15:20 -0700 Subject: [PATCH 2064/2157] MAINTAINERS: update Open vSwitch maintainers Pravin has not been active for a while, missingmaints reports: Subsystem OPENVSWITCH Changes 138 / 253 (54%) (No activity) Top reviewers: [41]: aconole@redhat.com [31]: horms@kernel.org [23]: echaudro@redhat.com [8]: fw@strlen.de [6]: i.maximets@ovn.org INACTIVE MAINTAINER Pravin B Shelar Let's elevate Aaron, Eelco and Ilya to the status of maintainers. Acked-by: Aaron Conole Acked-by: Ilya Maximets Acked-by: Eelco Chaudron Acked-by: Simon Horman Link: https://patch.msgid.link/20250401001520.2080231-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 660d5e67af7d..0dabce0f03f0 100644 --- a/CREDITS +++ b/CREDITS @@ -3666,6 +3666,10 @@ S: 149 Union St. S: Kingston, Ontario S: Canada K7L 2P4 +N: Pravin B Shelar +E: pshelar@ovn.org +D: Open vSwitch maintenance and contributions + N: John Shifflett E: john@geolog.com E: jshiffle@netcom.com diff --git a/MAINTAINERS b/MAINTAINERS index e0045ac4327b..ff882416c445 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18097,7 +18097,9 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin B Shelar +M: Aaron Conole +M: Eelco Chaudron +M: Ilya Maximets L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained From d3210dabda8dd0477f3a6301dcaf9ed44aeccd3c Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 31 Mar 2025 18:53:15 -0700 Subject: [PATCH 2065/2157] eth: mlx4: select PAGE_POOL With commit 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") mlx4 started using functions guarded by PAGE_POOL. This change introduced build errors when CONFIG_MLX4_EN is set but CONFIG_PAGE_POOL is not: ld: vmlinux.o: in function `mlx4_en_alloc_frags': en_rx.c:(.text+0xa5eaf9): undefined reference to `page_pool_alloc_pages' ld: vmlinux.o: in function `mlx4_en_create_rx_ring': (.text+0xa5ee91): undefined reference to `page_pool_create' Make MLX4_EN select PAGE_POOL to fix the ml;x4 build errors. Fixes: 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") Signed-off-by: Greg Thelen Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250401015315.2306092-1-gthelen@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 825e05fb8607..0b1cb340206f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -7,6 +7,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on PCI && NETDEVICES && ETHERNET && INET depends on PTP_1588_CLOCK_OPTIONAL + select PAGE_POOL select MLX4_CORE help This driver supports Mellanox Technologies ConnectX Ethernet From 96844075226b49af25a69a1d084b648ec2d9b08d Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 1 Apr 2025 08:58:04 +0200 Subject: [PATCH 2066/2157] net: mvpp2: Prevent parser TCAM memory corruption Protect the parser TCAM/SRAM memory, and the cached (shadow) SRAM information, from concurrent modifications. Both the TCAM and SRAM tables are indirectly accessed by configuring an index register that selects the row to read or write to. This means that operations must be atomic in order to, e.g., avoid spreading writes across multiple rows. Since the shadow SRAM array is used to find free rows in the hardware table, it must also be protected in order to avoid TOCTOU errors where multiple cores allocate the same row. This issue was detected in a situation where `mvpp2_set_rx_mode()` ran concurrently on two CPUs. In this particular case the MVPP2_PE_MAC_UC_PROMISCUOUS entry was corrupted, causing the classifier unit to drop all incoming unicast - indicated by the `rx_classifier_drops` counter. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Tobias Waldekranz Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/20250401065855.3113635-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 3 + .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 3 +- .../net/ethernet/marvell/mvpp2/mvpp2_prs.c | 201 ++++++++++++------ 3 files changed, 140 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 44fe9b68d1c2..061fcd444d50 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1113,6 +1113,9 @@ struct mvpp2 { /* Spinlocks for CM3 shared memory configuration */ spinlock_t mss_spinlock; + + /* Spinlock for shared PRS parser memory and shadow table */ + spinlock_t prs_spinlock; }; struct mvpp2_pcpu_stats { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 566c12c89520..416a926a8281 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7723,8 +7723,9 @@ static int mvpp2_probe(struct platform_device *pdev) if (mvpp2_read(priv, MVPP2_VER_ID_REG) == MVPP2_VER_PP23) priv->hw_version = MVPP23; - /* Init mss lock */ + /* Init locks for shared packet processor resources */ spin_lock_init(&priv->mss_spinlock); + spin_lock_init(&priv->prs_spinlock); /* Initialize network controller */ err = mvpp2_init(pdev, priv); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 9af22f497a40..93e978bdf303 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -23,6 +23,8 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) { int i; + lockdep_assert_held(&priv->prs_spinlock); + if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1) return -EINVAL; @@ -43,11 +45,13 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) } /* Initialize tcam entry from hw */ -int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, - int tid) +static int __mvpp2_prs_init_from_hw(struct mvpp2 *priv, + struct mvpp2_prs_entry *pe, int tid) { int i; + lockdep_assert_held(&priv->prs_spinlock); + if (tid > MVPP2_PRS_TCAM_SRAM_SIZE - 1) return -EINVAL; @@ -73,6 +77,18 @@ int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, return 0; } +int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, + int tid) +{ + int err; + + spin_lock_bh(&priv->prs_spinlock); + err = __mvpp2_prs_init_from_hw(priv, pe, tid); + spin_unlock_bh(&priv->prs_spinlock); + + return err; +} + /* Invalidate tcam hw entry */ static void mvpp2_prs_hw_inv(struct mvpp2 *priv, int index) { @@ -374,7 +390,7 @@ static int mvpp2_prs_flow_find(struct mvpp2 *priv, int flow) priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); bits = mvpp2_prs_sram_ai_get(&pe); /* Sram store classification lookup ID in AI bits [5:0] */ @@ -441,7 +457,7 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) if (priv->prs_shadow[MVPP2_PE_DROP_ALL].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL); + __mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -469,14 +485,17 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) } /* Set port to unicast or multicast promiscuous mode */ -void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, - enum mvpp2_prs_l2_cast l2_cast, bool add) +static void __mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, + enum mvpp2_prs_l2_cast l2_cast, + bool add) { struct mvpp2_prs_entry pe; unsigned char cast_match; unsigned int ri; int tid; + lockdep_assert_held(&priv->prs_spinlock); + if (l2_cast == MVPP2_PRS_L2_UNI_CAST) { cast_match = MVPP2_PRS_UCAST_VAL; tid = MVPP2_PE_MAC_UC_PROMISCUOUS; @@ -489,7 +508,7 @@ void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, /* promiscuous mode - Accept unknown unicast or multicast packets */ if (priv->prs_shadow[tid].valid) { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { memset(&pe, 0, sizeof(pe)); mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); @@ -522,6 +541,14 @@ void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, mvpp2_prs_hw_write(priv, &pe); } +void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, + enum mvpp2_prs_l2_cast l2_cast, bool add) +{ + spin_lock_bh(&priv->prs_spinlock); + __mvpp2_prs_mac_promisc_set(priv, port, l2_cast, add); + spin_unlock_bh(&priv->prs_spinlock); +} + /* Set entry for dsa packets */ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, bool tagged, bool extend) @@ -539,7 +566,7 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, if (priv->prs_shadow[tid].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -610,7 +637,7 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port, if (priv->prs_shadow[tid].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -673,7 +700,7 @@ static int mvpp2_prs_vlan_find(struct mvpp2 *priv, unsigned short tpid, int ai) priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid); if (!match) continue; @@ -726,7 +753,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid_aux); + __mvpp2_prs_init_from_hw(priv, &pe, tid_aux); ri_bits = mvpp2_prs_sram_ri_get(&pe); if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) == MVPP2_PRS_RI_VLAN_DOUBLE) @@ -760,7 +787,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Update ports' mask */ mvpp2_prs_tcam_port_map_set(&pe, port_map); @@ -800,7 +827,7 @@ static int mvpp2_prs_double_vlan_find(struct mvpp2 *priv, unsigned short tpid1, priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid1) && mvpp2_prs_tcam_data_cmp(&pe, 4, tpid2); @@ -849,7 +876,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid_aux); + __mvpp2_prs_init_from_hw(priv, &pe, tid_aux); ri_bits = mvpp2_prs_sram_ri_get(&pe); ri_bits &= MVPP2_PRS_RI_VLAN_MASK; if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE || @@ -880,7 +907,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Update ports' mask */ @@ -1213,8 +1240,8 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) /* Create dummy entries for drop all and promiscuous modes */ mvpp2_prs_drop_fc(priv); mvpp2_prs_mac_drop_all_set(priv, 0, false); - mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); - mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); + __mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); + __mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); } /* Set default entries for various types of dsa packets */ @@ -1533,12 +1560,6 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) struct mvpp2_prs_entry pe; int err; - priv->prs_double_vlans = devm_kcalloc(&pdev->dev, sizeof(bool), - MVPP2_PRS_DBL_VLANS_MAX, - GFP_KERNEL); - if (!priv->prs_double_vlans) - return -ENOMEM; - /* Double VLAN: 0x88A8, 0x8100 */ err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q, MVPP2_PRS_PORT_MASK); @@ -1941,7 +1962,7 @@ static int mvpp2_prs_vid_range_find(struct mvpp2_port *port, u16 vid, u16 mask) port->priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) continue; - mvpp2_prs_init_from_hw(port->priv, &pe, tid); + __mvpp2_prs_init_from_hw(port->priv, &pe, tid); mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]); mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]); @@ -1970,6 +1991,8 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + /* Scan TCAM and see if entry with this already exist */ tid = mvpp2_prs_vid_range_find(port, vid, mask); @@ -1988,8 +2011,10 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) MVPP2_PRS_VLAN_FILT_MAX_ENTRY); /* There isn't room for a new VID filter */ - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&priv->prs_spinlock); return tid; + } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); pe.index = tid; @@ -1997,7 +2022,7 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) /* Mask all ports */ mvpp2_prs_tcam_port_map_set(&pe, 0); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Enable the current port */ @@ -2019,6 +2044,7 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); mvpp2_prs_hw_write(priv, &pe); + spin_unlock_bh(&priv->prs_spinlock); return 0; } @@ -2028,15 +2054,16 @@ void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid) struct mvpp2 *priv = port->priv; int tid; - /* Scan TCAM and see if entry with this already exist */ + spin_lock_bh(&priv->prs_spinlock); + + /* Invalidate TCAM entry with this , if it exists */ tid = mvpp2_prs_vid_range_find(port, vid, 0xfff); + if (tid >= 0) { + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; + } - /* No such entry */ - if (tid < 0) - return; - - mvpp2_prs_hw_inv(priv, tid); - priv->prs_shadow[tid].valid = false; + spin_unlock_bh(&priv->prs_spinlock); } /* Remove all existing VID filters on this port */ @@ -2045,6 +2072,8 @@ void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; int tid; + spin_lock_bh(&priv->prs_spinlock); + for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { if (priv->prs_shadow[tid].valid) { @@ -2052,6 +2081,8 @@ void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) priv->prs_shadow[tid].valid = false; } } + + spin_unlock_bh(&priv->prs_spinlock); } /* Remove VID filering entry for this port */ @@ -2060,10 +2091,14 @@ void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port) unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); struct mvpp2 *priv = port->priv; + spin_lock_bh(&priv->prs_spinlock); + /* Invalidate the guard entry */ mvpp2_prs_hw_inv(priv, tid); priv->prs_shadow[tid].valid = false; + + spin_unlock_bh(&priv->prs_spinlock); } /* Add guard entry that drops packets when no VID is matched on this port */ @@ -2079,6 +2114,8 @@ void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + pe.index = tid; reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); @@ -2111,6 +2148,8 @@ void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) /* Update shadow table */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); mvpp2_prs_hw_write(priv, &pe); + + spin_unlock_bh(&priv->prs_spinlock); } /* Parser default initialization */ @@ -2118,6 +2157,20 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) { int err, index, i; + priv->prs_shadow = devm_kcalloc(&pdev->dev, MVPP2_PRS_TCAM_SRAM_SIZE, + sizeof(*priv->prs_shadow), + GFP_KERNEL); + if (!priv->prs_shadow) + return -ENOMEM; + + priv->prs_double_vlans = devm_kcalloc(&pdev->dev, sizeof(bool), + MVPP2_PRS_DBL_VLANS_MAX, + GFP_KERNEL); + if (!priv->prs_double_vlans) + return -ENOMEM; + + spin_lock_bh(&priv->prs_spinlock); + /* Enable tcam table */ mvpp2_write(priv, MVPP2_PRS_TCAM_CTRL_REG, MVPP2_PRS_TCAM_EN_MASK); @@ -2136,12 +2189,6 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) for (index = 0; index < MVPP2_PRS_TCAM_SRAM_SIZE; index++) mvpp2_prs_hw_inv(priv, index); - priv->prs_shadow = devm_kcalloc(&pdev->dev, MVPP2_PRS_TCAM_SRAM_SIZE, - sizeof(*priv->prs_shadow), - GFP_KERNEL); - if (!priv->prs_shadow) - return -ENOMEM; - /* Always start from lookup = 0 */ for (index = 0; index < MVPP2_MAX_PORTS; index++) mvpp2_prs_hw_port_init(priv, index, MVPP2_PRS_LU_MH, @@ -2158,26 +2205,13 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) mvpp2_prs_vid_init(priv); err = mvpp2_prs_etype_init(priv); - if (err) - return err; + err = err ? : mvpp2_prs_vlan_init(pdev, priv); + err = err ? : mvpp2_prs_pppoe_init(priv); + err = err ? : mvpp2_prs_ip6_init(priv); + err = err ? : mvpp2_prs_ip4_init(priv); - err = mvpp2_prs_vlan_init(pdev, priv); - if (err) - return err; - - err = mvpp2_prs_pppoe_init(priv); - if (err) - return err; - - err = mvpp2_prs_ip6_init(priv); - if (err) - return err; - - err = mvpp2_prs_ip4_init(priv); - if (err) - return err; - - return 0; + spin_unlock_bh(&priv->prs_spinlock); + return err; } /* Compare MAC DA with tcam entry data */ @@ -2217,7 +2251,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, (priv->prs_shadow[tid].udf != udf_type)) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); entry_pmap = mvpp2_prs_tcam_port_map_get(&pe); if (mvpp2_prs_mac_range_equals(&pe, da, mask) && @@ -2229,7 +2263,8 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, } /* Update parser's mac da entry */ -int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) +static int __mvpp2_prs_mac_da_accept(struct mvpp2_port *port, + const u8 *da, bool add) { unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; struct mvpp2 *priv = port->priv; @@ -2261,7 +2296,7 @@ int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) /* Mask all ports */ mvpp2_prs_tcam_port_map_set(&pe, 0); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); @@ -2317,6 +2352,17 @@ int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) return 0; } +int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) +{ + int err; + + spin_lock_bh(&port->priv->prs_spinlock); + err = __mvpp2_prs_mac_da_accept(port, da, add); + spin_unlock_bh(&port->priv->prs_spinlock); + + return err; +} + int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da) { struct mvpp2_port *port = netdev_priv(dev); @@ -2345,6 +2391,8 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) unsigned long pmap; int index, tid; + spin_lock_bh(&priv->prs_spinlock); + for (tid = MVPP2_PE_MAC_RANGE_START; tid <= MVPP2_PE_MAC_RANGE_END; tid++) { unsigned char da[ETH_ALEN], da_mask[ETH_ALEN]; @@ -2354,7 +2402,7 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) (priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF)) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); pmap = mvpp2_prs_tcam_port_map_get(&pe); @@ -2375,14 +2423,17 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) continue; /* Remove entry from TCAM */ - mvpp2_prs_mac_da_accept(port, da, false); + __mvpp2_prs_mac_da_accept(port, da, false); } + + spin_unlock_bh(&priv->prs_spinlock); } int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) { switch (type) { case MVPP2_TAG_TYPE_EDSA: + spin_lock_bh(&priv->prs_spinlock); /* Add port to EDSA entries */ mvpp2_prs_dsa_tag_set(priv, port, true, MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); @@ -2393,9 +2444,11 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA); + spin_unlock_bh(&priv->prs_spinlock); break; case MVPP2_TAG_TYPE_DSA: + spin_lock_bh(&priv->prs_spinlock); /* Add port to DSA entries */ mvpp2_prs_dsa_tag_set(priv, port, true, MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); @@ -2406,10 +2459,12 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA); + spin_unlock_bh(&priv->prs_spinlock); break; case MVPP2_TAG_TYPE_MH: case MVPP2_TAG_TYPE_NONE: + spin_lock_bh(&priv->prs_spinlock); /* Remove port form EDSA and DSA entries */ mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); @@ -2419,6 +2474,7 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA); + spin_unlock_bh(&priv->prs_spinlock); break; default: @@ -2437,11 +2493,15 @@ int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_LAST_FREE_TID, MVPP2_PE_FIRST_FREE_TID); - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&priv->prs_spinlock); return tid; + } pe.index = tid; @@ -2461,6 +2521,7 @@ int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask) mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); mvpp2_prs_hw_write(priv, &pe); + spin_unlock_bh(&priv->prs_spinlock); return 0; } @@ -2472,6 +2533,8 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&port->priv->prs_spinlock); + tid = mvpp2_prs_flow_find(port->priv, port->id); /* Such entry not exist */ @@ -2480,8 +2543,10 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) tid = mvpp2_prs_tcam_first_free(port->priv, MVPP2_PE_LAST_FREE_TID, MVPP2_PE_FIRST_FREE_TID); - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&port->priv->prs_spinlock); return tid; + } pe.index = tid; @@ -2492,13 +2557,14 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) /* Update shadow table */ mvpp2_prs_shadow_set(port->priv, pe.index, MVPP2_PRS_LU_FLOWS); } else { - mvpp2_prs_init_from_hw(port->priv, &pe, tid); + __mvpp2_prs_init_from_hw(port->priv, &pe, tid); } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_tcam_port_map_set(&pe, (1 << port->id)); mvpp2_prs_hw_write(port->priv, &pe); + spin_unlock_bh(&port->priv->prs_spinlock); return 0; } @@ -2509,11 +2575,14 @@ int mvpp2_prs_hits(struct mvpp2 *priv, int index) if (index > MVPP2_PRS_TCAM_SRAM_SIZE) return -EINVAL; + spin_lock_bh(&priv->prs_spinlock); + mvpp2_write(priv, MVPP2_PRS_TCAM_HIT_IDX_REG, index); val = mvpp2_read(priv, MVPP2_PRS_TCAM_HIT_CNT_REG); val &= MVPP2_PRS_TCAM_HIT_CNT_MASK; + spin_unlock_bh(&priv->prs_spinlock); return val; } From ca9e5d3d9a4d7d30355aa68bb30dc6f850f067ab Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 1 Apr 2025 11:49:08 -0300 Subject: [PATCH 2067/2157] selftests: tc-testing: fix nat regex matching In iproute 6.14, the nat ip mask logic was fixed to remove an undefined behaviour[1]. So now instead of reporting '0.0.0.0/32' on x86 and potentially '0.0.0.0/0' in other platforms, it reports '0.0.0.0/0' in all platforms. [1] https://lore.kernel.org/netdev/20250306112520.188728-1-torben.nielsen@prevas.dk/ Reviewed-by: Simon Horman Signed-off-by: Pedro Tammela Link: https://patch.msgid.link/20250401144908.568140-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/tc-tests/actions/nat.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index ee2792998c89..4f21aeb8a3fb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -305,7 +305,7 @@ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -332,7 +332,7 @@ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -359,7 +359,7 @@ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -548,7 +548,7 @@ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -575,7 +575,7 @@ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -602,7 +602,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -629,7 +629,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action nat" From 1b7fdc702c031134c619b74c4f311c590e50ca3d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 1 Apr 2025 12:07:08 -0700 Subject: [PATCH 2068/2157] rtnetlink: Use register_pernet_subsys() in rtnl_net_debug_init(). rtnl_net_debug_init() registers rtnl_net_debug_net_ops by register_pernet_device() but calls unregister_pernet_subsys() in case register_netdevice_notifier() fails. It corrupts pernet_list because first_device is updated in register_pernet_device() but not unregister_pernet_subsys(). Let's fix it by calling register_pernet_subsys() instead. The _subsys() one fits better for the use case because it keeps the notifier alive until default_device_exit_net(), giving it more chance to test NETDEV_UNREGISTER. Fixes: 03fa53485659 ("rtnetlink: Add ASSERT_RTNL_NET() placeholder for netdev notifier.") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250401190716.70437-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnl_net_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnl_net_debug.c b/net/core/rtnl_net_debug.c index 7ecd28cc1c22..f3272b09c255 100644 --- a/net/core/rtnl_net_debug.c +++ b/net/core/rtnl_net_debug.c @@ -102,7 +102,7 @@ static int __init rtnl_net_debug_init(void) { int ret; - ret = register_pernet_device(&rtnl_net_debug_net_ops); + ret = register_pernet_subsys(&rtnl_net_debug_net_ops); if (ret) return ret; From 5a465a0da13ee9fbd7d3cd0b2893309b0fe4b7e3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 1 Apr 2025 11:44:42 -0700 Subject: [PATCH 2069/2157] udp: Fix multiple wraparounds of sk->sk_rmem_alloc. __udp_enqueue_schedule_skb() has the following condition: if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) goto drop; sk->sk_rcvbuf is initialised by net.core.rmem_default and later can be configured by SO_RCVBUF, which is limited by net.core.rmem_max, or SO_RCVBUFFORCE. If we set INT_MAX to sk->sk_rcvbuf, the condition is always false as sk->sk_rmem_alloc is also signed int. Then, the size of the incoming skb is added to sk->sk_rmem_alloc unconditionally. This results in integer overflow (possibly multiple times) on sk->sk_rmem_alloc and allows a single socket to have skb up to net.core.udp_mem[1]. For example, if we set a large value to udp_mem[1] and INT_MAX to sk->sk_rcvbuf and flood packets to the socket, we can see multiple overflows: # cat /proc/net/sockstat | grep UDP: UDP: inuse 3 mem 7956736 <-- (7956736 << 12) bytes > INT_MAX * 15 ^- PAGE_SHIFT # ss -uam State Recv-Q ... UNCONN -1757018048 ... <-- flipping the sign repeatedly skmem:(r2537949248,rb2147483646,t0,tb212992,f1984,w0,o0,bl0,d0) Previously, we had a boundary check for INT_MAX, which was removed by commit 6a1f12dd85a8 ("udp: relax atomic operation on sk->sk_rmem_alloc"). A complete fix would be to revert it and cap the right operand by INT_MAX: rmem = atomic_add_return(size, &sk->sk_rmem_alloc); if (rmem > min(size + (unsigned int)sk->sk_rcvbuf, INT_MAX)) goto uncharge_drop; but we do not want to add the expensive atomic_add_return() back just for the corner case. Casting rmem to unsigned int prevents multiple wraparounds, but we still allow a single wraparound. # cat /proc/net/sockstat | grep UDP: UDP: inuse 3 mem 524288 <-- (INT_MAX + 1) >> 12 # ss -uam State Recv-Q ... UNCONN -2147482816 ... <-- INT_MAX + 831 bytes skmem:(r2147484480,rb2147483646,t0,tb212992,f3264,w0,o0,bl0,d14468947) So, let's define rmem and rcvbuf as unsigned int and check skb->truesize only when rcvbuf is large enough to lower the overflow possibility. Note that we still have a small chance to see overflow if multiple skbs to the same socket are processed on different core at the same time and each size does not exceed the limit but the total size does. Note also that we must ignore skb->truesize for a small buffer as explained in commit 363dc73acacb ("udp: be less conservative with sock rmem accounting"). Fixes: 6a1f12dd85a8 ("udp: relax atomic operation on sk->sk_rmem_alloc") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250401184501.67377-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d0bffcfa56d8..354779b22faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1725,17 +1725,25 @@ static int udp_rmem_schedule(struct sock *sk, int size) int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; - int rmem, err = -ENOMEM; + unsigned int rmem, rcvbuf; spinlock_t *busy = NULL; - int size, rcvbuf; + int size, err = -ENOMEM; - /* Immediately drop when the receive queue is full. - * Always allow at least one packet. - */ rmem = atomic_read(&sk->sk_rmem_alloc); rcvbuf = READ_ONCE(sk->sk_rcvbuf); - if (rmem > rcvbuf) - goto drop; + size = skb->truesize; + + /* Immediately drop when the receive queue is full. + * Cast to unsigned int performs the boundary check for INT_MAX. + */ + if (rmem + size > rcvbuf) { + if (rcvbuf > INT_MAX >> 1) + goto drop; + + /* Always allow at least one packet for small buffer. */ + if (rmem > rcvbuf) + goto drop; + } /* Under mem pressure, it might be helpful to help udp_recvmsg() * having linear skbs : @@ -1745,10 +1753,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) */ if (rmem > (rcvbuf >> 1)) { skb_condense(skb); - + size = skb->truesize; busy = busylock_acquire(sk); } - size = skb->truesize; + udp_set_dev_scratch(skb); atomic_add(size, &sk->sk_rmem_alloc); From df207de9d9e7a4d92f8567e2c539d9c8c12fd99d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 1 Apr 2025 11:44:43 -0700 Subject: [PATCH 2070/2157] udp: Fix memory accounting leak. Matt Dowling reported a weird UDP memory usage issue. Under normal operation, the UDP memory usage reported in /proc/net/sockstat remains close to zero. However, it occasionally spiked to 524,288 pages and never dropped. Moreover, the value doubled when the application was terminated. Finally, it caused intermittent packet drops. We can reproduce the issue with the script below [0]: 1. /proc/net/sockstat reports 0 pages # cat /proc/net/sockstat | grep UDP: UDP: inuse 1 mem 0 2. Run the script till the report reaches 524,288 # python3 test.py & sleep 5 # cat /proc/net/sockstat | grep UDP: UDP: inuse 3 mem 524288 <-- (INT_MAX + 1) >> PAGE_SHIFT 3. Kill the socket and confirm the number never drops # pkill python3 && sleep 5 # cat /proc/net/sockstat | grep UDP: UDP: inuse 1 mem 524288 4. (necessary since v6.0) Trigger proto_memory_pcpu_drain() # python3 test.py & sleep 1 && pkill python3 5. The number doubles # cat /proc/net/sockstat | grep UDP: UDP: inuse 1 mem 1048577 The application set INT_MAX to SO_RCVBUF, which triggered an integer overflow in udp_rmem_release(). When a socket is close()d, udp_destruct_common() purges its receive queue and sums up skb->truesize in the queue. This total is calculated and stored in a local unsigned integer variable. The total size is then passed to udp_rmem_release() to adjust memory accounting. However, because the function takes a signed integer argument, the total size can wrap around, causing an overflow. Then, the released amount is calculated as follows: 1) Add size to sk->sk_forward_alloc. 2) Round down sk->sk_forward_alloc to the nearest lower multiple of PAGE_SIZE and assign it to amount. 3) Subtract amount from sk->sk_forward_alloc. 4) Pass amount >> PAGE_SHIFT to __sk_mem_reduce_allocated(). When the issue occurred, the total in udp_destruct_common() was 2147484480 (INT_MAX + 833), which was cast to -2147482816 in udp_rmem_release(). At 1) sk->sk_forward_alloc is changed from 3264 to -2147479552, and 2) sets -2147479552 to amount. 3) reverts the wraparound, so we don't see a warning in inet_sock_destruct(). However, udp_memory_allocated ends up doubling at 4). Since commit 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated"), memory usage no longer doubles immediately after a socket is close()d because __sk_mem_reduce_allocated() caches the amount in udp_memory_per_cpu_fw_alloc. However, the next time a UDP socket receives a packet, the subtraction takes effect, causing UDP memory usage to double. This issue makes further memory allocation fail once the socket's sk->sk_rmem_alloc exceeds net.ipv4.udp_rmem_min, resulting in packet drops. To prevent this issue, let's use unsigned int for the calculation and call sk_forward_alloc_add() only once for the small delta. Note that first_packet_length() also potentially has the same problem. [0]: from socket import * SO_RCVBUFFORCE = 33 INT_MAX = (2 ** 31) - 1 s = socket(AF_INET, SOCK_DGRAM) s.bind(('', 0)) s.setsockopt(SOL_SOCKET, SO_RCVBUFFORCE, INT_MAX) c = socket(AF_INET, SOCK_DGRAM) c.connect(s.getsockname()) data = b'a' * 100 while True: c.send(data) Fixes: f970bd9e3a06 ("udp: implement memory accounting helpers") Reported-by: Matt Dowling Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250401184501.67377-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 354779b22faf..2742cc7602bb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1625,12 +1625,12 @@ static bool udp_skb_has_head_state(struct sk_buff *skb) } /* fully reclaim rmem/fwd memory allocated for skb */ -static void udp_rmem_release(struct sock *sk, int size, int partial, - bool rx_queue_lock_held) +static void udp_rmem_release(struct sock *sk, unsigned int size, + int partial, bool rx_queue_lock_held) { struct udp_sock *up = udp_sk(sk); struct sk_buff_head *sk_queue; - int amt; + unsigned int amt; if (likely(partial)) { up->forward_deficit += size; @@ -1650,10 +1650,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (!rx_queue_lock_held) spin_lock(&sk_queue->lock); - - sk_forward_alloc_add(sk, size); - amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); - sk_forward_alloc_add(sk, -amt); + amt = (size + sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); + sk_forward_alloc_add(sk, size - amt); if (amt) __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); @@ -1843,7 +1841,7 @@ EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, - int *total) + unsigned int *total) { struct sk_buff *skb; @@ -1876,8 +1874,8 @@ static int first_packet_length(struct sock *sk) { struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue; struct sk_buff_head *sk_queue = &sk->sk_receive_queue; + unsigned int total = 0; struct sk_buff *skb; - int total = 0; int res; spin_lock_bh(&rcvq->lock); From fccd2b711d9628c7ce0111d5e4938652101ee30a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 28 Mar 2025 15:15:28 +0100 Subject: [PATCH 2071/2157] vsock: avoid timeout during connect() if the socket is closing When a peer attempts to establish a connection, vsock_connect() contains a loop that waits for the state to be TCP_ESTABLISHED. However, the other peer can be fast enough to accept the connection and close it immediately, thus moving the state to TCP_CLOSING. When this happens, the peer in the vsock_connect() is properly woken up, but since the state is not TCP_ESTABLISHED, it goes back to sleep until the timeout expires, returning -ETIMEDOUT. If the socket state is TCP_CLOSING, waiting for the timeout is pointless. vsock_connect() can return immediately without errors or delay since the connection actually happened. The socket will be in a closing state, but this is not an issue, and subsequent calls will fail as expected. We discovered this issue while developing a test that accepts and immediately closes connections to stress the transport switch between two connect() calls, where the first one was interrupted by a signal (see Closes link). Reported-by: Luigi Leonardi Closes: https://lore.kernel.org/virtualization/bq6hxrolno2vmtqwcvb5bljfpb7mvwb3kohrvaed6auz5vxrfv@ijmd2f3grobn/ Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Stefano Garzarella Acked-by: Paolo Abeni Tested-by: Luigi Leonardi Reviewed-by: Luigi Leonardi Link: https://patch.msgid.link/20250328141528.420719-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7e3db87ae433..fc6afbc8d680 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1551,7 +1551,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { + /* If the socket is already closing or it is in an error state, there + * is no point in waiting. + */ + while (sk->sk_state != TCP_ESTABLISHED && + sk->sk_state != TCP_CLOSING && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection From 8930424777e43257f5bf6f0f0f53defd0d30415c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 29 Mar 2025 01:33:44 +0100 Subject: [PATCH 2072/2157] tunnels: Accept PACKET_HOST in skb_tunnel_check_pmtu(). Because skb_tunnel_check_pmtu() doesn't handle PACKET_HOST packets, commit 30a92c9e3d6b ("openvswitch: Set the skbuff pkt_type for proper pmtud support.") forced skb->pkt_type to PACKET_OUTGOING for openvswitch packets that are sent using the OVS_ACTION_ATTR_OUTPUT action. This allowed such packets to invoke the iptunnel_pmtud_check_icmp() or iptunnel_pmtud_check_icmpv6() helpers and thus trigger PMTU update on the input device. However, this also broke other parts of PMTU discovery. Since these packets don't have the PACKET_HOST type anymore, they won't trigger the sending of ICMP Fragmentation Needed or Packet Too Big messages to remote hosts when oversized (see the skb_in->pkt_type condition in __icmp_send() for example). These two skb->pkt_type checks are therefore incompatible as one requires skb->pkt_type to be PACKET_HOST, while the other requires it to be anything but PACKET_HOST. It makes sense to not trigger ICMP messages for non-PACKET_HOST packets as these messages should be generated only for incoming l2-unicast packets. However there doesn't seem to be any reason for skb_tunnel_check_pmtu() to ignore PACKET_HOST packets. Allow both cases to work by allowing skb_tunnel_check_pmtu() to work on PACKET_HOST packets and not overriding skb->pkt_type in openvswitch anymore. Fixes: 30a92c9e3d6b ("openvswitch: Set the skbuff pkt_type for proper pmtud support.") Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Guillaume Nault Reviewed-by: Stefano Brivio Reviewed-by: Aaron Conole Tested-by: Aaron Conole Link: https://patch.msgid.link/eac941652b86fddf8909df9b3bf0d97bc9444793.1743208264.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 2 +- net/openvswitch/actions.c | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index a3676155be78..364ea798511e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -416,7 +416,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, skb_dst_update_pmtu_no_confirm(skb, mtu); - if (!reply || skb->pkt_type == PACKET_HOST) + if (!reply) return 0; if (skb->protocol == htons(ETH_P_IP)) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 704c858cf209..61fea7baae5d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -947,12 +947,6 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, pskb_trim(skb, ovs_mac_header_len(key)); } - /* Need to set the pkt_type to involve the routing layer. The - * packet movement through the OVS datapath doesn't generally - * use routing, but this is needed for tunnel cases. - */ - skb->pkt_type = PACKET_OUTGOING; - if (likely(!mru || (skb->len <= mru + vport->dev->hard_header_len))) { ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); From f83e10a233059b74eaa2716e903b57464b3d3b0c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Apr 2025 16:01:02 +0100 Subject: [PATCH 2073/2157] cifs: Remove cifs_truncate_page() as it should be superfluous The calls to cifs_truncate_page() should be superfluous as the places that call it also call truncate_setsize() or cifs_setsize() and therefore truncate_pagecache() which should also clear the tail part of the folio containing the EOF marker. Further, smb3_simple_falloc() calls both cifs_setsize() and truncate_setsize() in addition to cifs_truncate_page(). Remove the superfluous calls. This gets rid of another function referring to struct page. [Should cifs_setsize() also set inode->i_blocks?] Signed-off-by: David Howells cc: Steve French Reviewed-by: Paulo Alcantara (Red Hat) cc: Matthew Wilcox cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 1 - fs/smb/client/inode.c | 19 ------------------- fs/smb/client/smb2ops.c | 2 -- 3 files changed, 22 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 8dea0cf3a8de..a769fa7ceece 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -135,7 +135,6 @@ extern ssize_t cifs_file_copychunk_range(unsigned int xid, extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern void cifs_setsize(struct inode *inode, loff_t offset); -extern int cifs_truncate_page(struct address_space *mapping, loff_t from); struct smb3_fs_context; extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 3bb21aa58474..a00a9d91d0da 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2901,23 +2901,6 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, return -EOPNOTSUPP; } -int cifs_truncate_page(struct address_space *mapping, loff_t from) -{ - pgoff_t index = from >> PAGE_SHIFT; - unsigned offset = from & (PAGE_SIZE - 1); - struct page *page; - int rc = 0; - - page = grab_cache_page(mapping, index); - if (!page) - return -ENOMEM; - - zero_user_segment(page, offset, PAGE_SIZE); - unlock_page(page); - put_page(page); - return rc; -} - void cifs_setsize(struct inode *inode, loff_t offset) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); @@ -3012,8 +2995,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, */ attrs->ia_ctime = attrs->ia_mtime = current_time(inode); attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME; - - cifs_truncate_page(inode->i_mapping, inode->i_size); } return rc; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 374d65cc8123..41d8cd20b25f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3534,8 +3534,6 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, if (rc == 0) { netfs_resize_file(&cifsi->netfs, new_eof, true); cifs_setsize(inode, new_eof); - cifs_truncate_page(inode->i_mapping, inode->i_size); - truncate_setsize(inode, new_eof); } goto out; } From 28753e4304547a15b33f750fcfe1b1c7b6aa7ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 26 Oct 2024 20:55:47 +0200 Subject: [PATCH 2074/2157] cifs: Implement is_network_name_deleted for SMB1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change allows Linux SMB1 client to autoreconnect the share when it is modified on server by admin operation which removes and re-adds it. Implementation is reused from SMB2+ is_network_name_deleted callback. There are just adjusted checks for error codes and access to struct smb_hdr. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb1ops.c | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index ad89f60207ab..26df807fbe7a 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -14,6 +14,8 @@ #include "cifspdu.h" #include "cifs_unicode.h" #include "fs_context.h" +#include "nterr.h" +#include "smberr.h" /* * An NT cancel request header looks just like the original request except: @@ -1062,6 +1064,47 @@ cifs_make_node(unsigned int xid, struct inode *inode, full_path, mode, dev); } +static bool +cifs_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) +{ + struct smb_hdr *shdr = (struct smb_hdr *)buf; + struct TCP_Server_Info *pserver; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + if (shdr->Flags2 & SMBFLG2_ERR_STATUS) { + if (shdr->Status.CifsError != cpu_to_le32(NT_STATUS_NETWORK_NAME_DELETED)) + return false; + } else { + if (shdr->Status.DosError.ErrorClass != ERRSRV || + shdr->Status.DosError.Error != cpu_to_le16(ERRinvtid)) + return false; + } + + /* If server is a channel, select the primary channel */ + pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid == shdr->Tid) { + spin_lock(&tcon->tc_lock); + tcon->need_reconnect = true; + spin_unlock(&tcon->tc_lock); + spin_unlock(&cifs_tcp_ses_lock); + pr_warn_once("Server share %s deleted.\n", + tcon->tree_name); + return true; + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + + return false; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1146,6 +1189,7 @@ struct smb_version_operations smb1_operations = { .get_acl_by_fid = get_cifs_acl_by_fid, .set_acl = set_cifs_acl, .make_node = cifs_make_node, + .is_network_name_deleted = cifs_is_network_name_deleted, }; struct smb_version_values smb1_values = { From 827a1bd9af9df6a4023736ff52475b2a5395d91d Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 31 Mar 2025 20:50:31 -0500 Subject: [PATCH 2075/2157] cifs: update internal version number To 2.52 Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index a769fa7ceece..ca435a3841b8 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 53 -#define CIFS_VERSION "2.53" +#define SMB3_PRODUCT_BUILD 54 +#define CIFS_VERSION "2.54" #endif /* _CIFSFS_H */ From 4210030d8bc4834fcb774babc458d02a2432ee76 Mon Sep 17 00:00:00 2001 From: Tingmao Wang Date: Sun, 30 Mar 2025 22:34:42 +0100 Subject: [PATCH 2076/2157] docs: fs/9p: Add missing "not" in cache documentation A quick fix for what I assume is a typo. Signed-off-by: Tingmao Wang Reviewed-by: Christian Schoenebeck Message-ID: <20250330213443.98434-1-m@maowtm.org> Signed-off-by: Dominique Martinet --- Documentation/filesystems/9p.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index 28871200e87c..522295857f7b 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -165,8 +165,8 @@ Options do not necessarily validate cached values on the server. In other words changes on the server are not guaranteed to be reflected on the client system. Only use this mode of operation if you - have an exclusive mount and the server will modify the filesystem - underneath you. + have an exclusive mount and the server will not modify the + filesystem underneath you. debug=n specifies debug level. The debug level is a bitmask. From 5d0b204654de25615cf712be86c3192eca68ed80 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 27 Feb 2025 16:10:52 +0800 Subject: [PATCH 2077/2157] xsk: Fix __xsk_generic_xmit() error code when cq is full When the cq reservation is failed, the error code is not set which is initialized to zero in __xsk_generic_xmit(). That means the packet is not send successfully but sendto() return ok. Considering the impact on uapi, return -EAGAIN is a good idea. The cq is full usually because it is not released in time, try to send msg again is appropriate. The bug was at the very early implementation of xsk, so the Fixes tag targets the commit that introduced the changes in xsk_cq_reserve_addr_locked where this fix depends on. Fixes: e6c4047f5122 ("xsk: Use xsk_buff_pool directly for cq functions") Suggested-by: Magnus Karlsson Signed-off-by: Wang Liang Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250227081052.4096337-1-wangliang74@huawei.com Signed-off-by: Alexei Starovoitov --- net/xdp/xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index e5d104ce7b82..5696af45bcf7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -806,8 +806,11 @@ static int __xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ - if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr)) + err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr); + if (err) { + err = -EAGAIN; goto out; + } skb = xsk_build_skb(xs, &desc); if (IS_ERR(skb)) { From 00387808d36e23c7d56e9e04a31de0be1443b0e9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 27 Mar 2025 11:55:28 -0700 Subject: [PATCH 2078/2157] selftests/bpf: Fix tests after fields reorder in struct file The change in struct file [1] moved f_ref to the 3rd cache line. It made *(u64 *)file dereference invalid from the verifier point of view, because btf_struct_walk() walks into f_lock field, which is 4-byte long. Fix the selftests to deference the file pointer as a 4-byte access. [1] commit e249056c91a2 ("fs: place f_ref to 3rd cache line in struct file to resolve false sharing") Reported-by: Jakub Kicinski Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20250327185528.1740787-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_module_attach.c | 2 +- tools/testing/selftests/bpf/progs/test_subprogs_extable.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index fb07f5773888..7f3c233943b3 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret) bpf_probe_read_kernel(&buf, 8, ret); bpf_probe_read_kernel(&buf, 8, (char *)ret + 256); - *(volatile long long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c index e2a21fbd4e44..dcac69f5928a 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c @@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; @@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; @@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; From 14d84357a0af7783a440d4a40794752ed20d2607 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 28 Mar 2025 12:31:24 -0700 Subject: [PATCH 2079/2157] selftests/bpf: Fix verifier_bpf_fastcall test Commit [1] moves percpu data on x86 from address 0x000... to address 0xfff... Before [1]: 159020: 0000000000030700 0 OBJECT GLOBAL DEFAULT 23 pcpu_hot After [1]: 152602: ffffffff83a3e034 4 OBJECT GLOBAL DEFAULT 35 pcpu_hot As a result, verifier_bpf_fastcall tests should now expect a negative value for pcpu_hot, IOW, the disassemble should show "r=" instead of "w=". Fix this in the test. Note that, a later change created a new variable "cpu_number" for bpf_get_smp_processor_id() [2]. The inlining logic is updated properly as part of this change, so there is no need to fix anything on the kernel side. [1] commit 9d7de2aa8b41 ("x86/percpu/64: Use relative percpu offsets") [2] commit 01c7bc5198e9 ("x86/smp: Move cpu number to percpu hot section") Reported-by: Jakub Kicinski Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20250328193124.808784-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index a9be6ae49454..c258b0722e04 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -12,7 +12,7 @@ SEC("raw_tp") __arch_x86_64 __log_level(4) __msg("stack depth 8") __xlated("4: r5 = 5") -__xlated("5: w0 = ") +__xlated("5: r0 = ") __xlated("6: r0 = &(void __percpu *)(r0)") __xlated("7: r0 = *(u32 *)(r0 +0)") __xlated("8: exit") @@ -704,7 +704,7 @@ SEC("raw_tp") __arch_x86_64 __log_level(4) __msg("stack depth 32+0") __xlated("2: r1 = 1") -__xlated("3: w0 =") +__xlated("3: r0 =") __xlated("4: r0 = &(void __percpu *)(r0)") __xlated("5: r0 = *(u32 *)(r0 +0)") /* bpf_loop params setup */ @@ -753,7 +753,7 @@ __arch_x86_64 __log_level(4) __msg("stack depth 40+0") /* call bpf_get_smp_processor_id */ __xlated("2: r1 = 42") -__xlated("3: w0 =") +__xlated("3: r0 =") __xlated("4: r0 = &(void __percpu *)(r0)") __xlated("5: r0 = *(u32 *)(r0 +0)") /* call bpf_get_prandom_u32 */ From 3f8ad18f81841a9ce70f603c45d5a278528c67e6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 30 Mar 2025 20:38:28 -0700 Subject: [PATCH 2080/2157] selftests/bpf: Fix verifier_private_stack test failure Several verifier_private_stack tests failed with latest bpf-next. For example, for 'Private stack, single prog' subtest, the jitted code: func #0: 0: f3 0f 1e fa endbr64 4: 0f 1f 44 00 00 nopl (%rax,%rax) 9: 0f 1f 00 nopl (%rax) c: 55 pushq %rbp d: 48 89 e5 movq %rsp, %rbp 10: f3 0f 1e fa endbr64 14: 49 b9 58 74 8a 8f 7d 60 00 00 movabsq $0x607d8f8a7458, %r9 1e: 65 4c 03 0c 25 28 c0 48 87 addq %gs:-0x78b73fd8, %r9 27: bf 2a 00 00 00 movl $0x2a, %edi 2c: 49 89 b9 00 ff ff ff movq %rdi, -0x100(%r9) 33: 31 c0 xorl %eax, %eax 35: c9 leave 36: e9 20 5d 0f e1 jmp 0xffffffffe10f5d5b The insn 'addq %gs:-0x78b73fd8, %r9' does not match the expected regex 'addq %gs:0x{{.*}}, %r9' and this caused test failure. Fix it by changing '%gs:0x{{.*}}' to '%gs:{{.*}}' to accommodate the possible negative offset. A few other subtests are fixed in a similar way. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20250331033828.365077-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_private_stack.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index b1fbdf119553..fc91b414364e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -27,7 +27,7 @@ __description("Private stack, single prog") __success __arch_x86_64 __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x100(%r9)") __naked void private_stack_single_prog(void) @@ -74,7 +74,7 @@ __success __arch_x86_64 /* private stack fp for the main prog */ __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") @@ -122,7 +122,7 @@ __jited(" pushq %rbp") __jited(" movq %rsp, %rbp") __jited(" endbr64") __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") From 12e0b15b1986736af8c64b920efad00c655a3c79 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 1 Apr 2025 13:57:30 +0200 Subject: [PATCH 2081/2157] crypto: inside-secure/eip93 - acquire lock on eip93_put_descriptor hash In the EIP93 HASH functions, the eip93_put_descriptor is called without acquiring lock. This is problematic when multiple thread execute hash operations. Correctly acquire ring write lock on calling eip93_put_descriptor to prevent concurrent access and mess with the ring pointers. Fixes: 9739f5f93b78 ("crypto: eip93 - Add Inside Secure SafeXcel EIP-93 crypto engine support") Reported-by: Herbert Xu Signed-off-by: Christian Marangi Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/eip93/eip93-hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/inside-secure/eip93/eip93-hash.c b/drivers/crypto/inside-secure/eip93/eip93-hash.c index 5e9627467a42..df1b05ac5a57 100644 --- a/drivers/crypto/inside-secure/eip93/eip93-hash.c +++ b/drivers/crypto/inside-secure/eip93/eip93-hash.c @@ -260,7 +260,8 @@ static int eip93_send_hash_req(struct crypto_async_request *async, u8 *data, } again: - ret = eip93_put_descriptor(eip93, &cdesc); + scoped_guard(spinlock_irqsave, &eip93->ring->write_lock) + ret = eip93_put_descriptor(eip93, &cdesc); if (ret) { usleep_range(EIP93_RING_BUSY_DELAY, EIP93_RING_BUSY_DELAY * 2); From 3a0a3ff6593d670af2451ec363ccb7b18aec0c0a Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Mar 2025 18:36:32 +0100 Subject: [PATCH 2082/2157] net: decrease cached dst counters in dst_release Upstream fix ac888d58869b ("net: do not delay dst_entries_add() in dst_release()") moved decrementing the dst count from dst_destroy to dst_release to avoid accessing already freed data in case of netns dismantle. However in case CONFIG_DST_CACHE is enabled and OvS+tunnels are used, this fix is incomplete as the same issue will be seen for cached dsts: Unable to handle kernel paging request at virtual address ffff5aabf6b5c000 Call trace: percpu_counter_add_batch+0x3c/0x160 (P) dst_release+0xec/0x108 dst_cache_destroy+0x68/0xd8 dst_destroy+0x13c/0x168 dst_destroy_rcu+0x1c/0xb0 rcu_do_batch+0x18c/0x7d0 rcu_core+0x174/0x378 rcu_core_si+0x18/0x30 Fix this by invalidating the cache, and thus decrementing cached dst counters, in dst_release too. Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Signed-off-by: Antoine Tenart Link: https://patch.msgid.link/20250326173634.31096-1-atenart@kernel.org Signed-off-by: Paolo Abeni --- net/core/dst.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/dst.c b/net/core/dst.c index c99b95cf9cbb..795ca07e28a4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -165,6 +165,14 @@ static void dst_count_dec(struct dst_entry *dst) void dst_release(struct dst_entry *dst) { if (dst && rcuref_put(&dst->__rcuref)) { +#ifdef CONFIG_DST_CACHE + if (dst->flags & DST_METADATA) { + struct metadata_dst *md_dst = (struct metadata_dst *)dst; + + if (md_dst->type == METADATA_IP_TUNNEL) + dst_cache_reset_now(&md_dst->u.tun_info.dst_cache); + } +#endif dst_count_dec(dst); call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } From 975776841e689dd8ba36df9fa72ac3eca3c2957a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Mar 2025 15:49:55 +0200 Subject: [PATCH 2083/2157] sched/isolation: Make CONFIG_CPU_ISOLATION depend on CONFIG_SMP kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but the Kconfig entry we have right now: config CPU_ISOLATION bool "CPU isolation" depends on SMP || COMPILE_TEST allows the creation of pointless .config's which cause build failures. Reported-by: kernel test robot Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/ --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 681f38ee68db..ab9b0c2c3d52 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -709,7 +709,7 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" - depends on SMP || COMPILE_TEST + depends on SMP default y help Make sure that CPUs running critical tasks are not disturbed by From 169eae7711ea4b745e2d33d53e7b88689b10e1a0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 27 Mar 2025 09:29:45 -0400 Subject: [PATCH 2084/2157] rseq: Eliminate useless task_work on execve Eliminate a useless task_work on execve by moving the call to rseq_set_notify_resume() from sched_mm_cid_after_execve() to the error path of bprm_execve(). The call to rseq_set_notify_resume() from sched_mm_cid_after_execve() is pointless in the success case, because rseq_execve() will clear the rseq pointer before returning to userspace. sched_mm_cid_after_execve() is called from both the success and error paths of bprm_execve(). The call to rseq_set_notify_resume() is needed on error because the mm_cid may have changed. Also move the rseq_execve() to right after sched_mm_cid_after_execve() in bprm_execve(). [ mingo: Merged to a recent upstream kernel, extended the changelog. ] Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Cc: Oleg Nesterov Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250327132945.1558783-1-mathieu.desnoyers@efficios.com --- fs/exec.c | 3 ++- kernel/sched/core.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 5d1c0d2dc403..8e4ea5f1e64c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1864,9 +1864,9 @@ static int bprm_execve(struct linux_binprm *bprm) goto out; sched_mm_cid_after_execve(current); + rseq_execve(current); /* execve succeeded */ current->in_execve = 0; - rseq_execve(current); user_events_execve(current); acct_update_integrals(current); task_numa_free(current, false); @@ -1883,6 +1883,7 @@ static int bprm_execve(struct linux_binprm *bprm) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); + rseq_set_notify_resume(current); current->in_execve = 0; return retval; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cfaca3040b2f..c81cf642dba0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10703,7 +10703,6 @@ void sched_mm_cid_after_execve(struct task_struct *t) smp_mb(); t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm); } - rseq_set_notify_resume(t); } void sched_mm_cid_fork(struct task_struct *t) From 1b755d8eb1ace3870789d48fbd94f386ad6e30be Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 3 Apr 2025 01:00:26 +0800 Subject: [PATCH 2085/2157] netfilter: nft_tunnel: fix geneve_opt type confusion addition When handling multiple NFTA_TUNNEL_KEY_OPTS_GENEVE attributes, the parsing logic should place every geneve_opt structure one by one compactly. Hence, when deciding the next geneve_opt position, the pointer addition should be in units of char *. However, the current implementation erroneously does type conversion before the addition, which will lead to heap out-of-bounds write. [ 6.989857] ================================================================== [ 6.990293] BUG: KASAN: slab-out-of-bounds in nft_tunnel_obj_init+0x977/0xa70 [ 6.990725] Write of size 124 at addr ffff888005f18974 by task poc/178 [ 6.991162] [ 6.991259] CPU: 0 PID: 178 Comm: poc-oob-write Not tainted 6.1.132 #1 [ 6.991655] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 6.992281] Call Trace: [ 6.992423] [ 6.992586] dump_stack_lvl+0x44/0x5c [ 6.992801] print_report+0x184/0x4be [ 6.993790] kasan_report+0xc5/0x100 [ 6.994252] kasan_check_range+0xf3/0x1a0 [ 6.994486] memcpy+0x38/0x60 [ 6.994692] nft_tunnel_obj_init+0x977/0xa70 [ 6.995677] nft_obj_init+0x10c/0x1b0 [ 6.995891] nf_tables_newobj+0x585/0x950 [ 6.996922] nfnetlink_rcv_batch+0xdf9/0x1020 [ 6.998997] nfnetlink_rcv+0x1df/0x220 [ 6.999537] netlink_unicast+0x395/0x530 [ 7.000771] netlink_sendmsg+0x3d0/0x6d0 [ 7.001462] __sock_sendmsg+0x99/0xa0 [ 7.001707] ____sys_sendmsg+0x409/0x450 [ 7.002391] ___sys_sendmsg+0xfd/0x170 [ 7.003145] __sys_sendmsg+0xea/0x170 [ 7.004359] do_syscall_64+0x5e/0x90 [ 7.005817] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 7.006127] RIP: 0033:0x7ec756d4e407 [ 7.006339] Code: 48 89 fa 4c 89 df e8 38 aa 00 00 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 1a 5b c3 0f 1f 84 00 00 00 00 00 48 8b 44 24 10 0f 05 <5b> c3 0f 1f 80 00 00 00 00 83 e2 39 83 faf [ 7.007364] RSP: 002b:00007ffed5d46760 EFLAGS: 00000202 ORIG_RAX: 000000000000002e [ 7.007827] RAX: ffffffffffffffda RBX: 00007ec756cc4740 RCX: 00007ec756d4e407 [ 7.008223] RDX: 0000000000000000 RSI: 00007ffed5d467f0 RDI: 0000000000000003 [ 7.008620] RBP: 00007ffed5d468a0 R08: 0000000000000000 R09: 0000000000000000 [ 7.009039] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 7.009429] R13: 00007ffed5d478b0 R14: 00007ec756ee5000 R15: 00005cbd4e655cb8 Fix this bug with correct pointer addition and conversion in parse and dump code. Fixes: 925d844696d9 ("netfilter: nft_tunnel: add support for geneve opts") Signed-off-by: Lin Ma Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 681301b46aa4..2e40f575aed9 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -341,7 +341,7 @@ static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GEN static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, struct nft_tunnel_opts *opts) { - struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len; + struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len); struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1]; int err, data_len; @@ -625,7 +625,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, if (!inner) goto failure; while (opts->len > offset) { - opt = (struct geneve_opt *)opts->u.data + offset; + opt = (struct geneve_opt *)(opts->u.data + offset); if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, opt->opt_class) || nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE, From 390513642ee6763c7ada07f0a1470474986e6c1c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 3 Apr 2025 12:29:30 +0100 Subject: [PATCH 2086/2157] io_uring: always do atomic put from iowq io_uring always switches requests to atomic refcounting for iowq execution before there is any parallilism by setting REQ_F_REFCOUNT, and the flag is not cleared until the request completes. That should be fine as long as the compiler doesn't make up a non existing value for the flags, however KCSAN still complains when the request owner changes oter flag bits: BUG: KCSAN: data-race in io_req_task_cancel / io_wq_free_work ... read to 0xffff888117207448 of 8 bytes by task 3871 on cpu 0: req_ref_put_and_test io_uring/refs.h:22 [inline] Skip REQ_F_REFCOUNT checks for iowq, we know it's set. Reported-by: syzbot+903a2ad71fb3f1e47cf5@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d880bc27fb8c3209b54641be4ff6ac02b0e5789a.1743679736.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/refs.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a4065e3d13d0..c6209fe44cb1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1796,7 +1796,7 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work) struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_kiocb *nxt = NULL; - if (req_ref_put_and_test(req)) { + if (req_ref_put_and_test_atomic(req)) { if (req->flags & IO_REQ_LINK_FLAGS) nxt = io_req_find_next(req); io_free_req(req); diff --git a/io_uring/refs.h b/io_uring/refs.h index 63982ead9f7d..0d928d87c4ed 100644 --- a/io_uring/refs.h +++ b/io_uring/refs.h @@ -17,6 +17,13 @@ static inline bool req_ref_inc_not_zero(struct io_kiocb *req) return atomic_inc_not_zero(&req->refs); } +static inline bool req_ref_put_and_test_atomic(struct io_kiocb *req) +{ + WARN_ON_ONCE(!(data_race(req->flags) & REQ_F_REFCOUNT)); + WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); + return atomic_dec_and_test(&req->refs); +} + static inline bool req_ref_put_and_test(struct io_kiocb *req) { if (likely(!(req->flags & REQ_F_REFCOUNT))) From 01b91bf14f6d4893e03e357006e7af3a20c03fee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Apr 2025 18:54:02 +0800 Subject: [PATCH 2087/2157] block: don't grab elevator lock during queue initialization ->elevator_lock depends on queue freeze lock, see block/blk-sysfs.c. queue freeze lock depends on fs_reclaim. So don't grab elevator lock during queue initialization which needs to call kmalloc(GFP_KERNEL), and we can cut the dependency between ->elevator_lock and fs_reclaim, then the lockdep warning can be killed. This way is safe because elevator setting isn't ready to run during queue initialization. There isn't such issue in __blk_mq_update_nr_hw_queues() because memalloc_noio_save() is called before acquiring elevator lock. Fixes the following lockdep warning: https://lore.kernel.org/linux-block/67e6b425.050a0220.2f068f.007b.GAE@google.com/ Reported-by: syzbot+4c7e0f9b94ad65811efb@syzkaller.appspotmail.com Cc: Nilay Shroff Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250403105402.1334206-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0cfd1a149f64..c2697db59109 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4464,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( return NULL; } -static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, - struct request_queue *q) +static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, + struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i, j; - /* protect against switching io scheduler */ - mutex_lock(&q->elevator_lock); for (i = 0; i < set->nr_hw_queues; i++) { int old_node; int node = blk_mq_get_hctx_node(set, i); @@ -4504,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); - mutex_unlock(&q->elevator_lock); +} + +static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, + struct request_queue *q, bool lock) +{ + if (lock) { + /* protect against switching io scheduler */ + mutex_lock(&q->elevator_lock); + __blk_mq_realloc_hw_ctxs(set, q); + mutex_unlock(&q->elevator_lock); + } else { + __blk_mq_realloc_hw_ctxs(set, q); + } /* unregister cpuhp callbacks for exited hctxs */ blk_mq_remove_hw_queues_cpuhp(q); @@ -4536,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, xa_init(&q->hctx_table); - blk_mq_realloc_hw_ctxs(set, q); + blk_mq_realloc_hw_ctxs(set, q, false); if (!q->nr_hw_queues) goto err_hctxs; @@ -5032,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, fallback: blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { - blk_mq_realloc_hw_ctxs(set, q); + blk_mq_realloc_hw_ctxs(set, q, true); if (q->nr_hw_queues != set->nr_hw_queues) { int i = prev_nr_hw_queues; From 9364f17ba40422d2661da295bb0da68ca87cc57e Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 2 Apr 2025 21:45:44 +0800 Subject: [PATCH 2088/2157] bcachefs: Add error handling for zlib_deflateInit2() In attempt_compress(), the return value of zlib_deflateInit2() needs to be checked. A proper implementation can be found in pstore_compress(). Add an error check and return 0 immediately if the initialzation fails. Fixes: 986e9842fb68 ("bcachefs: Compression levels") Signed-off-by: Wentao Liang Signed-off-by: Kent Overstreet --- fs/bcachefs/compress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 85fc90342492..28ed32449913 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -371,13 +371,14 @@ static int attempt_compress(struct bch_fs *c, }; zlib_set_workspace(&strm, workspace); - zlib_deflateInit2(&strm, + if (zlib_deflateInit2(&strm, compression.level ? clamp_t(unsigned, compression.level, Z_BEST_SPEED, Z_BEST_COMPRESSION) : Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, - Z_DEFAULT_STRATEGY); + Z_DEFAULT_STRATEGY) != Z_OK) + return 0; if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) return 0; From b2ffadcc7f8fd2059e389d640f9c81febd606daf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Apr 2025 13:01:29 -0400 Subject: [PATCH 2089/2157] bcachefs: Fix scheduling while atomic from logging changes Two fixes from the recent logging changes: bch2_inconsistent(), bch2_fs_inconsistent() be called from interrupt context, or with rcu_read_lock() held. The one syzbot found is in bch2_bkey_pick_read_device bch2_dev_rcu bch2_fs_inconsistent We're starting to switch to lift the printbufs up to higher levels so we can emit better log messages and print them all in one go (avoid garbling), so that conversion will help with spotting these in the future; when we declare a printbuf it must be flagged if we're in an atomic context. Secondly, in btree_node_write_endio: 00085 BUG: sleeping function called from invalid context at include/linux/sched/mm.h:321 00085 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 618, name: bch-reclaim/fa6 00085 preempt_count: 10001, expected: 0 00085 RCU nest depth: 0, expected: 0 00085 4 locks held by bch-reclaim/fa6/618: 00085 #0: ffffff80d7ccad68 (&j->reclaim_lock){+.+.}-{4:4}, at: bch2_journal_reclaim_thread+0x84/0x198 00085 #1: ffffff80d7c84218 (&c->btree_trans_barrier){.+.+}-{0:0}, at: __bch2_trans_get+0x1c0/0x440 00085 #2: ffffff80cd3f8140 (bcachefs_btree){+.+.}-{0:0}, at: __bch2_trans_get+0x22c/0x440 00085 #3: ffffff80c3823c20 (&vblk->vqs[i].lock){-.-.}-{3:3}, at: virtblk_done+0x58/0x130 00085 irq event stamp: 328 00085 hardirqs last enabled at (327): [] finish_task_switch.isra.0+0xbc/0x2a0 00085 hardirqs last disabled at (328): [] el1_interrupt+0x20/0x60 00085 softirqs last enabled at (0): [] copy_process+0x7c8/0x2118 00085 softirqs last disabled at (0): [<0000000000000000>] 0x0 00085 Preemption disabled at: 00085 [] irq_enter_rcu+0x18/0x90 00085 CPU: 8 UID: 0 PID: 618 Comm: bch-reclaim/fa6 Not tainted 6.14.0-rc6-ktest-g04630bde23e8 #18798 00085 Hardware name: linux,dummy-virt (DT) 00085 Call trace: 00085 show_stack+0x1c/0x30 (C) 00085 dump_stack_lvl+0x84/0xc0 00085 dump_stack+0x14/0x20 00085 __might_resched+0x180/0x288 00085 __might_sleep+0x4c/0x88 00085 __kmalloc_node_track_caller_noprof+0x34c/0x3e0 00085 krealloc_noprof+0x1a0/0x2d8 00085 bch2_printbuf_make_room+0x9c/0x120 00085 bch2_prt_printf+0x60/0x1b8 00085 btree_node_write_endio+0x1b0/0x2d8 00085 bio_endio+0x138/0x1f0 00085 btree_node_write_endio+0xe8/0x2d8 00085 bio_endio+0x138/0x1f0 00085 blk_update_request+0x220/0x4c0 00085 blk_mq_end_request+0x28/0x148 00085 virtblk_request_done+0x64/0xe8 00085 blk_mq_complete_request+0x34/0x40 00085 virtblk_done+0x78/0x130 00085 vring_interrupt+0x6c/0xb0 00085 __handle_irq_event_percpu+0x8c/0x2e0 00085 handle_irq_event+0x50/0xb0 00085 handle_fasteoi_irq+0xc4/0x250 00085 handle_irq_desc+0x44/0x60 00085 generic_handle_domain_irq+0x20/0x30 00085 gic_handle_irq+0x54/0xc8 00085 call_on_irq_stack+0x24/0x40 Reported-by: syzbot+c82cd2906e2f192410bb@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 1 + fs/bcachefs/error.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index ac1f029a7eb2..5fd4a58d2ad2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -2146,6 +2146,7 @@ static void btree_node_write_endio(struct bio *bio) if (ca && bio->bi_status) { struct printbuf buf = PRINTBUF; + buf.atomic++; prt_printf(&buf, "btree write error: %s\n ", bch2_blk_status_to_str(bio->bi_status)); bch2_btree_pos_to_text(&buf, c, b); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index d4dfd13a8076..b885bd92834c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -45,6 +45,8 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) bool bch2_inconsistent_error(struct bch_fs *c) { struct printbuf buf = PRINTBUF; + buf.atomic++; + printbuf_indent_add_nextline(&buf, 2); bool ret = __bch2_inconsistent_error(c, &buf); @@ -59,6 +61,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra const char *fmt, va_list args) { struct printbuf buf = PRINTBUF; + buf.atomic++; bch2_log_msg_start(c, &buf); From 570f5050bb0739f24aeb94034d8ec134c450b4aa Mon Sep 17 00:00:00 2001 From: Bharadwaj Raju Date: Wed, 2 Apr 2025 23:45:53 +0530 Subject: [PATCH 2090/2157] bcachefs: use nonblocking variant of print_string_as_lines in error path The inconsistency error path calls print_string_as_lines, which calls console_lock, which is a potentially-sleeping function and so can't be called in an atomic context. Replace calls to it with the nonblocking variant which is safe to call. Signed-off-by: Bharadwaj Raju Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index b885bd92834c..baf5dfb32298 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -34,7 +34,7 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: - bch2_print_string_as_lines(KERN_ERR, out->buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf); panic(bch2_fmt(c, "panic after error")); return true; default: @@ -71,7 +71,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; From 83d539b1b04705f972b53b4669fb587c54def0db Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 14:31:12 -0400 Subject: [PATCH 2091/2157] bcachefs: Fix check_snapshot_exists() restart handling Codepaths that create entries in the snapshots btree currently call bch2_mark_snapshot(), which updates the in-memory snapshot table, before transaction commit. This is because bch2_mark_snapshot() is an atomic trigger, run with btree write locks held, and isn't allowed to fail - but it might need to reallocate the table, hence we call it early when we're still allowed to fail. This is generally harmless - if we fail, we'll have left an entry in the snapshots table around, but nothing will reference it and it'll get overwritten if reused by another transaction. But check_snapshot_exists(), which reconstructs snapshots when the snapshots btree has been corrupted or lost, was erronously rechecking if the snapshot exists inside the transaction commit loop - so on transaction restart (in this case mem_realloced), the second iteration would return without repairing. This code needs some cleanup: splitting out a "maybe realloc snapshots table" helper would have avoided this, that will be in the next patch. Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index c8536eb36060..b7de29aed839 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -843,9 +843,6 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) { struct bch_fs *c = trans->c; - if (bch2_snapshot_exists(c, id)) - return 0; - /* Do we need to reconstruct the snapshot_tree entry as well? */ struct btree_iter iter; struct bkey_s_c k; From 39ebd74864f5c4f7d44f1fe026c71a270631186b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 12:48:23 -0400 Subject: [PATCH 2092/2157] bcachefs: Fix null ptr deref in invalidate_one_bucket() bch2_backpointer_get_key() returns bkey_s_c_null when the target isn't found. backpointer_get_key() flags the error, so there's nothing else to do here - just skip it and move on. Link: https://github.com/koverstreet/bcachefs/issues/847 Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 6b6c2521c11f..94ea9e49aec4 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2084,6 +2084,9 @@ static int invalidate_one_bp(struct btree_trans *trans, if (ret) return ret; + if (!extent_k.k) + return 0; + struct bkey_i *n = bch2_bkey_make_mut(trans, &extent_iter, &extent_k, BTREE_UPDATE_internal_snapshot_node); From 2581f89ac8d7174fda975523ae6c2bdc8ad62144 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 12:54:25 -0400 Subject: [PATCH 2093/2157] bcachefs: backpointer_get_key: check for null from peek_slot() peek_slot() doesn't normally return bkey_s_c_null - except when we ask for a key at a btree level that doesn't exist, which can happen here. We might want to revisit this, but we'll have to look over all the places where we use peek_slot() on interior nodes. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index dc1cd8de18ac..ff26bb515150 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -258,6 +258,18 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, return k; } + /* + * peek_slot() doesn't normally return NULL - except when we ask for a + * key at a btree level that doesn't exist. + * + * We may want to revisit this and change peek_slot(): + */ + if (!k.k) { + bkey_init(&iter->k); + iter->k.p = bp.v->pos; + k.k = &iter->k; + } + if (k.k && extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) return k; From 77ad1df82b9e8d169e3ec9ee8b7caabfa45872ce Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 12:23:25 -0400 Subject: [PATCH 2094/2157] bcachefs: Fix "journal stuck" during recovery If we crash when the journal pin fifo is completely full - i.e. we're at the maximum number of dirty journal entries - that may put us in a sticky situation in recovery, as journal replay will need to be able to open new journal entries in order to get going. bch2_fs_journal_start() already had provisions for resizing the journal pin fifo if needed, but it needs a fudge factor to ensure there's room for journal replay. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 11f104f436e3..d8f74b6d0a75 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1404,6 +1404,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) nr = cur_seq - last_seq; + /* + * Extra fudge factor, in case we crashed when the journal pin fifo was + * nearly or completely full. We'll need to be able to open additional + * journal entries (at least a few) in order for journal replay to get + * going: + */ + nr += nr / 4; + if (nr + 1 > j->pin.size) { free_fifo(&j->pin); init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); From 15970e1b23f5c25db88c613fddf9131de086f28e Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Wed, 2 Apr 2025 00:10:37 +0000 Subject: [PATCH 2095/2157] gve: handle overflow when reporting TX consumed descriptors When the tx tail is less than the head (in cases of wraparound), the TX consumed descriptor statistic in DQ will be reported as UINT32_MAX - head + tail, which is incorrect. Mask the difference of head and tail according to the ring size when reporting the statistic. Cc: stable@vger.kernel.org Fixes: 2c9198356d56 ("gve: Add consumed counts to ethtool stats") Signed-off-by: Joshua Washington Signed-off-by: Harshitha Ramamurthy Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250402001037.2717315-1-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 31a21ccf4863..4dea1fdce748 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -392,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev, */ data[i++] = 0; data[i++] = 0; - data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head; + data[i++] = + (tx->dqo_tx.tail - tx->dqo_tx.head) & + tx->mask; } do { start = From 8241ecec1cdc6699ae197d52d58e76bddd995fa5 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 1 Apr 2025 23:54:39 +0100 Subject: [PATCH 2096/2157] sfc: fix NULL dereferences in ef100_process_design_param() Since cited commit, ef100_probe_main() and hence also ef100_check_design_params() run before efx->net_dev is created; consequently, we cannot netif_set_tso_max_size() or _segs() at this point. Move those netif calls to ef100_probe_netdev(), and also replace netif_err within the design params code with pci_err. Reported-by: Kyungwook Boo Fixes: 98ff4c7c8ac7 ("sfc: Separate netdev probe/remove from PCI probe/remove") Signed-off-by: Edward Cree Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250401225439.2401047-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef100_netdev.c | 6 ++-- drivers/net/ethernet/sfc/ef100_nic.c | 47 +++++++++++-------------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index d941f073f1eb..3a06e3b1bd6b 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -450,8 +450,9 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data) net_dev->hw_enc_features |= efx->type->offload_features; net_dev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO; - netif_set_tso_max_segs(net_dev, - ESE_EF100_DP_GZ_TSO_MAX_HDR_NUM_SEGS_DEFAULT); + nic_data = efx->nic_data; + netif_set_tso_max_size(efx->net_dev, nic_data->tso_max_payload_len); + netif_set_tso_max_segs(efx->net_dev, nic_data->tso_max_payload_num_segs); rc = efx_ef100_init_datapath_caps(efx); if (rc < 0) @@ -477,7 +478,6 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data) /* Don't fail init if RSS setup doesn't work. */ efx_mcdi_push_default_indir_table(efx, efx->n_rx_channels); - nic_data = efx->nic_data; rc = ef100_get_mac_address(efx, net_dev->perm_addr, CLIENT_HANDLE_SELF, efx->type->is_vf); if (rc) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 62e674d6ff60..3ad95a4c8af2 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -887,8 +887,7 @@ static int ef100_process_design_param(struct efx_nic *efx, case ESE_EF100_DP_GZ_TSO_MAX_HDR_NUM_SEGS: /* We always put HDR_NUM_SEGS=1 in our TSO descriptors */ if (!reader->value) { - netif_err(efx, probe, efx->net_dev, - "TSO_MAX_HDR_NUM_SEGS < 1\n"); + pci_err(efx->pci_dev, "TSO_MAX_HDR_NUM_SEGS < 1\n"); return -EOPNOTSUPP; } return 0; @@ -901,32 +900,28 @@ static int ef100_process_design_param(struct efx_nic *efx, */ if (!reader->value || reader->value > EFX_MIN_DMAQ_SIZE || EFX_MIN_DMAQ_SIZE % (u32)reader->value) { - netif_err(efx, probe, efx->net_dev, - "%s size granularity is %llu, can't guarantee safety\n", - reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ", - reader->value); + pci_err(efx->pci_dev, + "%s size granularity is %llu, can't guarantee safety\n", + reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ", + reader->value); return -EOPNOTSUPP; } return 0; case ESE_EF100_DP_GZ_TSO_MAX_PAYLOAD_LEN: nic_data->tso_max_payload_len = min_t(u64, reader->value, GSO_LEGACY_MAX_SIZE); - netif_set_tso_max_size(efx->net_dev, - nic_data->tso_max_payload_len); return 0; case ESE_EF100_DP_GZ_TSO_MAX_PAYLOAD_NUM_SEGS: nic_data->tso_max_payload_num_segs = min_t(u64, reader->value, 0xffff); - netif_set_tso_max_segs(efx->net_dev, - nic_data->tso_max_payload_num_segs); return 0; case ESE_EF100_DP_GZ_TSO_MAX_NUM_FRAMES: nic_data->tso_max_frames = min_t(u64, reader->value, 0xffff); return 0; case ESE_EF100_DP_GZ_COMPAT: if (reader->value) { - netif_err(efx, probe, efx->net_dev, - "DP_COMPAT has unknown bits %#llx, driver not compatible with this hw\n", - reader->value); + pci_err(efx->pci_dev, + "DP_COMPAT has unknown bits %#llx, driver not compatible with this hw\n", + reader->value); return -EOPNOTSUPP; } return 0; @@ -946,10 +941,10 @@ static int ef100_process_design_param(struct efx_nic *efx, * So the value of this shouldn't matter. */ if (reader->value != ESE_EF100_DP_GZ_VI_STRIDES_DEFAULT) - netif_dbg(efx, probe, efx->net_dev, - "NIC has other than default VI_STRIDES (mask " - "%#llx), early probing might use wrong one\n", - reader->value); + pci_dbg(efx->pci_dev, + "NIC has other than default VI_STRIDES (mask " + "%#llx), early probing might use wrong one\n", + reader->value); return 0; case ESE_EF100_DP_GZ_RX_MAX_RUNT: /* Driver doesn't look at L2_STATUS:LEN_ERR bit, so we don't @@ -961,9 +956,9 @@ static int ef100_process_design_param(struct efx_nic *efx, /* Host interface says "Drivers should ignore design parameters * that they do not recognise." */ - netif_dbg(efx, probe, efx->net_dev, - "Ignoring unrecognised design parameter %u\n", - reader->type); + pci_dbg(efx->pci_dev, + "Ignoring unrecognised design parameter %u\n", + reader->type); return 0; } } @@ -999,13 +994,13 @@ static int ef100_check_design_params(struct efx_nic *efx) */ if (reader.state != EF100_TLV_TYPE) { if (reader.state == EF100_TLV_TYPE_CONT) - netif_err(efx, probe, efx->net_dev, - "truncated design parameter (incomplete type %u)\n", - reader.type); + pci_err(efx->pci_dev, + "truncated design parameter (incomplete type %u)\n", + reader.type); else - netif_err(efx, probe, efx->net_dev, - "truncated design parameter %u\n", - reader.type); + pci_err(efx->pci_dev, + "truncated design parameter %u\n", + reader.type); rc = -EIO; } out: From e5ddf19dbc3e24cce508de0dab0e8df5b7417de8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 2 Apr 2025 01:59:31 +0100 Subject: [PATCH 2097/2157] net/selftests: Add loopback link local route for self-connect self-connect-ipv6 got slightly flaky on netdev: > # timeout set to 120 > # selftests: net/tcp_ao: self-connect_ipv6 > # 1..5 > # # 708[lib/setup.c:250] rand seed 1742872572 > # TAP version 13 > # # 708[lib/proc.c:213] Snmp6 Ip6OutNoRoutes: 0 => 1 > # not ok 1 # error 708[self-connect.c:70] failed to connect() > # ok 2 No unexpected trace events during the test run > # # Planned tests != run tests (5 != 2) > # # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:1 > ok 1 selftests: net/tcp_ao: self-connect_ipv6 I can not reproduce it on my machines, but judging by "Ip6OutNoRoutes" there is no route to the local_addr (::1). Looking at the kernel code, I see that kernel does add link-local address automatically in init_loopback(), but that is called from ipv6 notifier block. So, in turn the userspace that brought up the loopback interface may see rtnetlink ACK earlier than addrconf_notify() does it's job (at least, on a slow VM such as netdev). Probably, for ipv4 it's the same, judging by inetdev_event(). The fix is quite simple: set the link-local route straight after bringing the loopback interface. That will make it synchronous. Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20250402-tcp-ao-selfconnect-flake-v1-1-8388d629ef3d@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/self-connect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index 73b2f2276f3f..2c73bea698a6 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) From e4546c6498c68ba65929fcbcf54ff1947fe53f48 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 2 Apr 2025 13:31:23 +0000 Subject: [PATCH 2098/2157] eth: bnxt: fix deadlock in the mgmt_ops When queue is being reset, callbacks of mgmt_ops are called by netdev_nl_bind_rx_doit(). The netdev_nl_bind_rx_doit() first acquires netdev_lock() and then calls callbacks. So, mgmt_ops callbacks should not acquire netdev_lock() internaly. The bnxt_queue_{start | stop}() calls napi_{enable | disable}() but they internally acquire netdev_lock(). So, deadlock occurs. To avoid deadlock, napi_{enable | disable}_locked() should be used instead. Signed-off-by: Taehee Yoo Acked-by: Stanislav Fomichev Reviewed-by: Michael Chan Fixes: cae03e5bdd9e ("net: hold netdev instance lock during queue operations") Link: https://patch.msgid.link/20250402133123.840173-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1a70605fad38..28ee12186c37 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15909,7 +15909,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) goto err_reset; } - napi_enable(&bnapi->napi); + napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); for (i = 0; i < bp->nr_vnics; i++) { @@ -15931,7 +15931,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) err_reset: netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", rc); - napi_enable(&bnapi->napi); + napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); bnxt_reset_task(bp, true); return rc; @@ -15971,7 +15971,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) * completion is handled in NAPI to guarantee no more DMA on that ring * after seeing the completion. */ - napi_disable(&bnapi->napi); + napi_disable_locked(&bnapi->napi); if (bp->tph_mode) { bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); From 7ac6ea4a3e0898db76aecccd68fb2c403eb7d24e Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 2 Apr 2025 14:17:51 +0200 Subject: [PATCH 2099/2157] ipv6: fix omitted netlink attributes when using RTEXT_FILTER_SKIP_STATS Using RTEXT_FILTER_SKIP_STATS is incorrectly skipping non-stats IPv6 netlink attributes on link dump. This causes issues on userspace tools, e.g iproute2 is not rendering address generation mode as it should due to missing netlink attribute. Move the filling of IFLA_INET6_STATS and IFLA_INET6_ICMP6STATS to a helper function guarded by a flag check to avoid hitting the same situation in the future. Fixes: d5566fd72ec1 ("rtnetlink: RTEXT_FILTER_SKIP_STATS support to avoid dumping inet/inet6 stats") Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250402121751.3108-1-ffmancera@riseup.net Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8cc1076536..54a8ea004da2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5784,6 +5784,27 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } +static int inet6_fill_ifla6_stats_attrs(struct sk_buff *skb, + struct inet6_dev *idev) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); + if (!nla) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); + + nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); + if (!nla) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, u32 ext_filter_mask) { @@ -5806,18 +5827,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, /* XXX - MC not implemented */ - if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS) - return 0; - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (!nla) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (!nla) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) { + if (inet6_fill_ifla6_stats_attrs(skb, idev) < 0) + goto nla_put_failure; + } nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (!nla) From 40eb4a0434cd90668fe376a92f18982b913c283b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 1 Apr 2025 16:53:44 +0200 Subject: [PATCH 2100/2157] MAINTAINERS: Update Loic Poulain's email address Update Loic Poulain's email address to @oss.qualcomm.com. Signed-off-by: Loic Poulain Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250401145344.10669-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ff882416c445..a2830693af7a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19871,7 +19871,7 @@ F: Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml F: drivers/i2c/busses/i2c-qcom-geni.c QUALCOMM I2C CCI DRIVER -M: Loic Poulain +M: Loic Poulain M: Robert Foss L: linux-i2c@vger.kernel.org L: linux-arm-msm@vger.kernel.org @@ -20004,7 +20004,7 @@ F: Documentation/devicetree/bindings/media/*venus* F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Loic Poulain +M: Loic Poulain L: wcn36xx@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx @@ -26043,7 +26043,7 @@ F: kernel/workqueue.c F: kernel/workqueue_internal.h WWAN DRIVERS -M: Loic Poulain +M: Loic Poulain M: Sergey Ryazanov R: Johannes Berg L: netdev@vger.kernel.org From a58d882841a0750da3c482cd3d82432b1c7edb77 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Tue, 1 Apr 2025 15:56:37 +0200 Subject: [PATCH 2101/2157] net: dsa: mv88e6xxx: propperly shutdown PPU re-enable timer on destroy The mv88e6xxx has an internal PPU that polls PHY state. If we want to access the internal PHYs, we need to disable the PPU first. Because that is a slow operation, a 10ms timer is used to re-enable it, canceled with every access, so bulk operations effectively only disable it once and re-enable it some 10ms after the last access. If a PHY is accessed and then the mv88e6xxx module is removed before the 10ms are up, the PPU re-enable ends up accessing a dangling pointer. This especially affects probing during bootup. The MDIO bus and PHY registration may succeed, but registration with the DSA framework may fail later on (e.g. because the CPU port depends on another, very slow device that isn't done probing yet, returning -EPROBE_DEFER). In this case, probe() fails, but the MDIO subsystem may already have accessed the MIDO bus or PHYs, arming the timer. This is fixed as follows: - If probe fails after mv88e6xxx_phy_init(), make sure we also call mv88e6xxx_phy_destroy() before returning - In mv88e6xxx_remove(), make sure we do the teardown in the correct order, calling mv88e6xxx_phy_destroy() after unregistering the switch device. - In mv88e6xxx_phy_destroy(), destroy both the timer and the work item that the timer might schedule, synchronously waiting in case one of the callbacks already fired and destroying the timer first, before waiting for the work item. - Access to the PPU is guarded by a mutex, the worker acquires it with a mutex_trylock(), not proceeding with the expensive shutdown if that fails. We grab the mutex in mv88e6xxx_phy_destroy() to make sure the slow PPU shutdown is already done or won't even enter, when we wait for the work item. Fixes: 2e5f032095ff ("dsa: add support for the Marvell 88E6131 switch chip") Signed-off-by: David Oberhollenzer Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250401135705.92760-1-david.oberhollenzer@sigma-star.at Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 11 +++++++---- drivers/net/dsa/mv88e6xxx/phy.c | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 901929f96b38..29a89ab4b789 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -7350,13 +7350,13 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) err = mv88e6xxx_switch_reset(chip); mv88e6xxx_reg_unlock(chip); if (err) - goto out; + goto out_phy; if (np) { chip->irq = of_irq_get(np, 0); if (chip->irq == -EPROBE_DEFER) { err = chip->irq; - goto out; + goto out_phy; } } @@ -7375,7 +7375,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) mv88e6xxx_reg_unlock(chip); if (err) - goto out; + goto out_phy; if (chip->info->g2_irqs > 0) { err = mv88e6xxx_g2_irq_setup(chip); @@ -7409,6 +7409,8 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); +out_phy: + mv88e6xxx_phy_destroy(chip); out: if (pdata) dev_put(pdata->netdev); @@ -7431,7 +7433,6 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_ptp_free(chip); } - mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_g1_vtu_prob_irq_free(chip); @@ -7444,6 +7445,8 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); + + mv88e6xxx_phy_destroy(chip); } static void mv88e6xxx_shutdown(struct mdio_device *mdiodev) diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 8bb88b3d900d..ee9e5d7e5277 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -229,7 +229,10 @@ static void mv88e6xxx_phy_ppu_state_init(struct mv88e6xxx_chip *chip) static void mv88e6xxx_phy_ppu_state_destroy(struct mv88e6xxx_chip *chip) { + mutex_lock(&chip->ppu_mutex); del_timer_sync(&chip->ppu_timer); + cancel_work_sync(&chip->ppu_work); + mutex_unlock(&chip->ppu_mutex); } int mv88e6185_phy_ppu_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, From 09bccf56db36501ccb1935d921dc24451e9f57dd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 1 Apr 2025 11:42:30 +0200 Subject: [PATCH 2102/2157] net: airoha: Validate egress gdm port in airoha_ppe_foe_entry_prepare() Dev pointer in airoha_ppe_foe_entry_prepare routine is not strictly a device allocated by airoha_eth driver since it is an egress device and the flowtable can contain even wlan, pppoe or vlan devices. E.g: flowtable ft { hook ingress priority filter devices = { eth1, lan1, lan2, lan3, lan4, wlan0 } flags offload ^ | "not allocated by airoha_eth" -- } In this case airoha_get_dsa_port() will just return the original device pointer and we can't assume netdev priv pointer points to an airoha_gdm_port struct. Fix the issue validating egress gdm port in airoha_ppe_foe_entry_prepare routine before accessing net_device priv pointer. Fixes: 00a7678310fe ("net: airoha: Introduce flowtable offload support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250401-airoha-validate-egress-gdm-port-v4-1-c7315d33ce10@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 13 +++++++++++++ drivers/net/ethernet/airoha/airoha_eth.h | 3 +++ drivers/net/ethernet/airoha/airoha_ppe.c | 8 ++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 69e523dd4186..d748dc6de923 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2454,6 +2454,19 @@ static void airoha_metadata_dst_free(struct airoha_gdm_port *port) } } +bool airoha_is_valid_gdm_port(struct airoha_eth *eth, + struct airoha_gdm_port *port) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { + if (eth->ports[i] == port) + return true; + } + + return false; +} + static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np, int index) { diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 60690b685710..ec8908f904c6 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -532,6 +532,9 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) +bool airoha_is_valid_gdm_port(struct airoha_eth *eth, + struct airoha_gdm_port *port); + void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash); int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 8b55e871352d..f10dab935cab 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -197,7 +197,8 @@ static int airoha_get_dsa_port(struct net_device **dev) #endif } -static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe, +static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, + struct airoha_foe_entry *hwe, struct net_device *dev, int type, struct airoha_flow_data *data, int l4proto) @@ -225,6 +226,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe, struct airoha_gdm_port *port = netdev_priv(dev); u8 pse_port; + if (!airoha_is_valid_gdm_port(eth, port)) + return -EINVAL; + if (dsa_port >= 0) pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 : port->id; else @@ -633,7 +637,7 @@ static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port, !is_valid_ether_addr(data.eth.h_dest)) return -EINVAL; - err = airoha_ppe_foe_entry_prepare(&hwe, odev, offload_type, + err = airoha_ppe_foe_entry_prepare(eth, &hwe, odev, offload_type, &data, l4proto); if (err) return err; From d2ccd0560d9648a98ae0c6534588144044cff0a0 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:42 -0700 Subject: [PATCH 2103/2157] net: switch to netif_disable_lro in inetdev_init Cosmin reports the following deadlock: dump_stack_lvl+0x62/0x90 print_deadlock_bug+0x274/0x3b0 __lock_acquire+0x1229/0x2470 lock_acquire+0xb7/0x2b0 __mutex_lock+0xa6/0xd20 dev_disable_lro+0x20/0x80 inetdev_init+0x12f/0x1f0 inetdev_event+0x48b/0x870 notifier_call_chain+0x38/0xf0 netif_change_net_namespace+0x72e/0x9f0 do_setlink.isra.0+0xd5/0x1220 rtnl_newlink+0x7ea/0xb50 rtnetlink_rcv_msg+0x459/0x5e0 netlink_rcv_skb+0x54/0x100 netlink_unicast+0x193/0x270 netlink_sendmsg+0x204/0x450 Switch to netif_disable_lro which assumes the caller holds the instance lock. inetdev_init is called for blackhole device (which sw device and doesn't grab instance lock) and from REGISTER/UNREGISTER notifiers. We already hold the instance lock for REGISTER notifier during netns change and we'll soon hold the lock during other paths. Reviewed-by: Jakub Kicinski Reported-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 754f60fb6e25..77e5705ac799 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) - dev_disable_lro(dev); + netif_disable_lro(dev); /* Reference in_dev->dev */ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ From 4c975fd700022c90e61a46326e3444e08317876e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:43 -0700 Subject: [PATCH 2104/2157] net: hold instance lock during NETDEV_REGISTER/UP Callers of inetdev_init can come from several places with inconsistent expectation about netdev instance lock. Grab instance lock during REGISTER (plus UP). Also solve the inconsistency with UNREGISTER where it was locked only during move netns path. WARNING: CPU: 10 PID: 1479 at ./include/net/netdev_lock.h:54 __netdev_update_features+0x65f/0xca0 __warn+0x81/0x180 __netdev_update_features+0x65f/0xca0 report_bug+0x156/0x180 handle_bug+0x4f/0x90 exc_invalid_op+0x13/0x60 asm_exc_invalid_op+0x16/0x20 __netdev_update_features+0x65f/0xca0 netif_disable_lro+0x30/0x1d0 inetdev_init+0x12f/0x1f0 inetdev_event+0x48b/0x870 notifier_call_chain+0x38/0xf0 register_netdevice+0x741/0x8b0 register_netdev+0x1f/0x40 mlx5e_probe+0x4e3/0x8e0 [mlx5_core] auxiliary_bus_probe+0x3f/0x90 really_probe+0xc3/0x3a0 __driver_probe_device+0x80/0x150 driver_probe_device+0x1f/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xb4/0x1c0 bus_probe_device+0x91/0xa0 device_add+0x657/0x870 Reviewed-by: Jakub Kicinski Reported-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- net/core/dev.c | 12 +++++++++--- net/core/dev_api.c | 8 +------- net/core/rtnetlink.c | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa79145518d1..cf3b6445817b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, diff --git a/net/core/dev.c b/net/core/dev.c index 5d20ff226d5e..ce6612106e60 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1858,7 +1858,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb, int err; for_each_netdev(net, dev) { + netdev_lock_ops(dev); err = call_netdevice_register_notifiers(nb, dev); + netdev_unlock_ops(dev); if (err) goto rollback; } @@ -11047,7 +11049,9 @@ int register_netdevice(struct net_device *dev) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ + netdev_lock_ops(dev); ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); ret = notifier_to_errno(ret); if (ret) { /* Expect explicit free_netdev() on failure */ @@ -12059,7 +12063,7 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack) { @@ -12144,11 +12148,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, * And now a mini version of register_netdevice unregister_netdevice. */ + netdev_lock_ops(dev); /* If device is running close it first. */ netif_close(dev); - /* And unlink it from device chain */ unlist_netdevice(dev); + netdev_unlock_ops(dev); synchronize_net(); @@ -12210,11 +12215,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, err = netdev_change_owner(dev, net_old, net); WARN_ON(err); + netdev_lock_ops(dev); /* Add the device back in the hashes */ list_netdevice(dev); - /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); /* * Prevent userspace races by waiting until the network diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 8dbc60612100..90bafb0b1b8c 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int ret; - - netdev_lock_ops(dev); - ret = netif_change_net_namespace(dev, net, pat, 0, NULL); - netdev_unlock_ops(dev); - - return ret; + return __dev_change_net_namespace(dev, net, pat, 0, NULL); } EXPORT_SYMBOL_GPL(dev_change_net_namespace); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 334db17be37d..c23852835050 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; - netdev_lock_ops(dev); - err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = netif_change_net_namespace(dev, tgt_net, pat, + err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex, extack); if (err) - goto errout; + return err; status |= DO_SETLINK_MODIFIED; } + netdev_lock_ops(dev); + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; From 8965c160b8f7333df895321c8aa6bad4a7175f2b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:44 -0700 Subject: [PATCH 2105/2157] net: use netif_disable_lro in ipv6_add_dev ipv6_add_dev might call dev_disable_lro which unconditionally grabs instance lock, so it will deadlock during NETDEV_REGISTER. Switch to netif_disable_lro. Make sure all callers hold the instance lock as well. Cc: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/net/ip.h | 16 ++++++++-------- net/core/dev.c | 1 + net/ipv6/addrconf.c | 15 +++++++++++++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 8a48ade24620..47ed6d23853d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, memcpy(buf, &naddr, sizeof(naddr)); } -#if IS_MODULE(CONFIG_IPV6) -#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) -#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) -#else -#define EXPORT_IPV6_MOD(X) -#define EXPORT_IPV6_MOD_GPL(X) -#endif - #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +#if IS_MODULE(CONFIG_IPV6) +#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) +#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) +#else +#define EXPORT_IPV6_MOD(X) +#define EXPORT_IPV6_MOD_GPL(X) +#endif + static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; diff --git a/net/core/dev.c b/net/core/dev.c index ce6612106e60..0608605cfc24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev) netdev_unlock_ops(lower_dev); } } +EXPORT_IPV6_MOD(netif_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 54a8ea004da2..c3b908fccbc1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) int err = -ENOMEM; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); @@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(err); } if (ndev->cnf.forwarding) - dev_disable_lro(dev); + netif_disable_lro(dev); /* We refer to the device */ netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); @@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); + netdev_lock_ops(dev); if (dev) err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); else err = -ENODEV; + netdev_unlock_ops(dev); rtnl_net_unlock(net); return err; } @@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); err = -ENODEV; - goto unlock; + goto unlock_rtnl; } + netdev_lock_ops(dev); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { err = PTR_ERR(idev); @@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, in6_ifa_put(ifa); unlock: + netdev_unlock_ops(dev); +unlock_rtnl: rtnl_net_unlock(net); return err; @@ -6516,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, if (idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; @@ -6528,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } } From b912d599d3d83ff9a2db58c17b5c76429a206db5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:45 -0700 Subject: [PATCH 2106/2157] net: rename rtnl_net_debug to lock_debug And make it selected by CONFIG_DEBUG_NET. Don't rename any of the structs/functions. Next patch will use rtnl_net_debug_event in netdevsim. Reviewed-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-5-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/core/Makefile | 2 +- net/core/{rtnl_net_debug.c => lock_debug.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename net/core/{rtnl_net_debug.c => lock_debug.c} (100%) diff --git a/net/core/Makefile b/net/core/Makefile index a10c3bd96798..b2a76ce33932 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o obj-$(CONFIG_NET_DEVMEM) += devmem.o -obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o +obj-$(CONFIG_DEBUG_NET) += lock_debug.o obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o diff --git a/net/core/rtnl_net_debug.c b/net/core/lock_debug.c similarity index 100% rename from net/core/rtnl_net_debug.c rename to net/core/lock_debug.c From 1901066aab7654f4a225ac29354a564d891d0c1a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:46 -0700 Subject: [PATCH 2107/2157] netdevsim: add dummy device notifiers In order to exercise and verify notifiers' locking assumptions, register dummy notifiers (via register_netdevice_notifier_dev_net). Share notifier event handler that enforces the assumptions with lock_debug.c (rename and export rtnl_net_debug_event as netdev_debug_event). Add ops lock asserts to netdev_debug_event. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 13 +++++++++++++ drivers/net/netdevsim/netdevsim.h | 3 +++ include/net/netdev_lock.h | 3 +++ net/core/lock_debug.c | 14 +++++++++----- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index b67af4651185..ddda0c1e7a6d 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -939,6 +939,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; + netdev_lockdep_set_classes(ns->netdev); err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) @@ -960,6 +961,14 @@ static int nsim_init_netdevsim(struct netdevsim *ns) if (err) goto err_ipsec_teardown; rtnl_unlock(); + + if (IS_ENABLED(CONFIG_DEBUG_NET)) { + ns->nb.notifier_call = netdev_debug_event; + if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn)) + ns->nb.notifier_call = NULL; + } + return 0; err_ipsec_teardown: @@ -1043,6 +1052,10 @@ void nsim_destroy(struct netdevsim *ns) debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); + if (ns->nb.notifier_call) + unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn); + rtnl_lock(); peer = rtnl_dereference(ns->peer); if (peer) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 665020d18f29..d04401f0bdf7 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -144,6 +144,9 @@ struct netdevsim { struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; + + struct notifier_block nb; + struct netdev_net_notifier nn; }; struct netdevsim * diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 1c0c9a94cc22..c316b551df8d 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr); + #endif diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c index f3272b09c255..b7f22dc92a6f 100644 --- a/net/core/lock_debug.c +++ b/net/core/lock_debug.c @@ -6,10 +6,11 @@ #include #include #include +#include #include -static int rtnl_net_debug_event(struct notifier_block *nb, - unsigned long event, void *ptr) +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); @@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb, /* Keep enum and don't add default to trigger -Werror=switch */ switch (cmd) { + case NETDEV_REGISTER: case NETDEV_UP: + netdev_ops_assert_locked(dev); + fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: case NETDEV_CHANGE: - case NETDEV_REGISTER: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: @@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb, return NOTIFY_DONE; } +EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL"); static int rtnl_net_debug_net_id; @@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net) struct notifier_block *nb; nb = net_generic(net, rtnl_net_debug_net_id); - nb->notifier_call = rtnl_net_debug_event; + nb->notifier_call = netdev_debug_event; return register_netdevice_notifier_net(net, nb); } @@ -95,7 +99,7 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = { }; static struct notifier_block rtnl_net_debug_block = { - .notifier_call = rtnl_net_debug_event, + .notifier_call = netdev_debug_event, }; static int __init rtnl_net_debug_init(void) From dbfc99495d960134bfe1a4f13849fb0d5373b42c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:47 -0700 Subject: [PATCH 2108/2157] net: dummy: request ops lock Even though dummy device doesn't really need an instance lock, a lot of selftests use dummy so it's useful to have extra expose to the instance lock on NIPA. Request the instance/ops locking. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/dummy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index a4938c6a5ebb..d6bdad4baadd 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -105,6 +105,7 @@ static void dummy_setup(struct net_device *dev) dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; dev->needs_free_netdev = true; + dev->request_ops_lock = true; /* Fill in device structure with ethernet-generic values. */ dev->flags |= IFF_NOARP; From ee705fa21fdc0c7da98309e5c3c8ab72b99ef087 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:48 -0700 Subject: [PATCH 2109/2157] docs: net: document netdev notifier expectations We don't have a consistent state yet, but document where we think we are and where we wanna be. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index ebb868f50ac2..6c2d8945f597 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -343,6 +343,29 @@ there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., acquiring the instance lock themselves, while the ``netif_xxx`` functions assume that the driver has already acquired the instance lock. +Notifiers and netdev instance lock +================================== + +For device drivers that implement shaping or queue management APIs, +some of the notifiers (``enum netdev_cmd``) are running under the netdev +instance lock. + +For devices with locked ops, currently only the following notifiers are +running under the lock: +* ``NETDEV_REGISTER`` +* ``NETDEV_UP`` + +The following notifiers are running without the lock: +* ``NETDEV_UNREGISTER`` + +There are no clear expectations for the remaining notifiers. Notifiers not on +the list may run with or without the instance lock, potentially even invoking +the same notifier type with and without the lock from different code paths. +The goal is to eventually ensure that all (or most, with a few documented +exceptions) notifiers run under the instance lock. Please extend this +documentation whenever you make explicit assumption about lock being held +from a notifier. + NETDEV_INTERNAL symbol namespace ================================ From 56c8a23f8a0f3c735f3b8f9d323927904090049b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:49 -0700 Subject: [PATCH 2110/2157] selftests: net: use netdevsim in netns test Netdevsim has extra register_netdevice_notifier_dev_net notifiers, use netdevim instead of dummy device to test them out. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-9-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 25 +++++++++++++++++++++++ tools/testing/selftests/net/netns-name.sh | 13 ++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 975be4fdbcdb..701905eeff66 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -222,6 +222,31 @@ setup_ns() NS_LIST+=("${ns_list[@]}") } +# Create netdevsim with given id and net namespace. +create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +# Remove netdevsim with given id. +cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + tc_rule_stats_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 0be1905d1f2f..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. # -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict From c0f21784bca558d03d48b5ba68fac2f7070f516b Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 2 Apr 2025 10:24:14 -0700 Subject: [PATCH 2111/2157] io_uring/zcrx: fix selftests w/ updated netdev Python helpers Fix io_uring zero copy rx selftest with updated netdev Python helpers. Signed-off-by: David Wei Link: https://patch.msgid.link/20250402172414.895276-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index d301d9b356f7..9f271ab6ec04 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -27,7 +27,7 @@ def _set_flow_rule(cfg, chan): def test_zcrx(cfg) -> None: - cfg.require_v6() + cfg.require_ipver('6') combined_chans = _get_combined_channels(cfg) if combined_chans < 2: @@ -40,7 +40,7 @@ def test_zcrx(cfg) -> None: flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): wait_port_listen(9999, proto="tcp", host=cfg.remote) cmd(tx_cmd) @@ -51,7 +51,7 @@ def test_zcrx(cfg) -> None: def test_zcrx_oneshot(cfg) -> None: - cfg.require_v6() + cfg.require_ipver('6') combined_chans = _get_combined_channels(cfg) if combined_chans < 2: @@ -64,7 +64,7 @@ def test_zcrx_oneshot(cfg) -> None: flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): wait_port_listen(9999, proto="tcp", host=cfg.remote) cmd(tx_cmd) From c0dbd11ada2c94edc337a5f6665cbaa6079ff785 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 3 Apr 2025 16:43:50 +0200 Subject: [PATCH 2112/2157] fs: actually hold the namespace semaphore Don't use a scoped guard that only protects the next statement. Use a regular guard to make sure that the namespace semaphore is held across the whole function. Signed-off-by: Christian Brauner Reported-by: Leon Romanovsky Link: https://lore.kernel.org/all/20250401170715.GA112019@unreal/ Fixes: db04662e2f4f ("fs: allow detached mounts in clone_private_mount()") Signed-off-by: Linus Torvalds --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 16292ff760c9..14935a0500a2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2478,7 +2478,8 @@ struct vfsmount *clone_private_mount(const struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; - scoped_guard(rwsem_read, &namespace_sem) + guard(rwsem_read)(&namespace_sem); + if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); From b27055a08ad4b415dcf15b63034f9cb236f7fb40 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 3 Apr 2025 00:56:32 +0800 Subject: [PATCH 2113/2157] net: fix geneve_opt length integer overflow struct geneve_opt uses 5 bit length for each single option, which means every vary size option should be smaller than 128 bytes. However, all current related Netlink policies cannot promise this length condition and the attacker can exploit a exact 128-byte size option to *fake* a zero length option and confuse the parsing logic, further achieve heap out-of-bounds read. One example crash log is like below: [ 3.905425] ================================================================== [ 3.905925] BUG: KASAN: slab-out-of-bounds in nla_put+0xa9/0xe0 [ 3.906255] Read of size 124 at addr ffff888005f291cc by task poc/177 [ 3.906646] [ 3.906775] CPU: 0 PID: 177 Comm: poc-oob-read Not tainted 6.1.132 #1 [ 3.907131] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 3.907784] Call Trace: [ 3.907925] [ 3.908048] dump_stack_lvl+0x44/0x5c [ 3.908258] print_report+0x184/0x4be [ 3.909151] kasan_report+0xc5/0x100 [ 3.909539] kasan_check_range+0xf3/0x1a0 [ 3.909794] memcpy+0x1f/0x60 [ 3.909968] nla_put+0xa9/0xe0 [ 3.910147] tunnel_key_dump+0x945/0xba0 [ 3.911536] tcf_action_dump_1+0x1c1/0x340 [ 3.912436] tcf_action_dump+0x101/0x180 [ 3.912689] tcf_exts_dump+0x164/0x1e0 [ 3.912905] fw_dump+0x18b/0x2d0 [ 3.913483] tcf_fill_node+0x2ee/0x460 [ 3.914778] tfilter_notify+0xf4/0x180 [ 3.915208] tc_new_tfilter+0xd51/0x10d0 [ 3.918615] rtnetlink_rcv_msg+0x4a2/0x560 [ 3.919118] netlink_rcv_skb+0xcd/0x200 [ 3.919787] netlink_unicast+0x395/0x530 [ 3.921032] netlink_sendmsg+0x3d0/0x6d0 [ 3.921987] __sock_sendmsg+0x99/0xa0 [ 3.922220] __sys_sendto+0x1b7/0x240 [ 3.922682] __x64_sys_sendto+0x72/0x90 [ 3.922906] do_syscall_64+0x5e/0x90 [ 3.923814] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 3.924122] RIP: 0033:0x7e83eab84407 [ 3.924331] Code: 48 89 fa 4c 89 df e8 38 aa 00 00 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 1a 5b c3 0f 1f 84 00 00 00 00 00 48 8b 44 24 10 0f 05 <5b> c3 0f 1f 80 00 00 00 00 83 e2 39 83 faf [ 3.925330] RSP: 002b:00007ffff505e370 EFLAGS: 00000202 ORIG_RAX: 000000000000002c [ 3.925752] RAX: ffffffffffffffda RBX: 00007e83eaafa740 RCX: 00007e83eab84407 [ 3.926173] RDX: 00000000000001a8 RSI: 00007ffff505e3c0 RDI: 0000000000000003 [ 3.926587] RBP: 00007ffff505f460 R08: 00007e83eace1000 R09: 000000000000000c [ 3.926977] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffff505f3c0 [ 3.927367] R13: 00007ffff505f5c8 R14: 00007e83ead1b000 R15: 00005d4fbbe6dcb8 Fix these issues by enforing correct length condition in related policies. Fixes: 925d844696d9 ("netfilter: nft_tunnel: add support for geneve opts") Fixes: 4ece47787077 ("lwtunnel: add options setting and dumping for geneve") Fixes: 0ed5269f9e41 ("net/sched: add tunnel option support to act_tunnel_key") Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Signed-off-by: Lin Ma Reviewed-by: Xin Long Acked-by: Cong Wang Link: https://patch.msgid.link/20250402165632.6958-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 2 +- net/netfilter/nft_tunnel.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/cls_flower.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 364ea798511e..f65d2f727381 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -451,7 +451,7 @@ static const struct nla_policy geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = { [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, - [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, + [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 }, }; static const struct nla_policy diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 681301b46aa4..ec7089ab752c 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -335,7 +335,7 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = { [NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 }, [NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 }, - [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, + [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 }, }; static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index ae5dea7c48a8..2cef4b08befb 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -68,7 +68,7 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, - .len = 128 }, + .len = 127 }, }; static const struct nla_policy diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 03505673d523..099ff6a3e1f5 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -766,7 +766,7 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, - .len = 128 }, + .len = 127 }, }; static const struct nla_policy From 2a8377720a0a30fa133f7773e901598f7d563f36 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 1 Apr 2025 11:02:12 +0200 Subject: [PATCH 2114/2157] net: octeontx2: Handle XDP_ABORTED and XDP invalid as XDP_DROP In the current implementation octeontx2 manages XDP_ABORTED and XDP invalid as XDP_PASS forwarding the skb to the networking stack. Align the behaviour to other XDP drivers handling XDP_ABORTED and XDP invalid as XDP_DROP. Please note this patch has just compile tested. Fixes: 06059a1a9a4a5 ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250401-octeontx2-xdp-abort-fix-v1-1-f0587c35a0b9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index af8cabe828d0..0a6bb346ba45 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1559,12 +1559,11 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, break; default: bpf_warn_invalid_xdp_action(pfvf->netdev, prog, act); - break; + fallthrough; case XDP_ABORTED: - if (xsk_buff) - xsk_buff_free(xsk_buff); - trace_xdp_exception(pfvf->netdev, prog, act); - break; + if (act == XDP_ABORTED) + trace_xdp_exception(pfvf->netdev, prog, act); + fallthrough; case XDP_DROP: cq->pool_ptrs++; if (xsk_buff) { From 51de3600093429e3b712e5f091d767babc5dd6df Mon Sep 17 00:00:00 2001 From: Ying Lu Date: Wed, 2 Apr 2025 16:58:59 +0800 Subject: [PATCH 2115/2157] usbnet:fix NPE during rx_complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing usbnet_going_away Check in Critical Path. The usb_submit_urb function lacks a usbnet_going_away validation, whereas __usbnet_queue_skb includes this check. This inconsistency creates a race condition where: A URB request may succeed, but the corresponding SKB data fails to be queued. Subsequent processes: (e.g., rx_complete → defer_bh → __skb_unlink(skb, list)) attempt to access skb->next, triggering a NULL pointer dereference (Kernel Panic). Fixes: 04e906839a05 ("usbnet: fix cyclical race on disconnect with work queue") Cc: stable@vger.kernel.org Signed-off-by: Ying Lu Link: https://patch.msgid.link/4c9ef2efaa07eb7f9a5042b74348a67e5a3a7aea.1743584159.git.luying1@xiaomi.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index aeab2308b150..724b93aa4f7e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -530,7 +530,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) netif_device_present (dev->net) && test_bit(EVENT_DEV_OPEN, &dev->flags) && !test_bit (EVENT_RX_HALT, &dev->flags) && - !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { + !test_bit (EVENT_DEV_ASLEEP, &dev->flags) && + !usbnet_going_away(dev)) { switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { case -EPIPE: usbnet_defer_kevent (dev, EVENT_RX_HALT); @@ -551,8 +552,7 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) tasklet_schedule (&dev->bh); break; case 0: - if (!usbnet_going_away(dev)) - __usbnet_queue_skb(&dev->rxq, skb, rx_start); + __usbnet_queue_skb(&dev->rxq, skb, rx_start); } } else { netif_dbg(dev, ifdown, dev->net, "rx: stopped\n"); From 4d0ab3a6885e3e9040310a8d8f54503366083626 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:23 +0300 Subject: [PATCH 2116/2157] ipv6: Start path selection from the first nexthop Cited commit transitioned IPv6 path selection to use hash-threshold instead of modulo-N. With hash-threshold, each nexthop is assigned a region boundary in the multipath hash function's output space and a nexthop is chosen if the calculated hash is smaller than the nexthop's region boundary. Hash-threshold does not work correctly if path selection does not start with the first nexthop. For example, if fib6_select_path() is always passed the last nexthop in the group, then it will always be chosen because its region boundary covers the entire hash function's output space. Fix this by starting the selection process from the first nexthop and do not consider nexthops for which rt6_score_route() provided a negative score. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Reported-by: Stanislav Fomichev Closes: https://lore.kernel.org/netdev/Z9RIyKZDNoka53EO@mini-arch/ Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250402114224.293392-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3406a0d45bd..864f0002034b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -412,11 +412,35 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } +static struct fib6_info * +rt6_multipath_first_sibling_rcu(const struct fib6_info *rt) +{ + struct fib6_info *iter; + struct fib6_node *fn; + + fn = rcu_dereference(rt->fib6_node); + if (!fn) + goto out; + iter = rcu_dereference(fn->leaf); + if (!iter) + goto out; + + while (iter) { + if (iter->fib6_metric == rt->fib6_metric && + rt6_qualify_for_ecmp(iter)) + return iter; + iter = rcu_dereference(iter->fib6_next); + } + +out: + return NULL; +} + void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *match = res->f6i; + struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) @@ -440,10 +464,18 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, return; } - if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) + first = rt6_multipath_first_sibling_rcu(match); + if (!first) goto out; - list_for_each_entry_rcu(sibling, &match->fib6_siblings, + if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) { + match = first; + goto out; + } + + list_for_each_entry_rcu(sibling, &first->fib6_siblings, fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; From 8b8e0dd357165e0258d9f9cdab5366720ed2f619 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:24 +0300 Subject: [PATCH 2117/2157] ipv6: Do not consider link down nexthops in path selection Nexthops whose link is down are not supposed to be considered during path selection when the "ignore_routes_with_linkdown" sysctl is set. This is done by assigning them a negative region boundary. However, when comparing the computed hash (unsigned) with the region boundary (signed), the negative region boundary is treated as unsigned, resulting in incorrect nexthop selection. Fix by treating the computed hash as signed. Note that the computed hash is always in range of [0, 2^31 - 1]. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Signed-off-by: Ido Schimmel Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250402114224.293392-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 864f0002034b..ab12b816ab94 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -442,6 +442,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, { struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; + int hash; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -468,7 +469,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (!first) goto out; - if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + hash = fl6->mp_hash; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && rt6_score_route(first->fib6_nh, first->fib6_flags, oif, strict) >= 0) { match = first; @@ -481,7 +483,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, int nh_upper_bound; nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); - if (fl6->mp_hash > nh_upper_bound) + if (hash > nh_upper_bound) continue; if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; From fda8c491db2a90ff3e6fbbae58e495b4ddddeca3 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 2 Apr 2025 21:50:36 +0800 Subject: [PATCH 2118/2157] arcnet: Add NULL check in com20020pci_probe() devm_kasprintf() returns NULL when memory allocation fails. Currently, com20020pci_probe() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue and ensure no resources are left allocated. Fixes: 6b17a597fc2f ("arcnet: restoring support for multiple Sohard Arcnet cards") Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250402135036.44697-1-bsdhenrymartin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/com20020-pci.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c5e571ec94c9..0472bcdff130 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -251,18 +251,33 @@ static int com20020pci_probe(struct pci_dev *pdev, card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, GFP_KERNEL, "arc%d-%d-tx", dev->dev_id, i); + if (!card->tx_led.default_trigger) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "pci:green:tx:%d-%d", dev->dev_id, i); - + if (!card->tx_led.name) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->tx_led.dev = &dev->dev; card->recon_led.brightness_set = led_recon_set; card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, GFP_KERNEL, "arc%d-%d-recon", dev->dev_id, i); + if (!card->recon_led.default_trigger) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "pci:red:recon:%d-%d", dev->dev_id, i); + if (!card->recon_led.name) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->recon_led.dev = &dev->dev; ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); From 053f3ff67d7feefc75797863f3d84b47ad47086f Mon Sep 17 00:00:00 2001 From: Dave Marquardt Date: Wed, 2 Apr 2025 10:44:03 -0500 Subject: [PATCH 2119/2157] net: ibmveth: make veth_pool_store stop hanging v2: - Created a single error handling unlock and exit in veth_pool_store - Greatly expanded commit message with previous explanatory-only text Summary: Use rtnl_mutex to synchronize veth_pool_store with itself, ibmveth_close and ibmveth_open, preventing multiple calls in a row to napi_disable. Background: Two (or more) threads could call veth_pool_store through writing to /sys/devices/vio/30000002/pool*/*. You can do this easily with a little shell script. This causes a hang. I configured LOCKDEP, compiled ibmveth.c with DEBUG, and built a new kernel. I ran this test again and saw: Setting pool0/active to 0 Setting pool1/active to 1 [ 73.911067][ T4365] ibmveth 30000002 eth0: close starting Setting pool1/active to 1 Setting pool1/active to 0 [ 73.911367][ T4366] ibmveth 30000002 eth0: close starting [ 73.916056][ T4365] ibmveth 30000002 eth0: close complete [ 73.916064][ T4365] ibmveth 30000002 eth0: open starting [ 110.808564][ T712] systemd-journald[712]: Sent WATCHDOG=1 notification. [ 230.808495][ T712] systemd-journald[712]: Sent WATCHDOG=1 notification. [ 243.683786][ T123] INFO: task stress.sh:4365 blocked for more than 122 seconds. [ 243.683827][ T123] Not tainted 6.14.0-01103-g2df0c02dab82-dirty #8 [ 243.683833][ T123] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 243.683838][ T123] task:stress.sh state:D stack:28096 pid:4365 tgid:4365 ppid:4364 task_flags:0x400040 flags:0x00042000 [ 243.683852][ T123] Call Trace: [ 243.683857][ T123] [c00000000c38f690] [0000000000000001] 0x1 (unreliable) [ 243.683868][ T123] [c00000000c38f840] [c00000000001f908] __switch_to+0x318/0x4e0 [ 243.683878][ T123] [c00000000c38f8a0] [c000000001549a70] __schedule+0x500/0x12a0 [ 243.683888][ T123] [c00000000c38f9a0] [c00000000154a878] schedule+0x68/0x210 [ 243.683896][ T123] [c00000000c38f9d0] [c00000000154ac80] schedule_preempt_disabled+0x30/0x50 [ 243.683904][ T123] [c00000000c38fa00] [c00000000154dbb0] __mutex_lock+0x730/0x10f0 [ 243.683913][ T123] [c00000000c38fb10] [c000000001154d40] napi_enable+0x30/0x60 [ 243.683921][ T123] [c00000000c38fb40] [c000000000f4ae94] ibmveth_open+0x68/0x5dc [ 243.683928][ T123] [c00000000c38fbe0] [c000000000f4aa20] veth_pool_store+0x220/0x270 [ 243.683936][ T123] [c00000000c38fc70] [c000000000826278] sysfs_kf_write+0x68/0xb0 [ 243.683944][ T123] [c00000000c38fcb0] [c0000000008240b8] kernfs_fop_write_iter+0x198/0x2d0 [ 243.683951][ T123] [c00000000c38fd00] [c00000000071b9ac] vfs_write+0x34c/0x650 [ 243.683958][ T123] [c00000000c38fdc0] [c00000000071bea8] ksys_write+0x88/0x150 [ 243.683966][ T123] [c00000000c38fe10] [c0000000000317f4] system_call_exception+0x124/0x340 [ 243.683973][ T123] [c00000000c38fe50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec ... [ 243.684087][ T123] Showing all locks held in the system: [ 243.684095][ T123] 1 lock held by khungtaskd/123: [ 243.684099][ T123] #0: c00000000278e370 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x50/0x248 [ 243.684114][ T123] 4 locks held by stress.sh/4365: [ 243.684119][ T123] #0: c00000003a4cd3f8 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x88/0x150 [ 243.684132][ T123] #1: c000000041aea888 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x154/0x2d0 [ 243.684143][ T123] #2: c0000000366fb9a8 (kn->active#64){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x160/0x2d0 [ 243.684155][ T123] #3: c000000035ff4cb8 (&dev->lock){+.+.}-{3:3}, at: napi_enable+0x30/0x60 [ 243.684166][ T123] 5 locks held by stress.sh/4366: [ 243.684170][ T123] #0: c00000003a4cd3f8 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x88/0x150 [ 243.684183][ T123] #1: c00000000aee2288 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x154/0x2d0 [ 243.684194][ T123] #2: c0000000366f4ba8 (kn->active#64){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x160/0x2d0 [ 243.684205][ T123] #3: c000000035ff4cb8 (&dev->lock){+.+.}-{3:3}, at: napi_disable+0x30/0x60 [ 243.684216][ T123] #4: c0000003ff9bbf18 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0x138/0x12a0 From the ibmveth debug, two threads are calling veth_pool_store, which calls ibmveth_close and ibmveth_open. Here's the sequence: T4365 T4366 ----------------- ----------------- --------- veth_pool_store veth_pool_store ibmveth_close ibmveth_close napi_disable napi_disable ibmveth_open napi_enable <- HANG ibmveth_close calls napi_disable at the top and ibmveth_open calls napi_enable at the top. https://docs.kernel.org/networking/napi.html]] says The control APIs are not idempotent. Control API calls are safe against concurrent use of datapath APIs but an incorrect sequence of control API calls may result in crashes, deadlocks, or race conditions. For example, calling napi_disable() multiple times in a row will deadlock. In the normal open and close paths, rtnl_mutex is acquired to prevent other callers. This is missing from veth_pool_store. Use rtnl_mutex in veth_pool_store fixes these hangs. Signed-off-by: Dave Marquardt Fixes: 860f242eb534 ("[PATCH] ibmveth change buffer pools dynamically") Reviewed-by: Nick Child Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250402154403.386744-1-davemarq@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 39 +++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index b619a3ec245b..04192190beba 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1802,18 +1802,22 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, long value = simple_strtol(buf, NULL, 10); long rc; + rtnl_lock(); + if (attr == &veth_active_attr) { if (value && !pool->active) { if (netif_running(netdev)) { if (ibmveth_alloc_buffer_pool(pool)) { netdev_err(netdev, "unable to alloc pool\n"); - return -ENOMEM; + rc = -ENOMEM; + goto unlock_err; } pool->active = 1; ibmveth_close(netdev); - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->active = 1; } @@ -1833,48 +1837,59 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, if (i == IBMVETH_NUM_BUFF_POOLS) { netdev_err(netdev, "no active pool >= MTU\n"); - return -EPERM; + rc = -EPERM; + goto unlock_err; } if (netif_running(netdev)) { ibmveth_close(netdev); pool->active = 0; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } pool->active = 0; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { - return -EINVAL; + rc = -EINVAL; + goto unlock_err; } else { if (netif_running(netdev)) { ibmveth_close(netdev); pool->size = value; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->size = value; } } } else if (attr == &veth_size_attr) { if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { - return -EINVAL; + rc = -EINVAL; + goto unlock_err; } else { if (netif_running(netdev)) { ibmveth_close(netdev); pool->buff_size = value; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->buff_size = value; } } } + rtnl_unlock(); /* kick the interrupt handler to allocate/deallocate pools */ ibmveth_interrupt(netdev->irq, netdev); return count; + +unlock_err: + rtnl_unlock(); + return rc; } From ec304b70d46bd2ed66541c5b57b63276529e05b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:34:04 -0700 Subject: [PATCH 2120/2157] net: move mp dev config validation to __net_mp_open_rxq() devmem code performs a number of safety checks to avoid having to reimplement all of them in the drivers. Move those to __net_mp_open_rxq() and reuse that function for binding to make sure that io_uring ZC also benefits from them. While at it rename the queue ID variable to rxq_idx in __net_mp_open_rxq(), we touch most of the relevant lines. The XArray insertion is reordered after the netdev_rx_queue_restart() call, otherwise we'd need to duplicate the queue index check or risk inserting an invalid pointer. The XArray allocation failures should be extremely rare. Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Fixes: 6e18ed929d3b ("net: add helpers for setting a memory provider on an rx queue") Link: https://patch.msgid.link/20250403013405.2827250-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/memory_provider.h | 6 +++ net/core/devmem.c | 52 ++++++------------------- net/core/netdev-genl.c | 6 --- net/core/netdev_rx_queue.c | 49 +++++++++++++++++------ 4 files changed, 55 insertions(+), 58 deletions(-) diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index b3e665897767..ada4f968960a 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -6,6 +6,7 @@ #include struct netdev_rx_queue; +struct netlink_ext_ack; struct sk_buff; struct memory_provider_ops { @@ -24,8 +25,13 @@ void net_mp_niov_clear_page_pool(struct net_iov *niov); int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *p); +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack); void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p); +void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p); /** * net_mp_netmem_place_in_cache() - give a netmem to a page pool diff --git a/net/core/devmem.c b/net/core/devmem.c index ee145a2aa41c..f2ce3c2ebc97 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include @@ -143,57 +142,28 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack) { + struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; struct netdev_rx_queue *rxq; u32 xa_idx; int err; - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { - NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); - return -EINVAL; - } - - if (dev->cfg->hds_thresh) { - NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); - return -EINVAL; - } - - rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_ops) { - NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); - return -EEXIST; - } - -#ifdef CONFIG_XDP_SOCKETS - if (rxq->pool) { - NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); - return -EBUSY; - } -#endif - - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, - GFP_KERNEL); + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack); if (err) return err; - rxq->mp_params.mp_priv = binding; - rxq->mp_params.mp_ops = &dmabuf_devmem_ops; - - err = netdev_rx_queue_restart(dev, rxq_idx); + rxq = __netif_get_rx_queue(dev, rxq_idx); + err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, + GFP_KERNEL); if (err) - goto err_xa_erase; + goto err_close_rxq; return 0; -err_xa_erase: - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; - xa_erase(&binding->bound_rxqs, xa_idx); - +err_close_rxq: + __net_mp_close_rxq(dev, rxq_idx, &mp_params); return err; } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 3afeaa8c5dc5..5d7af50fe702 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -874,12 +874,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock; } - if (dev_xdp_prog_count(netdev)) { - NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); - err = -EEXIST; - goto err_unlock; - } - binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 3af716f77a13..556b5393ec9f 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include @@ -86,8 +87,9 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p) +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { struct netdev_rx_queue *rxq; int ret; @@ -95,16 +97,41 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, if (!netdev_need_ops_lock(dev)) return -EOPNOTSUPP; - if (ifq_idx >= dev->real_num_rx_queues) + if (rxq_idx >= dev->real_num_rx_queues) return -EINVAL; - ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - rxq = __netif_get_rx_queue(dev, ifq_idx); - if (rxq->mp_params.mp_ops) + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + if (dev->cfg->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + if (dev_xdp_prog_count(dev)) { + NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached"); return -EEXIST; + } + + rxq = __netif_get_rx_queue(dev, rxq_idx); + if (rxq->mp_params.mp_ops) { + NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); + return -EEXIST; + } +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) { + NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); + return -EBUSY; + } +#endif rxq->mp_params = *p; - ret = netdev_rx_queue_restart(dev, ifq_idx); + ret = netdev_rx_queue_restart(dev, rxq_idx); if (ret) { rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; @@ -112,19 +139,19 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, return ret; } -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, +int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, struct pp_memory_provider_params *p) { int ret; netdev_lock(dev); - ret = __net_mp_open_rxq(dev, ifq_idx, p); + ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL); netdev_unlock(dev); return ret; } -static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p) +void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_rx_queue *rxq; From 34f71de3f548eba0604c9cbabc1eb68b2f81fa0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:34:05 -0700 Subject: [PATCH 2121/2157] net: avoid false positive warnings in __net_mp_close_rxq() Commit under Fixes solved the problem of spurious warnings when we uninstall an MP from a device while its down. The __net_mp_close_rxq() which is used by io_uring was not fixed. Move the fix over and reuse __net_mp_close_rxq() in the devmem path. Acked-by: Stanislav Fomichev Fixes: a70f891e0fa0 ("net: devmem: do not WARN conditionally after netdev_rx_queue_restart()") Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250403013405.2827250-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 12 +++++------- net/core/netdev_rx_queue.c | 4 +++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index f2ce3c2ebc97..6e27a47d0493 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -116,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; - int err; if (binding->list.next) list_del(&binding->list); xa_for_each(&binding->bound_rxqs, xa_idx, rxq) { - WARN_ON(rxq->mp_params.mp_priv != binding); - - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; + const struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; rxq_idx = get_netdev_rx_queue_index(rxq); - err = netdev_rx_queue_restart(binding->dev, rxq_idx); - WARN_ON(err && err != -ENETDOWN); + __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 556b5393ec9f..d126f10197bf 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -154,6 +154,7 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, const struct pp_memory_provider_params *old_p) { struct netdev_rx_queue *rxq; + int err; if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) return; @@ -173,7 +174,8 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; - WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); + err = netdev_rx_queue_restart(dev, ifq_idx); + WARN_ON(err && err != -ENETDOWN); } void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, From 0802c32d4b03a26604c2db2c8a63b34a80361305 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:03 -0700 Subject: [PATCH 2122/2157] netlink: specs: rt_addr: fix the spec format / schema failures The spec is mis-formatted, schema validation says: Failed validating 'type' in schema['properties']['operations']['properties']['list']['items']['properties']['dump']['properties']['request']['properties']['value']: {'minimum': 0, 'type': 'integer'} On instance['operations']['list'][3]['dump']['request']['value']: '58 - ifa-family' The ifa-family clearly wants to be part of an attribute list. Reviewed-by: Jacob Keller Reviewed-by: Donald Hunter Reviewed-by: Yuyang Huang Fixes: 4f280376e531 ("selftests/net: Add selftest for IPv4 RTM_GETMULTICAST support") Link: https://patch.msgid.link/20250403013706.2828322-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 5dd5469044c7..3bc9b6f9087e 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -187,6 +187,7 @@ operations: dump: request: value: 58 + attributes: - ifa-family reply: value: 58 From 524c03585fda36584cc7ada49a1827666d37eb4e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:04 -0700 Subject: [PATCH 2123/2157] netlink: specs: rt_addr: fix get multi command name Command names should match C defines, codegens may depend on it. Reviewed-by: Jacob Keller Fixes: 4f280376e531 ("selftests/net: Add selftest for IPv4 RTM_GETMULTICAST support") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 2 +- tools/testing/selftests/net/rtnetlink.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 3bc9b6f9087e..1650dc3f091a 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -169,7 +169,7 @@ operations: value: 20 attributes: *ifaddr-all - - name: getmaddrs + name: getmulticast doc: Get / dump IPv4/IPv6 multicast addresses. attribute-set: addr-attrs fixed-header: ifaddrmsg diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py index 80950888800b..69436415d56e 100755 --- a/tools/testing/selftests/net/rtnetlink.py +++ b/tools/testing/selftests/net/rtnetlink.py @@ -12,7 +12,7 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: At least the loopback interface should have this address. """ - addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) all_host_multicasts = [ addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST From 0c8e30252d9fe8127f90b7a0a293872b368ebf3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:05 -0700 Subject: [PATCH 2124/2157] netlink: specs: rt_addr: pull the ifa- prefix out of the names YAML specs don't normally include the C prefix name in the name of the YAML attr. Remove the ifa- prefix from all attributes in addr-attrs and specify name-prefix instead. This is a bit risky, hopefully there aren't many users out there. Fixes: dfb0f7d9d979 ("doc/netlink: Add spec for rt addr messages") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 39 ++++++++++++------------ tools/testing/selftests/net/rtnetlink.py | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 1650dc3f091a..df6b23f06a22 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -78,45 +78,46 @@ definitions: attribute-sets: - name: addr-attrs + name-prefix: ifa- attributes: - - name: ifa-address + name: address type: binary display-hint: ipv4 - - name: ifa-local + name: local type: binary display-hint: ipv4 - - name: ifa-label + name: label type: string - - name: ifa-broadcast + name: broadcast type: binary display-hint: ipv4 - - name: ifa-anycast + name: anycast type: binary - - name: ifa-cacheinfo + name: cacheinfo type: binary struct: ifa-cacheinfo - - name: ifa-multicast + name: multicast type: binary - - name: ifa-flags + name: flags type: u32 enum: ifa-flags enum-as-flags: true - - name: ifa-rt-priority + name: rt-priority type: u32 - - name: ifa-target-netnsid + name: target-netnsid type: binary - - name: ifa-proto + name: proto type: u8 @@ -137,10 +138,10 @@ operations: - ifa-prefixlen - ifa-scope - ifa-index - - ifa-address - - ifa-label - - ifa-local - - ifa-cacheinfo + - address + - label + - local + - cacheinfo - name: deladdr doc: Remove address @@ -154,8 +155,8 @@ operations: - ifa-prefixlen - ifa-scope - ifa-index - - ifa-address - - ifa-local + - address + - local - name: getaddr doc: Dump address information. @@ -182,8 +183,8 @@ operations: reply: value: 58 attributes: &mcaddr-attrs - - ifa-multicast - - ifa-cacheinfo + - multicast + - cacheinfo dump: request: value: 58 diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py index 69436415d56e..e9ad5e88da97 100755 --- a/tools/testing/selftests/net/rtnetlink.py +++ b/tools/testing/selftests/net/rtnetlink.py @@ -15,7 +15,7 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) all_host_multicasts = [ - addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST ] ksft_ge(len(all_host_multicasts), 1, From 1a1eba0e9899c286914032c78708c614b016704b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:06 -0700 Subject: [PATCH 2125/2157] netlink: specs: rt_route: pull the ifa- prefix out of the names YAML specs don't normally include the C prefix name in the name of the YAML attr. Remove the ifa- prefix from all attributes in route-attrs and metrics and specify name-prefix instead. This is a bit risky, hopefully there aren't many users out there. Fixes: 023289b4f582 ("doc/netlink: Add spec for rt route messages") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_route.yaml | 180 +++++++++++----------- 1 file changed, 91 insertions(+), 89 deletions(-) diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt_route.yaml index a674103e5bc4..292469c7d4b9 100644 --- a/Documentation/netlink/specs/rt_route.yaml +++ b/Documentation/netlink/specs/rt_route.yaml @@ -80,165 +80,167 @@ definitions: attribute-sets: - name: route-attrs + name-prefix: rta- attributes: - - name: rta-dst + name: dst type: binary display-hint: ipv4 - - name: rta-src + name: src type: binary display-hint: ipv4 - - name: rta-iif + name: iif type: u32 - - name: rta-oif + name: oif type: u32 - - name: rta-gateway + name: gateway type: binary display-hint: ipv4 - - name: rta-priority + name: priority type: u32 - - name: rta-prefsrc + name: prefsrc type: binary display-hint: ipv4 - - name: rta-metrics + name: metrics type: nest - nested-attributes: rta-metrics + nested-attributes: metrics - - name: rta-multipath + name: multipath type: binary - - name: rta-protoinfo # not used + name: protoinfo # not used type: binary - - name: rta-flow + name: flow type: u32 - - name: rta-cacheinfo + name: cacheinfo type: binary struct: rta-cacheinfo - - name: rta-session # not used + name: session # not used type: binary - - name: rta-mp-algo # not used + name: mp-algo # not used type: binary - - name: rta-table + name: table type: u32 - - name: rta-mark + name: mark type: u32 - - name: rta-mfc-stats + name: mfc-stats type: binary - - name: rta-via + name: via type: binary - - name: rta-newdst + name: newdst type: binary - - name: rta-pref + name: pref type: u8 - - name: rta-encap-type + name: encap-type type: u16 - - name: rta-encap + name: encap type: binary # tunnel specific nest - - name: rta-expires + name: expires type: u32 - - name: rta-pad + name: pad type: binary - - name: rta-uid + name: uid type: u32 - - name: rta-ttl-propagate + name: ttl-propagate type: u8 - - name: rta-ip-proto + name: ip-proto type: u8 - - name: rta-sport + name: sport type: u16 - - name: rta-dport + name: dport type: u16 - - name: rta-nh-id + name: nh-id type: u32 - - name: rta-flowlabel + name: flowlabel type: u32 byte-order: big-endian display-hint: hex - - name: rta-metrics + name: metrics + name-prefix: rtax- attributes: - - name: rtax-unspec + name: unspec type: unused value: 0 - - name: rtax-lock + name: lock type: u32 - - name: rtax-mtu + name: mtu type: u32 - - name: rtax-window + name: window type: u32 - - name: rtax-rtt + name: rtt type: u32 - - name: rtax-rttvar + name: rttvar type: u32 - - name: rtax-ssthresh + name: ssthresh type: u32 - - name: rtax-cwnd + name: cwnd type: u32 - - name: rtax-advmss + name: advmss type: u32 - - name: rtax-reordering + name: reordering type: u32 - - name: rtax-hoplimit + name: hoplimit type: u32 - - name: rtax-initcwnd + name: initcwnd type: u32 - - name: rtax-features + name: features type: u32 - - name: rtax-rto-min + name: rto-min type: u32 - - name: rtax-initrwnd + name: initrwnd type: u32 - - name: rtax-quickack + name: quickack type: u32 - - name: rtax-cc-algo + name: cc-algo type: string - - name: rtax-fastopen-no-cookie + name: fastopen-no-cookie type: u32 operations: @@ -254,18 +256,18 @@ operations: value: 26 attributes: - rtm-family - - rta-src + - src - rtm-src-len - - rta-dst + - dst - rtm-dst-len - - rta-iif - - rta-oif - - rta-ip-proto - - rta-sport - - rta-dport - - rta-mark - - rta-uid - - rta-flowlabel + - iif + - oif + - ip-proto + - sport + - dport + - mark + - uid + - flowlabel reply: value: 24 attributes: &all-route-attrs @@ -278,34 +280,34 @@ operations: - rtm-scope - rtm-type - rtm-flags - - rta-dst - - rta-src - - rta-iif - - rta-oif - - rta-gateway - - rta-priority - - rta-prefsrc - - rta-metrics - - rta-multipath - - rta-flow - - rta-cacheinfo - - rta-table - - rta-mark - - rta-mfc-stats - - rta-via - - rta-newdst - - rta-pref - - rta-encap-type - - rta-encap - - rta-expires - - rta-pad - - rta-uid - - rta-ttl-propagate - - rta-ip-proto - - rta-sport - - rta-dport - - rta-nh-id - - rta-flowlabel + - dst + - src + - iif + - oif + - gateway + - priority + - prefsrc + - metrics + - multipath + - flow + - cacheinfo + - table + - mark + - mfc-stats + - via + - newdst + - pref + - encap-type + - encap + - expires + - pad + - uid + - ttl-propagate + - ip-proto + - sport + - dport + - nh-id + - flowlabel dump: request: value: 26 From 825dfab23bca520629a9e5a21ba5b03aaccc75f2 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:55 +0100 Subject: [PATCH 2126/2157] irqdomain: Rename irq_set_default_host() to irq_set_default_domain() Naming interrupt domains host is confusing at best and the irqdomain code uses both domain and host inconsistently. Therefore rename irq_set_default_host() to irq_set_default_domain(). Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-3-jirislaby@kernel.org --- arch/arc/kernel/intc-arcv2.c | 2 +- arch/arc/kernel/intc-compact.c | 2 +- arch/arm/mach-pxa/irq.c | 2 +- arch/mips/cavium-octeon/octeon-irq.c | 6 +++--- arch/mips/sgi-ip27/ip27-irq.c | 2 +- arch/mips/sgi-ip30/ip30-irq.c | 2 +- arch/nios2/kernel/irq.c | 2 +- arch/powerpc/platforms/44x/uic.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_pic.c | 2 +- arch/powerpc/platforms/amigaone/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/embedded6xx/flipper-pic.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/powerpc/platforms/powermac/pic.c | 2 +- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/sysdev/ehv_pic.c | 2 +- arch/powerpc/sysdev/ipic.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- arch/powerpc/sysdev/xive/common.c | 2 +- arch/x86/kernel/apic/vector.c | 2 +- drivers/irqchip/irq-armada-370-xp.c | 2 +- drivers/irqchip/irq-clps711x.c | 2 +- drivers/irqchip/irq-imx-gpcv2.c | 2 +- drivers/irqchip/irq-pic32-evic.c | 2 +- drivers/irqchip/irq-xilinx-intc.c | 2 +- drivers/irqchip/irq-xtensa-mx.c | 2 +- drivers/irqchip/irq-xtensa-pic.c | 4 ++-- include/linux/irqdomain.h | 2 +- kernel/irq/irqdomain.c | 8 ++++---- 30 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index f324f0e3341a..fea29d9d18d6 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -178,7 +178,7 @@ init_onchip_IRQ(struct device_node *intc, struct device_node *parent) * Needed for primary domain lookup to succeed * This is a primary irqchip, and can never have a parent */ - irq_set_default_host(root_domain); + irq_set_default_domain(root_domain); #ifdef CONFIG_SMP irq_create_mapping(root_domain, IPI_IRQ); diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 6885e422870e..1d2ff1c6a61b 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -121,7 +121,7 @@ init_onchip_IRQ(struct device_node *intc, struct device_node *parent) * Needed for primary domain lookup to succeed * This is a primary irqchip, and can never have a parent */ - irq_set_default_host(root_domain); + irq_set_default_domain(root_domain); return 0; } diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index a9ef71008147..d9cadd97748a 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -152,7 +152,7 @@ pxa_init_irq_common(struct device_node *node, int irq_nr, &pxa_irq_ops, NULL); if (!pxa_irq_domain) panic("Unable to add PXA IRQ domain\n"); - irq_set_default_host(pxa_irq_domain); + irq_set_default_domain(pxa_irq_domain); for (n = 0; n < irq_nr; n += 32) { void __iomem *base = irq_base(n >> 5); diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 8425a6b38aa2..e6b4d9c0c169 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1505,7 +1505,7 @@ static int __init octeon_irq_init_ciu( ciu_domain = irq_domain_add_tree( ciu_node, &octeon_irq_domain_ciu_ops, dd); - irq_set_default_host(ciu_domain); + irq_set_default_domain(ciu_domain); /* CIU_0 */ for (i = 0; i < 16; i++) { @@ -2076,7 +2076,7 @@ static int __init octeon_irq_init_ciu2( ciu_domain = irq_domain_add_tree( ciu_node, &octeon_irq_domain_ciu2_ops, NULL); - irq_set_default_host(ciu_domain); + irq_set_default_domain(ciu_domain); /* CUI2 */ for (i = 0; i < 64; i++) { @@ -2929,7 +2929,7 @@ static int __init octeon_irq_init_ciu3(struct device_node *ciu_node, /* Only do per CPU things if it is the CIU of the boot node. */ octeon_irq_ciu3_alloc_resources(ciu3_info); if (node == 0) - irq_set_default_host(domain); + irq_set_default_domain(domain); octeon_irq_use_ip4 = false; /* Enable the CIU lines */ diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 00e63e9ef61d..288d4d17eddd 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -297,7 +297,7 @@ void __init arch_init_irq(void) if (WARN_ON(domain == NULL)) return; - irq_set_default_host(domain); + irq_set_default_domain(domain); irq_set_percpu_devid(IP27_HUB_PEND0_IRQ); irq_set_chained_handler_and_data(IP27_HUB_PEND0_IRQ, ip27_do_irq_mask0, diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c index 423c32cb66ed..9fb905e2cf14 100644 --- a/arch/mips/sgi-ip30/ip30-irq.c +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -313,7 +313,7 @@ void __init arch_init_irq(void) if (!domain) return; - irq_set_default_host(domain); + irq_set_default_domain(domain); irq_set_percpu_devid(IP30_HEART_L0_IRQ); irq_set_chained_handler_and_data(IP30_HEART_L0_IRQ, ip30_normal_irq, diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c index 6b7890e5f7af..8fa280660051 100644 --- a/arch/nios2/kernel/irq.c +++ b/arch/nios2/kernel/irq.c @@ -72,7 +72,7 @@ void __init init_IRQ(void) domain = irq_domain_add_linear(node, NIOS2_CPU_NR_IRQS, &irq_ops, NULL); BUG_ON(!domain); - irq_set_default_host(domain); + irq_set_default_domain(domain); of_node_put(node); /* Load the initial ienable value */ ienable = RDCTL(CTL_IENABLE); diff --git a/arch/powerpc/platforms/44x/uic.c b/arch/powerpc/platforms/44x/uic.c index 8b03ae4cb3f6..31f760c2ec5d 100644 --- a/arch/powerpc/platforms/44x/uic.c +++ b/arch/powerpc/platforms/44x/uic.c @@ -291,7 +291,7 @@ void __init uic_init_tree(void) if (!primary_uic) panic("Unable to initialize primary UIC %pOF\n", np); - irq_set_default_host(primary_uic->irqhost); + irq_set_default_domain(primary_uic->irqhost); of_node_put(np); /* The scan again for cascaded UICs */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 1e0a5e9644dc..43c881d31ca6 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -453,7 +453,7 @@ void __init mpc52xx_init_irq(void) if (!mpc52xx_irqhost) panic(__FILE__ ": Cannot allocate the IRQ host\n"); - irq_set_default_host(mpc52xx_irqhost); + irq_set_default_domain(mpc52xx_irqhost); pr_info("MPC52xx PIC is up and running!\n"); } diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 2c8dc0886912..33f852a7625f 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -109,7 +109,7 @@ static void __init amigaone_init_IRQ(void) i8259_init(pic, int_ack); ppc_md.get_irq = i8259_irq; - irq_set_default_host(i8259_get_host()); + irq_set_default_domain(i8259_get_host()); } static int __init request_isa_regions(void) diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 36ee3a5056a1..c1bfa4c3444c 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -486,7 +486,7 @@ static void __init chrp_find_8259(void) i8259_init(pic, chrp_int_ack); if (ppc_md.get_irq == NULL) { ppc_md.get_irq = i8259_irq; - irq_set_default_host(i8259_get_host()); + irq_set_default_domain(i8259_get_host()); } if (chrp_mpic != NULL) { cascade_irq = irq_of_parse_and_map(pic, 0); diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 4d9200bdba78..013d66304c31 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -190,7 +190,7 @@ void __init flipper_pic_probe(void) flipper_irq_host = flipper_pic_init(np); BUG_ON(!flipper_irq_host); - irq_set_default_host(flipper_irq_host); + irq_set_default_domain(flipper_irq_host); of_node_put(np); } diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 0761d98e5be3..d03b41336901 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -228,7 +228,7 @@ static void __init nemo_init_IRQ(struct mpic *mpic) irq_set_chained_handler(gpio_virq, sb600_8259_cascade); mpic_unmask_irq(irq_get_irq_data(gpio_virq)); - irq_set_default_host(mpic->irqhost); + irq_set_default_domain(mpic->irqhost); } #else diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 2202bf77c7a3..03a7c51f2645 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -330,7 +330,7 @@ static void __init pmac_pic_probe_oldstyle(void) pmac_pic_host = irq_domain_add_linear(master, max_irqs, &pmac_pic_host_ops, NULL); BUG_ON(pmac_pic_host == NULL); - irq_set_default_host(pmac_pic_host); + irq_set_default_domain(pmac_pic_host); /* Get addresses of first controller if we have a node for it */ BUG_ON(of_address_to_resource(master, 0, &r)); diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index af3fe9f04f24..95e96bd61a20 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -744,7 +744,7 @@ void __init ps3_init_IRQ(void) struct irq_domain *host; host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL); - irq_set_default_host(host); + irq_set_default_domain(host); for_each_possible_cpu(cpu) { struct ps3_private *pd = &per_cpu(ps3_private, cpu); diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 040827671d21..fb502b72fca1 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -291,5 +291,5 @@ void __init ehv_pic_init(void) ehv_pic->coreint_flag = of_property_read_bool(np, "has-external-proxy"); global_ehv_pic = ehv_pic; - irq_set_default_host(global_ehv_pic->irqhost); + irq_set_default_domain(global_ehv_pic->irqhost); } diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 037b04bf9a9f..a35be0232978 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -757,7 +757,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) ipic_write(ipic->regs, IPIC_SEMSR, temp); primary_ipic = ipic; - irq_set_default_host(primary_ipic->irqhost); + irq_set_default_domain(primary_ipic->irqhost); ipic_write(ipic->regs, IPIC_SIMSR_H, 0); ipic_write(ipic->regs, IPIC_SIMSR_L, 0); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index d94cf36b0f65..4afbab83a2e2 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1520,7 +1520,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, if (!(mpic->flags & MPIC_SECONDARY)) { mpic_primary = mpic; - irq_set_default_host(mpic->irqhost); + irq_set_default_domain(mpic->irqhost); } return mpic; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index d3a4156e8788..c3fa539a9898 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -472,7 +472,7 @@ static int __init xics_allocate_domain(void) return -ENOMEM; } - irq_set_default_host(xics_host); + irq_set_default_domain(xics_host); return 0; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a6c388bdf5d0..dc2e61837396 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1467,7 +1467,7 @@ static void __init xive_init_host(struct device_node *np) xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; - irq_set_default_host(xive_irq_domain); + irq_set_default_domain(xive_irq_domain); } static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 72fa4bb78f0a..fee42a73d64a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -799,7 +799,7 @@ int __init arch_early_irq_init(void) x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, NULL); BUG_ON(x86_vector_domain == NULL); - irq_set_default_host(x86_vector_domain); + irq_set_default_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index d7c5ef248474..6218e5d20b50 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -880,7 +880,7 @@ static int __init mpic_of_init(struct device_node *node, struct device_node *par } if (mpic_is_ipi_available(mpic)) { - irq_set_default_host(mpic->domain); + irq_set_default_domain(mpic->domain); set_handle_irq(mpic_handle_irq); #ifdef CONFIG_SMP err = mpic_ipi_init(mpic, node); diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c index 806ebb1de201..48c73c948ddf 100644 --- a/drivers/irqchip/irq-clps711x.c +++ b/drivers/irqchip/irq-clps711x.c @@ -191,7 +191,7 @@ static int __init _clps711x_intc_init(struct device_node *np, goto out_irqfree; } - irq_set_default_host(clps711x_intc->domain); + irq_set_default_domain(clps711x_intc->domain); set_handle_irq(clps711x_irqh); #ifdef CONFIG_FIQ diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 8a0e82067924..095ae8e3217e 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -247,7 +247,7 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node, kfree(cd); return -ENOMEM; } - irq_set_default_host(domain); + irq_set_default_domain(domain); /* Initially mask all interrupts */ for (i = 0; i < IMR_NUM; i++) { diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c index eb6ca516a166..b546b1036e12 100644 --- a/drivers/irqchip/irq-pic32-evic.c +++ b/drivers/irqchip/irq-pic32-evic.c @@ -291,7 +291,7 @@ static int __init pic32_of_init(struct device_node *node, gc->private = &priv[i]; } - irq_set_default_host(evic_irq_domain); + irq_set_default_domain(evic_irq_domain); /* * External interrupts have software configurable edge polarity. These diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 7e08714d507f..38727e9cc713 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -233,7 +233,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, } } else { primary_intc = irqc; - irq_set_default_host(primary_intc->root_domain); + irq_set_default_domain(primary_intc->root_domain); set_handle_irq(xil_intc_handle_irq); } diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 7f314e58f3ce..9b441d180299 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -156,7 +156,7 @@ static void __init xtensa_mx_init_common(struct irq_domain *root_domain) { unsigned int i; - irq_set_default_host(root_domain); + irq_set_default_domain(root_domain); secondary_init_irq(); /* Initialize default IRQ routing to CPU 0 */ diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c index f9d6fce4da33..9be7b7c5cd23 100644 --- a/drivers/irqchip/irq-xtensa-pic.c +++ b/drivers/irqchip/irq-xtensa-pic.c @@ -87,7 +87,7 @@ int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent) struct irq_domain *root_domain = irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0, &xtensa_irq_domain_ops, &xtensa_irq_chip); - irq_set_default_host(root_domain); + irq_set_default_domain(root_domain); return 0; } @@ -97,7 +97,7 @@ static int __init xtensa_pic_init(struct device_node *np, struct irq_domain *root_domain = irq_domain_add_linear(np, NR_IRQS, &xtensa_irq_domain_ops, &xtensa_irq_chip); - irq_set_default_host(root_domain); + irq_set_default_domain(root_domain); return 0; } IRQCHIP_DECLARE(xtensa_irq_chip, "cdns,xtensa-pic", xtensa_pic_init); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33ff41eef8f7..4b5c495b5710 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -352,7 +352,7 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, void *host_data); struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -void irq_set_default_host(struct irq_domain *host); +void irq_set_default_domain(struct irq_domain *domain); struct irq_domain *irq_get_default_host(void); int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 2861f89880af..480fdc9e769e 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -398,7 +398,7 @@ void irq_domain_remove(struct irq_domain *domain) * If the going away domain is the default one, reset it. */ if (unlikely(irq_default_domain == domain)) - irq_set_default_host(NULL); + irq_set_default_domain(NULL); mutex_unlock(&irq_domain_mutex); @@ -573,7 +573,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); /** - * irq_set_default_host() - Set a "default" irq domain + * irq_set_default_domain() - Set a "default" irq domain * @domain: default domain pointer * * For convenience, it's possible to set a "default" domain that will be used @@ -581,13 +581,13 @@ EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); * platforms that want to manipulate a few hard coded interrupt numbers that * aren't properly represented in the device-tree. */ -void irq_set_default_host(struct irq_domain *domain) +void irq_set_default_domain(struct irq_domain *domain) { pr_debug("Default domain set to @0x%p\n", domain); irq_default_domain = domain; } -EXPORT_SYMBOL_GPL(irq_set_default_host); +EXPORT_SYMBOL_GPL(irq_set_default_domain); /** * irq_get_default_host() - Retrieve the "default" irq domain From 0a27ea384c82e70d16e40adbaebeb3725f7e6342 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:56 +0100 Subject: [PATCH 2127/2157] irqdomain: Rename irq_get_default_host() to irq_get_default_domain() Naming interrupt domains host is confusing at best and the irqdomain code uses both domain and host inconsistently. Therefore rename irq_get_default_host() to irq_get_default_domain(). Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-4-jirislaby@kernel.org --- arch/mips/pci/pci-xtalk-bridge.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/book3s_xive.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- arch/powerpc/platforms/pseries/msi.c | 2 +- drivers/irqchip/irq-armada-370-xp.c | 4 ++-- include/linux/irqdomain.h | 2 +- kernel/irq/irqdomain.c | 6 +++--- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index dae856fb3e5b..e00c38620d14 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -620,7 +620,7 @@ static int bridge_probe(struct platform_device *pdev) if (bridge_get_partnum(virt_to_phys((void *)bd->bridge_addr), partnum)) return -EPROBE_DEFER; /* not available yet */ - parent = irq_get_default_host(); + parent = irq_get_default_domain(); if (!parent) return -ENODEV; fn = irq_domain_alloc_named_fwnode("BRIDGE"); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 86bff159c51e..19f4d298dd17 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6041,7 +6041,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * the underlying calls, which will EOI the interrupt in real * mode, need an HW IRQ number mapped in the XICS IRQ domain. */ - host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq); + host_data = irq_domain_get_irq_data(irq_get_default_domain(), host_irq); irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data); if (i == pimap->n_mapped) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1362c672387e..1302b5ac5672 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1555,7 +1555,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; struct irq_data *host_data = - irq_domain_get_irq_data(irq_get_default_host(), host_irq); + irq_domain_get_irq_data(irq_get_default_domain(), host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d2a8e0287811..ae4b549b5ca0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1881,7 +1881,7 @@ static const struct irq_domain_ops pnv_irq_domain_ops = { static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) { struct pnv_phb *phb = hose->private_data; - struct irq_domain *parent = irq_get_default_host(); + struct irq_domain *parent = irq_get_default_domain(); hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id); if (!hose->fwnode) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index fdc2f7f38dc9..f9d80111c322 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -611,7 +611,7 @@ static const struct irq_domain_ops pseries_irq_domain_ops = { static int __pseries_msi_allocate_domains(struct pci_controller *phb, unsigned int count) { - struct irq_domain *parent = irq_get_default_host(); + struct irq_domain *parent = irq_get_default_domain(); phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI", phb->global_number); diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 6218e5d20b50..2aa6a51e05d0 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -564,7 +564,7 @@ static void mpic_reenable_percpu(struct mpic *mpic) static int mpic_starting_cpu(unsigned int cpu) { - struct mpic *mpic = irq_get_default_host()->host_data; + struct mpic *mpic = irq_get_default_domain()->host_data; mpic_perf_init(mpic); mpic_smp_cpu_init(mpic); @@ -700,7 +700,7 @@ static void mpic_handle_cascade_irq(struct irq_desc *desc) static void __exception_irq_entry mpic_handle_irq(struct pt_regs *regs) { - struct mpic *mpic = irq_get_default_host()->host_data; + struct mpic *mpic = irq_get_default_domain()->host_data; irq_hw_number_t i; u32 irqstat; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 4b5c495b5710..e9ab95fbc5a9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -353,7 +353,7 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); void irq_set_default_domain(struct irq_domain *domain); -struct irq_domain *irq_get_default_host(void); +struct irq_domain *irq_get_default_domain(void); int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 480fdc9e769e..9d5c8651492d 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -590,7 +590,7 @@ void irq_set_default_domain(struct irq_domain *domain) EXPORT_SYMBOL_GPL(irq_set_default_domain); /** - * irq_get_default_host() - Retrieve the "default" irq domain + * irq_get_default_domain() - Retrieve the "default" irq domain * * Returns: the default domain, if any. * @@ -598,11 +598,11 @@ EXPORT_SYMBOL_GPL(irq_set_default_domain); * systems that cannot implement a firmware->fwnode mapping (which * both DT and ACPI provide). */ -struct irq_domain *irq_get_default_host(void) +struct irq_domain *irq_get_default_domain(void) { return irq_default_domain; } -EXPORT_SYMBOL_GPL(irq_get_default_host); +EXPORT_SYMBOL_GPL(irq_get_default_domain); static bool irq_domain_is_nomap(struct irq_domain *domain) { From d2705d33885e3a19f727dff2521fb7d5b1fc5cda Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:57 +0100 Subject: [PATCH 2128/2157] irqdomain: Stop using 'host' for domain It is confusing to see 'host' and 'domain' to be used as 'domain'. Given this header is all about domains, switch the remaining 'host' uses to 'domain'. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-5-jirislaby@kernel.org --- include/linux/irqdomain.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e9ab95fbc5a9..bb7111105296 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -72,7 +72,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a host, returns + * @match: Match an interrupt controller device node to a domain, returns * 1 on a match * @select: Match an interrupt controller fw specification. It is more generic * than @match as it receives a complete struct irq_fwspec. Therefore, @@ -454,7 +454,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod return IS_ERR(d) ? NULL : d; } -unsigned int irq_create_direct_mapping(struct irq_domain *host); +unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, @@ -507,7 +507,7 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw return IS_ERR(d) ? NULL : d; } -void irq_domain_remove(struct irq_domain *host); +void irq_domain_remove(struct irq_domain *domain); int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); @@ -515,16 +515,16 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -unsigned int irq_create_mapping_affinity(struct irq_domain *host, +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); -static inline unsigned int irq_create_mapping(struct irq_domain *host, +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - return irq_create_mapping_affinity(host, hwirq, NULL); + return irq_create_mapping_affinity(domain, hwirq, NULL); } struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, From 94f68c0f99a548d33a102672690100bf76a7c460 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Apr 2025 07:56:36 -0700 Subject: [PATCH 2129/2157] selftests: net: amt: indicate progress in the stress test Our CI expects output from the test at least once every 10 minutes. The AMT test when running on debug kernel is just on the edge of that time for the stress test. Improve the output: - print the name of the test first, before starting it, - output a dot every 10% of the way. Output after: TEST: amt discovery [ OK ] TEST: IPv4 amt multicast forwarding [ OK ] TEST: IPv6 amt multicast forwarding [ OK ] TEST: IPv4 amt traffic forwarding torture .......... [ OK ] TEST: IPv6 amt traffic forwarding torture .......... [ OK ] Reviewed-by: Taehee Yoo Link: https://patch.msgid.link/20250403145636.2891166-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/amt.sh | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index d458b45c775b..3ef209cacb8e 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -194,15 +194,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -278,10 +284,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." >&2 From 9b305678c55dd45044aa565fee04f8d88382bc4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Apr 2025 16:51:19 +0200 Subject: [PATCH 2130/2157] genirq/migration: Use irqd_get_parent_data() in irq_force_complete_move() Frank reported, that the common irq_force_complete_move() breaks the out of tree build of ia64. The reason is that ia64 uses the migration code, but does not have hierarchical interrupt domains enabled. This went unnoticed in mainline as both x86 and RISC-V have hierarchical domains enabled. Not that it matters for mainline, but it's still inconsistent. Use irqd_get_parent_data() instead of accessing the parent_data field directly. The helper returns NULL when hierarchical domains are disabled otherwise it accesses the parent_data field of the domain. No functional change. Fixes: 751dc837dabd ("genirq: Introduce common irq_force_complete_move() implementation") Reported-by: Frank Scheiner Signed-off-by: Thomas Gleixner Tested-by: Frank Scheiner Link: https://lore.kernel.org/all/87h634ugig.ffs@tglx --- kernel/irq/migration.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 147cabb4c077..f2b2929986ff 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -37,7 +37,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) void irq_force_complete_move(struct irq_desc *desc) { - for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) { + for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = irqd_get_parent_data(d)) { if (d->chip && d->chip->irq_force_complete_move) { d->chip->irq_force_complete_move(d); return; From 324a2219ba38b00ab0e53bd535782771ba9614b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Apr 2025 17:10:52 +0200 Subject: [PATCH 2131/2157] Revert "timekeeping: Fix possible inconsistencies in _COARSE clockids" This reverts commit 757b000f7b936edf79311ab0971fe465bbda75ea. Miroslav reported that the changes for handling the inconsistencies in the coarse time getters result in a regression on the adjtimex() side. There are two issues: 1) The forwarding of the base time moves the update out of the original period and establishes a new one. 2) The clearing of the accumulated NTP error is changing the behaviour as well. Userspace expects that multiplier/frequency updates are in effect, when the syscall returns, so delaying the update to the next tick is not solving the problem either. Revert the change, so that the established expectations of user space implementations (ntpd, chronyd) are restored. The re-introduced inconsistency of the coarse time getters will be addressed in a subsequent fix. Fixes: 757b000f7b93 ("timekeeping: Fix possible inconsistencies in _COARSE clockids") Reported-by: Miroslav Lichvar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/Z-qsg6iDGlcIJulJ@localhost --- kernel/time/timekeeping.c | 94 +++++++++++---------------------------- 1 file changed, 25 insertions(+), 69 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 929846b8b45a..1e67d076f195 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -682,19 +682,20 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act } /** - * timekeeping_forward - update clock to given cycle now value + * timekeeping_forward_now - update clock to the current time * @tk: Pointer to the timekeeper to update - * @cycle_now: Current clocksource read value * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. */ -static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now) +static void timekeeping_forward_now(struct timekeeper *tk) { - u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, - tk->tkr_mono.clock->max_raw_delta); + u64 cycle_now, delta; + cycle_now = tk_clock_read(&tk->tkr_mono); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, + tk->tkr_mono.clock->max_raw_delta); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; @@ -709,21 +710,6 @@ static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now) } } -/** - * timekeeping_forward_now - update clock to the current time - * @tk: Pointer to the timekeeper to update - * - * Forward the current clock to update its state since the last call to - * update_wall_time(). This is useful before significant clock changes, - * as it avoids having to deal with this time offset explicitly. - */ -static void timekeeping_forward_now(struct timekeeper *tk) -{ - u64 cycle_now = tk_clock_read(&tk->tkr_mono); - - timekeeping_forward(tk, cycle_now); -} - /** * ktime_get_real_ts64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set @@ -2165,54 +2151,6 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, return offset; } -static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset, - enum timekeeping_adv_mode mode, - unsigned int *clock_set) -{ - int shift = 0, maxshift; - - /* - * TK_ADV_FREQ indicates that adjtimex(2) directly set the - * frequency or the tick length. - * - * Accumulate the offset, so that the new multiplier starts from - * now. This is required as otherwise for offsets, which are - * smaller than tk::cycle_interval, timekeeping_adjust() could set - * xtime_nsec backwards, which subsequently causes time going - * backwards in the coarse time getters. But even for the case - * where offset is greater than tk::cycle_interval the periodic - * accumulation does not have much value. - * - * Also reset tk::ntp_error as it does not make sense to keep the - * old accumulated error around in this case. - */ - if (mode == TK_ADV_FREQ) { - timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset); - tk->ntp_error = 0; - return 0; - } - - /* - * With NO_HZ we may have to accumulate many cycle_intervals - * (think "ticks") worth of time at once. To do this efficiently, - * we calculate the largest doubling multiple of cycle_intervals - * that is smaller than the offset. We then accumulate that - * chunk in one go, and then try to consume the next smaller - * doubled multiple. - */ - shift = ilog2(offset) - ilog2(tk->cycle_interval); - shift = max(0, shift); - /* Bound shift to one less than what overflows tick_length */ - maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1; - shift = min(shift, maxshift); - while (offset >= tk->cycle_interval) { - offset = logarithmic_accumulation(tk, offset, shift, clock_set); - if (offset < tk->cycle_interval << shift) - shift--; - } - return offset; -} - /* * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length @@ -2222,6 +2160,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) struct timekeeper *tk = &tk_core.shadow_timekeeper; struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; + int shift = 0, maxshift; u64 offset; guard(raw_spinlock_irqsave)(&tk_core.lock); @@ -2238,7 +2177,24 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; - offset = timekeeping_accumulate(tk, offset, mode, &clock_set); + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, + * we calculate the largest doubling multiple of cycle_intervals + * that is smaller than the offset. We then accumulate that + * chunk in one go, and then try to consume the next smaller + * doubled multiple. + */ + shift = ilog2(offset) - ilog2(tk->cycle_interval); + shift = max(0, shift); + /* Bound shift to one less than what overflows tick_length */ + maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; + shift = min(shift, maxshift); + while (offset >= tk->cycle_interval) { + offset = logarithmic_accumulation(tk, offset, shift, &clock_set); + if (offset < tk->cycle_interval< Date: Sat, 5 Apr 2025 10:17:26 +0200 Subject: [PATCH 2132/2157] treewide: Switch/rename to timer_delete[_sync]() timer_delete[_sync]() replaces del_timer[_sync](). Convert the whole tree over and remove the historical wrapper inlines. Conversion was done with coccinelle plus manual fixups where necessary. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/alpha/kernel/srmcons.c | 2 +- arch/arm/mach-footbridge/dc21285.c | 2 +- arch/arm/mach-pxa/sharpsl_pm.c | 4 +- arch/m68k/amiga/amisound.c | 2 +- arch/m68k/mac/macboing.c | 4 +- arch/mips/sgi-ip22/ip22-reset.c | 2 +- arch/powerpc/kvm/booke.c | 4 +- arch/powerpc/platforms/cell/spufs/sched.c | 6 +-- arch/powerpc/platforms/powermac/low_i2c.c | 2 +- arch/s390/kernel/time.c | 2 +- arch/s390/mm/cmm.c | 6 +-- arch/sh/drivers/pci/common.c | 4 +- arch/sparc/kernel/led.c | 4 +- arch/um/drivers/vector_kern.c | 2 +- arch/x86/kernel/cpu/mce/core.c | 6 +-- arch/x86/kvm/xen.c | 4 +- arch/xtensa/platforms/iss/console.c | 2 +- arch/xtensa/platforms/iss/network.c | 2 +- block/blk-core.c | 2 +- block/blk-stat.c | 2 +- block/blk-stat.h | 2 +- block/blk-throttle.c | 4 +- drivers/accel/qaic/qaic_timesync.c | 2 +- drivers/accessibility/speakup/main.c | 18 ++++---- drivers/accessibility/speakup/synth.c | 2 +- drivers/ata/libata-eh.c | 2 +- drivers/atm/idt77105.c | 4 +- drivers/atm/iphase.c | 2 +- drivers/atm/lanai.c | 2 +- drivers/atm/nicstar.c | 2 +- drivers/atm/suni.c | 2 +- drivers/auxdisplay/line-display.c | 8 ++-- drivers/auxdisplay/panel.c | 4 +- drivers/base/devcoredump.c | 2 +- drivers/base/power/main.c | 2 +- drivers/base/power/wakeup.c | 6 +-- drivers/block/amiflop.c | 10 ++--- drivers/block/aoe/aoedev.c | 2 +- drivers/block/aoe/aoemain.c | 2 +- drivers/block/ataflop.c | 12 ++--- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/floppy.c | 8 ++-- drivers/block/sunvdc.c | 2 +- drivers/block/swim3.c | 10 ++--- drivers/bluetooth/bluecard_cs.c | 4 +- drivers/bluetooth/hci_bcsp.c | 2 +- drivers/bluetooth/hci_h5.c | 6 +-- drivers/bluetooth/hci_qca.c | 10 ++--- drivers/bus/mhi/host/pci_generic.c | 8 ++-- drivers/char/dtlk.c | 6 +-- drivers/char/hangcheck-timer.c | 2 +- drivers/char/hw_random/xgene-rng.c | 2 +- drivers/char/ipmi/bt-bmc.c | 2 +- drivers/char/ipmi/ipmi_msghandler.c | 2 +- drivers/char/ipmi/ipmi_si_intf.c | 4 +- drivers/char/ipmi/ipmi_ssif.c | 6 +-- drivers/char/ipmi/kcs_bmc_aspeed.c | 4 +- drivers/char/ipmi/ssif_bmc.c | 2 +- drivers/char/random.c | 2 +- drivers/char/tlclk.c | 4 +- drivers/char/tpm/tpm-dev-common.c | 4 +- drivers/comedi/drivers/comedi_test.c | 12 ++--- drivers/comedi/drivers/das16.c | 4 +- drivers/comedi/drivers/jr3_pci.c | 2 +- drivers/cpufreq/powernv-cpufreq.c | 4 +- drivers/crypto/axis/artpec6_crypto.c | 4 +- drivers/dma-buf/st-dma-fence.c | 2 +- drivers/dma/imx-dma.c | 4 +- drivers/dma/ioat/dma.c | 2 +- drivers/dma/ioat/init.c | 4 +- drivers/firewire/core-transaction.c | 2 +- drivers/firmware/psci/psci_checker.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 2 +- drivers/gpu/drm/bridge/tda998x_drv.c | 2 +- drivers/gpu/drm/drm_vblank.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 2 +- drivers/gpu/drm/gud/gud_pipe.c | 2 +- .../drm/i915/gt/intel_execlists_submission.c | 6 +-- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 4 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c | 2 +- drivers/gpu/drm/i915/i915_utils.c | 2 +- drivers/gpu/drm/i915/intel_wakeref.c | 2 +- drivers/gpu/drm/i915/selftests/lib_sw_fence.c | 2 +- drivers/gpu/drm/mediatek/mtk_dp.c | 2 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 +- drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 2 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +- drivers/gpu/drm/omapdrm/dss/dsi.c | 2 +- drivers/gpu/drm/vc4/vc4_bo.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/greybus/operation.c | 2 +- drivers/hid/hid-apple.c | 4 +- drivers/hid/hid-appleir.c | 2 +- drivers/hid/hid-appletb-kbd.c | 2 +- drivers/hid/hid-magicmouse.c | 4 +- drivers/hid/hid-multitouch.c | 4 +- drivers/hid/hid-nvidia-shield.c | 2 +- drivers/hid/hid-prodikeys.c | 2 +- drivers/hid/hid-sony.c | 2 +- drivers/hid/hid-uclogic-core.c | 2 +- drivers/hid/hid-wiimote-core.c | 2 +- drivers/hid/usbhid/hid-core.c | 4 +- drivers/hid/wacom_sys.c | 2 +- drivers/hsi/clients/ssi_protocol.c | 18 ++++---- drivers/hte/hte-tegra194-test.c | 2 +- drivers/hwmon/pwm-fan.c | 2 +- drivers/i2c/busses/i2c-img-scb.c | 2 +- drivers/iio/common/ssp_sensors/ssp_dev.c | 4 +- drivers/infiniband/hw/cxgb4/cm.c | 2 +- drivers/infiniband/hw/hfi1/aspm.c | 2 +- drivers/infiniband/hw/hfi1/chip.c | 4 +- drivers/infiniband/hw/hfi1/driver.c | 2 +- drivers/infiniband/hw/hfi1/init.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- drivers/infiniband/hw/hfi1/tid_rdma.c | 8 ++-- drivers/infiniband/hw/hfi1/verbs.c | 2 +- drivers/infiniband/hw/irdma/cm.c | 2 +- drivers/infiniband/hw/irdma/utils.c | 4 +- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/mthca/mthca_catas.c | 2 +- drivers/infiniband/hw/qib/qib_driver.c | 2 +- drivers/infiniband/hw/qib/qib_iba7220.c | 4 +- drivers/infiniband/hw/qib/qib_iba7322.c | 4 +- drivers/infiniband/hw/qib/qib_init.c | 10 ++--- drivers/infiniband/hw/qib/qib_mad.c | 2 +- drivers/infiniband/hw/qib/qib_sd7220.c | 2 +- drivers/infiniband/hw/qib/qib_verbs.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 8 ++-- drivers/infiniband/sw/rxe/rxe_qp.c | 4 +- drivers/input/ff-memless.c | 4 +- drivers/input/gameport/gameport.c | 4 +- drivers/input/input.c | 4 +- drivers/input/joystick/db9.c | 2 +- drivers/input/joystick/gamecon.c | 2 +- drivers/input/joystick/n64joy.c | 2 +- drivers/input/joystick/turbografx.c | 2 +- drivers/input/keyboard/imx_keypad.c | 2 +- drivers/input/keyboard/snvs_pwrkey.c | 2 +- drivers/input/keyboard/tegra-kbc.c | 4 +- drivers/input/mouse/alps.c | 2 +- drivers/input/mouse/byd.c | 2 +- drivers/input/serio/hil_mlc.c | 2 +- drivers/input/serio/hp_sdc.c | 2 +- drivers/input/touchscreen/ad7877.c | 2 +- drivers/input/touchscreen/ad7879.c | 2 +- drivers/input/touchscreen/bu21029_ts.c | 2 +- drivers/input/touchscreen/exc3000.c | 2 +- drivers/input/touchscreen/sx8654.c | 2 +- drivers/input/touchscreen/tsc200x-core.c | 4 +- drivers/iommu/dma-iommu.c | 2 +- drivers/isdn/hardware/mISDN/hfcmulti.c | 6 +-- drivers/isdn/hardware/mISDN/hfcpci.c | 14 +++--- drivers/isdn/hardware/mISDN/mISDNipac.c | 10 ++--- drivers/isdn/hardware/mISDN/mISDNisar.c | 6 +-- drivers/isdn/hardware/mISDN/w6692.c | 8 ++-- drivers/isdn/mISDN/dsp_core.c | 6 +-- drivers/isdn/mISDN/dsp_tones.c | 4 +- drivers/isdn/mISDN/fsm.c | 4 +- drivers/leds/flash/leds-rt8515.c | 4 +- drivers/leds/flash/leds-sgm3140.c | 6 +-- drivers/leds/led-core.c | 4 +- drivers/leds/trigger/ledtrig-pattern.c | 2 +- drivers/leds/trigger/ledtrig-transient.c | 2 +- drivers/macintosh/adbhid.c | 2 +- drivers/mailbox/mailbox-altera.c | 2 +- drivers/md/bcache/stats.c | 2 +- drivers/md/dm-integrity.c | 4 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-vdo/dedupe.c | 2 +- drivers/md/dm-writecache.c | 6 +-- drivers/md/md.c | 4 +- drivers/media/common/saa7146/saa7146_fops.c | 2 +- drivers/media/common/saa7146/saa7146_vbi.c | 4 +- drivers/media/common/saa7146/saa7146_video.c | 2 +- drivers/media/dvb-core/dmxdev.c | 6 +-- drivers/media/i2c/tc358743.c | 4 +- drivers/media/i2c/tvaudio.c | 4 +- drivers/media/pci/bt8xx/bttv-driver.c | 2 +- drivers/media/pci/bt8xx/bttv-input.c | 4 +- drivers/media/pci/bt8xx/bttv-risc.c | 2 +- drivers/media/pci/ivtv/ivtv-irq.c | 6 +-- drivers/media/pci/ivtv/ivtv-streams.c | 4 +- .../pci/netup_unidvb/netup_unidvb_core.c | 2 +- drivers/media/pci/saa7134/saa7134-core.c | 10 ++--- drivers/media/pci/saa7134/saa7134-input.c | 2 +- drivers/media/pci/saa7134/saa7134-ts.c | 2 +- drivers/media/pci/saa7134/saa7134-vbi.c | 2 +- drivers/media/pci/saa7134/saa7134-video.c | 2 +- drivers/media/pci/tw686x/tw686x-core.c | 2 +- .../media/platform/samsung/s5p-mfc/s5p_mfc.c | 6 +-- .../st/sti/c8sectpfe/c8sectpfe-core.c | 2 +- drivers/media/radio/radio-cadet.c | 2 +- drivers/media/rc/ene_ir.c | 2 +- drivers/media/rc/igorplugusb.c | 4 +- drivers/media/rc/img-ir/img-ir-hw.c | 4 +- drivers/media/rc/img-ir/img-ir-raw.c | 2 +- drivers/media/rc/imon.c | 2 +- drivers/media/rc/ir-mce_kbd-decoder.c | 4 +- drivers/media/rc/rc-ir-raw.c | 2 +- drivers/media/rc/rc-main.c | 6 +-- drivers/media/rc/serial_ir.c | 2 +- drivers/media/usb/au0828/au0828-dvb.c | 4 +- drivers/media/usb/au0828/au0828-video.c | 12 ++--- drivers/media/usb/pvrusb2/pvrusb2-encoder.c | 2 +- drivers/media/usb/pvrusb2/pvrusb2-hdw.c | 16 +++---- drivers/memory/tegra/tegra210-emc-core.c | 4 +- drivers/memstick/core/ms_block.c | 4 +- drivers/memstick/host/jmb38x_ms.c | 2 +- drivers/memstick/host/r592.c | 4 +- drivers/memstick/host/tifm_ms.c | 4 +- drivers/misc/bcm-vk/bcm_vk_tty.c | 4 +- drivers/misc/cardreader/rtsx_usb.c | 2 +- drivers/misc/sgi-xp/xpc_main.c | 2 +- drivers/misc/sgi-xp/xpc_partition.c | 2 +- drivers/mmc/core/host.c | 4 +- drivers/mmc/host/atmel-mci.c | 8 ++-- drivers/mmc/host/dw_mmc.c | 16 +++---- drivers/mmc/host/jz4740_mmc.c | 4 +- drivers/mmc/host/meson-mx-sdio.c | 4 +- drivers/mmc/host/mvsdio.c | 4 +- drivers/mmc/host/mxcmmc.c | 4 +- drivers/mmc/host/omap.c | 10 ++--- drivers/mmc/host/sdhci.c | 8 ++-- drivers/mmc/host/tifm_sd.c | 2 +- drivers/mmc/host/via-sdmmc.c | 4 +- drivers/mmc/host/vub300.c | 6 +-- drivers/mmc/host/wbsd.c | 2 +- drivers/most/most_usb.c | 4 +- drivers/mtd/sm_ftl.c | 4 +- drivers/net/arcnet/arcnet.c | 2 +- drivers/net/can/grcan.c | 12 ++--- drivers/net/can/kvaser_pciefd.c | 6 +-- drivers/net/can/sja1000/peak_pcmcia.c | 2 +- drivers/net/dsa/mv88e6xxx/phy.c | 4 +- drivers/net/dsa/sja1105/sja1105_ptp.c | 4 +- drivers/net/eql.c | 2 +- drivers/net/ethernet/3com/3c515.c | 2 +- drivers/net/ethernet/3com/3c574_cs.c | 2 +- drivers/net/ethernet/3com/3c589_cs.c | 2 +- drivers/net/ethernet/3com/3c59x.c | 2 +- drivers/net/ethernet/8390/axnet_cs.c | 2 +- drivers/net/ethernet/8390/pcnet_cs.c | 2 +- drivers/net/ethernet/agere/et131x.c | 2 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +-- drivers/net/ethernet/amd/a2065.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 4 +- drivers/net/ethernet/amd/declance.c | 2 +- drivers/net/ethernet/amd/pcnet32.c | 2 +- drivers/net/ethernet/amd/sunlance.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +- drivers/net/ethernet/apple/bmac.c | 6 +-- drivers/net/ethernet/apple/mace.c | 4 +- .../net/ethernet/aquantia/atlantic/aq_nic.c | 4 +- drivers/net/ethernet/atheros/ag71xx.c | 2 +- .../net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- .../net/ethernet/atheros/atl1e/atl1e_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 8 ++-- drivers/net/ethernet/broadcom/b44.c | 4 +- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 6 +-- drivers/net/ethernet/broadcom/bnx2.c | 10 ++--- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 26 +++++------ drivers/net/ethernet/brocade/bna/bnad.c | 16 +++---- .../net/ethernet/brocade/bna/bnad_ethtool.c | 2 +- drivers/net/ethernet/chelsio/cxgb/sge.c | 4 +- drivers/net/ethernet/chelsio/cxgb3/sge.c | 4 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 +- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 4 +- drivers/net/ethernet/cisco/enic/enic_clsf.h | 2 +- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- drivers/net/ethernet/dec/tulip/21142.c | 4 +- drivers/net/ethernet/dec/tulip/de2104x.c | 6 +-- drivers/net/ethernet/dec/tulip/dmfe.c | 2 +- drivers/net/ethernet/dec/tulip/interrupt.c | 4 +- drivers/net/ethernet/dec/tulip/pnic2.c | 6 +-- drivers/net/ethernet/dec/tulip/tulip_core.c | 4 +- drivers/net/ethernet/dec/tulip/uli526x.c | 2 +- drivers/net/ethernet/dec/tulip/winbond-840.c | 4 +- drivers/net/ethernet/dlink/dl2k.c | 4 +- drivers/net/ethernet/fealnx.c | 4 +- drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 4 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- .../hisilicon/hns3/hns3pf/hclge_main.c | 2 +- drivers/net/ethernet/intel/e100.c | 4 +- drivers/net/ethernet/intel/e1000e/netdev.c | 8 ++-- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- .../ethernet/intel/ice/ice_virtchnl_fdir.c | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 8 ++-- drivers/net/ethernet/intel/igbvf/netdev.c | 4 +- drivers/net/ethernet/intel/igc/igc_main.c | 8 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/korina.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 6 +-- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/marvell/skge.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/mellanox/mlx4/catas.c | 2 +- .../ethernet/mellanox/mlx5/core/fw_reset.c | 2 +- .../net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- .../net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/natsemi/natsemi.c | 4 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- .../ethernet/netronome/nfp/nfp_net_common.c | 2 +- drivers/net/ethernet/nvidia/forcedeth.c | 6 +-- .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 2 +- .../net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/pasemi/pasemi_mac.c | 2 +- .../ethernet/pensando/ionic/ionic_bus_pci.c | 2 +- drivers/net/ethernet/qlogic/qla3xxx.c | 2 +- drivers/net/ethernet/realtek/atp.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- .../net/ethernet/samsung/sxgbe/sxgbe_main.c | 6 +-- drivers/net/ethernet/seeq/ether3.c | 6 +-- drivers/net/ethernet/sfc/falcon/falcon.c | 2 +- drivers/net/ethernet/sfc/falcon/rx.c | 2 +- drivers/net/ethernet/sfc/mcdi.c | 4 +- drivers/net/ethernet/sfc/rx_common.c | 2 +- drivers/net/ethernet/sfc/siena/mcdi.c | 4 +- drivers/net/ethernet/sfc/siena/rx_common.c | 2 +- drivers/net/ethernet/sgi/ioc3-eth.c | 8 ++-- drivers/net/ethernet/sis/sis190.c | 4 +- drivers/net/ethernet/sis/sis900.c | 2 +- drivers/net/ethernet/smsc/epic100.c | 2 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +-- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/ldmvsw.c | 6 +-- drivers/net/ethernet/sun/niu.c | 6 +-- drivers/net/ethernet/sun/sunbmac.c | 2 +- drivers/net/ethernet/sun/sungem.c | 8 ++-- drivers/net/ethernet/sun/sunhme.c | 6 +-- drivers/net/ethernet/sun/sunvnet.c | 2 +- drivers/net/ethernet/sun/sunvnet_common.c | 6 +-- .../net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- drivers/net/ethernet/ti/cpsw_ale.c | 2 +- drivers/net/ethernet/ti/netcp_ethss.c | 2 +- drivers/net/ethernet/ti/tlan.c | 4 +- drivers/net/ethernet/tundra/tsi108_eth.c | 2 +- drivers/net/fddi/defza.c | 10 ++--- drivers/net/hamradio/6pack.c | 6 +-- drivers/net/hamradio/scc.c | 26 +++++------ drivers/net/hamradio/yam.c | 2 +- drivers/net/hippi/rrunner.c | 2 +- drivers/net/ntb_netdev.c | 2 +- drivers/net/phy/phylink.c | 4 +- drivers/net/slip/slip.c | 14 +++--- drivers/net/tun.c | 2 +- drivers/net/usb/catc.c | 2 +- drivers/net/usb/lan78xx.c | 6 +-- drivers/net/usb/sierra_net.c | 2 +- drivers/net/usb/usbnet.c | 6 +-- drivers/net/vxlan/vxlan_core.c | 2 +- drivers/net/wan/hdlc_cisco.c | 2 +- drivers/net/wan/hdlc_fr.c | 2 +- drivers/net/wan/hdlc_ppp.c | 2 +- drivers/net/wireguard/device.c | 2 +- drivers/net/wireguard/timers.c | 8 ++-- drivers/net/wireless/ath/ar5523/ar5523.c | 4 +- drivers/net/wireless/ath/ath10k/debug.c | 2 +- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/pci.c | 4 +- drivers/net/wireless/ath/ath10k/sdio.c | 2 +- drivers/net/wireless/ath/ath10k/snoc.c | 2 +- drivers/net/wireless/ath/ath11k/ahb.c | 2 +- drivers/net/wireless/ath/ath11k/dp.c | 4 +- drivers/net/wireless/ath/ath11k/dp_rx.c | 8 ++-- drivers/net/wireless/ath/ath12k/dp.c | 2 +- drivers/net/wireless/ath/ath12k/dp_rx.c | 4 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 6 +-- drivers/net/wireless/ath/ath6kl/init.c | 2 +- drivers/net/wireless/ath/ath6kl/main.c | 2 +- drivers/net/wireless/ath/ath6kl/recovery.c | 4 +- drivers/net/wireless/ath/ath6kl/txrx.c | 2 +- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/ath/ath9k/gpio.c | 8 ++-- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 6 +-- drivers/net/wireless/ath/ath9k/init.c | 2 +- drivers/net/wireless/ath/ath9k/link.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 10 ++--- drivers/net/wireless/ath/ath9k/pci.c | 2 +- drivers/net/wireless/ath/wcn36xx/dxe.c | 4 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 2 +- drivers/net/wireless/ath/wil6210/main.c | 6 +-- drivers/net/wireless/ath/wil6210/netdev.c | 6 +-- drivers/net/wireless/ath/wil6210/p2p.c | 2 +- drivers/net/wireless/ath/wil6210/wmi.c | 6 +-- drivers/net/wireless/atmel/at76c50x-usb.c | 2 +- .../broadcom/brcm80211/brcmfmac/btcoex.c | 4 +- .../broadcom/brcm80211/brcmfmac/pcie.c | 2 +- .../broadcom/brcm80211/brcmfmac/sdio.c | 2 +- .../wireless/intel/ipw2x00/libipw_crypto.c | 2 +- .../net/wireless/intel/iwlegacy/3945-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/3945-rs.c | 2 +- .../net/wireless/intel/iwlegacy/4965-mac.c | 4 +- drivers/net/wireless/intel/iwlegacy/common.c | 2 +- .../net/wireless/intel/iwlwifi/dvm/debugfs.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/main.c | 4 +- drivers/net/wireless/intel/iwlwifi/dvm/tt.c | 10 ++--- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 6 +-- .../net/wireless/marvell/libertas/cmdresp.c | 2 +- .../net/wireless/marvell/libertas/if_usb.c | 2 +- drivers/net/wireless/marvell/libertas/main.c | 12 ++--- .../net/wireless/marvell/libertas_tf/cmd.c | 2 +- .../net/wireless/marvell/libertas_tf/if_usb.c | 2 +- .../net/wireless/marvell/libertas_tf/main.c | 4 +- .../wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- drivers/net/wireless/marvell/mwifiex/cmdevt.c | 2 +- drivers/net/wireless/marvell/mwifiex/init.c | 4 +- drivers/net/wireless/marvell/mwifiex/main.c | 2 +- drivers/net/wireless/marvell/mwifiex/pcie.c | 4 +- .../net/wireless/marvell/mwifiex/sta_event.c | 4 +- drivers/net/wireless/marvell/mwifiex/tdls.c | 2 +- drivers/net/wireless/marvell/mwifiex/usb.c | 6 +-- .../net/wireless/mediatek/mt76/mt7615/main.c | 4 +- .../wireless/mediatek/mt76/mt7615/pci_mac.c | 4 +- .../net/wireless/mediatek/mt76/mt7615/usb.c | 2 +- .../net/wireless/mediatek/mt76/mt7921/main.c | 6 +-- .../net/wireless/mediatek/mt76/mt7925/main.c | 4 +- .../net/wireless/mediatek/mt76/mt792x_core.c | 2 +- drivers/net/wireless/microchip/wilc1000/hif.c | 18 ++++---- drivers/net/wireless/purelifi/plfxlc/usb.c | 4 +- drivers/net/wireless/realtek/rtlwifi/base.c | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/sw.c | 4 +- drivers/net/wireless/rsi/rsi_91x_hal.c | 4 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 6 +-- drivers/net/wireless/st/cw1200/main.c | 2 +- drivers/net/wireless/st/cw1200/pm.c | 2 +- drivers/net/wireless/st/cw1200/queue.c | 2 +- drivers/net/wireless/st/cw1200/sta.c | 6 +-- drivers/net/wireless/ti/wlcore/main.c | 4 +- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/nfc/nfcmrvl/fw_dnld.c | 6 +-- drivers/nfc/pn533/pn533.c | 4 +- drivers/nfc/pn533/uart.c | 2 +- drivers/nfc/st-nci/ndlc.c | 12 ++--- drivers/nfc/st-nci/se.c | 10 ++--- drivers/nfc/st21nfca/core.c | 4 +- drivers/nfc/st21nfca/se.c | 6 +-- drivers/nvme/host/multipath.c | 4 +- drivers/parport/ieee1284.c | 2 +- drivers/pci/hotplug/cpqphp_ctrl.c | 2 +- drivers/pci/hotplug/shpchp_hpc.c | 2 +- drivers/pcmcia/i82365.c | 2 +- drivers/pcmcia/soc_common.c | 4 +- drivers/pcmcia/tcic.c | 2 +- drivers/platform/mellanox/mlxbf-tmfifo.c | 2 +- drivers/platform/x86/intel_ips.c | 2 +- drivers/platform/x86/sony-laptop.c | 2 +- drivers/pps/clients/pps-gpio.c | 2 +- drivers/pps/clients/pps-ktimer.c | 2 +- drivers/pps/generators/pps_gen-dummy.c | 4 +- drivers/ptp/ptp_ocp.c | 2 +- drivers/rtc/dev.c | 2 +- drivers/rtc/rtc-test.c | 4 +- drivers/s390/block/dasd.c | 8 ++-- drivers/s390/char/con3270.c | 4 +- drivers/s390/char/sclp.c | 12 ++--- drivers/s390/char/sclp_con.c | 2 +- drivers/s390/char/sclp_vt220.c | 4 +- drivers/s390/char/tape_core.c | 2 +- drivers/s390/char/tape_std.c | 2 +- drivers/s390/cio/device_fsm.c | 2 +- drivers/s390/cio/eadm_sch.c | 2 +- drivers/s390/crypto/ap_queue.c | 2 +- drivers/s390/net/fsm.c | 4 +- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/scsi/zfcp_fsf.c | 4 +- drivers/s390/scsi/zfcp_qdio.c | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 4 +- drivers/scsi/aic94xx/aic94xx_hwi.c | 2 +- drivers/scsi/aic94xx/aic94xx_init.c | 2 +- drivers/scsi/aic94xx/aic94xx_tmf.c | 6 +-- drivers/scsi/arcmsr/arcmsr_hba.c | 20 ++++----- drivers/scsi/arm/fas216.c | 6 +-- drivers/scsi/be2iscsi/be_main.c | 4 +- drivers/scsi/bfa/bfad.c | 10 ++--- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 +- drivers/scsi/bnx2fc/bnx2fc_tgt.c | 4 +- drivers/scsi/bnx2i/bnx2i_iscsi.c | 8 ++-- drivers/scsi/csiostor/csio_hw.c | 4 +- drivers/scsi/csiostor/csio_mb.c | 4 +- drivers/scsi/cxgbi/cxgb3i/cxgb3i.c | 2 +- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 2 +- drivers/scsi/dc395x.c | 12 ++--- drivers/scsi/elx/efct/efct_driver.c | 2 +- drivers/scsi/elx/efct/efct_xport.c | 2 +- drivers/scsi/elx/libefc/efc_fabric.c | 2 +- drivers/scsi/elx/libefc/efc_node.c | 2 +- drivers/scsi/esas2r/esas2r_init.c | 2 +- drivers/scsi/fcoe/fcoe.c | 2 +- drivers/scsi/fcoe/fcoe_ctlr.c | 4 +- drivers/scsi/fnic/fdls_disc.c | 12 ++--- drivers/scsi/fnic/fip.c | 12 ++--- drivers/scsi/fnic/fnic_main.c | 12 ++--- drivers/scsi/hisi_sas/hisi_sas_main.c | 6 +-- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 14 +++--- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 4 +- drivers/scsi/ibmvscsi/ibmvfc.c | 16 +++---- drivers/scsi/ibmvscsi/ibmvscsi.c | 6 +-- drivers/scsi/ipr.c | 12 ++--- drivers/scsi/isci/host.c | 12 ++--- drivers/scsi/isci/isci.h | 8 ++-- drivers/scsi/libfc/fc_fcp.c | 4 +- drivers/scsi/libiscsi.c | 6 +-- drivers/scsi/libsas/sas_expander.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 8 ++-- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 4 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 8 ++-- drivers/scsi/lpfc/lpfc_init.c | 20 ++++----- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 10 ++--- drivers/scsi/megaraid/megaraid_mbox.c | 2 +- drivers/scsi/megaraid/megaraid_mm.c | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 10 ++--- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- drivers/scsi/mvsas/mv_sas.c | 2 +- drivers/scsi/pmcraid.c | 6 +-- drivers/scsi/qla1280.c | 2 +- drivers/scsi/qla2xxx/qla_init.c | 2 +- drivers/scsi/qla2xxx/qla_iocb.c | 4 +- drivers/scsi/qla2xxx/qla_mid.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla4xxx/ql4_os.c | 2 +- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +- .../gpib/agilent_82357a/agilent_82357a.c | 4 +- drivers/staging/gpib/common/gpib_os.c | 4 +- drivers/staging/gpib/common/iblib.c | 2 +- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 8 ++-- drivers/staging/media/imx/imx-ic-prpencvf.c | 2 +- drivers/staging/media/imx/imx-media-csi.c | 2 +- drivers/staging/rtl8723bs/core/rtw_cmd.c | 2 +- drivers/staging/rtl8723bs/core/rtw_mlme.c | 4 +- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 22 +++++----- drivers/staging/rtl8723bs/core/rtw_recv.c | 2 +- drivers/staging/rtl8723bs/core/rtw_sta_mgt.c | 6 +-- drivers/staging/rtl8723bs/hal/sdio_ops.c | 2 +- drivers/staging/rtl8723bs/os_dep/os_intfs.c | 12 ++--- drivers/target/iscsi/iscsi_target_erl0.c | 2 +- drivers/target/iscsi/iscsi_target_erl1.c | 2 +- drivers/target/iscsi/iscsi_target_util.c | 4 +- drivers/target/target_core_user.c | 8 ++-- drivers/tty/ipwireless/hardware.c | 4 +- drivers/tty/mips_ejtag_fdc.c | 4 +- drivers/tty/moxa.c | 2 +- drivers/tty/n_gsm.c | 14 +++--- drivers/tty/serial/8250/8250_aspeed_vuart.c | 2 +- drivers/tty/serial/8250/8250_core.c | 2 +- drivers/tty/serial/altera_uart.c | 2 +- drivers/tty/serial/amba-pl011.c | 4 +- drivers/tty/serial/atmel_serial.c | 2 +- drivers/tty/serial/fsl_lpuart.c | 6 +-- drivers/tty/serial/imx.c | 4 +- drivers/tty/serial/liteuart.c | 4 +- drivers/tty/serial/max3100.c | 4 +- drivers/tty/serial/mux.c | 2 +- drivers/tty/serial/sa1100.c | 4 +- drivers/tty/serial/sccnxp.c | 2 +- drivers/tty/serial/sh-sci.c | 2 +- drivers/tty/synclink_gt.c | 8 ++-- drivers/tty/sysrq.c | 4 +- drivers/tty/vcc.c | 6 +-- drivers/tty/vt/keyboard.c | 2 +- drivers/tty/vt/vt.c | 4 +- drivers/usb/atm/cxacru.c | 2 +- drivers/usb/atm/speedtch.c | 8 ++-- drivers/usb/atm/usbatm.c | 4 +- drivers/usb/core/hcd.c | 6 +-- drivers/usb/core/hub.c | 2 +- drivers/usb/dwc2/hcd.c | 2 +- drivers/usb/dwc2/hcd_queue.c | 4 +- drivers/usb/gadget/legacy/zero.c | 4 +- drivers/usb/gadget/udc/omap_udc.c | 2 +- drivers/usb/gadget/udc/pxa25x_udc.c | 8 ++-- drivers/usb/gadget/udc/r8a66597-udc.c | 2 +- drivers/usb/gadget/udc/snps_udc_core.c | 4 +- drivers/usb/host/ehci-platform.c | 2 +- drivers/usb/host/isp1362-hcd.c | 2 +- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/ohci-hub.c | 2 +- drivers/usb/host/oxu210hp-hcd.c | 6 +-- drivers/usb/host/r8a66597-hcd.c | 2 +- drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/uhci-hcd.c | 2 +- drivers/usb/host/uhci-q.c | 2 +- drivers/usb/host/xen-hcd.c | 4 +- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-mtk.c | 4 +- drivers/usb/host/xhci.c | 14 +++--- drivers/usb/isp1760/isp1760-hcd.c | 2 +- drivers/usb/isp1760/isp1760-udc.c | 4 +- drivers/usb/misc/usbtest.c | 2 +- drivers/usb/musb/da8xx.c | 6 +-- drivers/usb/musb/mpfs.c | 4 +- drivers/usb/musb/musb_core.c | 4 +- drivers/usb/musb/musb_dsps.c | 8 ++-- drivers/usb/musb/tusb6010.c | 8 ++-- drivers/usb/phy/phy-mv-usb.c | 2 +- drivers/usb/storage/realtek_cr.c | 2 +- drivers/video/fbdev/aty/radeon_backlight.c | 2 +- drivers/video/fbdev/aty/radeon_base.c | 4 +- drivers/video/fbdev/aty/radeon_pm.c | 2 +- drivers/video/fbdev/omap/hwa742.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 2 +- drivers/virt/vboxguest/vboxguest_core.c | 2 +- drivers/watchdog/alim7101_wdt.c | 4 +- drivers/watchdog/at91sam9_wdt.c | 4 +- drivers/watchdog/bcm47xx_wdt.c | 4 +- drivers/watchdog/cpwd.c | 4 +- drivers/watchdog/lpc18xx_wdt.c | 4 +- drivers/watchdog/machzwd.c | 4 +- drivers/watchdog/mixcomwd.c | 4 +- drivers/watchdog/pcwd.c | 2 +- drivers/watchdog/pika_wdt.c | 2 +- drivers/watchdog/sbc60xxwdt.c | 4 +- drivers/watchdog/sc520_wdt.c | 2 +- drivers/watchdog/shwdt.c | 2 +- drivers/watchdog/via_wdt.c | 2 +- drivers/watchdog/w83877f_wdt.c | 4 +- fs/afs/fs_probe.c | 2 +- fs/afs/server.c | 2 +- fs/bcachefs/clock.c | 2 +- fs/btrfs/zstd.c | 2 +- fs/ext4/super.c | 2 +- fs/jbd2/journal.c | 4 +- fs/jffs2/wbuf.c | 2 +- fs/nilfs2/segment.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- fs/pstore/platform.c | 2 +- include/linux/timer.h | 36 +-------------- include/net/sctp/sctp.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/kcsan/kcsan_test.c | 2 +- kernel/kthread.c | 4 +- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/srcutree.c | 2 +- kernel/rcu/tasks.h | 2 +- kernel/rcu/tree_nocb.h | 4 +- kernel/sched/psi.c | 2 +- kernel/time/clocksource.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/sleep_timeout.c | 2 +- kernel/time/timer.c | 8 ++-- kernel/workqueue.c | 14 +++--- mm/backing-dev.c | 2 +- mm/page-writeback.c | 4 +- net/appletalk/aarp.c | 4 +- net/atm/clip.c | 2 +- net/atm/lec.c | 26 +++++------ net/atm/mpc.c | 4 +- net/ax25/af_ax25.c | 10 ++--- net/ax25/ax25_ds_timer.c | 2 +- net/ax25/ax25_subr.c | 10 ++--- net/ax25/ax25_timer.c | 14 +++--- net/batman-adv/tp_meter.c | 6 +-- net/bluetooth/hidp/core.c | 2 +- net/bluetooth/rfcomm/core.c | 4 +- net/bridge/br_mdb.c | 6 +-- net/bridge/br_multicast.c | 44 +++++++++---------- net/bridge/br_stp.c | 14 +++--- net/bridge/br_stp_if.c | 12 ++--- net/can/af_can.c | 2 +- net/core/drop_monitor.c | 8 ++-- net/core/gen_estimator.c | 2 +- net/core/neighbour.c | 10 ++--- net/core/sock.c | 4 +- net/ipv4/igmp.c | 10 ++--- net/ipv4/inet_fragment.c | 6 +-- net/ipv4/ipmr.c | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 4 +- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/lapb/lapb_iface.c | 4 +- net/lapb/lapb_timer.c | 8 ++-- net/llc/llc_c_ac.c | 18 ++++---- net/llc/llc_conn.c | 16 +++---- net/mac80211/agg-rx.c | 4 +- net/mac80211/agg-tx.c | 6 +-- net/mac80211/ibss.c | 2 +- net/mac80211/iface.c | 2 +- net/mac80211/led.c | 2 +- net/mac80211/mesh.c | 8 ++-- net/mac80211/mesh_plink.c | 12 ++--- net/mac80211/mlme.c | 16 +++---- net/mac80211/ocb.c | 2 +- net/mac80211/offchannel.c | 6 +-- net/mac80211/pm.c | 4 +- net/mac80211/rx.c | 2 +- net/mac80211/sta_info.c | 2 +- net/mctp/af_mctp.c | 2 +- net/mptcp/pm.c | 2 +- net/ncsi/ncsi-manage.c | 4 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 6 +-- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/nf_conntrack_expect.c | 10 ++--- net/netfilter/nf_conntrack_netlink.c | 4 +- net/netfilter/nfnetlink_log.c | 2 +- net/netrom/nr_loopback.c | 2 +- net/nfc/core.c | 6 +-- net/nfc/hci/core.c | 4 +- net/nfc/hci/llc_shdlc.c | 8 ++-- net/nfc/llcp_core.c | 6 +-- net/nfc/nci/core.c | 6 +-- net/nfc/nci/data.c | 2 +- net/nfc/nci/rsp.c | 2 +- net/packet/af_packet.c | 2 +- net/rose/rose_link.c | 8 ++-- net/rose/rose_loopback.c | 2 +- net/rose/rose_route.c | 4 +- net/rxrpc/call_event.c | 2 +- net/rxrpc/call_object.c | 4 +- net/rxrpc/conn_client.c | 2 +- net/rxrpc/conn_object.c | 8 ++-- net/rxrpc/net_ns.c | 4 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_generic.c | 2 +- net/sched/sch_pie.c | 2 +- net/sched/sch_red.c | 4 +- net/sched/sch_sfq.c | 4 +- net/sctp/associola.c | 4 +- net/sctp/input.c | 2 +- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 5 +-- net/sctp/protocol.c | 2 +- net/sctp/sm_sideeffect.c | 6 +-- net/sctp/stream.c | 6 +-- net/sctp/transport.c | 12 ++--- net/sunrpc/xprt.c | 4 +- net/tipc/node.c | 2 +- net/tipc/subscr.c | 2 +- net/wireless/core.c | 6 +-- net/x25/x25_link.c | 2 +- net/x25/x25_timer.c | 4 +- net/xfrm/xfrm_policy.c | 10 ++--- net/xfrm/xfrm_state.c | 2 +- samples/connector/cn_test.c | 2 +- samples/ftrace/sample-trace-array.c | 2 +- sound/core/timer.c | 4 +- sound/drivers/aloop.c | 4 +- sound/drivers/dummy.c | 2 +- sound/drivers/mpu401/mpu401_uart.c | 2 +- sound/drivers/mtpav.c | 2 +- sound/drivers/opl3/opl3_seq.c | 2 +- sound/drivers/serial-u16550.c | 2 +- sound/i2c/other/ak4117.c | 2 +- sound/isa/sb/emu8000_pcm.c | 2 +- sound/isa/sb/sb8_midi.c | 4 +- sound/isa/wavefront/wavefront_midi.c | 4 +- sound/pci/asihpi/asihpi.c | 2 +- sound/pci/ctxfi/cttimer.c | 2 +- sound/pci/echoaudio/midi.c | 2 +- sound/pci/rme9652/hdsp.c | 2 +- sound/pci/rme9652/hdspm.c | 2 +- sound/sh/aica.c | 2 +- sound/soc/codecs/rt5645.c | 4 +- sound/soc/fsl/imx-pcm-rpmsg.c | 4 +- sound/soc/ti/ams-delta.c | 2 +- sound/usb/midi.c | 2 +- 787 files changed, 1613 insertions(+), 1648 deletions(-) diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index b9cd364e814e..a89ce84371f9 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -177,7 +177,7 @@ srmcons_close(struct tty_struct *tty, struct file *filp) if (tty->count == 1) { port->tty = NULL; - del_timer(&srmconsp->timer); + timer_delete(&srmconsp->timer); } spin_unlock_irqrestore(&port->lock, flags); diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index f8920d0010de..6521ab3d24fa 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -135,7 +135,7 @@ static struct timer_list perr_timer; static void dc21285_enable_error(struct timer_list *timer) { - del_timer(timer); + timer_delete(timer); if (timer == &serr_timer) enable_irq(IRQ_PCI_SERR); diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index dd930e3a61a4..71b282b146d0 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -913,8 +913,8 @@ static void sharpsl_pm_remove(struct platform_device *pdev) if (sharpsl_pm.machinfo->exit) sharpsl_pm.machinfo->exit(); - del_timer_sync(&sharpsl_pm.chrg_full_timer); - del_timer_sync(&sharpsl_pm.ac_timer); + timer_delete_sync(&sharpsl_pm.chrg_full_timer); + timer_delete_sync(&sharpsl_pm.ac_timer); } static struct platform_driver sharpsl_pm_driver = { diff --git a/arch/m68k/amiga/amisound.c b/arch/m68k/amiga/amisound.c index 714fe8ec6afa..5fd93dfab809 100644 --- a/arch/m68k/amiga/amisound.c +++ b/arch/m68k/amiga/amisound.c @@ -78,7 +78,7 @@ void amiga_mksound( unsigned int hz, unsigned int ticks ) return; local_irq_save(flags); - del_timer( &sound_timer ); + timer_delete(&sound_timer); if (hz > 20 && hz < 32767) { unsigned long period = (clock_constant / hz); diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c index faea2265a540..6312d5b600a5 100644 --- a/arch/m68k/mac/macboing.c +++ b/arch/m68k/mac/macboing.c @@ -183,7 +183,7 @@ void mac_mksound( unsigned int freq, unsigned int length ) local_irq_save(flags); - del_timer( &mac_sound_timer ); + timer_delete(&mac_sound_timer); for ( i = 0; i < 0x800; i++ ) mac_asc_regs[ i ] = 0; @@ -277,7 +277,7 @@ static void mac_quadra_ring_bell(struct timer_list *unused) local_irq_save(flags); - del_timer( &mac_sound_timer ); + timer_delete(&mac_sound_timer); if ( mac_bell_duration-- > 0 ) { diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index 8f0861c58080..8539f562f5b8 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -98,7 +98,7 @@ static void blink_timeout(struct timer_list *unused) static void debounce(struct timer_list *unused) { - del_timer(&debounce_timer); + timer_delete(&debounce_timer); if (sgint->istat1 & SGINT_ISTAT1_PWR) { /* Interrupt still being sent. */ debounce_timer.expires = jiffies + (HZ / 20); /* 0.05s */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6a5be025a8af..6a4805968966 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -622,7 +622,7 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu) if (nr_jiffies < NEXT_TIMER_MAX_DELTA) mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); else - del_timer(&vcpu->arch.wdt_timer); + timer_delete(&vcpu->arch.wdt_timer); spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); } @@ -1441,7 +1441,7 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) { - del_timer_sync(&vcpu->arch.wdt_timer); + timer_delete_sync(&vcpu->arch.wdt_timer); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 610ca8570682..8e7ed010bfde 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -508,7 +508,7 @@ static void __spu_del_from_rq(struct spu_context *ctx) if (!list_empty(&ctx->rq)) { if (!--spu_prio->nr_waiting) - del_timer(&spusched_timer); + timer_delete(&spusched_timer); list_del_init(&ctx->rq); if (list_empty(&spu_prio->runq[prio])) @@ -1126,8 +1126,8 @@ void spu_sched_exit(void) remove_proc_entry("spu_loadavg", NULL); - del_timer_sync(&spusched_timer); - del_timer_sync(&spuloadavg_timer); + timer_delete_sync(&spusched_timer); + timer_delete_sync(&spuloadavg_timer); kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index c097d591670e..a0ae58636e10 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -347,7 +347,7 @@ static irqreturn_t kw_i2c_irq(int irq, void *dev_id) unsigned long flags; spin_lock_irqsave(&host->lock, flags); - del_timer(&host->timeout_timer); + timer_delete(&host->timeout_timer); kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr)); if (host->state != state_idle) { host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c900deddd36d..fed17d407a44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -680,7 +680,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 39f44b6256e0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,7 +201,7 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); @@ -424,7 +424,7 @@ static int __init cmm_init(void) #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index ab9e791070b4..5442475d132e 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -90,7 +90,7 @@ static void pcibios_enable_err(struct timer_list *t) { struct pci_channel *hose = from_timer(hose, t, err_timer); - del_timer(&hose->err_timer); + timer_delete(&hose->err_timer); printk(KERN_DEBUG "PCI: re-enabling error IRQ.\n"); enable_irq(hose->err_irq); } @@ -99,7 +99,7 @@ static void pcibios_enable_serr(struct timer_list *t) { struct pci_channel *hose = from_timer(hose, t, serr_timer); - del_timer(&hose->serr_timer); + timer_delete(&hose->serr_timer); printk(KERN_DEBUG "PCI: re-enabling system error IRQ.\n"); enable_irq(hose->serr_irq); } diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index ab657b359789..f4fb82b019bb 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -84,7 +84,7 @@ static ssize_t led_proc_write(struct file *file, const char __user *buffer, /* before we change anything we want to stop any running timers, * otherwise calls such as on will have no persistent effect */ - del_timer_sync(&led_blink_timer); + timer_delete_sync(&led_blink_timer); if (!strcmp(buf, "on")) { auxio_set_led(AUXIO_LED_ON); @@ -134,7 +134,7 @@ static int __init led_init(void) static void __exit led_exit(void) { remove_proc_entry("led", NULL); - del_timer_sync(&led_blink_timer); + timer_delete_sync(&led_blink_timer); } module_init(led_init); diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 85b129e2b70b..b97bb52dd562 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1112,7 +1112,7 @@ static int vector_net_close(struct net_device *dev) struct vector_private *vp = netdev_priv(dev); netif_stop_queue(dev); - del_timer(&vp->tl); + timer_delete(&vp->tl); vp->opened = false; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 1f14c3308b6b..f6fd71b64b66 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1786,13 +1786,13 @@ void mce_timer_kick(bool storm) __this_cpu_write(mce_next_interval, check_interval * HZ); } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +/* Must not be called in IRQ context where timer_delete_sync() can deadlock */ static void mce_timer_delete_all(void) { int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + timer_delete_sync(&per_cpu(mce_timer, cpu)); } static void __mcheck_cpu_mce_banks_init(void) @@ -2820,7 +2820,7 @@ static int mce_cpu_pre_down(unsigned int cpu) struct timer_list *t = this_cpu_ptr(&mce_timer); mce_disable_cpu(); - del_timer_sync(t); + timer_delete_sync(t); mce_threshold_remove_device(cpu); mce_device_remove(cpu); return 0; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bd21e9c335ad..38b33cdd4232 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1553,7 +1553,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, kvm_vcpu_halt(vcpu); if (sched_poll.timeout) - del_timer(&vcpu->arch.xen.poll_timer); + timer_delete(&vcpu->arch.xen.poll_timer); kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); } @@ -2308,7 +2308,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); - del_timer_sync(&vcpu->arch.xen.poll_timer); + timer_delete_sync(&vcpu->arch.xen.poll_timer); } void kvm_xen_init_vm(struct kvm *kvm) diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index abec44b687df..8b95221375a8 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -48,7 +48,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp) static void rs_close(struct tty_struct *tty, struct file * filp) { if (tty->count == 1) - del_timer_sync(&serial_timer); + timer_delete_sync(&serial_timer); } diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index e89f27f2bb18..c6d8c62695e1 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -375,7 +375,7 @@ static int iss_net_close(struct net_device *dev) struct iss_net_private *lp = netdev_priv(dev); netif_stop_queue(dev); - del_timer_sync(&lp->timer); + timer_delete_sync(&lp->timer); lp->tp.net_ops->close(lp); return 0; diff --git a/block/blk-core.c b/block/blk-core.c index 4623de79effa..e8cc270a453f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -219,7 +219,7 @@ EXPORT_SYMBOL_GPL(blk_status_to_str); */ void blk_sync_queue(struct request_queue *q) { - del_timer_sync(&q->timeout); + timer_delete_sync(&q->timeout); cancel_work_sync(&q->timeout_work); } EXPORT_SYMBOL(blk_sync_queue); diff --git a/block/blk-stat.c b/block/blk-stat.c index eaf60097bbe1..46449da856f8 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -162,7 +162,7 @@ void blk_stat_remove_callback(struct request_queue *q, blk_queue_flag_clear(QUEUE_FLAG_STATS, q); spin_unlock_irqrestore(&q->stats->lock, flags); - del_timer_sync(&cb->timer); + timer_delete_sync(&cb->timer); } static void blk_stat_free_callback_rcu(struct rcu_head *head) diff --git a/block/blk-stat.h b/block/blk-stat.h index 5d7f18ba436d..9e05bf18d1be 100644 --- a/block/blk-stat.h +++ b/block/blk-stat.h @@ -148,7 +148,7 @@ static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb, static inline void blk_stat_deactivate(struct blk_stat_callback *cb) { - del_timer_sync(&cb->timer); + timer_delete_sync(&cb->timer); } /** diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 91dab43c65ab..d6dd2e047874 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -333,7 +333,7 @@ static void throtl_pd_free(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); - del_timer_sync(&tg->service_queue.pending_timer); + timer_delete_sync(&tg->service_queue.pending_timer); blkg_rwstat_exit(&tg->stat_bytes); blkg_rwstat_exit(&tg->stat_ios); kfree(tg); @@ -1711,7 +1711,7 @@ void blk_throtl_exit(struct gendisk *disk) if (!blk_throtl_activated(q)) return; - del_timer_sync(&q->td->service_queue.pending_timer); + timer_delete_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); blkcg_deactivate_policy(disk, &blkcg_policy_throtl); kfree(q->td); diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c index 2473c66309d4..972833fabcfc 100644 --- a/drivers/accel/qaic/qaic_timesync.c +++ b/drivers/accel/qaic/qaic_timesync.c @@ -221,7 +221,7 @@ static void qaic_timesync_remove(struct mhi_device *mhi_dev) { struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev); - del_timer_sync(&mqtsdev->timer); + timer_delete_sync(&mqtsdev->timer); mhi_unprepare_from_transfer(mqtsdev->mhi_dev); kfree(mqtsdev->sync_msg); kfree(mqtsdev); diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index f677ad2177c2..e68cf1d83787 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -1172,13 +1172,13 @@ static void do_handle_shift(struct vc_data *vc, u_char value, char up_flag) if (cursor_track == read_all_mode) { switch (value) { case KVAL(K_SHIFT): - del_timer(&cursor_timer); + timer_delete(&cursor_timer); spk_shut_up &= 0xfe; spk_do_flush(); read_all_doc(vc); break; case KVAL(K_CTRL): - del_timer(&cursor_timer); + timer_delete(&cursor_timer); cursor_track = prev_cursor_track; spk_shut_up &= 0xfe; spk_do_flush(); @@ -1399,7 +1399,7 @@ static void start_read_all_timer(struct vc_data *vc, enum read_all_command comma static void kbd_fakekey2(struct vc_data *vc, enum read_all_command command) { - del_timer(&cursor_timer); + timer_delete(&cursor_timer); speakup_fake_down_arrow(); start_read_all_timer(vc, command); } @@ -1415,7 +1415,7 @@ static void read_all_doc(struct vc_data *vc) cursor_track = read_all_mode; spk_reset_index_count(0); if (get_sentence_buf(vc, 0) == -1) { - del_timer(&cursor_timer); + timer_delete(&cursor_timer); if (!in_keyboard_notifier) speakup_fake_down_arrow(); start_read_all_timer(vc, RA_DOWN_ARROW); @@ -1428,7 +1428,7 @@ static void read_all_doc(struct vc_data *vc) static void stop_read_all(struct vc_data *vc) { - del_timer(&cursor_timer); + timer_delete(&cursor_timer); cursor_track = prev_cursor_track; spk_shut_up &= 0xfe; spk_do_flush(); @@ -1528,7 +1528,7 @@ static int pre_handle_cursor(struct vc_data *vc, u_char value, char up_flag) spin_unlock_irqrestore(&speakup_info.spinlock, flags); return NOTIFY_STOP; } - del_timer(&cursor_timer); + timer_delete(&cursor_timer); spk_shut_up &= 0xfe; spk_do_flush(); start_read_all_timer(vc, value + 1); @@ -1692,7 +1692,7 @@ static void cursor_done(struct timer_list *unused) struct vc_data *vc = vc_cons[cursor_con].d; unsigned long flags; - del_timer(&cursor_timer); + timer_delete(&cursor_timer); spin_lock_irqsave(&speakup_info.spinlock, flags); if (cursor_con != fg_console) { is_cursor = 0; @@ -2333,7 +2333,7 @@ static void __exit speakup_exit(void) speakup_unregister_devsynth(); speakup_cancel_selection(); speakup_cancel_paste(); - del_timer_sync(&cursor_timer); + timer_delete_sync(&cursor_timer); kthread_stop(speakup_task); speakup_task = NULL; mutex_lock(&spk_mutex); @@ -2437,7 +2437,7 @@ static int __init speakup_init(void) error_vtnotifier: unregister_keyboard_notifier(&keyboard_notifier_block); - del_timer(&cursor_timer); + timer_delete(&cursor_timer); error_kbdnotifier: speakup_unregister_devsynth(); diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c index 85062e605d79..d8addbf3ad0d 100644 --- a/drivers/accessibility/speakup/synth.c +++ b/drivers/accessibility/speakup/synth.c @@ -521,7 +521,7 @@ void synth_release(void) spin_lock_irqsave(&speakup_info.spinlock, flags); pr_info("releasing synth %s\n", synth->name); synth->alive = 0; - del_timer(&thread_timer); + timer_delete(&thread_timer); spin_unlock_irqrestore(&speakup_info.spinlock, flags); if (synth->attributes.name) sysfs_remove_group(speakup_kobj, &synth->attributes); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 16cd676eae1f..b990c1ee0b12 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -700,7 +700,7 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ata_eh_acquire(ap); repeat: /* kill fast drain timer */ - del_timer_sync(&ap->fastdrain_timer); + timer_delete_sync(&ap->fastdrain_timer); /* process port resume request */ ata_eh_handle_port_resume(ap); diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index fcd70e094a2e..e6a300203e6c 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -366,8 +366,8 @@ EXPORT_SYMBOL(idt77105_init); static void __exit idt77105_exit(void) { /* turn off timers */ - del_timer_sync(&stats_timer); - del_timer_sync(&restart_timer); + timer_delete_sync(&stats_timer); + timer_delete_sync(&restart_timer); } module_exit(idt77105_exit); diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index d213adcefe33..301e697e22ad 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -3283,7 +3283,7 @@ static void __exit ia_module_exit(void) { pci_unregister_driver(&ia_driver); - del_timer_sync(&ia_timer); + timer_delete_sync(&ia_timer); } module_init(ia_module_init); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 32d7aa141d96..00fe25b5b6a3 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1792,7 +1792,7 @@ static inline void lanai_timed_poll_start(struct lanai_dev *lanai) static inline void lanai_timed_poll_stop(struct lanai_dev *lanai) { - del_timer_sync(&lanai->timer); + timer_delete_sync(&lanai->timer); } /* -------------------- INTERRUPT SERVICE: */ diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 27153d6bc781..45952cfea06b 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -300,7 +300,7 @@ static void __exit nicstar_cleanup(void) { XPRINTK("nicstar: nicstar_cleanup() called.\n"); - del_timer_sync(&ns_timer); + timer_delete_sync(&ns_timer); pci_unregister_driver(&nicstar_driver); diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index 32802ea9521c..7d0fa729c2fe 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -347,7 +347,7 @@ static int suni_stop(struct atm_dev *dev) for (walk = &sunis; *walk != PRIV(dev); walk = &PRIV((*walk)->dev)->next); *walk = PRIV((*walk)->dev)->next; - if (!sunis) del_timer_sync(&poll_timer); + if (!sunis) timer_delete_sync(&poll_timer); spin_unlock_irqrestore(&sunis_lock,flags); kfree(PRIV(dev)); diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index fcec77f100ce..b6808c4f89b6 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c @@ -84,7 +84,7 @@ static int linedisp_display(struct linedisp *linedisp, const char *msg, char *new_msg; /* stop the scroll timer */ - del_timer_sync(&linedisp->timer); + timer_delete_sync(&linedisp->timer); if (count == -1) count = strlen(msg); @@ -183,7 +183,7 @@ static ssize_t scroll_step_ms_store(struct device *dev, linedisp->scroll_rate = msecs_to_jiffies(ms); if (linedisp->message && linedisp->message_len > linedisp->num_chars) { - del_timer_sync(&linedisp->timer); + timer_delete_sync(&linedisp->timer); if (linedisp->scroll_rate) linedisp_scroll(&linedisp->timer); } @@ -376,7 +376,7 @@ int linedisp_register(struct linedisp *linedisp, struct device *parent, out_del_dev: device_del(&linedisp->dev); out_del_timer: - del_timer_sync(&linedisp->timer); + timer_delete_sync(&linedisp->timer); out_put_device: put_device(&linedisp->dev); return err; @@ -391,7 +391,7 @@ EXPORT_SYMBOL_NS_GPL(linedisp_register, "LINEDISP"); void linedisp_unregister(struct linedisp *linedisp) { device_del(&linedisp->dev); - del_timer_sync(&linedisp->timer); + timer_delete_sync(&linedisp->timer); put_device(&linedisp->dev); } EXPORT_SYMBOL_NS_GPL(linedisp_unregister, "LINEDISP"); diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index 91ccb9789d43..958c0e31e84a 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -1654,7 +1654,7 @@ static void panel_attach(struct parport *port) err_lcd_unreg: if (scan_timer.function) - del_timer_sync(&scan_timer); + timer_delete_sync(&scan_timer); if (lcd.enabled) charlcd_unregister(lcd.charlcd); err_unreg_device: @@ -1675,7 +1675,7 @@ static void panel_detach(struct parport *port) return; } if (scan_timer.function) - del_timer_sync(&scan_timer); + timer_delete_sync(&scan_timer); if (keypad.enabled) { misc_deregister(&keypad_dev); diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 64840e5d5fcc..03a39c417dc4 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -41,7 +41,7 @@ struct devcd_entry { * devcd_data_write() * mod_delayed_work() * try_to_grab_pending() - * del_timer() + * timer_delete() * debug_assert_init() * INIT_DELAYED_WORK() * schedule_delayed_work() diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ac2a197c1234..c8b0a9e29ed8 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -559,7 +559,7 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) { struct timer_list *timer = &wd->timer; - del_timer_sync(timer); + timer_delete_sync(timer); destroy_timer_on_stack(timer); } #else diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 752b417e8129..63bf914a4d44 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -197,7 +197,7 @@ void wakeup_source_remove(struct wakeup_source *ws) raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); - del_timer_sync(&ws->timer); + timer_delete_sync(&ws->timer); /* * Clear timer.function to make wakeup_source_not_registered() treat * this wakeup source as not registered. @@ -613,7 +613,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, false); - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; spin_unlock_irqrestore(&ws->lock, flags); @@ -693,7 +693,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) ws->max_time = duration; ws->last_time = now; - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; if (ws->autosleep_enabled) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 9edd4468f755..6357d86eafdc 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -457,7 +457,7 @@ static int fd_motor_on(int nr) { nr &= 3; - del_timer(motor_off_timer + nr); + timer_delete(motor_off_timer + nr); if (!unit[nr].motor) { unit[nr].motor = 1; @@ -1393,7 +1393,7 @@ static int non_int_flush_track (unsigned long nr) nr&=3; writefromint = 0; - del_timer(&post_write_timer); + timer_delete(&post_write_timer); get_fdc(nr); if (!fd_motor_on(nr)) { writepending = 0; @@ -1435,7 +1435,7 @@ static int get_track(int drive, int track) } if (unit[drive].dirty == 1) { - del_timer (flush_track_timer + drive); + timer_delete(flush_track_timer + drive); non_int_flush_track (drive); } errcnt = 0; @@ -1591,7 +1591,7 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode, case FDDEFPRM: return -EINVAL; case FDFLUSH: /* unconditionally, even if not needed */ - del_timer (flush_track_timer + drive); + timer_delete(flush_track_timer + drive); non_int_flush_track(drive); break; #ifdef RAW_IOCTL @@ -1714,7 +1714,7 @@ static void floppy_release(struct gendisk *disk) mutex_lock(&amiflop_mutex); if (unit[drive].dirty == 1) { - del_timer (flush_track_timer + drive); + timer_delete(flush_track_timer + drive); non_int_flush_track (drive); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 4db7f6ce8ade..141b2a0e03f2 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -274,7 +274,7 @@ freedev(struct aoedev *d) if (!freeing) return; - del_timer_sync(&d->timer); + timer_delete_sync(&d->timer); if (d->gd) { aoedisk_rm_debugfs(d); del_gendisk(d->gd); diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 6238c4c87cfc..cdf6e4041bb9 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -28,7 +28,7 @@ static void discover_timer(struct timer_list *t) static void __exit aoe_exit(void) { - del_timer_sync(&timer); + timer_delete_sync(&timer); aoenet_exit(); unregister_blkdev(AOE_MAJOR, DEVICE_NAME); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index a81ade622a01..7fe14266c12c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -494,7 +494,7 @@ static inline void start_timeout(void) static inline void stop_timeout(void) { - del_timer(&timeout_timer); + timer_delete(&timeout_timer); } /* Select the side to use. */ @@ -784,7 +784,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) contents become invalid! */ BufferDrive = -1; /* stop deselect timer */ - del_timer( &motor_off_timer ); + timer_delete(&motor_off_timer); FILL( 60 * (nsect / 9), 0x4e ); for( sect = 0; sect < nsect; ++sect ) { @@ -1138,7 +1138,7 @@ static void fd_rwsec_done( int status ) DPRINT(("fd_rwsec_done()\n")); if (read_track) { - del_timer(&readtrack_timer); + timer_delete(&readtrack_timer); if (!MultReadInProgress) return; MultReadInProgress = 0; @@ -1356,7 +1356,7 @@ static void fd_times_out(struct timer_list *unused) /* If the timeout occurred while the readtrack_check timer was * active, we need to cancel it, else bad things will happen */ if (UseTrackbuffer) - del_timer( &readtrack_timer ); + timer_delete(&readtrack_timer); FDC_WRITE( FDCREG_CMD, FDCCMD_FORCI ); udelay( 25 ); @@ -1566,7 +1566,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, } /* stop deselect timer */ - del_timer( &motor_off_timer ); + timer_delete(&motor_off_timer); ReqCnt = 0; ReqCmd = rq_data_dir(fd_request); @@ -2055,7 +2055,7 @@ static void atari_floppy_cleanup(void) blk_mq_free_tag_set(&unit[i].tag_set); } - del_timer_sync(&fd_timer); + timer_delete_sync(&fd_timer); atari_stram_free(DMABuffer); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5bbd312c3e14..ced2cc5f46f2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3034,7 +3034,7 @@ void drbd_md_sync(struct drbd_device *device) BUILD_BUG_ON(UI_SIZE != 4); BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); - del_timer(&device->md_sync_timer); + timer_delete(&device->md_sync_timer); /* timer may be rearmed by drbd_md_mark_dirty() now. */ if (!test_and_clear_bit(MD_DIRTY, &device->flags)) return; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 720fc30e2ecc..e09930c2b226 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1033,7 +1033,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct /* We do some synchronous IO below, which may take some time. * Clear the timer, to avoid scary "timer expired!" messages, * "Superblock" is written out at least twice below, anyways. */ - del_timer(&device->md_sync_timer); + timer_delete(&device->md_sync_timer); /* We won't change the "al-extents" setting, we just may need * to move the on-disk location of the activity log ringbuffer. diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0c9f54197768..e5a2e5f7887b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5187,7 +5187,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) atomic_set(&device->rs_pending_cnt, 0); wake_up(&device->misc_wait); - del_timer_sync(&device->resync_timer); + timer_delete_sync(&device->resync_timer); resync_timer_fn(&device->resync_timer); /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index abf0486f0d4f..e97432032f01 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -937,7 +937,7 @@ static void floppy_off(unsigned int drive) if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive)))) return; - del_timer(motor_off_timer + drive); + timer_delete(motor_off_timer + drive); /* make spindle stop in a position which minimizes spinup time * next time */ @@ -1918,7 +1918,7 @@ static int start_motor(void (*function)(void)) mask &= ~(0x10 << UNIT(current_drive)); /* starts motor and selects floppy */ - del_timer(motor_off_timer + current_drive); + timer_delete(motor_off_timer + current_drive); set_dor(current_fdc, mask, data); /* wait_for_completion also schedules reset if needed. */ @@ -4762,7 +4762,7 @@ static int __init do_floppy_init(void) for (drive = 0; drive < N_DRIVE; drive++) { if (!disks[drive][0]) break; - del_timer_sync(&motor_off_timer[drive]); + timer_delete_sync(&motor_off_timer[drive]); put_disk(disks[drive][0]); blk_mq_free_tag_set(&tag_sets[drive]); } @@ -4983,7 +4983,7 @@ static void __exit floppy_module_exit(void) destroy_workqueue(floppy_wq); for (drive = 0; drive < N_DRIVE; drive++) { - del_timer_sync(&motor_off_timer[drive]); + timer_delete_sync(&motor_off_timer[drive]); if (floppy_available(drive)) { for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 2b33fb5b949b..b5727dea15bd 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1070,7 +1070,7 @@ static void vdc_port_remove(struct vio_dev *vdev) flush_work(&port->ldc_reset_work); cancel_delayed_work_sync(&port->ldc_reset_timer_work); - del_timer_sync(&port->vio.timer); + timer_delete_sync(&port->vio.timer); del_gendisk(port->disk); put_disk(port->disk); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 3aedcb5add61..ee6cade70222 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -362,7 +362,7 @@ static void set_timeout(struct floppy_state *fs, int nticks, void (*proc)(struct timer_list *t)) { if (fs->timeout_pending) - del_timer(&fs->timeout); + timer_delete(&fs->timeout); fs->timeout.expires = jiffies + nticks; fs->timeout.function = proc; add_timer(&fs->timeout); @@ -677,7 +677,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS); out_8(&sw->select, RELAX); out_8(&sw->intr_enable, 0); - del_timer(&fs->timeout); + timer_delete(&fs->timeout); fs->timeout_pending = 0; if (sw->ctrack == 0xff) { swim3_err("%s", "Seen sector but cyl=ff?\n"); @@ -706,7 +706,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) out_8(&sw->control_bic, DO_SEEK); out_8(&sw->select, RELAX); out_8(&sw->intr_enable, 0); - del_timer(&fs->timeout); + timer_delete(&fs->timeout); fs->timeout_pending = 0; if (fs->state == seeking) ++fs->retries; @@ -716,7 +716,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) break; case settling: out_8(&sw->intr_enable, 0); - del_timer(&fs->timeout); + timer_delete(&fs->timeout); fs->timeout_pending = 0; act(fs); break; @@ -726,7 +726,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) out_8(&sw->intr_enable, 0); out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION); out_8(&sw->select, RELAX); - del_timer(&fs->timeout); + timer_delete(&fs->timeout); fs->timeout_pending = 0; dr = fs->dma; cp = fs->dma_cmd; diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index 36eabf61717f..1c7f89e134b3 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -638,7 +638,7 @@ static int bluecard_hci_close(struct hci_dev *hdev) bluecard_hci_flush(hdev); /* Stop LED timer */ - del_timer_sync(&(info->timer)); + timer_delete_sync(&(info->timer)); /* Disable power LED */ outb(0x00, iobase + 0x30); @@ -885,7 +885,7 @@ static void bluecard_release(struct pcmcia_device *link) bluecard_close(info); - del_timer_sync(&(info->timer)); + timer_delete_sync(&(info->timer)); pcmcia_disable_device(link); } diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 76878119d910..610d0e3c36d4 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -382,7 +382,7 @@ static void bcsp_pkt_cull(struct bcsp_struct *bcsp) } if (skb_queue_empty(&bcsp->unack)) - del_timer(&bcsp->tbcsp); + timer_delete(&bcsp->tbcsp); spin_unlock_irqrestore(&bcsp->unack.lock, flags); diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index c0436881a533..edafa228bf83 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -197,7 +197,7 @@ static void h5_peer_reset(struct hci_uart *hu) h5->state = H5_UNINITIALIZED; - del_timer(&h5->timer); + timer_delete(&h5->timer); skb_queue_purge(&h5->rel); skb_queue_purge(&h5->unrel); @@ -254,7 +254,7 @@ static int h5_close(struct hci_uart *hu) { struct h5 *h5 = hu->priv; - del_timer_sync(&h5->timer); + timer_delete_sync(&h5->timer); skb_queue_purge(&h5->unack); skb_queue_purge(&h5->rel); @@ -318,7 +318,7 @@ static void h5_pkt_cull(struct h5 *h5) } if (skb_queue_empty(&h5->unack)) - del_timer(&h5->timer); + timer_delete(&h5->timer); unlock: spin_unlock_irqrestore(&h5->unack.lock, flags); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index f2558506a02c..e00590ba24fd 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -867,7 +867,7 @@ static void device_woke_up(struct hci_uart *hu) skb_queue_tail(&qca->txq, skb); /* Switch timers and change state to HCI_IBS_TX_AWAKE */ - del_timer(&qca->wake_retrans_timer); + timer_delete(&qca->wake_retrans_timer); idle_delay = msecs_to_jiffies(qca->tx_idle_delay); mod_timer(&qca->tx_idle_timer, jiffies + idle_delay); qca->tx_ibs_state = HCI_IBS_TX_AWAKE; @@ -2239,8 +2239,8 @@ static int qca_power_off(struct hci_dev *hdev) hu->hdev->hw_error = NULL; hu->hdev->reset = NULL; - del_timer_sync(&qca->wake_retrans_timer); - del_timer_sync(&qca->tx_idle_timer); + timer_delete_sync(&qca->wake_retrans_timer); + timer_delete_sync(&qca->tx_idle_timer); /* Stop sending shutdown command if soc crashes. */ if (soc_type != QCA_ROME @@ -2629,10 +2629,10 @@ static int __maybe_unused qca_suspend(struct device *dev) switch (qca->tx_ibs_state) { case HCI_IBS_TX_WAKING: - del_timer(&qca->wake_retrans_timer); + timer_delete(&qca->wake_retrans_timer); fallthrough; case HCI_IBS_TX_AWAKE: - del_timer(&qca->tx_idle_timer); + timer_delete(&qca->tx_idle_timer); serdev_device_write_flush(hu->serdev); cmd = HCI_IBS_SLEEP_IND; diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index 474f1359c997..03aa88795209 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1096,7 +1096,7 @@ static void mhi_pci_recovery_work(struct work_struct *work) dev_warn(&pdev->dev, "device recovery started\n"); - del_timer(&mhi_pdev->health_check_timer); + timer_delete(&mhi_pdev->health_check_timer); pm_runtime_forbid(&pdev->dev); /* Clean up MHI state */ @@ -1293,7 +1293,7 @@ static void mhi_pci_remove(struct pci_dev *pdev) struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev); struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; - del_timer_sync(&mhi_pdev->health_check_timer); + timer_delete_sync(&mhi_pdev->health_check_timer); cancel_work_sync(&mhi_pdev->recovery_work); if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { @@ -1321,7 +1321,7 @@ static void mhi_pci_reset_prepare(struct pci_dev *pdev) dev_info(&pdev->dev, "reset\n"); - del_timer(&mhi_pdev->health_check_timer); + timer_delete(&mhi_pdev->health_check_timer); /* Clean up MHI state */ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { @@ -1431,7 +1431,7 @@ static int __maybe_unused mhi_pci_runtime_suspend(struct device *dev) if (test_and_set_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status)) return 0; - del_timer(&mhi_pdev->health_check_timer); + timer_delete(&mhi_pdev->health_check_timer); cancel_work_sync(&mhi_pdev->recovery_work); if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) || diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c index 27f5f9d19531..16618079298a 100644 --- a/drivers/char/dtlk.c +++ b/drivers/char/dtlk.c @@ -243,11 +243,11 @@ static __poll_t dtlk_poll(struct file *file, poll_table * wait) poll_wait(file, &dtlk_process_list, wait); if (dtlk_has_indexing && dtlk_readable()) { - del_timer(&dtlk_timer); + timer_delete(&dtlk_timer); mask = EPOLLIN | EPOLLRDNORM; } if (dtlk_writeable()) { - del_timer(&dtlk_timer); + timer_delete(&dtlk_timer); mask |= EPOLLOUT | EPOLLWRNORM; } /* there are no exception conditions */ @@ -322,7 +322,7 @@ static int dtlk_release(struct inode *inode, struct file *file) } TRACE_RET; - del_timer_sync(&dtlk_timer); + timer_delete_sync(&dtlk_timer); return 0; } diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index 4181bcc1c796..497fc167cb8c 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -167,7 +167,7 @@ static int __init hangcheck_init(void) static void __exit hangcheck_exit(void) { - del_timer_sync(&hangcheck_ticktock); + timer_delete_sync(&hangcheck_ticktock); printk("Hangcheck: Stopped hangcheck timer.\n"); } diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c index 39acaa503fec..a1a751074f7e 100644 --- a/drivers/char/hw_random/xgene-rng.c +++ b/drivers/char/hw_random/xgene-rng.c @@ -93,7 +93,7 @@ static void xgene_rng_expired_timer(struct timer_list *t) /* Clear failure counter as timer expired */ disable_irq(ctx->irq); ctx->failure_cnt = 0; - del_timer(&ctx->failure_timer); + timer_delete(&ctx->failure_timer); enable_irq(ctx->irq); } diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index 009e32033b17..77146b5c762b 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -465,7 +465,7 @@ static void bt_bmc_remove(struct platform_device *pdev) misc_deregister(&bt_bmc->miscdev); if (bt_bmc->irq < 0) - del_timer_sync(&bt_bmc->poll_timer); + timer_delete_sync(&bt_bmc->poll_timer); } static const struct of_device_id bt_bmc_match[] = { diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 1e5313748f8b..3ba9d7e9a6c7 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5538,7 +5538,7 @@ static void __exit cleanup_ipmi(void) * here. */ atomic_set(&stop_operation, 1); - del_timer_sync(&ipmi_timer); + timer_delete_sync(&ipmi_timer); initialized = false; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index eea23a3b966e..12b0b77eb1cc 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -859,7 +859,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, if (si_sm_result == SI_SM_IDLE && smi_info->timer_running) { /* Ok it if fails, the timer will just go off. */ - if (del_timer(&smi_info->si_timer)) + if (timer_delete(&smi_info->si_timer)) smi_info->timer_running = false; } @@ -1839,7 +1839,7 @@ static inline void stop_timer_and_thread(struct smi_info *smi_info) } smi_info->timer_can_start = false; - del_timer_sync(&smi_info->si_timer); + timer_delete_sync(&smi_info->si_timer); } static struct smi_info *find_dup_si(struct smi_info *info) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 506d9988721e..0b45b07dec22 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -599,7 +599,7 @@ static void ssif_alert(struct i2c_client *client, enum i2c_alert_protocol type, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); if (ssif_info->waiting_alert) { ssif_info->waiting_alert = false; - del_timer(&ssif_info->retry_timer); + timer_delete(&ssif_info->retry_timer); do_get = true; } else if (ssif_info->curr_msg) { ssif_info->got_alert = true; @@ -1268,8 +1268,8 @@ static void shutdown_ssif(void *send_info) schedule_timeout(1); ssif_info->stopping = true; - del_timer_sync(&ssif_info->watch_timer); - del_timer_sync(&ssif_info->retry_timer); + timer_delete_sync(&ssif_info->watch_timer); + timer_delete_sync(&ssif_info->retry_timer); if (ssif_info->thread) { complete(&ssif_info->wake_thread); kthread_stop(ssif_info->thread); diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c index c03bc1ec593a..a13a3470c17a 100644 --- a/drivers/char/ipmi/kcs_bmc_aspeed.c +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c @@ -428,7 +428,7 @@ static void aspeed_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask, if (rc == -ETIMEDOUT) mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD); } else { - del_timer(&priv->obe.timer); + timer_delete(&priv->obe.timer); } } @@ -655,7 +655,7 @@ static void aspeed_kcs_remove(struct platform_device *pdev) spin_lock_irq(&priv->obe.lock); priv->obe.remove = true; spin_unlock_irq(&priv->obe.lock); - del_timer_sync(&priv->obe.timer); + timer_delete_sync(&priv->obe.timer); } static const struct of_device_id ast_kcs_bmc_match[] = { diff --git a/drivers/char/ipmi/ssif_bmc.c b/drivers/char/ipmi/ssif_bmc.c index 310f17dd9511..e4bd74585d4d 100644 --- a/drivers/char/ipmi/ssif_bmc.c +++ b/drivers/char/ipmi/ssif_bmc.c @@ -209,7 +209,7 @@ static ssize_t ssif_bmc_write(struct file *file, const char __user *buf, size_t if (ret) goto exit; - del_timer(&ssif_bmc->response_timer); + timer_delete(&ssif_bmc->response_timer); ssif_bmc->response_timer_inited = false; memcpy(&ssif_bmc->response, &msg, count); diff --git a/drivers/char/random.c b/drivers/char/random.c index 92cbd24a36d8..38f2fab29c56 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1352,7 +1352,7 @@ static void __cold try_to_generate_entropy(void) } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); - del_timer_sync(&stack->timer); + timer_delete_sync(&stack->timer); destroy_timer_on_stack(&stack->timer); } diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 6c1d94eda5a2..b381ea7e85d2 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -838,7 +838,7 @@ static void __exit tlclk_cleanup(void) unregister_chrdev(tlclk_major, "telco_clock"); release_region(TLCLK_BASE, 8); - del_timer_sync(&switchover_timer); + timer_delete_sync(&switchover_timer); kfree(alarm_events); } @@ -856,7 +856,7 @@ static void switchover_timeout(struct timer_list *unused) } /* Alarm processing is done, wake up read task */ - del_timer(&switchover_timer); + timer_delete(&switchover_timer); got_event = 1; wake_up(&wq); } diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 48ff87444f85..11deaf538e87 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -160,7 +160,7 @@ ssize_t tpm_common_read(struct file *file, char __user *buf, out: if (!priv->response_length) { *off = 0; - del_timer_sync(&priv->user_read_timer); + timer_delete_sync(&priv->user_read_timer); flush_work(&priv->timeout_work); } mutex_unlock(&priv->buffer_mutex); @@ -267,7 +267,7 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) void tpm_common_release(struct file *file, struct file_priv *priv) { flush_work(&priv->async_work); - del_timer_sync(&priv->user_read_timer); + timer_delete_sync(&priv->user_read_timer); flush_work(&priv->timeout_work); file->private_data = NULL; priv->response_length = 0; diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 05ae9122823f..da17d891f0e5 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -418,9 +418,9 @@ static int waveform_ai_cancel(struct comedi_device *dev, spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ - del_timer(&devpriv->ai_timer); + timer_delete(&devpriv->ai_timer); } else { - del_timer_sync(&devpriv->ai_timer); + timer_delete_sync(&devpriv->ai_timer); } return 0; } @@ -628,9 +628,9 @@ static int waveform_ao_cancel(struct comedi_device *dev, spin_unlock_bh(&dev->spinlock); if (in_softirq()) { /* Assume we were called from the timer routine itself. */ - del_timer(&devpriv->ao_timer); + timer_delete(&devpriv->ao_timer); } else { - del_timer_sync(&devpriv->ao_timer); + timer_delete_sync(&devpriv->ao_timer); } return 0; } @@ -791,8 +791,8 @@ static void waveform_detach(struct comedi_device *dev) struct waveform_private *devpriv = dev->private; if (devpriv) { - del_timer_sync(&devpriv->ai_timer); - del_timer_sync(&devpriv->ao_timer); + timer_delete_sync(&devpriv->ai_timer); + timer_delete_sync(&devpriv->ao_timer); } } diff --git a/drivers/comedi/drivers/das16.c b/drivers/comedi/drivers/das16.c index 4ed56a02150e..f5ca6c0d4d0c 100644 --- a/drivers/comedi/drivers/das16.c +++ b/drivers/comedi/drivers/das16.c @@ -775,7 +775,7 @@ static int das16_cancel(struct comedi_device *dev, struct comedi_subdevice *s) /* disable SW timer */ if (devpriv->timer_running) { devpriv->timer_running = 0; - del_timer(&devpriv->timer); + timer_delete(&devpriv->timer); } if (devpriv->can_burst) @@ -940,7 +940,7 @@ static void das16_free_dma(struct comedi_device *dev) struct das16_private_struct *devpriv = dev->private; if (devpriv) { - del_timer_sync(&devpriv->timer); + timer_delete_sync(&devpriv->timer); comedi_isadma_free(devpriv->dma); } } diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index 951c23fa0369..cdc842b32bab 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -758,7 +758,7 @@ static void jr3_pci_detach(struct comedi_device *dev) struct jr3_pci_dev_private *devpriv = dev->private; if (devpriv) - del_timer_sync(&devpriv->timer); + timer_delete_sync(&devpriv->timer); comedi_pci_detach(dev); } diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 6094c530bf57..afe5abf89d33 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -802,7 +802,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else - del_timer_sync(&gpstates->timer); + timer_delete_sync(&gpstates->timer); gpstates_done: freq_data.gpstate_id = idx_to_pstate(gpstate_idx); @@ -880,7 +880,7 @@ static void powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); if (gpstates) - del_timer_sync(&gpstates->timer); + timer_delete_sync(&gpstates->timer); kfree(policy->driver_data); } diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 500b08e42282..f8d50bd227a6 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -2067,7 +2067,7 @@ static void artpec6_crypto_process_queue(struct artpec6_crypto *ac, if (ac->pending_count) mod_timer(&ac->timer, jiffies + msecs_to_jiffies(100)); else - del_timer(&ac->timer); + timer_delete(&ac->timer); } static void artpec6_crypto_timeout(struct timer_list *t) @@ -2963,7 +2963,7 @@ static void artpec6_crypto_remove(struct platform_device *pdev) tasklet_disable(&ac->task); devm_free_irq(&pdev->dev, irq, ac); tasklet_kill(&ac->task); - del_timer_sync(&ac->timer); + timer_delete_sync(&ac->timer); artpec6_crypto_disable_hw(ac); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index cf2ce3744ce6..9f80a45498f0 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -412,7 +412,7 @@ static int test_wait_timeout(void *arg) err = 0; err_free: - del_timer_sync(&wt.timer); + timer_delete_sync(&wt.timer); destroy_timer_on_stack(&wt.timer); dma_fence_signal(wt.f); dma_fence_put(wt.f); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index de8d7070904e..b96cc0a83872 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -324,7 +324,7 @@ static void imxdma_disable_hw(struct imxdma_channel *imxdmac) dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel); if (imxdma_hw_chain(imxdmac)) - del_timer(&imxdmac->watchdog); + timer_delete(&imxdmac->watchdog); local_irq_save(flags); imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) | @@ -454,7 +454,7 @@ static void dma_irq_handle_channel(struct imxdma_channel *imxdmac) } if (imxdma_hw_chain(imxdmac)) { - del_timer(&imxdmac->watchdog); + timer_delete(&imxdmac->watchdog); return; } } diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 79d8957f9e60..06a813cc7641 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -159,7 +159,7 @@ void ioat_stop(struct ioatdma_chan *ioat_chan) } /* flush inflight timers */ - del_timer_sync(&ioat_chan->timer); + timer_delete_sync(&ioat_chan->timer); /* flush inflight tasklet runs */ tasklet_kill(&ioat_chan->cleanup_task); diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index cc9ddd6c325b..02f68b328511 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1224,12 +1224,12 @@ static void ioat_shutdown(struct pci_dev *pdev) set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); spin_unlock_bh(&ioat_chan->prep_lock); /* - * Synchronization rule for del_timer_sync(): + * Synchronization rule for timer_delete_sync(): * - The caller must not hold locks which would prevent * completion of the timer's handler. * So prep_lock cannot be held before calling it. */ - del_timer_sync(&ioat_chan->timer); + timer_delete_sync(&ioat_chan->timer); /* this should quiesce then reset */ ioat_reset_hw(ioat_chan); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index e141d24a7644..b0f9ef6ac6df 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -39,7 +39,7 @@ static int try_cancel_split_timeout(struct fw_transaction *t) { if (t->is_split_transaction) - return del_timer(&t->split_timeout_timer); + return timer_delete(&t->split_timeout_timer); else return 1; } diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index 116eb465cdb4..b662b7e28b80 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -342,7 +342,7 @@ static int suspend_test_thread(void *arg) * Disable the timer to make sure that the timer will not trigger * later. */ - del_timer(&wakeup_timer); + timer_delete(&wakeup_timer); destroy_timer_on_stack(&wakeup_timer); if (atomic_dec_return_relaxed(&nb_active_threads) == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2f24a6aa13bf..5f5c00ace96b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -280,7 +280,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); - if (del_timer(&ring->fence_drv.fallback_timer) && + if (timer_delete(&ring->fence_drv.fallback_timer) && seq != ring->fence_drv.sync_seq) amdgpu_fence_schedule_fallback(ring); @@ -618,7 +618,7 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - del_timer_sync(&ring->fence_drv.fallback_timer); + timer_delete_sync(&ring->fence_drv.fallback_timer); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 85f774063f9b..fb212f0a1136 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1239,7 +1239,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - del_timer_sync(&ring->fence_drv.fallback_timer); + timer_delete_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index 1c66da1c3fb4..03ed14663107 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -124,7 +124,7 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) } } - del_timer(&mux->resubmit_timer); + timer_delete(&mux->resubmit_timer); mux->s_resubmit = false; } diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index ebc758c72891..20658258fb51 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1763,7 +1763,7 @@ static void tda998x_destroy(struct device *dev) if (priv->hdmi->irq) free_irq(priv->hdmi->irq, priv); - del_timer_sync(&priv->edid_delay_timer); + timer_delete_sync(&priv->edid_delay_timer); cancel_work_sync(&priv->detect_work); i2c_unregister_device(priv->cec); diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 94e45ed6869d..78958ddf8485 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -508,7 +508,7 @@ static void drm_vblank_init_release(struct drm_device *dev, void *ptr) drm_core_check_feature(dev, DRIVER_MODESET)); drm_vblank_destroy_worker(vblank); - del_timer_sync(&vblank->disable_timer); + timer_delete_sync(&vblank->disable_timer); } /** diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index fd388b1dbe68..08cf79a62025 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -427,7 +427,7 @@ static void vidi_unbind(struct device *dev, struct device *master, void *data) { struct vidi_context *ctx = dev_get_drvdata(dev); - del_timer_sync(&ctx->timer); + timer_delete_sync(&ctx->timer); } static const struct component_ops vidi_component_ops = { diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index e163649816d5..77cfcf37ddd2 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -254,7 +254,7 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len) usb_sg_wait(&ctx.sgr); - if (!del_timer_sync(&ctx.timer)) + if (!timer_delete_sync(&ctx.timer)) ret = -ETIMEDOUT; else if (ctx.sgr.status < 0) ret = ctx.sgr.status; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4a80ffa1b962..03baa7fa0a27 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2502,7 +2502,7 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); ENGINE_TRACE(engine, "semaphore yield: %08x\n", engine->execlists.yield); - if (del_timer(&engine->execlists.timer)) + if (timer_delete(&engine->execlists.timer)) tasklet = true; } @@ -3370,8 +3370,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) static void execlists_shutdown(struct intel_engine_cs *engine) { /* Synchronise with residual timers and any softirq they raise */ - del_timer_sync(&engine->execlists.timer); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.timer); + timer_delete_sync(&engine->execlists.preempt); tasklet_kill(&engine->sched_engine->tasklet); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 2cfaedb04876..64e9317f58fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -161,7 +161,7 @@ static void rps_start_timer(struct intel_rps *rps) static void rps_stop_timer(struct intel_rps *rps) { - del_timer_sync(&rps->timer); + timer_delete_sync(&rps->timer); rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); cancel_work_sync(&rps->work); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index c0637bf799a3..64315b714743 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -297,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) struct i915_request *rq; unsigned long flags; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irqsave(&engine->sched_engine->lock, flags); @@ -432,7 +432,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) container_of(engine, typeof(*mock), base); struct i915_request *request, *rn; - del_timer_sync(&mock->hw_delay); + timer_delete_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index d7717de17ecc..0454eb1814bb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1198,7 +1198,7 @@ static int live_timeslice_rewind(void *arg) ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); + timer_delete(&engine->execlists.timer); tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } @@ -2357,7 +2357,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) /* force preempt reset [failure] */ while (!engine->execlists.pending[0]) intel_engine_flush_submission(engine); - del_timer_sync(&engine->execlists.preempt); + timer_delete_sync(&engine->execlists.preempt); intel_engine_flush_submission(engine); cancel_reset_timeout(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 1bf7b88d9a9d..401bee030dbc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -660,7 +660,7 @@ static int live_emit_pte_full_ring(void *arg) out_rq: i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */ - del_timer_sync(&st.timer); + timer_delete_sync(&st.timer); destroy_timer_on_stack(&st.timer); out_unpin: intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 2576f8f6c0f6..b60c28fbd207 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -52,7 +52,7 @@ void cancel_timer(struct timer_list *t) if (!timer_active(t)) return; - del_timer(t); + timer_delete(t); WRITE_ONCE(t->expires, 0); } diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 87f246047312..07e81be4d392 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -163,7 +163,7 @@ void intel_wakeref_auto(struct intel_wakeref_auto *wf, unsigned long timeout) unsigned long flags; if (!timeout) { - if (del_timer_sync(&wf->timer)) + if (timer_delete_sync(&wf->timer)) wakeref_auto_timeout(&wf->timer); return; } diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index bf2752cc1e0b..d5ecc68155da 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -74,7 +74,7 @@ void timed_fence_init(struct timed_fence *tf, unsigned long expires) void timed_fence_fini(struct timed_fence *tf) { - if (del_timer_sync(&tf->timer)) + if (timer_delete_sync(&tf->timer)) i915_sw_fence_commit(&tf->fence); destroy_timer_on_stack(&tf->timer); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index ccdc57cef3ea..fed3307d3374 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2847,7 +2847,7 @@ static void mtk_dp_remove(struct platform_device *pdev) pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); if (mtk_dp->data->bridge_type != DRM_MODE_CONNECTOR_eDP) - del_timer_sync(&mtk_dp->debounce_timer); + timer_delete_sync(&mtk_dp->debounce_timer); platform_device_unregister(mtk_dp->phy_dev); if (mtk_dp->audio_pdev) platform_device_unregister(mtk_dp->audio_pdev); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 71dca78cd7a5..650e5bac225f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1253,7 +1253,7 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 0469fea55010..36f72c43eae8 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -182,7 +182,7 @@ void a5xx_preempt_irq(struct msm_gpu *gpu) return; /* Delete the preemption watchdog timer */ - del_timer(&a5xx_gpu->preempt_timer); + timer_delete(&a5xx_gpu->preempt_timer); /* * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 38c94915d4c9..c8711938a5f4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -28,7 +28,7 @@ static void a6xx_gmu_fault(struct a6xx_gmu *gmu) gmu->hung = true; /* Turn off the hangcheck timer while we are resetting */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); /* Queue the GPU handler because we need to treat this as a recovery */ kthread_queue_work(gpu->worker, &gpu->recover_work); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 1820c167fcee..06465bc2d0b4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1706,7 +1706,7 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1726,7 +1726,7 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) */ if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index 2fd4e39f618f..9b5e27d2373c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -146,7 +146,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) return; /* Delete the preemption watchdog timer */ - del_timer(&a6xx_gpu->preempt_timer); + timer_delete(&a6xx_gpu->preempt_timer); /* * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 7156cda07b03..26db1f4b5fb9 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -292,7 +292,7 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, if (do_devcoredump) { /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); gpu->fault_info.ttbr0 = info->ttbr0; gpu->fault_info.iova = iova; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 284e69bb47c1..8610bbf2b87c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1410,7 +1410,7 @@ static void dpu_encoder_virt_atomic_disable(struct drm_encoder *drm_enc, /* after phys waits for frame-done, should be no more frames pending */ if (atomic_xchg(&dpu_enc->frame_done_timeout_ms, 0)) { DPU_ERROR("enc%d timeout pending\n", drm_enc->base.id); - del_timer_sync(&dpu_enc->frame_done_timer); + timer_delete_sync(&dpu_enc->frame_done_timer); } dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_STOP); @@ -1582,7 +1582,7 @@ void dpu_encoder_frame_done_callback( if (!dpu_enc->frame_busy_mask[0]) { atomic_set(&dpu_enc->frame_done_timeout_ms, 0); - del_timer(&dpu_enc->frame_done_timer); + timer_delete(&dpu_enc->frame_done_timer); dpu_encoder_resource_control(drm_enc, DPU_ENC_RC_EVENT_FRAME_DONE); diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 59d20eb8a7e0..9b9cc593790c 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -452,7 +452,7 @@ static irqreturn_t omap_dsi_irq_handler(int irq, void *arg) #ifdef DSI_CATCH_MISSING_TE if (irqstatus & DSI_IRQ_TE_TRIGGER) - del_timer(&dsi->te_timer); + timer_delete(&dsi->te_timer); #endif /* make a copy and unlock, so that isrs can unregister diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index fb450b6a4d44..7125773889f1 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -1043,7 +1043,7 @@ static void vc4_bo_cache_destroy(struct drm_device *dev, void *unused) struct vc4_dev *vc4 = to_vc4_dev(dev); int i; - del_timer(&vc4->bo_cache.time_timer); + timer_delete(&vc4->bo_cache.time_timer); cancel_work_sync(&vc4->bo_cache.time_work); vc4_bo_cache_purge(dev); diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index e15754178395..37bb1fb58cf9 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -49,7 +49,7 @@ static void vgem_fence_release(struct dma_fence *base) { struct vgem_fence *fence = container_of(base, typeof(*fence), base); - del_timer_sync(&fence->timer); + timer_delete_sync(&fence->timer); dma_fence_free(&fence->base); } diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 9fbed1a2fcc6..788f56b066b6 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -297,7 +297,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, void xe_execlist_port_destroy(struct xe_execlist_port *port) { - del_timer(&port->irq_fail); + timer_delete(&port->irq_fail); /* Prevent an interrupt while we're destroying */ spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); diff --git a/drivers/greybus/operation.c b/drivers/greybus/operation.c index 8459e9bc0749..f6beeebf974c 100644 --- a/drivers/greybus/operation.c +++ b/drivers/greybus/operation.c @@ -279,7 +279,7 @@ static void gb_operation_work(struct work_struct *work) if (gb_operation_is_incoming(operation)) { gb_operation_request_handle(operation); } else { - ret = del_timer_sync(&operation->timer); + ret = timer_delete_sync(&operation->timer); if (!ret) { /* Cancel request message if scheduled by timeout. */ if (gb_operation_result(operation) == -ETIMEDOUT) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d900dd05c335..ed34f5cd5a91 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -950,7 +950,7 @@ static int apple_probe(struct hid_device *hdev, return 0; out_err: - del_timer_sync(&asc->battery_timer); + timer_delete_sync(&asc->battery_timer); hid_hw_stop(hdev); return ret; } @@ -959,7 +959,7 @@ static void apple_remove(struct hid_device *hdev) { struct apple_sc *asc = hid_get_drvdata(hdev); - del_timer_sync(&asc->battery_timer); + timer_delete_sync(&asc->battery_timer); hid_hw_stop(hdev); } diff --git a/drivers/hid/hid-appleir.c b/drivers/hid/hid-appleir.c index c45e5aa569d2..bb7db9ae41c2 100644 --- a/drivers/hid/hid-appleir.c +++ b/drivers/hid/hid-appleir.c @@ -319,7 +319,7 @@ static void appleir_remove(struct hid_device *hid) { struct appleir *appleir = hid_get_drvdata(hid); hid_hw_stop(hid); - del_timer_sync(&appleir->key_up_timer); + timer_delete_sync(&appleir->key_up_timer); } static const struct hid_device_id appleir_devices[] = { diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index d4b95aa3eecb..029ccbaa1d12 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -448,7 +448,7 @@ static void appletb_kbd_remove(struct hid_device *hdev) appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); input_unregister_handler(&kbd->inp_handler); - del_timer_sync(&kbd->inactivity_timer); + timer_delete_sync(&kbd->inactivity_timer); hid_hw_close(hdev); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index a76f17158539..adfa329e917b 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -915,7 +915,7 @@ static int magicmouse_probe(struct hid_device *hdev, return 0; err_stop_hw: - del_timer_sync(&msc->battery_timer); + timer_delete_sync(&msc->battery_timer); hid_hw_stop(hdev); return ret; } @@ -926,7 +926,7 @@ static void magicmouse_remove(struct hid_device *hdev) if (msc) { cancel_delayed_work_sync(&msc->work); - del_timer_sync(&msc->battery_timer); + timer_delete_sync(&msc->battery_timer); } hid_hw_stop(hdev); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e50887a6d22c..7ac8e16e6158 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1299,7 +1299,7 @@ static void mt_touch_report(struct hid_device *hid, mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else - del_timer(&td->release_timer); + timer_delete(&td->release_timer); } clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); @@ -1881,7 +1881,7 @@ static void mt_remove(struct hid_device *hdev) { struct mt_device *td = hid_get_drvdata(hdev); - del_timer_sync(&td->release_timer); + timer_delete_sync(&td->release_timer); sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index ff9078ad1961..b0c40a245bf8 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -1102,7 +1102,7 @@ static void shield_remove(struct hid_device *hdev) hid_hw_close(hdev); thunderstrike_destroy(ts); - del_timer_sync(&ts->psy_stats_timer); + timer_delete_sync(&ts->psy_stats_timer); cancel_work_sync(&ts->hostcmd_req_work); hid_hw_stop(hdev); } diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 3d08c190a935..c6b922c2adba 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -254,7 +254,7 @@ static void stop_sustain_timers(struct pcmidi_snd *pm) for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; pms->in_use = 1; - del_timer_sync(&pms->timer); + timer_delete_sync(&pms->timer); } } diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 5258b45684e8..a2be652b7bbd 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2164,7 +2164,7 @@ static void sony_remove(struct hid_device *hdev) struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { - del_timer_sync(&sc->ghl_poke_timer); + timer_delete_sync(&sc->ghl_poke_timer); usb_free_urb(sc->ghl_urb); } diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index d8008933c052..a367df6ea01f 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -474,7 +474,7 @@ static void uclogic_remove(struct hid_device *hdev) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); - del_timer_sync(&drvdata->inrange_timer); + timer_delete_sync(&drvdata->inrange_timer); hid_hw_stop(hdev); kfree(drvdata->desc_ptr); uclogic_params_cleanup(&drvdata->params); diff --git a/drivers/hid/hid-wiimote-core.c b/drivers/hid/hid-wiimote-core.c index 26167cfb696f..8080083121d3 100644 --- a/drivers/hid/hid-wiimote-core.c +++ b/drivers/hid/hid-wiimote-core.c @@ -1171,7 +1171,7 @@ static void wiimote_init_hotplug(struct wiimote_data *wdata) wiimote_cmd_release(wdata); /* delete MP hotplug timer */ - del_timer_sync(&wdata->timer); + timer_delete_sync(&wdata->timer); } else { /* reschedule MP hotplug timer */ if (!(flags & WIIPROTO_FLAG_BUILTIN_MP) && diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 44c2351b870f..7d9297fad90e 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1462,13 +1462,13 @@ static void usbhid_disconnect(struct usb_interface *intf) static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid) { - del_timer_sync(&usbhid->io_retry); + timer_delete_sync(&usbhid->io_retry); cancel_work_sync(&usbhid->reset_work); } static void hid_cease_io(struct usbhid_device *usbhid) { - del_timer_sync(&usbhid->io_retry); + timer_delete_sync(&usbhid->io_retry); usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbctrl); usb_kill_urb(usbhid->urbout); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 97393a3083ca..1556d4287fa5 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2896,7 +2896,7 @@ static void wacom_remove(struct hid_device *hdev) cancel_work_sync(&wacom->battery_work); cancel_work_sync(&wacom->remote_work); cancel_work_sync(&wacom->mode_change_work); - del_timer_sync(&wacom->idleprox_timer); + timer_delete_sync(&wacom->idleprox_timer); if (hdev->bus == BUS_BLUETOOTH) device_remove_file(&hdev->dev, &dev_attr_speed); diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 6105ea9a6c6a..cbc5e72857de 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -281,9 +281,9 @@ static void ssip_set_rxstate(struct ssi_protocol *ssi, unsigned int state) ssi->recv_state = state; switch (state) { case RECV_IDLE: - del_timer(&ssi->rx_wd); + timer_delete(&ssi->rx_wd); if (ssi->send_state == SEND_IDLE) - del_timer(&ssi->keep_alive); + timer_delete(&ssi->keep_alive); break; case RECV_READY: /* CMT speech workaround */ @@ -306,9 +306,9 @@ static void ssip_set_txstate(struct ssi_protocol *ssi, unsigned int state) switch (state) { case SEND_IDLE: case SEND_READY: - del_timer(&ssi->tx_wd); + timer_delete(&ssi->tx_wd); if (ssi->recv_state == RECV_IDLE) - del_timer(&ssi->keep_alive); + timer_delete(&ssi->keep_alive); break; case WAIT4READY: case SENDING: @@ -398,9 +398,9 @@ static void ssip_reset(struct hsi_client *cl) if (test_and_clear_bit(SSIP_WAKETEST_FLAG, &ssi->flags)) ssi_waketest(cl, 0); /* FIXME: To be removed */ spin_lock_bh(&ssi->lock); - del_timer(&ssi->rx_wd); - del_timer(&ssi->tx_wd); - del_timer(&ssi->keep_alive); + timer_delete(&ssi->rx_wd); + timer_delete(&ssi->tx_wd); + timer_delete(&ssi->keep_alive); cancel_work_sync(&ssi->work); ssi->main_state = 0; ssi->send_state = 0; @@ -648,7 +648,7 @@ static void ssip_rx_data_complete(struct hsi_msg *msg) ssip_error(cl); return; } - del_timer(&ssi->rx_wd); /* FIXME: Revisit */ + timer_delete(&ssi->rx_wd); /* FIXME: Revisit */ skb = msg->context; ssip_pn_rx(skb); hsi_free_msg(msg); @@ -731,7 +731,7 @@ static void ssip_rx_waketest(struct hsi_client *cl, u32 cmd) spin_lock_bh(&ssi->lock); ssi->main_state = ACTIVE; - del_timer(&ssi->tx_wd); /* Stop boot handshake timer */ + timer_delete(&ssi->tx_wd); /* Stop boot handshake timer */ spin_unlock_bh(&ssi->lock); dev_notice(&cl->device, "WAKELINES TEST %s\n", diff --git a/drivers/hte/hte-tegra194-test.c b/drivers/hte/hte-tegra194-test.c index f890b79723af..94e931f45305 100644 --- a/drivers/hte/hte-tegra194-test.c +++ b/drivers/hte/hte-tegra194-test.c @@ -221,7 +221,7 @@ static void tegra_hte_test_remove(struct platform_device *pdev) free_irq(hte.gpio_in_irq, &hte); gpiod_put(hte.gpio_in); gpiod_put(hte.gpio_out); - del_timer_sync(&hte.timer); + timer_delete_sync(&hte.timer); } static struct platform_driver tegra_hte_test_driver = { diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 579d31bb9ac7..d506a5e7e033 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -483,7 +483,7 @@ static void pwm_fan_cleanup(void *__ctx) { struct pwm_fan_ctx *ctx = __ctx; - del_timer_sync(&ctx->rpm_timer); + timer_delete_sync(&ctx->rpm_timer); /* Switch off everything */ ctx->enable_mode = pwm_disable_reg_disable; pwm_fan_power_off(ctx, true); diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 02f75cf310aa..5d8b82ae6fd6 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1122,7 +1122,7 @@ static int img_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, time_left = wait_for_completion_timeout(&i2c->msg_complete, IMG_I2C_TIMEOUT); - del_timer_sync(&i2c->check_timer); + timer_delete_sync(&i2c->check_timer); if (time_left == 0) i2c->msg_status = -ETIMEDOUT; diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c index 65f8a2b13cfd..22ea10eb48ae 100644 --- a/drivers/iio/common/ssp_sensors/ssp_dev.c +++ b/drivers/iio/common/ssp_sensors/ssp_dev.c @@ -190,7 +190,7 @@ static void ssp_enable_wdt_timer(struct ssp_data *data) static void ssp_disable_wdt_timer(struct ssp_data *data) { - del_timer_sync(&data->wdt_timer); + timer_delete_sync(&data->wdt_timer); cancel_work_sync(&data->work_wdt); } @@ -589,7 +589,7 @@ static void ssp_remove(struct spi_device *spi) free_irq(data->spi->irq, data); - del_timer_sync(&data->wdt_timer); + timer_delete_sync(&data->wdt_timer); cancel_work_sync(&data->work_wdt); mutex_destroy(&data->comm_lock); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8d753e6e0c71..e02721a9e288 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -191,7 +191,7 @@ static void start_ep_timer(struct c4iw_ep *ep) static int stop_ep_timer(struct c4iw_ep *ep) { pr_debug("ep %p stopping\n", ep); - del_timer_sync(&ep->timer); + timer_delete_sync(&ep->timer); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); return 0; diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c index a3c53be4072c..9b508eaf441d 100644 --- a/drivers/infiniband/hw/hfi1/aspm.c +++ b/drivers/infiniband/hw/hfi1/aspm.c @@ -191,7 +191,7 @@ void aspm_disable_all(struct hfi1_devdata *dd) for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = hfi1_rcd_get_by_index(dd, i); if (rcd) { - del_timer_sync(&rcd->aspm_timer); + timer_delete_sync(&rcd->aspm_timer); spin_lock_irqsave(&rcd->aspm_lock, flags); rcd->aspm_intr_enable = false; spin_unlock_irqrestore(&rcd->aspm_lock, flags); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 368b6be3226f..e908f529335d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5576,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd) static void free_rcverr(struct hfi1_devdata *dd) { if (dd->rcverr_timer.function) - del_timer_sync(&dd->rcverr_timer); + timer_delete_sync(&dd->rcverr_timer); } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -12308,7 +12308,7 @@ static void free_cntrs(struct hfi1_devdata *dd) int i; if (dd->synth_stats_timer.function) - del_timer_sync(&dd->synth_stats_timer); + timer_delete_sync(&dd->synth_stats_timer); cancel_work_sync(&dd->update_cntr_work); ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 50826e7cdb7e..3da90f2eb8e7 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1303,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) */ smp_rmb(); if (atomic_read(&ppd->led_override_timer_active)) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); /* Ensure the atomic_set is visible to all CPUs */ smp_wmb(); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index d6fbd9c2b8b4..b35f92e7d865 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -985,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (ppd->led_override_timer.function) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b67d23b1f286..0d2b39b7c8b5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1575,7 +1575,7 @@ void sdma_exit(struct hfi1_devdata *dd) sde->this_idx); sdma_process_event(sde, sdma_event_e00_go_hw_down); - del_timer_sync(&sde->err_progress_check_timer); + timer_delete_sync(&sde->err_progress_check_timer); /* * This waits for the state machine to exit so it is not diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index c465966a1d9c..78bf4a48c035 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3965,7 +3965,7 @@ static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp) lockdep_assert_held(&qp->s_lock); if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) { - rval = del_timer(&qpriv->s_tid_timer); + rval = timer_delete(&qpriv->s_tid_timer); qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER; } return rval; @@ -3975,7 +3975,7 @@ void hfi1_del_tid_reap_timer(struct rvt_qp *qp) { struct hfi1_qp_priv *qpriv = qp->priv; - del_timer_sync(&qpriv->s_tid_timer); + timer_delete_sync(&qpriv->s_tid_timer); qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER; } @@ -4781,7 +4781,7 @@ static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp) lockdep_assert_held(&qp->s_lock); if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) { - rval = del_timer(&priv->s_tid_retry_timer); + rval = timer_delete(&priv->s_tid_retry_timer); priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER; } return rval; @@ -4791,7 +4791,7 @@ void hfi1_del_tid_retry_timer(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - del_timer_sync(&priv->s_tid_retry_timer); + timer_delete_sync(&priv->s_tid_retry_timer); priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 33af2196ef31..49e0f79b950c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1900,7 +1900,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) if (!list_empty(&dev->memwait)) dd_dev_err(dd, "memwait list not empty!\n"); - del_timer_sync(&dev->mem_timer); + timer_delete_sync(&dev->mem_timer); verbs_txreq_exit(dev); kfree(dev_cntr_descs); diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index ce8d821bdad8..23207f13ac1b 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3303,7 +3303,7 @@ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) if (!cm_core) return; - del_timer_sync(&cm_core->tcp_timer); + timer_delete_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index e73b14fd95ef..d66b4f7a84ec 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -963,7 +963,7 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp) int ret; iwqp = qp->qp_uk.back_qp; - ret = del_timer(&iwqp->terminate_timer); + ret = timer_delete(&iwqp->terminate_timer); if (ret) irdma_qp_rem_ref(&iwqp->ibqp); } @@ -1570,7 +1570,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi) { struct irdma_vsi_pestat *devstat = vsi->pestat; - del_timer_sync(&devstat->stats_timer); + timer_delete_sync(&devstat->stats_timer); } /** diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b7c8c926c578..5fbebafc8774 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1026,7 +1026,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5r_destroy_cache_entries(dev); destroy_workqueue(dev->cache.wq); - del_timer_sync(&dev->delay_timer); + timer_delete_sync(&dev->delay_timer); } struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index ffb98eaaf1c2..6eabef9aa211 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -171,7 +171,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev) void mthca_stop_catas_poll(struct mthca_dev *dev) { - del_timer_sync(&dev->catas_err.timer); + timer_delete_sync(&dev->catas_err.timer); if (dev->catas_err.map) iounmap(dev->catas_err.map); diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 4fcbef99e400..bdd724add147 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -768,7 +768,7 @@ int qib_reset_device(int unit) ppd = dd->pport + pidx; if (atomic_read(&ppd->led_override_timer_active)) { /* Need to stop LED timer, _then_ shut off LEDs */ - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 78dfe98ebcf7..302c0d19f57d 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1656,7 +1656,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.function) /* if initted */ - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { @@ -2605,7 +2605,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * wait forpending timer, but don't clear .data (ppd)! */ if (ppd->cpspec->chase_timer.expires) { - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); ppd->cpspec->chase_timer.expires = 0; } break; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 9db29916e35a..7b4bf06c3b38 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2512,7 +2512,7 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.function) /* if initted */ - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); /* * Despite the name, actually disables IBC as well. Do it when @@ -4239,7 +4239,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * wait forpending timer, but don't clear .data (ppd)! */ if (ppd->cpspec->chase_timer.expires) { - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); ppd->cpspec->chase_timer.expires = 0; } break; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 4100656fe9a3..33c23adec101 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -796,19 +796,19 @@ static void qib_stop_timers(struct qib_devdata *dd) int pidx; if (dd->stats_timer.function) - del_timer_sync(&dd->stats_timer); + timer_delete_sync(&dd->stats_timer); if (dd->intrchk_timer.function) - del_timer_sync(&dd->intrchk_timer); + timer_delete_sync(&dd->intrchk_timer); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (ppd->hol_timer.function) - del_timer_sync(&ppd->hol_timer); + timer_delete_sync(&ppd->hol_timer); if (ppd->led_override_timer.function) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } if (ppd->symerr_clear_timer.function) - del_timer_sync(&ppd->symerr_clear_timer); + timer_delete_sync(&ppd->symerr_clear_timer); } } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ef02f2bfddb2..568deb77ab4d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2441,7 +2441,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) struct qib_devdata, verbs_dev); if (dd->pport[port_idx].cong_stats.timer.function) - del_timer_sync(&dd->pport[port_idx].cong_stats.timer); + timer_delete_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 1dc3ccf0cf1f..c4ee120ac7fb 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1375,7 +1375,7 @@ void toggle_7220_rclkrls(struct qib_devdata *dd) void shutdown_7220_relock_poll(struct qib_devdata *dd) { if (dd->cspec->relock_timer_active) - del_timer_sync(&dd->cspec->relock_timer); + timer_delete_sync(&dd->cspec->relock_timer); } static unsigned qib_relock_by_timer = 1; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5fcb41970ad9..9832567a8bb8 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1655,7 +1655,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) if (!list_empty(&dev->memwait)) qib_dev_err(dd, "memwait list not empty!\n"); - del_timer_sync(&dev->mem_timer); + timer_delete_sync(&dev->mem_timer); while (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; struct qib_verbs_txreq *tx; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 614009fb9632..583debe4b9a2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1297,7 +1297,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); + timer_delete(&qp->s_timer); } if (qp->s_flags & RVT_S_ANY_WAIT_SEND) @@ -2546,7 +2546,7 @@ void rvt_stop_rc_timers(struct rvt_qp *qp) /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); + timer_delete(&qp->s_timer); hrtimer_try_to_cancel(&qp->s_rnr_timer); } } @@ -2575,7 +2575,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) */ void rvt_del_timers_sync(struct rvt_qp *qp) { - del_timer_sync(&qp->s_timer); + timer_delete_sync(&qp->s_timer); hrtimer_cancel(&qp->s_rnr_timer); } EXPORT_SYMBOL(rvt_del_timers_sync); @@ -2596,7 +2596,7 @@ static void rvt_rc_timeout(struct timer_list *t) qp->s_flags &= ~RVT_S_TIMER; rvp->n_rc_timeouts++; - del_timer(&qp->s_timer); + timer_delete(&qp->s_timer); trace_rvt_rc_timeout(qp, qp->s_last_psn + 1); if (rdi->driver_f.notify_restart_rc) rdi->driver_f.notify_restart_rc(qp, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 91d329e90308..7975fb0e2782 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -812,8 +812,8 @@ static void rxe_qp_do_cleanup(struct work_struct *work) qp->qp_timeout_jiffies = 0; if (qp_type(qp) == IB_QPT_RC) { - del_timer_sync(&qp->retrans_timer); - del_timer_sync(&qp->rnr_nak_timer); + timer_delete_sync(&qp->retrans_timer); + timer_delete_sync(&qp->rnr_nak_timer); } if (qp->recv_task.func) diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c index e9120ba6bae0..009822fa61b8 100644 --- a/drivers/input/ff-memless.c +++ b/drivers/input/ff-memless.c @@ -136,7 +136,7 @@ static void ml_schedule_timer(struct ml_device *ml) if (!events) { pr_debug("no actions\n"); - del_timer(&ml->timer); + timer_delete(&ml->timer); } else { pr_debug("timer set\n"); mod_timer(&ml->timer, earliest); @@ -489,7 +489,7 @@ static void ml_ff_destroy(struct ff_device *ff) * do not actually stop the timer, and therefore we should * do it here. */ - del_timer_sync(&ml->timer); + timer_delete_sync(&ml->timer); kfree(ml->private); } diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index 10cc95867415..ae51f108bfae 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -191,7 +191,7 @@ void gameport_stop_polling(struct gameport *gameport) spin_lock(&gameport->timer_lock); if (!--gameport->poll_cnt) - del_timer(&gameport->poll_timer); + timer_delete(&gameport->poll_timer); spin_unlock(&gameport->timer_lock); } @@ -847,7 +847,7 @@ EXPORT_SYMBOL(gameport_open); void gameport_close(struct gameport *gameport) { - del_timer_sync(&gameport->poll_timer); + timer_delete_sync(&gameport->poll_timer); gameport->poll_handler = NULL; gameport->poll_interval = 0; gameport_set_drv(gameport, NULL); diff --git a/drivers/input/input.c b/drivers/input/input.c index c9e3ac64bcd0..ec4346f20efd 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -96,7 +96,7 @@ static void input_start_autorepeat(struct input_dev *dev, int code) static void input_stop_autorepeat(struct input_dev *dev) { - del_timer(&dev->timer); + timer_delete(&dev->timer); } /* @@ -2223,7 +2223,7 @@ static void __input_unregister_device(struct input_dev *dev) handle->handler->disconnect(handle); WARN_ON(!list_empty(&dev->h_list)); - del_timer_sync(&dev->timer); + timer_delete_sync(&dev->timer); list_del_init(&dev->node); input_wakeup_procfs_readers(); diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c index a9f1946cf0d6..d7a253835889 100644 --- a/drivers/input/joystick/db9.c +++ b/drivers/input/joystick/db9.c @@ -531,7 +531,7 @@ static void db9_close(struct input_dev *dev) guard(mutex)(&db9->mutex); if (!--db9->used) { - del_timer_sync(&db9->timer); + timer_delete_sync(&db9->timer); parport_write_control(port, 0x00); parport_data_forward(port); parport_release(db9->pd); diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c index b53cafd7a5ee..9fc629ad58b8 100644 --- a/drivers/input/joystick/gamecon.c +++ b/drivers/input/joystick/gamecon.c @@ -786,7 +786,7 @@ static void gc_close(struct input_dev *dev) guard(mutex)(&gc->mutex); if (!--gc->used) { - del_timer_sync(&gc->timer); + timer_delete_sync(&gc->timer); parport_write_control(gc->pd->port, 0x00); parport_release(gc->pd); } diff --git a/drivers/input/joystick/n64joy.c b/drivers/input/joystick/n64joy.c index c344dbc0c493..94d2f4e96fe6 100644 --- a/drivers/input/joystick/n64joy.c +++ b/drivers/input/joystick/n64joy.c @@ -216,7 +216,7 @@ static void n64joy_close(struct input_dev *dev) guard(mutex)(&priv->n64joy_mutex); if (!--priv->n64joy_opened) - del_timer_sync(&priv->timer); + timer_delete_sync(&priv->timer); } static const u64 __initconst scandata[] ____cacheline_aligned = { diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c index db696ba61a3b..aa3e7d471b96 100644 --- a/drivers/input/joystick/turbografx.c +++ b/drivers/input/joystick/turbografx.c @@ -124,7 +124,7 @@ static void tgfx_close(struct input_dev *dev) guard(mutex)(&tgfx->sem); if (!--tgfx->used) { - del_timer_sync(&tgfx->timer); + timer_delete_sync(&tgfx->timer); parport_write_control(tgfx->pd->port, 0x00); parport_release(tgfx->pd); } diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c index b92268ddfd84..3cd47fa44efc 100644 --- a/drivers/input/keyboard/imx_keypad.c +++ b/drivers/input/keyboard/imx_keypad.c @@ -370,7 +370,7 @@ static void imx_keypad_close(struct input_dev *dev) /* Mark keypad as being inactive */ keypad->enabled = false; synchronize_irq(keypad->irq); - del_timer_sync(&keypad->check_matrix_timer); + timer_delete_sync(&keypad->check_matrix_timer); imx_keypad_inhibit(keypad); diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index f7b5f1e25c80..bbf409dda89f 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -104,7 +104,7 @@ static void imx_snvs_pwrkey_act(void *pdata) { struct pwrkey_drv_data *pd = pdata; - del_timer_sync(&pd->check_timer); + timer_delete_sync(&pd->check_timer); } static int imx_snvs_pwrkey_probe(struct platform_device *pdev) diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 6776dd94ce76..32a676f0de53 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -416,7 +416,7 @@ static void tegra_kbc_stop(struct tegra_kbc *kbc) } disable_irq(kbc->irq); - del_timer_sync(&kbc->timer); + timer_delete_sync(&kbc->timer); clk_disable_unprepare(kbc->clk); } @@ -703,7 +703,7 @@ static int tegra_kbc_suspend(struct device *dev) if (device_may_wakeup(&pdev->dev)) { disable_irq(kbc->irq); - del_timer_sync(&kbc->timer); + timer_delete_sync(&kbc->timer); tegra_kbc_set_fifo_interrupt(kbc, false); /* Forcefully clear the interrupt status */ diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 0728b5c08f02..0bd7b09b0aa3 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1519,7 +1519,7 @@ static psmouse_ret_t alps_handle_interleaved_ps2(struct psmouse *psmouse) return PSMOUSE_GOOD_DATA; } - del_timer(&priv->timer); + timer_delete(&priv->timer); if (psmouse->packet[6] & 0x80) { diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c index 654b38d249f3..4ee084e00a7c 100644 --- a/drivers/input/mouse/byd.c +++ b/drivers/input/mouse/byd.c @@ -425,7 +425,7 @@ static void byd_disconnect(struct psmouse *psmouse) struct byd_data *priv = psmouse->private; if (priv) { - del_timer(&priv->timer); + timer_delete(&priv->timer); kfree(psmouse->private); psmouse->private = NULL; } diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index d36e89d6fc54..94e8bcbbf94d 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -1017,7 +1017,7 @@ static int __init hil_mlc_init(void) static void __exit hil_mlc_exit(void) { - del_timer_sync(&hil_mlcs_kicker); + timer_delete_sync(&hil_mlcs_kicker); tasklet_kill(&hil_mlcs_tasklet); } diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 13eacf6ab431..0eec4c5585cb 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -980,7 +980,7 @@ static void hp_sdc_exit(void) free_irq(hp_sdc.irq, &hp_sdc); write_unlock_irq(&hp_sdc.lock); - del_timer_sync(&hp_sdc.kicker); + timer_delete_sync(&hp_sdc.kicker); tasklet_kill(&hp_sdc.task); diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c index a0598e9c7aff..8d8392ce7005 100644 --- a/drivers/input/touchscreen/ad7877.c +++ b/drivers/input/touchscreen/ad7877.c @@ -415,7 +415,7 @@ static void ad7877_disable(void *data) ts->disabled = true; disable_irq(ts->spi->irq); - if (del_timer_sync(&ts->timer)) + if (timer_delete_sync(&ts->timer)) ad7877_ts_event_release(ts); } diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index e5d69bf2276e..f661e199b63c 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -273,7 +273,7 @@ static void __ad7879_disable(struct ad7879 *ts) AD7879_PM(AD7879_PM_SHUTDOWN); disable_irq(ts->irq); - if (del_timer_sync(&ts->timer)) + if (timer_delete_sync(&ts->timer)) ad7879_ts_event_release(ts); ad7879_write(ts, AD7879_REG_CTRL2, reg); diff --git a/drivers/input/touchscreen/bu21029_ts.c b/drivers/input/touchscreen/bu21029_ts.c index 686d0a6b1570..3c997fba7048 100644 --- a/drivers/input/touchscreen/bu21029_ts.c +++ b/drivers/input/touchscreen/bu21029_ts.c @@ -325,7 +325,7 @@ static void bu21029_stop_chip(struct input_dev *dev) struct bu21029_ts_data *bu21029 = input_get_drvdata(dev); disable_irq(bu21029->client->irq); - del_timer_sync(&bu21029->timer); + timer_delete_sync(&bu21029->timer); bu21029_put_chip_in_reset(bu21029); regulator_disable(bu21029->vdd); diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c index fdda8412b164..9a5977d8cad2 100644 --- a/drivers/input/touchscreen/exc3000.c +++ b/drivers/input/touchscreen/exc3000.c @@ -174,7 +174,7 @@ static int exc3000_handle_mt_event(struct exc3000_data *data) /* * We read full state successfully, no contacts will be "stuck". */ - del_timer_sync(&data->timer); + timer_delete_sync(&data->timer); while (total_slots > 0) { int slots = min(total_slots, EXC3000_SLOTS_PER_FRAME); diff --git a/drivers/input/touchscreen/sx8654.c b/drivers/input/touchscreen/sx8654.c index f5c5881cef6b..e59b8d0ed19e 100644 --- a/drivers/input/touchscreen/sx8654.c +++ b/drivers/input/touchscreen/sx8654.c @@ -290,7 +290,7 @@ static void sx8654_close(struct input_dev *dev) disable_irq(client->irq); if (!sx8654->data->has_irq_penrelease) - del_timer_sync(&sx8654->timer); + timer_delete_sync(&sx8654->timer); /* enable manual mode mode */ error = i2c_smbus_write_byte(client, sx8654->data->cmd_manual); diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c index df39dee13e1c..252a93753ee5 100644 --- a/drivers/input/touchscreen/tsc200x-core.c +++ b/drivers/input/touchscreen/tsc200x-core.c @@ -229,7 +229,7 @@ static void __tsc200x_disable(struct tsc200x *ts) guard(disable_irq)(&ts->irq); - del_timer_sync(&ts->penup_timer); + timer_delete_sync(&ts->penup_timer); cancel_delayed_work_sync(&ts->esd_work); } @@ -388,7 +388,7 @@ static void tsc200x_esd_work(struct work_struct *work) dev_info(ts->dev, "TSC200X not responding - resetting\n"); scoped_guard(disable_irq, &ts->irq) { - del_timer_sync(&ts->penup_timer); + timer_delete_sync(&ts->penup_timer); tsc200x_update_pen_state(ts, 0, 0, 0); tsc200x_reset(ts); } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 6054d0ab8023..cb7e29dcac15 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -271,7 +271,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) if (!cookie->fq_domain) return; - del_timer_sync(&cookie->fq_timer); + timer_delete_sync(&cookie->fq_timer); if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) iommu_dma_free_fq_single(cookie->single_fq); else diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 45ff0e198f8f..f6c27ca92c01 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3249,7 +3249,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); spin_unlock_irqrestore(&hc->lock, flags); __skb_queue_purge(&free_queue); break; @@ -3394,7 +3394,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); #ifdef FIXME if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) dchannel_sched_event(&hc->dch, D_CLEARBUSY); @@ -4522,7 +4522,7 @@ release_port(struct hfc_multi *hc, struct dchannel *dch) spin_lock_irqsave(&hc->lock, flags); if (dch->timer.function) { - del_timer(&dch->timer); + timer_delete(&dch->timer); dch->timer.function = NULL; } diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index ce7bccc9faa3..e3870d942699 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -158,7 +158,7 @@ release_io_hfcpci(struct hfc_pci *hc) { /* disable memory mapped ports + busmaster */ pci_write_config_word(hc->pdev, PCI_COMMAND, 0); - del_timer(&hc->hw.timer); + timer_delete(&hc->hw.timer); dma_free_coherent(&hc->pdev->dev, 0x8000, hc->hw.fifos, hc->hw.dmahandle); iounmap(hc->hw.pci_io); @@ -1087,7 +1087,7 @@ hfc_l1callback(struct dchannel *dch, u_int cmd) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); break; case HW_POWERUP_REQ: Write_hfc(hc, HFCPCI_STATES, HFCPCI_DO_ACTION); @@ -1216,7 +1216,7 @@ hfcpci_int(int intno, void *dev_id) receive_dmsg(hc); if (val & 0x04) { /* D tx */ if (test_and_clear_bit(FLG_BUSY_TIMER, &hc->dch.Flags)) - del_timer(&hc->dch.timer); + timer_delete(&hc->dch.timer); tx_dirq(&hc->dch); } spin_unlock(&hc->lock); @@ -1635,7 +1635,7 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); #ifdef FIXME if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) dchannel_sched_event(&hc->dch, D_CLEARBUSY); @@ -2064,7 +2064,7 @@ release_card(struct hfc_pci *hc) { mode_hfcpci(&hc->bch[0], 1, ISDN_P_NONE); mode_hfcpci(&hc->bch[1], 2, ISDN_P_NONE); if (hc->dch.timer.function != NULL) { - del_timer(&hc->dch.timer); + timer_delete(&hc->dch.timer); hc->dch.timer.function = NULL; } spin_unlock_irqrestore(&hc->lock, flags); @@ -2342,7 +2342,7 @@ HFC_init(void) err = pci_register_driver(&hfc_driver); if (err) { if (timer_pending(&hfc_tl)) - del_timer(&hfc_tl); + timer_delete(&hfc_tl); } return err; @@ -2351,7 +2351,7 @@ HFC_init(void) static void __exit HFC_cleanup(void) { - del_timer_sync(&hfc_tl); + timer_delete_sync(&hfc_tl); pci_unregister_driver(&hfc_driver); } diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c index d0b7271fbda1..165a63994f04 100644 --- a/drivers/isdn/hardware/mISDN/mISDNipac.c +++ b/drivers/isdn/hardware/mISDN/mISDNipac.c @@ -158,7 +158,7 @@ isac_fill_fifo(struct isac_hw *isac) WriteISAC(isac, ISAC_CMDR, more ? 0x8 : 0xa); if (test_and_set_bit(FLG_BUSY_TIMER, &isac->dch.Flags)) { pr_debug("%s: %s dbusytimer running\n", isac->name, __func__); - del_timer(&isac->dch.timer); + timer_delete(&isac->dch.timer); } isac->dch.timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000); add_timer(&isac->dch.timer); @@ -206,7 +206,7 @@ static void isac_xpr_irq(struct isac_hw *isac) { if (test_and_clear_bit(FLG_BUSY_TIMER, &isac->dch.Flags)) - del_timer(&isac->dch.timer); + timer_delete(&isac->dch.timer); if (isac->dch.tx_skb && isac->dch.tx_idx < isac->dch.tx_skb->len) { isac_fill_fifo(isac); } else { @@ -220,7 +220,7 @@ static void isac_retransmit(struct isac_hw *isac) { if (test_and_clear_bit(FLG_BUSY_TIMER, &isac->dch.Flags)) - del_timer(&isac->dch.timer); + timer_delete(&isac->dch.timer); if (test_bit(FLG_TX_BUSY, &isac->dch.Flags)) { /* Restart frame */ isac->dch.tx_idx = 0; @@ -665,7 +665,7 @@ isac_l1cmd(struct dchannel *dch, u32 cmd) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); break; case HW_POWERUP_REQ: spin_lock_irqsave(isac->hwlock, flags); @@ -698,7 +698,7 @@ isac_release(struct isac_hw *isac) else if (isac->type != 0) WriteISAC(isac, ISAC_MASK, 0xff); if (isac->dch.timer.function != NULL) { - del_timer(&isac->dch.timer); + timer_delete(&isac->dch.timer); isac->dch.timer.function = NULL; } kfree(isac->mon_rx); diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c index b3e03c410544..e48f89cbecf8 100644 --- a/drivers/isdn/hardware/mISDN/mISDNisar.c +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -930,7 +930,7 @@ isar_pump_statev_fax(struct isar_ch *ch, u8 devt) { /* 1s (200 ms) Flags before data */ if (test_and_set_bit(FLG_FTI_RUN, &ch->bch.Flags)) - del_timer(&ch->ftimer); + timer_delete(&ch->ftimer); ch->ftimer.expires = jiffies + ((delay * HZ) / 1000); test_and_set_bit(FLG_LL_CONN, @@ -1603,8 +1603,8 @@ free_isar(struct isar_hw *isar) { modeisar(&isar->ch[0], ISDN_P_NONE); modeisar(&isar->ch[1], ISDN_P_NONE); - del_timer(&isar->ch[0].ftimer); - del_timer(&isar->ch[1].ftimer); + timer_delete(&isar->ch[0].ftimer); + timer_delete(&isar->ch[1].ftimer); test_and_clear_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags); test_and_clear_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags); } diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index ee69212ac351..da933f4bdf4e 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -294,7 +294,7 @@ W6692_fill_Dfifo(struct w6692_hw *card) WriteW6692(card, W_D_CMDR, cmd); if (test_and_set_bit(FLG_BUSY_TIMER, &dch->Flags)) { pr_debug("%s: fill_Dfifo dbusytimer running\n", card->name); - del_timer(&dch->timer); + timer_delete(&dch->timer); } dch->timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000); add_timer(&dch->timer); @@ -311,7 +311,7 @@ d_retransmit(struct w6692_hw *card) struct dchannel *dch = &card->dch; if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); #ifdef FIXME if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) dchannel_sched_event(dch, D_CLEARBUSY); @@ -372,7 +372,7 @@ handle_rxD(struct w6692_hw *card) { static void handle_txD(struct w6692_hw *card) { if (test_and_clear_bit(FLG_BUSY_TIMER, &card->dch.Flags)) - del_timer(&card->dch.timer); + timer_delete(&card->dch.timer); if (card->dch.tx_skb && card->dch.tx_idx < card->dch.tx_skb->len) { W6692_fill_Dfifo(card); } else { @@ -1130,7 +1130,7 @@ w6692_l1callback(struct dchannel *dch, u32 cmd) } test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) - del_timer(&dch->timer); + timer_delete(&dch->timer); break; case HW_POWERUP_REQ: spin_lock_irqsave(&card->lock, flags); diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 753232e9fc36..d0aa415a6b09 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -928,7 +928,7 @@ dsp_function(struct mISDNchannel *ch, struct sk_buff *skb) dsp->tone.hardware = 0; dsp->tone.software = 0; if (timer_pending(&dsp->tone.tl)) - del_timer(&dsp->tone.tl); + timer_delete(&dsp->tone.tl); if (dsp->conf) dsp_cmx_conf(dsp, 0); /* dsp_cmx_hardware will also be called here */ @@ -975,7 +975,7 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) cancel_work_sync(&dsp->workq); spin_lock_irqsave(&dsp_lock, flags); if (timer_pending(&dsp->tone.tl)) - del_timer(&dsp->tone.tl); + timer_delete(&dsp->tone.tl); skb_queue_purge(&dsp->sendq); if (dsp_debug & DEBUG_DSP_CTRL) printk(KERN_DEBUG "%s: releasing member %s\n", @@ -1209,7 +1209,7 @@ static void __exit dsp_cleanup(void) { mISDN_unregister_Bprotocol(&DSP); - del_timer_sync(&dsp_spl_tl); + timer_delete_sync(&dsp_spl_tl); if (!list_empty(&dsp_ilist)) { printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list not " diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c index 8389e2105cdc..456b313bfa76 100644 --- a/drivers/isdn/mISDN/dsp_tones.c +++ b/drivers/isdn/mISDN/dsp_tones.c @@ -505,7 +505,7 @@ dsp_tone(struct dsp *dsp, int tone) /* we turn off the tone */ if (!tone) { if (dsp->features.hfc_loops && timer_pending(&tonet->tl)) - del_timer(&tonet->tl); + timer_delete(&tonet->tl); if (dsp->features.hfc_loops) dsp_tone_hw_message(dsp, NULL, 0); tonet->tone = 0; @@ -539,7 +539,7 @@ dsp_tone(struct dsp *dsp, int tone) dsp_tone_hw_message(dsp, pat->data[0], *(pat->siz[0])); /* set timer */ if (timer_pending(&tonet->tl)) - del_timer(&tonet->tl); + timer_delete(&tonet->tl); tonet->tl.expires = jiffies + (pat->seq[0] * HZ) / 8000; add_timer(&tonet->tl); } else { diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c index 7c5c2ca6c6d8..5ed0a6110687 100644 --- a/drivers/isdn/mISDN/fsm.c +++ b/drivers/isdn/mISDN/fsm.c @@ -123,7 +123,7 @@ mISDN_FsmDelTimer(struct FsmTimer *ft, int where) ft->fi->printdebug(ft->fi, "mISDN_FsmDelTimer %lx %d", (long) ft, where); #endif - del_timer(&ft->tl); + timer_delete(&ft->tl); } EXPORT_SYMBOL(mISDN_FsmDelTimer); @@ -167,7 +167,7 @@ mISDN_FsmRestartTimer(struct FsmTimer *ft, #endif if (timer_pending(&ft->tl)) - del_timer(&ft->tl); + timer_delete(&ft->tl); ft->event = event; ft->arg = arg; ft->tl.expires = jiffies + (millisec * HZ) / 1000; diff --git a/drivers/leds/flash/leds-rt8515.c b/drivers/leds/flash/leds-rt8515.c index 6b051f182b72..32ba397a33d2 100644 --- a/drivers/leds/flash/leds-rt8515.c +++ b/drivers/leds/flash/leds-rt8515.c @@ -127,7 +127,7 @@ static int rt8515_led_flash_strobe_set(struct led_classdev_flash *fled, mod_timer(&rt->powerdown_timer, jiffies + usecs_to_jiffies(timeout->val)); } else { - del_timer_sync(&rt->powerdown_timer); + timer_delete_sync(&rt->powerdown_timer); /* Turn the LED off */ rt8515_gpio_led_off(rt); } @@ -372,7 +372,7 @@ static void rt8515_remove(struct platform_device *pdev) struct rt8515 *rt = platform_get_drvdata(pdev); rt8515_v4l2_flash_release(rt); - del_timer_sync(&rt->powerdown_timer); + timer_delete_sync(&rt->powerdown_timer); mutex_destroy(&rt->lock); } diff --git a/drivers/leds/flash/leds-sgm3140.c b/drivers/leds/flash/leds-sgm3140.c index 3c01739c0b46..48fb8a9ec703 100644 --- a/drivers/leds/flash/leds-sgm3140.c +++ b/drivers/leds/flash/leds-sgm3140.c @@ -55,7 +55,7 @@ static int sgm3140_strobe_set(struct led_classdev_flash *fled_cdev, bool state) mod_timer(&priv->powerdown_timer, jiffies + usecs_to_jiffies(priv->timeout)); } else { - del_timer_sync(&priv->powerdown_timer); + timer_delete_sync(&priv->powerdown_timer); gpiod_set_value_cansleep(priv->enable_gpio, 0); gpiod_set_value_cansleep(priv->flash_gpio, 0); ret = regulator_disable(priv->vin_regulator); @@ -117,7 +117,7 @@ static int sgm3140_brightness_set(struct led_classdev *led_cdev, gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 1); } else { - del_timer_sync(&priv->powerdown_timer); + timer_delete_sync(&priv->powerdown_timer); gpiod_set_value_cansleep(priv->flash_gpio, 0); gpiod_set_value_cansleep(priv->enable_gpio, 0); ret = regulator_disable(priv->vin_regulator); @@ -285,7 +285,7 @@ static void sgm3140_remove(struct platform_device *pdev) { struct sgm3140 *priv = platform_get_drvdata(pdev); - del_timer_sync(&priv->powerdown_timer); + timer_delete_sync(&priv->powerdown_timer); v4l2_flash_release(priv->v4l2_flash); } diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index e3d8ddcff567..907fc703e0c5 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -245,7 +245,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + timer_delete_sync(&led_cdev->blink_timer); clear_bit(LED_BLINK_SW, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); @@ -294,7 +294,7 @@ EXPORT_SYMBOL_GPL(led_blink_set_nosleep); void led_stop_software_blink(struct led_classdev *led_cdev) { - del_timer_sync(&led_cdev->blink_timer); + timer_delete_sync(&led_cdev->blink_timer); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; clear_bit(LED_BLINK_SW, &led_cdev->work_flags); diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c index a594bd5e2233..06d052957d37 100644 --- a/drivers/leds/trigger/ledtrig-pattern.c +++ b/drivers/leds/trigger/ledtrig-pattern.c @@ -94,7 +94,7 @@ static void pattern_trig_timer_cancel(struct pattern_trig_data *data) if (data->type == PATTERN_TYPE_HR) hrtimer_cancel(&data->hrtimer); else - del_timer_sync(&data->timer); + timer_delete_sync(&data->timer); } static void pattern_trig_timer_restart(struct pattern_trig_data *data, diff --git a/drivers/leds/trigger/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c index f111fa7635e5..e103c7ed830b 100644 --- a/drivers/leds/trigger/ledtrig-transient.c +++ b/drivers/leds/trigger/ledtrig-transient.c @@ -66,7 +66,7 @@ static ssize_t transient_activate_store(struct device *dev, /* cancel the running timer */ if (state == 0 && transient_data->activate == 1) { - del_timer(&transient_data->timer); + timer_delete(&transient_data->timer); transient_data->activate = state; led_set_brightness_nosleep(led_cdev, transient_data->restore_state); diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c index b2fe7a3dc471..c5aabf238d0a 100644 --- a/drivers/macintosh/adbhid.c +++ b/drivers/macintosh/adbhid.c @@ -724,7 +724,7 @@ adb_message_handler(struct notifier_block *this, unsigned long code, void *x) int i; for (i = 1; i < 16; i++) { if (adbhid[i]) - del_timer_sync(&adbhid[i]->input->timer); + timer_delete_sync(&adbhid[i]->input->timer); } } diff --git a/drivers/mailbox/mailbox-altera.c b/drivers/mailbox/mailbox-altera.c index afb320e9d69c..748128661892 100644 --- a/drivers/mailbox/mailbox-altera.c +++ b/drivers/mailbox/mailbox-altera.c @@ -270,7 +270,7 @@ static void altera_mbox_shutdown(struct mbox_chan *chan) writel_relaxed(~0, mbox->mbox_base + MAILBOX_INTMASK_REG); free_irq(mbox->irq, chan); } else if (!mbox->is_sender) { - del_timer_sync(&mbox->rxpoll_timer); + timer_delete_sync(&mbox->rxpoll_timer); } } diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 68b02216033d..d39dec34b7a3 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -123,7 +123,7 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc) kobject_put(&acc->day.kobj); atomic_set(&acc->closing, 1); - if (del_timer_sync(&acc->timer)) + if (timer_delete_sync(&acc->timer)) closure_return(&acc->cl); } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 8b219b1199b4..2a283feb3319 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2707,7 +2707,7 @@ static void integrity_commit(struct work_struct *w) unsigned int i, j, n; struct bio *flushes; - del_timer(&ic->autocommit_timer); + timer_delete(&ic->autocommit_timer); if (ic->mode == 'I') return; @@ -3606,7 +3606,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier)); - del_timer_sync(&ic->autocommit_timer); + timer_delete_sync(&ic->autocommit_timer); if (ic->recalc_wq) drain_workqueue(ic->recalc_wq); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 637977acc3dc..6c98f4ae5ea9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -815,7 +815,7 @@ static void enable_nopath_timeout(struct multipath *m) static void disable_nopath_timeout(struct multipath *m) { - del_timer_sync(&m->nopath_timer); + timer_delete_sync(&m->nopath_timer); } /* diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 8c6f1f7e6456..9e615b4f1f5e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1182,7 +1182,7 @@ static void mirror_dtr(struct dm_target *ti) { struct mirror_set *ms = ti->private; - del_timer_sync(&ms->timer); + timer_delete_sync(&ms->timer); flush_workqueue(ms->kmirrord_wq); flush_work(&ms->trigger_event); dm_kcopyd_client_destroy(ms->kcopyd_client); diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 5c49d49e023c..3c58b941e067 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -2261,7 +2261,7 @@ static void check_for_drain_complete(struct hash_zone *zone) if ((atomic_read(&zone->timer_state) == DEDUPE_QUERY_TIMER_IDLE) || change_timer_state(zone, DEDUPE_QUERY_TIMER_RUNNING, DEDUPE_QUERY_TIMER_IDLE)) { - del_timer_sync(&zone->timer); + timer_delete_sync(&zone->timer); } else { /* * There is an in flight time-out, which must get processed before we can continue. diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 7ce8847b3404..d6a04a57472d 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -797,7 +797,7 @@ static void writecache_flush(struct dm_writecache *wc) bool need_flush_after_free; wc->uncommitted_blocks = 0; - del_timer(&wc->autocommit_timer); + timer_delete(&wc->autocommit_timer); if (list_empty(&wc->lru)) return; @@ -927,8 +927,8 @@ static void writecache_suspend(struct dm_target *ti) struct dm_writecache *wc = ti->private; bool flush_on_suspend; - del_timer_sync(&wc->autocommit_timer); - del_timer_sync(&wc->max_age_timer); + timer_delete_sync(&wc->autocommit_timer); + timer_delete_sync(&wc->max_age_timer); wc_lock(wc); writecache_flush(wc); diff --git a/drivers/md/md.c b/drivers/md/md.c index 438e71e45c16..9daa78c5fe33 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4064,7 +4064,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) * it must always be in_sync */ mddev->in_sync = 1; - del_timer_sync(&mddev->safemode_timer); + timer_delete_sync(&mddev->safemode_timer); } pers->run(mddev); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); @@ -6405,7 +6405,7 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { - del_timer_sync(&mddev->safemode_timer); + timer_delete_sync(&mddev->safemode_timer); if (mddev->pers && mddev->pers->quiesce) { mddev->pers->quiesce(mddev, 1); diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c index a7047e548245..2952678cce45 100644 --- a/drivers/media/common/saa7146/saa7146_fops.c +++ b/drivers/media/common/saa7146/saa7146_fops.c @@ -147,7 +147,7 @@ void saa7146_buffer_next(struct saa7146_dev *dev, printk("vdma%d.num_line_byte: 0x%08x\n", 1,saa7146_read(dev,NUM_LINE_BYTE1)); */ } - del_timer(&q->timeout); + timer_delete(&q->timeout); } } diff --git a/drivers/media/common/saa7146/saa7146_vbi.c b/drivers/media/common/saa7146/saa7146_vbi.c index a1854b3dd004..6c324a683be9 100644 --- a/drivers/media/common/saa7146/saa7146_vbi.c +++ b/drivers/media/common/saa7146/saa7146_vbi.c @@ -322,8 +322,8 @@ static void vbi_stop(struct saa7146_dev *dev) /* shut down dma 3 transfers */ saa7146_write(dev, MC1, MASK_20); - del_timer(&vv->vbi_dmaq.timeout); - del_timer(&vv->vbi_read_timeout); + timer_delete(&vv->vbi_dmaq.timeout); + timer_delete(&vv->vbi_read_timeout); spin_unlock_irqrestore(&dev->slock, flags); } diff --git a/drivers/media/common/saa7146/saa7146_video.c b/drivers/media/common/saa7146/saa7146_video.c index 94e1cd4eaedb..733e18001d0d 100644 --- a/drivers/media/common/saa7146/saa7146_video.c +++ b/drivers/media/common/saa7146/saa7146_video.c @@ -668,7 +668,7 @@ static void stop_streaming(struct vb2_queue *q) struct saa7146_dev *dev = vb2_get_drv_priv(q); struct saa7146_dmaqueue *dq = &dev->vv_data->video_dmaq; - del_timer(&dq->timeout); + timer_delete(&dq->timeout); video_end(dev); return_buffers(q, VB2_BUF_STATE_ERROR); } diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 6063782e937a..1e985f943944 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -365,7 +365,7 @@ static void dvb_dmxdev_filter_timer(struct dmxdev_filter *dmxdevfilter) { struct dmx_sct_filter_params *para = &dmxdevfilter->params.sec; - del_timer(&dmxdevfilter->timer); + timer_delete(&dmxdevfilter->timer); if (para->timeout) { dmxdevfilter->timer.expires = jiffies + 1 + (HZ / 2 + HZ * para->timeout) / 1000; @@ -391,7 +391,7 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, spin_unlock(&dmxdevfilter->dev->lock); return 0; } - del_timer(&dmxdevfilter->timer); + timer_delete(&dmxdevfilter->timer); dprintk("section callback %*ph\n", 6, buffer1); if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) { ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx, @@ -482,7 +482,7 @@ static int dvb_dmxdev_feed_stop(struct dmxdev_filter *dmxdevfilter) switch (dmxdevfilter->type) { case DMXDEV_TYPE_SEC: - del_timer(&dmxdevfilter->timer); + timer_delete(&dmxdevfilter->timer); dmxdevfilter->feed.sec->stop_filtering(dmxdevfilter->feed.sec); break; case DMXDEV_TYPE_PES: diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index c50d4e85dfd1..2d5f42f11158 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -2201,7 +2201,7 @@ static int tc358743_probe(struct i2c_client *client) err_work_queues: cec_unregister_adapter(state->cec_adap); if (!state->i2c_client->irq) { - del_timer(&state->timer); + timer_delete(&state->timer); flush_work(&state->work_i2c_poll); } cancel_delayed_work(&state->delayed_work_enable_hotplug); @@ -2218,7 +2218,7 @@ static void tc358743_remove(struct i2c_client *client) struct tc358743_state *state = to_state(sd); if (!state->i2c_client->irq) { - del_timer_sync(&state->timer); + timer_delete_sync(&state->timer); flush_work(&state->work_i2c_poll); } cancel_delayed_work_sync(&state->delayed_work_enable_hotplug); diff --git a/drivers/media/i2c/tvaudio.c b/drivers/media/i2c/tvaudio.c index 654725dfafac..42115118a0bd 100644 --- a/drivers/media/i2c/tvaudio.c +++ b/drivers/media/i2c/tvaudio.c @@ -1787,7 +1787,7 @@ static int tvaudio_s_radio(struct v4l2_subdev *sd) struct CHIPSTATE *chip = to_state(sd); chip->radio = 1; - /* del_timer(&chip->wt); */ + /* timer_delete(&chip->wt); */ return 0; } @@ -2071,7 +2071,7 @@ static void tvaudio_remove(struct i2c_client *client) struct v4l2_subdev *sd = i2c_get_clientdata(client); struct CHIPSTATE *chip = to_state(sd); - del_timer_sync(&chip->wt); + timer_delete_sync(&chip->wt); if (chip->thread) { /* shutdown async thread */ kthread_stop(chip->thread); diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 2782832f5eb8..377a7e7f0499 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -3491,7 +3491,7 @@ static void bttv_remove(struct pci_dev *pci_dev) /* free resources */ free_irq(btv->c.pci->irq,btv); - del_timer_sync(&btv->timeout); + timer_delete_sync(&btv->timeout); iounmap(btv->bt848_mmio); release_mem_region(pci_resource_start(btv->c.pci,0), pci_resource_len(btv->c.pci,0)); diff --git a/drivers/media/pci/bt8xx/bttv-input.c b/drivers/media/pci/bt8xx/bttv-input.c index 41226f1d0e5b..9eb7a5356b4c 100644 --- a/drivers/media/pci/bt8xx/bttv-input.c +++ b/drivers/media/pci/bt8xx/bttv-input.c @@ -304,12 +304,12 @@ static void bttv_ir_start(struct bttv_ir *ir) static void bttv_ir_stop(struct bttv *btv) { if (btv->remote->polling) - del_timer_sync(&btv->remote->timer); + timer_delete_sync(&btv->remote->timer); if (btv->remote->rc5_gpio) { u32 gpio; - del_timer_sync(&btv->remote->timer); + timer_delete_sync(&btv->remote->timer); gpio = bttv_gpio_read(&btv->c); bttv_gpio_write(&btv->c, gpio & ~(1 << 4)); diff --git a/drivers/media/pci/bt8xx/bttv-risc.c b/drivers/media/pci/bt8xx/bttv-risc.c index 241a696e374a..79581cd7bd59 100644 --- a/drivers/media/pci/bt8xx/bttv-risc.c +++ b/drivers/media/pci/bt8xx/bttv-risc.c @@ -376,7 +376,7 @@ static void bttv_set_irq_timer(struct bttv *btv) if (btv->curr.frame_irq || btv->loop_irq || btv->cvbi) mod_timer(&btv->timeout, jiffies + BTTV_TIMEOUT); else - del_timer(&btv->timeout); + timer_delete(&btv->timeout); } static int bttv_set_capture_control(struct bttv *btv, int start_capture) diff --git a/drivers/media/pci/ivtv/ivtv-irq.c b/drivers/media/pci/ivtv/ivtv-irq.c index b7aaa8b4a784..b3b670b6ef70 100644 --- a/drivers/media/pci/ivtv/ivtv-irq.c +++ b/drivers/media/pci/ivtv/ivtv-irq.c @@ -532,7 +532,7 @@ static void ivtv_irq_dma_read(struct ivtv *itv) IVTV_DEBUG_HI_IRQ("DEC DMA READ\n"); - del_timer(&itv->dma_timer); + timer_delete(&itv->dma_timer); if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) && itv->cur_dma_stream < 0) return; @@ -597,7 +597,7 @@ static void ivtv_irq_enc_dma_complete(struct ivtv *itv) ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data); IVTV_DEBUG_HI_IRQ("ENC DMA COMPLETE %x %d (%d)\n", data[0], data[1], itv->cur_dma_stream); - del_timer(&itv->dma_timer); + timer_delete(&itv->dma_timer); if (itv->cur_dma_stream < 0) return; @@ -670,7 +670,7 @@ static void ivtv_irq_dma_err(struct ivtv *itv) u32 data[CX2341X_MBOX_MAX_DATA]; u32 status; - del_timer(&itv->dma_timer); + timer_delete(&itv->dma_timer); ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data); status = read_reg(IVTV_REG_DMASTATUS); diff --git a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c index af9e6235c74d..ac085925d3cb 100644 --- a/drivers/media/pci/ivtv/ivtv-streams.c +++ b/drivers/media/pci/ivtv/ivtv-streams.c @@ -891,7 +891,7 @@ int ivtv_stop_v4l2_encode_stream(struct ivtv_stream *s, int gop_end) /* Set the following Interrupt mask bits for capture */ ivtv_set_irq_mask(itv, IVTV_IRQ_MASK_CAPTURE); - del_timer(&itv->dma_timer); + timer_delete(&itv->dma_timer); /* event notification (off) */ if (test_and_clear_bit(IVTV_F_I_DIG_RST, &itv->i_flags)) { @@ -956,7 +956,7 @@ int ivtv_stop_v4l2_decode_stream(struct ivtv_stream *s, int flags, u64 pts) ivtv_vapi(itv, CX2341X_DEC_SET_EVENT_NOTIFICATION, 4, 0, 0, IVTV_IRQ_DEC_AUD_MODE_CHG, -1); ivtv_set_irq_mask(itv, IVTV_IRQ_MASK_DECODE); - del_timer(&itv->dma_timer); + timer_delete(&itv->dma_timer); clear_bit(IVTV_F_S_NEEDS_DATA, &s->s_flags); clear_bit(IVTV_F_S_STREAMING, &s->s_flags); diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c index 557985ba25db..16338d13d9c8 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c @@ -698,7 +698,7 @@ static void netup_unidvb_dma_fini(struct netup_unidvb_dev *ndev, int num) netup_unidvb_dma_enable(dma, 0); msleep(50); cancel_work_sync(&dma->work); - del_timer_sync(&dma->timeout); + timer_delete_sync(&dma->timeout); } static int netup_unidvb_dma_setup(struct netup_unidvb_dev *ndev) diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c index ea0585e43abb..84295bdb8ce4 100644 --- a/drivers/media/pci/saa7134/saa7134-core.c +++ b/drivers/media/pci/saa7134/saa7134-core.c @@ -322,7 +322,7 @@ void saa7134_buffer_next(struct saa7134_dev *dev, /* nothing to do -- just stop DMA */ core_dbg("buffer_next %p\n", NULL); saa7134_set_dmabits(dev); - del_timer(&q->timeout); + timer_delete(&q->timeout); } } @@ -364,7 +364,7 @@ void saa7134_stop_streaming(struct saa7134_dev *dev, struct saa7134_dmaqueue *q) tmp = NULL; } spin_unlock_irqrestore(&dev->slock, flags); - saa7134_buffer_timeout(&q->timeout); /* also calls del_timer(&q->timeout) */ + saa7134_buffer_timeout(&q->timeout); /* also calls timer_delete(&q->timeout) */ } EXPORT_SYMBOL_GPL(saa7134_stop_streaming); @@ -1390,9 +1390,9 @@ static int __maybe_unused saa7134_suspend(struct device *dev_d) /* Disable timeout timers - if we have active buffers, we will fill them on resume*/ - del_timer(&dev->video_q.timeout); - del_timer(&dev->vbi_q.timeout); - del_timer(&dev->ts_q.timeout); + timer_delete(&dev->video_q.timeout); + timer_delete(&dev->vbi_q.timeout); + timer_delete(&dev->ts_q.timeout); if (dev->remote && dev->remote->dev->users) saa7134_ir_close(dev->remote->dev); diff --git a/drivers/media/pci/saa7134/saa7134-input.c b/drivers/media/pci/saa7134/saa7134-input.c index 8610eb473b39..d7d97c7d4a2b 100644 --- a/drivers/media/pci/saa7134/saa7134-input.c +++ b/drivers/media/pci/saa7134/saa7134-input.c @@ -496,7 +496,7 @@ void saa7134_ir_close(struct rc_dev *rc) struct saa7134_card_ir *ir = dev->remote; if (ir->polling) - del_timer_sync(&ir->timer); + timer_delete_sync(&ir->timer); ir->running = false; } diff --git a/drivers/media/pci/saa7134/saa7134-ts.c b/drivers/media/pci/saa7134/saa7134-ts.c index ec699ea14799..1b44033067c5 100644 --- a/drivers/media/pci/saa7134/saa7134-ts.c +++ b/drivers/media/pci/saa7134/saa7134-ts.c @@ -298,7 +298,7 @@ int saa7134_ts_start(struct saa7134_dev *dev) int saa7134_ts_fini(struct saa7134_dev *dev) { - del_timer_sync(&dev->ts_q.timeout); + timer_delete_sync(&dev->ts_q.timeout); saa7134_pgtable_free(dev->pci, &dev->ts_q.pt); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-vbi.c b/drivers/media/pci/saa7134/saa7134-vbi.c index efa6e4fa423a..28bf77449bdb 100644 --- a/drivers/media/pci/saa7134/saa7134-vbi.c +++ b/drivers/media/pci/saa7134/saa7134-vbi.c @@ -183,7 +183,7 @@ int saa7134_vbi_init1(struct saa7134_dev *dev) int saa7134_vbi_fini(struct saa7134_dev *dev) { /* nothing */ - del_timer_sync(&dev->vbi_q.timeout); + timer_delete_sync(&dev->vbi_q.timeout); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index 43e7b006eb59..c88939bce56b 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -1741,7 +1741,7 @@ int saa7134_video_init1(struct saa7134_dev *dev) void saa7134_video_fini(struct saa7134_dev *dev) { - del_timer_sync(&dev->video_q.timeout); + timer_delete_sync(&dev->video_q.timeout); /* free stuff */ saa7134_pgtable_free(dev->pci, &dev->video_q.pt); saa7134_pgtable_free(dev->pci, &dev->vbi_q.pt); diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c index c53099c958ca..80bd268926cc 100644 --- a/drivers/media/pci/tw686x/tw686x-core.c +++ b/drivers/media/pci/tw686x/tw686x-core.c @@ -373,7 +373,7 @@ static void tw686x_remove(struct pci_dev *pci_dev) tw686x_video_free(dev); tw686x_audio_free(dev); - del_timer_sync(&dev->dma_delay_timer); + timer_delete_sync(&dev->dma_delay_timer); pci_iounmap(pci_dev, dev->mmio); pci_release_regions(pci_dev); diff --git a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc.c b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc.c index 5f80931f056d..c8e0ee383af3 100644 --- a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc.c @@ -935,7 +935,7 @@ static int s5p_mfc_open(struct file *file) if (dev->num_inst == 1) { if (s5p_mfc_power_off(dev) < 0) mfc_err("power off failed\n"); - del_timer_sync(&dev->watchdog_timer); + timer_delete_sync(&dev->watchdog_timer); } err_ctrls_setup: s5p_mfc_dec_ctrls_delete(ctx); @@ -985,7 +985,7 @@ static int s5p_mfc_release(struct file *file) if (dev->num_inst == 0) { mfc_debug(2, "Last instance\n"); s5p_mfc_deinit_hw(dev); - del_timer_sync(&dev->watchdog_timer); + timer_delete_sync(&dev->watchdog_timer); s5p_mfc_clock_off(dev); if (s5p_mfc_power_off(dev) < 0) mfc_err("Power off failed\n"); @@ -1461,7 +1461,7 @@ static void s5p_mfc_remove(struct platform_device *pdev) } mutex_unlock(&dev->mfc_mutex); - del_timer_sync(&dev->watchdog_timer); + timer_delete_sync(&dev->watchdog_timer); flush_work(&dev->watchdog_work); video_unregister_device(dev->vfd_enc); diff --git a/drivers/media/platform/st/sti/c8sectpfe/c8sectpfe-core.c b/drivers/media/platform/st/sti/c8sectpfe/c8sectpfe-core.c index d151d2ed1f64..87a817dda4a9 100644 --- a/drivers/media/platform/st/sti/c8sectpfe/c8sectpfe-core.c +++ b/drivers/media/platform/st/sti/c8sectpfe/c8sectpfe-core.c @@ -351,7 +351,7 @@ static int c8sectpfe_stop_feed(struct dvb_demux_feed *dvbdmxfeed) dev_dbg(fei->dev, "%s:%d global_feed_count=%d\n" , __func__, __LINE__, fei->global_feed_count); - del_timer(&fei->timer); + timer_delete(&fei->timer); } mutex_unlock(&fei->lock); diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c index a5db9b4dc3de..2ddf1dfa0522 100644 --- a/drivers/media/radio/radio-cadet.c +++ b/drivers/media/radio/radio-cadet.c @@ -471,7 +471,7 @@ static int cadet_release(struct file *file) mutex_lock(&dev->lock); if (v4l2_fh_is_singular_file(file) && dev->rdsstat) { - del_timer_sync(&dev->readtimer); + timer_delete_sync(&dev->readtimer); dev->rdsstat = 0; } v4l2_fh_release(file); diff --git a/drivers/media/rc/ene_ir.c b/drivers/media/rc/ene_ir.c index 67722e2e47ff..9435cba3f4d9 100644 --- a/drivers/media/rc/ene_ir.c +++ b/drivers/media/rc/ene_ir.c @@ -1104,7 +1104,7 @@ static void ene_remove(struct pnp_dev *pnp_dev) unsigned long flags; rc_unregister_device(dev->rdev); - del_timer_sync(&dev->tx_sim_timer); + timer_delete_sync(&dev->tx_sim_timer); spin_lock_irqsave(&dev->hw_lock, flags); ene_rx_disable(dev); ene_rx_restore_hw_buffer(dev); diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c index 1464ef9c55bc..bfe86588c69b 100644 --- a/drivers/media/rc/igorplugusb.c +++ b/drivers/media/rc/igorplugusb.c @@ -223,7 +223,7 @@ static int igorplugusb_probe(struct usb_interface *intf, return 0; fail: usb_poison_urb(ir->urb); - del_timer(&ir->timer); + timer_delete(&ir->timer); usb_unpoison_urb(ir->urb); usb_free_urb(ir->urb); rc_free_device(ir->rc); @@ -238,7 +238,7 @@ static void igorplugusb_disconnect(struct usb_interface *intf) rc_unregister_device(ir->rc); usb_poison_urb(ir->urb); - del_timer_sync(&ir->timer); + timer_delete_sync(&ir->timer); usb_set_intfdata(intf, NULL); usb_unpoison_urb(ir->urb); usb_free_urb(ir->urb); diff --git a/drivers/media/rc/img-ir/img-ir-hw.c b/drivers/media/rc/img-ir/img-ir-hw.c index 5da7479c1793..da89ddf771c3 100644 --- a/drivers/media/rc/img-ir/img-ir-hw.c +++ b/drivers/media/rc/img-ir/img-ir-hw.c @@ -556,8 +556,8 @@ static void img_ir_set_decoder(struct img_ir_priv *priv, * acquires the lock and we don't want to deadlock waiting for it. */ spin_unlock_irq(&priv->lock); - del_timer_sync(&hw->end_timer); - del_timer_sync(&hw->suspend_timer); + timer_delete_sync(&hw->end_timer); + timer_delete_sync(&hw->suspend_timer); spin_lock_irq(&priv->lock); hw->stopping = false; diff --git a/drivers/media/rc/img-ir/img-ir-raw.c b/drivers/media/rc/img-ir/img-ir-raw.c index 8b0bdd9603b3..669f3309e237 100644 --- a/drivers/media/rc/img-ir/img-ir-raw.c +++ b/drivers/media/rc/img-ir/img-ir-raw.c @@ -147,5 +147,5 @@ void img_ir_remove_raw(struct img_ir_priv *priv) rc_unregister_device(rdev); - del_timer_sync(&raw->timer); + timer_delete_sync(&raw->timer); } diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 8f1361bcce3a..cb6f36ebe5c8 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -2534,7 +2534,7 @@ static void imon_disconnect(struct usb_interface *interface) ictx->dev_present_intf1 = false; usb_kill_urb(ictx->rx_urb_intf1); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { - del_timer_sync(&ictx->ttimer); + timer_delete_sync(&ictx->ttimer); input_unregister_device(ictx->touch); } usb_put_dev(ictx->usbdev_intf1); diff --git a/drivers/media/rc/ir-mce_kbd-decoder.c b/drivers/media/rc/ir-mce_kbd-decoder.c index 66e8feb9a569..817030fb50c9 100644 --- a/drivers/media/rc/ir-mce_kbd-decoder.c +++ b/drivers/media/rc/ir-mce_kbd-decoder.c @@ -324,7 +324,7 @@ static int ir_mce_kbd_decode(struct rc_dev *dev, struct ir_raw_event ev) msecs_to_jiffies(100); mod_timer(&data->rx_timeout, jiffies + delay); } else { - del_timer(&data->rx_timeout); + timer_delete(&data->rx_timeout); } /* Pass data to keyboard buffer parser */ ir_mce_kbd_process_keyboard_data(dev, scancode); @@ -372,7 +372,7 @@ static int ir_mce_kbd_unregister(struct rc_dev *dev) { struct mce_kbd_dec *mce_kbd = &dev->raw->mce_kbd; - del_timer_sync(&mce_kbd->rx_timeout); + timer_delete_sync(&mce_kbd->rx_timeout); return 0; } diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c index 16e33d7eaaa2..aa4ac43c66fa 100644 --- a/drivers/media/rc/rc-ir-raw.c +++ b/drivers/media/rc/rc-ir-raw.c @@ -662,7 +662,7 @@ void ir_raw_event_unregister(struct rc_dev *dev) return; kthread_stop(dev->raw->thread); - del_timer_sync(&dev->raw->edge_handle); + timer_delete_sync(&dev->raw->edge_handle); mutex_lock(&ir_raw_handler_lock); list_del(&dev->raw->list); diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index a4c539b17cf3..e46358fb8ac0 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -639,7 +639,7 @@ static void ir_do_keyup(struct rc_dev *dev, bool sync) return; dev_dbg(&dev->dev, "keyup key 0x%04x\n", dev->last_keycode); - del_timer(&dev->timer_repeat); + timer_delete(&dev->timer_repeat); input_report_key(dev->input_dev, dev->last_keycode, 0); led_trigger_event(led_feedback, LED_OFF); if (sync) @@ -2021,8 +2021,8 @@ void rc_unregister_device(struct rc_dev *dev) if (dev->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(dev); - del_timer_sync(&dev->timer_keyup); - del_timer_sync(&dev->timer_repeat); + timer_delete_sync(&dev->timer_keyup); + timer_delete_sync(&dev->timer_repeat); mutex_lock(&dev->lock); if (dev->users && dev->close) diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index fc5fd3927177..992fff82b524 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -798,7 +798,7 @@ static int __init serial_ir_init_module(void) static void __exit serial_ir_exit_module(void) { - del_timer_sync(&serial_ir.timeout_timer); + timer_delete_sync(&serial_ir.timeout_timer); serial_ir_exit(); } diff --git a/drivers/media/usb/au0828/au0828-dvb.c b/drivers/media/usb/au0828/au0828-dvb.c index 09f9948c6f8e..3666f4452d11 100644 --- a/drivers/media/usb/au0828/au0828-dvb.c +++ b/drivers/media/usb/au0828/au0828-dvb.c @@ -143,7 +143,7 @@ static void urb_completion(struct urb *purb) */ dprintk(1, "%s cancelling bulk timeout\n", __func__); dev->bulk_timeout_running = 0; - del_timer(&dev->bulk_timeout); + timer_delete(&dev->bulk_timeout); } /* Feed the transport payload into the kernel demux */ @@ -168,7 +168,7 @@ static int stop_urb_transfer(struct au0828_dev *dev) if (dev->bulk_timeout_running == 1) { dev->bulk_timeout_running = 0; - del_timer(&dev->bulk_timeout); + timer_delete(&dev->bulk_timeout); } dev->urb_streaming = false; diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index e9cd2a335f7f..33d1fad0f7b8 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -857,7 +857,7 @@ static void au0828_stop_streaming(struct vb2_queue *vq) } dev->vid_timeout_running = 0; - del_timer_sync(&dev->vid_timeout); + timer_delete_sync(&dev->vid_timeout); spin_lock_irqsave(&dev->slock, flags); if (dev->isoc_ctl.buf != NULL) { @@ -905,7 +905,7 @@ void au0828_stop_vbi_streaming(struct vb2_queue *vq) spin_unlock_irqrestore(&dev->slock, flags); dev->vbi_timeout_running = 0; - del_timer_sync(&dev->vbi_timeout); + timer_delete_sync(&dev->vbi_timeout); } static const struct vb2_ops au0828_video_qops = { @@ -1040,12 +1040,12 @@ static int au0828_v4l2_close(struct file *filp) if (vdev->vfl_type == VFL_TYPE_VIDEO && dev->vid_timeout_running) { /* Cancel timeout thread in case they didn't call streamoff */ dev->vid_timeout_running = 0; - del_timer_sync(&dev->vid_timeout); + timer_delete_sync(&dev->vid_timeout); } else if (vdev->vfl_type == VFL_TYPE_VBI && dev->vbi_timeout_running) { /* Cancel timeout thread in case they didn't call streamoff */ dev->vbi_timeout_running = 0; - del_timer_sync(&dev->vbi_timeout); + timer_delete_sync(&dev->vbi_timeout); } if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) @@ -1694,9 +1694,9 @@ void au0828_v4l2_suspend(struct au0828_dev *dev) } if (dev->vid_timeout_running) - del_timer_sync(&dev->vid_timeout); + timer_delete_sync(&dev->vid_timeout); if (dev->vbi_timeout_running) - del_timer_sync(&dev->vbi_timeout); + timer_delete_sync(&dev->vbi_timeout); } void au0828_v4l2_resume(struct au0828_dev *dev) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-encoder.c b/drivers/media/usb/pvrusb2/pvrusb2-encoder.c index c8102772344b..a5eabac1ec6e 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-encoder.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-encoder.c @@ -257,7 +257,7 @@ rdData[0]); ret = -EBUSY; } if (ret) { - del_timer_sync(&hdw->encoder_run_timer); + timer_delete_sync(&hdw->encoder_run_timer); hdw->state_encoder_ok = 0; pvr2_trace(PVR2_TRACE_STBITS, "State bit %s <-- %s", diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 29cc207194b9..9a583eeaa329 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -1527,7 +1527,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw) /* Encoder is about to be reset so note that as far as we're concerned now, the encoder has never been run. */ - del_timer_sync(&hdw->encoder_run_timer); + timer_delete_sync(&hdw->encoder_run_timer); if (hdw->state_encoder_runok) { hdw->state_encoder_runok = 0; trace_stbit("state_encoder_runok",hdw->state_encoder_runok); @@ -3724,7 +3724,7 @@ status); hdw->cmd_debug_state = 5; /* Stop timer */ - del_timer_sync(&timer.timer); + timer_delete_sync(&timer.timer); hdw->cmd_debug_state = 6; status = 0; @@ -4248,7 +4248,7 @@ static int state_eval_encoder_config(struct pvr2_hdw *hdw) hdw->state_encoder_waitok = 0; trace_stbit("state_encoder_waitok",hdw->state_encoder_waitok); /* paranoia - solve race if timer just completed */ - del_timer_sync(&hdw->encoder_wait_timer); + timer_delete_sync(&hdw->encoder_wait_timer); } else { if (!hdw->state_pathway_ok || (hdw->pathway_state != PVR2_PATHWAY_ANALOG) || @@ -4261,7 +4261,7 @@ static int state_eval_encoder_config(struct pvr2_hdw *hdw) anything has happened that might have disturbed the encoder. This should be a rare case. */ if (timer_pending(&hdw->encoder_wait_timer)) { - del_timer_sync(&hdw->encoder_wait_timer); + timer_delete_sync(&hdw->encoder_wait_timer); } if (hdw->state_encoder_waitok) { /* Must clear the state - therefore we did @@ -4399,7 +4399,7 @@ static int state_eval_encoder_run(struct pvr2_hdw *hdw) if (hdw->state_encoder_run) { if (!state_check_disable_encoder_run(hdw)) return 0; if (hdw->state_encoder_ok) { - del_timer_sync(&hdw->encoder_run_timer); + timer_delete_sync(&hdw->encoder_run_timer); if (pvr2_encoder_stop(hdw) < 0) return !0; } hdw->state_encoder_run = 0; @@ -4479,11 +4479,11 @@ static int state_eval_decoder_run(struct pvr2_hdw *hdw) hdw->state_decoder_quiescent = 0; hdw->state_decoder_run = 0; /* paranoia - solve race if timer(s) just completed */ - del_timer_sync(&hdw->quiescent_timer); + timer_delete_sync(&hdw->quiescent_timer); /* Kill the stabilization timer, in case we're killing the encoder before the previous stabilization interval has been properly timed. */ - del_timer_sync(&hdw->decoder_stabilization_timer); + timer_delete_sync(&hdw->decoder_stabilization_timer); hdw->state_decoder_ready = 0; } else { if (!hdw->state_decoder_quiescent) { @@ -4517,7 +4517,7 @@ static int state_eval_decoder_run(struct pvr2_hdw *hdw) !hdw->state_pipeline_config || !hdw->state_encoder_config || !hdw->state_encoder_ok) return 0; - del_timer_sync(&hdw->quiescent_timer); + timer_delete_sync(&hdw->quiescent_timer); if (hdw->flag_decoder_missed) return 0; if (pvr2_decoder_enable(hdw,!0) < 0) return 0; hdw->state_decoder_quiescent = 0; diff --git a/drivers/memory/tegra/tegra210-emc-core.c b/drivers/memory/tegra/tegra210-emc-core.c index 2d5d8245a1d3..e63f62690571 100644 --- a/drivers/memory/tegra/tegra210-emc-core.c +++ b/drivers/memory/tegra/tegra210-emc-core.c @@ -583,7 +583,7 @@ static void tegra210_emc_training_start(struct tegra210_emc *emc) static void tegra210_emc_training_stop(struct tegra210_emc *emc) { - del_timer(&emc->training); + timer_delete(&emc->training); } static unsigned int tegra210_emc_get_temperature(struct tegra210_emc *emc) @@ -666,7 +666,7 @@ static void tegra210_emc_poll_refresh(struct timer_list *timer) static void tegra210_emc_poll_refresh_stop(struct tegra210_emc *emc) { atomic_set(&emc->refresh_poll, 0); - del_timer_sync(&emc->refresh_timer); + timer_delete_sync(&emc->refresh_timer); } static void tegra210_emc_poll_refresh_start(struct tegra210_emc *emc) diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index f4398383ae06..7dc2c9987982 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1510,7 +1510,7 @@ static void msb_cache_discard(struct msb_data *msb) if (msb->cache_block_lba == MS_BLOCK_INVALID) return; - del_timer_sync(&msb->cache_flush_timer); + timer_delete_sync(&msb->cache_flush_timer); dbg_verbose("Discarding the write cache"); msb->cache_block_lba = MS_BLOCK_INVALID; @@ -2027,7 +2027,7 @@ static void msb_stop(struct memstick_dev *card) msb->io_queue_stopped = true; spin_unlock_irqrestore(&msb->q_lock, flags); - del_timer_sync(&msb->cache_flush_timer); + timer_delete_sync(&msb->cache_flush_timer); flush_workqueue(msb->io_queue); spin_lock_irqsave(&msb->q_lock, flags); diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index e77eb8b0eb12..a5a9bb3f16be 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -469,7 +469,7 @@ static void jmb38x_ms_complete_cmd(struct memstick_host *msh, int last) unsigned int t_val = 0; int rc; - del_timer(&host->timer); + timer_delete(&host->timer); dev_dbg(&msh->dev, "c control %08x\n", readl(host->addr + HOST_CONTROL)); diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 544a31ff46e5..488ef8eecb26 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -827,7 +827,7 @@ static void r592_remove(struct pci_dev *pdev) /* Stop the processing thread. That ensures that we won't take any more requests */ kthread_stop(dev->io_thread); - del_timer_sync(&dev->detect_timer); + timer_delete_sync(&dev->detect_timer); r592_enable_device(dev, false); while (!error && dev->req) { @@ -854,7 +854,7 @@ static int r592_suspend(struct device *core_dev) r592_clear_interrupts(dev); memstick_suspend_host(dev->host); - del_timer_sync(&dev->detect_timer); + timer_delete_sync(&dev->detect_timer); return 0; } diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index c272453670be..676348eee015 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -337,7 +337,7 @@ static void tifm_ms_complete_cmd(struct tifm_ms *host) struct memstick_host *msh = tifm_get_drvdata(sock); int rc; - del_timer(&host->timer); + timer_delete(&host->timer); host->req->int_reg = readl(sock->addr + SOCK_MS_STATUS) & 0xff; host->req->int_reg = (host->req->int_reg & 1) @@ -600,7 +600,7 @@ static void tifm_ms_remove(struct tifm_dev *sock) spin_lock_irqsave(&sock->lock, flags); host->eject = 1; if (host->req) { - del_timer(&host->timer); + timer_delete(&host->timer); writel(TIFM_FIFO_INT_SETALL, sock->addr + SOCK_DMA_FIFO_INT_ENABLE_CLEAR); writel(TIFM_DMA_RESET, sock->addr + SOCK_DMA_CONTROL); diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c index 59bab76ff0a9..44a2dd80054d 100644 --- a/drivers/misc/bcm-vk/bcm_vk_tty.c +++ b/drivers/misc/bcm-vk/bcm_vk_tty.c @@ -177,7 +177,7 @@ static void bcm_vk_tty_close(struct tty_struct *tty, struct file *file) vk->tty[tty->index].is_opened = false; if (tty->count == 1) - del_timer_sync(&vk->serial_timer); + timer_delete_sync(&vk->serial_timer); } static void bcm_vk_tty_doorbell(struct bcm_vk *vk, u32 db_val) @@ -304,7 +304,7 @@ void bcm_vk_tty_exit(struct bcm_vk *vk) { int i; - del_timer_sync(&vk->serial_timer); + timer_delete_sync(&vk->serial_timer); for (i = 0; i < BCM_VK_NUM_TTY; ++i) { tty_port_unregister_device(&vk->tty[i].port, vk->tty_drv, diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index 77b0490a1b38..7314c8d9ae75 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -53,7 +53,7 @@ static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); add_timer(&ucr->sg_timer); usb_sg_wait(&ucr->current_sg); - if (!del_timer_sync(&ucr->sg_timer)) + if (!timer_delete_sync(&ucr->sg_timer)) ret = -ETIMEDOUT; else ret = ucr->current_sg.status; diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 7a3c34306de9..697a008c14d3 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -202,7 +202,7 @@ xpc_start_hb_beater(void) static void xpc_stop_hb_beater(void) { - del_timer_sync(&xpc_hb_timer); + timer_delete_sync(&xpc_hb_timer); xpc_arch_ops.heartbeat_exit(); } diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 1999d02923de..d0467010558c 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -291,7 +291,7 @@ static int __xpc_partition_disengaged(struct xpc_partition *part, /* Cancel the timer function if not called from it */ if (!from_timer) - del_timer_sync(&part->disengage_timer); + timer_delete_sync(&part->disengage_timer); DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && part->act_state != XPC_P_AS_INACTIVE); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index bdb22998357e..dacb5bd9bb71 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -147,13 +147,13 @@ void mmc_retune_disable(struct mmc_host *host) { mmc_retune_unpause(host); host->can_retune = 0; - del_timer_sync(&host->retune_timer); + timer_delete_sync(&host->retune_timer); mmc_retune_clear(host); } void mmc_retune_timer_stop(struct mmc_host *host) { - del_timer_sync(&host->retune_timer); + timer_delete_sync(&host->retune_timer); } EXPORT_SYMBOL(mmc_retune_timer_stop); diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 24fffc702a94..14e981b834b6 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1592,7 +1592,7 @@ static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq) WARN_ON(host->cmd || host->data); - del_timer(&host->timer); + timer_delete(&host->timer); /* * Update the MMC clock rate if necessary. This may be @@ -2357,7 +2357,7 @@ static void atmci_cleanup_slot(struct atmel_mci_slot *slot, if (slot->detect_pin) { free_irq(gpiod_to_irq(slot->detect_pin), slot); - del_timer_sync(&slot->detect_timer); + timer_delete_sync(&slot->detect_timer); } slot->host->slot[id] = NULL; @@ -2585,7 +2585,7 @@ static int atmci_probe(struct platform_device *pdev) pm_runtime_disable(dev); pm_runtime_put_noidle(dev); - del_timer_sync(&host->timer); + timer_delete_sync(&host->timer); if (!IS_ERR(host->dma.chan)) dma_release_channel(host->dma.chan); err_dma_probe_defer: @@ -2613,7 +2613,7 @@ static void atmci_remove(struct platform_device *pdev) atmci_writel(host, ATMCI_CR, ATMCI_CR_MCIDIS); atmci_readl(host, ATMCI_SR); - del_timer_sync(&host->timer); + timer_delete_sync(&host->timer); if (!IS_ERR(host->dma.chan)) dma_release_channel(host->dma.chan); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index bb596d169420..578290015e5b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2040,10 +2040,10 @@ static bool dw_mci_clear_pending_cmd_complete(struct dw_mci *host) * Really be certain that the timer has stopped. This is a bit of * paranoia and could only really happen if we had really bad * interrupt latency and the interrupt routine and timeout were - * running concurrently so that the del_timer() in the interrupt + * running concurrently so that the timer_delete() in the interrupt * handler couldn't run. */ - WARN_ON(del_timer_sync(&host->cto_timer)); + WARN_ON(timer_delete_sync(&host->cto_timer)); clear_bit(EVENT_CMD_COMPLETE, &host->pending_events); return true; @@ -2055,7 +2055,7 @@ static bool dw_mci_clear_pending_data_complete(struct dw_mci *host) return false; /* Extra paranoia just like dw_mci_clear_pending_cmd_complete() */ - WARN_ON(del_timer_sync(&host->dto_timer)); + WARN_ON(timer_delete_sync(&host->dto_timer)); clear_bit(EVENT_DATA_COMPLETE, &host->pending_events); return true; @@ -2788,7 +2788,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host) static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status) { - del_timer(&host->cto_timer); + timer_delete(&host->cto_timer); if (!host->cmd_status) host->cmd_status = status; @@ -2832,13 +2832,13 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) dw_mci_cmd_interrupt(host, pending); spin_unlock(&host->irq_lock); - del_timer(&host->cmd11_timer); + timer_delete(&host->cmd11_timer); } if (pending & DW_MCI_CMD_ERROR_FLAGS) { spin_lock(&host->irq_lock); - del_timer(&host->cto_timer); + timer_delete(&host->cto_timer); mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS); host->cmd_status = pending; smp_wmb(); /* drain writebuffer */ @@ -2851,7 +2851,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) spin_lock(&host->irq_lock); if (host->quirks & DW_MMC_QUIRK_EXTENDED_TMOUT) - del_timer(&host->dto_timer); + timer_delete(&host->dto_timer); /* if there is an error report DATA_ERROR */ mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS); @@ -2872,7 +2872,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) if (pending & SDMMC_INT_DATA_OVER) { spin_lock(&host->irq_lock); - del_timer(&host->dto_timer); + timer_delete(&host->dto_timer); mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER); if (!host->data_status) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 596012d5afac..bd1662e275d4 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -862,7 +862,7 @@ static irqreturn_t jz_mmc_irq(int irq, void *devid) if (host->req && cmd && irq_reg) { if (test_and_clear_bit(0, &host->waiting)) { - del_timer(&host->timeout_timer); + timer_delete(&host->timeout_timer); if (status & JZ_MMC_STATUS_TIMEOUT_RES) { cmd->error = -ETIMEDOUT; @@ -1162,7 +1162,7 @@ static void jz4740_mmc_remove(struct platform_device *pdev) { struct jz4740_mmc_host *host = platform_get_drvdata(pdev); - del_timer_sync(&host->timeout_timer); + timer_delete_sync(&host->timeout_timer); jz4740_mmc_set_irq_enabled(host, 0xff, false); jz4740_mmc_reset(host); diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c index ad351805eed4..e0ae5a0c9670 100644 --- a/drivers/mmc/host/meson-mx-sdio.c +++ b/drivers/mmc/host/meson-mx-sdio.c @@ -446,7 +446,7 @@ static irqreturn_t meson_mx_mmc_irq_thread(int irq, void *irq_data) if (WARN_ON(!cmd)) return IRQ_HANDLED; - del_timer_sync(&host->cmd_timeout); + timer_delete_sync(&host->cmd_timeout); if (cmd->data) { dma_unmap_sg(mmc_dev(host->mmc), cmd->data->sg, @@ -733,7 +733,7 @@ static void meson_mx_mmc_remove(struct platform_device *pdev) struct meson_mx_mmc_host *host = platform_get_drvdata(pdev); struct device *slot_dev = mmc_dev(host->mmc); - del_timer_sync(&host->cmd_timeout); + timer_delete_sync(&host->cmd_timeout); mmc_remove_host(host->mmc); diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c index b92f3ba38663..912ffacbad88 100644 --- a/drivers/mmc/host/mvsdio.c +++ b/drivers/mmc/host/mvsdio.c @@ -464,7 +464,7 @@ static irqreturn_t mvsd_irq(int irq, void *dev) struct mmc_command *cmd = mrq->cmd; u32 err_status = 0; - del_timer(&host->timer); + timer_delete(&host->timer); host->mrq = NULL; host->intr_en &= MVSD_NOR_CARD_INT; @@ -803,7 +803,7 @@ static void mvsd_remove(struct platform_device *pdev) struct mvsd_host *host = mmc_priv(mmc); mmc_remove_host(mmc); - del_timer_sync(&host->timer); + timer_delete_sync(&host->timer); mvsd_power_down(host); if (!IS_ERR(host->clk)) diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 0a9affd12532..95d8d40a06a8 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -352,7 +352,7 @@ static void mxcmci_dma_callback(void *data) struct mxcmci_host *host = data; u32 stat; - del_timer(&host->watchdog); + timer_delete(&host->watchdog); stat = mxcmci_readl(host, MMC_REG_STATUS); @@ -737,7 +737,7 @@ static irqreturn_t mxcmci_irq(int irq, void *devid) mxcmci_cmd_done(host, stat); if (mxcmci_use_dma(host) && (stat & STATUS_WRITE_OP_DONE)) { - del_timer(&host->watchdog); + timer_delete(&host->watchdog); mxcmci_data_done(host, stat); } diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index 3cdb2fc44965..c50617d03709 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -214,7 +214,7 @@ static void mmc_omap_select_slot(struct mmc_omap_slot *slot, int claimed) host->mmc = slot->mmc; spin_unlock_irqrestore(&host->slot_lock, flags); no_claim: - del_timer(&host->clk_timer); + timer_delete(&host->clk_timer); if (host->current_slot != slot || !claimed) mmc_omap_fclk_offdelay(host->current_slot); @@ -273,7 +273,7 @@ static void mmc_omap_release_slot(struct mmc_omap_slot *slot, int clk_enabled) /* Keeps clock running for at least 8 cycles on valid freq */ mod_timer(&host->clk_timer, jiffies + HZ/10); else { - del_timer(&host->clk_timer); + timer_delete(&host->clk_timer); mmc_omap_fclk_offdelay(slot); mmc_omap_fclk_enable(host, 0); } @@ -564,7 +564,7 @@ mmc_omap_cmd_done(struct mmc_omap_host *host, struct mmc_command *cmd) { host->cmd = NULL; - del_timer(&host->cmd_abort_timer); + timer_delete(&host->cmd_abort_timer); if (cmd->flags & MMC_RSP_PRESENT) { if (cmd->flags & MMC_RSP_136) { @@ -836,7 +836,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) } if (cmd_error && host->data) { - del_timer(&host->cmd_abort_timer); + timer_delete(&host->cmd_abort_timer); host->abort = 1; OMAP_MMC_WRITE(host, IE, 0); disable_irq_nosync(host->irq); @@ -1365,7 +1365,7 @@ static void mmc_omap_remove_slot(struct mmc_omap_slot *slot) device_remove_file(&mmc->class_dev, &dev_attr_cover_switch); cancel_work_sync(&slot->cover_bh_work); - del_timer_sync(&slot->cover_timer); + timer_delete_sync(&slot->cover_timer); flush_workqueue(slot->host->mmc_omap_wq); mmc_remove_host(mmc); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 5f91b44891f9..5f78be7ae16d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -517,9 +517,9 @@ EXPORT_SYMBOL_GPL(sdhci_mod_timer); static void sdhci_del_timer(struct sdhci_host *host, struct mmc_request *mrq) { if (sdhci_data_line_cmd(mrq->cmd)) - del_timer(&host->data_timer); + timer_delete(&host->data_timer); else - del_timer(&host->timer); + timer_delete(&host->timer); } static inline bool sdhci_has_requests(struct sdhci_host *host) @@ -4976,8 +4976,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE); free_irq(host->irq, host); - del_timer_sync(&host->timer); - del_timer_sync(&host->data_timer); + timer_delete_sync(&host->timer); + timer_delete_sync(&host->data_timer); destroy_workqueue(host->complete_wq); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index aea14bf3e2e8..713223f2d377 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -735,7 +735,7 @@ static void tifm_sd_end_cmd(struct work_struct *t) spin_lock_irqsave(&sock->lock, flags); - del_timer(&host->timer); + timer_delete(&host->timer); mrq = host->req; host->req = NULL; diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index f77457105ec3..909d80a02824 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -971,7 +971,7 @@ static void via_sdc_finish_bh_work(struct work_struct *t) spin_lock_irqsave(&host->lock, flags); - del_timer(&host->timer); + timer_delete(&host->timer); mrq = host->mrq; host->mrq = NULL; host->cmd = NULL; @@ -1202,7 +1202,7 @@ static void via_sd_remove(struct pci_dev *pcidev) free_irq(pcidev->irq, sdhost); - del_timer_sync(&sdhost->timer); + timer_delete_sync(&sdhost->timer); cancel_work_sync(&sdhost->finish_bh_work); diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index fd67c0682b38..dd71e5b8e1a5 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -1452,7 +1452,7 @@ static int __command_read_data(struct vub300_mmc_host *vub300, (linear_length / 16384)); add_timer(&vub300->sg_transfer_timer); usb_sg_wait(&vub300->sg_request); - del_timer(&vub300->sg_transfer_timer); + timer_delete(&vub300->sg_transfer_timer); if (vub300->sg_request.status < 0) { cmd->error = vub300->sg_request.status; data->bytes_xfered = 0; @@ -1572,7 +1572,7 @@ static int __command_write_data(struct vub300_mmc_host *vub300, if (cmd->error) { data->bytes_xfered = 0; } else { - del_timer(&vub300->sg_transfer_timer); + timer_delete(&vub300->sg_transfer_timer); if (vub300->sg_request.status < 0) { cmd->error = vub300->sg_request.status; data->bytes_xfered = 0; @@ -2339,7 +2339,7 @@ static int vub300_probe(struct usb_interface *interface, return 0; error6: - del_timer_sync(&vub300->inactivity_timer); + timer_delete_sync(&vub300->inactivity_timer); error5: mmc_free_host(mmc); /* diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index 8b268e8a0ec9..d5974b355a5a 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -1261,7 +1261,7 @@ static void wbsd_free_mmc(struct device *dev) host = mmc_priv(mmc); BUG_ON(host == NULL); - del_timer_sync(&host->ignore_timer); + timer_delete_sync(&host->ignore_timer); mmc_free_host(mmc); } diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index 485d5ca39951..2199ba821922 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -257,7 +257,7 @@ static int hdm_poison_channel(struct most_interface *iface, int channel) mdev->padding_active[channel] = false; if (mdev->conf[channel].data_type == MOST_CH_ASYNC) { - del_timer_sync(&mdev->link_stat_timer); + timer_delete_sync(&mdev->link_stat_timer); cancel_work_sync(&mdev->poll_work_obj); } mutex_unlock(&mdev->io_mutex); @@ -1115,7 +1115,7 @@ static void hdm_disconnect(struct usb_interface *interface) mdev->usb_device = NULL; mutex_unlock(&mdev->io_mutex); - del_timer_sync(&mdev->link_stat_timer); + timer_delete_sync(&mdev->link_stat_timer); cancel_work_sync(&mdev->poll_work_obj); if (mdev->dci) diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index b5b3c4c44a94..d28d4f1790f5 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1067,7 +1067,7 @@ static int sm_write(struct mtd_blktrans_dev *dev, sm_break_offset(ftl, sec_no << 9, &zone_num, &block, &boffset); /* No need in flush thread running now */ - del_timer(&ftl->timer); + timer_delete(&ftl->timer); mutex_lock(&ftl->mutex); zone = sm_get_zone(ftl, zone_num); @@ -1111,7 +1111,7 @@ static void sm_release(struct mtd_blktrans_dev *dev) { struct sm_ftl *ftl = dev->priv; - del_timer_sync(&ftl->timer); + timer_delete_sync(&ftl->timer); cancel_work_sync(&ftl->flush_work); mutex_lock(&ftl->mutex); sm_cache_flush(ftl); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 530c15d6a5eb..602e6e1adf00 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -616,7 +616,7 @@ int arcnet_close(struct net_device *dev) struct arcnet_local *lp = netdev_priv(dev); arcnet_led_event(dev, ARCNET_LED_EVENT_STOP); - del_timer_sync(&lp->timer); + timer_delete_sync(&lp->timer); netif_stop_queue(dev); netif_carrier_off(dev); diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 21a61b86f67d..adf3970f070f 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -778,7 +778,7 @@ static irqreturn_t grcan_interrupt(int irq, void *dev_id) */ if (priv->need_txbug_workaround && (sources & (GRCAN_IRQ_TX | GRCAN_IRQ_TXLOSS))) { - del_timer(&priv->hang_timer); + timer_delete(&priv->hang_timer); } /* Frame(s) received or transmitted */ @@ -817,8 +817,8 @@ static void grcan_running_reset(struct timer_list *t) spin_lock_irqsave(&priv->lock, flags); priv->resetting = false; - del_timer(&priv->hang_timer); - del_timer(&priv->rr_timer); + timer_delete(&priv->hang_timer); + timer_delete(&priv->rr_timer); if (!priv->closing) { /* Save and reset - config register preserved by grcan_reset */ @@ -1108,8 +1108,8 @@ static int grcan_close(struct net_device *dev) priv->closing = true; if (priv->need_txbug_workaround) { spin_unlock_irqrestore(&priv->lock, flags); - del_timer_sync(&priv->hang_timer); - del_timer_sync(&priv->rr_timer); + timer_delete_sync(&priv->hang_timer); + timer_delete_sync(&priv->rr_timer); spin_lock_irqsave(&priv->lock, flags); } netif_stop_queue(dev); @@ -1147,7 +1147,7 @@ static void grcan_transmit_catch_up(struct net_device *dev) * so prevent a running reset while catching up */ if (priv->need_txbug_workaround) - del_timer(&priv->hang_timer); + timer_delete(&priv->hang_timer); } spin_unlock_irqrestore(&priv->lock, flags); diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index fa04a7ced02b..cf0d51805272 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -631,7 +631,7 @@ static int kvaser_pciefd_bus_on(struct kvaser_pciefd_can *can) u32 mode; unsigned long irq; - del_timer(&can->bec_poll_timer); + timer_delete(&can->bec_poll_timer); if (!completion_done(&can->flush_comp)) kvaser_pciefd_start_controller_flush(can); @@ -742,7 +742,7 @@ static int kvaser_pciefd_stop(struct net_device *netdev) ret = -ETIMEDOUT; } else { iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); - del_timer(&can->bec_poll_timer); + timer_delete(&can->bec_poll_timer); } can->can.state = CAN_STATE_STOPPED; close_candev(netdev); @@ -1854,7 +1854,7 @@ static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie) if (can) { iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); unregister_candev(can->can.dev); - del_timer(&can->bec_poll_timer); + timer_delete(&can->bec_poll_timer); kvaser_pciefd_pwm_stop(can); free_candev(can->can.dev); } diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index ebd5941c3f53..6c7b1c58f85f 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -167,7 +167,7 @@ static void pcan_start_led_timer(struct pcan_pccard *card) */ static void pcan_stop_led_timer(struct pcan_pccard *card) { - del_timer_sync(&card->led_timer); + timer_delete_sync(&card->led_timer); } /* diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index ee9e5d7e5277..b6d249eb64e7 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -206,7 +206,7 @@ static int mv88e6xxx_phy_ppu_access_get(struct mv88e6xxx_chip *chip) } chip->ppu_disabled = 1; } else { - del_timer(&chip->ppu_timer); + timer_delete(&chip->ppu_timer); ret = 0; } @@ -230,7 +230,7 @@ static void mv88e6xxx_phy_ppu_state_init(struct mv88e6xxx_chip *chip) static void mv88e6xxx_phy_ppu_state_destroy(struct mv88e6xxx_chip *chip) { mutex_lock(&chip->ppu_mutex); - del_timer_sync(&chip->ppu_timer); + timer_delete_sync(&chip->ppu_timer); cancel_work_sync(&chip->ppu_work); mutex_unlock(&chip->ppu_mutex); } diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 08b45fdd1d24..198e787e8560 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -842,7 +842,7 @@ static int sja1105_extts_enable(struct sja1105_private *priv, if (on) sja1105_ptp_extts_setup_timer(&priv->ptp_data); else - del_timer_sync(&priv->ptp_data.extts_timer); + timer_delete_sync(&priv->ptp_data.extts_timer); return 0; } @@ -939,7 +939,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds) if (IS_ERR_OR_NULL(ptp_data->clock)) return; - del_timer_sync(&ptp_data->extts_timer); + timer_delete_sync(&ptp_data->extts_timer); ptp_cancel_worker_sync(ptp_data->clock); skb_queue_purge(&ptp_data->skb_txtstamp_queue); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 3c2efda916f1..5889759b8d83 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -254,7 +254,7 @@ static int eql_close(struct net_device *dev) * at the data structure it scans every so often... */ - del_timer_sync(&eql->timer); + timer_delete_sync(&eql->timer); eql_kill_slave_queue(&eql->queue); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 4725a8cfd695..8ba2ed87fe7c 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1414,7 +1414,7 @@ static int corkscrew_close(struct net_device *dev) dev->name, rx_nocopy, rx_copy, queued_packet); } - del_timer_sync(&vp->timer); + timer_delete_sync(&vp->timer); /* Turn off statistics ASAP. We update lp->stats below. */ outw(StatsDisable, ioaddr + EL3_CMD); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index dc3b7c960611..b295d528a237 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -1140,7 +1140,7 @@ static int el3_close(struct net_device *dev) link->open--; netif_stop_queue(dev); - del_timer_sync(&lp->media); + timer_delete_sync(&lp->media); return 0; } diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index be58dac0502a..ff331a3bde73 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -946,7 +946,7 @@ static int el3_close(struct net_device *dev) link->open--; netif_stop_queue(dev); - del_timer_sync(&lp->media); + timer_delete_sync(&lp->media); return 0; } diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 790270912913..1a10f5dbc4d7 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2691,7 +2691,7 @@ vortex_down(struct net_device *dev, int final_down) netdev_reset_queue(dev); netif_stop_queue(dev); - del_timer_sync(&vp->timer); + timer_delete_sync(&vp->timer); /* Turn off statistics ASAP. We update dev->stats below. */ iowrite16(StatsDisable, ioaddr + EL3_CMD); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index fea489af72fb..e5be5044e1d4 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -504,7 +504,7 @@ static int axnet_close(struct net_device *dev) link->open--; netif_stop_queue(dev); - del_timer_sync(&info->watchdog); + timer_delete_sync(&info->watchdog); return 0; } /* axnet_close */ diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 780fb4afb6af..a326f25dda09 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -947,7 +947,7 @@ static int pcnet_close(struct net_device *dev) link->open--; netif_stop_queue(dev); - del_timer_sync(&info->watchdog); + timer_delete_sync(&info->watchdog); return 0; } /* pcnet_close */ diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index b325e0cef120..b398adacda91 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3639,7 +3639,7 @@ static int et131x_close(struct net_device *netdev) free_irq(adapter->pdev->irq, netdev); /* Stop the error timer */ - return del_timer_sync(&adapter->error_timer); + return timer_delete_sync(&adapter->error_timer); } /* et131x_set_packet_filter - Configures the Rx Packet filtering */ diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 70fa3adb4934..897720fdf5d8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3245,7 +3245,7 @@ static int ena_destroy_device(struct ena_adapter *adapter, bool graceful) netif_carrier_off(netdev); - del_timer_sync(&adapter->timer_service); + timer_delete_sync(&adapter->timer_service); dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); adapter->dev_up_before_reset = dev_up; @@ -4065,7 +4065,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_worker_destroy: - del_timer(&adapter->timer_service); + timer_delete(&adapter->timer_service); err_device_destroy: ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); @@ -4104,7 +4104,7 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) /* Make sure timer and reset routine won't be called after * freeing device resources. */ - del_timer_sync(&adapter->timer_service); + timer_delete_sync(&adapter->timer_service); cancel_work_sync(&adapter->reset_task); rtnl_lock(); /* lock released inside the below if-else block */ diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 1ca26a8c40eb..b923ad9e1581 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -486,7 +486,7 @@ static int lance_close(struct net_device *dev) volatile struct lance_regs *ll = lp->ll; netif_stop_queue(dev); - del_timer_sync(&lp->multicast_timer); + timer_delete_sync(&lp->multicast_timer); /* Stop the card */ ll->rap = LE_CSR0; diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index f64f96fa17cf..86522e8574cb 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1173,7 +1173,7 @@ static int amd8111e_close(struct net_device *dev) /* Delete ipg timer */ if (lp->options & OPTION_DYN_IPG_ENABLE) - del_timer_sync(&lp->ipg_data.ipg_timer); + timer_delete_sync(&lp->ipg_data.ipg_timer); spin_unlock_irq(&lp->lock); free_irq(dev->irq, dev); @@ -1598,7 +1598,7 @@ static int __maybe_unused amd8111e_suspend(struct device *dev_d) /* stop chip */ spin_lock_irq(&lp->lock); if (lp->options & OPTION_DYN_IPG_ENABLE) - del_timer_sync(&lp->ipg_data.ipg_timer); + timer_delete_sync(&lp->ipg_data.ipg_timer); amd8111e_stop_chip(lp); spin_unlock_irq(&lp->lock); diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index ec8df05e7bf6..b072ca5930fc 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -842,7 +842,7 @@ static int lance_close(struct net_device *dev) volatile struct lance_regs *ll = lp->ll; netif_stop_queue(dev); - del_timer_sync(&lp->multicast_timer); + timer_delete_sync(&lp->multicast_timer); /* Stop the card */ writereg(&ll->rap, LE_CSR0); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index c6bd803f5b0c..e5adafecc686 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -2630,7 +2630,7 @@ static int pcnet32_close(struct net_device *dev) struct pcnet32_private *lp = netdev_priv(dev); unsigned long flags; - del_timer_sync(&lp->watchdog_timer); + timer_delete_sync(&lp->watchdog_timer); netif_stop_queue(dev); napi_disable(&lp->napi); diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 0f98b92408ed..3cd31855a5f6 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -963,7 +963,7 @@ static int lance_close(struct net_device *dev) struct lance_private *lp = netdev_priv(dev); netif_stop_queue(dev); - del_timer_sync(&lp->multicast_timer); + timer_delete_sync(&lp->multicast_timer); STOP_LANCE(lp); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 5475867708f4..d84a310dfcd4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -728,7 +728,7 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) struct xgbe_channel *channel; unsigned int i; - del_timer_sync(&pdata->service_timer); + timer_delete_sync(&pdata->service_timer); for (i = 0; i < pdata->channel_count; i++) { channel = pdata->channel[i]; @@ -736,7 +736,7 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) break; /* Deactivate the Tx timer */ - del_timer_sync(&channel->tx_timer); + timer_delete_sync(&channel->tx_timer); channel->tx_timer_active = 0; } } diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 785f4b4ff758..b9fdd61f1fdb 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -461,7 +461,7 @@ static int bmac_suspend(struct macio_dev *mdev, pm_message_t state) /* prolly should wait for dma to finish & turn off the chip */ spin_lock_irqsave(&bp->lock, flags); if (bp->timeout_active) { - del_timer(&bp->tx_timeout); + timer_delete(&bp->tx_timeout); bp->timeout_active = 0; } disable_irq(dev->irq); @@ -546,7 +546,7 @@ static inline void bmac_set_timeout(struct net_device *dev) spin_lock_irqsave(&bp->lock, flags); if (bp->timeout_active) - del_timer(&bp->tx_timeout); + timer_delete(&bp->tx_timeout); bp->tx_timeout.expires = jiffies + TX_TIMEOUT; add_timer(&bp->tx_timeout); bp->timeout_active = 1; @@ -755,7 +755,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id) XXDEBUG(("bmac_txdma_intr\n")); } - /* del_timer(&bp->tx_timeout); */ + /* timer_delete(&bp->tx_timeout); */ /* bp->timeout_active = 0; */ while (1) { diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index e6350971c707..1fed112f4e68 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -523,7 +523,7 @@ static inline void mace_set_timeout(struct net_device *dev) struct mace_data *mp = netdev_priv(dev); if (mp->timeout_active) - del_timer(&mp->tx_timeout); + timer_delete(&mp->tx_timeout); mp->tx_timeout.expires = jiffies + TX_TIMEOUT; add_timer(&mp->tx_timeout); mp->timeout_active = 1; @@ -676,7 +676,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) i = mp->tx_empty; while (in_8(&mb->pr) & XMTSV) { - del_timer(&mp->tx_timeout); + timer_delete(&mp->tx_timeout); mp->timeout_active = 0; /* * Clear any interrupt indication associated with this status diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 71e50fc65c14..bf3aa46887a1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1389,13 +1389,13 @@ int aq_nic_stop(struct aq_nic_s *self) netif_tx_disable(self->ndev); netif_carrier_off(self->ndev); - del_timer_sync(&self->service_timer); + timer_delete_sync(&self->service_timer); cancel_work_sync(&self->service_task); self->aq_hw_ops->hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK); if (self->aq_nic_cfg.is_polling) - del_timer_sync(&self->polling_timer); + timer_delete_sync(&self->polling_timer); else aq_pci_func_free_irqs(self); diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 3d4c3d8698e2..67b654889cae 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1391,7 +1391,7 @@ static void ag71xx_hw_disable(struct ag71xx *ag) ag71xx_dma_reset(ag); napi_disable(&ag->napi); - del_timer_sync(&ag->oom_timer); + timer_delete_sync(&ag->oom_timer); ag71xx_rings_cleanup(ag); } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index c571614b1d50..82137f9deae9 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -357,7 +357,7 @@ static void atl1c_common_task(struct work_struct *work) static void atl1c_del_timer(struct atl1c_adapter *adapter) { - del_timer_sync(&adapter->phy_config_timer); + timer_delete_sync(&adapter->phy_config_timer); } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 9b778b34b67e..f664a0edbc49 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -232,7 +232,7 @@ static void atl1e_link_chg_event(struct atl1e_adapter *adapter) static void atl1e_del_timer(struct atl1e_adapter *adapter) { - del_timer_sync(&adapter->phy_config_timer); + timer_delete_sync(&adapter->phy_config_timer); } static void atl1e_cancel_work(struct atl1e_adapter *adapter) diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 3afd3627ce48..38cd84b7677c 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2641,7 +2641,7 @@ static void atl1_down(struct atl1_adapter *adapter) napi_disable(&adapter->napi); netif_stop_queue(netdev); - del_timer_sync(&adapter->phy_config_timer); + timer_delete_sync(&adapter->phy_config_timer); adapter->phy_timer_pending = false; atlx_irq_disable(adapter); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index fa9a4919f25d..88f65f8cf4d3 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -752,8 +752,8 @@ static void atl2_down(struct atl2_adapter *adapter) atl2_irq_disable(adapter); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_config_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_config_timer); clear_bit(0, &adapter->cfg_phy); netif_carrier_off(netdev); @@ -1468,8 +1468,8 @@ static void atl2_remove(struct pci_dev *pdev) * explicitly disable watchdog tasks from being rescheduled */ set_bit(__ATL2_DOWN, &adapter->flags); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_config_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_config_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->link_chg_task); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e5809ad5eb82..c91884373429 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1628,7 +1628,7 @@ static int b44_close(struct net_device *dev) napi_disable(&bp->napi); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); spin_lock_irq(&bp->lock); @@ -2473,7 +2473,7 @@ static int b44_suspend(struct ssb_device *sdev, pm_message_t state) if (!netif_running(dev)) return 0; - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); spin_lock_irq(&bp->lock); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 65e3a0656a4c..19611bdd86e6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1195,7 +1195,7 @@ static int bcm_enet_stop(struct net_device *dev) napi_disable(&priv->napi); if (priv->has_phy) phy_stop(dev->phydev); - del_timer_sync(&priv->rx_timeout); + timer_delete_sync(&priv->rx_timeout); /* mask all interrupts */ enet_writel(priv, 0, ENET_IRMASK_REG); @@ -2346,10 +2346,10 @@ static int bcm_enetsw_stop(struct net_device *dev) priv = netdev_priv(dev); kdev = &priv->pdev->dev; - del_timer_sync(&priv->swphy_poll); + timer_delete_sync(&priv->swphy_poll); netif_stop_queue(dev); napi_disable(&priv->napi); - del_timer_sync(&priv->rx_timeout); + timer_delete_sync(&priv->rx_timeout); /* mask all interrupts */ enet_dmac_writel(priv, 0, ENETDMAC_IRMASK, priv->rx_chan); diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 6ec773e61182..ec0c9584f3bb 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6400,7 +6400,7 @@ bnx2_open(struct net_device *dev) rc = bnx2_request_irq(bp); if (rc) { - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); goto open_err; } bnx2_enable_int(bp); @@ -6752,7 +6752,7 @@ bnx2_close(struct net_device *dev) bnx2_disable_int_sync(bp); bnx2_napi_disable(bp); netif_tx_disable(dev); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); bnx2_shutdown_chip(bp); bnx2_free_irq(bp); bnx2_free_skbs(bp); @@ -8602,7 +8602,7 @@ bnx2_remove_one(struct pci_dev *pdev) unregister_netdev(dev); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); cancel_work_sync(&bp->reset_task); pci_iounmap(bp->pdev, bp->regview); @@ -8629,7 +8629,7 @@ bnx2_suspend(struct device *device) cancel_work_sync(&bp->reset_task); bnx2_netif_stop(bp, true); netif_device_detach(dev); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); bnx2_shutdown_chip(bp); __bnx2_free_irq(bp); bnx2_free_skbs(bp); @@ -8687,7 +8687,7 @@ static pci_ers_result_t bnx2_io_error_detected(struct pci_dev *pdev, if (netif_running(dev)) { bnx2_netif_stop(bp, true); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); bnx2_reset_nic(bp, BNX2_DRV_MSG_CODE_RESET); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index a8e07e51418f..e59530357e2c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3059,7 +3059,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bp->rx_mode = BNX2X_RX_MODE_NONE; - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); if (IS_PF(bp) && !BP_NOMCP(bp)) { /* Set ALWAYS_ALIVE bit in shmem */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 678829646cec..f522ca8ff66b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14140,7 +14140,7 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) bnx2x_tx_disable(bp); netdev_reset_tc(bp->dev); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); cancel_delayed_work_sync(&bp->sp_task); cancel_delayed_work_sync(&bp->period_task); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 28ee12186c37..8725e1e13908 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12958,7 +12958,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); - del_timer_sync(&bp->timer); + timer_delete_sync(&bp->timer); bnxt_free_skbs(bp); /* Save ring stats before shutdown */ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d9d675f1ebfe..d1f541af4e3b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11252,7 +11252,7 @@ static void tg3_timer_start(struct tg3 *tp) static void tg3_timer_stop(struct tg3 *tp) { - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); } /* Restart hardware after configuration changes, self-test, etc. diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9c80ab07a735..92c7639d1fc7 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -314,13 +314,13 @@ bfa_ioc_sm_getattr(struct bfa_ioc *ioc, enum ioc_event event) { switch (event) { case IOC_E_FWRSP_GETATTR: - del_timer(&ioc->ioc_timer); + timer_delete(&ioc->ioc_timer); bfa_fsm_set_state(ioc, bfa_ioc_sm_op); break; case IOC_E_PFFAILED: case IOC_E_HWERROR: - del_timer(&ioc->ioc_timer); + timer_delete(&ioc->ioc_timer); fallthrough; case IOC_E_TIMEOUT: ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); @@ -330,7 +330,7 @@ bfa_ioc_sm_getattr(struct bfa_ioc *ioc, enum ioc_event event) break; case IOC_E_DISABLE: - del_timer(&ioc->ioc_timer); + timer_delete(&ioc->ioc_timer); bfa_fsm_set_state(ioc, bfa_ioc_sm_disabling); break; @@ -659,13 +659,13 @@ bfa_iocpf_sm_mismatch(struct bfa_iocpf *iocpf, enum iocpf_event event) break; case IOCPF_E_DISABLE: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_reset); bfa_ioc_pf_disabled(ioc); break; case IOCPF_E_STOP: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_reset); break; @@ -741,7 +741,7 @@ bfa_iocpf_sm_hwinit(struct bfa_iocpf *iocpf, enum iocpf_event event) break; case IOCPF_E_DISABLE: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_ioc_sync_leave(ioc); bfa_nw_ioc_hw_sem_release(ioc); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_disabled); @@ -774,13 +774,13 @@ bfa_iocpf_sm_enabling(struct bfa_iocpf *iocpf, enum iocpf_event event) switch (event) { case IOCPF_E_FWRSP_ENABLE: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_nw_ioc_hw_sem_release(ioc); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_ready); break; case IOCPF_E_INITFAIL: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); fallthrough; case IOCPF_E_TIMEOUT: @@ -791,7 +791,7 @@ bfa_iocpf_sm_enabling(struct bfa_iocpf *iocpf, enum iocpf_event event) break; case IOCPF_E_DISABLE: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_nw_ioc_hw_sem_release(ioc); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_disabling); break; @@ -844,12 +844,12 @@ bfa_iocpf_sm_disabling(struct bfa_iocpf *iocpf, enum iocpf_event event) switch (event) { case IOCPF_E_FWRSP_DISABLE: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); bfa_fsm_set_state(iocpf, bfa_iocpf_sm_disabling_sync); break; case IOCPF_E_FAIL: - del_timer(&ioc->iocpf_timer); + timer_delete(&ioc->iocpf_timer); fallthrough; case IOCPF_E_TIMEOUT: @@ -1210,7 +1210,7 @@ bfa_nw_ioc_hw_sem_release(struct bfa_ioc *ioc) static void bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc) { - del_timer(&ioc->sem_timer); + timer_delete(&ioc->sem_timer); } /* Initialize LPU local memory (aka secondary memory / SRAM) */ @@ -1982,7 +1982,7 @@ bfa_ioc_hb_monitor(struct bfa_ioc *ioc) static void bfa_ioc_hb_stop(struct bfa_ioc *ioc) { - del_timer(&ioc->hb_timer); + timer_delete(&ioc->hb_timer); } /* Initiate a full firmware download. */ diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 3b9107003b00..a03eff3d4425 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -1837,7 +1837,7 @@ bnad_stats_timer_stop(struct bnad *bnad) to_del = 1; spin_unlock_irqrestore(&bnad->bna_lock, flags); if (to_del) - del_timer_sync(&bnad->stats_timer); + timer_delete_sync(&bnad->stats_timer); } /* Utilities */ @@ -2160,7 +2160,7 @@ bnad_destroy_rx(struct bnad *bnad, u32 rx_id) } spin_unlock_irqrestore(&bnad->bna_lock, flags); if (to_del) - del_timer_sync(&bnad->dim_timer); + timer_delete_sync(&bnad->dim_timer); } init_completion(&bnad->bnad_completions.rx_comp); @@ -3726,9 +3726,9 @@ bnad_pci_probe(struct pci_dev *pdev, bnad_res_free(bnad, &bnad->mod_res_info[0], BNA_MOD_RES_T_MAX); disable_ioceth: bnad_ioceth_disable(bnad); - del_timer_sync(&bnad->bna.ioceth.ioc.ioc_timer); - del_timer_sync(&bnad->bna.ioceth.ioc.sem_timer); - del_timer_sync(&bnad->bna.ioceth.ioc.hb_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.ioc_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.sem_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.hb_timer); spin_lock_irqsave(&bnad->bna_lock, flags); bna_uninit(bna); spin_unlock_irqrestore(&bnad->bna_lock, flags); @@ -3769,9 +3769,9 @@ bnad_pci_remove(struct pci_dev *pdev) mutex_lock(&bnad->conf_mutex); bnad_ioceth_disable(bnad); - del_timer_sync(&bnad->bna.ioceth.ioc.ioc_timer); - del_timer_sync(&bnad->bna.ioceth.ioc.sem_timer); - del_timer_sync(&bnad->bna.ioceth.ioc.hb_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.ioc_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.sem_timer); + timer_delete_sync(&bnad->bna.ioceth.ioc.hb_timer); spin_lock_irqsave(&bnad->bna_lock, flags); bna_uninit(bna); spin_unlock_irqrestore(&bnad->bna_lock, flags); diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index d1ad6c9f8140..216e25f26dbb 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -373,7 +373,7 @@ static int bnad_set_coalesce(struct net_device *netdev, } spin_unlock_irqrestore(&bnad->bna_lock, flags); if (to_del) - del_timer_sync(&bnad->dim_timer); + timer_delete_sync(&bnad->dim_timer); spin_lock_irqsave(&bnad->bna_lock, flags); bnad_rx_coalescing_timeo_set(bnad); } diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 861edff5ed89..a10923c7e25c 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1984,9 +1984,9 @@ void t1_sge_stop(struct sge *sge) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ if (is_T2(sge->adapter)) - del_timer_sync(&sge->espibug_timer); + timer_delete_sync(&sge->espibug_timer); - del_timer_sync(&sge->tx_reclaim_timer); + timer_delete_sync(&sge->tx_reclaim_timer); if (sge->tx_sched) tx_sched_stop(sge); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 6268f96cb4aa..bd5c3b3fa5e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -3223,9 +3223,9 @@ void t3_stop_sge_timers(struct adapter *adap) struct sge_qset *q = &adap->sge.qs[i]; if (q->tx_reclaim_timer.function) - del_timer_sync(&q->tx_reclaim_timer); + timer_delete_sync(&q->tx_reclaim_timer); if (q->rx_reclaim_timer.function) - del_timer_sync(&q->rx_reclaim_timer); + timer_delete_sync(&q->rx_reclaim_timer); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index a7d76a8ed050..f991a28a71c3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -4996,9 +4996,9 @@ void t4_sge_stop(struct adapter *adap) struct sge *s = &adap->sge; if (s->rx_timer.function) - del_timer_sync(&s->rx_timer); + timer_delete_sync(&s->rx_timer); if (s->tx_timer.function) - del_timer_sync(&s->tx_timer); + timer_delete_sync(&s->tx_timer); if (is_offload(adap)) { struct sge_uld_txq_info *txq_info; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 5b1d746e6563..f42af01f4114 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2609,9 +2609,9 @@ void t4vf_sge_stop(struct adapter *adapter) struct sge *s = &adapter->sge; if (s->rx_timer.function) - del_timer_sync(&s->rx_timer); + timer_delete_sync(&s->rx_timer); if (s->tx_timer.function) - del_timer_sync(&s->tx_timer); + timer_delete_sync(&s->tx_timer); } /** diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h index 8c4ce50da6e1..5f5284102fb0 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.h +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -26,7 +26,7 @@ static inline void enic_rfs_timer_start(struct enic *enic) static inline void enic_rfs_timer_stop(struct enic *enic) { - del_timer_sync(&enic->rfs_h.rfs_may_expire); + timer_delete_sync(&enic->rfs_h.rfs_may_expire); } #else static inline void enic_rfs_timer_start(struct enic *enic) {} diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 54aa3953bf7b..c753c35b26eb 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1787,7 +1787,7 @@ static int enic_stop(struct net_device *netdev) enic_synchronize_irqs(enic); - del_timer_sync(&enic->notify_timer); + timer_delete_sync(&enic->notify_timer); enic_rfs_flw_tbl_free(enic); enic_dev_disable(enic); diff --git a/drivers/net/ethernet/dec/tulip/21142.c b/drivers/net/ethernet/dec/tulip/21142.c index 369858272650..76767dec216d 100644 --- a/drivers/net/ethernet/dec/tulip/21142.c +++ b/drivers/net/ethernet/dec/tulip/21142.c @@ -216,7 +216,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5) (csr12 & 2) == 2) || (tp->nway && (csr5 & (TPLnkFail)))) { /* Link blew? Maybe restart NWay. */ - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); t21142_start_nway(dev); tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); @@ -226,7 +226,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5) medianame[dev->if_port], (csr12 & 2) ? "failed" : "good"); if ((csr12 & 2) && ! tp->medialock) { - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); t21142_start_nway(dev); tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 0a161a4db242..f9339d0772b5 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1428,7 +1428,7 @@ static int de_close (struct net_device *dev) netif_dbg(de, ifdown, dev, "disabling interface\n"); - del_timer_sync(&de->media_timer); + timer_delete_sync(&de->media_timer); spin_lock_irqsave(&de->lock, flags); de_stop_hw(de); @@ -1452,7 +1452,7 @@ static void de_tx_timeout (struct net_device *dev, unsigned int txqueue) dr32(MacStatus), dr32(MacMode), dr32(SIAStatus), de->rx_tail, de->tx_head, de->tx_tail); - del_timer_sync(&de->media_timer); + timer_delete_sync(&de->media_timer); disable_irq(irq); spin_lock_irq(&de->lock); @@ -2126,7 +2126,7 @@ static int __maybe_unused de_suspend(struct device *dev_d) if (netif_running (dev)) { const int irq = pdev->irq; - del_timer_sync(&de->media_timer); + timer_delete_sync(&de->media_timer); disable_irq(irq); spin_lock_irq(&de->lock); diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 3188ba7b450f..ae34b95ed676 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -745,7 +745,7 @@ static int dmfe_stop(struct net_device *dev) netif_stop_queue(dev); /* deleted timer */ - del_timer_sync(&db->timer); + timer_delete_sync(&db->timer); /* Reset & stop DM910X board */ dw32(DCR0, DM910X_RESET); diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c index 54560f9a1651..2d926a26fbb9 100644 --- a/drivers/net/ethernet/dec/tulip/interrupt.c +++ b/drivers/net/ethernet/dec/tulip/interrupt.c @@ -699,8 +699,8 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance) tulip_start_rxtx(tp); } /* - * NB: t21142_lnk_change() does a del_timer_sync(), so be careful if this - * call is ever done under the spinlock + * NB: t21142_lnk_change() does a timer_delete_sync(), so be careful + * if this call is ever done under the spinlock */ if (csr5 & (TPLnkPass | TPLnkFail | 0x08000000)) { if (tp->link_change) diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c index 72a09156b48b..2e3bdc0fcdc0 100644 --- a/drivers/net/ethernet/dec/tulip/pnic2.c +++ b/drivers/net/ethernet/dec/tulip/pnic2.c @@ -323,7 +323,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5) if (tulip_debug > 2) netdev_dbg(dev, "Ugh! Link blew?\n"); - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); pnic2_start_nway(dev); tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); @@ -348,7 +348,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5) /* if failed then try doing an nway to get in sync */ if ((csr12 & 2) && ! tp->medialock) { - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); pnic2_start_nway(dev); tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); @@ -372,7 +372,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5) /* if failed, try doing an nway to get in sync */ if ((csr12 & 4) && ! tp->medialock) { - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); pnic2_start_nway(dev); tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 75eac18ff246..c8c53121557f 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -747,9 +747,9 @@ static void tulip_down (struct net_device *dev) napi_disable(&tp->napi); #endif - del_timer_sync (&tp->timer); + timer_delete_sync(&tp->timer); #ifdef CONFIG_TULIP_NAPI - del_timer_sync (&tp->oom_timer); + timer_delete_sync(&tp->oom_timer); #endif spin_lock_irqsave (&tp->lock, flags); diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index ff080ab0f116..3f1bd670700b 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -656,7 +656,7 @@ static int uli526x_stop(struct net_device *dev) netif_stop_queue(dev); /* deleted timer */ - del_timer_sync(&db->timer); + timer_delete_sync(&db->timer); /* Reset & stop ULI526X board */ uw32(DCR0, ULI526X_RESET); diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 37fba39c0056..5930cdec6f2f 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -1509,7 +1509,7 @@ static int netdev_close(struct net_device *dev) } #endif /* __i386__ debugging only */ - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); free_rxtx_rings(np); free_ringdesc(np); @@ -1560,7 +1560,7 @@ static int __maybe_unused w840_suspend(struct device *dev_d) rtnl_lock(); if (netif_running (dev)) { - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); spin_lock_irq(&np->lock); netif_device_detach(dev); diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d0ea92607870..d88fbecdab4b 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -1778,7 +1778,7 @@ rio_close (struct net_device *dev) rio_hw_stop(dev); free_irq(pdev->irq, dev); - del_timer_sync (&np->timer); + timer_delete_sync(&np->timer); free_list(dev); @@ -1818,7 +1818,7 @@ static int rio_suspend(struct device *device) return 0; netif_device_detach(dev); - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); rio_hw_stop(dev); return 0; diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index ed18450fd2cc..670b68201376 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -1900,8 +1900,8 @@ static int netdev_close(struct net_device *dev) /* Stop the chip's Tx and Rx processes. */ stop_nic_rxtx(ioaddr, 0); - del_timer_sync(&np->timer); - del_timer_sync(&np->reset_timer); + timer_delete_sync(&np->timer); + timer_delete_sync(&np->reset_timer); free_irq(np->pci_dev->irq, dev); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 4dea1fdce748..eae1a7595a69 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -705,7 +705,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) * sizeof(struct stats)); - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); } return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index f9a73c956861..c3791cf23c87 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -302,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv) if (!priv->stats_report) return; - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); priv->stats_report = NULL; @@ -1408,7 +1408,7 @@ static int gve_queues_stop(struct gve_priv *priv) goto err; gve_clear_device_rings_ok(priv); } - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 42bb341fd80b..d98f8d3ce7c8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1402,7 +1402,7 @@ static void hns_nic_net_down(struct net_device *ndev) if (test_and_set_bit(NIC_STATE_DOWN, &priv->state)) return; - (void)del_timer_sync(&priv->service_timer); + (void) timer_delete_sync(&priv->service_timer); netif_tx_stop_all_queues(ndev); netif_carrier_off(ndev); netif_tx_disable(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 92f9b8ec76d9..3e28a08934ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11492,7 +11492,7 @@ static void hclge_state_uninit(struct hclge_dev *hdev) set_bit(HCLGE_STATE_REMOVING, &hdev->state); if (hdev->reset_timer.function) - del_timer_sync(&hdev->reset_timer); + timer_delete_sync(&hdev->reset_timer); if (hdev->service_task.work.func) cancel_delayed_work_sync(&hdev->service_task); } diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 3a5bbda235cb..c0ead54ea186 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2293,7 +2293,7 @@ static int e100_up(struct nic *nic) return 0; err_no_irq: - del_timer_sync(&nic->watchdog); + timer_delete_sync(&nic->watchdog); err_clean_cbs: e100_clean_cbs(nic); err_rx_clean_list: @@ -2308,7 +2308,7 @@ static void e100_down(struct nic *nic) netif_stop_queue(nic->netdev); e100_hw_reset(nic); free_irq(nic->pdev->irq, nic->netdev); - del_timer_sync(&nic->watchdog); + timer_delete_sync(&nic->watchdog); netif_carrier_off(nic->netdev); e100_clean_cbs(nic); e100_rx_clean_list(nic); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 286155efcedf..8ebcb6a7d608 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4287,8 +4287,8 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) napi_synchronize(&adapter->napi); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); spin_lock(&adapter->stats64_lock); e1000e_update_stats(adapter); @@ -7741,8 +7741,8 @@ static void e1000_remove(struct pci_dev *pdev) * from being rescheduled. */ set_bit(__E1000_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 92de609b7218..21267ab603ef 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2245,7 +2245,7 @@ static void fm10k_remove(struct pci_dev *pdev) struct fm10k_intfc *interface = pci_get_drvdata(pdev); struct net_device *netdev = interface->netdev; - del_timer_sync(&interface->service_timer); + timer_delete_sync(&interface->service_timer); fm10k_stop_service_event(interface); fm10k_stop_macvlan_task(interface); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 65a702668e21..120d68654e3f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16382,7 +16382,7 @@ static int i40e_io_suspend(struct i40e_pf *pf) set_bit(__I40E_DOWN, pf->state); /* Ensure service task will not be running */ - del_timer_sync(&pf->service_timer); + timer_delete_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); /* Client close must be called explicitly here because the timer @@ -16581,7 +16581,7 @@ static void i40e_shutdown(struct pci_dev *pdev) set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); - del_timer_sync(&pf->service_timer); + timer_delete_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 049edeb60104..d390157b59fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1717,7 +1717,7 @@ static int ice_service_task_stop(struct ice_pf *pf) ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state); if (pf->serv_tmr.function) - del_timer_sync(&pf->serv_tmr); + timer_delete_sync(&pf->serv_tmr); if (pf->serv_task.func) cancel_work_sync(&pf->serv_task); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 9be4bd717512..7752920d7a8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1521,7 +1521,7 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc)); spin_unlock_irqrestore(&fdir->ctx_lock, flags); - ret = del_timer(&ctx_irq->rx_tmr); + ret = timer_delete(&ctx_irq->rx_tmr); if (!ret) dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id); @@ -1916,7 +1916,7 @@ static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf) struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq; unsigned long flags; - del_timer(&ctx->rx_tmr); + timer_delete(&ctx->rx_tmr); spin_lock_irqsave(&vf->fdir.ctx_lock, flags); ctx->flags &= ~ICE_VF_FDIR_CTX_VALID; spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d368b753a467..c646c71915f0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2185,8 +2185,8 @@ void igb_down(struct igb_adapter *adapter) } } - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); /* record the stats before reset*/ spin_lock(&adapter->stats64_lock); @@ -3860,8 +3860,8 @@ static void igb_remove(struct pci_dev *pdev) * disable watchdog from being rescheduled. */ set_bit(__IGB_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 02044aa2181b..beb01248600f 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1592,7 +1592,7 @@ void igbvf_down(struct igbvf_adapter *adapter) igbvf_irq_disable(adapter); - del_timer_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->watchdog_timer); /* record the stats before reset*/ igbvf_update_stats(adapter); @@ -2912,7 +2912,7 @@ static void igbvf_remove(struct pci_dev *pdev) * disable it from being rescheduled. */ set_bit(__IGBVF_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->watchdog_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 156d123c0e21..f1330379e6bb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5291,8 +5291,8 @@ void igc_down(struct igc_adapter *adapter) } } - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); /* record the stats before reset*/ spin_lock(&adapter->stats64_lock); @@ -7272,8 +7272,8 @@ static void igc_remove(struct pci_dev *pdev) set_bit(__IGC_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 481f917f7ed2..a2718218963e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6538,7 +6538,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter) adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT; adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; - del_timer_sync(&adapter->service_timer); + timer_delete_sync(&adapter->service_timer); if (adapter->num_vfs) { /* Clear EITR Select mapping */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 6442f115a262..a217c5c04804 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2514,7 +2514,7 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) ixgbevf_napi_disable_all(adapter); - del_timer_sync(&adapter->service_timer); + timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ for (i = 0; i < adapter->num_tx_queues; i++) { diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 87c7e6251a4f..1e2ac1a5f099 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1239,7 +1239,7 @@ static int korina_close(struct net_device *dev) struct korina_private *lp = netdev_priv(dev); u32 tmp; - del_timer(&lp->media_check_timer); + timer_delete(&lp->media_check_timer); /* Disable interrupts */ disable_irq(lp->rx_irq); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 67a6ff07c83d..8cc888bf6094 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2247,7 +2247,7 @@ static int mv643xx_eth_poll(struct napi_struct *napi, int budget) if (unlikely(mp->oom)) { mp->oom = 0; - del_timer(&mp->rx_oom); + timer_delete(&mp->rx_oom); } work_done = 0; @@ -2521,7 +2521,7 @@ static int mv643xx_eth_stop(struct net_device *dev) napi_disable(&mp->napi); - del_timer_sync(&mp->rx_oom); + timer_delete_sync(&mp->rx_oom); netif_carrier_off(dev); if (dev->phydev) @@ -2531,7 +2531,7 @@ static int mv643xx_eth_stop(struct net_device *dev) port_reset(mp); mv643xx_eth_get_stats(dev); mib_counters_update(mp); - del_timer_sync(&mp->mib_counters_timer); + timer_delete_sync(&mp->mib_counters_timer); for (i = 0; i < mp->rxq_count; i++) rxq_deinit(mp->rxq + i); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 2bf426cea6dd..72c1967768f4 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1175,7 +1175,7 @@ static int pxa168_eth_stop(struct net_device *dev) /* Write to ICR to clear interrupts. */ wrl(pep, INT_W_CLEAR, 0); napi_disable(&pep->napi); - del_timer_sync(&pep->timeout); + timer_delete_sync(&pep->timeout); netif_carrier_off(dev); free_irq(dev->irq, dev); rxq_deinit(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index a1bada9eaaf6..b2081d6e34f0 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2662,7 +2662,7 @@ static int skge_down(struct net_device *dev) netif_tx_disable(dev); if (is_genesis(hw) && hw->phy_type == SK_PHY_XMAC) - del_timer_sync(&skge->link_timer); + timer_delete_sync(&skge->link_timer); napi_disable(&skge->napi); netif_carrier_off(dev); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index d7121c836508..e2a9aae8bc9b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5052,7 +5052,7 @@ static int sky2_suspend(struct device *dev) if (!hw) return 0; - del_timer_sync(&hw->watchdog_timer); + timer_delete_sync(&hw->watchdog_timer); cancel_work_sync(&hw->restart_work); rtnl_lock(); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 0d8a362c2673..33ba0a5c38ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -305,7 +305,7 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - del_timer_sync(&priv->catas_err.timer); + timer_delete_sync(&priv->catas_err.timer); if (priv->catas_err.map) { iounmap(priv->catas_err.map); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 6830a49fe682..5442a02c4097 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -246,7 +246,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer_sync(&fw_reset->timer); + timer_delete_sync(&fw_reset->timer); } static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 91613d5a36cd..624452ddebc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -847,7 +847,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); - del_timer_sync(&health->timer); + timer_delete_sync(&health->timer); } void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index dc1d9f774565..1302aa8e0853 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -3951,7 +3951,7 @@ static void ksz_stop_timer(struct ksz_timer_info *info) { if (info->max) { info->max = 0; - del_timer_sync(&info->timer); + timer_delete_sync(&info->timer); } } diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index b7d9657a7af3..7c501a758325 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2482,7 +2482,7 @@ static int myri10ge_close(struct net_device *dev) if (mgp->ss[0].tx.req_bytes == NULL) return 0; - del_timer_sync(&mgp->watchdog_timer); + timer_delete_sync(&mgp->watchdog_timer); mgp->running = MYRI10GE_ETH_STOPPING; for (i = 0; i < mgp->num_slices; i++) napi_disable(&mgp->ss[i].napi); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index ad0c14849115..05606692e631 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -3179,7 +3179,7 @@ static int netdev_close(struct net_device *dev) * the final WOL settings? */ - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); disable_irq(irq); spin_lock_irq(&np->lock); natsemi_irq_disable(dev); @@ -3278,7 +3278,7 @@ static int __maybe_unused natsemi_suspend(struct device *dev_d) if (netif_running (dev)) { const int irq = np->pci_dev->irq; - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); disable_irq(irq); spin_lock_irq(&np->lock); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index bea969dfa536..bf0347715a05 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1527,7 +1527,7 @@ static int ns83820_stop(struct net_device *ndev) struct ns83820 *dev = PRIV(ndev); /* FIXME: protect against interrupt handler? */ - del_timer_sync(&dev->tx_watchdog); + timer_delete_sync(&dev->tx_watchdog); ns83820_disable_interrupts(dev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index f8016dc25e0a..3e55e8dc664c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7019,7 +7019,7 @@ static void do_s2io_card_down(struct s2io_nic *sp, int do_io) if (!is_s2io_card_up(sp)) return; - del_timer_sync(&sp->alarm_timer); + timer_delete_sync(&sp->alarm_timer); /* If s2io_set_link task is executing, wait till it completes. */ while (test_and_set_bit(__S2IO_STATE_LINK_TASK, &(sp->state))) msleep(50); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index abba165738a3..95514fabadf2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -227,7 +227,7 @@ static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) spin_unlock_bh(&nn->reconfig_lock); if (cancelled_timer) { - del_timer_sync(&nn->reconfig_timer); + timer_delete_sync(&nn->reconfig_timer); nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 499e5e39d513..29cb74ccb25a 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5623,9 +5623,9 @@ static int nv_close(struct net_device *dev) napi_disable(&np->napi); synchronize_irq(np->pci_dev->irq); - del_timer_sync(&np->oom_kick); - del_timer_sync(&np->nic_poll); - del_timer_sync(&np->stats_poll); + timer_delete_sync(&np->oom_kick); + timer_delete_sync(&np->nic_poll); + timer_delete_sync(&np->stats_poll); netif_stop_queue(dev); spin_lock_irq(&np->lock); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 4ac29cd59f2b..1651df8a7c21 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1916,7 +1916,7 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter) pch_gbe_irq_disable(adapter); pch_gbe_free_irq(adapter); - del_timer_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->watchdog_timer); netdev->tx_queue_len = adapter->tx_queue_len; netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index a36d422b5173..26bc8b3b1ec8 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -1712,7 +1712,7 @@ static int hamachi_close(struct net_device *dev) free_irq(hmp->pci_dev->irq, dev); - del_timer_sync(&hmp->timer); + timer_delete_sync(&hmp->timer); /* Free all the skbuffs in the Rx queue. */ for (i = 0; i < RX_RING_SIZE; i++) { diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index c0515dc63246..21b760e65d73 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -1222,7 +1222,7 @@ static int yellowfin_close(struct net_device *dev) iowrite32(0x80000000, ioaddr + RxCtrl); iowrite32(0x80000000, ioaddr + TxCtrl); - del_timer(&yp->timer); + timer_delete(&yp->timer); #if defined(__i386__) if (yellowfin_debug > 2) { diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index cb4e12df7719..801380729046 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1288,7 +1288,7 @@ static int pasemi_mac_close(struct net_device *dev) phy_disconnect(dev->phydev); } - del_timer_sync(&mac->tx->clean_timer); + timer_delete_sync(&mac->tx->clean_timer); netif_stop_queue(dev); napi_disable(&mac->napi); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index f5dc876eb500..4c377bdc62c8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -441,7 +441,7 @@ static void ionic_reset_prepare(struct pci_dev *pdev) set_bit(IONIC_LIF_F_FW_RESET, lif->state); - del_timer_sync(&ionic->watchdog_timer); + timer_delete_sync(&ionic->watchdog_timer); cancel_work_sync(&lif->deferred.work); mutex_lock(&lif->queue_lock); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index fc78bc959ded..bf5bf8c95c85 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3420,7 +3420,7 @@ static int ql_adapter_down(struct ql3_adapter *qdev, int do_reset) pci_disable_msi(qdev->pdev); } - del_timer_sync(&qdev->adapter_timer); + timer_delete_sync(&qdev->adapter_timer); napi_disable(&qdev->napi); diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 6cbcb3164367..c73a57e4a144 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -832,7 +832,7 @@ net_close(struct net_device *dev) netif_stop_queue(dev); - del_timer_sync(&lp->timer); + timer_delete_sync(&lp->timer); /* Flush the Tx and disable Rx here. */ lp->addr_mode = CMR2h_OFF; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 826990459fa4..d5db26103d82 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2386,7 +2386,7 @@ static void ofdpa_fini(struct rocker *rocker) struct hlist_node *tmp; int bkt; - del_timer_sync(&ofdpa->fdb_cleanup_timer); + timer_delete_sync(&ofdpa->fdb_cleanup_timer); flush_workqueue(rocker->rocker_owq); spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 12c8396b6942..36b63bf343a9 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -91,7 +91,7 @@ void sxgbe_disable_eee_mode(struct sxgbe_priv_data * const priv) { /* Exit and disable EEE in case of we are in LPI state. */ priv->hw->mac->reset_eee_mode(priv->ioaddr); - del_timer_sync(&priv->eee_ctrl_timer); + timer_delete_sync(&priv->eee_ctrl_timer); priv->tx_path_in_lpi_mode = false; } @@ -1044,7 +1044,7 @@ static void sxgbe_tx_del_timer(struct sxgbe_priv_data *priv) SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { struct sxgbe_tx_queue *p = priv->txq[queue_num]; - del_timer_sync(&p->txtimer); + timer_delete_sync(&p->txtimer); } } @@ -1208,7 +1208,7 @@ static int sxgbe_release(struct net_device *dev) struct sxgbe_priv_data *priv = netdev_priv(dev); if (priv->eee_enabled) - del_timer_sync(&priv->eee_ctrl_timer); + timer_delete_sync(&priv->eee_ctrl_timer); /* Stop and disconnect the PHY */ if (dev->phydev) { diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 9319a2675e7b..1d65113fab76 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -181,7 +181,7 @@ static void ether3_ledoff(struct timer_list *t) */ static inline void ether3_ledon(struct net_device *dev) { - del_timer(&priv(dev)->timer); + timer_delete(&priv(dev)->timer); priv(dev)->timer.expires = jiffies + HZ / 50; /* leave on for 1/50th second */ add_timer(&priv(dev)->timer); if (priv(dev)->regs.config2 & CFG2_CTRLO) @@ -454,7 +454,7 @@ static void ether3_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; - del_timer(&priv(dev)->timer); + timer_delete(&priv(dev)->timer); local_irq_save(flags); printk(KERN_ERR "%s: transmit timed out, network cable problem?\n", dev->name); @@ -851,7 +851,7 @@ static void ether3_remove(struct expansion_card *ec) ecard_set_drvdata(ec, NULL); unregister_netdev(dev); - del_timer_sync(&priv(dev)->timer); + timer_delete_sync(&priv(dev)->timer); free_netdev(dev); ecard_release_resources(ec); } diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index 4af56333ea49..b865275beb66 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -2657,7 +2657,7 @@ void falcon_stop_nic_stats(struct ef4_nic *efx) ++nic_data->stats_disable_count; spin_unlock_bh(&efx->stats_lock); - del_timer_sync(&nic_data->stats_timer); + timer_delete_sync(&nic_data->stats_timer); /* Wait enough time for the most recent transfer to * complete. */ diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 6bbdb5d2eebf..38ad7ac07726 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -791,7 +791,7 @@ void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue) netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, "shutting down RX queue %d\n", ef4_rx_queue_index(rx_queue)); - del_timer_sync(&rx_queue->slow_fill); + timer_delete_sync(&rx_queue->slow_fill); /* Release RX buffers from the current read ptr to the write ptr */ if (rx_queue->buffer) { diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index dbd2ee915838..dcef0588be96 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -530,7 +530,7 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) * of it aborting the next request. */ if (!timeout) - del_timer_sync(&mcdi->async_timer); + timer_delete_sync(&mcdi->async_timer); spin_lock(&mcdi->async_lock); async = list_first_entry(&mcdi->async_list, @@ -1122,7 +1122,7 @@ void efx_mcdi_flush_async(struct efx_nic *efx) /* We must be in poll or fail mode so no more requests can be queued */ BUG_ON(mcdi->mode == MCDI_MODE_EVENTS); - del_timer_sync(&mcdi->async_timer); + timer_delete_sync(&mcdi->async_timer); /* If a request is still running, make sure we give the MC * time to complete it so that the response won't overwrite our diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 4cc83203e188..8eb272ba674b 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -285,7 +285,7 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); - del_timer_sync(&rx_queue->slow_fill); + timer_delete_sync(&rx_queue->slow_fill); if (rx_queue->grant_credits) flush_work(&rx_queue->grant_work); diff --git a/drivers/net/ethernet/sfc/siena/mcdi.c b/drivers/net/ethernet/sfc/siena/mcdi.c index 3f7899daa86a..99ab5f294691 100644 --- a/drivers/net/ethernet/sfc/siena/mcdi.c +++ b/drivers/net/ethernet/sfc/siena/mcdi.c @@ -534,7 +534,7 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) * of it aborting the next request. */ if (!timeout) - del_timer_sync(&mcdi->async_timer); + timer_delete_sync(&mcdi->async_timer); spin_lock(&mcdi->async_lock); async = list_first_entry(&mcdi->async_list, @@ -1145,7 +1145,7 @@ void efx_siena_mcdi_flush_async(struct efx_nic *efx) /* We must be in poll or fail mode so no more requests can be queued */ BUG_ON(mcdi->mode == MCDI_MODE_EVENTS); - del_timer_sync(&mcdi->async_timer); + timer_delete_sync(&mcdi->async_timer); /* If a request is still running, make sure we give the MC * time to complete it so that the response won't overwrite our diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c index 2839d0e0a9c1..ab493e529d5c 100644 --- a/drivers/net/ethernet/sfc/siena/rx_common.c +++ b/drivers/net/ethernet/sfc/siena/rx_common.c @@ -284,7 +284,7 @@ void efx_siena_fini_rx_queue(struct efx_rx_queue *rx_queue) netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); - del_timer_sync(&rx_queue->slow_fill); + timer_delete_sync(&rx_queue->slow_fill); /* Release RX buffers from the current read ptr to the write ptr */ if (rx_queue->buffer) { diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 4535579018c9..7196e1c607f3 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -718,7 +718,7 @@ static void ioc3_init(struct net_device *dev) struct ioc3_private *ip = netdev_priv(dev); struct ioc3_ethregs *regs = ip->regs; - del_timer_sync(&ip->ioc3_timer); /* Kill if running */ + timer_delete_sync(&ip->ioc3_timer); /* Kill if running */ writel(EMCR_RST, ®s->emcr); /* Reset */ readl(®s->emcr); /* Flush WB */ @@ -801,7 +801,7 @@ static int ioc3_close(struct net_device *dev) { struct ioc3_private *ip = netdev_priv(dev); - del_timer_sync(&ip->ioc3_timer); + timer_delete_sync(&ip->ioc3_timer); netif_stop_queue(dev); @@ -950,7 +950,7 @@ static int ioc3eth_probe(struct platform_device *pdev) return 0; out_stop: - del_timer_sync(&ip->ioc3_timer); + timer_delete_sync(&ip->ioc3_timer); if (ip->rxr) dma_free_coherent(ip->dma_dev, RX_RING_SIZE, ip->rxr, ip->rxr_dma); @@ -971,7 +971,7 @@ static void ioc3eth_remove(struct platform_device *pdev) dma_free_coherent(ip->dma_dev, TX_RING_SIZE + SZ_16K - 1, ip->tx_ring, ip->txr_dma); unregister_netdev(dev); - del_timer_sync(&ip->ioc3_timer); + timer_delete_sync(&ip->ioc3_timer); free_netdev(dev); } diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index dda4e488c77a..d10b14787607 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -758,7 +758,7 @@ static irqreturn_t sis190_irq(int irq, void *__dev) if (status & LinkChange) { netif_info(tp, intr, dev, "link change\n"); - del_timer(&tp->timer); + timer_delete(&tp->timer); schedule_work(&tp->phy_task); } @@ -1034,7 +1034,7 @@ static inline void sis190_delete_timer(struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); } static inline void sis190_request_timer(struct net_device *dev) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 85b850372efe..332cbd725900 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1983,7 +1983,7 @@ static int sis900_close(struct net_device *net_dev) /* Stop the chip's Tx and Rx Status Machine */ sw32(cr, RxDIS | TxDIS | sr32(cr)); - del_timer(&sis_priv->timer); + timer_delete(&sis_priv->timer); free_irq(pdev->irq, net_dev); diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 013e90d69182..ca0ab3a35b73 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1292,7 +1292,7 @@ static int epic_close(struct net_device *dev) netdev_dbg(dev, "Shutting down ethercard, status was %2.2x.\n", er32(INTSTAT)); - del_timer_sync(&ep->timer); + timer_delete_sync(&ep->timer); epic_disable_int(dev, ep); diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 86e3ec25df07..6fa957fb523b 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -1105,7 +1105,7 @@ static int smc_close(struct net_device *dev) outw(CTL_POWERDOWN, ioaddr + CONTROL ); link->open--; - del_timer_sync(&smc->media); + timer_delete_sync(&smc->media); return 0; } /* smc_close */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 279532609707..59d07d0d3369 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -467,7 +467,7 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv) */ static void stmmac_stop_sw_lpi(struct stmmac_priv *priv) { - del_timer_sync(&priv->eee_ctrl_timer); + timer_delete_sync(&priv->eee_ctrl_timer); stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); priv->tx_path_in_lpi_mode = false; } @@ -1082,7 +1082,7 @@ static void stmmac_mac_disable_tx_lpi(struct phylink_config *config) netdev_dbg(priv->dev, "disable EEE\n"); priv->eee_sw_timer_en = false; - del_timer_sync(&priv->eee_ctrl_timer); + timer_delete_sync(&priv->eee_ctrl_timer); stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); priv->tx_path_in_lpi_mode = false; @@ -7842,7 +7842,7 @@ int stmmac_suspend(struct device *dev) if (priv->eee_sw_timer_en) { priv->tx_path_in_lpi_mode = false; - del_timer_sync(&priv->eee_ctrl_timer); + timer_delete_sync(&priv->eee_ctrl_timer); } /* Stop TX/RX DMA */ diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index b8948d5b779a..b777e5a099eb 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3779,7 +3779,7 @@ static void cas_shutdown(struct cas *cp) /* Make us not-running to avoid timers respawning */ cp->hw_running = 0; - del_timer_sync(&cp->link_timer); + timer_delete_sync(&cp->link_timer); /* Stop the reset task */ #if 0 diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index a9a6670b5ff1..6fc37ab27f7b 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -390,7 +390,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) return 0; err_out_del_timer: - del_timer_sync(&port->clean_timer); + timer_delete_sync(&port->clean_timer); list_del_rcu(&port->list); synchronize_rcu(); netif_napi_del(&port->napi); @@ -408,8 +408,8 @@ static void vsw_port_remove(struct vio_dev *vdev) unsigned long flags; if (port) { - del_timer_sync(&port->vio.timer); - del_timer_sync(&port->clean_timer); + timer_delete_sync(&port->vio.timer); + timer_delete_sync(&port->clean_timer); napi_disable(&port->napi); unregister_netdev(port->dev); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 72177fea1cfb..73c07f10f053 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6165,7 +6165,7 @@ static void niu_full_shutdown(struct niu *np, struct net_device *dev) niu_disable_napi(np); netif_tx_stop_all_queues(dev); - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); spin_lock_irq(&np->lock); @@ -6511,7 +6511,7 @@ static void niu_reset_task(struct work_struct *work) spin_unlock_irqrestore(&np->lock, flags); - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); niu_netif_stop(np); @@ -9914,7 +9914,7 @@ static int __maybe_unused niu_suspend(struct device *dev_d) flush_work(&np->reset_task); niu_netif_stop(np); - del_timer_sync(&np->timer); + timer_delete_sync(&np->timer); spin_lock_irqsave(&np->lock, flags); niu_enable_interrupts(np, 0); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index bbb3a6ca19ed..d2c82102133c 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -931,7 +931,7 @@ static int bigmac_close(struct net_device *dev) { struct bigmac *bp = netdev_priv(dev); - del_timer(&bp->bigmac_timer); + timer_delete(&bp->bigmac_timer); bp->timer_state = asleep; bp->timer_ticks = 0; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 3e5f9b17c777..06579d7b5220 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2180,7 +2180,7 @@ static void gem_do_stop(struct net_device *dev, int wol) gem_disable_ints(gp); /* Stop the link timer */ - del_timer_sync(&gp->link_timer); + timer_delete_sync(&gp->link_timer); /* We cannot cancel the reset task while holding the * rtnl lock, we'd get an A->B / B->A deadlock stituation @@ -2230,7 +2230,7 @@ static void gem_reset_task(struct work_struct *work) } /* Stop the link timer */ - del_timer_sync(&gp->link_timer); + timer_delete_sync(&gp->link_timer); /* Stop NAPI and tx */ gem_netif_stop(gp); @@ -2610,7 +2610,7 @@ static int gem_set_link_ksettings(struct net_device *dev, /* Apply settings and restart link process. */ if (netif_device_present(gp->dev)) { - del_timer_sync(&gp->link_timer); + timer_delete_sync(&gp->link_timer); gem_begin_auto_negotiation(gp, cmd); } @@ -2626,7 +2626,7 @@ static int gem_nway_reset(struct net_device *dev) /* Restart link process */ if (netif_device_present(gp->dev)) { - del_timer_sync(&gp->link_timer); + timer_delete_sync(&gp->link_timer); gem_begin_auto_negotiation(gp, NULL); } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 50ace461a1af..9a7586623318 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1265,7 +1265,7 @@ static int happy_meal_init(struct happy_meal *hp) u32 regtmp, rxcfg; /* If auto-negotiation timer is running, kill it. */ - del_timer(&hp->happy_timer); + timer_delete(&hp->happy_timer); HMD("happy_flags[%08x]\n", hp->happy_flags); if (!(hp->happy_flags & HFLAG_INIT)) { @@ -1922,7 +1922,7 @@ static int happy_meal_close(struct net_device *dev) happy_meal_clean_rings(hp); /* If auto-negotiation timer is running, kill it. */ - del_timer(&hp->happy_timer); + timer_delete(&hp->happy_timer); spin_unlock_irq(&hp->happy_lock); @@ -2184,7 +2184,7 @@ static int hme_set_link_ksettings(struct net_device *dev, /* Ok, do it to it. */ spin_lock_irq(&hp->happy_lock); - del_timer(&hp->happy_timer); + timer_delete(&hp->happy_timer); happy_meal_begin_auto_negotiation(hp, hp->tcvregs, cmd); spin_unlock_irq(&hp->happy_lock); diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 1e887d951a04..a2a3e94da4b8 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -505,7 +505,7 @@ static void vnet_port_remove(struct vio_dev *vdev) struct vnet_port *port = dev_get_drvdata(&vdev->dev); if (port) { - del_timer_sync(&port->vio.timer); + timer_delete_sync(&port->vio.timer); napi_disable(&port->napi); diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 1cacb2a0ee03..ddc6d46a7a86 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1058,7 +1058,7 @@ void sunvnet_clean_timer_expire_common(struct timer_list *t) (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); else - del_timer(&port->clean_timer); + timer_delete(&port->clean_timer); } EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common); @@ -1513,7 +1513,7 @@ sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT); else if (port) - del_timer(&port->clean_timer); + timer_delete(&port->clean_timer); rcu_read_unlock(); dev_kfree_skb(skb); vnet_free_skbs(freeskbs); @@ -1707,7 +1707,7 @@ EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common); void vnet_port_reset(struct vnet_port *port) { - del_timer(&port->clean_timer); + timer_delete(&port->clean_timer); sunvnet_port_free_tx_bufs_common(port); port->rmtu = 0; port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index d1793b6154c7..24e4b246f25f 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -405,7 +405,7 @@ static void xlgmac_stop_timers(struct xlgmac_pdata *pdata) if (!channel->tx_ring) break; - del_timer_sync(&channel->tx_timer); + timer_delete_sync(&channel->tx_timer); } } diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 5cc72a91f220..7f77694ecfba 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -1287,7 +1287,7 @@ static void cpsw_ale_aging_stop(struct cpsw_ale *ale) return; } - del_timer_sync(&ale->timer); + timer_delete_sync(&ale->timer); } void cpsw_ale_start(struct cpsw_ale *ale) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 63e686f0b119..fd2b74508980 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3796,7 +3796,7 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv) { struct gbe_priv *gbe_dev = inst_priv; - del_timer_sync(&gbe_dev->timer); + timer_delete_sync(&gbe_dev->timer); cpts_release(gbe_dev->cpts); cpsw_ale_stop(gbe_dev->ale); netcp_txpipe_close(&gbe_dev->tx_pipe); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index b3da76efa8f5..d9240fb91747 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -332,13 +332,13 @@ static void tlan_stop(struct net_device *dev) { struct tlan_priv *priv = netdev_priv(dev); - del_timer_sync(&priv->media_timer); + timer_delete_sync(&priv->media_timer); tlan_read_and_clear_stats(dev, TLAN_RECORD); outl(TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD); /* Reset and power down phy */ tlan_reset_adapter(dev); if (priv->timer.function != NULL) { - del_timer_sync(&priv->timer); + timer_delete_sync(&priv->timer); priv->timer.function = NULL; } } diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c6957e3b7f0f..7ec0e3c13d54 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1379,7 +1379,7 @@ static int tsi108_close(struct net_device *dev) netif_stop_queue(dev); napi_disable(&data->napi); - del_timer_sync(&data->timer); + timer_delete_sync(&data->timer); tsi108_stop_ethernet(dev); tsi108_kill_phy(dev); diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index f5c25acaa577..54b7f24f3810 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -983,7 +983,7 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) case FZA_STATE_UNINITIALIZED: netif_carrier_off(dev); - del_timer_sync(&fp->reset_timer); + timer_delete_sync(&fp->reset_timer); fp->ring_cmd_index = 0; fp->ring_uns_index = 0; fp->ring_rmc_tx_index = 0; @@ -1017,7 +1017,7 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) fp->queue_active = 0; netif_stop_queue(dev); pr_debug("%s: queue stopped\n", fp->name); - del_timer_sync(&fp->reset_timer); + timer_delete_sync(&fp->reset_timer); pr_warn("%s: halted, reason: %x\n", fp->name, FZA_STATUS_GET_HALT(status)); fza_regs_dump(fp); @@ -1227,7 +1227,7 @@ static int fza_close(struct net_device *dev) netif_stop_queue(dev); pr_debug("%s: queue stopped\n", fp->name); - del_timer_sync(&fp->reset_timer); + timer_delete_sync(&fp->reset_timer); spin_lock_irqsave(&fp->lock, flags); fp->state = FZA_STATE_UNINITIALIZED; fp->state_chg_flag = 0; @@ -1493,7 +1493,7 @@ static int fza_probe(struct device *bdev) return 0; err_out_irq: - del_timer_sync(&fp->reset_timer); + timer_delete_sync(&fp->reset_timer); fza_do_shutdown(fp); free_irq(dev->irq, dev); @@ -1520,7 +1520,7 @@ static int fza_remove(struct device *bdev) unregister_netdev(dev); - del_timer_sync(&fp->reset_timer); + timer_delete_sync(&fp->reset_timer); fza_do_shutdown(fp); free_irq(dev->irq, dev); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 3bf6785f9057..b33d84ed5bbf 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -660,8 +660,8 @@ static void sixpack_close(struct tty_struct *tty) unregister_netdev(sp->dev); - del_timer_sync(&sp->tx_t); - del_timer_sync(&sp->resync_t); + timer_delete_sync(&sp->tx_t); + timer_delete_sync(&sp->resync_t); /* Free all 6pack frame buffers after unreg. */ kfree(sp->xbuff); @@ -937,7 +937,7 @@ sixpack_decode(struct sixpack *sp, const u8 *pre_rbuff, size_t count) inbyte = pre_rbuff[count1]; if (inbyte == SIXP_FOUND_TNC) { tnc_set_sync_state(sp, TNC_IN_SYNC); - del_timer(&sp->resync_t); + timer_delete(&sp->resync_t); } if ((inbyte & SIXP_PRIO_CMD_MASK) != 0) decode_prio_command(sp, inbyte); diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index c71e52249289..f88721dec681 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -794,8 +794,8 @@ static inline void init_brg(struct scc_channel *scc) static void init_channel(struct scc_channel *scc) { - del_timer(&scc->tx_t); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_t); + timer_delete(&scc->tx_wdog); disable_irq(scc->irq); @@ -999,7 +999,7 @@ static void __scc_start_tx_timer(struct scc_channel *scc, void (*handler)(struct timer_list *t), unsigned long when) { - del_timer(&scc->tx_t); + timer_delete(&scc->tx_t); if (when == 0) { @@ -1029,7 +1029,7 @@ static void scc_start_defer(struct scc_channel *scc) unsigned long flags; spin_lock_irqsave(&scc->lock, flags); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); if (scc->kiss.maxdefer != 0 && scc->kiss.maxdefer != TIMER_OFF) { @@ -1045,7 +1045,7 @@ static void scc_start_maxkeyup(struct scc_channel *scc) unsigned long flags; spin_lock_irqsave(&scc->lock, flags); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); if (scc->kiss.maxkeyup != 0 && scc->kiss.maxkeyup != TIMER_OFF) { @@ -1194,7 +1194,7 @@ static void t_tail(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&scc->lock, flags); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); scc_key_trx(scc, TX_OFF); spin_unlock_irqrestore(&scc->lock, flags); @@ -1219,7 +1219,7 @@ static void t_busy(struct timer_list *t) { struct scc_channel *scc = from_timer(scc, t, tx_wdog); - del_timer(&scc->tx_t); + timer_delete(&scc->tx_t); netif_stop_queue(scc->dev); /* don't pile on the wabbit! */ scc_discard_buffers(scc); @@ -1248,7 +1248,7 @@ static void t_maxkeyup(struct timer_list *t) netif_stop_queue(scc->dev); scc_discard_buffers(scc); - del_timer(&scc->tx_t); + timer_delete(&scc->tx_t); cl(scc, R1, TxINT_ENAB); /* force an ABORT, but don't */ cl(scc, R15, TxUIE); /* count it. */ @@ -1272,7 +1272,7 @@ static void t_idle(struct timer_list *t) { struct scc_channel *scc = from_timer(scc, t, tx_t); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); scc_key_trx(scc, TX_OFF); if(scc->kiss.mintime) @@ -1407,7 +1407,7 @@ static void scc_stop_calibrate(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&scc->lock, flags); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); scc_key_trx(scc, TX_OFF); wr(scc, R6, 0); wr(scc, R7, FLAG); @@ -1428,7 +1428,7 @@ scc_start_calibrate(struct scc_channel *scc, int duration, unsigned char pattern netif_stop_queue(scc->dev); scc_discard_buffers(scc); - del_timer(&scc->tx_wdog); + timer_delete(&scc->tx_wdog); scc->tx_wdog.function = scc_stop_calibrate; scc->tx_wdog.expires = jiffies + HZ*duration; @@ -1609,8 +1609,8 @@ static int scc_net_close(struct net_device *dev) wr(scc,R3,0); spin_unlock_irqrestore(&scc->lock, flags); - del_timer_sync(&scc->tx_t); - del_timer_sync(&scc->tx_wdog); + timer_delete_sync(&scc->tx_t); + timer_delete_sync(&scc->tx_wdog); scc_discard_buffers(scc); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 2ed2f836f09a..f29997b20fd7 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1158,7 +1158,7 @@ static void __exit yam_cleanup_driver(void) struct yam_mcs *p; int i; - del_timer_sync(&yam_timer); + timer_delete_sync(&yam_timer); for (i = 0; i < NR_PORTS; i++) { struct net_device *dev = yam_devs[i]; if (dev) { diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index aa8f828a0ae7..6342c319c0e4 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1357,7 +1357,7 @@ static int rr_close(struct net_device *dev) rrpriv->fw_running = 0; spin_unlock_irqrestore(&rrpriv->lock, flags); - del_timer_sync(&rrpriv->timer); + timer_delete_sync(&rrpriv->timer); spin_lock_irqsave(&rrpriv->lock, flags); writel(0, ®s->TxPi); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index ef6df0e37bea..ef4204638392 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -291,7 +291,7 @@ static int ntb_netdev_close(struct net_device *ndev) while ((skb = ntb_transport_rx_remove(dev->qp, &len))) dev_kfree_skb(skb); - del_timer_sync(&dev->tx_timer); + timer_delete_sync(&dev->tx_timer); return 0; } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 69ca765485db..b68369e2342b 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -952,7 +952,7 @@ static unsigned int phylink_inband_caps(struct phylink *pl, static void phylink_pcs_poll_stop(struct phylink *pl) { if (pl->cfg_link_an_mode == MLO_AN_INBAND) - del_timer(&pl->link_poll); + timer_delete(&pl->link_poll); } static void phylink_pcs_poll_start(struct phylink *pl) @@ -2448,7 +2448,7 @@ void phylink_stop(struct phylink *pl) sfp_upstream_stop(pl->sfp_bus); if (pl->phydev) phy_stop(pl->phydev); - del_timer_sync(&pl->link_poll); + timer_delete_sync(&pl->link_poll); if (pl->link_irq) { free_irq(pl->link_irq, pl); pl->link_irq = 0; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index fb362ee248ff..3cfa17cd5073 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -899,8 +899,8 @@ static void slip_close(struct tty_struct *tty) /* VSV = very important to remove timers */ #ifdef CONFIG_SLIP_SMART - del_timer_sync(&sl->keepalive_timer); - del_timer_sync(&sl->outfill_timer); + timer_delete_sync(&sl->keepalive_timer); + timer_delete_sync(&sl->outfill_timer); #endif /* Flush network side */ unregister_netdev(sl->dev); @@ -1137,7 +1137,7 @@ static int slip_ioctl(struct tty_struct *tty, unsigned int cmd, jiffies + sl->keepalive * HZ); set_bit(SLF_KEEPTEST, &sl->flags); } else - del_timer(&sl->keepalive_timer); + timer_delete(&sl->keepalive_timer); spin_unlock_bh(&sl->lock); return 0; @@ -1162,7 +1162,7 @@ static int slip_ioctl(struct tty_struct *tty, unsigned int cmd, jiffies + sl->outfill * HZ); set_bit(SLF_OUTWAIT, &sl->flags); } else - del_timer(&sl->outfill_timer); + timer_delete(&sl->outfill_timer); spin_unlock_bh(&sl->lock); return 0; @@ -1217,7 +1217,7 @@ static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, jiffies + sl->keepalive * HZ); set_bit(SLF_KEEPTEST, &sl->flags); } else - del_timer(&sl->keepalive_timer); + timer_delete(&sl->keepalive_timer); break; case SIOCGKEEPALIVE: @@ -1235,7 +1235,7 @@ static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, jiffies + sl->outfill * HZ); set_bit(SLF_OUTWAIT, &sl->flags); } else - del_timer(&sl->outfill_timer); + timer_delete(&sl->outfill_timer); break; case SIOCGOUTFILL: @@ -1421,7 +1421,7 @@ static void sl_keepalive(struct timer_list *t) /* keepalive still high :(, we must hangup */ if (sl->outfill) /* outfill timer must be deleted too */ - (void)del_timer(&sl->outfill_timer); + (void) timer_delete(&sl->outfill_timer); printk(KERN_DEBUG "%s: no packets received during keepalive timeout, hangup.\n", sl->dev->name); /* this must hangup tty & close slip */ tty_hangup(sl->tty); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f75f912a0225..7babd1e9a378 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1295,7 +1295,7 @@ static void tun_flow_init(struct tun_struct *tun) static void tun_flow_uninit(struct tun_struct *tun) { - del_timer_sync(&tun->flow_gc_timer); + timer_delete_sync(&tun->flow_gc_timer); tun_flow_flush(tun); } diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index ff439ef535ac..fc5e441aa7c3 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -738,7 +738,7 @@ static int catc_stop(struct net_device *netdev) netif_stop_queue(netdev); if (!catc->is_f5u011) - del_timer_sync(&catc->timer); + timer_delete_sync(&catc->timer); usb_kill_urb(catc->rx_urb); usb_kill_urb(catc->tx_urb); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 137adf6d5b08..e4f1663b6204 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1661,7 +1661,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (ret < 0) return ret; - del_timer(&dev->stat_monitor); + timer_delete(&dev->stat_monitor); } else if (link && !dev->link_on) { dev->link_on = true; @@ -3304,7 +3304,7 @@ static int lan78xx_stop(struct net_device *net) mutex_lock(&dev->dev_mutex); if (timer_pending(&dev->stat_monitor)) - del_timer_sync(&dev->stat_monitor); + timer_delete_sync(&dev->stat_monitor); clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue(net); @@ -4938,7 +4938,7 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) /* reattach */ netif_device_attach(dev->net); - del_timer(&dev->stat_monitor); + timer_delete(&dev->stat_monitor); if (PMSG_IS_AUTO(message)) { ret = lan78xx_set_auto_suspend(dev); diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 3d239b8d1a1b..dec6e82eb0e0 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -522,7 +522,7 @@ static void sierra_net_kevent(struct work_struct *work) " stopping sync timer", hh.msgspecific.byte); /* Got sync resp - stop timer & clear mask */ - del_timer_sync(&priv->sync_timer); + timer_delete_sync(&priv->sync_timer); clear_bit(SIERRA_NET_TIMER_EXPIRY, &priv->kevent_flags); break; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 724b93aa4f7e..c39dfa17813a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -860,7 +860,7 @@ int usbnet_stop (struct net_device *net) /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; - del_timer_sync(&dev->delay); + timer_delete_sync(&dev->delay); tasklet_kill(&dev->bh); cancel_work_sync(&dev->kevent); @@ -869,7 +869,7 @@ int usbnet_stop (struct net_device *net) * we have a flag */ tasklet_kill(&dev->bh); - del_timer_sync(&dev->delay); + timer_delete_sync(&dev->delay); cancel_work_sync(&dev->kevent); if (!pm) @@ -1882,7 +1882,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) */ usbnet_mark_going_away(dev); cancel_work_sync(&dev->kevent); - del_timer_sync(&dev->delay); + timer_delete_sync(&dev->delay); free_netdev(net); out: return status; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8c49e903cb3a..9ccc3f09f71b 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3193,7 +3193,7 @@ static int vxlan_stop(struct net_device *dev) vxlan_multicast_leave(vxlan); - del_timer_sync(&vxlan->age_timer); + timer_delete_sync(&vxlan->age_timer); vxlan_flush(vxlan, &desc); vxlan_sock_release(vxlan); diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index cdebe65a7e2d..7e653432c139 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -285,7 +285,7 @@ static void cisco_stop(struct net_device *dev) struct cisco_state *st = state(hdlc); unsigned long flags; - del_timer_sync(&st->timer); + timer_delete_sync(&st->timer); spin_lock_irqsave(&st->lock, flags); netif_dormant_on(dev); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 81e72bc1891f..34014f427060 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1025,7 +1025,7 @@ static void fr_stop(struct net_device *dev) printk(KERN_DEBUG "fr_stop\n"); #endif if (state(hdlc)->settings.lmi != LMI_NONE) - del_timer_sync(&state(hdlc)->timer); + timer_delete_sync(&state(hdlc)->timer); fr_set_link_state(0, dev); } diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 37a3c989cba1..19921b02846d 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -358,7 +358,7 @@ static void ppp_cp_event(struct net_device *dev, u16 pid, u16 event, u8 code, } } if (old_state != CLOSED && proto->state == CLOSED) - del_timer(&proto->timer); + timer_delete(&proto->timer); #if DEBUG_STATE printk(KERN_DEBUG "%s: %s ppp_cp_event(%s) ... %s\n", dev->name, diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index c496d35b266d..3ffeeba5dccf 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -81,7 +81,7 @@ static int wg_pm_notification(struct notifier_block *nb, unsigned long action, v list_for_each_entry(wg, &device_list, device_list) { mutex_lock(&wg->device_update_lock); list_for_each_entry(peer, &wg->peer_list, peer_list) { - del_timer(&peer->timer_zero_key_material); + timer_delete(&peer->timer_zero_key_material); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); } diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c index 968bdb4df0b3..a9e0890c2f77 100644 --- a/drivers/net/wireguard/timers.c +++ b/drivers/net/wireguard/timers.c @@ -48,7 +48,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer) peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2); - del_timer(&peer->timer_send_keepalive); + timer_delete(&peer->timer_send_keepalive); /* We drop all packets without a keypair and don't try again, * if we try unsuccessfully for too long to make a handshake. */ @@ -167,7 +167,7 @@ void wg_timers_data_received(struct wg_peer *peer) */ void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) { - del_timer(&peer->timer_send_keepalive); + timer_delete(&peer->timer_send_keepalive); } /* Should be called after any type of authenticated packet is received, whether @@ -175,7 +175,7 @@ void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) */ void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) { - del_timer(&peer->timer_new_handshake); + timer_delete(&peer->timer_new_handshake); } /* Should be called after a handshake initiation message is sent. */ @@ -191,7 +191,7 @@ void wg_timers_handshake_initiated(struct wg_peer *peer) */ void wg_timers_handshake_complete(struct wg_peer *peer) { - del_timer(&peer->timer_retransmit_handshake); + timer_delete(&peer->timer_retransmit_handshake); peer->timer_handshake_attempts = 0; peer->sent_lastminute_handshake = false; ktime_get_real_ts64(&peer->walltime_last_handshake); diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 156f3650c006..96dc2778022a 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -733,7 +733,7 @@ static void ar5523_data_tx_pkt_put(struct ar5523 *ar) { atomic_dec(&ar->tx_nr_total); if (!atomic_dec_return(&ar->tx_nr_pending)) { - del_timer(&ar->tx_wd_timer); + timer_delete(&ar->tx_wd_timer); wake_up(&ar->tx_flush_waitq); } @@ -1076,7 +1076,7 @@ static void ar5523_stop(struct ieee80211_hw *hw, bool suspend) ar5523_cmd_write(ar, WDCMSG_TARGET_STOP, NULL, 0, 0); - del_timer_sync(&ar->tx_wd_timer); + timer_delete_sync(&ar->tx_wd_timer); cancel_work_sync(&ar->tx_wd_work); cancel_work_sync(&ar->rx_refill_work); ar5523_cancel_rx_bufs(ar); diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 35bfe7232e95..a0c1afeda4dd 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1751,7 +1751,7 @@ void ath10k_debug_stop(struct ath10k *ar) /* Must not use _sync to avoid deadlock, we do that in * ath10k_debug_destroy(). The check for htt_stats_mask is to avoid - * warning from del_timer(). + * warning from timer_delete(). */ if (ar->debug.htt_stats_mask != 0) cancel_delayed_work(&ar->debug.htt_stats_dwork); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 7d28ae5453cf..83eab7479f06 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -287,7 +287,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) if (htt->ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) return; - del_timer_sync(&htt->rx_ring.refill_retry_timer); + timer_delete_sync(&htt->rx_ring.refill_retry_timer); skb_queue_purge(&htt->rx_msdus_q); skb_queue_purge(&htt->rx_in_ord_compl_q); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index c52a16f8078f..fb2c60ee433c 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -619,7 +619,7 @@ static void ath10k_pci_sleep_sync(struct ath10k *ar) return; } - del_timer_sync(&ar_pci->ps_timer); + timer_delete_sync(&ar_pci->ps_timer); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); @@ -1817,7 +1817,7 @@ static void ath10k_pci_rx_retry_sync(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - del_timer_sync(&ar_pci->rx_post_retry); + timer_delete_sync(&ar_pci->rx_post_retry); } int ath10k_pci_hif_map_service_to_pipe(struct ath10k *ar, u16 service_id, diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 6805357ee29e..7ce74b4ef201 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -1621,7 +1621,7 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "sdio power off\n"); - del_timer_sync(&ar_sdio->sleep_timer); + timer_delete_sync(&ar_sdio->sleep_timer); ath10k_sdio_set_mbox_sleep(ar, true); /* Disable the card */ diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index d436a874cd5a..866bad2db334 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -911,7 +911,7 @@ static void ath10k_snoc_buffer_cleanup(struct ath10k *ar) struct ath10k_snoc_pipe *pipe_info; int pipe_num; - del_timer_sync(&ar_snoc->rx_post_retry); + timer_delete_sync(&ar_snoc->rx_post_retry); for (pipe_num = 0; pipe_num < CE_COUNT; pipe_num++) { pipe_info = &ar_snoc->pipe_info[pipe_num]; ath10k_snoc_rx_pipe_cleanup(pipe_info); diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index eedba3766ba2..2f862f8f10ca 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -397,7 +397,7 @@ static void ath11k_ahb_stop(struct ath11k_base *ab) ath11k_ahb_ce_irqs_disable(ab); ath11k_ahb_sync_ce_irqs(ab); ath11k_ahb_kill_tasklets(ab); - del_timer_sync(&ab->rx_replenish_retry); + timer_delete_sync(&ab->rx_replenish_retry); ath11k_ce_cleanup_pipes(ab); } diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c index f124b7329e1a..3a544e5fefca 100644 --- a/drivers/net/wireless/ath/ath11k/dp.c +++ b/drivers/net/wireless/ath/ath11k/dp.c @@ -875,7 +875,7 @@ void ath11k_dp_pdev_free(struct ath11k_base *ab) struct ath11k *ar; int i; - del_timer_sync(&ab->mon_reap_timer); + timer_delete_sync(&ab->mon_reap_timer); for (i = 0; i < ab->num_radios; i++) { ar = ab->pdevs[i].ar; @@ -1170,7 +1170,7 @@ void ath11k_dp_shadow_stop_timer(struct ath11k_base *ab, if (!update_timer->init) return; - del_timer_sync(&update_timer->timer); + timer_delete_sync(&update_timer->timer); } void ath11k_dp_shadow_init_timer(struct ath11k_base *ab, diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index f2bdbac2a0b7..218ab41c0f3c 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -906,7 +906,7 @@ void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer) rx_tid = &peer->rx_tid[i]; spin_unlock_bh(&ar->ab->base_lock); - del_timer_sync(&rx_tid->frag_timer); + timer_delete_sync(&rx_tid->frag_timer); spin_lock_bh(&ar->ab->base_lock); ath11k_dp_rx_frags_cleanup(rx_tid, true); @@ -927,7 +927,7 @@ void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer) ath11k_dp_rx_frags_cleanup(rx_tid, true); spin_unlock_bh(&ar->ab->base_lock); - del_timer_sync(&rx_tid->frag_timer); + timer_delete_sync(&rx_tid->frag_timer); spin_lock_bh(&ar->ab->base_lock); } } @@ -3710,7 +3710,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar, } spin_unlock_bh(&ab->base_lock); - del_timer_sync(&rx_tid->frag_timer); + timer_delete_sync(&rx_tid->frag_timer); spin_lock_bh(&ab->base_lock); peer = ath11k_peer_find_by_id(ab, peer_id); @@ -5781,7 +5781,7 @@ int ath11k_dp_rx_pktlog_stop(struct ath11k_base *ab, bool stop_timer) int ret; if (stop_timer) - del_timer_sync(&ab->mon_reap_timer); + timer_delete_sync(&ab->mon_reap_timer); /* reap all the monitor related rings */ ret = ath11k_dp_purge_mon_ring(ab); diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index b1f27c3ac723..50c36e6ea102 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -985,7 +985,7 @@ void ath12k_dp_pdev_free(struct ath12k_base *ab) if (!ab->mon_reap_timer.function) return; - del_timer_sync(&ab->mon_reap_timer); + timer_delete_sync(&ab->mon_reap_timer); for (i = 0; i < ab->num_radios; i++) ath12k_dp_rx_pdev_free(ab, i); diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index ff6a709b5042..75bf4211ad42 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -895,7 +895,7 @@ void ath12k_dp_rx_peer_tid_cleanup(struct ath12k *ar, struct ath12k_peer *peer) ath12k_dp_rx_frags_cleanup(rx_tid, true); spin_unlock_bh(&ar->ab->base_lock); - del_timer_sync(&rx_tid->frag_timer); + timer_delete_sync(&rx_tid->frag_timer); spin_lock_bh(&ar->ab->base_lock); } } @@ -3451,7 +3451,7 @@ static int ath12k_dp_rx_frag_h_mpdu(struct ath12k *ar, } spin_unlock_bh(&ab->base_lock); - del_timer_sync(&rx_tid->frag_timer); + timer_delete_sync(&rx_tid->frag_timer); spin_lock_bh(&ab->base_lock); peer = ath12k_peer_find_by_id(ab, peer_id); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 72ce321f2a77..8c2e8081112e 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -149,7 +149,7 @@ static bool __ath6kl_cfg80211_sscan_stop(struct ath6kl_vif *vif) if (!test_and_clear_bit(SCHED_SCANNING, &vif->flags)) return false; - del_timer_sync(&vif->sched_scan_timer); + timer_delete_sync(&vif->sched_scan_timer); if (ar->state == ATH6KL_STATE_RECOVERY) return true; @@ -1200,7 +1200,7 @@ static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, if (((vif->auth_mode == WPA_PSK_AUTH) || (vif->auth_mode == WPA2_PSK_AUTH)) && (key_usage & GROUP_USAGE)) - del_timer(&vif->disconnect_timer); + timer_delete(&vif->disconnect_timer); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d, key_len %d, key_type 0x%x, key_usage 0x%x, seq_len %d\n", @@ -3612,7 +3612,7 @@ void ath6kl_cfg80211_vif_stop(struct ath6kl_vif *vif, bool wmi_ready) discon_issued = test_bit(CONNECTED, &vif->flags) || test_bit(CONNECT_PEND, &vif->flags); ath6kl_disconnect(vif); - del_timer(&vif->disconnect_timer); + timer_delete(&vif->disconnect_timer); if (discon_issued) ath6kl_disconnect_event(vif, DISCONNECT_CMD, diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c index 15f455adb860..9b100ee2ebc3 100644 --- a/drivers/net/wireless/ath/ath6kl/init.c +++ b/drivers/net/wireless/ath/ath6kl/init.c @@ -1915,7 +1915,7 @@ void ath6kl_stop_txrx(struct ath6kl *ar) clear_bit(WMI_READY, &ar->flag); if (ar->fw_recovery.enable) - del_timer_sync(&ar->fw_recovery.hb_timer); + timer_delete_sync(&ar->fw_recovery.hb_timer); /* * After wmi_shudown all WMI events will be dropped. We diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index 8f9fe23e9755..867089a3c096 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -1027,7 +1027,7 @@ void ath6kl_disconnect_event(struct ath6kl_vif *vif, u8 reason, u8 *bssid, aggr_reset_state(vif->aggr_cntxt->aggr_conn); - del_timer(&vif->disconnect_timer); + timer_delete(&vif->disconnect_timer); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "disconnect reason is %d\n", reason); diff --git a/drivers/net/wireless/ath/ath6kl/recovery.c b/drivers/net/wireless/ath/ath6kl/recovery.c index c09e40c9010f..fd2dceb8b63d 100644 --- a/drivers/net/wireless/ath/ath6kl/recovery.c +++ b/drivers/net/wireless/ath/ath6kl/recovery.c @@ -25,7 +25,7 @@ static void ath6kl_recovery_work(struct work_struct *work) ar->state = ATH6KL_STATE_RECOVERY; - del_timer_sync(&ar->fw_recovery.hb_timer); + timer_delete_sync(&ar->fw_recovery.hb_timer); ath6kl_init_hw_restart(ar); @@ -119,7 +119,7 @@ void ath6kl_recovery_cleanup(struct ath6kl *ar) set_bit(RECOVERY_CLEANUP, &ar->flag); - del_timer_sync(&ar->fw_recovery.hb_timer); + timer_delete_sync(&ar->fw_recovery.hb_timer); cancel_work_sync(&ar->fw_recovery.recovery_work); } diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c index 80e66acc5cf6..3a6f0b647e17 100644 --- a/drivers/net/wireless/ath/ath6kl/txrx.c +++ b/drivers/net/wireless/ath/ath6kl/txrx.c @@ -1827,7 +1827,7 @@ void aggr_reset_state(struct aggr_info_conn *aggr_conn) return; if (aggr_conn->timer_scheduled) { - del_timer(&aggr_conn->timer); + timer_delete(&aggr_conn->timer); aggr_conn->timer_scheduled = false; } diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index bae24e3d3168..799be0be24f4 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1556,7 +1556,7 @@ void ath9k_p2p_ps_timer(void *priv) struct ath_node *an; u32 tsf; - del_timer_sync(&sc->sched.timer); + timer_delete_sync(&sc->sched.timer); ath9k_hw_gen_timer_stop(sc->sc_ah, sc->p2p_ps_timer); ath_chanctx_event(sc, NULL, ATH_CHANCTX_EVENT_TSF_TIMER); diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index b457e52dd365..5a26f1d05f04 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -305,7 +305,7 @@ void ath9k_btcoex_timer_resume(struct ath_softc *sc) ath_dbg(ath9k_hw_common(ah), BTCOEX, "Starting btcoex timers\n"); /* make sure duty cycle timer is also stopped when resuming */ - del_timer_sync(&btcoex->no_stomp_timer); + timer_delete_sync(&btcoex->no_stomp_timer); btcoex->bt_priority_cnt = 0; btcoex->bt_priority_time = jiffies; @@ -329,15 +329,15 @@ void ath9k_btcoex_timer_pause(struct ath_softc *sc) ath_dbg(ath9k_hw_common(ah), BTCOEX, "Stopping btcoex timers\n"); - del_timer_sync(&btcoex->period_timer); - del_timer_sync(&btcoex->no_stomp_timer); + timer_delete_sync(&btcoex->period_timer); + timer_delete_sync(&btcoex->no_stomp_timer); } void ath9k_btcoex_stop_gen_timer(struct ath_softc *sc) { struct ath_btcoex *btcoex = &sc->btcoex; - del_timer_sync(&btcoex->no_stomp_timer); + timer_delete_sync(&btcoex->no_stomp_timer); } u16 ath9k_btcoex_aggr_limit(struct ath_softc *sc, u32 max_4ms_framelen) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 57094bd45d98..19600018e562 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -198,7 +198,7 @@ void ath9k_htc_reset(struct ath9k_htc_priv *priv) ath9k_htc_stop_ani(priv); ieee80211_stop_queues(priv->hw); - del_timer_sync(&priv->tx.cleanup_timer); + timer_delete_sync(&priv->tx.cleanup_timer); ath9k_htc_tx_drain(priv); WMI_CMD(WMI_DISABLE_INTR_CMDID); @@ -260,7 +260,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv, ath9k_htc_ps_wakeup(priv); ath9k_htc_stop_ani(priv); - del_timer_sync(&priv->tx.cleanup_timer); + timer_delete_sync(&priv->tx.cleanup_timer); ath9k_htc_tx_drain(priv); WMI_CMD(WMI_DISABLE_INTR_CMDID); @@ -997,7 +997,7 @@ static void ath9k_htc_stop(struct ieee80211_hw *hw, bool suspend) tasklet_kill(&priv->rx_tasklet); - del_timer_sync(&priv->tx.cleanup_timer); + timer_delete_sync(&priv->tx.cleanup_timer); ath9k_htc_tx_drain(priv); ath9k_wmi_event_drain(priv); diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 01e0dffbf57e..ee951493e993 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -1099,7 +1099,7 @@ static void ath9k_deinit_softc(struct ath_softc *sc) if (ATH_TXQ_SETUP(sc, i)) ath_tx_cleanupq(sc, &sc->tx.txq[i]); - del_timer_sync(&sc->sleep_timer); + timer_delete_sync(&sc->sleep_timer); ath9k_hw_deinit(sc->sc_ah); if (sc->dfs_detector != NULL) sc->dfs_detector->exit(sc->dfs_detector); diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index d078a59d7d3c..7f890997bb53 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -472,7 +472,7 @@ void ath_stop_ani(struct ath_softc *sc) struct ath_common *common = ath9k_hw_common(sc->sc_ah); ath_dbg(common, ANI, "Stopping ANI\n"); - del_timer_sync(&common->ani.timer); + timer_delete_sync(&common->ani.timer); } void ath_check_ani(struct ath_softc *sc) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a70c94564814..92fc5e3d756e 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -123,7 +123,7 @@ void ath9k_ps_wakeup(struct ath_softc *sc) if (++sc->ps_usecount != 1) goto unlock; - del_timer_sync(&sc->sleep_timer); + timer_delete_sync(&sc->sleep_timer); power_mode = sc->sc_ah->power_mode; ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_AWAKE); @@ -2418,7 +2418,7 @@ static void ath9k_cancel_pending_offchannel(struct ath_softc *sc) ath_dbg(common, CHAN_CTX, "%s: Aborting RoC\n", __func__); - del_timer_sync(&sc->offchannel.timer); + timer_delete_sync(&sc->offchannel.timer); if (sc->offchannel.state >= ATH_OFFCHANNEL_ROC_START) ath_roc_complete(sc, ATH_ROC_COMPLETE_ABORT); } @@ -2427,7 +2427,7 @@ static void ath9k_cancel_pending_offchannel(struct ath_softc *sc) ath_dbg(common, CHAN_CTX, "%s: Aborting HW scan\n", __func__); - del_timer_sync(&sc->offchannel.timer); + timer_delete_sync(&sc->offchannel.timer); ath_scan_complete(sc, true); } } @@ -2476,7 +2476,7 @@ static void ath9k_cancel_hw_scan(struct ieee80211_hw *hw, ath_dbg(common, CHAN_CTX, "Cancel HW scan on vif: %pM\n", vif->addr); mutex_lock(&sc->mutex); - del_timer_sync(&sc->offchannel.timer); + timer_delete_sync(&sc->offchannel.timer); ath_scan_complete(sc, true); mutex_unlock(&sc->mutex); } @@ -2526,7 +2526,7 @@ static int ath9k_cancel_remain_on_channel(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); ath_dbg(common, CHAN_CTX, "Cancel RoC\n"); - del_timer_sync(&sc->offchannel.timer); + timer_delete_sync(&sc->offchannel.timer); if (sc->offchannel.roc_vif) { if (sc->offchannel.state >= ATH_OFFCHANNEL_ROC_START) diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 1ff53520f0a3..27d4034c814e 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -1029,7 +1029,7 @@ static int ath_pci_suspend(struct device *device) */ ath9k_stop_btcoex(sc); ath9k_hw_disable(sc->sc_ah); - del_timer_sync(&sc->sleep_timer); + timer_delete_sync(&sc->sleep_timer); ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP); return 0; diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c index d405a4c34059..cc2a033e87f5 100644 --- a/drivers/net/wireless/ath/wcn36xx/dxe.c +++ b/drivers/net/wireless/ath/wcn36xx/dxe.c @@ -350,7 +350,7 @@ void wcn36xx_dxe_tx_ack_ind(struct wcn36xx *wcn, u32 status) spin_lock_irqsave(&wcn->dxe_lock, flags); skb = wcn->tx_ack_skb; wcn->tx_ack_skb = NULL; - del_timer(&wcn->tx_ack_timer); + timer_delete(&wcn->tx_ack_timer); spin_unlock_irqrestore(&wcn->dxe_lock, flags); if (!skb) { @@ -1055,7 +1055,7 @@ void wcn36xx_dxe_deinit(struct wcn36xx *wcn) free_irq(wcn->tx_irq, wcn); free_irq(wcn->rx_irq, wcn); - del_timer(&wcn->tx_ack_timer); + timer_delete(&wcn->tx_ack_timer); if (wcn->tx_ack_skb) { ieee80211_tx_status_irqsafe(wcn->hw, wcn->tx_ack_skb); diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index a1a0a9223e74..5473c01cbe66 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1017,7 +1017,7 @@ static int wil_cfg80211_scan(struct wiphy *wiphy, out_restore: if (rc) { - del_timer_sync(&vif->scan_timer); + timer_delete_sync(&vif->scan_timer); if (vif->mid == 0) wil->radio_wdev = wil->main_ndev->ieee80211_ptr; vif->scan_request = NULL; diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 94e61dbe94f8..44c24c6c8360 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -798,7 +798,7 @@ void wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid, wil_dbg_misc(wil, "disconnecting\n"); - del_timer_sync(&vif->connect_timer); + timer_delete_sync(&vif->connect_timer); _wil6210_disconnect(vif, bssid, reason_code); } @@ -818,7 +818,7 @@ void wil6210_disconnect_complete(struct wil6210_vif *vif, const u8 *bssid, wil_dbg_misc(wil, "got disconnect\n"); - del_timer_sync(&vif->connect_timer); + timer_delete_sync(&vif->connect_timer); _wil6210_disconnect_complete(vif, bssid, reason_code); } @@ -1465,7 +1465,7 @@ void wil_abort_scan(struct wil6210_vif *vif, bool sync) return; wil_dbg_misc(wil, "Abort scan_request 0x%p\n", vif->scan_request); - del_timer_sync(&vif->scan_timer); + timer_delete_sync(&vif->scan_timer); mutex_unlock(&wil->vif_mutex); rc = wmi_abort_scan(vif); if (!rc && sync) diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index d5d364683c0e..59884e8e3765 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -200,8 +200,8 @@ static void wil_dev_setup(struct net_device *dev) static void wil_vif_deinit(struct wil6210_vif *vif) { - del_timer_sync(&vif->scan_timer); - del_timer_sync(&vif->p2p.discovery_timer); + timer_delete_sync(&vif->scan_timer); + timer_delete_sync(&vif->p2p.discovery_timer); cancel_work_sync(&vif->disconnect_worker); cancel_work_sync(&vif->p2p.discovery_expired_work); cancel_work_sync(&vif->p2p.delayed_listen_work); @@ -533,7 +533,7 @@ void wil_vif_remove(struct wil6210_priv *wil, u8 mid) mutex_unlock(&wil->vif_mutex); flush_work(&wil->wmi_event_worker); - del_timer_sync(&vif->connect_timer); + timer_delete_sync(&vif->connect_timer); cancel_work_sync(&vif->disconnect_worker); wil_probe_client_flush(vif); cancel_work_sync(&vif->probe_client_worker); diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c index f26bf046d889..f20caf1a3905 100644 --- a/drivers/net/wireless/ath/wil6210/p2p.c +++ b/drivers/net/wireless/ath/wil6210/p2p.c @@ -184,7 +184,7 @@ u8 wil_p2p_stop_discovery(struct wil6210_vif *vif) /* discovery not really started, only pending */ p2p->pending_listen_wdev = NULL; } else { - del_timer_sync(&p2p->discovery_timer); + timer_delete_sync(&p2p->discovery_timer); wmi_stop_discovery(vif); } p2p->discovery_started = 0; diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 8ff69dc72fb9..74edd007cd8d 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -933,7 +933,7 @@ static void wmi_evt_scan_complete(struct wil6210_vif *vif, int id, wil_dbg_wmi(wil, "SCAN_COMPLETE(0x%08x)\n", status); wil_dbg_misc(wil, "Complete scan_request 0x%p aborted %d\n", vif->scan_request, info.aborted); - del_timer_sync(&vif->scan_timer); + timer_delete_sync(&vif->scan_timer); cfg80211_scan_done(vif->scan_request, &info); if (vif->mid == 0) wil->radio_wdev = wil->main_ndev->ieee80211_ptr; @@ -1023,7 +1023,7 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len) mutex_unlock(&wil->mutex); return; } - del_timer_sync(&vif->connect_timer); + timer_delete_sync(&vif->connect_timer); } else if ((wdev->iftype == NL80211_IFTYPE_AP) || (wdev->iftype == NL80211_IFTYPE_P2P_GO)) { if (wil->sta[evt->cid].status != wil_sta_unused) { @@ -1814,7 +1814,7 @@ wmi_evt_reassoc_status(struct wil6210_vif *vif, int id, void *d, int len) wil->sta[cid].stats.ft_roams++; ether_addr_copy(wil->sta[cid].addr, vif->bss->bssid); mutex_unlock(&wil->mutex); - del_timer_sync(&vif->connect_timer); + timer_delete_sync(&vif->connect_timer); cfg80211_ref_bss(wiphy, vif->bss); freq = ieee80211_channel_to_frequency(ch, NL80211_BAND_60GHZ); diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 504e05ea30f2..4f01189b7c4b 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2417,7 +2417,7 @@ static void at76_delete_device(struct at76_priv *priv) kfree(priv->bulk_out_buffer); - del_timer_sync(&ledtrig_tx_timer); + timer_delete_sync(&ledtrig_tx_timer); kfree_skb(priv->rx_skb); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c index 1e8495f50c16..e0de34a3e43a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c @@ -289,7 +289,7 @@ static void brcmf_btcoex_handler(struct work_struct *work) btci = container_of(work, struct brcmf_btcoex_info, work); if (btci->timer_on) { btci->timer_on = false; - del_timer_sync(&btci->timer); + timer_delete_sync(&btci->timer); } switch (btci->bt_state) { @@ -428,7 +428,7 @@ static void brcmf_btcoex_dhcp_end(struct brcmf_btcoex_info *btci) if (btci->timer_on) { brcmf_dbg(INFO, "disable BT DHCP Timer\n"); btci->timer_on = false; - del_timer_sync(&btci->timer); + timer_delete_sync(&btci->timer); /* schedule worker if transition to IDLE is needed */ if (btci->bt_state != BRCMF_BT_DHCP_IDLE) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index d2caa80e9412..9f1854b3d1a5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -2304,7 +2304,7 @@ brcmf_pcie_fwcon_timer(struct brcmf_pciedev_info *devinfo, bool active) { if (!active) { if (devinfo->console_active) { - del_timer_sync(&devinfo->timer); + timer_delete_sync(&devinfo->timer); devinfo->console_active = false; } return; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index b1727f35217b..93727b9a5f0d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4611,7 +4611,7 @@ void brcmf_sdio_wd_timer(struct brcmf_sdio *bus, bool active) { /* Totally stop the timer */ if (!active && bus->wd_active) { - del_timer_sync(&bus->timer); + timer_delete_sync(&bus->timer); bus->wd_active = false; return; } diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c index 32639e0e8430..dfcc12aa8620 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c @@ -59,7 +59,7 @@ void libipw_crypt_info_free(struct libipw_crypt_info *info) int i; libipw_crypt_quiescing(info); - del_timer_sync(&info->crypt_deinit_timer); + timer_delete_sync(&info->crypt_deinit_timer); libipw_crypt_deinit_entries(info, 1); for (i = 0; i < NUM_WEP_KEYS; i++) { diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 4013443698a2..104748fcdc33 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -2188,7 +2188,7 @@ __il3945_down(struct il_priv *il) /* Stop TX queues watchdog. We need to have S_EXIT_PENDING bit set * to prevent rearm timer */ - del_timer_sync(&il->watchdog); + timer_delete_sync(&il->watchdog); /* Station information will now be cleared in device */ il_clear_ucode_stations(il); diff --git a/drivers/net/wireless/intel/iwlegacy/3945-rs.c b/drivers/net/wireless/intel/iwlegacy/3945-rs.c index 0eaad980c85c..df1b8ec86651 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-rs.c @@ -413,7 +413,7 @@ il3945_rs_free_sta(void *il_priv, struct ieee80211_sta *sta, void *il_sta) * to use il_priv to print out debugging) since it may not be fully * initialized at this point. */ - del_timer_sync(&rs_sta->rate_scale_flush); + timer_delete_sync(&rs_sta->rate_scale_flush); } /* diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 05c4af41bdb9..dc8c408902e6 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -5350,7 +5350,7 @@ __il4965_down(struct il_priv *il) /* Stop TX queues watchdog. We need to have S_EXIT_PENDING bit set * to prevent rearm timer */ - del_timer_sync(&il->watchdog); + timer_delete_sync(&il->watchdog); il_clear_ucode_stations(il); @@ -6243,7 +6243,7 @@ il4965_cancel_deferred_work(struct il_priv *il) il_cancel_scan_deferred_work(il); - del_timer_sync(&il->stats_periodic); + timer_delete_sync(&il->stats_periodic); } static void diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index af4f42534ea0..09fb4b758704 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -4842,7 +4842,7 @@ il_setup_watchdog(struct il_priv *il) mod_timer(&il->watchdog, jiffies + msecs_to_jiffies(IL_WD_TICK(timeout))); else - del_timer(&il->watchdog); + timer_delete(&il->watchdog); } EXPORT_SYMBOL(il_setup_watchdog); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c index b246dbd371b3..2ed4b6e798ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c @@ -1870,7 +1870,7 @@ static ssize_t iwl_dbgfs_ucode_tracing_write(struct file *file, } } else { priv->event_log.ucode_trace = false; - del_timer_sync(&priv->ucode_trace); + timer_delete_sync(&priv->ucode_trace); } return count; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index 30789ba06d9d..a27a72cc017a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1082,8 +1082,8 @@ void iwl_cancel_deferred_work(struct iwl_priv *priv) cancel_work_sync(&priv->bt_full_concurrency); cancel_work_sync(&priv->bt_runtime_config); - del_timer_sync(&priv->statistics_periodic); - del_timer_sync(&priv->ucode_trace); + timer_delete_sync(&priv->statistics_periodic); + timer_delete_sync(&priv->ucode_trace); } static int iwl_init_drv(struct iwl_priv *priv) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tt.c b/drivers/net/wireless/intel/iwlwifi/dvm/tt.c index e1d78550e443..98f0949b3683 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tt.c @@ -257,7 +257,7 @@ static void iwl_legacy_tt_handler(struct iwl_priv *priv, s32 temp, bool force) tt->tt_previous_temp = temp; #endif /* stop ct_kill_waiting_tm timer */ - del_timer_sync(&priv->thermal_throttle.ct_kill_waiting_tm); + timer_delete_sync(&priv->thermal_throttle.ct_kill_waiting_tm); if (tt->state != old_state) { switch (tt->state) { case IWL_TI_0: @@ -378,7 +378,7 @@ static void iwl_advance_tt_handler(struct iwl_priv *priv, s32 temp, bool force) } } /* stop ct_kill_waiting_tm timer */ - del_timer_sync(&priv->thermal_throttle.ct_kill_waiting_tm); + timer_delete_sync(&priv->thermal_throttle.ct_kill_waiting_tm); if (changed) { if (tt->state >= IWL_TI_1) { /* force PI = IWL_POWER_INDEX_5 in the case of TI > 0 */ @@ -506,7 +506,7 @@ static void iwl_bg_ct_exit(struct work_struct *work) return; /* stop ct_kill_exit_tm timer */ - del_timer_sync(&priv->thermal_throttle.ct_kill_exit_tm); + timer_delete_sync(&priv->thermal_throttle.ct_kill_exit_tm); if (tt->state == IWL_TI_CT_KILL) { IWL_ERR(priv, @@ -640,9 +640,9 @@ void iwl_tt_exit(struct iwl_priv *priv) struct iwl_tt_mgmt *tt = &priv->thermal_throttle; /* stop ct_kill_exit_tm timer if activated */ - del_timer_sync(&priv->thermal_throttle.ct_kill_exit_tm); + timer_delete_sync(&priv->thermal_throttle.ct_kill_exit_tm); /* stop ct_kill_waiting_tm timer if activated */ - del_timer_sync(&priv->thermal_throttle.ct_kill_waiting_tm); + timer_delete_sync(&priv->thermal_throttle.ct_kill_waiting_tm); cancel_work_sync(&priv->tt_work); cancel_work_sync(&priv->ct_enter); cancel_work_sync(&priv->ct_exit); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 4a442d03d8d2..4a4f8de4efe2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1697,7 +1697,7 @@ static void iwl_pcie_irq_handle_error(struct iwl_trans *trans) for (i = 0; i < trans->trans_cfg->base_params->num_of_queues; i++) { if (!trans_pcie->txqs.txq[i]) continue; - del_timer(&trans_pcie->txqs.txq[i]->stuck_timer); + timer_delete(&trans_pcie->txqs.txq[i]->stuck_timer); } /* The STATUS_FW_ERROR bit is set in this function. This must happen diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 401919f9fe88..71227fd3dac0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -911,7 +911,7 @@ static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id) kfree_sensitive(txq->entries[i].cmd); kfree_sensitive(txq->entries[i].free_buf); } - del_timer_sync(&txq->stuck_timer); + timer_delete_sync(&txq->stuck_timer); iwl_txq_gen2_free_memory(trans, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 7c1dd5cc084a..bb90bcfc6763 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -469,7 +469,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) kfree(txq->entries); txq->entries = NULL; - del_timer_sync(&txq->stuck_timer); + timer_delete_sync(&txq->stuck_timer); /* 0-fill queue descriptor structure */ memset(txq, 0, sizeof(*txq)); @@ -1054,7 +1054,7 @@ static void iwl_txq_progress(struct iwl_txq *txq) * since we're making progress on this queue */ if (txq->read_ptr == txq->write_ptr) - del_timer(&txq->stuck_timer); + timer_delete(&txq->stuck_timer); else mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); } @@ -2529,7 +2529,7 @@ void iwl_pcie_freeze_txq_timer(struct iwl_trans *trans, /* remember how long until the timer fires */ txq->frozen_expiry_remainder = txq->stuck_timer.expires - now; - del_timer(&txq->stuck_timer); + timer_delete(&txq->stuck_timer); goto next_queue; } diff --git a/drivers/net/wireless/marvell/libertas/cmdresp.c b/drivers/net/wireless/marvell/libertas/cmdresp.c index 8393f396eebe..9742d3dba31c 100644 --- a/drivers/net/wireless/marvell/libertas/cmdresp.c +++ b/drivers/net/wireless/marvell/libertas/cmdresp.c @@ -119,7 +119,7 @@ int lbs_process_command_response(struct lbs_private *priv, u8 *data, u32 len) } /* Now we got response from FW, cancel the command timer */ - del_timer(&priv->command_timer); + timer_delete(&priv->command_timer); priv->cmd_timed_out = 0; if (respcmd == CMD_RET(CMD_802_11_PS_MODE)) { diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 2240b4db8c03..ea3cc2eaec36 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -897,7 +897,7 @@ static void if_usb_prog_firmware(struct lbs_private *priv, int ret, /* ... and wait for the process to complete */ wait_event_interruptible(cardp->fw_wq, cardp->surprise_removed || cardp->fwdnldover); - del_timer_sync(&cardp->fw_timeout); + timer_delete_sync(&cardp->fw_timeout); usb_kill_urb(cardp->rx_urb); if (!cardp->fwdnldover) { diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index 017e5c6bbade..26d13e9b3c95 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -202,7 +202,7 @@ int lbs_stop_iface(struct lbs_private *priv) spin_unlock_irqrestore(&priv->driver_lock, flags); cancel_work_sync(&priv->mcast_work); - del_timer_sync(&priv->tx_lockup_timer); + timer_delete_sync(&priv->tx_lockup_timer); /* Disable command processing, and wait for all commands to complete */ lbs_deb_main("waiting for commands to complete\n"); @@ -250,7 +250,7 @@ void lbs_host_to_card_done(struct lbs_private *priv) unsigned long flags; spin_lock_irqsave(&priv->driver_lock, flags); - del_timer(&priv->tx_lockup_timer); + timer_delete(&priv->tx_lockup_timer); priv->dnld_sent = DNLD_RES_RECEIVED; @@ -594,8 +594,8 @@ static int lbs_thread(void *data) spin_unlock_irq(&priv->driver_lock); } - del_timer(&priv->command_timer); - del_timer(&priv->tx_lockup_timer); + timer_delete(&priv->command_timer); + timer_delete(&priv->tx_lockup_timer); return 0; } @@ -798,8 +798,8 @@ static void lbs_free_adapter(struct lbs_private *priv) { lbs_free_cmd_buffer(priv); kfifo_free(&priv->event_fifo); - del_timer(&priv->command_timer); - del_timer(&priv->tx_lockup_timer); + timer_delete(&priv->command_timer); + timer_delete(&priv->tx_lockup_timer); } static const struct net_device_ops lbs_netdev_ops = { diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c index efb98304555a..7fc1bdb6c458 100644 --- a/drivers/net/wireless/marvell/libertas_tf/cmd.c +++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c @@ -757,7 +757,7 @@ int lbtf_process_rx_command(struct lbtf_private *priv) } /* Now we got response from FW, cancel the command timer */ - del_timer(&priv->command_timer); + timer_delete(&priv->command_timer); priv->cmd_timed_out = 0; if (priv->nr_retries) priv->nr_retries = 0; diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index 1750f5e93de2..7c413dc81f9a 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -875,7 +875,7 @@ static int if_usb_prog_firmware(struct lbtf_private *priv) wait_event_interruptible(cardp->fw_wq, cardp->priv->surpriseremoved || cardp->fwdnldover); - del_timer_sync(&cardp->fw_timeout); + timer_delete_sync(&cardp->fw_timeout); usb_kill_urb(cardp->rx_urb); if (!cardp->fwdnldover) { diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index b47a832b9ae2..a57a11be57d8 100644 --- a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -174,7 +174,7 @@ static void lbtf_free_adapter(struct lbtf_private *priv) { lbtf_deb_enter(LBTF_DEB_MAIN); lbtf_free_cmd_buffer(priv); - del_timer(&priv->command_timer); + timer_delete(&priv->command_timer); lbtf_deb_leave(LBTF_DEB_MAIN); } @@ -642,7 +642,7 @@ int lbtf_remove_card(struct lbtf_private *priv) lbtf_deb_enter(LBTF_DEB_MAIN); priv->surpriseremoved = 1; - del_timer(&priv->command_timer); + timer_delete(&priv->command_timer); lbtf_free_adapter(priv); priv->hw = NULL; ieee80211_unregister_hw(hw); diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index cb948ca34373..8aff1df09b40 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -206,7 +206,7 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv, start_win = (tbl->start_win + tbl->win_size) & (MAX_TID_VALUE - 1); mwifiex_11n_dispatch_pkt_until_start_win(priv, tbl, start_win); - del_timer_sync(&tbl->timer_context.timer); + timer_delete_sync(&tbl->timer_context.timer); tbl->timer_context.timer_is_set = false; spin_lock_bh(&priv->rx_reorder_tbl_lock); diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index b30ed321c625..5573e2ded72f 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -836,7 +836,7 @@ int mwifiex_process_cmdresp(struct mwifiex_adapter *adapter) return -1; } /* Now we got response from FW, cancel the command timer */ - del_timer_sync(&adapter->cmd_timer); + timer_delete_sync(&adapter->cmd_timer); clear_bit(MWIFIEX_IS_CMD_TIMEDOUT, &adapter->work_flags); if (adapter->curr_cmd->cmd_flag & CMD_F_HOSTCMD) { diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 8b61e45cd667..ce0d42e72e94 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -390,7 +390,7 @@ static void mwifiex_invalidate_lists(struct mwifiex_adapter *adapter) static void mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter) { - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); cancel_delayed_work_sync(&adapter->devdump_work); mwifiex_cancel_all_pending_cmd(adapter); wake_up_interruptible(&adapter->cmd_wait_q.wait); @@ -613,7 +613,7 @@ mwifiex_shutdown_drv(struct mwifiex_adapter *adapter) if (adapter->curr_cmd) { mwifiex_dbg(adapter, WARN, "curr_cmd is still in processing\n"); - del_timer_sync(&adapter->cmd_timer); + timer_delete_sync(&adapter->cmd_timer); mwifiex_recycle_cmd_node(adapter, adapter->curr_cmd); adapter->curr_cmd = NULL; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index b07cb302a00c..0e1f53940401 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -307,7 +307,7 @@ int mwifiex_main_process(struct mwifiex_adapter *adapter) if (IS_CARD_RX_RCVD(adapter)) { adapter->data_received = false; adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); if (adapter->ps_state == PS_STATE_SLEEP) adapter->ps_state = PS_STATE_AWAKE; } else { diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index e11458fd4d50..dd2a42e732f2 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2437,7 +2437,7 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter, */ adapter->ps_state = PS_STATE_AWAKE; adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); } spin_lock_irqsave(&adapter->int_lock, flags); @@ -2527,7 +2527,7 @@ static int mwifiex_process_int_status(struct mwifiex_adapter *adapter) adapter->ps_state == PS_STATE_SLEEP) { adapter->ps_state = PS_STATE_AWAKE; adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); } } } diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 400348abeee5..fecd88967ceb 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -789,7 +789,7 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) adapter->ps_state = PS_STATE_AWAKE; adapter->pm_wakeup_card_req = false; adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); break; } if (!mwifiex_send_null_packet @@ -804,7 +804,7 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) adapter->ps_state = PS_STATE_AWAKE; adapter->pm_wakeup_card_req = false; adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); break; diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index 0a5f340876c3..18e8c04d14c4 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -1490,7 +1490,7 @@ void mwifiex_clean_auto_tdls(struct mwifiex_private *priv) priv->adapter->auto_tdls && priv->bss_type == MWIFIEX_BSS_TYPE_STA) { priv->auto_tdls_timer_active = false; - del_timer(&priv->auto_tdls_timer); + timer_delete(&priv->auto_tdls_timer); mwifiex_flush_auto_tdls_list(priv); } } diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 3034c4405cb5..2f565397cf36 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -877,7 +877,7 @@ static int mwifiex_usb_prepare_tx_aggr_skb(struct mwifiex_adapter *adapter, * write complete, delete the tx_aggr timer */ if (port->tx_aggr.timer_cnxt.is_hold_timer_set) { - del_timer(&port->tx_aggr.timer_cnxt.hold_timer); + timer_delete(&port->tx_aggr.timer_cnxt.hold_timer); port->tx_aggr.timer_cnxt.is_hold_timer_set = false; port->tx_aggr.timer_cnxt.hold_tmo_msecs = 0; } @@ -1354,7 +1354,7 @@ static void mwifiex_usb_cleanup_tx_aggr(struct mwifiex_adapter *adapter) mwifiex_write_data_complete(adapter, skb_tmp, 0, -1); if (port->tx_aggr.timer_cnxt.hold_timer.function) - del_timer_sync(&port->tx_aggr.timer_cnxt.hold_timer); + timer_delete_sync(&port->tx_aggr.timer_cnxt.hold_timer); port->tx_aggr.timer_cnxt.is_hold_timer_set = false; port->tx_aggr.timer_cnxt.hold_tmo_msecs = 0; } @@ -1557,7 +1557,7 @@ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter) { /* Simulation of HS_AWAKE event */ adapter->pm_wakeup_fw_try = false; - del_timer(&adapter->wakeup_timer); + timer_delete(&adapter->wakeup_timer); adapter->pm_wakeup_card_req = false; adapter->ps_state = PS_STATE_AWAKE; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 2e7b05eeef7a..c54005df08ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -97,7 +97,7 @@ static void mt7615_stop(struct ieee80211_hw *hw, bool suspend) struct mt7615_phy *phy = mt7615_hw_phy(hw); cancel_delayed_work_sync(&phy->mt76->mac_work); - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); cancel_delayed_work_sync(&dev->pm.ps_work); @@ -1194,7 +1194,7 @@ static int mt7615_cancel_remain_on_channel(struct ieee80211_hw *hw, if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) return 0; - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); mt7615_mutex_acquire(phy->dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c index c2e4e6aabd9f..b795d11d943d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c @@ -220,12 +220,12 @@ void mt7615_mac_reset_work(struct work_struct *work) set_bit(MT76_MCU_RESET, &dev->mphy.state); wake_up(&dev->mt76.mcu.wait); cancel_delayed_work_sync(&dev->mphy.mac_work); - del_timer_sync(&dev->phy.roc_timer); + timer_delete_sync(&dev->phy.roc_timer); cancel_work_sync(&dev->phy.roc_work); if (phy2) { set_bit(MT76_RESET, &phy2->mt76->state); cancel_delayed_work_sync(&phy2->mt76->mac_work); - del_timer_sync(&phy2->roc_timer); + timer_delete_sync(&phy2->roc_timer); cancel_work_sync(&phy2->roc_work); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index 4aa9fa1c4a23..d96e06b4fee1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -85,7 +85,7 @@ static void mt7663u_stop(struct ieee80211_hw *hw, bool suspend) struct mt7615_dev *dev = hw->priv; clear_bit(MT76_STATE_RUNNING, &dev->mphy.state); - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); cancel_delayed_work_sync(&phy->scan_work); cancel_delayed_work_sync(&phy->mt76->mac_work); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 78b77a54d195..826c48a2ee69 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -364,7 +364,7 @@ void mt7921_roc_abort_sync(struct mt792x_dev *dev) { struct mt792x_phy *phy = &dev->phy; - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) ieee80211_iterate_interfaces(mt76_hw(dev), @@ -395,7 +395,7 @@ static int mt7921_abort_roc(struct mt792x_phy *phy, struct mt792x_vif *vif) { int err = 0; - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); mt792x_mutex_acquire(phy->dev); @@ -1476,7 +1476,7 @@ static void mt7921_abort_channel_switch(struct ieee80211_hw *hw, { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - del_timer_sync(&mvif->csa_timer); + timer_delete_sync(&mvif->csa_timer); cancel_work_sync(&mvif->csa_work); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index e79364ac129e..66f327781947 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -453,7 +453,7 @@ void mt7925_roc_abort_sync(struct mt792x_dev *dev) { struct mt792x_phy *phy = &dev->phy; - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) ieee80211_iterate_interfaces(mt76_hw(dev), @@ -485,7 +485,7 @@ static int mt7925_abort_roc(struct mt792x_phy *phy, { int err = 0; - del_timer_sync(&phy->roc_timer); + timer_delete_sync(&phy->roc_timer); cancel_work_sync(&phy->roc_work); mt792x_mutex_acquire(phy->dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index 0f7806f6338d..38dd58f6e493 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -340,7 +340,7 @@ void mt792x_unassign_vif_chanctx(struct ieee80211_hw *hw, mutex_unlock(&dev->mt76.mutex); if (vif->bss_conf.csa_active) { - del_timer_sync(&mvif->csa_timer); + timer_delete_sync(&mvif->csa_timer); cancel_work_sync(&mvif->csa_work); } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index bba53307b960..cb46a39ef757 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -643,7 +643,7 @@ static inline void host_int_parse_assoc_resp_info(struct wilc_vif *vif, } } - del_timer(&hif_drv->connect_timer); + timer_delete(&hif_drv->connect_timer); conn_info->conn_result(CONN_DISCONN_EVENT_CONN_RESP, mac_status, hif_drv->conn_info.priv); @@ -669,7 +669,7 @@ void wilc_handle_disconnect(struct wilc_vif *vif) struct host_if_drv *hif_drv = vif->hif_drv; if (hif_drv->usr_scan_req.scan_result) { - del_timer(&hif_drv->scan_timer); + timer_delete(&hif_drv->scan_timer); handle_scan_done(vif, SCAN_EVENT_ABORTED); } @@ -713,7 +713,7 @@ static void handle_rcvd_gnrl_async_info(struct work_struct *work) if (hif_drv->hif_state == HOST_IF_CONNECTED) { wilc_handle_disconnect(vif); } else if (hif_drv->usr_scan_req.scan_result) { - del_timer(&hif_drv->scan_timer); + timer_delete(&hif_drv->scan_timer); handle_scan_done(vif, SCAN_EVENT_ABORTED); } } @@ -746,7 +746,7 @@ int wilc_disconnect(struct wilc_vif *vif) conn_info = &hif_drv->conn_info; if (scan_req->scan_result) { - del_timer(&hif_drv->scan_timer); + timer_delete(&hif_drv->scan_timer); scan_req->scan_result(SCAN_EVENT_ABORTED, NULL, scan_req->priv); scan_req->scan_result = NULL; } @@ -754,7 +754,7 @@ int wilc_disconnect(struct wilc_vif *vif) if (conn_info->conn_result) { if (hif_drv->hif_state == HOST_IF_WAITING_CONN_RESP || hif_drv->hif_state == HOST_IF_EXTERNAL_AUTH) - del_timer(&hif_drv->connect_timer); + timer_delete(&hif_drv->connect_timer); conn_info->conn_result(CONN_DISCONN_EVENT_DISCONN_NOTIF, 0, conn_info->priv); @@ -959,7 +959,7 @@ static void listen_timer_cb(struct timer_list *t) int result; struct host_if_msg *msg; - del_timer(&vif->hif_drv->remain_on_ch_timer); + timer_delete(&vif->hif_drv->remain_on_ch_timer); msg = wilc_alloc_work(vif, wilc_handle_listen_state_expired, false); if (IS_ERR(msg)) @@ -1066,7 +1066,7 @@ static void handle_scan_complete(struct work_struct *work) { struct host_if_msg *msg = container_of(work, struct host_if_msg, work); - del_timer(&msg->vif->hif_drv->scan_timer); + timer_delete(&msg->vif->hif_drv->scan_timer); handle_scan_done(msg->vif, SCAN_EVENT_DONE); @@ -1551,7 +1551,7 @@ int wilc_deinit(struct wilc_vif *vif) timer_shutdown_sync(&hif_drv->scan_timer); timer_shutdown_sync(&hif_drv->connect_timer); - del_timer_sync(&vif->periodic_rssi); + timer_delete_sync(&vif->periodic_rssi); timer_shutdown_sync(&hif_drv->remain_on_ch_timer); if (hif_drv->usr_scan_req.scan_result) { @@ -1718,7 +1718,7 @@ int wilc_listen_state_expired(struct wilc_vif *vif, u64 cookie) return -EFAULT; } - del_timer(&vif->hif_drv->remain_on_ch_timer); + timer_delete(&vif->hif_drv->remain_on_ch_timer); return wilc_handle_roc_expired(vif, cookie); } diff --git a/drivers/net/wireless/purelifi/plfxlc/usb.c b/drivers/net/wireless/purelifi/plfxlc/usb.c index 56d1139ba8bc..10d2e2124ff8 100644 --- a/drivers/net/wireless/purelifi/plfxlc/usb.c +++ b/drivers/net/wireless/purelifi/plfxlc/usb.c @@ -714,8 +714,8 @@ static void disconnect(struct usb_interface *intf) mac = plfxlc_hw_mac(hw); usb = &mac->chip.usb; - del_timer_sync(&usb->tx.tx_retry_timer); - del_timer_sync(&usb->sta_queue_cleanup); + timer_delete_sync(&usb->tx.tx_retry_timer); + timer_delete_sync(&usb->sta_queue_cleanup); ieee80211_unregister_hw(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ff61867d142f..6189edc1d8d7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -473,7 +473,7 @@ void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq) { struct rtl_priv *rtlpriv = rtl_priv(hw); - del_timer_sync(&rtlpriv->works.watchdog_timer); + timer_delete_sync(&rtlpriv->works.watchdog_timer); cancel_delayed_work_sync(&rtlpriv->works.watchdog_wq); if (ips_wq) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 35875cda30fc..2ad4523d1bef 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -179,9 +179,9 @@ static void rtl88e_deinit_sw_vars(struct ieee80211_hw *hw) } if (rtlpriv->psc.low_power_enable) - del_timer_sync(&rtlpriv->works.fw_clockoff_timer); + timer_delete_sync(&rtlpriv->works.fw_clockoff_timer); - del_timer_sync(&rtlpriv->works.fast_antenna_training_timer); + timer_delete_sync(&rtlpriv->works.fast_antenna_training_timer); } /* get bt coexist status */ diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 2cebe562a1f4..53827657abb2 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -493,7 +493,7 @@ static void bl_cmd_timeout(struct timer_list *t) struct rsi_hw *adapter = from_timer(adapter, t, bl_cmd_timer); adapter->blcmd_timer_expired = true; - del_timer(&adapter->bl_cmd_timer); + timer_delete(&adapter->bl_cmd_timer); } static int bl_start_cmd_timer(struct rsi_hw *adapter, u32 timeout) @@ -511,7 +511,7 @@ static int bl_stop_cmd_timer(struct rsi_hw *adapter) { adapter->blcmd_timer_expired = false; if (timer_pending(&adapter->bl_cmd_timer)) - del_timer(&adapter->bl_cmd_timer); + timer_delete(&adapter->bl_cmd_timer); return 0; } diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 3425a473b9a1..9db08200f4fa 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1754,7 +1754,7 @@ void rsi_roc_timeout(struct timer_list *t) ieee80211_remain_on_channel_expired(common->priv->hw); if (timer_pending(&common->roc_timer)) - del_timer(&common->roc_timer); + timer_delete(&common->roc_timer); rsi_resume_conn_channel(common); mutex_unlock(&common->mutex); @@ -1776,7 +1776,7 @@ static int rsi_mac80211_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (timer_pending(&common->roc_timer)) { rsi_dbg(INFO_ZONE, "Stop on-going ROC\n"); - del_timer(&common->roc_timer); + timer_delete(&common->roc_timer); } common->roc_timer.expires = msecs_to_jiffies(duration) + jiffies; add_timer(&common->roc_timer); @@ -1820,7 +1820,7 @@ static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw, return 0; } - del_timer(&common->roc_timer); + timer_delete(&common->roc_timer); rsi_resume_conn_channel(common); mutex_unlock(&common->mutex); diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c index a54a7b86864f..5d569eeb353f 100644 --- a/drivers/net/wireless/st/cw1200/main.c +++ b/drivers/net/wireless/st/cw1200/main.c @@ -458,7 +458,7 @@ static void cw1200_unregister_common(struct ieee80211_hw *dev) ieee80211_unregister_hw(dev); - del_timer_sync(&priv->mcast_timeout); + timer_delete_sync(&priv->mcast_timeout); cw1200_unregister_bh(priv); cw1200_debug_release(priv); diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index a20ab577a364..2002e3f9fe45 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c @@ -105,7 +105,7 @@ int cw1200_pm_init(struct cw1200_pm_state *pm, void cw1200_pm_deinit(struct cw1200_pm_state *pm) { - del_timer_sync(&pm->stay_awake); + timer_delete_sync(&pm->stay_awake); } void cw1200_pm_stay_awake(struct cw1200_pm_state *pm, diff --git a/drivers/net/wireless/st/cw1200/queue.c b/drivers/net/wireless/st/cw1200/queue.c index 259739e53fc1..4fd76183c368 100644 --- a/drivers/net/wireless/st/cw1200/queue.c +++ b/drivers/net/wireless/st/cw1200/queue.c @@ -244,7 +244,7 @@ void cw1200_queue_stats_deinit(struct cw1200_queue_stats *stats) void cw1200_queue_deinit(struct cw1200_queue *queue) { cw1200_queue_clear(queue); - del_timer_sync(&queue->gc); + timer_delete_sync(&queue->gc); INIT_LIST_HEAD(&queue->free_pool); kfree(queue->pool); kfree(queue->link_map_cache); diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index c259da8161e4..444272caf124 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -113,7 +113,7 @@ void cw1200_stop(struct ieee80211_hw *dev, bool suspend) cancel_work_sync(&priv->unjoin_work); cancel_delayed_work_sync(&priv->link_id_gc_work); flush_workqueue(priv->workqueue); - del_timer_sync(&priv->mcast_timeout); + timer_delete_sync(&priv->mcast_timeout); mutex_lock(&priv->conf_mutex); priv->mode = NL80211_IFTYPE_UNSPECIFIED; priv->listening = false; @@ -2102,7 +2102,7 @@ void cw1200_multicast_stop_work(struct work_struct *work) container_of(work, struct cw1200_common, multicast_stop_work); if (priv->aid0_bit_set) { - del_timer_sync(&priv->mcast_timeout); + timer_delete_sync(&priv->mcast_timeout); wsm_lock_tx(priv); priv->aid0_bit_set = false; cw1200_set_tim_impl(priv, false); @@ -2170,7 +2170,7 @@ void cw1200_suspend_resume(struct cw1200_common *priv, } spin_unlock_bh(&priv->ps_state_lock); if (cancel_tmo) - del_timer_sync(&priv->mcast_timeout); + timer_delete_sync(&priv->mcast_timeout); } else { spin_lock_bh(&priv->ps_state_lock); cw1200_ps_notify(priv, arg->link_id, arg->stop); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 8fb58a5d911c..ea9bc4717a85 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -117,7 +117,7 @@ int wl1271_recalc_rx_streaming(struct wl1271 *wl, struct wl12xx_vif *wlvif) else { ret = wl1271_set_rx_streaming(wl, wlvif, false); /* don't cancel_work_sync since we might deadlock */ - del_timer_sync(&wlvif->rx_streaming_timer); + timer_delete_sync(&wlvif->rx_streaming_timer); } out: return ret; @@ -2841,7 +2841,7 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl, unlock: mutex_unlock(&wl->mutex); - del_timer_sync(&wlvif->rx_streaming_timer); + timer_delete_sync(&wlvif->rx_streaming_timer); cancel_work_sync(&wlvif->rx_streaming_enable_work); cancel_work_sync(&wlvif->rx_streaming_disable_work); cancel_work_sync(&wlvif->rc_update_work); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 325fcb3d1075..a0a438881388 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -329,7 +329,7 @@ static void xenvif_down(struct xenvif *vif) if (queue->tx_irq != queue->rx_irq) disable_irq(queue->rx_irq); napi_disable(&queue->napi); - del_timer_sync(&queue->credit_timeout); + timer_delete_sync(&queue->credit_timeout); } } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 63fe51d0e64d..fc52d5c4c69b 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1819,7 +1819,7 @@ static void xennet_disconnect_backend(struct netfront_info *info) for (i = 0; i < num_queues && info->queues; ++i) { struct netfront_queue *queue = &info->queues[i]; - del_timer_sync(&queue->rx_refill_timer); + timer_delete_sync(&queue->rx_refill_timer); if (queue->tx_irq && (queue->tx_irq == queue->rx_irq)) unbind_from_irqhandler(queue->tx_irq, queue); diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index 93094418fd24..43ce0c9b2355 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -102,10 +102,10 @@ static void fw_dnld_over(struct nfcmrvl_private *priv, u32 error) atomic_set(&priv->ndev->cmd_cnt, 0); if (timer_pending(&priv->ndev->cmd_timer)) - del_timer_sync(&priv->ndev->cmd_timer); + timer_delete_sync(&priv->ndev->cmd_timer); if (timer_pending(&priv->fw_dnld.timer)) - del_timer_sync(&priv->fw_dnld.timer); + timer_delete_sync(&priv->fw_dnld.timer); nfc_info(priv->dev, "FW loading over (%d)]\n", error); @@ -464,7 +464,7 @@ void nfcmrvl_fw_dnld_recv_frame(struct nfcmrvl_private *priv, { /* Discard command timer */ if (timer_pending(&priv->ndev->cmd_timer)) - del_timer_sync(&priv->ndev->cmd_timer); + timer_delete_sync(&priv->ndev->cmd_timer); /* Allow next command */ atomic_set(&priv->ndev->cmd_cnt, 1); diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index e2bc67300a91..34c40d10e260 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -1515,7 +1515,7 @@ static int pn533_poll_complete(struct pn533 *dev, void *arg, cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; if (cur_mod->len == 0) { /* Target mode */ - del_timer(&dev->listen_timer); + timer_delete(&dev->listen_timer); rc = pn533_init_target_complete(dev, resp); goto done; } @@ -1749,7 +1749,7 @@ static void pn533_stop_poll(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); - del_timer(&dev->listen_timer); + timer_delete(&dev->listen_timer); if (!dev->poll_mod_count) { dev_dbg(dev->dev, diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index cfbbe0713317..580c9193e4a7 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c @@ -209,7 +209,7 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, struct pn532_uart_phy *dev = serdev_device_get_drvdata(serdev); size_t i; - del_timer(&dev->cmd_timeout); + timer_delete(&dev->cmd_timeout); for (i = 0; i < count; i++) { skb_put_u8(dev->recv_skb, *data++); if (!pn532_uart_rx_is_frame(dev->recv_skb)) diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index d2aa9f766738..8feac119a4bc 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -161,8 +161,8 @@ static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) case PCB_SYNC_ACK: skb = skb_dequeue(&ndlc->ack_pending_q); kfree_skb(skb); - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); + timer_delete_sync(&ndlc->t1_timer); + timer_delete_sync(&ndlc->t2_timer); ndlc->t2_active = false; ndlc->t1_active = false; break; @@ -213,8 +213,8 @@ static void llt_ndlc_sm_work(struct work_struct *work) pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); ndlc->t2_active = false; ndlc->t1_active = false; - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); + timer_delete_sync(&ndlc->t1_timer); + timer_delete_sync(&ndlc->t2_timer); ndlc_close(ndlc); ndlc->hard_fault = -EREMOTEIO; } @@ -283,8 +283,8 @@ EXPORT_SYMBOL(ndlc_probe); void ndlc_remove(struct llt_ndlc *ndlc) { /* cancel timers */ - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); + timer_delete_sync(&ndlc->t1_timer); + timer_delete_sync(&ndlc->t2_timer); ndlc->t2_active = false; ndlc->t1_active = false; /* cancel work */ diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index b2f1ced8e6dd..8cfe5405bae6 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -257,7 +257,7 @@ static void st_nci_hci_admin_event_received(struct nci_dev *ndev, case ST_NCI_EVT_HOT_PLUG: if (info->se_info.se_active) { if (!ST_NCI_EVT_HOT_PLUG_IS_INHIBITED(skb)) { - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); info->se_info.se_active = false; complete(&info->se_info.req_completion); } else { @@ -282,7 +282,7 @@ static int st_nci_hci_apdu_reader_event_received(struct nci_dev *ndev, switch (event) { case ST_NCI_EVT_TRANSMIT_DATA: - del_timer_sync(&info->se_info.bwi_timer); + timer_delete_sync(&info->se_info.bwi_timer); info->se_info.bwi_active = false; info->se_info.cb(info->se_info.cb_context, skb->data, skb->len, 0); @@ -415,7 +415,7 @@ void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, if (ndev->hci_dev->count_pipes == ndev->hci_dev->expected_pipes) { - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); info->se_info.se_active = false; ndev->hci_dev->count_pipes = 0; complete(&info->se_info.req_completion); @@ -751,9 +751,9 @@ void st_nci_se_deinit(struct nci_dev *ndev) struct st_nci_info *info = nci_get_drvdata(ndev); if (info->se_info.bwi_active) - del_timer_sync(&info->se_info.bwi_timer); + timer_delete_sync(&info->se_info.bwi_timer); if (info->se_info.se_active) - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); info->se_info.se_active = false; info->se_info.bwi_active = false; diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index 161caf2675cf..bec6f607c32c 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -844,7 +844,7 @@ static void st21nfca_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, info->se_info.count_pipes++; if (info->se_info.count_pipes == info->se_info.expected_pipes) { - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); info->se_info.se_active = false; info->se_info.count_pipes = 0; complete(&info->se_info.req_completion); @@ -864,7 +864,7 @@ static int st21nfca_admin_event_received(struct nfc_hci_dev *hdev, u8 event, case ST21NFCA_EVT_HOT_PLUG: if (info->se_info.se_active) { if (!ST21NFCA_EVT_HOT_PLUG_IS_INHIBITED(skb)) { - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); info->se_info.se_active = false; complete(&info->se_info.req_completion); } else { diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index dae288bebcb5..9a50f3c03bd4 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -380,7 +380,7 @@ int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev, switch (event) { case ST21NFCA_EVT_TRANSMIT_DATA: - del_timer_sync(&info->se_info.bwi_timer); + timer_delete_sync(&info->se_info.bwi_timer); cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE, @@ -435,9 +435,9 @@ void st21nfca_se_deinit(struct nfc_hci_dev *hdev) struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); if (info->se_info.bwi_active) - del_timer_sync(&info->se_info.bwi_timer); + timer_delete_sync(&info->se_info.bwi_timer); if (info->se_info.se_active) - del_timer_sync(&info->se_info.se_active_timer); + timer_delete_sync(&info->se_info.se_active_timer); cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 6b12ca80aa27..89be5911b25d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -860,7 +860,7 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl) if (nr_change_groups) mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies); else - del_timer_sync(&ctrl->anatt_timer); + timer_delete_sync(&ctrl->anatt_timer); out_unlock: mutex_unlock(&ctrl->ana_lock); return error; @@ -900,7 +900,7 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl) { if (!nvme_ctrl_use_ana(ctrl)) return; - del_timer_sync(&ctrl->anatt_timer); + timer_delete_sync(&ctrl->anatt_timer); cancel_work_sync(&ctrl->ana_work); } diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index 4547ac44c8d4..474515d27e9c 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -73,7 +73,7 @@ int parport_wait_event (struct parport *port, signed long timeout) timer_setup(&port->timer, timeout_waiting_on_port, 0); mod_timer(&port->timer, jiffies + timeout); ret = down_interruptible (&port->physport->ieee1284.irq); - if (!del_timer_sync(&port->timer) && !ret) + if (!timer_delete_sync(&port->timer) && !ret) /* Timed out. */ ret = 1; diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index c01968ef0bd7..20529d1a3c44 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1794,7 +1794,7 @@ static void interrupt_event_handler(struct controller *ctrl) } else if (ctrl->event_queue[loop].event_type == INT_BUTTON_CANCEL) { dbg("button cancel\n"); - del_timer(&p_slot->task_event); + timer_delete(&p_slot->task_event); mutex_lock(&ctrl->crit_sect); diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index bfbec7c1a6b1..387b85585263 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -564,7 +564,7 @@ void shpchp_release_ctlr(struct controller *ctrl) shpc_writel(ctrl, SERR_INTR_ENABLE, serr_int); if (shpchp_poll_mode) - del_timer(&ctrl->poll_timer); + timer_delete(&ctrl->poll_timer); else { free_irq(ctrl->pci_dev->irq, ctrl); pci_disable_msi(ctrl->pci_dev); diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c index 86a357837a7b..1e464b951ed2 100644 --- a/drivers/pcmcia/i82365.c +++ b/drivers/pcmcia/i82365.c @@ -1324,7 +1324,7 @@ static void __exit exit_i82365(void) } platform_device_unregister(i82365_device); if (poll_interval != 0) - del_timer_sync(&poll_timer); + timer_delete_sync(&poll_timer); if (grab_irq != 0) free_irq(cs_irq, pcic_interrupt); for (i = 0; i < sockets; i++) { diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 1deb9960db34..d361124db993 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -766,7 +766,7 @@ EXPORT_SYMBOL(soc_pcmcia_init_one); void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt) { - del_timer_sync(&skt->poll_timer); + timer_delete_sync(&skt->poll_timer); pcmcia_unregister_socket(&skt->socket); @@ -865,7 +865,7 @@ int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt) return ret; out_err_8: - del_timer_sync(&skt->poll_timer); + timer_delete_sync(&skt->poll_timer); pcmcia_unregister_socket(&skt->socket); out_err_7: diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 5ef888688e23..060aed0edc65 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -509,7 +509,7 @@ static void __exit exit_tcic(void) { int i; - del_timer_sync(&poll_timer); + timer_delete_sync(&poll_timer); if (cs_irq != 0) { tcic_aux_setw(TCIC_AUX_SYSCFG, TCIC_SYSCFG_AUTOBUSY|0x0a00); free_irq(cs_irq, tcic_interrupt); diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 300cdaa75a17..aae99adb29eb 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -1320,7 +1320,7 @@ static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo) int i; fifo->is_ready = false; - del_timer_sync(&fifo->timer); + timer_delete_sync(&fifo->timer); mlxbf_tmfifo_disable_irqs(fifo); cancel_work_sync(&fifo->work); for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 79a7b68c7373..5d717b1c23cf 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -1108,7 +1108,7 @@ static int ips_monitor(void *data) last_sample_period = 1; } while (!kthread_should_stop()); - del_timer_sync(&ips->timer); + timer_delete_sync(&ips->timer); dev_dbg(ips->dev, "ips-monitor thread stopped\n"); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 3197aaa69da7..b52390fbd743 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -538,7 +538,7 @@ static void sony_laptop_remove_input(void) if (!atomic_dec_and_test(&sony_laptop_input.users)) return; - del_timer_sync(&sony_laptop_input.release_key_timer); + timer_delete_sync(&sony_laptop_input.release_key_timer); /* * Generate key-up events for remaining keys. Note that we don't diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 75c1bae30a7c..374ceefd6f2a 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -229,7 +229,7 @@ static void pps_gpio_remove(struct platform_device *pdev) struct pps_gpio_device_data *data = platform_get_drvdata(pdev); pps_unregister_source(data->pps); - del_timer_sync(&data->echo_timer); + timer_delete_sync(&data->echo_timer); /* reset echo pin in any case */ gpiod_set_value(data->echo_pin, 0); dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq); diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c index 2f465549b843..121bd29d863d 100644 --- a/drivers/pps/clients/pps-ktimer.c +++ b/drivers/pps/clients/pps-ktimer.c @@ -58,7 +58,7 @@ static void __exit pps_ktimer_exit(void) { dev_dbg(&pps->dev, "ktimer PPS source unregistered\n"); - del_timer_sync(&ktimer); + timer_delete_sync(&ktimer); pps_unregister_source(pps); } diff --git a/drivers/pps/generators/pps_gen-dummy.c b/drivers/pps/generators/pps_gen-dummy.c index 55de4aecf35e..547fa7fe29f4 100644 --- a/drivers/pps/generators/pps_gen-dummy.c +++ b/drivers/pps/generators/pps_gen-dummy.c @@ -52,7 +52,7 @@ static int pps_gen_dummy_enable(struct pps_gen_device *pps_gen, bool enable) if (enable) mod_timer(&ktimer, jiffies + get_random_delay()); else - del_timer_sync(&ktimer); + timer_delete_sync(&ktimer); return 0; } @@ -73,7 +73,7 @@ static const struct pps_gen_source_info pps_gen_dummy_info = { static void __exit pps_gen_dummy_exit(void) { - del_timer_sync(&ktimer); + timer_delete_sync(&ktimer); pps_gen_unregister_source(pps_gen); } diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index b25635c5c745..7945c6be1f7c 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -4499,7 +4499,7 @@ ptp_ocp_detach(struct ptp_ocp *bp) ptp_ocp_detach_sysfs(bp); ptp_ocp_attr_group_del(bp); if (timer_pending(&bp->watchdog)) - del_timer_sync(&bp->watchdog); + timer_delete_sync(&bp->watchdog); if (bp->ts0) ptp_ocp_unregister_ext(bp->ts0); if (bp->ts1) diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c index c4a3ab53dcd4..0eeae5bcc3aa 100644 --- a/drivers/rtc/dev.c +++ b/drivers/rtc/dev.c @@ -90,7 +90,7 @@ static int clear_uie(struct rtc_device *rtc) rtc->stop_uie_polling = 1; if (rtc->uie_timer_active) { spin_unlock_irq(&rtc->irq_lock); - del_timer_sync(&rtc->uie_timer); + timer_delete_sync(&rtc->uie_timer); spin_lock_irq(&rtc->irq_lock); rtc->uie_timer_active = 0; } diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index a68b8c884102..a9f5b9466fb5 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -44,7 +44,7 @@ static int test_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) timeout = rtc_tm_to_time64(&alrm->time) - ktime_get_real_seconds(); timeout -= rtd->offset; - del_timer(&rtd->alarm); + timer_delete(&rtd->alarm); expires = jiffies + timeout * HZ; if (expires > U32_MAX) @@ -86,7 +86,7 @@ static int test_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) if (enable) add_timer(&rtd->alarm); else - del_timer(&rtd->alarm); + timer_delete(&rtd->alarm); return 0; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 31bfb49588c2..cf36d3bafeca 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1507,7 +1507,7 @@ static void dasd_device_timeout(struct timer_list *t) void dasd_device_set_timer(struct dasd_device *device, int expires) { if (expires == 0) - del_timer(&device->timer); + timer_delete(&device->timer); else mod_timer(&device->timer, jiffies + expires); } @@ -1518,7 +1518,7 @@ EXPORT_SYMBOL(dasd_device_set_timer); */ void dasd_device_clear_timer(struct dasd_device *device) { - del_timer(&device->timer); + timer_delete(&device->timer); } EXPORT_SYMBOL(dasd_device_clear_timer); @@ -2692,7 +2692,7 @@ static void dasd_block_timeout(struct timer_list *t) void dasd_block_set_timer(struct dasd_block *block, int expires) { if (expires == 0) - del_timer(&block->timer); + timer_delete(&block->timer); else mod_timer(&block->timer, jiffies + expires); } @@ -2703,7 +2703,7 @@ EXPORT_SYMBOL(dasd_block_set_timer); */ void dasd_block_clear_timer(struct dasd_block *block) { - del_timer(&block->timer); + timer_delete(&block->timer); } EXPORT_SYMBOL(dasd_block_clear_timer); diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 1a3190848670..34f3820d7f74 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -793,7 +793,7 @@ static void tty3270_deactivate(struct raw3270_view *view) { struct tty3270 *tp = container_of(view, struct tty3270, view); - del_timer(&tp->timer); + timer_delete(&tp->timer); } static void tty3270_irq(struct tty3270 *tp, struct raw3270_request *rq, struct irb *irb) @@ -1060,7 +1060,7 @@ static void tty3270_free(struct raw3270_view *view) { struct tty3270 *tp = container_of(view, struct tty3270, view); - del_timer_sync(&tp->timer); + timer_delete_sync(&tp->timer); tty3270_free_screen(tp->screen, tp->allocated_lines); free_page((unsigned long)tp->converted_line); kfree(tp->input); diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 45bd001206a2..840be75e75d4 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -261,7 +261,7 @@ __sclp_queue_read_req(void) static inline void __sclp_set_request_timer(unsigned long time, void (*cb)(struct timer_list *)) { - del_timer(&sclp_request_timer); + timer_delete(&sclp_request_timer); sclp_request_timer.function = cb; sclp_request_timer.expires = jiffies + time; add_timer(&sclp_request_timer); @@ -407,7 +407,7 @@ __sclp_start_request(struct sclp_req *req) if (sclp_running_state != sclp_running_state_idle) return 0; - del_timer(&sclp_request_timer); + timer_delete(&sclp_request_timer); rc = sclp_service_call_trace(req->command, req->sccb); req->start_count++; @@ -442,7 +442,7 @@ sclp_process_queue(void) spin_unlock_irqrestore(&sclp_lock, flags); return; } - del_timer(&sclp_request_timer); + timer_delete(&sclp_request_timer); while (!list_empty(&sclp_req_queue)) { req = list_entry(sclp_req_queue.next, struct sclp_req, list); rc = __sclp_start_request(req); @@ -662,7 +662,7 @@ static void sclp_interrupt_handler(struct ext_code ext_code, !ok_response(finished_sccb, active_cmd)); if (finished_sccb) { - del_timer(&sclp_request_timer); + timer_delete(&sclp_request_timer); sclp_running_state = sclp_running_state_reset_pending; req = __sclp_find_req(finished_sccb); if (req) { @@ -739,7 +739,7 @@ sclp_sync_wait(void) /* Loop until driver state indicates finished request */ while (sclp_running_state != sclp_running_state_idle) { /* Check for expired request timer */ - if (get_tod_clock_fast() > timeout && del_timer(&sclp_request_timer)) + if (get_tod_clock_fast() > timeout && timer_delete(&sclp_request_timer)) sclp_request_timer.function(&sclp_request_timer); cpu_relax(); } @@ -1165,7 +1165,7 @@ sclp_check_interface(void) * with IRQs enabled. */ irq_subclass_unregister(IRQ_SUBCLASS_SERVICE_SIGNAL); spin_lock_irqsave(&sclp_lock, flags); - del_timer(&sclp_request_timer); + timer_delete(&sclp_request_timer); rc = -EBUSY; if (sclp_init_req.status == SCLP_REQ_DONE) { if (sccb->header.response_code == 0x20) { diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c index 6a030ba38bf3..d8544c485808 100644 --- a/drivers/s390/char/sclp_con.c +++ b/drivers/s390/char/sclp_con.c @@ -109,7 +109,7 @@ static void sclp_console_sync_queue(void) unsigned long flags; spin_lock_irqsave(&sclp_con_lock, flags); - del_timer(&sclp_con_timer); + timer_delete(&sclp_con_timer); while (sclp_con_queue_running) { spin_unlock_irqrestore(&sclp_con_lock, flags); sclp_sync_wait(); diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 33b9c968dbcb..62979adcb381 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -231,7 +231,7 @@ sclp_vt220_emit_current(void) list_add_tail(&sclp_vt220_current_request->list, &sclp_vt220_outqueue); sclp_vt220_current_request = NULL; - del_timer(&sclp_vt220_timer); + timer_delete(&sclp_vt220_timer); } sclp_vt220_flush_later = 0; } @@ -798,7 +798,7 @@ sclp_vt220_notify(struct notifier_block *self, sclp_vt220_emit_current(); spin_lock_irqsave(&sclp_vt220_lock, flags); - del_timer(&sclp_vt220_timer); + timer_delete(&sclp_vt220_timer); while (sclp_vt220_queue_running) { spin_unlock_irqrestore(&sclp_vt220_lock, flags); sclp_sync_wait(); diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index ce8a440598a8..48e8417a5cff 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -1108,7 +1108,7 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) struct tape_request, list); if (req->status == TAPE_REQUEST_LONG_BUSY) { DBF_EVENT(3, "(%08x): del timer\n", device->cdev_id); - if (del_timer(&device->lb_timeout)) { + if (timer_delete(&device->lb_timeout)) { tape_put_device(device); __tape_start_next_request(device); } diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index f7e75d9fedf6..b76038632883 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -73,7 +73,7 @@ tape_std_assign(struct tape_device *device) rc = tape_do_io_interruptible(device, request); - del_timer_sync(&request->timer); + timer_delete_sync(&request->timer); if (rc != 0) { DBF_EVENT(3, "%08x: assign failed - device might be busy\n", diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 42791fa0b80e..e1b1fbdabb1b 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -115,7 +115,7 @@ void ccw_device_set_timeout(struct ccw_device *cdev, int expires) { if (expires == 0) - del_timer(&cdev->private->timer); + timer_delete(&cdev->private->timer); else mod_timer(&cdev->private->timer, jiffies + expires); } diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index 165de1552301..ac382355dc04 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -114,7 +114,7 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires) struct eadm_private *private = get_eadm_private(sch); if (expires == 0) - del_timer(&private->timer); + timer_delete(&private->timer); else mod_timer(&private->timer, jiffies + expires); } diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 9a0e6e4d8a5e..4088fda07197 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -1289,7 +1289,7 @@ void ap_queue_prepare_remove(struct ap_queue *aq) /* move queue device state to SHUTDOWN in progress */ aq->dev_state = AP_DEV_STATE_SHUTDOWN; spin_unlock_bh(&aq->lock); - del_timer_sync(&aq->timeout); + timer_delete_sync(&aq->timeout); } void ap_queue_remove(struct ap_queue *aq) diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index 8672d225ba77..5fcdce116862 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -158,7 +158,7 @@ fsm_deltimer(fsm_timer *this) printk(KERN_DEBUG "fsm(%s): Delete timer %p\n", this->fi->name, this); #endif - del_timer(&this->tl); + timer_delete(&this->tl); } int @@ -188,7 +188,7 @@ fsm_modtimer(fsm_timer *this, int millisec, int event, void *arg) this->fi->name, this, millisec); #endif - del_timer(&this->tl); + timer_delete(&this->tl); timer_setup(&this->tl, fsm_expire_timer, 0); this->expire_event = event; this->event_arg = arg; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 20328d695ef9..f5cfaebfb7c9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -7088,7 +7088,7 @@ int qeth_stop(struct net_device *dev) netif_tx_disable(dev); qeth_for_each_output_queue(card, queue, i) { - del_timer_sync(&queue->timer); + timer_delete_sync(&queue->timer); /* Queues may get re-allocated, so remove the NAPIs. */ netif_napi_del(&queue->napi); } diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 99d6b3f8692b..d5f5f563881e 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -458,7 +458,7 @@ static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req) return; } - del_timer_sync(&req->timer); + timer_delete_sync(&req->timer); zfcp_fsf_protstatus_eval(req); zfcp_fsf_fsfstatus_eval(req); req->handler(req); @@ -891,7 +891,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) req->qdio_req.qdio_outb_usage = atomic_read(&qdio->req_q_free); req->issued = get_tod_clock(); if (zfcp_qdio_send(qdio, &req->qdio_req)) { - del_timer_sync(&req->timer); + timer_delete_sync(&req->timer); /* lookup request again, list might have changed */ if (zfcp_reqlist_find_rm(adapter->req_list, req_id) == NULL) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 8cbc5e1711af..0957e3f8b46e 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -408,7 +408,7 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio) tasklet_disable(&qdio->irq_tasklet); tasklet_disable(&qdio->request_tasklet); - del_timer_sync(&qdio->request_timer); + timer_delete_sync(&qdio->request_timer); qdio_stop_irq(adapter->ccw_device); qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR); diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 3e3100dbfda3..f9372a81cd4e 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6181,7 +6181,7 @@ ahd_shutdown(void *arg) /* * Stop periodic timer callbacks. */ - del_timer_sync(&ahd->stat_timer); + timer_delete_sync(&ahd->stat_timer); /* This will reset most registers to 0, but not all */ ahd_reset(ahd, /*reinit*/FALSE); @@ -6975,7 +6975,7 @@ static const char *termstat_strings[] = { static void ahd_timer_reset(struct timer_list *timer, int usec) { - del_timer(timer); + timer_delete(timer); timer->expires = jiffies + (usec * HZ)/1000000; add_timer(timer); } diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c index 9dda296c0152..e74393357025 100644 --- a/drivers/scsi/aic94xx/aic94xx_hwi.c +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -731,7 +731,7 @@ static void asd_dl_tasklet_handler(unsigned long data) goto next_1; } else if (ascb->scb->header.opcode == EMPTY_SCB) { goto out; - } else if (!ascb->uldd_timer && !del_timer(&ascb->timer)) { + } else if (!ascb->uldd_timer && !timer_delete(&ascb->timer)) { goto next_1; } spin_lock_irqsave(&seq->pend_q_lock, flags); diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 538a5867e8ab..adf3d9145606 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -851,7 +851,7 @@ static void asd_free_queues(struct asd_ha_struct *asd_ha) * times out. Apparently we don't wait for the CONTROL PHY * to complete, so it doesn't matter if we kill the timer. */ - del_timer_sync(&ascb->timer); + timer_delete_sync(&ascb->timer); WARN_ON(ascb->scb->header.opcode != CONTROL_PHY); list_del_init(pos); diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c b/drivers/scsi/aic94xx/aic94xx_tmf.c index 27d32b8c2987..d45dbf98f25e 100644 --- a/drivers/scsi/aic94xx/aic94xx_tmf.c +++ b/drivers/scsi/aic94xx/aic94xx_tmf.c @@ -31,7 +31,7 @@ static int asd_enqueue_internal(struct asd_ascb *ascb, res = asd_post_ascb_list(ascb->ha, ascb, 1); if (unlikely(res)) - del_timer(&ascb->timer); + timer_delete(&ascb->timer); return res; } @@ -58,7 +58,7 @@ static void asd_clear_nexus_tasklet_complete(struct asd_ascb *ascb, { struct tasklet_completion_status *tcs = ascb->uldd_task; ASD_DPRINTK("%s: here\n", __func__); - if (!del_timer(&ascb->timer)) { + if (!timer_delete(&ascb->timer)) { ASD_DPRINTK("%s: couldn't delete timer\n", __func__); return; } @@ -303,7 +303,7 @@ static void asd_tmf_tasklet_complete(struct asd_ascb *ascb, { struct tasklet_completion_status *tcs; - if (!del_timer(&ascb->timer)) + if (!timer_delete(&ascb->timer)) return; tcs = ascb->uldd_task; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 221a520e8a9b..b450b1fc6bbb 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -1161,8 +1161,8 @@ static int arcmsr_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; out_free_sysfs: if (set_date_time) - del_timer_sync(&acb->refresh_timer); - del_timer_sync(&acb->eternal_timer); + timer_delete_sync(&acb->refresh_timer); + timer_delete_sync(&acb->eternal_timer); flush_work(&acb->arcmsr_do_message_isr_bh); arcmsr_stop_adapter_bgrb(acb); arcmsr_flush_adapter_cache(acb); @@ -1204,9 +1204,9 @@ static int __maybe_unused arcmsr_suspend(struct device *dev) arcmsr_disable_outbound_ints(acb); arcmsr_free_irq(pdev, acb); - del_timer_sync(&acb->eternal_timer); + timer_delete_sync(&acb->eternal_timer); if (set_date_time) - del_timer_sync(&acb->refresh_timer); + timer_delete_sync(&acb->refresh_timer); flush_work(&acb->arcmsr_do_message_isr_bh); arcmsr_stop_adapter_bgrb(acb); arcmsr_flush_adapter_cache(acb); @@ -1685,9 +1685,9 @@ static void arcmsr_free_pcidev(struct AdapterControlBlock *acb) arcmsr_free_sysfs_attr(acb); scsi_remove_host(host); flush_work(&acb->arcmsr_do_message_isr_bh); - del_timer_sync(&acb->eternal_timer); + timer_delete_sync(&acb->eternal_timer); if (set_date_time) - del_timer_sync(&acb->refresh_timer); + timer_delete_sync(&acb->refresh_timer); pdev = acb->pdev; arcmsr_free_irq(pdev, acb); arcmsr_free_ccb_pool(acb); @@ -1718,9 +1718,9 @@ static void arcmsr_remove(struct pci_dev *pdev) arcmsr_free_sysfs_attr(acb); scsi_remove_host(host); flush_work(&acb->arcmsr_do_message_isr_bh); - del_timer_sync(&acb->eternal_timer); + timer_delete_sync(&acb->eternal_timer); if (set_date_time) - del_timer_sync(&acb->refresh_timer); + timer_delete_sync(&acb->refresh_timer); arcmsr_disable_outbound_ints(acb); arcmsr_stop_adapter_bgrb(acb); arcmsr_flush_adapter_cache(acb); @@ -1765,9 +1765,9 @@ static void arcmsr_shutdown(struct pci_dev *pdev) (struct AdapterControlBlock *)host->hostdata; if (acb->acb_flags & ACB_F_ADAPTER_REMOVED) return; - del_timer_sync(&acb->eternal_timer); + timer_delete_sync(&acb->eternal_timer); if (set_date_time) - del_timer_sync(&acb->refresh_timer); + timer_delete_sync(&acb->refresh_timer); arcmsr_disable_outbound_ints(acb); arcmsr_free_irq(pdev, acb); flush_work(&acb->arcmsr_do_message_isr_bh); diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 4ce0b2d73614..e0b55d869a35 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -2331,7 +2331,7 @@ static void fas216_eh_timer(struct timer_list *t) fas216_log(info, LOG_ERROR, "error handling timed out\n"); - del_timer(&info->eh_timer); + timer_delete(&info->eh_timer); if (info->rst_bus_status == 0) info->rst_bus_status = -1; @@ -2532,7 +2532,7 @@ int fas216_eh_device_reset(struct scsi_cmnd *SCpnt) */ wait_event(info->eh_wait, info->rst_dev_status); - del_timer_sync(&info->eh_timer); + timer_delete_sync(&info->eh_timer); spin_lock_irqsave(&info->host_lock, flags); info->rstSCpnt = NULL; @@ -2622,7 +2622,7 @@ int fas216_eh_bus_reset(struct scsi_cmnd *SCpnt) * Wait one second for the interrupt. */ wait_event(info->eh_wait, info->rst_bus_status); - del_timer_sync(&info->eh_timer); + timer_delete_sync(&info->eh_timer); fas216_log(info, LOG_ERROR, "bus reset complete: %s\n", info->rst_bus_status == 1 ? "success" : "failed"); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index a8b399ed98fc..7d1b767d87fb 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5448,7 +5448,7 @@ static pci_ers_result_t beiscsi_eeh_err_detected(struct pci_dev *pdev, "BM_%d : EEH error detected\n"); /* first stop UE detection when PCI error detected */ - del_timer_sync(&phba->hw_check); + timer_delete_sync(&phba->hw_check); cancel_delayed_work_sync(&phba->recover_port); /* sessions are no longer valid, so first fail the sessions */ @@ -5746,7 +5746,7 @@ static void beiscsi_remove(struct pci_dev *pcidev) } /* first stop UE detection before unloading */ - del_timer_sync(&phba->hw_check); + timer_delete_sync(&phba->hw_check); cancel_delayed_work_sync(&phba->recover_port); cancel_work_sync(&phba->sess_work); diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index f015c53de0d4..598f2fc93ef2 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -327,7 +327,7 @@ bfad_sm_failed(struct bfad_s *bfad, enum bfad_sm_event event) case BFAD_E_EXIT_COMP: bfa_sm_set_state(bfad, bfad_sm_uninit); bfad_remove_intr(bfad); - del_timer_sync(&bfad->hal_tmo); + timer_delete_sync(&bfad->hal_tmo); break; default: @@ -376,7 +376,7 @@ bfad_sm_stopping(struct bfad_s *bfad, enum bfad_sm_event event) case BFAD_E_EXIT_COMP: bfa_sm_set_state(bfad, bfad_sm_uninit); bfad_remove_intr(bfad); - del_timer_sync(&bfad->hal_tmo); + timer_delete_sync(&bfad->hal_tmo); bfad_im_probe_undo(bfad); bfad->bfad_flags &= ~BFAD_FC4_PROBE_DONE; bfad_uncfg_pport(bfad); @@ -1421,7 +1421,7 @@ bfad_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) /* Suspend/fail all bfa operations */ bfa_ioc_suspend(&bfad->bfa.ioc); spin_unlock_irqrestore(&bfad->bfad_lock, flags); - del_timer_sync(&bfad->hal_tmo); + timer_delete_sync(&bfad->hal_tmo); ret = PCI_ERS_RESULT_CAN_RECOVER; break; case pci_channel_io_frozen: /* fatal error */ @@ -1435,7 +1435,7 @@ bfad_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) wait_for_completion(&bfad->comp); bfad_remove_intr(bfad); - del_timer_sync(&bfad->hal_tmo); + timer_delete_sync(&bfad->hal_tmo); pci_disable_device(pdev); ret = PCI_ERS_RESULT_NEED_RESET; break; @@ -1566,7 +1566,7 @@ bfad_pci_mmio_enabled(struct pci_dev *pdev) wait_for_completion(&bfad->comp); bfad_remove_intr(bfad); - del_timer_sync(&bfad->hal_tmo); + timer_delete_sync(&bfad->hal_tmo); pci_disable_device(pdev); return PCI_ERS_RESULT_NEED_RESET; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 5ac20c93637c..de6574cccf58 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -1599,7 +1599,7 @@ static void bnx2fc_interface_cleanup(struct bnx2fc_interface *interface) struct bnx2fc_hba *hba = interface->hba; /* Stop the transmit retry timer */ - del_timer_sync(&port->timer); + timer_delete_sync(&port->timer); /* Free existing transmit skbs */ fcoe_clean_pending_queue(lport); @@ -1938,7 +1938,7 @@ static void bnx2fc_fw_destroy(struct bnx2fc_hba *hba) if (signal_pending(current)) flush_signals(current); - del_timer_sync(&hba->destroy_timer); + timer_delete_sync(&hba->destroy_timer); } bnx2fc_unbind_adapter_devices(hba); } diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index eb3209103312..b8227cfef64f 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -74,7 +74,7 @@ static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) &tgt->flags))); if (signal_pending(current)) flush_signals(current); - del_timer_sync(&tgt->ofld_timer); + timer_delete_sync(&tgt->ofld_timer); } static void bnx2fc_offload_session(struct fcoe_port *port, @@ -283,7 +283,7 @@ static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) &tgt->flags))); if (signal_pending(current)) flush_signals(current); - del_timer_sync(&tgt->upld_timer); + timer_delete_sync(&tgt->upld_timer); } static void bnx2fc_upload_session(struct fcoe_port *port, diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 9971f32a663c..6c80e5b514fd 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1626,7 +1626,7 @@ static int bnx2i_conn_start(struct iscsi_cls_conn *cls_conn) if (signal_pending(current)) flush_signals(current); - del_timer_sync(&bnx2i_conn->ep->ofld_timer); + timer_delete_sync(&bnx2i_conn->ep->ofld_timer); iscsi_conn_start(cls_conn); return 0; @@ -1749,7 +1749,7 @@ static int bnx2i_tear_down_conn(struct bnx2i_hba *hba, if (signal_pending(current)) flush_signals(current); - del_timer_sync(&ep->ofld_timer); + timer_delete_sync(&ep->ofld_timer); bnx2i_ep_destroy_list_del(hba, ep); @@ -1861,7 +1861,7 @@ static struct iscsi_endpoint *bnx2i_ep_connect(struct Scsi_Host *shost, if (signal_pending(current)) flush_signals(current); - del_timer_sync(&bnx2i_ep->ofld_timer); + timer_delete_sync(&bnx2i_ep->ofld_timer); bnx2i_ep_ofld_list_del(hba, bnx2i_ep); @@ -2100,7 +2100,7 @@ int bnx2i_hw_ep_disconnect(struct bnx2i_endpoint *bnx2i_ep) if (signal_pending(current)) flush_signals(current); - del_timer_sync(&bnx2i_ep->ofld_timer); + timer_delete_sync(&bnx2i_ep->ofld_timer); destroy_conn: bnx2i_ep_active_list_del(hba, bnx2i_ep); diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index e43c5413ce29..beded091dff1 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -3701,7 +3701,7 @@ csio_mberr_worker(void *data) struct csio_mb *mbp_next; int rv; - del_timer_sync(&mbm->timer); + timer_delete_sync(&mbm->timer); spin_lock_irq(&hw->lock); if (list_empty(&mbm->cbfn_q)) { @@ -4210,7 +4210,7 @@ csio_mgmtm_init(struct csio_mgmtm *mgmtm, struct csio_hw *hw) static void csio_mgmtm_exit(struct csio_mgmtm *mgmtm) { - del_timer_sync(&mgmtm->mgmt_timer); + timer_delete_sync(&mgmtm->mgmt_timer); } diff --git a/drivers/scsi/csiostor/csio_mb.c b/drivers/scsi/csiostor/csio_mb.c index 94810b19e747..c7b4c464f6b8 100644 --- a/drivers/scsi/csiostor/csio_mb.c +++ b/drivers/scsi/csiostor/csio_mb.c @@ -1619,7 +1619,7 @@ csio_mb_cancel_all(struct csio_hw *hw, struct list_head *cbfn_q) mbp = mbm->mcurrent; /* Stop mailbox completion timer */ - del_timer_sync(&mbm->timer); + timer_delete_sync(&mbm->timer); /* Add completion to tail of cbfn queue */ list_add_tail(&mbp->list, cbfn_q); @@ -1682,7 +1682,7 @@ csio_mbm_init(struct csio_mbm *mbm, struct csio_hw *hw, void csio_mbm_exit(struct csio_mbm *mbm) { - del_timer_sync(&mbm->timer); + timer_delete_sync(&mbm->timer); CSIO_DB_ASSERT(mbm->mcurrent == NULL); CSIO_DB_ASSERT(list_empty(&mbm->req_q)); diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index ec6530240707..461d38e2fb19 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -495,7 +495,7 @@ static int do_act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) spin_lock_bh(&csk->lock); if (csk->retry_timer.function) { - del_timer(&csk->retry_timer); + timer_delete(&csk->retry_timer); csk->retry_timer.function = NULL; } diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index c07d2e3b4bcf..aaba294ecb58 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -930,7 +930,7 @@ static void do_act_establish(struct cxgbi_device *cdev, struct sk_buff *skb) csk, csk->state, csk->flags, csk->tid); if (csk->retry_timer.function) { - del_timer(&csk->retry_timer); + timer_delete(&csk->retry_timer); csk->retry_timer.function = NULL; } diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index e71de2419758..8dc6be9a00c1 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -765,7 +765,7 @@ static void waiting_process_next(struct AdapterCtlBlk *acb) return; if (timer_pending(&acb->waiting_timer)) - del_timer(&acb->waiting_timer); + timer_delete(&acb->waiting_timer); if (list_empty(dcb_list_head)) return; @@ -1153,7 +1153,7 @@ static int __dc395x_eh_bus_reset(struct scsi_cmnd *cmd) cmd, cmd->device->id, (u8)cmd->device->lun, cmd); if (timer_pending(&acb->waiting_timer)) - del_timer(&acb->waiting_timer); + timer_delete(&acb->waiting_timer); /* * disable interrupt @@ -1561,7 +1561,7 @@ static void dc395x_handle_interrupt(struct AdapterCtlBlk *acb, /*dprintkl(KERN_DEBUG, "handle_interrupt: intstatus = 0x%02x ", scsi_intstatus); */ if (timer_pending(&acb->selto_timer)) - del_timer(&acb->selto_timer); + timer_delete(&acb->selto_timer); if (scsi_intstatus & (INT_SELTIMEOUT | INT_DISCONNECT)) { disconnect(acb); /* bus free interrupt */ @@ -3454,7 +3454,7 @@ static void scsi_reset_detect(struct AdapterCtlBlk *acb) dprintkl(KERN_INFO, "scsi_reset_detect: acb=%p\n", acb); /* delay half a second */ if (timer_pending(&acb->waiting_timer)) - del_timer(&acb->waiting_timer); + timer_delete(&acb->waiting_timer); DC395x_write8(acb, TRM_S1040_SCSI_CONTROL, DO_RSTMODULE); DC395x_write8(acb, TRM_S1040_DMA_CONTROL, DMARESETMODULE); @@ -4415,9 +4415,9 @@ static void adapter_uninit(struct AdapterCtlBlk *acb) /* remove timers */ if (timer_pending(&acb->waiting_timer)) - del_timer(&acb->waiting_timer); + timer_delete(&acb->waiting_timer); if (timer_pending(&acb->selto_timer)) - del_timer(&acb->selto_timer); + timer_delete(&acb->selto_timer); adapter_uninit_chip(acb); adapter_remove_and_free_all_devices(acb); diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c index 59f277593785..1bd42f7db177 100644 --- a/drivers/scsi/elx/efct/efct_driver.c +++ b/drivers/scsi/elx/efct/efct_driver.c @@ -310,7 +310,7 @@ efct_fw_reset(struct efct *efct) * during attach. */ if (timer_pending(&efct->xport->stats_timer)) - del_timer(&efct->xport->stats_timer); + timer_delete(&efct->xport->stats_timer); if (efct_hw_reset(&efct->hw, EFCT_HW_RESET_FIRMWARE)) { efc_log_info(efct, "failed to reset firmware\n"); diff --git a/drivers/scsi/elx/efct/efct_xport.c b/drivers/scsi/elx/efct/efct_xport.c index cf4dced20b8b..2aca60f6428e 100644 --- a/drivers/scsi/elx/efct/efct_xport.c +++ b/drivers/scsi/elx/efct/efct_xport.c @@ -508,7 +508,7 @@ efct_xport_detach(struct efct_xport *xport) /*Shutdown FC Statistics timer*/ if (timer_pending(&xport->stats_timer)) - del_timer(&xport->stats_timer); + timer_delete(&xport->stats_timer); efct_hw_teardown(&efct->hw); diff --git a/drivers/scsi/elx/libefc/efc_fabric.c b/drivers/scsi/elx/libefc/efc_fabric.c index 9661eea93aa1..cf7e738c4edc 100644 --- a/drivers/scsi/elx/libefc/efc_fabric.c +++ b/drivers/scsi/elx/libefc/efc_fabric.c @@ -888,7 +888,7 @@ gidpt_delay_timer_cb(struct timer_list *t) { struct efc_node *node = from_timer(node, t, gidpt_delay_timer); - del_timer(&node->gidpt_delay_timer); + timer_delete(&node->gidpt_delay_timer); efc_node_post_event(node, EFC_EVT_GIDPT_DELAY_EXPIRED, NULL); } diff --git a/drivers/scsi/elx/libefc/efc_node.c b/drivers/scsi/elx/libefc/efc_node.c index a1b4ce6a27b4..f17e052fe537 100644 --- a/drivers/scsi/elx/libefc/efc_node.c +++ b/drivers/scsi/elx/libefc/efc_node.c @@ -149,7 +149,7 @@ efc_node_free(struct efc_node *node) /* if the gidpt_delay_timer is still running, then delete it */ if (timer_pending(&node->gidpt_delay_timer)) - del_timer(&node->gidpt_delay_timer); + timer_delete(&node->gidpt_delay_timer); xa_erase(&nport->lookup, node->rnode.fc_id); diff --git a/drivers/scsi/esas2r/esas2r_init.c b/drivers/scsi/esas2r/esas2r_init.c index 0cea5f3d1a08..04a07fe57be2 100644 --- a/drivers/scsi/esas2r/esas2r_init.c +++ b/drivers/scsi/esas2r/esas2r_init.c @@ -439,7 +439,7 @@ static void esas2r_adapter_power_down(struct esas2r_adapter *a, if ((test_bit(AF2_INIT_DONE, &a->flags2)) && (!test_bit(AF_DEGRADED_MODE, &a->flags))) { if (!power_management) { - del_timer_sync(&a->timer); + timer_delete_sync(&a->timer); tasklet_kill(&a->tasklet); } esas2r_power_down(a); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 038e38578676..b911fdb387f3 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1013,7 +1013,7 @@ static void fcoe_if_destroy(struct fc_lport *lport) fc_lport_destroy(lport); /* Stop the transmit retry timer */ - del_timer_sync(&port->timer); + timer_delete_sync(&port->timer); /* Free existing transmit skbs */ fcoe_clean_pending_queue(lport); diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 5c8d1ba3f8f3..56d270526c9c 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -302,7 +302,7 @@ void fcoe_ctlr_destroy(struct fcoe_ctlr *fip) fcoe_ctlr_set_state(fip, FIP_ST_DISABLED); fcoe_ctlr_reset_fcfs(fip); mutex_unlock(&fip->ctlr_mutex); - del_timer_sync(&fip->timer); + timer_delete_sync(&fip->timer); cancel_work_sync(&fip->timer_work); } EXPORT_SYMBOL(fcoe_ctlr_destroy); @@ -478,7 +478,7 @@ EXPORT_SYMBOL(fcoe_ctlr_link_up); static void fcoe_ctlr_reset(struct fcoe_ctlr *fip) { fcoe_ctlr_reset_fcfs(fip); - del_timer(&fip->timer); + timer_delete(&fip->timer); fip->ctlr_ka_time = 0; fip->port_ka_time = 0; fip->sol_time = 0; diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c index 4c6bbf417a9a..c2b6f4eb338e 100644 --- a/drivers/scsi/fnic/fdls_disc.c +++ b/drivers/scsi/fnic/fdls_disc.c @@ -394,7 +394,7 @@ void fnic_del_fabric_timer_sync(struct fnic *fnic) { fnic->iport.fabric.del_timer_inprogress = 1; spin_unlock_irqrestore(&fnic->fnic_lock, fnic->lock_flags); - del_timer_sync(&fnic->iport.fabric.retry_timer); + timer_delete_sync(&fnic->iport.fabric.retry_timer); spin_lock_irqsave(&fnic->fnic_lock, fnic->lock_flags); fnic->iport.fabric.del_timer_inprogress = 0; } @@ -404,7 +404,7 @@ void fnic_del_tport_timer_sync(struct fnic *fnic, { tport->del_timer_inprogress = 1; spin_unlock_irqrestore(&fnic->fnic_lock, fnic->lock_flags); - del_timer_sync(&tport->retry_timer); + timer_delete_sync(&tport->retry_timer); spin_lock_irqsave(&fnic->fnic_lock, fnic->lock_flags); tport->del_timer_inprogress = 0; } @@ -3617,7 +3617,7 @@ static void fdls_process_fdmi_plogi_rsp(struct fnic_iport_s *iport, fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi); if (ntoh24(fchdr->fh_s_id) == FC_FID_MGMT_SERV) { - del_timer_sync(&iport->fabric.fdmi_timer); + timer_delete_sync(&iport->fabric.fdmi_timer); iport->fabric.fdmi_pending = 0; switch (plogi_rsp->els.fl_cmd) { case ELS_LS_ACC: @@ -3686,7 +3686,7 @@ static void fdls_process_fdmi_reg_ack(struct fnic_iport_s *iport, iport->fcid); if (!iport->fabric.fdmi_pending) { - del_timer_sync(&iport->fabric.fdmi_timer); + timer_delete_sync(&iport->fabric.fdmi_timer); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, "iport fcid: 0x%x: Canceling FDMI timer\n", iport->fcid); @@ -3728,7 +3728,7 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, break; } - del_timer_sync(&iport->fabric.fdmi_timer); + timer_delete_sync(&iport->fabric.fdmi_timer); iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; fdls_send_fdmi_plogi(iport); @@ -4971,7 +4971,7 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport) } if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) { - del_timer_sync(&iport->fabric.fdmi_timer); + timer_delete_sync(&iport->fabric.fdmi_timer); iport->fabric.fdmi_pending = 0; } diff --git a/drivers/scsi/fnic/fip.c b/drivers/scsi/fnic/fip.c index 7bb85949033f..6e7c0b00eb41 100644 --- a/drivers/scsi/fnic/fip.c +++ b/drivers/scsi/fnic/fip.c @@ -319,7 +319,7 @@ void fnic_fcoe_fip_discovery_resp(struct fnic *fnic, struct fip_header *fiph) round_jiffies(fcs_ka_tov)); } else { if (timer_pending(&fnic->fcs_ka_timer)) - del_timer_sync(&fnic->fcs_ka_timer); + timer_delete_sync(&fnic->fcs_ka_timer); } if (fka_has_changed) { @@ -497,7 +497,7 @@ void fnic_fcoe_process_flogi_resp(struct fnic *fnic, struct fip_header *fiph) oxid = FNIC_STD_GET_OX_ID(fchdr); fdls_free_oxid(iport, oxid, &iport->active_oxid_fabric_req); - del_timer_sync(&fnic->retry_fip_timer); + timer_delete_sync(&fnic->retry_fip_timer); if ((be16_to_cpu(flogi_rsp->fip.fip_dl_len) == FIP_FLOGI_LEN) && (flogi_rsp->rsp_desc.flogi.els.fl_cmd == ELS_LS_ACC)) { @@ -580,10 +580,10 @@ void fnic_common_fip_cleanup(struct fnic *fnic) iport->fip.state = FDLS_FIP_INIT; - del_timer_sync(&fnic->retry_fip_timer); - del_timer_sync(&fnic->fcs_ka_timer); - del_timer_sync(&fnic->enode_ka_timer); - del_timer_sync(&fnic->vn_ka_timer); + timer_delete_sync(&fnic->retry_fip_timer); + timer_delete_sync(&fnic->fcs_ka_timer); + timer_delete_sync(&fnic->enode_ka_timer); + timer_delete_sync(&fnic->vn_ka_timer); if (!is_zero_ether_addr(iport->fpma)) vnic_dev_del_addr(fnic->vdev, iport->fpma); diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 3dd06376e97b..9a357ff42085 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -1149,20 +1149,20 @@ static void fnic_remove(struct pci_dev *pdev) fnic_scsi_unload(fnic); if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI) - del_timer_sync(&fnic->notify_timer); + timer_delete_sync(&fnic->notify_timer); if (fnic->config.flags & VFCF_FIP_CAPABLE) { - del_timer_sync(&fnic->retry_fip_timer); - del_timer_sync(&fnic->fcs_ka_timer); - del_timer_sync(&fnic->enode_ka_timer); - del_timer_sync(&fnic->vn_ka_timer); + timer_delete_sync(&fnic->retry_fip_timer); + timer_delete_sync(&fnic->fcs_ka_timer); + timer_delete_sync(&fnic->enode_ka_timer); + timer_delete_sync(&fnic->vn_ka_timer); fnic_free_txq(&fnic->fip_frame_queue); fnic_fcoe_reset_vlans(fnic); } if ((fnic_fdmi_support == 1) && (fnic->iport.fabric.fdmi_pending > 0)) - del_timer_sync(&fnic->iport.fabric.fdmi_timer); + timer_delete_sync(&fnic->iport.fabric.fdmi_timer); fnic_stats_debugfs_remove(fnic); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 3596414d970b..5cb1d3db4907 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1548,7 +1548,7 @@ void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba) * which is also only used for v1/v2 hw to skip it for v3 hw */ if (hisi_hba->hw->sht) - del_timer_sync(&hisi_hba->timer); + timer_delete_sync(&hisi_hba->timer); set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags); } @@ -2363,7 +2363,7 @@ void hisi_sas_free(struct hisi_hba *hisi_hba) for (i = 0; i < hisi_hba->n_phy; i++) { struct hisi_sas_phy *phy = &hisi_hba->phy[i]; - del_timer_sync(&phy->timer); + timer_delete_sync(&phy->timer); } if (hisi_hba->wq) @@ -2625,7 +2625,7 @@ void hisi_sas_remove(struct platform_device *pdev) struct hisi_hba *hisi_hba = sha->lldd_ha; struct Scsi_Host *shost = sha->shost; - del_timer_sync(&hisi_hba->timer); + timer_delete_sync(&hisi_hba->timer); sas_unregister_ha(sha); sas_remove_host(shost); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 04ee02797ca3..a1fc400ab4c3 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2372,18 +2372,18 @@ static void slot_complete_v2_hw(struct hisi_hba *hisi_hba, case STAT_IO_COMPLETE: /* internal abort command complete */ ts->stat = TMF_RESP_FUNC_SUCC; - del_timer_sync(&slot->internal_abort_timer); + timer_delete_sync(&slot->internal_abort_timer); goto out; case STAT_IO_NO_DEVICE: ts->stat = TMF_RESP_FUNC_COMPLETE; - del_timer_sync(&slot->internal_abort_timer); + timer_delete_sync(&slot->internal_abort_timer); goto out; case STAT_IO_NOT_VALID: /* abort single io, controller don't find * the io need to abort */ ts->stat = TMF_RESP_FUNC_FAILED; - del_timer_sync(&slot->internal_abort_timer); + timer_delete_sync(&slot->internal_abort_timer); goto out; default: break; @@ -2654,7 +2654,7 @@ static int phy_up_v2_hw(int phy_no, struct hisi_hba *hisi_hba) if (is_sata_phy_v2_hw(hisi_hba, phy_no)) goto end; - del_timer(&phy->timer); + timer_delete(&phy->timer); if (phy_no == 8) { u32 port_state = hisi_sas_read32(hisi_hba, PORT_STATE); @@ -2730,7 +2730,7 @@ static int phy_down_v2_hw(int phy_no, struct hisi_hba *hisi_hba) struct hisi_sas_port *port = phy->port; struct device *dev = hisi_hba->dev; - del_timer(&phy->timer); + timer_delete(&phy->timer); hisi_sas_phy_write32(hisi_hba, phy_no, PHYCTRL_NOT_RDY_MSK, 1); phy_state = hisi_sas_read32(hisi_hba, PHY_STATE); @@ -2744,7 +2744,7 @@ static int phy_down_v2_hw(int phy_no, struct hisi_hba *hisi_hba) if (port && !get_wideport_bitmap_v2_hw(hisi_hba, port->id)) if (!check_any_wideports_v2_hw(hisi_hba) && timer_pending(&hisi_hba->timer)) - del_timer(&hisi_hba->timer); + timer_delete(&hisi_hba->timer); txid_auto = hisi_sas_phy_read32(hisi_hba, phy_no, TXID_AUTO); hisi_sas_phy_write32(hisi_hba, phy_no, TXID_AUTO, @@ -3204,7 +3204,7 @@ static irqreturn_t sata_int_v2_hw(int irq_no, void *p) u8 attached_sas_addr[SAS_ADDR_SIZE] = {0}; int phy_no, offset; - del_timer(&phy->timer); + timer_delete(&phy->timer); phy_no = sas_phy->id; initial_fis = &hisi_hba->initial_fis[phy_no]; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 095bbf80c34e..2684d6482067 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1609,7 +1609,7 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) phy->port_id = port_id; spin_lock(&phy->lock); /* Delete timer and set phy_attached atomically */ - del_timer(&phy->timer); + timer_delete(&phy->timer); phy->phy_attached = 1; spin_unlock(&phy->lock); @@ -1643,7 +1643,7 @@ static irqreturn_t phy_down_v3_hw(int phy_no, struct hisi_hba *hisi_hba) atomic_inc(&phy->down_cnt); - del_timer(&phy->timer); + timer_delete(&phy->timer); hisi_sas_phy_write32(hisi_hba, phy_no, PHYCTRL_NOT_RDY_MSK, 1); phy_state = hisi_sas_read32(hisi_hba, PHY_STATE); diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 773ec2f31bc4..4c493b06062a 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1110,7 +1110,7 @@ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) } else evt->xfer_iu->mad_common.status = cpu_to_be16(IBMVFC_MAD_DRIVER_FAILED); - del_timer(&evt->timer); + timer_delete(&evt->timer); } /** @@ -1754,7 +1754,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, atomic_set(&evt->active, 0); list_del(&evt->queue_list); spin_unlock_irqrestore(&evt->queue->l_lock, flags); - del_timer(&evt->timer); + timer_delete(&evt->timer); /* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY. * Firmware will send a CRQ with a transport event (0xFF) to @@ -3832,7 +3832,7 @@ static void ibmvfc_tasklet(void *data) spin_unlock_irqrestore(vhost->host->host_lock, flags); list_for_each_entry_safe(evt, temp, &evt_doneq, queue_list) { - del_timer(&evt->timer); + timer_delete(&evt->timer); list_del(&evt->queue_list); ibmvfc_trc_end(evt); evt->done(evt); @@ -3938,7 +3938,7 @@ static void ibmvfc_drain_sub_crq(struct ibmvfc_queue *scrq) spin_unlock_irqrestore(scrq->q_lock, flags); list_for_each_entry_safe(evt, temp, &evt_doneq, queue_list) { - del_timer(&evt->timer); + timer_delete(&evt->timer); list_del(&evt->queue_list); ibmvfc_trc_end(evt); evt->done(evt); @@ -4542,7 +4542,7 @@ static void ibmvfc_tgt_adisc_done(struct ibmvfc_event *evt) vhost->discovery_threads--; ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); - del_timer(&tgt->timer); + timer_delete(&tgt->timer); switch (status) { case IBMVFC_MAD_SUCCESS: @@ -4741,7 +4741,7 @@ static void ibmvfc_tgt_adisc(struct ibmvfc_target *tgt) ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT); if (ibmvfc_send_event(evt, vhost, IBMVFC_ADISC_PLUS_CANCEL_TIMEOUT)) { vhost->discovery_threads--; - del_timer(&tgt->timer); + timer_delete(&tgt->timer); ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); kref_put(&tgt->kref, ibmvfc_release_tgt); } else @@ -5519,7 +5519,7 @@ static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt) ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DELETED_RPORT); spin_unlock_irqrestore(vhost->host->host_lock, flags); fc_remote_port_delete(rport); - del_timer_sync(&tgt->timer); + timer_delete_sync(&tgt->timer); kref_put(&tgt->kref, ibmvfc_release_tgt); return; } else if (rport && tgt->action == IBMVFC_TGT_ACTION_DEL_AND_LOGOUT_RPORT) { @@ -5672,7 +5672,7 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost) spin_unlock_irqrestore(vhost->host->host_lock, flags); if (rport) fc_remote_port_delete(rport); - del_timer_sync(&tgt->timer); + timer_delete_sync(&tgt->timer); kref_put(&tgt->kref, ibmvfc_release_tgt); return; } else if (tgt->action == IBMVFC_TGT_ACTION_DEL_AND_LOGOUT_RPORT) { diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 16a1aac11911..d65a45860b33 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -789,7 +789,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) while (!list_empty(&hostdata->sent)) { evt = list_first_entry(&hostdata->sent, struct srp_event_struct, list); list_del(&evt->list); - del_timer(&evt->timer); + timer_delete(&evt->timer); spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (evt->cmnd) { @@ -944,7 +944,7 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, be64_to_cpu(crq_as_u64[1])); if (rc != 0) { list_del(&evt_struct->list); - del_timer(&evt_struct->timer); + timer_delete(&evt_struct->timer); /* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY. * Firmware will send a CRQ with a transport event (0xFF) to @@ -1840,7 +1840,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, atomic_add(be32_to_cpu(evt_struct->xfer_iu->srp.rsp.req_lim_delta), &hostdata->request_limit); - del_timer(&evt_struct->timer); + timer_delete(&evt_struct->timer); if ((crq->status != VIOSRP_OK && crq->status != VIOSRP_OK2) && evt_struct->cmnd) evt_struct->cmnd->result = DID_ERROR << 16; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 3bfafd43e42a..d89135fb8faa 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -873,7 +873,7 @@ static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); + timer_delete(&ipr_cmd->timer); ipr_cmd->done(ipr_cmd); } spin_unlock(&hrrq->_lock); @@ -5347,7 +5347,7 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_reg); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg; list_del(&ioa_cfg->reset_cmd->queue); - del_timer(&ioa_cfg->reset_cmd->timer); + timer_delete(&ioa_cfg->reset_cmd->timer); ipr_reset_ioa_job(ioa_cfg->reset_cmd); return IRQ_HANDLED; } @@ -5362,7 +5362,7 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); list_del(&ioa_cfg->reset_cmd->queue); - del_timer(&ioa_cfg->reset_cmd->timer); + timer_delete(&ioa_cfg->reset_cmd->timer); ipr_reset_ioa_job(ioa_cfg->reset_cmd); } else if ((int_reg & IPR_PCII_HRRQ_UPDATED) == int_reg) { if (ioa_cfg->clear_isr) { @@ -5481,7 +5481,7 @@ static int ipr_iopoll(struct irq_poll *iop, int budget) list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); - del_timer(&ipr_cmd->timer); + timer_delete(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } @@ -5550,7 +5550,7 @@ static irqreturn_t ipr_isr(int irq, void *devp) spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); - del_timer(&ipr_cmd->timer); + timer_delete(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } return rc; @@ -5600,7 +5600,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); - del_timer(&ipr_cmd->timer); + timer_delete(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } return rc; diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 35589b6af90d..c108b5b940c3 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1271,22 +1271,22 @@ void isci_host_deinit(struct isci_host *ihost) /* Cancel any/all outstanding port timers */ for (i = 0; i < ihost->logical_port_entries; i++) { struct isci_port *iport = &ihost->ports[i]; - del_timer_sync(&iport->timer.timer); + timer_delete_sync(&iport->timer.timer); } /* Cancel any/all outstanding phy timers */ for (i = 0; i < SCI_MAX_PHYS; i++) { struct isci_phy *iphy = &ihost->phys[i]; - del_timer_sync(&iphy->sata_timer.timer); + timer_delete_sync(&iphy->sata_timer.timer); } - del_timer_sync(&ihost->port_agent.timer.timer); + timer_delete_sync(&ihost->port_agent.timer.timer); - del_timer_sync(&ihost->power_control.timer.timer); + timer_delete_sync(&ihost->power_control.timer.timer); - del_timer_sync(&ihost->timer.timer); + timer_delete_sync(&ihost->timer.timer); - del_timer_sync(&ihost->phy_timer.timer); + timer_delete_sync(&ihost->phy_timer.timer); } static void __iomem *scu_base(struct isci_host *isci_host) diff --git a/drivers/scsi/isci/isci.h b/drivers/scsi/isci/isci.h index f6a8fe206415..d827e49c1d55 100644 --- a/drivers/scsi/isci/isci.h +++ b/drivers/scsi/isci/isci.h @@ -481,9 +481,9 @@ irqreturn_t isci_error_isr(int vec, void *data); /* * Each timer is associated with a cancellation flag that is set when - * del_timer() is called and checked in the timer callback function. This - * is needed since del_timer_sync() cannot be called with sci_lock held. - * For deinit however, del_timer_sync() is used without holding the lock. + * timer_delete() is called and checked in the timer callback function. This + * is needed since timer_delete_sync() cannot be called with sci_lock held. + * For deinit however, timer_delete_sync() is used without holding the lock. */ struct sci_timer { struct timer_list timer; @@ -506,7 +506,7 @@ static inline void sci_mod_timer(struct sci_timer *tmr, unsigned long msec) static inline void sci_del_timer(struct sci_timer *tmr) { tmr->cancel = true; - del_timer(&tmr->timer); + timer_delete(&tmr->timer); } struct sci_base_state_machine { diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index fd1ef06655cb..e705c30b4e1b 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1329,7 +1329,7 @@ static int fc_lun_reset(struct fc_lport *lport, struct fc_fcp_pkt *fsp, fsp->state |= FC_SRB_COMPL; spin_unlock_bh(&fsp->scsi_pkt_lock); - del_timer_sync(&fsp->timer); + timer_delete_sync(&fsp->timer); spin_lock_bh(&fsp->scsi_pkt_lock); if (fsp->seq_ptr) { @@ -1961,7 +1961,7 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp) fsp->state |= FC_SRB_COMPL; if (!(fsp->state & FC_SRB_FCP_PROCESSING_TMO)) { spin_unlock_bh(&fsp->scsi_pkt_lock); - del_timer_sync(&fsp->timer); + timer_delete_sync(&fsp->timer); spin_lock_bh(&fsp->scsi_pkt_lock); } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 2b1bf990a9dc..1ddaf7228340 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1945,7 +1945,7 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn, session->tmf_state != TMF_QUEUED); if (signal_pending(current)) flush_signals(current); - del_timer_sync(&session->tmf_timer); + timer_delete_sync(&session->tmf_timer); mutex_lock(&session->eh_mutex); spin_lock_bh(&session->frwd_lock); @@ -3247,7 +3247,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) iscsi_remove_conn(cls_conn); - del_timer_sync(&conn->transport_timer); + timer_delete_sync(&conn->transport_timer); mutex_lock(&session->eh_mutex); spin_lock_bh(&session->frwd_lock); @@ -3411,7 +3411,7 @@ void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) conn->stop_stage = flag; spin_unlock_bh(&session->frwd_lock); - del_timer_sync(&conn->transport_timer); + timer_delete_sync(&conn->transport_timer); iscsi_suspend_tx(conn); spin_lock_bh(&session->frwd_lock); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 2b8004eb6f1b..869b5d4db44c 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -89,7 +89,7 @@ static int smp_execute_task_sg(struct domain_device *dev, res = i->dft->lldd_execute_task(task, GFP_KERNEL); if (res) { - del_timer_sync(&task->slow_task->timer); + timer_delete_sync(&task->slow_task->timer); pr_notice("executing SMP task failed:%d\n", res); break; } diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 55ce7892f217..feb2461b90e8 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -859,7 +859,7 @@ EXPORT_SYMBOL_GPL(sas_bios_param); void sas_task_internal_done(struct sas_task *task) { - del_timer(&task->slow_task->timer); + timer_delete(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -911,7 +911,7 @@ static int sas_execute_internal_abort(struct domain_device *device, res = i->dft->lldd_execute_task(task, GFP_KERNEL); if (res) { - del_timer_sync(&task->slow_task->timer); + timer_delete_sync(&task->slow_task->timer); pr_err("Executing internal abort failed %016llx (%d)\n", SAS_ADDR(device->sas_addr), res); break; @@ -1010,7 +1010,7 @@ int sas_execute_tmf(struct domain_device *device, void *parameter, res = i->dft->lldd_execute_task(task, GFP_KERNEL); if (res) { - del_timer_sync(&task->slow_task->timer); + timer_delete_sync(&task->slow_task->timer); pr_err("executing TMF task failed %016llx (%d)\n", SAS_ADDR(device->sas_addr), res); break; @@ -1180,7 +1180,7 @@ void sas_task_abort(struct sas_task *task) if (!slow) return; - if (!del_timer(&slow->timer)) + if (!timer_delete(&slow->timer)) return; slow->timer.function(&slow->timer); return; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 0d0213bba35d..397216ff2c7e 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2578,7 +2578,7 @@ lpfc_poll_store(struct device *dev, struct device_attribute *attr, (old_val & DISABLE_FCP_RING_INT)) { spin_unlock_irq(&phba->hbalock); - del_timer(&phba->fcp_poll_timer); + timer_delete(&phba->fcp_poll_timer); spin_lock_irq(&phba->hbalock); if (lpfc_readl(phba->HCregaddr, &creg_val)) { spin_unlock_irq(&phba->hbalock); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index e08b48b1b655..375a879c31f1 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4333,7 +4333,7 @@ lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp) if (!test_and_clear_bit(NLP_DELAY_TMO, &nlp->nlp_flag)) return; - del_timer_sync(&nlp->nlp_delayfunc); + timer_delete_sync(&nlp->nlp_delayfunc); nlp->nlp_last_elscmd = 0; if (!list_empty(&nlp->els_retry_evt.evt_listp)) { list_del_init(&nlp->els_retry_evt.evt_listp); @@ -4431,7 +4431,7 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) * firing and before processing the timer, cancel the * nlp_delayfunc. */ - del_timer_sync(&ndlp->nlp_delayfunc); + timer_delete_sync(&ndlp->nlp_delayfunc); retry = ndlp->nlp_retry; ndlp->nlp_retry = 0; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8ca590e8469b..179be6c5a43e 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1228,7 +1228,7 @@ lpfc_linkdown_port(struct lpfc_vport *vport) /* Stop delayed Nport discovery */ clear_bit(FC_DISC_DELAYED, &vport->fc_flag); - del_timer_sync(&vport->delayed_disc_tmo); + timer_delete_sync(&vport->delayed_disc_tmo); if (phba->sli_rev == LPFC_SLI_REV4 && vport->port_type == LPFC_PHYSICAL_PORT && @@ -1418,7 +1418,7 @@ lpfc_linkup(struct lpfc_hba *phba) /* Unblock fabric iocbs if they are blocked */ clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); - del_timer_sync(&phba->fabric_block_timer); + timer_delete_sync(&phba->fabric_block_timer); vports = lpfc_create_vport_work_array(phba); if (vports != NULL) @@ -5010,7 +5010,7 @@ lpfc_can_disctmo(struct lpfc_vport *vport) if (test_bit(FC_DISC_TMO, &vport->fc_flag) || timer_pending(&vport->fc_disctmo)) { clear_bit(FC_DISC_TMO, &vport->fc_flag); - del_timer_sync(&vport->fc_disctmo); + timer_delete_sync(&vport->fc_disctmo); spin_lock_irqsave(&vport->work_port_lock, iflags); vport->work_port_events &= ~WORKER_DISC_TMO; spin_unlock_irqrestore(&vport->work_port_lock, iflags); @@ -5501,7 +5501,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) clear_bit(NLP_DELAY_TMO, &ndlp->nlp_flag); ndlp->nlp_last_elscmd = 0; - del_timer_sync(&ndlp->nlp_delayfunc); + timer_delete_sync(&ndlp->nlp_delayfunc); list_del_init(&ndlp->els_retry_evt.evt_listp); list_del_init(&ndlp->dev_loss_evt.evt_listp); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 7238608ca49f..90021653e59e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3120,8 +3120,8 @@ lpfc_cleanup(struct lpfc_vport *vport) void lpfc_stop_vport_timers(struct lpfc_vport *vport) { - del_timer_sync(&vport->els_tmofunc); - del_timer_sync(&vport->delayed_disc_tmo); + timer_delete_sync(&vport->els_tmofunc); + timer_delete_sync(&vport->delayed_disc_tmo); lpfc_can_disctmo(vport); return; } @@ -3140,7 +3140,7 @@ __lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *phba) phba->fcf.fcf_flag &= ~FCF_REDISC_PEND; /* Now, try to stop the timer */ - del_timer(&phba->fcf.redisc_wait); + timer_delete(&phba->fcf.redisc_wait); } /** @@ -3302,12 +3302,12 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba) lpfc_stop_vport_timers(phba->pport); cancel_delayed_work_sync(&phba->eq_delay_work); cancel_delayed_work_sync(&phba->idle_stat_delay_work); - del_timer_sync(&phba->sli.mbox_tmo); - del_timer_sync(&phba->fabric_block_timer); - del_timer_sync(&phba->eratt_poll); - del_timer_sync(&phba->hb_tmofunc); + timer_delete_sync(&phba->sli.mbox_tmo); + timer_delete_sync(&phba->fabric_block_timer); + timer_delete_sync(&phba->eratt_poll); + timer_delete_sync(&phba->hb_tmofunc); if (phba->sli_rev == LPFC_SLI_REV4) { - del_timer_sync(&phba->rrq_tmr); + timer_delete_sync(&phba->rrq_tmr); clear_bit(HBA_RRQ_ACTIVE, &phba->hba_flag); } clear_bit(HBA_HBEAT_INP, &phba->hba_flag); @@ -3316,7 +3316,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba) switch (phba->pci_dev_grp) { case LPFC_PCI_DEV_LP: /* Stop any LightPulse device specific driver timers */ - del_timer_sync(&phba->fcp_poll_timer); + timer_delete_sync(&phba->fcp_poll_timer); break; case LPFC_PCI_DEV_OC: /* Stop any OneConnect device specific driver timers */ @@ -12761,7 +12761,7 @@ static void __lpfc_cpuhp_remove(struct lpfc_hba *phba) * timer. Wait for the poll timer to retire. */ synchronize_rcu(); - del_timer_sync(&phba->cpuhp_poll_timer); + timer_delete_sync(&phba->cpuhp_poll_timer); } static void lpfc_cpuhp_remove(struct lpfc_hba *phba) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index f0158fc00f78..9edf80b14b1a 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5488,7 +5488,7 @@ void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport) struct lpfc_vmid *cur; if (vport->port_type == LPFC_PHYSICAL_PORT) - del_timer_sync(&vport->phba->inactive_vmid_poll); + timer_delete_sync(&vport->phba->inactive_vmid_poll); kfree(vport->qfpa_res); kfree(vport->vmid_priority.vmid_range); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 4e0d48fcb204..6574f9e74476 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -5041,7 +5041,7 @@ lpfc_sli_brdkill(struct lpfc_hba *phba) return 1; } - del_timer_sync(&psli->mbox_tmo); + timer_delete_sync(&psli->mbox_tmo); if (ha_copy & HA_ERATT) { writel(HA_ERATT, phba->HAregaddr); phba->pport->stopped = 1; @@ -12076,7 +12076,7 @@ lpfc_sli_hba_down(struct lpfc_hba *phba) local_bh_enable(); /* Return any active mbox cmds */ - del_timer_sync(&psli->mbox_tmo); + timer_delete_sync(&psli->mbox_tmo); spin_lock_irqsave(&phba->pport->work_port_lock, flags); phba->pport->work_port_events &= ~WORKER_MBOX_TMO; @@ -13802,7 +13802,7 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) phba->sli.mbox_active = NULL; spin_unlock_irqrestore(&phba->hbalock, iflag); phba->last_completion_time = jiffies; - del_timer(&phba->sli.mbox_tmo); + timer_delete(&phba->sli.mbox_tmo); if (pmb->mbox_cmpl) { lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); @@ -14302,7 +14302,7 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe) /* Reset heartbeat timer */ phba->last_completion_time = jiffies; - del_timer(&phba->sli.mbox_tmo); + timer_delete(&phba->sli.mbox_tmo); /* Move mbox data to caller's mailbox region, do endian swapping */ if (pmb->mbox_cmpl && mbox) @@ -15689,7 +15689,7 @@ static inline void lpfc_sli4_remove_from_poll_list(struct lpfc_queue *eq) synchronize_rcu(); if (list_empty(&phba->poll_list)) - del_timer_sync(&phba->cpuhp_poll_timer); + timer_delete_sync(&phba->cpuhp_poll_timer); } void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba) diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 3ba837b3093f..d533a8aa72cc 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -3951,7 +3951,7 @@ megaraid_sysfs_get_ldmap(adapter_t *adapter) } - del_timer_sync(&timeout.timer); + timer_delete_sync(&timeout.timer); destroy_timer_on_stack(&timeout.timer); mutex_unlock(&raid_dev->sysfs_mtx); diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index c509440bd161..1f2cd15e3361 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -703,7 +703,7 @@ lld_ioctl(mraid_mmadp_t *adp, uioc_t *kioc) */ wait_event(wait_q, (kioc->status != -ENODATA)); if (timeout.timer.function) { - del_timer_sync(&timeout.timer); + timer_delete_sync(&timeout.timer); destroy_timer_on_stack(&timeout.timer); } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 28c75865967a..c20447b39cb9 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6521,7 +6521,7 @@ static int megasas_init_fw(struct megasas_instance *instance) fail_start_watchdog: if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); fail_get_ld_pd_list: instance->instancet->disable_intr(instance); megasas_destroy_irqs(instance); @@ -7603,7 +7603,7 @@ static int megasas_probe_one(struct pci_dev *pdev, megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = NULL; if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); instance->instancet->disable_intr(instance); megasas_destroy_irqs(instance); @@ -7743,7 +7743,7 @@ megasas_suspend(struct device *dev) /* Shutdown SR-IOV heartbeat timer */ if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); /* Stop the FW fault detection watchdog */ if (instance->adapter_type != MFI_SERIES) @@ -7907,7 +7907,7 @@ megasas_resume(struct device *dev) fail_start_watchdog: if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); fail_init_mfi: megasas_free_ctrl_dma_buffers(instance); megasas_free_ctrl_mem(instance); @@ -7971,7 +7971,7 @@ static void megasas_detach_one(struct pci_dev *pdev) /* Shutdown SR-IOV heartbeat timer */ if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); /* Stop the FW fault detection watchdog */ if (instance->adapter_type != MFI_SERIES) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 1eec23da28e2..721860cb1ef6 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4969,7 +4969,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) } if (instance->requestorId && !instance->skip_heartbeat_timer_del) - del_timer_sync(&instance->sriov_heartbeat_timer); + timer_delete_sync(&instance->sriov_heartbeat_timer); set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index c4592de4fefc..52ac10226cb0 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -976,7 +976,7 @@ static u32 mvs_is_sig_fis_received(u32 irq_status) static void mvs_sig_remove_timer(struct mvs_phy *phy) { if (phy->timer.function) - del_timer(&phy->timer); + timer_delete(&phy->timer); phy->timer.function = NULL; } diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 3ba53916fd86..e0aeb206df8d 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -495,7 +495,7 @@ static void pmcraid_clr_trans_op( } if (pinstance->reset_cmd != NULL) { - del_timer(&pinstance->reset_cmd->timer); + timer_delete(&pinstance->reset_cmd->timer); spin_lock_irqsave( pinstance->host->host_lock, lock_flags); pinstance->reset_cmd->cmd_done(pinstance->reset_cmd); @@ -1999,7 +1999,7 @@ static void pmcraid_fail_outstanding_cmds(struct pmcraid_instance *pinstance) cpu_to_le32(PMCRAID_DRIVER_ILID); /* In case the command timer is still running */ - del_timer(&cmd->timer); + timer_delete(&cmd->timer); /* If this is an IO command, complete it by invoking scsi_done * function. If this is one of the internal commands other @@ -3982,7 +3982,7 @@ static void pmcraid_tasklet_function(unsigned long instance) list_del(&cmd->free_list); spin_unlock_irqrestore(&pinstance->pending_pool_lock, pending_lock_flags); - del_timer(&cmd->timer); + timer_delete(&cmd->timer); atomic_dec(&pinstance->outstanding_cmds); if (cmd->cmd_done == pmcraid_ioa_reset) { diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 47d74f881948..078a9c80bce2 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2454,7 +2454,7 @@ qla1280_mailbox_command(struct scsi_qla_host *ha, uint8_t mr, uint16_t *mb) qla1280_debounce_register(®->istatus); wait_for_completion(&wait); - del_timer_sync(&ha->mailbox_timer); + timer_delete_sync(&ha->mailbox_timer); spin_lock_irq(ha->host->host_lock); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 79cdfec2bca3..0c2dd782b675 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -67,7 +67,7 @@ void qla2x00_sp_free(srb_t *sp) { struct srb_iocb *iocb = &sp->u.iocb_cmd; - del_timer(&iocb->timer); + timer_delete(&iocb->timer); qla2x00_rel_sp(sp); } diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 0b41e8a06602..3224044f1775 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2572,7 +2572,7 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) static void qla2x00_async_done(struct srb *sp, int res) { - if (del_timer(&sp->u.iocb_cmd.timer)) { + if (timer_delete(&sp->u.iocb_cmd.timer)) { /* * Successfully cancelled the timeout handler * ref: TMR @@ -2645,7 +2645,7 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) elsio->u.els_logo.els_logo_pyld, elsio->u.els_logo.els_logo_pyld_dma); - del_timer(&elsio->timer); + timer_delete(&elsio->timer); qla2x00_rel_sp(sp); } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 79879c4743e6..8b71ac0b1d99 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -20,7 +20,7 @@ void qla2x00_vp_stop_timer(scsi_qla_host_t *vha) { if (vha->vp_idx && vha->timer_active) { - del_timer_sync(&vha->timer); + timer_delete_sync(&vha->timer); vha->timer_active = 0; } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 6b9b8218b512..b44d134e7105 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -401,7 +401,7 @@ qla2x00_restart_timer(scsi_qla_host_t *vha, unsigned long interval) static __inline__ void qla2x00_stop_timer(scsi_qla_host_t *vha) { - del_timer_sync(&vha->timer); + timer_delete_sync(&vha->timer); vha->timer_active = 0; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 6b0e6b4cd8af..d540d66e6ffc 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4021,7 +4021,7 @@ static void qla4xxx_start_timer(struct scsi_qla_host *ha, static void qla4xxx_stop_timer(struct scsi_qla_host *ha) { - del_timer_sync(&ha->timer); + timer_delete_sync(&ha->timer); ha->timer_active = 0; } diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 0da7be40c925..88135fdb8bd1 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -3853,7 +3853,7 @@ static void pqi_start_heartbeat_timer(struct pqi_ctrl_info *ctrl_info) static inline void pqi_stop_heartbeat_timer(struct pqi_ctrl_info *ctrl_info) { - del_timer_sync(&ctrl_info->heartbeat_timer); + timer_delete_sync(&ctrl_info->heartbeat_timer); } static void pqi_ofa_capture_event_payload(struct pqi_ctrl_info *ctrl_info, diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 212d89d0d23e..1a6eb72ca281 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1657,7 +1657,7 @@ static int sym_detach(struct Scsi_Host *shost, struct pci_dev *pdev) struct sym_hcb *np = sym_get_hcb(shost); printk("%s: detaching ...\n", sym_name(np)); - del_timer_sync(&np->s.timer); + timer_delete_sync(&np->s.timer); /* * Reset NCR chip. diff --git a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c index 67bf125645c0..da229965d98e 100644 --- a/drivers/staging/gpib/agilent_82357a/agilent_82357a.c +++ b/drivers/staging/gpib/agilent_82357a/agilent_82357a.c @@ -102,7 +102,7 @@ static int agilent_82357a_send_bulk_msg(struct agilent_82357a_priv *a_priv, void cleanup: if (timeout_msecs) { if (timer_pending(&a_priv->bulk_timer)) - del_timer_sync(&a_priv->bulk_timer); + timer_delete_sync(&a_priv->bulk_timer); } mutex_lock(&a_priv->bulk_alloc_lock); if (a_priv->bulk_urb) { @@ -169,7 +169,7 @@ static int agilent_82357a_receive_bulk_msg(struct agilent_82357a_priv *a_priv, v *actual_data_length = a_priv->bulk_urb->actual_length; cleanup: if (timeout_msecs) - del_timer_sync(&a_priv->bulk_timer); + timer_delete_sync(&a_priv->bulk_timer); mutex_lock(&a_priv->bulk_alloc_lock); if (a_priv->bulk_urb) { diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c index cb77fe0a4b9a..8456b97290b8 100644 --- a/drivers/staging/gpib/common/gpib_os.c +++ b/drivers/staging/gpib/common/gpib_os.c @@ -109,7 +109,7 @@ void os_remove_timer(struct gpib_board *board) /* Removes the timeout task */ { if (timer_pending(&board->timer)) - del_timer_sync(&board->timer); + timer_delete_sync(&board->timer); } int io_timed_out(struct gpib_board *board) @@ -163,7 +163,7 @@ void gpib_free_pseudo_irq(struct gpib_board *board) { atomic_set(&board->pseudo_irq.active, 0); - del_timer_sync(&board->pseudo_irq.timer); + timer_delete_sync(&board->pseudo_irq.timer); board->pseudo_irq.handler = NULL; } EXPORT_SYMBOL(gpib_free_pseudo_irq); diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c index 6cca8a49e839..b297261818f2 100644 --- a/drivers/staging/gpib/common/iblib.c +++ b/drivers/staging/gpib/common/iblib.c @@ -610,7 +610,7 @@ static void start_wait_timer(struct wait_info *winfo) static void remove_wait_timer(struct wait_info *winfo) { - del_timer_sync(&winfo->timer); + timer_delete_sync(&winfo->timer); destroy_timer_on_stack(&winfo->timer); } diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 14f7049a8e5e..9f1b9927f025 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -136,7 +136,7 @@ static int ni_usb_nonblocking_send_bulk_msg(struct ni_usb_priv *ni_priv, void *d retval = usb_submit_urb(ni_priv->bulk_urb, GFP_KERNEL); if (retval) { - del_timer_sync(&ni_priv->bulk_timer); + timer_delete_sync(&ni_priv->bulk_timer); usb_free_urb(ni_priv->bulk_urb); ni_priv->bulk_urb = NULL; dev_err(&usb_dev->dev, "failed to submit bulk out urb, retval=%i\n", @@ -154,7 +154,7 @@ static int ni_usb_nonblocking_send_bulk_msg(struct ni_usb_priv *ni_priv, void *d retval = ni_priv->bulk_urb->status; } - del_timer_sync(&ni_priv->bulk_timer); + timer_delete_sync(&ni_priv->bulk_timer); *actual_data_length = ni_priv->bulk_urb->actual_length; mutex_lock(&ni_priv->bulk_transfer_lock); usb_free_urb(ni_priv->bulk_urb); @@ -222,7 +222,7 @@ static int ni_usb_nonblocking_receive_bulk_msg(struct ni_usb_priv *ni_priv, retval = usb_submit_urb(ni_priv->bulk_urb, GFP_KERNEL); if (retval) { - del_timer_sync(&ni_priv->bulk_timer); + timer_delete_sync(&ni_priv->bulk_timer); usb_free_urb(ni_priv->bulk_urb); ni_priv->bulk_urb = NULL; dev_err(&usb_dev->dev, "failed to submit bulk in urb, retval=%i\n", retval); @@ -256,7 +256,7 @@ static int ni_usb_nonblocking_receive_bulk_msg(struct ni_usb_priv *ni_priv, if (ni_priv->bulk_urb->status) retval = ni_priv->bulk_urb->status; } - del_timer_sync(&ni_priv->bulk_timer); + timer_delete_sync(&ni_priv->bulk_timer); *actual_data_length = ni_priv->bulk_urb->actual_length; mutex_lock(&ni_priv->bulk_transfer_lock); usb_free_urb(ni_priv->bulk_urb); diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c b/drivers/staging/media/imx/imx-ic-prpencvf.c index 17fd980c9d3c..2855ba2296ac 100644 --- a/drivers/staging/media/imx/imx-ic-prpencvf.c +++ b/drivers/staging/media/imx/imx-ic-prpencvf.c @@ -781,7 +781,7 @@ static void prp_stop(struct prp_priv *priv) imx_media_free_dma_buf(ic_priv->ipu_dev, &priv->underrun_buf); /* cancel the EOF timeout timer */ - del_timer_sync(&priv->eof_timeout_timer); + timer_delete_sync(&priv->eof_timeout_timer); prp_put_ipu_resources(priv); } diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index 3edbc57be2ca..f1d7fce8c020 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -695,7 +695,7 @@ static void csi_idmac_stop(struct csi_priv *priv) imx_media_free_dma_buf(priv->dev, &priv->underrun_buf); /* cancel the EOF timeout timer */ - del_timer_sync(&priv->eof_timeout_timer); + timer_delete_sync(&priv->eof_timeout_timer); csi_idmac_put_ipu_resources(priv); } diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c b/drivers/staging/rtl8723bs/core/rtw_cmd.c index 64ce33c6fba1..1c9e8b01d9d8 100644 --- a/drivers/staging/rtl8723bs/core/rtw_cmd.c +++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c @@ -1846,7 +1846,7 @@ void rtw_createbss_cmd_callback(struct adapter *padapter, struct cmd_obj *pcmd) if (pcmd->res != H2C_SUCCESS) _set_timer(&pmlmepriv->assoc_timer, 1); - del_timer_sync(&pmlmepriv->assoc_timer); + timer_delete_sync(&pmlmepriv->assoc_timer); spin_lock_bh(&pmlmepriv->lock); diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c index 5ded183aa08c..58de0c2fdd68 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c @@ -681,7 +681,7 @@ void rtw_surveydone_event_callback(struct adapter *adapter, u8 *pbuf) if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY)) { spin_unlock_bh(&pmlmepriv->lock); - del_timer_sync(&pmlmepriv->scan_to_timer); + timer_delete_sync(&pmlmepriv->scan_to_timer); spin_lock_bh(&pmlmepriv->lock); _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY); } @@ -1166,7 +1166,7 @@ void rtw_joinbss_event_prehandle(struct adapter *adapter, u8 *pbuf) spin_unlock_bh(&pmlmepriv->lock); /* s5. Cancel assoc_timer */ - del_timer_sync(&pmlmepriv->assoc_timer); + timer_delete_sync(&pmlmepriv->assoc_timer); spin_lock_bh(&pmlmepriv->lock); } else { spin_unlock_bh(&(pmlmepriv->scanned_queue.lock)); diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index 952ce6dd5af9..3d36b6f005e0 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -412,9 +412,9 @@ void free_mlme_ext_priv(struct mlme_ext_priv *pmlmeext) return; if (padapter->bDriverStopped) { - del_timer_sync(&pmlmeext->survey_timer); - del_timer_sync(&pmlmeext->link_timer); - /* del_timer_sync(&pmlmeext->ADDBA_timer); */ + timer_delete_sync(&pmlmeext->survey_timer); + timer_delete_sync(&pmlmeext->link_timer); + /* timer_delete_sync(&pmlmeext->ADDBA_timer); */ } } @@ -1390,7 +1390,7 @@ unsigned int OnAssocRsp(struct adapter *padapter, union recv_frame *precv_frame) if (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) return _SUCCESS; - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); /* status */ status = le16_to_cpu(*(__le16 *)(pframe + WLAN_HDR_A3_LEN + 2)); @@ -1862,7 +1862,7 @@ unsigned int OnAction_sa_query(struct adapter *padapter, union recv_frame *precv break; case 1: /* SA Query rsp */ - del_timer_sync(&pmlmeext->sa_query_timer); + timer_delete_sync(&pmlmeext->sa_query_timer); break; default: break; @@ -4185,7 +4185,7 @@ void start_clnt_auth(struct adapter *padapter) struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); pmlmeinfo->state &= (~WIFI_FW_AUTH_NULL); pmlmeinfo->state |= WIFI_FW_AUTH_STATE; @@ -4210,7 +4210,7 @@ void start_clnt_assoc(struct adapter *padapter) struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv; struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info); - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); pmlmeinfo->state &= (~(WIFI_FW_AUTH_NULL | WIFI_FW_AUTH_STATE)); pmlmeinfo->state |= (WIFI_FW_AUTH_SUCCESS | WIFI_FW_ASSOC_STATE); @@ -4792,7 +4792,7 @@ static void rtw_mlmeext_disconnect(struct adapter *padapter) flush_all_cam_entry(padapter); - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); /* pmlmepriv->LinkDetectInfo.TrafficBusyState = false; */ pmlmepriv->LinkDetectInfo.TrafficTransitionCount = 0; @@ -5268,7 +5268,7 @@ u8 createbss_hdl(struct adapter *padapter, u8 *pbuf) /* rtw_hal_set_hwreg(padapter, HW_VAR_INITIAL_GAIN, (u8 *)(&initialgain)); */ /* cancel link timer */ - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); /* clear CAM */ flush_all_cam_entry(padapter); @@ -5312,7 +5312,7 @@ u8 join_cmd_hdl(struct adapter *padapter, u8 *pbuf) /* clear CAM */ flush_all_cam_entry(padapter); - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); /* set MSR to nolink -> infra. mode */ /* Set_MSR(padapter, _HW_STATE_NOLINK_); */ @@ -5425,7 +5425,7 @@ u8 join_cmd_hdl(struct adapter *padapter, u8 *pbuf) set_channel_bwmode(padapter, ch, offset, bw); /* cancel link timer */ - del_timer_sync(&pmlmeext->link_timer); + timer_delete_sync(&pmlmeext->link_timer); start_clnt_join(padapter); diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c index a389ba5ecc6f..895116e9f4e7 100644 --- a/drivers/staging/rtl8723bs/core/rtw_recv.c +++ b/drivers/staging/rtl8723bs/core/rtw_recv.c @@ -1893,7 +1893,7 @@ static int recv_indicatepkt_reorder(struct adapter *padapter, union recv_frame * spin_unlock_bh(&ppending_recvframe_queue->lock); } else { spin_unlock_bh(&ppending_recvframe_queue->lock); - del_timer_sync(&preorder_ctrl->reordering_ctrl_timer); + timer_delete_sync(&preorder_ctrl->reordering_ctrl_timer); } return _SUCCESS; diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c index 1b72f2196a1c..1d2b53c76afc 100644 --- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c +++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c @@ -158,7 +158,7 @@ u32 _rtw_free_sta_priv(struct sta_priv *pstapriv) for (i = 0; i < 16 ; i++) { preorder_ctrl = &psta->recvreorder_ctrl[i]; - del_timer_sync(&preorder_ctrl->reordering_ctrl_timer); + timer_delete_sync(&preorder_ctrl->reordering_ctrl_timer); } } } @@ -343,7 +343,7 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) /* _rtw_init_sta_xmit_priv(&psta->sta_xmitpriv); */ /* _rtw_init_sta_recv_priv(&psta->sta_recvpriv); */ - del_timer_sync(&psta->addba_retry_timer); + timer_delete_sync(&psta->addba_retry_timer); /* for A-MPDU Rx reordering buffer control, cancel reordering_ctrl_timer */ for (i = 0; i < 16 ; i++) { @@ -354,7 +354,7 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta) preorder_ctrl = &psta->recvreorder_ctrl[i]; - del_timer_sync(&preorder_ctrl->reordering_ctrl_timer); + timer_delete_sync(&preorder_ctrl->reordering_ctrl_timer); ppending_recvframe_queue = &preorder_ctrl->pending_recvframe_queue; diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c index 21e9f1858745..8736c124f857 100644 --- a/drivers/staging/rtl8723bs/hal/sdio_ops.c +++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c @@ -871,7 +871,7 @@ void sd_int_dpc(struct adapter *adapter) } if (hal->sdio_hisr & SDIO_HISR_CPWM1) { - del_timer_sync(&(pwrctl->pwr_rpwm_timer)); + timer_delete_sync(&(pwrctl->pwr_rpwm_timer)); SdioLocalCmd52Read1Byte(adapter, SDIO_REG_HCPWM1_8723B); diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c index de48c3454ab3..0248dff8f2aa 100644 --- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c +++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c @@ -697,18 +697,18 @@ u8 rtw_init_drv_sw(struct adapter *padapter) void rtw_cancel_all_timer(struct adapter *padapter) { - del_timer_sync(&padapter->mlmepriv.assoc_timer); + timer_delete_sync(&padapter->mlmepriv.assoc_timer); - del_timer_sync(&padapter->mlmepriv.scan_to_timer); + timer_delete_sync(&padapter->mlmepriv.scan_to_timer); - del_timer_sync(&padapter->mlmepriv.dynamic_chk_timer); + timer_delete_sync(&padapter->mlmepriv.dynamic_chk_timer); - del_timer_sync(&(adapter_to_pwrctl(padapter)->pwr_state_check_timer)); + timer_delete_sync(&(adapter_to_pwrctl(padapter)->pwr_state_check_timer)); - del_timer_sync(&padapter->mlmepriv.set_scan_deny_timer); + timer_delete_sync(&padapter->mlmepriv.set_scan_deny_timer); rtw_clear_scan_deny(padapter); - del_timer_sync(&padapter->recvpriv.signal_stat_timer); + timer_delete_sync(&padapter->recvpriv.signal_stat_timer); /* cancel dm timer */ rtw_hal_dm_deinit(padapter); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 07e9cf431edd..f0d7eebfcad6 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -810,7 +810,7 @@ int iscsit_stop_time2retain_timer(struct iscsit_session *sess) sess->time2retain_timer_flags |= ISCSI_TF_STOP; spin_unlock(&se_tpg->session_lock); - del_timer_sync(&sess->time2retain_timer); + timer_delete_sync(&sess->time2retain_timer); spin_lock(&se_tpg->session_lock); sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING; diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index d9a6242264b7..e7c3c4cdaae4 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -1227,7 +1227,7 @@ void iscsit_stop_dataout_timer(struct iscsit_cmd *cmd) cmd->dataout_timer_flags |= ISCSI_TF_STOP; spin_unlock_bh(&cmd->dataout_timeout_lock); - del_timer_sync(&cmd->dataout_timer); + timer_delete_sync(&cmd->dataout_timer); spin_lock_bh(&cmd->dataout_timeout_lock); cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index ed2dadb21f75..0bd62ab9a1cd 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -922,7 +922,7 @@ void iscsit_stop_nopin_response_timer(struct iscsit_conn *conn) conn->nopin_response_timer_flags |= ISCSI_TF_STOP; spin_unlock_bh(&conn->nopin_timer_lock); - del_timer_sync(&conn->nopin_response_timer); + timer_delete_sync(&conn->nopin_response_timer); spin_lock_bh(&conn->nopin_timer_lock); conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING; @@ -989,7 +989,7 @@ void iscsit_stop_nopin_timer(struct iscsit_conn *conn) conn->nopin_timer_flags |= ISCSI_TF_STOP; spin_unlock_bh(&conn->nopin_timer_lock); - del_timer_sync(&conn->nopin_timer); + timer_delete_sync(&conn->nopin_timer); spin_lock_bh(&conn->nopin_timer_lock); conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0f5d820af119..43872ccc07cc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1232,7 +1232,7 @@ static void tcmu_set_next_deadline(struct list_head *queue, cmd = list_first_entry(queue, struct tcmu_cmd, queue_entry); mod_timer(timer, cmd->deadline); } else - del_timer(timer); + timer_delete(timer); } static int @@ -2321,8 +2321,8 @@ static void tcmu_destroy_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); - del_timer_sync(&udev->cmd_timer); - del_timer_sync(&udev->qfull_timer); + timer_delete_sync(&udev->cmd_timer); + timer_delete_sync(&udev->qfull_timer); mutex_lock(&root_udev_mutex); list_del(&udev->node); @@ -2408,7 +2408,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) tcmu_flush_dcache_range(mb, sizeof(*mb)); clear_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); - del_timer(&udev->cmd_timer); + timer_delete(&udev->cmd_timer); /* * ring is empty and qfull queue never contains aborted commands. diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c index 001ec318a918..c13f52337035 100644 --- a/drivers/tty/ipwireless/hardware.c +++ b/drivers/tty/ipwireless/hardware.c @@ -1496,7 +1496,7 @@ static void __handle_setup_get_version_rsp(struct ipw_hardware *hw) static void handle_setup_get_version_rsp(struct ipw_hardware *hw, unsigned char vers_no) { - del_timer(&hw->setup_timer); + timer_delete(&hw->setup_timer); hw->initializing = 0; printk(KERN_INFO IPWIRELESS_PCCARD_NAME ": card is ready.\n"); @@ -1721,7 +1721,7 @@ void ipwireless_stop_interrupts(struct ipw_hardware *hw) if (!hw->shutting_down) { /* Tell everyone we are going down. */ hw->shutting_down = 1; - del_timer(&hw->setup_timer); + timer_delete(&hw->setup_timer); /* Prevent the hardware from sending any more interrupts */ do_close_hardware(hw); diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c index 58b28be63c79..fa47bfcf9e86 100644 --- a/drivers/tty/mips_ejtag_fdc.c +++ b/drivers/tty/mips_ejtag_fdc.c @@ -1031,7 +1031,7 @@ static int mips_ejtag_fdc_tty_probe(struct mips_cdmm_device *dev) raw_spin_unlock_irq(&priv->lock); } else { priv->removing = true; - del_timer_sync(&priv->poll_timer); + timer_delete_sync(&priv->poll_timer); } kthread_stop(priv->thread); err_destroy_ports: @@ -1061,7 +1061,7 @@ static int mips_ejtag_fdc_tty_cpu_down(struct mips_cdmm_device *dev) raw_spin_unlock_irq(&priv->lock); } else { priv->removing = true; - del_timer_sync(&priv->poll_timer); + timer_delete_sync(&priv->poll_timer); } kthread_stop(priv->thread); diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index 1348e2214b81..329b30fac8fc 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -1187,7 +1187,7 @@ static void __exit moxa_exit(void) { pci_unregister_driver(&moxa_pci_driver); - del_timer_sync(&moxaTimer); + timer_delete_sync(&moxaTimer); tty_unregister_driver(moxaDriver); tty_driver_kref_put(moxaDriver); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 363afe11974f..40a336ef8c7e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1941,7 +1941,7 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, /* Does the reply match our command */ if (ctrl != NULL && (command == ctrl->cmd || command == CMD_NSC)) { /* Our command was replied to, kill the retry timer */ - del_timer(&gsm->t2_timer); + timer_delete(&gsm->t2_timer); gsm->pending_cmd = NULL; /* Rejected by the other end */ if (command == CMD_NSC) @@ -2131,7 +2131,7 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) static void gsm_dlci_close(struct gsm_dlci *dlci) { - del_timer(&dlci->t1); + timer_delete(&dlci->t1); if (debug & DBG_ERRORS) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; @@ -2144,7 +2144,7 @@ static void gsm_dlci_close(struct gsm_dlci *dlci) tty_port_set_initialized(&dlci->port, false); wake_up_interruptible(&dlci->port.open_wait); } else { - del_timer(&dlci->gsm->ka_timer); + timer_delete(&dlci->gsm->ka_timer); dlci->gsm->dead = true; } /* A DLCI 0 close is a MUX termination so we need to kick that @@ -2166,7 +2166,7 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) /* Note that SABM UA .. SABM UA first UA lost can mean that we go open -> open */ - del_timer(&dlci->t1); + timer_delete(&dlci->t1); /* This will let a tty open continue */ dlci->state = DLCI_OPEN; dlci->constipated = false; @@ -3144,9 +3144,9 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) } /* Finish outstanding timers, making sure they are done */ - del_timer_sync(&gsm->kick_timer); - del_timer_sync(&gsm->t2_timer); - del_timer_sync(&gsm->ka_timer); + timer_delete_sync(&gsm->kick_timer); + timer_delete_sync(&gsm->t2_timer); + timer_delete_sync(&gsm->ka_timer); /* Finish writing to ldisc */ flush_work(&gsm->tx_work); diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index e5da9ce26006..392447038bfb 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -550,7 +550,7 @@ static void aspeed_vuart_remove(struct platform_device *pdev) { struct aspeed_vuart *vuart = platform_get_drvdata(pdev); - del_timer_sync(&vuart->unthrottle_timer); + timer_delete_sync(&vuart->unthrottle_timer); aspeed_vuart_set_enabled(vuart, false); serial8250_unregister_port(vuart->line); sysfs_remove_group(&vuart->dev->kobj, &aspeed_vuart_attr_group); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 6f676bb37ac3..5a56f853cf6d 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -298,7 +298,7 @@ static void univ8250_release_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; - del_timer_sync(&up->timer); + timer_delete_sync(&up->timer); up->timer.function = serial8250_timeout; if (port->irq) serial_unlink_irq_chain(up); diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c index 1759137121cc..011f38681131 100644 --- a/drivers/tty/serial/altera_uart.c +++ b/drivers/tty/serial/altera_uart.c @@ -339,7 +339,7 @@ static void altera_uart_shutdown(struct uart_port *port) if (port->irq) free_irq(port->irq, port); else - del_timer_sync(&pp->tmr); + timer_delete_sync(&pp->tmr); } static const char *altera_uart_type(struct uart_port *port) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index dc092204b472..11d65097578c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1084,7 +1084,7 @@ static void pl011_dma_rx_poll(struct timer_list *t) uap->dmarx.running = false; dmaengine_terminate_all(rxchan); - del_timer(&uap->dmarx.timer); + timer_delete(&uap->dmarx.timer); } else { mod_timer(&uap->dmarx.timer, jiffies + msecs_to_jiffies(uap->dmarx.poll_rate)); @@ -1199,7 +1199,7 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (uap->dmarx.poll_rate) - del_timer_sync(&uap->dmarx.timer); + timer_delete_sync(&uap->dmarx.timer); uap->using_rx_dma = false; } } diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 8918fbd4bddd..18dba502144d 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2017,7 +2017,7 @@ static void atmel_shutdown(struct uart_port *port) * Prevent any tasklets being scheduled during * cleanup */ - del_timer_sync(&atmel_port->uart_timer); + timer_delete_sync(&atmel_port->uart_timer); /* Make sure that no interrupt is on the fly */ synchronize_irq(port->irq); diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 4470966b826c..fe5aed99d55a 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1433,7 +1433,7 @@ static void lpuart_dma_rx_free(struct uart_port *port) dmaengine_terminate_sync(chan); if (!sport->dma_idle_int) - del_timer_sync(&sport->lpuart_timer); + timer_delete_sync(&sport->lpuart_timer); dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); kfree(sport->rx_ring.buf); @@ -2071,7 +2071,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, * baud rate and restart Rx DMA path. * * Since timer function acqures port->lock, need to stop before - * acquring same lock because otherwise del_timer_sync() can deadlock. + * acquring same lock because otherwise timer_delete_sync() can deadlock. */ if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(port); @@ -2316,7 +2316,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, * baud rate and restart Rx DMA path. * * Since timer function acqures port->lock, need to stop before - * acquring same lock because otherwise del_timer_sync() can deadlock. + * acquring same lock because otherwise timer_delete_sync() can deadlock. */ if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(port); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 19c819705bf9..e4b6f1bfdc95 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1619,7 +1619,7 @@ static void imx_uart_shutdown(struct uart_port *port) /* * Stop our timer. */ - del_timer_sync(&sport->timer); + timer_delete_sync(&sport->timer); /* * Disable all interrupts, port and break condition. @@ -1752,7 +1752,7 @@ imx_uart_set_termios(struct uart_port *port, struct ktermios *termios, old_csize = CS8; } - del_timer_sync(&sport->timer); + timer_delete_sync(&sport->timer); /* * Ask the core to calculate the divisor for us. diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c index 6c13cf1ab646..3a0960c97c77 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -96,7 +96,7 @@ static void liteuart_stop_rx(struct uart_port *port) struct liteuart_port *uart = to_liteuart_port(port); /* just delete timer */ - del_timer(&uart->timer); + timer_delete(&uart->timer); } static void liteuart_rx_chars(struct uart_port *port) @@ -220,7 +220,7 @@ static void liteuart_shutdown(struct uart_port *port) if (port->irq) free_irq(port->irq, port); else - del_timer_sync(&uart->timer); + timer_delete_sync(&uart->timer); } static void liteuart_set_termios(struct uart_port *port, struct ktermios *new, diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index cde5f1c86353..f2dd83692b2c 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -506,7 +506,7 @@ max3100_set_termios(struct uart_port *port, struct ktermios *termios, MAX3100_STATUS_PE | MAX3100_STATUS_FE | MAX3100_STATUS_OE; - del_timer_sync(&s->timer); + timer_delete_sync(&s->timer); uart_update_timeout(port, termios->c_cflag, baud); spin_lock(&s->conf_lock); @@ -532,7 +532,7 @@ static void max3100_shutdown(struct uart_port *port) s->force_end_work = 1; - del_timer_sync(&s->timer); + timer_delete_sync(&s->timer); if (s->workqueue) { destroy_workqueue(s->workqueue); diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c index 85ce1e9af44a..b417faead20f 100644 --- a/drivers/tty/serial/mux.c +++ b/drivers/tty/serial/mux.c @@ -563,7 +563,7 @@ static void __exit mux_exit(void) { /* Delete the Mux timer. */ if(port_cnt > 0) { - del_timer_sync(&mux_timer); + timer_delete_sync(&mux_timer); #ifdef CONFIG_SERIAL_MUX_CONSOLE unregister_console(&mux_console); #endif diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c index 3c34027687d2..8587ebbe1073 100644 --- a/drivers/tty/serial/sa1100.c +++ b/drivers/tty/serial/sa1100.c @@ -369,7 +369,7 @@ static void sa1100_shutdown(struct uart_port *port) /* * Stop our timer. */ - del_timer_sync(&sport->timer); + timer_delete_sync(&sport->timer); /* * Free the interrupt @@ -421,7 +421,7 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios, baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); quot = uart_get_divisor(port, baud); - del_timer_sync(&sport->timer); + timer_delete_sync(&sport->timer); uart_port_lock_irqsave(&sport->port, &flags); diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index 4c851dae6624..553e3c1321ca 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -1033,7 +1033,7 @@ static void sccnxp_remove(struct platform_device *pdev) if (!s->poll) devm_free_irq(&pdev->dev, s->irq, s); else - del_timer_sync(&s->timer); + timer_delete_sync(&s->timer); for (i = 0; i < s->uart.nr; i++) uart_remove_one_port(&s->uart, &s->port[i]); diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 1c8480d0338e..7e7813ccda41 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2337,7 +2337,7 @@ static void sci_shutdown(struct uart_port *port) #endif if (s->rx_trigger > 1 && s->rx_fifo_timeout > 0) - del_timer_sync(&s->rx_fifo_timer); + timer_delete_sync(&s->rx_fifo_timer); sci_free_irq(s); sci_free_dma(port); } diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index bd8d92ee7c53..4c703f42680d 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -2220,7 +2220,7 @@ static void isr_txeom(struct slgt_info *info, unsigned short status) } info->tx_active = false; - del_timer(&info->tx_timer); + timer_delete(&info->tx_timer); if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) { info->signals &= ~SerialSignal_RTS; @@ -2375,8 +2375,8 @@ static void shutdown(struct slgt_info *info) wake_up_interruptible(&info->status_event_wait_q); wake_up_interruptible(&info->event_wait_q); - del_timer_sync(&info->tx_timer); - del_timer_sync(&info->rx_timer); + timer_delete_sync(&info->tx_timer); + timer_delete_sync(&info->rx_timer); kfree(info->tx_buf); info->tx_buf = NULL; @@ -3955,7 +3955,7 @@ static void tx_stop(struct slgt_info *info) { unsigned short val; - del_timer(&info->tx_timer); + timer_delete(&info->tx_timer); tdma_reset(info); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index f85ce02e4725..6853c4660e7c 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -743,7 +743,7 @@ static void sysrq_detect_reset_sequence(struct sysrq_state *state, */ if (value && state->reset_seq_cnt) { state->reset_canceled = true; - del_timer(&state->keyreset_timer); + timer_delete(&state->keyreset_timer); } } else if (value == 0) { /* @@ -751,7 +751,7 @@ static void sysrq_detect_reset_sequence(struct sysrq_state *state, * to be pressed and held for the reset timeout * to hold. */ - del_timer(&state->keyreset_timer); + timer_delete(&state->keyreset_timer); if (--state->reset_seq_cnt == 0) state->reset_canceled = false; diff --git a/drivers/tty/vcc.c b/drivers/tty/vcc.c index 5b625f20233b..7ac3048377d5 100644 --- a/drivers/tty/vcc.c +++ b/drivers/tty/vcc.c @@ -683,8 +683,8 @@ static void vcc_remove(struct vio_dev *vdev) { struct vcc_port *port = dev_get_drvdata(&vdev->dev); - del_timer_sync(&port->rx_timer); - del_timer_sync(&port->tx_timer); + timer_delete_sync(&port->rx_timer); + timer_delete_sync(&port->tx_timer); /* If there's a process with the device open, do a synchronous * hangup of the TTY. This *may* cause the process to call close @@ -700,7 +700,7 @@ static void vcc_remove(struct vio_dev *vdev) tty_unregister_device(vcc_tty_driver, port->index); - del_timer_sync(&port->vio.timer); + timer_delete_sync(&port->vio.timer); vio_ldc_free(&port->vio); sysfs_remove_group(&vdev->dev.kobj, &vcc_attribute_group); dev_set_drvdata(&vdev->dev, NULL); diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 804355da46f5..ae92e6a50a65 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -275,7 +275,7 @@ static DEFINE_TIMER(kd_mksound_timer, kd_nosound); void kd_mksound(unsigned int hz, unsigned int ticks) { - del_timer_sync(&kd_mksound_timer); + timer_delete_sync(&kd_mksound_timer); input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index be5564ed8c01..f5642b3038e4 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4501,7 +4501,7 @@ void do_blank_screen(int entering_gfx) } hide_cursor(vc); - del_timer_sync(&console_timer); + timer_delete_sync(&console_timer); blank_timer_expired = 0; save_screen(vc); @@ -4606,7 +4606,7 @@ void poke_blanked_console(void) /* This isn't perfectly race free, but a race here would be mostly harmless, * at worst, we'll do a spurious blank and it's unlikely */ - del_timer(&console_timer); + timer_delete(&console_timer); blank_timer_expired = 0; if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS) diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 47d06af33747..c6b9ad12e8fe 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -597,7 +597,7 @@ static int cxacru_start_wait_urb(struct urb *urb, struct completion *done, timer_setup_on_stack(&timer.timer, cxacru_timeout_kill, 0); mod_timer(&timer.timer, jiffies + msecs_to_jiffies(CMD_TIMEOUT)); wait_for_completion(done); - del_timer_sync(&timer.timer); + timer_delete_sync(&timer.timer); destroy_timer_on_stack(&timer.timer); if (actual_length) diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index 973548b5c15c..27e3d35ee7dd 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -612,7 +612,7 @@ static void speedtch_handle_int(struct urb *int_urb) } if ((count == 6) && !memcmp(up_int, instance->int_data, 6)) { - del_timer(&instance->status_check_timer); + timer_delete(&instance->status_check_timer); atm_info(usbatm, "DSL line goes up\n"); } else if ((count == 6) && !memcmp(down_int, instance->int_data, 6)) { atm_info(usbatm, "DSL line goes down\n"); @@ -688,7 +688,7 @@ static void speedtch_atm_stop(struct usbatm_data *usbatm, struct atm_dev *atm_de atm_dbg(usbatm, "%s entered\n", __func__); - del_timer_sync(&instance->status_check_timer); + timer_delete_sync(&instance->status_check_timer); /* * Since resubmit_timer and int_urb can schedule themselves and @@ -697,14 +697,14 @@ static void speedtch_atm_stop(struct usbatm_data *usbatm, struct atm_dev *atm_de instance->int_urb = NULL; /* signal shutdown */ mb(); usb_kill_urb(int_urb); - del_timer_sync(&instance->resubmit_timer); + timer_delete_sync(&instance->resubmit_timer); /* * At this point, speedtch_handle_int and speedtch_resubmit_int * can run or be running, but instance->int_urb == NULL means that * they will not reschedule */ usb_kill_urb(int_urb); - del_timer_sync(&instance->resubmit_timer); + timer_delete_sync(&instance->resubmit_timer); usb_free_urb(int_urb); flush_work(&instance->status_check_work); diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index d1e622bb1406..a6a05e85ef8c 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -1237,8 +1237,8 @@ void usbatm_usb_disconnect(struct usb_interface *intf) for (i = 0; i < num_rcv_urbs + num_snd_urbs; i++) usb_kill_urb(instance->urbs[i]); - del_timer_sync(&instance->rx_channel.delay); - del_timer_sync(&instance->tx_channel.delay); + timer_delete_sync(&instance->rx_channel.delay); + timer_delete_sync(&instance->tx_channel.delay); /* turn usbatm_[rt]x_process into something close to a no-op */ /* no need to take the spinlock */ diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 46026b331267..a63c793bac21 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -842,7 +842,7 @@ static int usb_rh_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) } else { /* Status URB */ if (!hcd->uses_new_polling) - del_timer (&hcd->rh_timer); + timer_delete(&hcd->rh_timer); if (urb == hcd->status_urb) { hcd->status_urb = NULL; usb_hcd_unlink_urb_from_ep(hcd, urb); @@ -2768,14 +2768,14 @@ static void usb_stop_hcd(struct usb_hcd *hcd) { hcd->rh_pollable = 0; clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); hcd->driver->stop(hcd); hcd->state = HC_STATE_HALT; /* In case the HCD restarted the timer, stop it again. */ clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); } /** diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 8c7f9cc785bb..0e1dd6ef60a7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1385,7 +1385,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) } /* Stop hub_wq and related activity */ - del_timer_sync(&hub->irq_urb_retry); + timer_delete_sync(&hub->irq_urb_retry); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 869245238d6c..60ef8092259a 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5081,7 +5081,7 @@ static void dwc2_hcd_free(struct dwc2_hsotg *hsotg) cancel_work_sync(&hsotg->phy_reset_work); - del_timer(&hsotg->wkp_timer); + timer_delete(&hsotg->wkp_timer); } static void dwc2_hcd_release(struct dwc2_hsotg *hsotg) diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 2a542a99ec44..b0098792dd22 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -1302,7 +1302,7 @@ static int dwc2_schedule_periodic(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) } /* Cancel pending unreserve; if canceled OK, unreserve was pending */ - if (del_timer(&qh->unreserve_timer)) + if (timer_delete(&qh->unreserve_timer)) WARN_ON(!qh->unreserve_pending); /* @@ -1614,7 +1614,7 @@ struct dwc2_qh *dwc2_hcd_qh_create(struct dwc2_hsotg *hsotg, void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) { /* Make sure any unreserve work is finished. */ - if (del_timer_sync(&qh->unreserve_timer)) { + if (timer_delete_sync(&qh->unreserve_timer)) { unsigned long flags; spin_lock_irqsave(&hsotg->lock, flags); diff --git a/drivers/usb/gadget/legacy/zero.c b/drivers/usb/gadget/legacy/zero.c index e25e0d8dd387..a05785bdeb30 100644 --- a/drivers/usb/gadget/legacy/zero.c +++ b/drivers/usb/gadget/legacy/zero.c @@ -194,7 +194,7 @@ static void zero_suspend(struct usb_composite_dev *cdev) static void zero_resume(struct usb_composite_dev *cdev) { DBG(cdev, "%s\n", __func__); - del_timer(&autoresume_timer); + timer_delete(&autoresume_timer); } /*-------------------------------------------------------------------------*/ @@ -398,7 +398,7 @@ static int zero_bind(struct usb_composite_dev *cdev) static int zero_unbind(struct usb_composite_dev *cdev) { - del_timer_sync(&autoresume_timer); + timer_delete_sync(&autoresume_timer); if (!IS_ERR_OR_NULL(func_ss)) usb_put_function(func_ss); usb_put_function_instance(func_inst_ss); diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 8902abe3ca76..c93ea210c17c 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -252,7 +252,7 @@ static int omap_ep_disable(struct usb_ep *_ep) ep->has_dma = 0; omap_writew(UDC_SET_HALT, UDC_CTRL); list_del_init(&ep->iso); - del_timer(&ep->timer); + timer_delete(&ep->timer); spin_unlock_irqrestore(&ep->udc->lock, flags); diff --git a/drivers/usb/gadget/udc/pxa25x_udc.c b/drivers/usb/gadget/udc/pxa25x_udc.c index 1e9998024aaa..24eb1ae78e45 100644 --- a/drivers/usb/gadget/udc/pxa25x_udc.c +++ b/drivers/usb/gadget/udc/pxa25x_udc.c @@ -1503,7 +1503,7 @@ reset_gadget(struct pxa25x_udc *dev, struct usb_gadget_driver *driver) ep->stopped = 1; nuke(ep, -ESHUTDOWN); } - del_timer_sync(&dev->timer); + timer_delete_sync(&dev->timer); /* report reset; the driver is already quiesced */ if (driver) @@ -1530,7 +1530,7 @@ stop_activity(struct pxa25x_udc *dev, struct usb_gadget_driver *driver) ep->stopped = 1; nuke(ep, -ESHUTDOWN); } - del_timer_sync(&dev->timer); + timer_delete_sync(&dev->timer); /* report disconnect; the driver is already quiesced */ if (driver) @@ -1607,14 +1607,14 @@ static void handle_ep0 (struct pxa25x_udc *dev) if (udccs0 & UDCCS0_SST) { nuke(ep, -EPIPE); udc_ep0_set_UDCCS(dev, UDCCS0_SST); - del_timer(&dev->timer); + timer_delete(&dev->timer); ep0_idle(dev); } /* previous request unfinished? non-error iff back-to-back ... */ if ((udccs0 & UDCCS0_SA) != 0 && dev->ep0state != EP0_IDLE) { nuke(ep, 0); - del_timer(&dev->timer); + timer_delete(&dev->timer); ep0_idle(dev); } diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index ff6f846b1d41..6c1d15b2250c 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -1810,7 +1810,7 @@ static void r8a66597_remove(struct platform_device *pdev) struct r8a66597 *r8a66597 = platform_get_drvdata(pdev); usb_del_gadget_udc(&r8a66597->gadget); - del_timer_sync(&r8a66597->timer); + timer_delete_sync(&r8a66597->timer); r8a66597_free_request(&r8a66597->ep[0].ep, r8a66597->ep0_req); if (r8a66597->pdata->on_chip) { diff --git a/drivers/usb/gadget/udc/snps_udc_core.c b/drivers/usb/gadget/udc/snps_udc_core.c index 1f8a99d2a643..373942ceb076 100644 --- a/drivers/usb/gadget/udc/snps_udc_core.c +++ b/drivers/usb/gadget/udc/snps_udc_core.c @@ -3035,12 +3035,12 @@ void udc_remove(struct udc *dev) stop_timer++; if (timer_pending(&udc_timer)) wait_for_completion(&on_exit); - del_timer_sync(&udc_timer); + timer_delete_sync(&udc_timer); /* remove pollstall timer */ stop_pollstall_timer++; if (timer_pending(&udc_pollstall_timer)) wait_for_completion(&on_pollstall_exit); - del_timer_sync(&udc_pollstall_timer); + timer_delete_sync(&udc_pollstall_timer); udc = NULL; } EXPORT_SYMBOL_GPL(udc_remove); diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index cdf41886e8ca..150d2542cef0 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -224,7 +224,7 @@ static void quirk_poll_init(struct ehci_platform_priv *priv) static void quirk_poll_end(struct ehci_platform_priv *priv) { - del_timer_sync(&priv->poll_timer); + timer_delete_sync(&priv->poll_timer); cancel_delayed_work(&priv->poll_work); } diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c index 2d3a082cb52f..954fc5ad565b 100644 --- a/drivers/usb/host/isp1362-hcd.c +++ b/drivers/usb/host/isp1362-hcd.c @@ -2357,7 +2357,7 @@ static void isp1362_hc_stop(struct usb_hcd *hcd) pr_debug("%s:\n", __func__); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); spin_lock_irqsave(&isp1362_hcd->lock, flags); diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 9b24181fee60..c7784bf8101d 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -1003,7 +1003,7 @@ static void ohci_stop (struct usb_hcd *hcd) if (quirk_nec(ohci)) flush_work(&ohci->nec_work); - del_timer_sync(&ohci->io_watchdog); + timer_delete_sync(&ohci->io_watchdog); ohci->prev_frame_no = IO_WATCHDOG_OFF; ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 90cee192e96d..b3d734ab6201 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -315,7 +315,7 @@ static int ohci_bus_suspend (struct usb_hcd *hcd) spin_unlock_irq (&ohci->lock); if (rc == 0) { - del_timer_sync(&ohci->io_watchdog); + timer_delete_sync(&ohci->io_watchdog); ohci->prev_frame_no = IO_WATCHDOG_OFF; } return rc; diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index fce800ba4c61..d75b1b9b4db0 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -1127,7 +1127,7 @@ static void ehci_mem_cleanup(struct oxu_hcd *oxu) qh_put(oxu->async); oxu->async = NULL; - del_timer(&oxu->urb_timer); + timer_delete(&oxu->urb_timer); oxu->periodic = NULL; @@ -3154,7 +3154,7 @@ static void oxu_stop(struct usb_hcd *hcd) ehci_port_power(oxu, 0); /* no more interrupts ... */ - del_timer_sync(&oxu->watchdog); + timer_delete_sync(&oxu->watchdog); spin_lock_irq(&oxu->lock); if (HC_IS_RUNNING(hcd->state)) @@ -3887,7 +3887,7 @@ static int oxu_bus_suspend(struct usb_hcd *hcd) spin_unlock_irq(&oxu->lock); /* turn off now-idle HC */ - del_timer_sync(&oxu->watchdog); + timer_delete_sync(&oxu->watchdog); spin_lock_irq(&oxu->lock); ehci_halt(oxu); hcd->state = HC_STATE_SUSPENDED; diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index a44992e2561b..67e472116d11 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -2384,7 +2384,7 @@ static void r8a66597_remove(struct platform_device *pdev) struct r8a66597 *r8a66597 = platform_get_drvdata(pdev); struct usb_hcd *hcd = r8a66597_to_hcd(r8a66597); - del_timer_sync(&r8a66597->rh_timer); + timer_delete_sync(&r8a66597->rh_timer); usb_remove_hcd(hcd); iounmap(r8a66597->reg); if (r8a66597->pdata->on_chip) diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index fa2e4badd288..718b1b7fe366 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1515,7 +1515,7 @@ sl811h_stop(struct usb_hcd *hcd) struct sl811 *sl811 = hcd_to_sl811(hcd); unsigned long flags; - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); spin_lock_irqsave(&sl811->lock, flags); port_power(sl811, 0); diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index fd2408b553cf..14e6dfef16c6 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -716,7 +716,7 @@ static void uhci_stop(struct usb_hcd *hcd) spin_unlock_irq(&uhci->lock); synchronize_irq(hcd->irq); - del_timer_sync(&uhci->fsbr_timer); + timer_delete_sync(&uhci->fsbr_timer); release_uhci(uhci); } diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index 35fcb826152c..45a8256a665f 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -84,7 +84,7 @@ static void uhci_urbp_wants_fsbr(struct uhci_hcd *uhci, struct urb_priv *urbp) uhci_fsbr_on(uhci); else if (uhci->fsbr_expiring) { uhci->fsbr_expiring = 0; - del_timer(&uhci->fsbr_timer); + timer_delete(&uhci->fsbr_timer); } } } diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index 46fdab940092..05943f2213e4 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -327,7 +327,7 @@ static int xenhcd_bus_suspend(struct usb_hcd *hcd) } spin_unlock_irq(&info->lock); - del_timer_sync(&info->watchdog); + timer_delete_sync(&info->watchdog); return ret; } @@ -1307,7 +1307,7 @@ static void xenhcd_stop(struct usb_hcd *hcd) { struct xenhcd_info *info = xenhcd_hcd_to_info(hcd); - del_timer_sync(&info->watchdog); + timer_delete_sync(&info->watchdog); spin_lock_irq(&info->lock); /* cancel all urbs */ hcd->state = HC_STATE_HALT; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 69c278b64084..c0f226584a40 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -926,7 +926,7 @@ static void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status, if ((xhci->port_status_u0 != all_ports_seen_u0) && port_in_u0) { xhci->port_status_u0 |= 1 << wIndex; if (xhci->port_status_u0 == all_ports_seen_u0) { - del_timer_sync(&xhci->comp_mode_recovery_timer); + timer_delete_sync(&xhci->comp_mode_recovery_timer); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "All USB3 ports have entered U0 already!"); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 904831344440..208558cf822d 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -746,10 +746,10 @@ static int __maybe_unused xhci_mtk_suspend(struct device *dev) xhci_dbg(xhci, "%s: stop port polling\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); if (shared_hcd) { clear_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags); - del_timer_sync(&shared_hcd->rh_timer); + timer_delete_sync(&shared_hcd->rh_timer); } ret = xhci_mtk_host_disable(mtk); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 83a4adf57bae..0452b8d65832 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -627,7 +627,7 @@ void xhci_stop(struct usb_hcd *hcd) /* Deleting Compliance Mode Recovery Timer */ if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && (!(xhci_all_ports_seen_u0(xhci)))) { - del_timer_sync(&xhci->comp_mode_recovery_timer); + timer_delete_sync(&xhci->comp_mode_recovery_timer); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "%s: compliance mode recovery timer deleted", __func__); @@ -672,11 +672,11 @@ void xhci_shutdown(struct usb_hcd *hcd) xhci_dbg(xhci, "%s: stopping usb%d port polling.\n", __func__, hcd->self.busnum); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); if (xhci->shared_hcd) { clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); - del_timer_sync(&xhci->shared_hcd->rh_timer); + timer_delete_sync(&xhci->shared_hcd->rh_timer); } spin_lock_irq(&xhci->lock); @@ -908,10 +908,10 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) xhci_dbg(xhci, "%s: stopping usb%d port polling.\n", __func__, hcd->self.busnum); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + timer_delete_sync(&hcd->rh_timer); if (xhci->shared_hcd) { clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); - del_timer_sync(&xhci->shared_hcd->rh_timer); + timer_delete_sync(&xhci->shared_hcd->rh_timer); } if (xhci->quirks & XHCI_SUSPEND_DELAY) @@ -978,7 +978,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) */ if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && (!(xhci_all_ports_seen_u0(xhci)))) { - del_timer_sync(&xhci->comp_mode_recovery_timer); + timer_delete_sync(&xhci->comp_mode_recovery_timer); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "%s: compliance mode recovery timer deleted", __func__); @@ -1071,7 +1071,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume) if (power_lost) { if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { - del_timer_sync(&xhci->comp_mode_recovery_timer); + timer_delete_sync(&xhci->comp_mode_recovery_timer); xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "Compliance Mode Recovery Timer deleted!"); } diff --git a/drivers/usb/isp1760/isp1760-hcd.c b/drivers/usb/isp1760/isp1760-hcd.c index add2d2e3b61b..8dcd9cc22413 100644 --- a/drivers/usb/isp1760/isp1760-hcd.c +++ b/drivers/usb/isp1760/isp1760-hcd.c @@ -2458,7 +2458,7 @@ static void isp1760_stop(struct usb_hcd *hcd) { struct isp1760_hcd *priv = hcd_to_priv(hcd); - del_timer(&errata2_timer); + timer_delete(&errata2_timer); isp1760_hub_control(hcd, ClearPortFeature, USB_PORT_FEAT_POWER, 1, NULL, 0); diff --git a/drivers/usb/isp1760/isp1760-udc.c b/drivers/usb/isp1760/isp1760-udc.c index 5cafd23345ca..2af89ee28baa 100644 --- a/drivers/usb/isp1760/isp1760-udc.c +++ b/drivers/usb/isp1760/isp1760-udc.c @@ -1145,7 +1145,7 @@ static void isp1760_udc_disconnect(struct isp1760_udc *udc) if (udc->driver->disconnect) udc->driver->disconnect(&udc->gadget); - del_timer(&udc->vbus_timer); + timer_delete(&udc->vbus_timer); /* TODO Reset all endpoints ? */ } @@ -1314,7 +1314,7 @@ static int isp1760_udc_stop(struct usb_gadget *gadget) dev_dbg(udc->isp->dev, "%s\n", __func__); - del_timer_sync(&udc->vbus_timer); + timer_delete_sync(&udc->vbus_timer); isp1760_reg_write(udc->regs, mode_reg, 0); diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 8d379ae835bc..853a5f082a70 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -626,7 +626,7 @@ static int perform_sglist( mod_timer(&timeout.timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); - if (!del_timer_sync(&timeout.timer)) + if (!timer_delete_sync(&timeout.timer)) retval = -ETIMEDOUT; else retval = req->status; diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 26fd71a5f9b2..eebb24ab3ec8 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -204,7 +204,7 @@ static void __maybe_unused da8xx_musb_try_idle(struct musb *musb, unsigned long musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)) { dev_dbg(musb->controller, "%s active, deleting timer\n", usb_otg_state_string(musb->xceiv->otg->state)); - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); last_timer = jiffies; return; } @@ -290,7 +290,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) MUSB_HST_MODE(musb); musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; portstate(musb->port1_status |= USB_PORT_STAT_POWER); - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); } else if (!(musb->int_usb & MUSB_INTR_BABBLE)) { /* * When babble condition happens, drvvbus interrupt @@ -419,7 +419,7 @@ static int da8xx_musb_exit(struct musb *musb) { struct da8xx_glue *glue = dev_get_drvdata(musb->controller->parent); - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); phy_power_off(glue->phy); phy_exit(glue->phy); diff --git a/drivers/usb/musb/mpfs.c b/drivers/usb/musb/mpfs.c index 71e4271cba75..020348a98514 100644 --- a/drivers/usb/musb/mpfs.c +++ b/drivers/usb/musb/mpfs.c @@ -165,7 +165,7 @@ static void __maybe_unused mpfs_musb_try_idle(struct musb *musb, unsigned long t musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)) { dev_dbg(musb->controller, "%s active, deleting timer\n", usb_otg_state_string(musb->xceiv->otg->state)); - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); last_timer = jiffies; return; } @@ -232,7 +232,7 @@ static int mpfs_musb_init(struct musb *musb) static int mpfs_musb_exit(struct musb *musb) { - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); return 0; } diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 96fa700eaed1..cbbb27178024 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -921,7 +921,7 @@ static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb) musb_set_state(musb, OTG_STATE_B_HOST); if (musb->hcd) musb->hcd->self.is_b_host = 1; - del_timer(&musb->otg_timer); + timer_delete(&musb->otg_timer); break; default: if ((devctl & MUSB_DEVCTL_VBUS) @@ -1015,7 +1015,7 @@ static void musb_handle_intr_reset(struct musb *musb) + msecs_to_jiffies(TA_WAIT_BCON(musb))); break; case OTG_STATE_A_PERIPHERAL: - del_timer(&musb->otg_timer); + timer_delete(&musb->otg_timer); musb_g_reset(musb); break; case OTG_STATE_B_WAIT_ACON: diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index f877faf5a930..e5e813f97fac 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -201,7 +201,7 @@ static void dsps_musb_disable(struct musb *musb) musb_writel(reg_base, wrp->coreintr_clear, wrp->usb_bitmap); musb_writel(reg_base, wrp->epintr_clear, wrp->txep_bitmap | wrp->rxep_bitmap); - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); } /* Caller must take musb->lock */ @@ -215,7 +215,7 @@ static int dsps_check_status(struct musb *musb, void *unused) int skip_session = 0; if (glue->vbus_irq) - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); /* * We poll because DSPS IP's won't expose several OTG-critical @@ -499,7 +499,7 @@ static int dsps_musb_exit(struct musb *musb) struct device *dev = musb->controller; struct dsps_glue *glue = dev_get_drvdata(dev->parent); - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); phy_power_off(musb->phy); phy_exit(musb->phy); debugfs_remove_recursive(glue->dbgfs_root); @@ -983,7 +983,7 @@ static int dsps_suspend(struct device *dev) return ret; } - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); mbase = musb->ctrl_base; glue->context.control = musb_readl(mbase, wrp->control); diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 90b760a95e4e..abd2472da7f7 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -525,7 +525,7 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) && (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON))) { dev_dbg(musb->controller, "%s active, deleting timer\n", usb_otg_state_string(musb->xceiv->otg->state)); - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); last_timer = jiffies; return; } @@ -875,7 +875,7 @@ static irqreturn_t tusb_musb_interrupt(int irq, void *__hci) } if (int_src & TUSB_INT_SRC_USB_IP_CONN) - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); /* OTG state change reports (annoyingly) not issued by Mentor core */ if (int_src & (TUSB_INT_SRC_VBUS_SENSE_CHNG @@ -984,7 +984,7 @@ static void tusb_musb_disable(struct musb *musb) musb_writel(tbase, TUSB_DMA_INT_MASK, 0x7fffffff); musb_writel(tbase, TUSB_GPIO_INT_MASK, 0x1ff); - del_timer(&musb->dev_timer); + timer_delete(&musb->dev_timer); if (is_dma_capable() && !dma_off) { printk(KERN_WARNING "%s %s: dma still active\n", @@ -1174,7 +1174,7 @@ static int tusb_musb_exit(struct musb *musb) { struct tusb6010_glue *glue = dev_get_drvdata(musb->controller->parent); - del_timer_sync(&musb->dev_timer); + timer_delete_sync(&musb->dev_timer); the_musb = NULL; gpiod_set_value(glue->enable, 0); diff --git a/drivers/usb/phy/phy-mv-usb.c b/drivers/usb/phy/phy-mv-usb.c index 30d6c8840a5e..638fba58420c 100644 --- a/drivers/usb/phy/phy-mv-usb.c +++ b/drivers/usb/phy/phy-mv-usb.c @@ -110,7 +110,7 @@ static int mv_otg_cancel_timer(struct mv_otg *mvotg, unsigned int id) timer = &mvotg->otg_ctrl.timer[id]; if (timer_pending(timer)) - del_timer(timer); + timer_delete(timer); return 0; } diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 4e516b445136..b387863c245f 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -934,7 +934,7 @@ static void realtek_cr_destructor(void *extra) #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) { - del_timer(&chip->rts51x_suspend_timer); + timer_delete(&chip->rts51x_suspend_timer); chip->timer_expires = 0; } #endif diff --git a/drivers/video/fbdev/aty/radeon_backlight.c b/drivers/video/fbdev/aty/radeon_backlight.c index 9e41d2a18649..bf764c92bcf1 100644 --- a/drivers/video/fbdev/aty/radeon_backlight.c +++ b/drivers/video/fbdev/aty/radeon_backlight.c @@ -59,7 +59,7 @@ static int radeon_bl_update_status(struct backlight_device *bd) */ level = backlight_get_brightness(bd); - del_timer_sync(&rinfo->lvds_timer); + timer_delete_sync(&rinfo->lvds_timer); radeon_engine_idle(); lvds_gen_cntl = INREG(LVDS_GEN_CNTL); diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index d866608da8d1..c6c4753f6415 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -1082,7 +1082,7 @@ int radeon_screen_blank(struct radeonfb_info *rinfo, int blank, int mode_switch) } break; case MT_LCD: - del_timer_sync(&rinfo->lvds_timer); + timer_delete_sync(&rinfo->lvds_timer); val = INREG(LVDS_GEN_CNTL); if (unblank) { u32 target_val = (val & ~LVDS_DISPLAY_DIS) | LVDS_BLON | LVDS_ON @@ -2516,7 +2516,7 @@ static void radeonfb_pci_unregister(struct pci_dev *pdev) if (rinfo->mon2_EDID) sysfs_remove_bin_file(&rinfo->pdev->dev.kobj, &edid2_attr); - del_timer_sync(&rinfo->lvds_timer); + timer_delete_sync(&rinfo->lvds_timer); arch_phys_wc_del(rinfo->wc_cookie); radeonfb_bl_exit(rinfo); unregister_framebuffer(info); diff --git a/drivers/video/fbdev/aty/radeon_pm.c b/drivers/video/fbdev/aty/radeon_pm.c index 97a5972f5b1f..5ff4a964055a 100644 --- a/drivers/video/fbdev/aty/radeon_pm.c +++ b/drivers/video/fbdev/aty/radeon_pm.c @@ -2650,7 +2650,7 @@ static int radeonfb_pci_suspend_late(struct device *dev, pm_message_t mesg) /* Sleep */ rinfo->asleep = 1; rinfo->lock_blank = 1; - del_timer_sync(&rinfo->lvds_timer); + timer_delete_sync(&rinfo->lvds_timer); #ifdef CONFIG_PPC_PMAC /* On powermac, we have hooks to properly suspend/resume AGP now, diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c index 161fc65d6b57..64e76e1f5388 100644 --- a/drivers/video/fbdev/omap/hwa742.c +++ b/drivers/video/fbdev/omap/hwa742.c @@ -597,7 +597,7 @@ static int hwa742_set_update_mode(enum omapfb_update_mode mode) break; case OMAPFB_AUTO_UPDATE: hwa742.stop_auto_update = 1; - del_timer_sync(&hwa742.auto_update_timer); + timer_delete_sync(&hwa742.auto_update_timer); break; case OMAPFB_UPDATE_DISABLED: break; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c index 1f3434c040c1..370e8623754e 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c @@ -835,7 +835,7 @@ static irqreturn_t omap_dsi_irq_handler(int irq, void *arg) #ifdef DSI_CATCH_MISSING_TE if (irqstatus & DSI_IRQ_TE_TRIGGER) - del_timer(&dsi->te_timer); + timer_delete(&dsi->te_timer); #endif /* make a copy and unlock, so that isrs can unregister diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index c6e9855998ab..f1674f3ed923 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -495,7 +495,7 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) */ static void vbg_heartbeat_exit(struct vbg_dev *gdev) { - del_timer_sync(&gdev->heartbeat_timer); + timer_delete_sync(&gdev->heartbeat_timer); vbg_heartbeat_host_config(gdev, false); vbg_req_free(gdev->guest_heartbeat_req, sizeof(*gdev->guest_heartbeat_req)); diff --git a/drivers/watchdog/alim7101_wdt.c b/drivers/watchdog/alim7101_wdt.c index 9c7cf939ba3d..03a559b41f5b 100644 --- a/drivers/watchdog/alim7101_wdt.c +++ b/drivers/watchdog/alim7101_wdt.c @@ -166,7 +166,7 @@ static void wdt_startup(void) static void wdt_turnoff(void) { /* Stop the timer */ - del_timer_sync(&timer); + timer_delete_sync(&timer); wdt_change(WDT_DISABLE); pr_info("Watchdog timer is now disabled...\n"); } @@ -223,7 +223,7 @@ static int fop_close(struct inode *inode, struct file *file) if (wdt_expect_close == 42) wdt_turnoff(); else { - /* wim: shouldn't there be a: del_timer(&timer); */ + /* wim: shouldn't there be a: timer_delete(&timer); */ pr_crit("device file closed unexpectedly. Will not stop the WDT!\n"); } clear_bit(0, &wdt_is_open); diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index 7be70b98d091..1b47a2fc7d17 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -242,7 +242,7 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt) return 0; out_stop_timer: - del_timer(&wdt->timer); + timer_delete(&wdt->timer); return err; } @@ -378,7 +378,7 @@ static void at91wdt_remove(struct platform_device *pdev) watchdog_unregister_device(&wdt->wdd); pr_warn("I quit now, hardware will probably reboot!\n"); - del_timer(&wdt->timer); + timer_delete(&wdt->timer); } #if defined(CONFIG_OF) diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index 06a54c7de40b..4c0951307421 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -139,7 +139,7 @@ static int bcm47xx_wdt_soft_stop(struct watchdog_device *wdd) { struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); - del_timer_sync(&wdt->soft_timer); + timer_delete_sync(&wdt->soft_timer); wdt->timer_set(wdt, 0); return 0; @@ -213,7 +213,7 @@ static int bcm47xx_wdt_probe(struct platform_device *pdev) err_timer: if (soft) - del_timer_sync(&wdt->soft_timer); + timer_delete_sync(&wdt->soft_timer); return ret; } diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 4fb92c9e046a..13a4d47e68cd 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -240,7 +240,7 @@ static void cpwd_brokentimer(struct timer_list *unused) * were called directly instead of by kernel timer */ if (timer_pending(&cpwd_timer)) - del_timer(&cpwd_timer); + timer_delete(&cpwd_timer); for (id = 0; id < WD_NUMDEVS; id++) { if (p->devs[id].runstatus & WD_STAT_BSTOP) { @@ -629,7 +629,7 @@ static void cpwd_remove(struct platform_device *op) } if (p->broken) - del_timer_sync(&cpwd_timer); + timer_delete_sync(&cpwd_timer); if (p->initialized) free_irq(p->irq, p); diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c index f19580e1b318..28e3fc0df4c6 100644 --- a/drivers/watchdog/lpc18xx_wdt.c +++ b/drivers/watchdog/lpc18xx_wdt.c @@ -135,7 +135,7 @@ static int lpc18xx_wdt_start(struct watchdog_device *wdt_dev) unsigned int val; if (timer_pending(&lpc18xx_wdt->timer)) - del_timer(&lpc18xx_wdt->timer); + timer_delete(&lpc18xx_wdt->timer); val = readl(lpc18xx_wdt->base + LPC18XX_WDT_MOD); val |= LPC18XX_WDT_MOD_WDEN; @@ -266,7 +266,7 @@ static void lpc18xx_wdt_remove(struct platform_device *pdev) struct lpc18xx_wdt_dev *lpc18xx_wdt = platform_get_drvdata(pdev); dev_warn(&pdev->dev, "I quit now, hardware will probably reboot!\n"); - del_timer_sync(&lpc18xx_wdt->timer); + timer_delete_sync(&lpc18xx_wdt->timer); } static const struct of_device_id lpc18xx_wdt_match[] = { diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index 73d641486909..0ae8e5bc10ae 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -189,7 +189,7 @@ static void zf_timer_off(void) unsigned long flags; /* stop internal ping */ - del_timer_sync(&zf_timer); + timer_delete_sync(&zf_timer); spin_lock_irqsave(&zf_port_lock, flags); /* stop watchdog timer */ @@ -337,7 +337,7 @@ static int zf_close(struct inode *inode, struct file *file) if (zf_expect_close == 42) zf_timer_off(); else { - del_timer(&zf_timer); + timer_delete(&zf_timer); pr_err("device file closed unexpectedly. Will not stop the WDT!\n"); } clear_bit(0, &zf_is_open); diff --git a/drivers/watchdog/mixcomwd.c b/drivers/watchdog/mixcomwd.c index 70d9cf84c342..1ecd5c48a005 100644 --- a/drivers/watchdog/mixcomwd.c +++ b/drivers/watchdog/mixcomwd.c @@ -141,7 +141,7 @@ static int mixcomwd_open(struct inode *inode, struct file *file) __module_get(THIS_MODULE); else { if (mixcomwd_timer_alive) { - del_timer(&mixcomwd_timer); + timer_delete(&mixcomwd_timer); mixcomwd_timer_alive = 0; } } @@ -295,7 +295,7 @@ static void __exit mixcomwd_exit(void) if (!nowayout) { if (mixcomwd_timer_alive) { pr_warn("I quit now, hardware will probably reboot!\n"); - del_timer_sync(&mixcomwd_timer); + timer_delete_sync(&mixcomwd_timer); mixcomwd_timer_alive = 0; } } diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 31d3dcbf815e..d4ea7d6ccd6a 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -432,7 +432,7 @@ static int pcwd_stop(void) int stat_reg; /* Stop the timer */ - del_timer(&pcwd_private.timer); + timer_delete(&pcwd_private.timer); /* Disable the board */ if (pcwd_private.revision == PCWD_REVISION_C) { diff --git a/drivers/watchdog/pika_wdt.c b/drivers/watchdog/pika_wdt.c index 393aa4b1bc13..87b8988d2520 100644 --- a/drivers/watchdog/pika_wdt.c +++ b/drivers/watchdog/pika_wdt.c @@ -129,7 +129,7 @@ static int pikawdt_release(struct inode *inode, struct file *file) { /* stop internal ping */ if (!pikawdt_private.expect_close) - del_timer(&pikawdt_private.timer); + timer_delete(&pikawdt_private.timer); clear_bit(0, &pikawdt_private.open); pikawdt_private.expect_close = 0; diff --git a/drivers/watchdog/sbc60xxwdt.c b/drivers/watchdog/sbc60xxwdt.c index e9bf12918ed8..03eaf48c8f0f 100644 --- a/drivers/watchdog/sbc60xxwdt.c +++ b/drivers/watchdog/sbc60xxwdt.c @@ -146,7 +146,7 @@ static void wdt_startup(void) static void wdt_turnoff(void) { /* Stop the timer */ - del_timer_sync(&timer); + timer_delete_sync(&timer); inb_p(wdt_stop); pr_info("Watchdog timer is now disabled...\n"); } @@ -210,7 +210,7 @@ static int fop_close(struct inode *inode, struct file *file) if (wdt_expect_close == 42) wdt_turnoff(); else { - del_timer(&timer); + timer_delete(&timer); pr_crit("device file closed unexpectedly. Will not stop the WDT!\n"); } clear_bit(0, &wdt_is_open); diff --git a/drivers/watchdog/sc520_wdt.c b/drivers/watchdog/sc520_wdt.c index e849e1af267b..005f62e4a4fb 100644 --- a/drivers/watchdog/sc520_wdt.c +++ b/drivers/watchdog/sc520_wdt.c @@ -186,7 +186,7 @@ static int wdt_startup(void) static int wdt_turnoff(void) { /* Stop the timer */ - del_timer_sync(&timer); + timer_delete_sync(&timer); /* Stop the watchdog */ wdt_config(0); diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index 7f0150c39421..95af9ad94d16 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -129,7 +129,7 @@ static int sh_wdt_stop(struct watchdog_device *wdt_dev) spin_lock_irqsave(&wdt->lock, flags); - del_timer(&wdt->timer); + timer_delete(&wdt->timer); csr = sh_wdt_read_csr(); csr &= ~WTCSR_TME; diff --git a/drivers/watchdog/via_wdt.c b/drivers/watchdog/via_wdt.c index eeb39f96e72e..d647923d68fe 100644 --- a/drivers/watchdog/via_wdt.c +++ b/drivers/watchdog/via_wdt.c @@ -233,7 +233,7 @@ static int wdt_probe(struct pci_dev *pdev, static void wdt_remove(struct pci_dev *pdev) { watchdog_unregister_device(&wdt_dev); - del_timer_sync(&timer); + timer_delete_sync(&timer); iounmap(wdt_mem); release_mem_region(mmio, VIA_WDT_MMIO_LEN); release_resource(&wdt_res); diff --git a/drivers/watchdog/w83877f_wdt.c b/drivers/watchdog/w83877f_wdt.c index 1937084c182c..53db59ef774b 100644 --- a/drivers/watchdog/w83877f_wdt.c +++ b/drivers/watchdog/w83877f_wdt.c @@ -166,7 +166,7 @@ static void wdt_startup(void) static void wdt_turnoff(void) { /* Stop the timer */ - del_timer_sync(&timer); + timer_delete_sync(&timer); wdt_change(WDT_DISABLE); @@ -228,7 +228,7 @@ static int fop_close(struct inode *inode, struct file *file) if (wdt_expect_close == 42) wdt_turnoff(); else { - del_timer(&timer); + timer_delete(&timer); pr_crit("device file closed unexpectedly. Will not stop the WDT!\n"); } clear_bit(0, &wdt_is_open); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 07a8bfbdd9b9..e0030ac74ea0 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -534,6 +534,6 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta */ void afs_fs_probe_cleanup(struct afs_net *net) { - if (del_timer_sync(&net->fs_probe_timer)) + if (timer_delete_sync(&net->fs_probe_timer)) afs_dec_servers_outstanding(net); } diff --git a/fs/afs/server.c b/fs/afs/server.c index c530d1ca15df..8755f2703815 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -318,7 +318,7 @@ struct afs_server *afs_use_server(struct afs_server *server, bool activate, a = atomic_inc_return(&server->active); if (a == 1 && activate && !test_bit(AFS_SERVER_FL_EXPIRED, &server->flags)) - del_timer(&server->timer); + timer_delete(&server->timer); trace_afs_server(server->debug_id, r + 1, a, reason); return server; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 1f8e035d7119..d6dd12d74d4f 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -121,7 +121,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, } while (0); __set_current_state(TASK_RUNNING); - del_timer_sync(&wait.cpu_timer); + timer_delete_sync(&wait.cpu_timer); destroy_timer_on_stack(&wait.cpu_timer); bch2_io_timer_del(clock, &wait.io_timer); } diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index cd5f38d6fbaa..3541efa765c7 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -225,7 +225,7 @@ void zstd_cleanup_workspace_manager(void) } spin_unlock_bh(&wsm.lock); - del_timer_sync(&wsm.timer); + timer_delete_sync(&wsm.timer); } /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8122d4ffb3b5..181934499624 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5680,7 +5680,7 @@ failed_mount8: __maybe_unused /* flush s_sb_upd_work before sbi destroy */ flush_work(&sbi->s_sb_upd_work); ext4_stop_mmpd(sbi); - del_timer_sync(&sbi->s_err_report); + timer_delete_sync(&sbi->s_err_report); ext4_group_desc_free(sbi); failed_mount: #if IS_ENABLED(CONFIG_UNICODE) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a5ccba25ff47..743a1d7633cd 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -197,7 +197,7 @@ static int kjournald2(void *arg) if (journal->j_commit_sequence != journal->j_commit_request) { jbd2_debug(1, "OK, requests differ\n"); write_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); + timer_delete_sync(&journal->j_commit_timer); jbd2_journal_commit_transaction(journal); write_lock(&journal->j_state_lock); goto loop; @@ -246,7 +246,7 @@ static int kjournald2(void *arg) goto loop; end_loop: - del_timer_sync(&journal->j_commit_timer); + timer_delete_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd2_debug(1, "Journal thread exiting.\n"); diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 4061e0ba7010..bb815a002984 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -584,7 +584,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) size_t retlen; /* Nothing to do if not write-buffering the flash. In particular, we shouldn't - del_timer() the timer we never initialised. */ + call timer_delete() on the timer we never initialised. */ if (!jffs2_is_writebuffered(c)) return 0; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3a202e51b360..83970d97840b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2424,7 +2424,7 @@ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) * the area protected by sc_state_lock. */ if (thread_is_alive) - del_timer_sync(&sci->sc_timer); + timer_delete_sync(&sci->sc_timer); } /** diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0f46b22561d6..fce9beb214f0 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -724,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work) if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) { /* we shouldn't flush as we're in the thread, the * races with pending sc work structs are harmless */ - del_timer_sync(&sc->sc_idle_timeout); + timer_delete_sync(&sc->sc_idle_timeout); o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); sc_put(sc); kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 557cf9d40177..f8b9c9c73997 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -563,7 +563,7 @@ void pstore_unregister(struct pstore_info *psi) pstore_unregister_kmsg(); /* Stop timer and make sure all work has finished. */ - del_timer_sync(&pstore_timer); + timer_delete_sync(&pstore_timer); flush_work(&pstore_work); /* Remove all backend records from filesystem tree. */ diff --git a/include/linux/timer.h b/include/linux/timer.h index e67ecd1cbc97..10596d7c3a34 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -30,7 +30,7 @@ * * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and * it's safe to wait for the completion of the running instance from - * IRQ handlers, for example, by calling del_timer_sync(). + * IRQ handlers, for example, by calling timer_delete_sync(). * * Note: The irq disabled callback execution is a special case for * workqueue locking issues. It's not meant for executing random crap @@ -168,40 +168,6 @@ extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -/** - * del_timer_sync - Delete a pending timer and wait for a running callback - * @timer: The timer to be deleted - * - * See timer_delete_sync() for detailed explanation. - * - * Do not use in new code. Use timer_delete_sync() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer_sync(struct timer_list *timer) -{ - return timer_delete_sync(timer); -} - -/** - * del_timer - Delete a pending timer - * @timer: The timer to be deleted - * - * See timer_delete() for detailed explanation. - * - * Do not use in new code. Use timer_delete() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer(struct timer_list *timer) -{ - return timer_delete(timer); -} - extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 84e6b9fd5610..d8da764cf6de 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -636,7 +636,7 @@ static inline void sctp_transport_pl_reset(struct sctp_transport *t) } } else { if (t->pl.state != SCTP_PL_DISABLED) { - if (del_timer(&t->probe_timer)) + if (timer_delete(&t->probe_timer)) sctp_transport_put(t); t->pl.state = SCTP_PL_DISABLED; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ac2db99941ca..27f08aa17b56 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1695,7 +1695,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) cfile->kn = NULL; spin_unlock_irq(&cgroup_file_kn_lock); - del_timer_sync(&cfile->notify_timer); + timer_delete_sync(&cfile->notify_timer); } kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 117d9d4d3c3b..6ce73cceaf53 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -1500,7 +1500,7 @@ static int access_thread(void *arg) func(); } } while (!torture_must_stop()); - del_timer_sync(&timer); + timer_delete_sync(&timer); destroy_timer_on_stack(&timer); torture_kthread_stopping("access_thread"); diff --git a/kernel/kthread.c b/kernel/kthread.c index 5dc5b0d7238e..77c44924cf54 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1362,14 +1362,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work, struct kthread_worker *worker = work->worker; /* - * del_timer_sync() must be called to make sure that the timer + * timer_delete_sync() must be called to make sure that the timer * callback is not running. The lock must be temporary released * to avoid a deadlock with the callback. In the meantime, * any queuing is blocked by setting the canceling counter. */ work->canceling++; raw_spin_unlock_irqrestore(&worker->lock, *flags); - del_timer_sync(&dwork->timer); + timer_delete_sync(&dwork->timer); raw_spin_lock_irqsave(&worker->lock, *flags); work->canceling--; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 65095664f5c5..4fa7772be183 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2324,7 +2324,7 @@ rcu_torture_reader(void *arg) stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); if (irqreader && cur_ops->irq_capable) { - del_timer_sync(&t); + timer_delete_sync(&t); destroy_timer_on_stack(&t); } tick_dep_clear_task(current, TICK_DEP_BIT_RCU); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index d2a694944553..9a59b071501b 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -690,7 +690,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); - del_timer_sync(&sdp->delay_work); + timer_delete_sync(&sdp->delay_work); flush_work(&sdp->work); if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) return; /* Forgot srcu_barrier(), so just leak it! */ diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 466668eb4fad..c0cc7ae41106 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1086,7 +1086,7 @@ static void rcu_tasks_postscan(struct list_head *hop) } if (!IS_ENABLED(CONFIG_TINY_RCU)) - del_timer_sync(&tasks_rcu_exit_srcu_stall_timer); + timer_delete_sync(&tasks_rcu_exit_srcu_stall_timer); } /* See if tasks are still holding out, complain if so. */ diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 5ff3bc56ff51..fa269d34167a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -206,7 +206,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp, if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); - del_timer(&rdp_gp->nocb_timer); + timer_delete(&rdp_gp->nocb_timer); } if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) { @@ -822,7 +822,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) { WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); - del_timer(&my_rdp->nocb_timer); + timer_delete(&my_rdp->nocb_timer); } WRITE_ONCE(my_rdp->nocb_gp_sleep, true); raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index bb56805e3d47..1396674fa722 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1440,7 +1440,7 @@ void psi_trigger_destroy(struct psi_trigger *t) group->rtpoll_task, lockdep_is_held(&group->rtpoll_trigger_lock)); rcu_assign_pointer(group->rtpoll_task, NULL); - del_timer(&group->rtpoll_timer); + timer_delete(&group->rtpoll_timer); } } mutex_unlock(&group->rtpoll_trigger_lock); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0eeacbe2521..bb48498ebb5a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -619,7 +619,7 @@ static inline void clocksource_stop_watchdog(void) { if (!watchdog_running || (watchdog && !list_empty(&watchdog_list))) return; - del_timer(&watchdog_timer); + timer_delete(&watchdog_timer); watchdog_running = 0; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 22376a1a75b9..0cf8d39d650c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1429,7 +1429,7 @@ static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) * running. * * This prevents priority inversion: if the soft irq thread is preempted - * in the middle of a timer callback, then calling del_timer_sync() can + * in the middle of a timer callback, then calling hrtimer_cancel() can * lead to two issues: * * - If the caller is on a remote CPU then it has to spin wait for the timer diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c index dfe939f6e4ec..c0e960a5de39 100644 --- a/kernel/time/sleep_timeout.c +++ b/kernel/time/sleep_timeout.c @@ -97,7 +97,7 @@ signed long __sched schedule_timeout(signed long timeout) timer.timer.expires = expire; add_timer(&timer.timer); schedule(); - del_timer_sync(&timer.timer); + timer_delete_sync(&timer.timer); /* Remove the timer from the object tracker */ destroy_timer_on_stack(&timer.timer); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index c8f776dc6ee0..4d915c0a263c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -744,7 +744,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_ACTIVE: - del_timer_sync(timer); + timer_delete_sync(timer); debug_object_init(timer, &timer_debug_descr); return true; default: @@ -790,7 +790,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_ACTIVE: - del_timer_sync(timer); + timer_delete_sync(timer); debug_object_free(timer, &timer_debug_descr); return true; default: @@ -1212,10 +1212,10 @@ EXPORT_SYMBOL(mod_timer_pending); * * mod_timer(timer, expires) is equivalent to: * - * del_timer(timer); timer->expires = expires; add_timer(timer); + * timer_delete(timer); timer->expires = expires; add_timer(timer); * * mod_timer() is more efficient than the above open coded sequence. In - * case that the timer is inactive, the del_timer() part is a NOP. The + * case that the timer is inactive, the timer_delete() part is a NOP. The * timer is in any case activated with the new expiry time @expires. * * Note that if there are multiple unserialized concurrent users of the diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bfe030b443e2..cf6203282737 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2057,11 +2057,11 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags, struct delayed_work *dwork = to_delayed_work(work); /* - * dwork->timer is irqsafe. If del_timer() fails, it's + * dwork->timer is irqsafe. If timer_delete() fails, it's * guaranteed that the timer is not queued anywhere and not * running on the local CPU. */ - if (likely(del_timer(&dwork->timer))) + if (likely(timer_delete(&dwork->timer))) return 1; } @@ -3069,7 +3069,7 @@ __acquires(&pool->lock) break; } - del_timer_sync(&pool->mayday_timer); + timer_delete_sync(&pool->mayday_timer); raw_spin_lock_irq(&pool->lock); /* * This is necessary even after a new worker was just successfully @@ -4281,7 +4281,7 @@ EXPORT_SYMBOL_GPL(flush_work); bool flush_delayed_work(struct delayed_work *dwork) { local_irq_disable(); - if (del_timer_sync(&dwork->timer)) + if (timer_delete_sync(&dwork->timer)) __queue_work(dwork->cpu, dwork->wq, &dwork->work); local_irq_enable(); return flush_work(&dwork->work); @@ -4984,9 +4984,9 @@ static void put_unbound_pool(struct worker_pool *pool) reap_dying_workers(&cull_list); /* shut down the timers */ - del_timer_sync(&pool->idle_timer); + timer_delete_sync(&pool->idle_timer); cancel_work_sync(&pool->idle_cull_work); - del_timer_sync(&pool->mayday_timer); + timer_delete_sync(&pool->mayday_timer); /* RCU protected to allow dereferences from get_work_pool() */ call_rcu(&pool->rcu, rcu_free_pool); @@ -7637,7 +7637,7 @@ notrace void wq_watchdog_touch(int cpu) static void wq_watchdog_set_thresh(unsigned long thresh) { wq_watchdog_thresh = 0; - del_timer_sync(&wq_watchdog_timer); + timer_delete_sync(&wq_watchdog_timer); if (thresh) { wq_watchdog_thresh = thresh; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e61bbb1bd622..783904d8c5ef 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1151,7 +1151,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { - del_timer_sync(&bdi->laptop_mode_wb_timer); + timer_delete_sync(&bdi->laptop_mode_wb_timer); /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 18456ddd463b..c81624bc3969 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -640,7 +640,7 @@ int wb_domain_init(struct wb_domain *dom, gfp_t gfp) #ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom) { - del_timer_sync(&dom->period_timer); + timer_delete_sync(&dom->period_timer); fprop_global_destroy(&dom->completions); } #endif @@ -2229,7 +2229,7 @@ void laptop_sync_completion(void) rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) - del_timer(&bdi->laptop_mode_wb_timer); + timer_delete(&bdi->laptop_mode_wb_timer); rcu_read_unlock(); } diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 05cbb3c227c5..9c787e2e4b17 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -856,7 +856,7 @@ int __init aarp_proto_init(void) add_timer(&aarp_timer); rc = register_netdevice_notifier(&aarp_notifier); if (rc) { - del_timer_sync(&aarp_timer); + timer_delete_sync(&aarp_timer); unregister_snap_client(aarp_dl); } return rc; @@ -1011,7 +1011,7 @@ const struct seq_operations aarp_seq_ops = { /* General module cleanup. Called from cleanup_module() in ddp.c. */ void aarp_cleanup_module(void) { - del_timer_sync(&aarp_timer); + timer_delete_sync(&aarp_timer); unregister_netdevice_notifier(&aarp_notifier); unregister_snap_client(aarp_dl); aarp_purge(); diff --git a/net/atm/clip.c b/net/atm/clip.c index 42b910cb4e8e..61b5b700817d 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -904,7 +904,7 @@ static void atm_clip_exit_noproc(void) /* First, stop the idle timer, so it stops banging * on the table. */ - del_timer_sync(&idle_timer); + timer_delete_sync(&idle_timer); dev = clip_devs; while (dev) { diff --git a/net/atm/lec.c b/net/atm/lec.c index a948dd47c3f3..ded2f0df2ee6 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1302,7 +1302,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) return -1; hlist_del(&to_remove->next); - del_timer(&to_remove->timer); + timer_delete(&to_remove->timer); /* * If this is the only MAC connected to this VCC, @@ -1482,7 +1482,7 @@ static void lec_arp_destroy(struct lec_priv *priv) hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { - del_timer_sync(&entry->timer); + timer_delete_sync(&entry->timer); lec_arp_clear_vccs(entry); hlist_del(&entry->next); lec_arp_put(entry); @@ -1491,7 +1491,7 @@ static void lec_arp_destroy(struct lec_priv *priv) hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { - del_timer_sync(&entry->timer); + timer_delete_sync(&entry->timer); lec_arp_clear_vccs(entry); hlist_del(&entry->next); lec_arp_put(entry); @@ -1575,7 +1575,7 @@ static void lec_arp_expire_vcc(struct timer_list *t) struct lec_arp_table *to_remove = from_timer(to_remove, t, timer); struct lec_priv *priv = to_remove->priv; - del_timer(&to_remove->timer); + timer_delete(&to_remove->timer); pr_debug("%p %p: vpi:%d vci:%d\n", to_remove, priv, @@ -1843,16 +1843,16 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); - del_timer(&entry->timer); + timer_delete(&entry->timer); tmp = lec_arp_find(priv, mac_addr); if (tmp) { - del_timer(&tmp->timer); + timer_delete(&tmp->timer); tmp->status = ESI_FORWARD_DIRECT; memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); tmp->vcc = entry->vcc; tmp->old_push = entry->old_push; tmp->last_used = jiffies; - del_timer(&entry->timer); + timer_delete(&entry->timer); lec_arp_put(entry); entry = tmp; } else { @@ -1883,7 +1883,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, /* Temporary, changes before end of function */ } memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); - del_timer(&entry->timer); + timer_delete(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry(tmp, &priv->lec_arp_tables[i], next) { @@ -1946,7 +1946,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry = make_entry(priv, bus_mac); if (entry == NULL) goto out; - del_timer(&entry->timer); + timer_delete(&entry->timer); memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); entry->recv_vcc = vcc; entry->old_recv_push = old_push; @@ -1988,7 +1988,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->recv_vcc ? entry->recv_vcc-> vci : 0); found_entry = 1; - del_timer(&entry->timer); + timer_delete(&entry->timer); entry->vcc = vcc; entry->old_push = old_push; if (entry->status == ESI_VC_PENDING) { @@ -2172,7 +2172,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); - del_timer(&entry->timer); + timer_delete(&entry->timer); hlist_del(&entry->next); lec_arp_put(entry); } @@ -2182,7 +2182,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); - del_timer(&entry->timer); + timer_delete(&entry->timer); hlist_del(&entry->next); lec_arp_put(entry); } @@ -2215,7 +2215,7 @@ lec_arp_check_empties(struct lec_priv *priv, hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { - del_timer(&entry->timer); + timer_delete(&entry->timer); ether_addr_copy(entry->mac_addr, src); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 12da0269275c..f6b447bba329 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -804,7 +804,7 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) /* This lets us now how our LECs are doing */ err = register_netdevice_notifier(&mpoa_notifier); if (err < 0) { - del_timer(&mpc_timer); + timer_delete(&mpc_timer); return err; } } @@ -1495,7 +1495,7 @@ static void __exit atm_mpoa_cleanup(void) mpc_proc_clean(); - del_timer_sync(&mpc_timer); + timer_delete_sync(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); deregister_atm_ioctl(&atm_ioctl_ops); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 3ee7dba34310..b790bb92ed1c 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1071,11 +1071,11 @@ static int ax25_release(struct socket *sock) } if (ax25_dev) { if (!ax25_dev->device_up) { - del_timer_sync(&ax25->timer); - del_timer_sync(&ax25->t1timer); - del_timer_sync(&ax25->t2timer); - del_timer_sync(&ax25->t3timer); - del_timer_sync(&ax25->idletimer); + timer_delete_sync(&ax25->timer); + timer_delete_sync(&ax25->t1timer); + timer_delete_sync(&ax25->t2timer); + timer_delete_sync(&ax25->t3timer); + timer_delete_sync(&ax25->idletimer); } netdev_put(ax25_dev->dev, &ax25->dev_tracker); ax25_dev_put(ax25_dev); diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index c4f8adbf8144..8d9fba069001 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -44,7 +44,7 @@ void ax25_ds_setup_timer(ax25_dev *ax25_dev) void ax25_ds_del_timer(ax25_dev *ax25_dev) { if (ax25_dev) - del_timer(&ax25_dev->dama.slave_timer); + timer_delete(&ax25_dev->dama.slave_timer); } void ax25_ds_set_timer(ax25_dev *ax25_dev) diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 9ff98f46dc6b..bff4b203a893 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -262,11 +262,11 @@ void ax25_disconnect(ax25_cb *ax25, int reason) ax25_clear_queues(ax25); if (reason == ENETUNREACH) { - del_timer_sync(&ax25->timer); - del_timer_sync(&ax25->t1timer); - del_timer_sync(&ax25->t2timer); - del_timer_sync(&ax25->t3timer); - del_timer_sync(&ax25->idletimer); + timer_delete_sync(&ax25->timer); + timer_delete_sync(&ax25->t1timer); + timer_delete_sync(&ax25->t2timer); + timer_delete_sync(&ax25->t3timer); + timer_delete_sync(&ax25->idletimer); } else { if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index 9f7cb0a7c73f..3891a3923d6c 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -65,7 +65,7 @@ void ax25_start_t3timer(ax25_cb *ax25) if (ax25->t3 > 0) mod_timer(&ax25->t3timer, jiffies + ax25->t3); else - del_timer(&ax25->t3timer); + timer_delete(&ax25->t3timer); } void ax25_start_idletimer(ax25_cb *ax25) @@ -73,32 +73,32 @@ void ax25_start_idletimer(ax25_cb *ax25) if (ax25->idle > 0) mod_timer(&ax25->idletimer, jiffies + ax25->idle); else - del_timer(&ax25->idletimer); + timer_delete(&ax25->idletimer); } void ax25_stop_heartbeat(ax25_cb *ax25) { - del_timer(&ax25->timer); + timer_delete(&ax25->timer); } void ax25_stop_t1timer(ax25_cb *ax25) { - del_timer(&ax25->t1timer); + timer_delete(&ax25->t1timer); } void ax25_stop_t2timer(ax25_cb *ax25) { - del_timer(&ax25->t2timer); + timer_delete(&ax25->t2timer); } void ax25_stop_t3timer(ax25_cb *ax25) { - del_timer(&ax25->t3timer); + timer_delete(&ax25->t3timer); } void ax25_stop_idletimer(ax25_cb *ax25) { - del_timer(&ax25->idletimer); + timer_delete(&ax25->idletimer); } int ax25_t1timer_running(ax25_cb *ax25) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 9fb14e40e156..adbadb436033 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -384,13 +384,13 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, atomic_dec(&tp_vars->bat_priv->tp_num); /* kill the timer and remove its reference */ - del_timer_sync(&tp_vars->timer); + timer_delete_sync(&tp_vars->timer); /* the worker might have rearmed itself therefore we kill it again. Note * that if the worker should run again before invoking the following - * del_timer(), it would not re-arm itself once again because the status + * timer_delete(), it would not re-arm itself once again because the status * is OFF now */ - del_timer(&tp_vars->timer); + timer_delete(&tp_vars->timer); batadv_tp_vars_put(tp_vars); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 707f229f896a..fc5af8639b1e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -433,7 +433,7 @@ static void hidp_set_timer(struct hidp_session *session) static void hidp_del_timer(struct hidp_session *session) { if (session->idle_to > 0) - del_timer_sync(&session->timer); + timer_delete_sync(&session->timer); } static void hidp_process_report(struct hidp_session *session, int type, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ad5177e3a69b..20ea7dba0a9a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -254,7 +254,7 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - del_timer_sync(&s->timer); + timer_delete_sync(&s->timer); } /* ---- RFCOMM DLCs ---- */ @@ -281,7 +281,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); - if (del_timer(&d->timer)) + if (timer_delete(&d->timer)) rfcomm_dlc_put(d); } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 7e1ad229e133..722203b98ff7 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -732,7 +732,7 @@ static int br_mdb_replace_group_sg(const struct br_mdb_config *cfg, mod_timer(&pg->timer, now + brmctx->multicast_membership_interval); else - del_timer(&pg->timer); + timer_delete(&pg->timer); br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB); @@ -853,7 +853,7 @@ static int br_mdb_add_group_src(const struct br_mdb_config *cfg, cfg->entry->state == MDB_TEMPORARY) mod_timer(&ent->timer, now + br_multicast_gmi(brmctx)); else - del_timer(&ent->timer); + timer_delete(&ent->timer); /* Install a (S, G) forwarding entry for the source. */ err = br_mdb_add_group_src_fwd(cfg, &src->addr, brmctx, extack); @@ -953,7 +953,7 @@ static int br_mdb_replace_group_star_g(const struct br_mdb_config *cfg, mod_timer(&pg->timer, now + brmctx->multicast_membership_interval); else - del_timer(&pg->timer); + timer_delete(&pg->timer); br_mdb_notify(cfg->br->dev, mp, pg, RTM_NEWMDB); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b2ae0d2434d2..dcbf058de1e3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -546,7 +546,7 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) return; /* the kernel is now responsible for removing this S,G */ - del_timer(&sg->timer); + timer_delete(&sg->timer); star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr); if (!star_mp) return; @@ -2015,9 +2015,9 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&pmctx->ip6_mc_router_timer); + timer_delete_sync(&pmctx->ip6_mc_router_timer); #endif - del_timer_sync(&pmctx->ip4_mc_router_timer); + timer_delete_sync(&pmctx->ip4_mc_router_timer); } int br_multicast_add_port(struct net_bridge_port *port) @@ -2062,7 +2062,7 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) query->startup_sent = 0; if (try_to_del_timer_sync(&query->timer) >= 0 || - del_timer(&query->timer)) + timer_delete(&query->timer)) mod_timer(&query->timer, jiffies); } @@ -2127,12 +2127,12 @@ static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) br_multicast_find_del_pg(pmctx->port->br, pg); del |= br_ip4_multicast_rport_del(pmctx); - del_timer(&pmctx->ip4_mc_router_timer); - del_timer(&pmctx->ip4_own_query.timer); + timer_delete(&pmctx->ip4_mc_router_timer); + timer_delete(&pmctx->ip4_own_query.timer); del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&pmctx->ip6_mc_router_timer); - del_timer(&pmctx->ip6_own_query.timer); + timer_delete(&pmctx->ip6_mc_router_timer); + timer_delete(&pmctx->ip6_own_query.timer); #endif br_multicast_rport_del_notify(pmctx, del); } @@ -4199,15 +4199,15 @@ void br_multicast_open(struct net_bridge *br) static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { - del_timer_sync(&brmctx->ip4_mc_router_timer); - del_timer_sync(&brmctx->ip4_other_query.timer); - del_timer_sync(&brmctx->ip4_other_query.delay_timer); - del_timer_sync(&brmctx->ip4_own_query.timer); + timer_delete_sync(&brmctx->ip4_mc_router_timer); + timer_delete_sync(&brmctx->ip4_other_query.timer); + timer_delete_sync(&brmctx->ip4_other_query.delay_timer); + timer_delete_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&brmctx->ip6_mc_router_timer); - del_timer_sync(&brmctx->ip6_other_query.timer); - del_timer_sync(&brmctx->ip6_other_query.delay_timer); - del_timer_sync(&brmctx->ip6_own_query.timer); + timer_delete_sync(&brmctx->ip6_mc_router_timer); + timer_delete_sync(&brmctx->ip6_other_query.timer); + timer_delete_sync(&brmctx->ip6_other_query.delay_timer); + timer_delete_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4384,9 +4384,9 @@ int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val) case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM); - del_timer(&brmctx->ip4_mc_router_timer); + timer_delete(&brmctx->ip4_mc_router_timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&brmctx->ip6_mc_router_timer); + timer_delete(&brmctx->ip6_mc_router_timer); #endif brmctx->multicast_router = val; err = 0; @@ -4455,10 +4455,10 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, case MDB_RTR_TYPE_DISABLED: pmctx->multicast_router = MDB_RTR_TYPE_DISABLED; del |= br_ip4_multicast_rport_del(pmctx); - del_timer(&pmctx->ip4_mc_router_timer); + timer_delete(&pmctx->ip4_mc_router_timer); del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&pmctx->ip6_mc_router_timer); + timer_delete(&pmctx->ip6_mc_router_timer); #endif br_multicast_rport_del_notify(pmctx, del); break; @@ -4470,10 +4470,10 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, break; case MDB_RTR_TYPE_PERM: pmctx->multicast_router = MDB_RTR_TYPE_PERM; - del_timer(&pmctx->ip4_mc_router_timer); + timer_delete(&pmctx->ip4_mc_router_timer); br_ip4_multicast_add_router(brmctx, pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&pmctx->ip6_mc_router_timer); + timer_delete(&pmctx->ip6_mc_router_timer); #endif br_ip6_multicast_add_router(brmctx, pmctx); break; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 7d27b2e6038f..024210f95468 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -198,7 +198,7 @@ void br_become_root_bridge(struct net_bridge *br) br->hello_time = br->bridge_hello_time; br->forward_delay = br->bridge_forward_delay; br_topology_change_detection(br); - del_timer(&br->tcn_timer); + timer_delete(&br->tcn_timer); if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); @@ -363,7 +363,7 @@ static int br_supersedes_port_info(const struct net_bridge_port *p, static void br_topology_change_acknowledged(struct net_bridge *br) { br->topology_change_detected = 0; - del_timer(&br->tcn_timer); + timer_delete(&br->tcn_timer); } /* called under bridge lock */ @@ -439,7 +439,7 @@ static void br_make_blocking(struct net_bridge_port *p) br_set_state(p, BR_STATE_BLOCKING); br_ifinfo_notify(RTM_NEWLINK, NULL, p); - del_timer(&p->forward_delay_timer); + timer_delete(&p->forward_delay_timer); } } @@ -454,7 +454,7 @@ static void br_make_forwarding(struct net_bridge_port *p) if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { br_set_state(p, BR_STATE_FORWARDING); br_topology_change_detection(br); - del_timer(&p->forward_delay_timer); + timer_delete(&p->forward_delay_timer); } else if (br->stp_enabled == BR_KERNEL_STP) br_set_state(p, BR_STATE_LISTENING); else @@ -483,7 +483,7 @@ void br_port_state_selection(struct net_bridge *br) p->topology_change_ack = 0; br_make_forwarding(p); } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); + timer_delete(&p->message_age_timer); br_make_forwarding(p); } else { p->config_pending = 0; @@ -533,9 +533,9 @@ void br_received_config_bpdu(struct net_bridge_port *p, br_port_state_selection(br); if (!br_is_root_bridge(br) && was_root) { - del_timer(&br->hello_timer); + timer_delete(&br->hello_timer); if (br->topology_change_detected) { - del_timer(&br->topology_change_timer); + timer_delete(&br->topology_change_timer); br_transmit_tcn(br); mod_timer(&br->tcn_timer, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 75204d36d7f9..c20a41bf253b 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -81,9 +81,9 @@ void br_stp_disable_bridge(struct net_bridge *br) br->topology_change_detected = 0; spin_unlock_bh(&br->lock); - del_timer_sync(&br->hello_timer); - del_timer_sync(&br->topology_change_timer); - del_timer_sync(&br->tcn_timer); + timer_delete_sync(&br->hello_timer); + timer_delete_sync(&br->topology_change_timer); + timer_delete_sync(&br->tcn_timer); cancel_delayed_work_sync(&br->gc_work); } @@ -109,9 +109,9 @@ void br_stp_disable_port(struct net_bridge_port *p) br_ifinfo_notify(RTM_NEWLINK, NULL, p); - del_timer(&p->message_age_timer); - del_timer(&p->forward_delay_timer); - del_timer(&p->hold_timer); + timer_delete(&p->message_age_timer); + timer_delete(&p->forward_delay_timer); + timer_delete(&p->hold_timer); if (!rcu_access_pointer(p->backup_port)) br_fdb_delete_by_port(br, p, 0, 0); diff --git a/net/can/af_can.c b/net/can/af_can.c index 4c059e41c831..4aab7033c933 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -825,7 +825,7 @@ static void can_pernet_exit(struct net *net) if (IS_ENABLED(CONFIG_PROC_FS)) { can_remove_proc(net); if (stats_timer) - del_timer_sync(&net->can.stattimer); + timer_delete_sync(&net->can.stattimer); } kfree(net->can.rx_alldev_list); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 212f0a048cab..8a7ce640f74d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1088,7 +1088,7 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct sk_buff *skb; - del_timer_sync(&hw_data->send_timer); + timer_delete_sync(&hw_data->send_timer); cancel_work_sync(&hw_data->dm_alert_work); while ((skb = __skb_dequeue(&hw_data->drop_queue))) { struct devlink_trap_metadata *hw_metadata; @@ -1122,7 +1122,7 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); struct sk_buff *skb; - del_timer_sync(&hw_data->send_timer); + timer_delete_sync(&hw_data->send_timer); cancel_work_sync(&hw_data->dm_alert_work); while ((skb = __skb_dequeue(&hw_data->drop_queue))) { struct devlink_trap_metadata *hw_metadata; @@ -1183,7 +1183,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack) struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct sk_buff *skb; - del_timer_sync(&data->send_timer); + timer_delete_sync(&data->send_timer); cancel_work_sync(&data->dm_alert_work); while ((skb = __skb_dequeue(&data->drop_queue))) consume_skb(skb); @@ -1211,7 +1211,7 @@ static void net_dm_trace_off_set(void) struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); struct sk_buff *skb; - del_timer_sync(&data->send_timer); + timer_delete_sync(&data->send_timer); cancel_work_sync(&data->dm_alert_work); while ((skb = __skb_dequeue(&data->drop_queue))) consume_skb(skb); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 412816076b8b..2b821b9a8699 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -177,7 +177,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats, spin_lock_bh(lock); old = rcu_dereference_protected(*rate_est, 1); if (old) { - del_timer_sync(&old->timer); + timer_delete_sync(&old->timer); est->avbps = old->avbps; est->avpps = old->avpps; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0738aa6cca25..a07249b59ae1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -309,7 +309,7 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when) static int neigh_del_timer(struct neighbour *n) { if ((n->nud_state & NUD_IN_TIMER) && - del_timer(&n->timer)) { + timer_delete(&n->timer)) { neigh_release(n); return 1; } @@ -427,7 +427,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) - del_timer_sync(&tbl->proxy_timer); + timer_delete_sync(&tbl->proxy_timer); return 0; } @@ -1597,7 +1597,7 @@ static void neigh_proxy_process(struct timer_list *t) } else if (!sched_next || tdif < sched_next) sched_next = tdif; } - del_timer(&tbl->proxy_timer); + timer_delete(&tbl->proxy_timer); if (sched_next) mod_timer(&tbl->proxy_timer, jiffies + sched_next); spin_unlock(&tbl->proxy_queue.lock); @@ -1628,7 +1628,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); - if (del_timer(&tbl->proxy_timer)) { + if (timer_delete(&tbl->proxy_timer)) { if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } @@ -1786,7 +1786,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); - del_timer_sync(&tbl->proxy_timer); + timer_delete_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) diff --git a/net/core/sock.c b/net/core/sock.c index 323892066def..f67a3c5b0988 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3598,14 +3598,14 @@ EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { - if (del_timer(timer)) + if (timer_delete(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) { - if (del_timer_sync(timer)) + if (timer_delete_sync(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer_sync); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2c394c364cb9..ca7d539b3846 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -205,7 +205,7 @@ static void ip_sf_list_clear_all(struct ip_sf_list *psf) static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); - if (del_timer(&im->timer)) + if (timer_delete(&im->timer)) refcount_dec(&im->refcnt); im->tm_running = 0; im->reporter = 0; @@ -251,7 +251,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) { spin_lock_bh(&im->lock); im->unsolicit_count = 0; - if (del_timer(&im->timer)) { + if (timer_delete(&im->timer)) { if ((long)(im->timer.expires-jiffies) < max_delay) { add_timer(&im->timer); im->tm_running = 1; @@ -974,7 +974,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); - if (del_timer(&in_dev->mr_ifc_timer)) + if (timer_delete(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ igmpv3_clear_delrec(in_dev); @@ -1830,10 +1830,10 @@ void ip_mc_down(struct in_device *in_dev) #ifdef CONFIG_IP_MULTICAST WRITE_ONCE(in_dev->mr_ifc_count, 0); - if (del_timer(&in_dev->mr_ifc_timer)) + if (timer_delete(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; - if (del_timer(&in_dev->mr_gq_timer)) + if (timer_delete(&in_dev->mr_gq_timer)) __in_dev_put(in_dev); #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 19fae4811ab2..470ab17ceb51 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -133,7 +133,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) struct inet_frag_queue *fq = ptr; int count; - count = del_timer_sync(&fq->timer) ? 1 : 0; + count = timer_delete_sync(&fq->timer) ? 1 : 0; spin_lock_bh(&fq->lock); fq->flags |= INET_FRAG_DROP; @@ -227,7 +227,7 @@ EXPORT_SYMBOL(fqdir_exit); void inet_frag_kill(struct inet_frag_queue *fq, int *refs) { - if (del_timer(&fq->timer)) + if (timer_delete(&fq->timer)) (*refs)++; if (!(fq->flags & INET_FRAG_COMPLETE)) { @@ -297,7 +297,7 @@ void inet_frag_destroy(struct inet_frag_queue *q) reason = (q->flags & INET_FRAG_DROP) ? SKB_DROP_REASON_FRAG_REASM_TIMEOUT : SKB_CONSUMED; - WARN_ON(del_timer(&q->timer) != 0); + WARN_ON(timer_delete(&q->timer) != 0); /* Release all fragment data. */ fqdir = q->fqdir; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b81c8131e23f..a8b04d4abcaa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1289,7 +1289,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, } } if (list_empty(&mrt->mfc_unres_queue)) - del_timer(&mrt->ipmr_expire_timer); + timer_delete(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b908fccbc1..2cffb8f4a2bc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -313,7 +313,7 @@ static inline bool addrconf_link_ready(const struct net_device *dev) static void addrconf_del_rs_timer(struct inet6_dev *idev) { - if (del_timer(&idev->rs_timer)) + if (timer_delete(&idev->rs_timer)) __in6_dev_put(idev); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c134ba202c4c..bf727149fdec 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2383,7 +2383,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else - del_timer(&net->ipv6.ip6_fib_timer); + timer_delete(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&net->ipv6.fib6_gc_lock); } @@ -2470,7 +2470,7 @@ static void fib6_net_exit(struct net *net) { unsigned int i; - del_timer_sync(&net->ipv6.ip6_fib_timer); + timer_delete_sync(&net->ipv6.ip6_fib_timer); for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { struct hlist_head *head = &net->ipv6.fib_table_hash[i]; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eca07e10e21f..a3ff575798dd 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -907,6 +907,6 @@ int ip6_flowlabel_init(void) void ip6_flowlabel_cleanup(void) { static_key_deferred_flush(&ipv6_flowlabel_exclusive); - del_timer(&ip6_fl_gc_timer); + timer_delete(&ip6_fl_gc_timer); unregister_pernet_subsys(&ip6_flowlabel_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e8ade93a0f0e..b413c9c8a21c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1526,7 +1526,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, } } if (list_empty(&mrt->mfc_unres_queue)) - del_timer(&mrt->ipmr_expire_timer); + timer_delete(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 0971ca48ba15..a0596e1f91da 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -194,8 +194,8 @@ int lapb_unregister(struct net_device *dev) spin_unlock_bh(&lapb->lock); /* Wait for running timers to stop */ - del_timer_sync(&lapb->t1timer); - del_timer_sync(&lapb->t2timer); + timer_delete_sync(&lapb->t1timer); + timer_delete_sync(&lapb->t2timer); __lapb_remove_cb(lapb); diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 5be68869064d..5b3f3b444d19 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -35,7 +35,7 @@ static void lapb_t2timer_expiry(struct timer_list *); void lapb_start_t1timer(struct lapb_cb *lapb) { - del_timer(&lapb->t1timer); + timer_delete(&lapb->t1timer); lapb->t1timer.function = lapb_t1timer_expiry; lapb->t1timer.expires = jiffies + lapb->t1; @@ -46,7 +46,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb) void lapb_start_t2timer(struct lapb_cb *lapb) { - del_timer(&lapb->t2timer); + timer_delete(&lapb->t2timer); lapb->t2timer.function = lapb_t2timer_expiry; lapb->t2timer.expires = jiffies + lapb->t2; @@ -58,13 +58,13 @@ void lapb_start_t2timer(struct lapb_cb *lapb) void lapb_stop_t1timer(struct lapb_cb *lapb) { lapb->t1timer_running = false; - del_timer(&lapb->t1timer); + timer_delete(&lapb->t1timer); } void lapb_stop_t2timer(struct lapb_cb *lapb) { lapb->t2timer_running = false; - del_timer(&lapb->t2timer); + timer_delete(&lapb->t2timer); } int lapb_t1timer_running(struct lapb_cb *lapb) diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 40ca3c1e42a2..7e8fc710c590 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -51,7 +51,7 @@ int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb) struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); llc->remote_busy_flag = 0; - del_timer(&llc->busy_state_timer.timer); + timer_delete(&llc->busy_state_timer.timer); nr = LLC_I_GET_NR(pdu); llc_conn_resend_i_pdu_as_cmd(sk, nr, 0); } @@ -191,7 +191,7 @@ int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock *sk, struct llc_sock *llc = llc_sk(sk); if (llc->data_flag == 2) - del_timer(&llc->rej_sent_timer.timer); + timer_delete(&llc->rej_sent_timer.timer); return 0; } @@ -1111,9 +1111,9 @@ int llc_conn_ac_stop_other_timers(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->busy_state_timer.timer); + timer_delete(&llc->rej_sent_timer.timer); + timer_delete(&llc->pf_cycle_timer.timer); + timer_delete(&llc->busy_state_timer.timer); llc->ack_must_be_send = 0; llc->ack_pf = 0; return 0; @@ -1149,7 +1149,7 @@ int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk, int llc_conn_ac_stop_ack_timer(struct sock *sk, struct sk_buff *skb) { - del_timer(&llc_sk(sk)->ack_timer.timer); + timer_delete(&llc_sk(sk)->ack_timer.timer); return 0; } @@ -1157,14 +1157,14 @@ int llc_conn_ac_stop_p_timer(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - del_timer(&llc->pf_cycle_timer.timer); + timer_delete(&llc->pf_cycle_timer.timer); llc_conn_set_p_flag(sk, 0); return 0; } int llc_conn_ac_stop_rej_timer(struct sock *sk, struct sk_buff *skb) { - del_timer(&llc_sk(sk)->rej_sent_timer.timer); + timer_delete(&llc_sk(sk)->rej_sent_timer.timer); return 0; } @@ -1180,7 +1180,7 @@ int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb) /* On loopback we don't queue I frames in unack_pdu_q queue. */ if (acked > 0 || (llc->dev->flags & IFF_LOOPBACK)) { llc->retry_count = 0; - del_timer(&llc->ack_timer.timer); + timer_delete(&llc->ack_timer.timer); if (llc->failed_data_req) { /* already, we did not accept data from upper layer * (tx_window full or unacceptable state). Now, we diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index afc6974eafda..5c0ac243b248 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -949,15 +949,15 @@ void llc_sk_stop_all_timers(struct sock *sk, bool sync) struct llc_sock *llc = llc_sk(sk); if (sync) { - del_timer_sync(&llc->pf_cycle_timer.timer); - del_timer_sync(&llc->ack_timer.timer); - del_timer_sync(&llc->rej_sent_timer.timer); - del_timer_sync(&llc->busy_state_timer.timer); + timer_delete_sync(&llc->pf_cycle_timer.timer); + timer_delete_sync(&llc->ack_timer.timer); + timer_delete_sync(&llc->rej_sent_timer.timer); + timer_delete_sync(&llc->busy_state_timer.timer); } else { - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->ack_timer.timer); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->busy_state_timer.timer); + timer_delete(&llc->pf_cycle_timer.timer); + timer_delete(&llc->ack_timer.timer); + timer_delete(&llc->rej_sent_timer.timer); + timer_delete(&llc->busy_state_timer.timer); } llc->ack_must_be_send = 0; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index aeb99d102c6e..85612234742a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -103,13 +103,13 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (!tid_rx) return; - del_timer_sync(&tid_rx->session_timer); + timer_delete_sync(&tid_rx->session_timer); /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ spin_lock_bh(&tid_rx->reorder_lock); tid_rx->removed = true; spin_unlock_bh(&tid_rx->reorder_lock); - del_timer_sync(&tid_rx->reorder_timer); + timer_delete_sync(&tid_rx->reorder_timer); call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 63a5e48291ac..8dc8c3c96b96 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -362,8 +362,8 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); - del_timer_sync(&tid_tx->addba_resp_timer); - del_timer_sync(&tid_tx->session_timer); + timer_delete_sync(&tid_tx->addba_resp_timer); + timer_delete_sync(&tid_tx->session_timer); /* * After this packets are no longer handed right through @@ -1002,7 +1002,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, return; } - del_timer_sync(&tid_tx->addba_resp_timer); + timer_delete_sync(&tid_tx->addba_resp_timer); ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 05a945df3259..4246d168374f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1844,7 +1844,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) skb_queue_purge(&sdata->skb_queue); - del_timer_sync(&sdata->u.ibss.timer); + timer_delete_sync(&sdata->u.ibss.timer); return 0; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0423046028c..f0f4a250b10e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -526,7 +526,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do netif_addr_unlock_bh(sdata->dev); } - del_timer_sync(&local->dynamic_ps_timer); + timer_delete_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); WARN(ieee80211_vif_is_mld(&sdata->vif), diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 2dc732147e85..885fa6aa3fc1 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -342,7 +342,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) return; tpt_trig->running = false; - del_timer_sync(&tpt_trig->timer); + timer_delete_sync(&tpt_trig->timer); led_trigger_event(&local->tpt_led, LED_OFF); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 974081324aa4..7257f5610af5 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -706,7 +706,7 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) else { clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); /* stop running timer */ - del_timer_sync(&ifmsh->mesh_path_root_timer); + timer_delete_sync(&ifmsh->mesh_path_root_timer); } } @@ -1241,9 +1241,9 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); skb_queue_purge(&ifmsh->ps.bc_buf); - del_timer_sync(&sdata->u.mesh.housekeeping_timer); - del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); - del_timer_sync(&sdata->u.mesh.mesh_path_timer); + timer_delete_sync(&sdata->u.mesh.housekeeping_timer); + timer_delete_sync(&sdata->u.mesh.mesh_path_root_timer); + timer_delete_sync(&sdata->u.mesh.mesh_path_timer); /* clear any mesh work (for next join) we may have accrued */ ifmsh->wrkq_flags = 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5a0156e11c91..96e0a861886a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -417,7 +417,7 @@ u64 mesh_plink_deactivate(struct sta_info *sta) } spin_unlock_bh(&sta->mesh->plink_lock); if (!sdata->u.mesh.user_mpm) - del_timer_sync(&sta->mesh->plink_timer); + timer_delete_sync(&sta->mesh->plink_timer); mesh_path_flush_by_nexthop(sta); /* make sure no readers can access nexthop sta from here on */ @@ -666,7 +666,7 @@ void mesh_plink_timer(struct timer_list *t) /* * This STA is valid because sta_info_destroy() will - * del_timer_sync() this timer after having made sure + * timer_delete_sync() this timer after having made sure * it cannot be re-added (by deleting the plink.) */ sta = mesh->plink_sta; @@ -689,7 +689,7 @@ void mesh_plink_timer(struct timer_list *t) return; } - /* del_timer() and handler may race when entering these states */ + /* timer_delete() and handler may race when entering these states */ if (sta->mesh->plink_state == NL80211_PLINK_LISTEN || sta->mesh->plink_state == NL80211_PLINK_ESTAB) { mpl_dbg(sta->sdata, @@ -735,7 +735,7 @@ void mesh_plink_timer(struct timer_list *t) break; case NL80211_PLINK_HOLDING: /* holding timer */ - del_timer(&sta->mesh->plink_timer); + timer_delete(&sta->mesh->plink_timer); mesh_plink_fsm_restart(sta); break; default: @@ -848,7 +848,7 @@ static u64 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; u64 changed = 0; - del_timer(&sta->mesh->plink_timer); + timer_delete(&sta->mesh->plink_timer); sta->mesh->plink_state = NL80211_PLINK_ESTAB; changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); @@ -975,7 +975,7 @@ static u64 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, case NL80211_PLINK_HOLDING: switch (event) { case CLS_ACPT: - del_timer(&sta->mesh->plink_timer); + timer_delete(&sta->mesh->plink_timer); mesh_plink_fsm_restart(sta); break; case OPN_ACPT: diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c010bb3d24e3..5d1f2d6d09ad 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3194,7 +3194,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local) } else if (conf->flags & IEEE80211_CONF_PS) { conf->flags &= ~IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - del_timer_sync(&local->dynamic_ps_timer); + timer_delete_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); } @@ -4069,7 +4069,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL; - del_timer_sync(&local->dynamic_ps_timer); + timer_delete_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); /* Disable ARP filtering */ @@ -4097,9 +4097,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* disassociated - set to defaults now */ ieee80211_set_wmm_default(&sdata->deflink, false, false); - del_timer_sync(&sdata->u.mgd.conn_mon_timer); - del_timer_sync(&sdata->u.mgd.bcn_mon_timer); - del_timer_sync(&sdata->u.mgd.timer); + timer_delete_sync(&sdata->u.mgd.conn_mon_timer); + timer_delete_sync(&sdata->u.mgd.bcn_mon_timer); + timer_delete_sync(&sdata->u.mgd.timer); sdata->vif.bss_conf.dtim_period = 0; sdata->vif.bss_conf.beacon_rate = NULL; @@ -4589,7 +4589,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, * running is the timeout for the authentication response which * which is not relevant anymore. */ - del_timer_sync(&sdata->u.mgd.timer); + timer_delete_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->ap_addr); /* other links are destroyed */ @@ -4628,7 +4628,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, * running is the timeout for the association response which * which is not relevant anymore. */ - del_timer_sync(&sdata->u.mgd.timer); + timer_delete_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->ap_addr); eth_zero_addr(sdata->deflink.u.mgd.bssid); @@ -9852,7 +9852,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) ifmgd->assoc_req_ies = NULL; ifmgd->assoc_req_ies_len = 0; spin_unlock_bh(&ifmgd->teardown_lock); - del_timer_sync(&ifmgd->timer); + timer_delete_sync(&ifmgd->timer); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 6218abc3e441..ece1e83c7b2f 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -230,7 +230,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) skb_queue_purge(&sdata->skb_queue); - del_timer_sync(&sdata->u.ocb.housekeeping_timer); + timer_delete_sync(&sdata->u.ocb.housekeeping_timer); /* If the timer fired while we waited for it, it will have * requeued the work. Now the work will be running again * but will not rearm the timer again because it checks diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 29fab7ae47b4..2b9abc27462e 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -30,9 +30,9 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) /* FIXME: what to do when local->pspolling is true? */ - del_timer_sync(&local->dynamic_ps_timer); - del_timer_sync(&ifmgd->bcn_mon_timer); - del_timer_sync(&ifmgd->conn_mon_timer); + timer_delete_sync(&local->dynamic_ps_timer); + timer_delete_sync(&ifmgd->bcn_mon_timer); + timer_delete_sync(&ifmgd->conn_mon_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7be52345f218..a9cc832240a5 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -69,14 +69,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) flush_workqueue(local->workqueue); /* Don't try to run timers while suspended. */ - del_timer_sync(&local->sta_cleanup); + timer_delete_sync(&local->sta_cleanup); /* * Note that this particular timer doesn't need to be * restarted at resume. */ wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); - del_timer_sync(&local->dynamic_ps_timer); + timer_delete_sync(&local->dynamic_ps_timer); local->wowlan = wowlan; if (local->wowlan) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7f89cd1b7d7..09beb65d6108 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1242,7 +1242,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, tid_agg_rx->reorder_time[j] + 1 + HT_RX_REORDER_BUF_TIMEOUT); } else { - del_timer(&tid_agg_rx->reorder_timer); + timer_delete(&tid_agg_rx->reorder_timer); } } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 30cdc783999d..248e1f63bf73 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1592,7 +1592,7 @@ int sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { - del_timer_sync(&local->sta_cleanup); + timer_delete_sync(&local->sta_cleanup); rhltable_destroy(&local->sta_hash); rhltable_destroy(&local->link_sta_hash); } diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index f6de136008f6..dd895617defd 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -663,7 +663,7 @@ static void mctp_sk_unhash(struct sock *sk) * keys), stop any pending expiry events. the timer cannot be re-queued * as the sk is no longer observable */ - del_timer_sync(&msk->key_expiry); + timer_delete_sync(&msk->key_expiry); } static void mctp_sk_destruct(struct sock *sk) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 18b19dbccbba..31747f974941 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -327,7 +327,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, list_del(&entry->list); spin_unlock_bh(&msk->pm.lock); - /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ + /* no lock, because sk_stop_timer_sync() is calling timer_delete_sync() */ if (add_timer) sk_stop_timer_sync(sk, add_timer); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 7891a537bddd..b36947063783 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -189,7 +189,7 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc) nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); - del_timer_sync(&nc->monitor.timer); + timer_delete_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, @@ -396,7 +396,7 @@ void ncsi_free_request(struct ncsi_request *nr) if (nr->enabled) { nr->enabled = false; - del_timer_sync(&nr->timer); + timer_delete_sync(&nr->timer); } spin_lock_irqsave(&ndp->lock, flags); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index cb48a2b9cb9f..6ae042f702d2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -294,7 +294,7 @@ mtype_cancel_gc(struct ip_set *set) struct mtype *map = set->data; if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); + timer_delete_sync(&map->gc); } static const struct ip_set_type_variant mtype = { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 20a1727e2457..8699944c0baf 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -822,7 +822,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) /* Try to delete connection while not holding reference */ static void ip_vs_conn_del(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) { + if (timer_delete(&cp->timer)) { /* Drop cp->control chain too */ if (cp->control) cp->timeout = 0; @@ -833,7 +833,7 @@ static void ip_vs_conn_del(struct ip_vs_conn *cp) /* Try to delete connection while holding reference */ static void ip_vs_conn_del_put(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) { + if (timer_delete(&cp->timer)) { /* Drop cp->control chain too */ if (cp->control) cp->timeout = 0; @@ -860,7 +860,7 @@ static void ip_vs_conn_expire(struct timer_list *t) struct ip_vs_conn *ct = cp->control; /* delete the timer if it is activated by other users */ - del_timer(&cp->timer); + timer_delete(&cp->timer); /* does anybody control me? */ if (ct) { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0633276d96bf..7d5b7418f8c7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -848,7 +848,7 @@ static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { struct ip_vs_dest *dest, *nxt; - del_timer_sync(&ipvs->dest_trash_timer); + timer_delete_sync(&ipvs->dest_trash_timer); /* No need to use dest_trash_lock */ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { list_del(&dest->t_list); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 21fa550966f0..21d22fa22e4e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -118,7 +118,7 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { - if (del_timer(&exp->timeout)) { + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); return true; @@ -214,11 +214,11 @@ nf_ct_find_expectation(struct net *net, if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) { refcount_inc(&exp->use); return exp; - } else if (del_timer(&exp->timeout)) { + } else if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); return exp; } - /* Undo exp->master refcnt increase, if del_timer() failed */ + /* Undo exp->master refcnt increase, if timer_delete() failed */ nf_ct_put(exp->master); return NULL; @@ -520,7 +520,7 @@ void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, vo hlist_for_each_entry_safe(exp, next, &nf_ct_expect_hash[i], hnode) { - if (iter(exp, data) && del_timer(&exp->timeout)) { + if (iter(exp, data) && timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } @@ -550,7 +550,7 @@ void nf_ct_expect_iterate_net(struct net *net, if (!net_eq(nf_ct_exp_net(exp), net)) continue; - if (iter(exp, data) && del_timer(&exp->timeout)) { + if (iter(exp, data) && timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, portid, report); nf_ct_expect_put(exp); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index db23876a6016..2cc0fde23344 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3448,7 +3448,7 @@ static int ctnetlink_del_expect(struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_expect_lock); - if (del_timer(&exp->timeout)) { + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); nf_ct_expect_put(exp); @@ -3477,7 +3477,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, const struct nlattr * const cda[]) { if (cda[CTA_EXPECT_TIMEOUT]) { - if (!del_timer(&x->timeout)) + if (!timer_delete(&x->timeout)) return -ETIME; x->timeout.expires = jiffies + diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 134e05d31061..882962f3c84d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -381,7 +381,7 @@ static void __nfulnl_flush(struct nfulnl_instance *inst) { /* timer holds a reference */ - if (del_timer(&inst->timer)) + if (timer_delete(&inst->timer)) instance_put(inst); if (inst->skb) __nfulnl_send(inst); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 511819fbfa67..7a9d765b30c0 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -68,6 +68,6 @@ static void nr_loopback_timer(struct timer_list *unused) void nr_loopback_clear(void) { - del_timer_sync(&loopback_timer); + timer_delete_sync(&loopback_timer); skb_queue_purge(&loopback_queue); } diff --git a/net/nfc/core.c b/net/nfc/core.c index e58dc6405054..75ed8a9146ba 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -464,7 +464,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) } if (dev->ops->check_presence) - del_timer_sync(&dev->check_pres_timer); + timer_delete_sync(&dev->check_pres_timer); dev->ops->deactivate_target(dev, dev->active_target, mode); dev->active_target = NULL; @@ -509,7 +509,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, } if (dev->ops->check_presence) - del_timer_sync(&dev->check_pres_timer); + timer_delete_sync(&dev->check_pres_timer); rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); @@ -1172,7 +1172,7 @@ void nfc_unregister_device(struct nfc_dev *dev) device_unlock(&dev->dev); if (dev->ops->check_presence) { - del_timer_sync(&dev->check_pres_timer); + timer_delete_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ceb87db57cdb..aa493344d93e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -148,7 +148,7 @@ static void nfc_hci_msg_rx_work(struct work_struct *work) static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err, struct sk_buff *skb) { - del_timer_sync(&hdev->cmd_timer); + timer_delete_sync(&hdev->cmd_timer); if (hdev->cmd_pending_msg->cb) hdev->cmd_pending_msg->cb(hdev->cmd_pending_msg->cb_context, @@ -1046,7 +1046,7 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) mutex_unlock(&hdev->msg_tx_mutex); - del_timer_sync(&hdev->cmd_timer); + timer_delete_sync(&hdev->cmd_timer); cancel_work_sync(&hdev->msg_tx_work); cancel_work_sync(&hdev->msg_rx_work); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index e90f70385813..ce9c683a3ead 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -198,7 +198,7 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) if (skb_queue_empty(&shdlc->ack_pending_q)) { if (shdlc->t2_active) { - del_timer_sync(&shdlc->t2_timer); + timer_delete_sync(&shdlc->t2_timer); shdlc->t2_active = false; pr_debug("All sent frames acked. Stopped T2(retransmit)\n"); @@ -289,7 +289,7 @@ static void llc_shdlc_rcv_rej(struct llc_shdlc *shdlc, int y_nr) if (llc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) { if (shdlc->t2_active) { - del_timer_sync(&shdlc->t2_timer); + timer_delete_sync(&shdlc->t2_timer); shdlc->t2_active = false; pr_debug("Stopped T2(retransmit)\n"); } @@ -342,7 +342,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) { pr_debug("result=%d\n", r); - del_timer_sync(&shdlc->connect_timer); + timer_delete_sync(&shdlc->connect_timer); if (r == 0) { shdlc->ns = 0; @@ -526,7 +526,7 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) (shdlc->rnr == false)) { if (shdlc->t1_active) { - del_timer_sync(&shdlc->t1_timer); + timer_delete_sync(&shdlc->t1_timer); shdlc->t1_active = false; pr_debug("Stopped T1(send ack)\n"); } diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 18be13fb9b75..27e863f96ed1 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -160,14 +160,14 @@ static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) static void local_cleanup(struct nfc_llcp_local *local) { nfc_llcp_socket_release(local, false, ENXIO); - del_timer_sync(&local->link_timer); + timer_delete_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; - del_timer_sync(&local->sdreq_timer); + timer_delete_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); } @@ -1536,7 +1536,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) { local->rx_pending = skb; - del_timer(&local->link_timer); + timer_delete(&local->link_timer); schedule_work(&local->rx_work); } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 1ec5955fe469..0171bf3c7016 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -565,8 +565,8 @@ static int nci_close_device(struct nci_dev *ndev) * there is a queued/running cmd_work */ flush_workqueue(ndev->cmd_wq); - del_timer_sync(&ndev->cmd_timer); - del_timer_sync(&ndev->data_timer); + timer_delete_sync(&ndev->cmd_timer); + timer_delete_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); return 0; } @@ -597,7 +597,7 @@ static int nci_close_device(struct nci_dev *ndev) /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); - del_timer_sync(&ndev->cmd_timer); + timer_delete_sync(&ndev->cmd_timer); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 3d36ea5701f0..78f4131af3cf 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -42,7 +42,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, pr_debug("len %d, err %d\n", skb ? skb->len : 0, err); /* data exchange is complete, stop the data timer */ - del_timer_sync(&ndev->data_timer); + timer_delete_sync(&ndev->data_timer); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); if (cb) { diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index b911ab78bed9..9eeb862825c5 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -347,7 +347,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) __u16 rsp_opcode = nci_opcode(skb->data); /* we got a rsp, stop the cmd timer */ - del_timer(&ndev->cmd_timer); + timer_delete(&ndev->cmd_timer); pr_debug("NCI RX: MT=rsp, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", nci_pbf(skb->data), diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3e9ddf72cd03..d4dba06297c3 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -581,7 +581,7 @@ static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb) static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { - del_timer_sync(&pkc->retire_blk_timer); + timer_delete_sync(&pkc->retire_blk_timer); } static void prb_shutdown_retire_blk_timer(struct packet_sock *po, diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 0f77ae8ef944..9f9629e6fdae 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -32,7 +32,7 @@ static void rose_transmit_restart_request(struct rose_neigh *neigh); void rose_start_ftimer(struct rose_neigh *neigh) { - del_timer(&neigh->ftimer); + timer_delete(&neigh->ftimer); neigh->ftimer.function = rose_ftimer_expiry; neigh->ftimer.expires = @@ -43,7 +43,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) static void rose_start_t0timer(struct rose_neigh *neigh) { - del_timer(&neigh->t0timer); + timer_delete(&neigh->t0timer); neigh->t0timer.function = rose_t0timer_expiry; neigh->t0timer.expires = @@ -54,12 +54,12 @@ static void rose_start_t0timer(struct rose_neigh *neigh) void rose_stop_ftimer(struct rose_neigh *neigh) { - del_timer(&neigh->ftimer); + timer_delete(&neigh->ftimer); } void rose_stop_t0timer(struct rose_neigh *neigh) { - del_timer(&neigh->t0timer); + timer_delete(&neigh->t0timer); } int rose_ftimer_running(struct rose_neigh *neigh) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 036d92c0ad79..b538e39b3df5 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -124,7 +124,7 @@ void __exit rose_loopback_clear(void) { struct sk_buff *skb; - del_timer(&loopback_timer); + timer_delete(&loopback_timer); while ((skb = skb_dequeue(&loopback_queue)) != NULL) { skb->sk = NULL; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index fee772b4637c..2dd6bd3a3011 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; - del_timer_sync(&rose_neigh->ftimer); - del_timer_sync(&rose_neigh->t0timer); + timer_delete_sync(&rose_neigh->ftimer); + timer_delete_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8e477f7f8850..fec59d9338b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -469,7 +469,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) out: if (__rxrpc_call_is_complete(call)) { - del_timer_sync(&call->timer); + timer_delete_sync(&call->timer); if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); if (call->security) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c4c8b46a68c6..fce58be65e7c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -688,7 +688,7 @@ static void rxrpc_destroy_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); - del_timer_sync(&call->timer); + timer_delete_sync(&call->timer); rxrpc_cleanup_tx_buffers(call); rxrpc_cleanup_rx_buffers(call); @@ -711,7 +711,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - del_timer(&call->timer); + timer_delete(&call->timer); if (rcu_read_lock_held()) /* Can't use the rxrpc workqueue as we need to cancel/flush diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index db0099197890..63bbcc567f59 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -818,7 +818,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) local->kill_all_client_conns = true; - del_timer_sync(&local->client_conn_reap_timer); + timer_delete_sync(&local->client_conn_reap_timer); while ((conn = list_first_entry_or_null(&local->idle_client_conns, struct rxrpc_connection, cache_link))) { diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 2f1fd1e2e7e4..8ac22dde8b39 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -314,9 +314,9 @@ static void rxrpc_clean_up_connection(struct work_struct *work) !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); - del_timer_sync(&conn->timer); + timer_delete_sync(&conn->timer); cancel_work_sync(&conn->processor); /* Processing may restart the timer */ - del_timer_sync(&conn->timer); + timer_delete_sync(&conn->timer); write_lock(&rxnet->conn_lock); list_del_init(&conn->proc_link); @@ -365,7 +365,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn, dead = __refcount_dec_and_test(&conn->ref, &r); trace_rxrpc_conn(debug_id, r - 1, why); if (dead) { - del_timer(&conn->timer); + timer_delete(&conn->timer); cancel_work(&conn->processor); if (in_softirq() || work_busy(&conn->processor) || @@ -470,7 +470,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) atomic_dec(&rxnet->nr_conns); - del_timer_sync(&rxnet->service_conn_reap_timer); + timer_delete_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index a4c135d0fbcc..9a9834145e81 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -105,10 +105,10 @@ static __net_exit void rxrpc_exit_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); rxnet->live = false; - del_timer_sync(&rxnet->peer_keepalive_timer); + timer_delete_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); /* Remove the timer again as the worker may have restarted it. */ - del_timer_sync(&rxnet->peer_keepalive_timer); + timer_delete_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 93c36afbf576..f3b8203d3e85 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -555,7 +555,7 @@ static void fq_pie_destroy(struct Qdisc *sch) tcf_block_put(q->block); q->p_params.tupdate = 0; - del_timer_sync(&q->adapt_timer); + timer_delete_sync(&q->adapt_timer); kvfree(q->flows); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 14ab2f4c190a..514b1b6ac681 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -567,7 +567,7 @@ EXPORT_SYMBOL_GPL(netdev_watchdog_up); static void netdev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); - if (del_timer(&dev->watchdog_timer)) + if (timer_delete(&dev->watchdog_timer)) netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index bb1fa9aa530b..3771d000b30d 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -545,7 +545,7 @@ static void pie_destroy(struct Qdisc *sch) struct pie_sched_data *q = qdisc_priv(sch); q->params.tupdate = 0; - del_timer_sync(&q->adapt_timer); + timer_delete_sync(&q->adapt_timer); } static struct Qdisc_ops pie_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index ef8a2afed26b..1ba3e0bba54f 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -218,7 +218,7 @@ static void red_destroy(struct Qdisc *sch) tcf_qevent_destroy(&q->qe_mark, sch); tcf_qevent_destroy(&q->qe_early_drop, sch); - del_timer_sync(&q->adapt_timer); + timer_delete_sync(&q->adapt_timer); red_offload(sch, false); qdisc_put(q->qdisc); } @@ -297,7 +297,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, max_P); red_set_vars(&q->vars); - del_timer(&q->adapt_timer); + timer_delete(&q->adapt_timer); if (ctl->flags & TC_RED_ADAPTATIVE) mod_timer(&q->adapt_timer, jiffies + HZ/2); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 65d5b59da583..9ed197e96396 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -696,7 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, rtnl_kfree_skbs(to_free, tail); qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); - del_timer(&q->perturb_timer); + timer_delete(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); get_random_bytes(&q->perturbation, sizeof(q->perturbation)); @@ -722,7 +722,7 @@ static void sfq_destroy(struct Qdisc *sch) tcf_block_put(q->block); WRITE_ONCE(q->perturb_period, 0); - del_timer_sync(&q->perturb_timer); + timer_delete_sync(&q->perturb_timer); sfq_free(q->ht); sfq_free(q->slots); kfree(q->red_parms); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b0794f164cf..760152e751c7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -362,7 +362,7 @@ void sctp_association_free(struct sctp_association *asoc) * on our state. */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { - if (del_timer(&asoc->timers[i])) + if (timer_delete(&asoc->timers[i])) sctp_association_put(asoc); } @@ -1521,7 +1521,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) /* Stop the SACK timer. */ timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (del_timer(timer)) + if (timer_delete(timer)) sctp_association_put(asoc); } } diff --git a/net/sctp/input.c b/net/sctp/input.c index a8a254a5008e..0c0d2757f6f8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -446,7 +446,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, pr_debug("%s: unrecognized next header type " "encountered!\n", __func__); - if (del_timer(&t->proto_unreach_timer)) + if (timer_delete(&t->proto_unreach_timer)) sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, diff --git a/net/sctp/output.c b/net/sctp/output.c index a63df055ac57..23e96305cad7 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -312,7 +312,7 @@ static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, SCTP_MIB_OUTCTRLCHUNKS); asoc->stats.octrlchunks++; asoc->peer.sack_needed = 0; - if (del_timer(timer)) + if (timer_delete(timer)) sctp_association_put(asoc); } } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 0dc6b8ab9963..f6b8c13dafa4 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1630,8 +1630,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * as the receiver acknowledged any data. */ if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING && - del_timer(&asoc->timers - [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD])) + timer_delete(&asoc->timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD])) sctp_association_put(asoc); /* Mark the destination transport address as @@ -1688,7 +1687,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * address. */ if (!transport->flight_size) { - if (del_timer(&transport->T3_rtx_timer)) + if (timer_delete(&transport->T3_rtx_timer)) sctp_transport_put(transport); } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5407a3922101..8c3b80c4d40b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -695,7 +695,7 @@ static void sctp_free_addr_wq(struct net *net) struct sctp_sockaddr_entry *temp; spin_lock_bh(&net->sctp.addr_wq_lock); - del_timer(&net->sctp.addr_wq_timer); + timer_delete(&net->sctp.addr_wq_timer); list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { list_del(&addrw->list); kfree(addrw); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 23d6633966b1..3aa5da5e3bbd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -734,7 +734,7 @@ static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (del_timer(&t->hb_timer)) + if (timer_delete(&t->hb_timer)) sctp_transport_put(t); } } @@ -747,7 +747,7 @@ static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (del_timer(&t->T3_rtx_timer)) + if (timer_delete(&t->T3_rtx_timer)) sctp_transport_put(t); } } @@ -1557,7 +1557,7 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; - if (del_timer(timer)) + if (timer_delete(timer)) sctp_association_put(asoc); break; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index bfcff6d6a438..f205556c5b24 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -576,7 +576,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_transport *t; t = asoc->strreset_chunk->transport; - if (del_timer(&t->reconf_timer)) + if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); @@ -825,7 +825,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_transport *t; t = asoc->strreset_chunk->transport; - if (del_timer(&t->reconf_timer)) + if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); @@ -1076,7 +1076,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* remove everything for this reconf request */ if (!asoc->strreset_outstanding) { t = asoc->strreset_chunk->transport; - if (del_timer(&t->reconf_timer)) + if (timer_delete(&t->reconf_timer)) sctp_transport_put(t); sctp_chunk_put(asoc->strreset_chunk); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2abe45af98e7..59675f6d9e7d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -118,7 +118,7 @@ struct sctp_transport *sctp_transport_new(struct net *net, void sctp_transport_free(struct sctp_transport *transport) { /* Try to delete the heartbeat timer. */ - if (del_timer(&transport->hb_timer)) + if (timer_delete(&transport->hb_timer)) sctp_transport_put(transport); /* Delete the T3_rtx timer if it's active. @@ -126,17 +126,17 @@ void sctp_transport_free(struct sctp_transport *transport) * structure hang around in memory since we know * the transport is going away. */ - if (del_timer(&transport->T3_rtx_timer)) + if (timer_delete(&transport->T3_rtx_timer)) sctp_transport_put(transport); - if (del_timer(&transport->reconf_timer)) + if (timer_delete(&transport->reconf_timer)) sctp_transport_put(transport); - if (del_timer(&transport->probe_timer)) + if (timer_delete(&transport->probe_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ - if (del_timer(&transport->proto_unreach_timer)) + if (timer_delete(&transport->proto_unreach_timer)) sctp_transport_put(transport); sctp_transport_put(transport); @@ -829,7 +829,7 @@ void sctp_transport_reset(struct sctp_transport *t) void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ - if (del_timer(&t->T3_rtx_timer)) + if (timer_delete(&t->T3_rtx_timer)) sctp_transport_put(t); sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 09f245cda526..0eab15465511 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1167,7 +1167,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) spin_unlock(&xprt->queue_lock); /* Turn off autodisconnect */ - del_timer_sync(&xprt->timer); + timer_delete_sync(&xprt->timer); return 0; } @@ -2138,7 +2138,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) * can only run *before* del_time_sync(), never after. */ spin_lock(&xprt->transport_lock); - del_timer_sync(&xprt->timer); + timer_delete_sync(&xprt->timer); spin_unlock(&xprt->transport_lock); /* diff --git a/net/tipc/node.c b/net/tipc/node.c index 500320e5ca47..ccf5e427f43e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -638,7 +638,7 @@ static void tipc_node_delete(struct tipc_node *node) trace_tipc_node_delete(node, true, " "); tipc_node_delete_from_list(node); - del_timer_sync(&node->timer); + timer_delete_sync(&node->timer); tipc_node_put(node); } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 05d49ad81290..621addab2834 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -177,7 +177,7 @@ void tipc_sub_unsubscribe(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) - del_timer_sync(&sub->timer); + timer_delete_sync(&sub->timer); list_del(&sub->sub_list); tipc_sub_put(sub); } diff --git a/net/wireless/core.c b/net/wireless/core.c index 9e6b31903121..dcce326fdb8c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1722,7 +1722,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, trace_wiphy_delayed_work_queue(wiphy, &dwork->work, delay); if (!delay) { - del_timer(&dwork->timer); + timer_delete(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } @@ -1737,7 +1737,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, { lockdep_assert_held(&wiphy->mtx); - del_timer_sync(&dwork->timer); + timer_delete_sync(&dwork->timer); wiphy_work_cancel(wiphy, &dwork->work); } EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel); @@ -1747,7 +1747,7 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, { lockdep_assert_held(&wiphy->mtx); - del_timer_sync(&dwork->timer); + timer_delete_sync(&dwork->timer); wiphy_work_flush(wiphy, &dwork->work); } EXPORT_SYMBOL_GPL(wiphy_delayed_work_flush); diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 5460b9146dd8..37b190499405 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -55,7 +55,7 @@ static void x25_t20timer_expiry(struct timer_list *t) static inline void x25_stop_t20timer(struct x25_neigh *nb) { - del_timer(&nb->t20timer); + timer_delete(&nb->t20timer); } /* diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index 9376365cdcc9..e4c5ad5b070f 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -41,7 +41,7 @@ void x25_start_heartbeat(struct sock *sk) void x25_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + timer_delete(&sk->sk_timer); } void x25_start_t2timer(struct sock *sk) @@ -74,7 +74,7 @@ void x25_start_t23timer(struct sock *sk) void x25_stop_timer(struct sock *sk) { - del_timer(&x25_sk(sk)->timer); + timer_delete(&x25_sk(sk)->timer); } unsigned long x25_display_timer(struct sock *sk) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 30970d40a454..143ac3aa7537 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -462,7 +462,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); - if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) + if (timer_delete(&policy->timer) || timer_delete(&policy->polq.hold_timer)) BUG(); xfrm_dev_policy_free(policy); @@ -487,11 +487,11 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); - if (del_timer(&policy->polq.hold_timer)) + if (timer_delete(&policy->polq.hold_timer)) xfrm_pol_put(policy); skb_queue_purge(&policy->polq.hold_queue); - if (del_timer(&policy->timer)) + if (timer_delete(&policy->timer)) xfrm_pol_put(policy); /* XXX: Flush state cache */ @@ -1469,7 +1469,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); - if (del_timer(&pq->hold_timer)) + if (timer_delete(&pq->hold_timer)) xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); @@ -3004,7 +3004,7 @@ static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *s sched_next = jiffies + pq->timeout; - if (del_timer(&pq->hold_timer)) { + if (timer_delete(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d896c3fefb07..341d79ecb5c2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -598,7 +598,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) if (x->mode_cbs && x->mode_cbs->destroy_state) x->mode_cbs->destroy_state(x); hrtimer_cancel(&x->mtimer); - del_timer_sync(&x->rtimer); + timer_delete_sync(&x->rtimer); kfree(x->aead); kfree(x->aalg); kfree(x->ealg); diff --git a/samples/connector/cn_test.c b/samples/connector/cn_test.c index 0958a171d048..73d50b4aebb6 100644 --- a/samples/connector/cn_test.c +++ b/samples/connector/cn_test.c @@ -172,7 +172,7 @@ static int cn_test_init(void) static void cn_test_fini(void) { - del_timer_sync(&cn_test_timer); + timer_delete_sync(&cn_test_timer); cn_del_callback(&cn_test_id); cn_test_id.val--; cn_del_callback(&cn_test_id); diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index d0ee9001c7b3..dac67c367457 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -82,7 +82,7 @@ static int simple_thread(void *arg) while (!kthread_should_stop()) simple_thread_func(count++); - del_timer(&mytimer); + timer_delete(&mytimer); cancel_work_sync(&trace_work); /* diff --git a/sound/core/timer.c b/sound/core/timer.c index d774b9b71ce2..1de4b90fd4d1 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1152,7 +1152,7 @@ static int snd_timer_s_stop(struct snd_timer * timer) unsigned long jiff; priv = (struct snd_timer_system_private *) timer->private_data; - del_timer(&priv->tlist); + timer_delete(&priv->tlist); jiff = jiffies; if (time_before(jiff, priv->last_expires)) timer->sticks = priv->last_expires - jiff; @@ -1167,7 +1167,7 @@ static int snd_timer_s_close(struct snd_timer *timer) struct snd_timer_system_private *priv; priv = (struct snd_timer_system_private *)timer->private_data; - del_timer_sync(&priv->tlist); + timer_delete_sync(&priv->tlist); return 0; } diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index eb8a68a06c4d..4b02ec127cc0 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -261,7 +261,7 @@ static int loopback_snd_timer_start(struct loopback_pcm *dpcm) /* call in cable->lock */ static inline int loopback_jiffies_timer_stop(struct loopback_pcm *dpcm) { - del_timer(&dpcm->timer); + timer_delete(&dpcm->timer); dpcm->timer.expires = 0; return 0; @@ -292,7 +292,7 @@ static int loopback_snd_timer_stop(struct loopback_pcm *dpcm) static inline int loopback_jiffies_timer_stop_sync(struct loopback_pcm *dpcm) { - del_timer_sync(&dpcm->timer); + timer_delete_sync(&dpcm->timer); return 0; } diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index c1a3efb633c5..1d923cbe8cd0 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -279,7 +279,7 @@ static int dummy_systimer_stop(struct snd_pcm_substream *substream) { struct dummy_systimer_pcm *dpcm = substream->runtime->private_data; spin_lock(&dpcm->lock); - del_timer(&dpcm->timer); + timer_delete(&dpcm->timer); spin_unlock(&dpcm->lock); return 0; } diff --git a/sound/drivers/mpu401/mpu401_uart.c b/sound/drivers/mpu401/mpu401_uart.c index 8e3318e17717..a63e7558ac07 100644 --- a/sound/drivers/mpu401/mpu401_uart.c +++ b/sound/drivers/mpu401/mpu401_uart.c @@ -197,7 +197,7 @@ static void snd_mpu401_uart_remove_timer (struct snd_mpu401 *mpu, int input) mpu->timer_invoked &= input ? ~MPU401_MODE_INPUT_TIMER : ~MPU401_MODE_OUTPUT_TIMER; if (! mpu->timer_invoked) - del_timer(&mpu->timer); + timer_delete(&mpu->timer); } spin_unlock_irqrestore (&mpu->timer_lock, flags); } diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index 946184a2a758..dffcdf9e10d4 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -412,7 +412,7 @@ static void snd_mtpav_add_output_timer(struct mtpav *chip) /* spinlock held! */ static void snd_mtpav_remove_output_timer(struct mtpav *chip) { - del_timer(&chip->timer); + timer_delete(&chip->timer); } /* diff --git a/sound/drivers/opl3/opl3_seq.c b/sound/drivers/opl3/opl3_seq.c index 75de1299c3dc..9fc78b7fb780 100644 --- a/sound/drivers/opl3/opl3_seq.c +++ b/sound/drivers/opl3/opl3_seq.c @@ -74,7 +74,7 @@ void snd_opl3_synth_cleanup(struct snd_opl3 * opl3) /* Stop system timer */ spin_lock_irqsave(&opl3->sys_timer_lock, flags); if (opl3->sys_timer_status) { - del_timer(&opl3->tlist); + timer_delete(&opl3->tlist); opl3->sys_timer_status = 0; } spin_unlock_irqrestore(&opl3->sys_timer_lock, flags); diff --git a/sound/drivers/serial-u16550.c b/sound/drivers/serial-u16550.c index f66e01624c68..1857a78f55d7 100644 --- a/sound/drivers/serial-u16550.c +++ b/sound/drivers/serial-u16550.c @@ -166,7 +166,7 @@ static inline void snd_uart16550_add_timer(struct snd_uart16550 *uart) static inline void snd_uart16550_del_timer(struct snd_uart16550 *uart) { if (uart->timer_running) { - del_timer(&uart->buffer_timer); + timer_delete(&uart->buffer_timer); uart->timer_running = 0; } } diff --git a/sound/i2c/other/ak4117.c b/sound/i2c/other/ak4117.c index 165fdda8fda5..657b331d7a79 100644 --- a/sound/i2c/other/ak4117.c +++ b/sound/i2c/other/ak4117.c @@ -99,7 +99,7 @@ void snd_ak4117_reinit(struct ak4117 *chip) { unsigned char old = chip->regmap[AK4117_REG_PWRDN], reg; - del_timer(&chip->timer); + timer_delete(&chip->timer); chip->init = 1; /* bring the chip to reset state and powerdown state */ reg_write(chip, AK4117_REG_PWRDN, 0); diff --git a/sound/isa/sb/emu8000_pcm.c b/sound/isa/sb/emu8000_pcm.c index 9234d4fe8ada..016235209928 100644 --- a/sound/isa/sb/emu8000_pcm.c +++ b/sound/isa/sb/emu8000_pcm.c @@ -364,7 +364,7 @@ static void stop_voice(struct snd_emu8k_pcm *rec, int ch) /* stop timer */ spin_lock_irqsave(&rec->timer_lock, flags); if (rec->timer_running) { - del_timer(&rec->timer); + timer_delete(&rec->timer); rec->timer_running = 0; } spin_unlock_irqrestore(&rec->timer_lock, flags); diff --git a/sound/isa/sb/sb8_midi.c b/sound/isa/sb/sb8_midi.c index 618366d5d984..d2908fc280f8 100644 --- a/sound/isa/sb/sb8_midi.c +++ b/sound/isa/sb/sb8_midi.c @@ -125,7 +125,7 @@ static int snd_sb8dsp_midi_output_close(struct snd_rawmidi_substream *substream) struct snd_sb *chip; chip = substream->rmidi->private_data; - del_timer_sync(&chip->midi_timer); + timer_delete_sync(&chip->midi_timer); spin_lock_irqsave(&chip->open_lock, flags); chip->open &= ~(SB_OPEN_MIDI_OUTPUT | SB_OPEN_MIDI_OUTPUT_TRIGGER); chip->midi_substream_output = NULL; @@ -174,7 +174,7 @@ static void snd_sb8dsp_midi_output_write(struct snd_rawmidi_substream *substream spin_lock_irqsave(&chip->open_lock, flags); if (snd_rawmidi_transmit_peek(substream, &byte, 1) != 1) { chip->open &= ~SB_OPEN_MIDI_OUTPUT_TRIGGER; - del_timer(&chip->midi_timer); + timer_delete(&chip->midi_timer); spin_unlock_irqrestore(&chip->open_lock, flags); break; } diff --git a/sound/isa/wavefront/wavefront_midi.c b/sound/isa/wavefront/wavefront_midi.c index ead8cbe638de..fcc2a0d67792 100644 --- a/sound/isa/wavefront/wavefront_midi.c +++ b/sound/isa/wavefront/wavefront_midi.c @@ -157,7 +157,7 @@ static void snd_wavefront_midi_output_write(snd_wavefront_card_t *card) } else { if (midi->istimer) { if (--midi->istimer <= 0) - del_timer(&midi->timer); + timer_delete(&midi->timer); } midi->mode[midi->output_mpu] &= ~MPU401_MODE_OUTPUT_TRIGGER; spin_unlock_irqrestore (&midi->virtual, flags); @@ -212,7 +212,7 @@ static void snd_wavefront_midi_output_write(snd_wavefront_card_t *card) __timer: if (midi->istimer) { if (--midi->istimer <= 0) - del_timer(&midi->timer); + timer_delete(&midi->timer); } midi->mode[mpu] &= ~MPU401_MODE_OUTPUT_TRIGGER; spin_unlock_irqrestore (&midi->virtual, flags); diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index 5a84591b13fc..65100f925b72 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -518,7 +518,7 @@ static void snd_card_asihpi_pcm_timer_stop(struct snd_pcm_substream *substream) struct snd_card_asihpi_pcm *dpcm = runtime->private_data; dpcm->respawn_timer = 0; - del_timer(&dpcm->timer); + timer_delete(&dpcm->timer); } static void snd_card_asihpi_pcm_int_start(struct snd_pcm_substream *substream) diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c index 0bb447ccd77c..89e47fa14f70 100644 --- a/sound/pci/ctxfi/cttimer.c +++ b/sound/pci/ctxfi/cttimer.c @@ -112,7 +112,7 @@ static void ct_systimer_stop(struct ct_timer_instance *ti) spin_lock_irqsave(&ti->lock, flags); ti->running = 0; - del_timer(&ti->timer); + timer_delete(&ti->timer); spin_unlock_irqrestore(&ti->lock, flags); } diff --git a/sound/pci/echoaudio/midi.c b/sound/pci/echoaudio/midi.c index 47b2c023ee3d..2ef59184249c 100644 --- a/sound/pci/echoaudio/midi.c +++ b/sound/pci/echoaudio/midi.c @@ -264,7 +264,7 @@ static void snd_echo_midi_output_trigger(struct snd_rawmidi_substream *substream if (chip->tinuse) { chip->tinuse = 0; spin_unlock_irq(&chip->lock); - del_timer_sync(&chip->timer); + timer_delete_sync(&chip->timer); dev_dbg(chip->card->dev, "Timer removed\n"); return; } diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index fd3dfbad397a..dc326face54a 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -1427,7 +1427,7 @@ static void snd_hdsp_midi_output_trigger(struct snd_rawmidi_substream *substream } } else { if (hmidi->istimer && --hmidi->istimer <= 0) - del_timer (&hmidi->timer); + timer_delete(&hmidi->timer); } spin_unlock_irqrestore (&hmidi->lock, flags); if (up) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index f89718b19d23..1935de046f00 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1978,7 +1978,7 @@ snd_hdspm_midi_output_trigger(struct snd_rawmidi_substream *substream, int up) } } else { if (hmidi->istimer && --hmidi->istimer <= 0) - del_timer (&hmidi->timer); + timer_delete(&hmidi->timer); } spin_unlock_irqrestore (&hmidi->lock, flags); if (up) diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 39bf51ff43a1..5a93f4587356 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -354,7 +354,7 @@ static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - del_timer_sync(&dreamcastcard->timer); + timer_delete_sync(&dreamcastcard->timer); cancel_work_sync(&dreamcastcard->spu_dma_work); return 0; } diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 343e3bcef0ca..dba78efadc85 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -4286,7 +4286,7 @@ static void rt5645_i2c_remove(struct i2c_client *i2c) * Since the rt5645_btn_check_callback() can queue jack_detect_work, * the timer need to be delted first */ - del_timer_sync(&rt5645->btn_check_timer); + timer_delete_sync(&rt5645->btn_check_timer); cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); @@ -4318,7 +4318,7 @@ static int rt5645_sys_suspend(struct device *dev) { struct rt5645_priv *rt5645 = dev_get_drvdata(dev); - del_timer_sync(&rt5645->btn_check_timer); + timer_delete_sync(&rt5645->btn_check_timer); cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); diff --git a/sound/soc/fsl/imx-pcm-rpmsg.c b/sound/soc/fsl/imx-pcm-rpmsg.c index 1daf0be3d100..de5f87600fac 100644 --- a/sound/soc/fsl/imx-pcm-rpmsg.c +++ b/sound/soc/fsl/imx-pcm-rpmsg.c @@ -301,7 +301,7 @@ static int imx_rpmsg_pcm_close(struct snd_soc_component *component, info->send_message(msg, info); - del_timer(&info->stream_timer[substream->stream].timer); + timer_delete(&info->stream_timer[substream->stream].timer); rtd->dai_link->ignore_suspend = 0; @@ -452,7 +452,7 @@ static int imx_rpmsg_terminate_all(struct snd_soc_component *component, info->msg[RX_POINTER].r_msg.param.buffer_offset = 0; } - del_timer(&info->stream_timer[substream->stream].timer); + timer_delete(&info->stream_timer[substream->stream].timer); return imx_rpmsg_insert_workqueue(substream, msg, info); } diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c index 8a4423646aea..9b8cb80ec81a 100644 --- a/sound/soc/ti/ams-delta.c +++ b/sound/soc/ti/ams-delta.c @@ -303,7 +303,7 @@ static void cx81801_close(struct tty_struct *tty) struct snd_soc_component *component = tty->disc_data; struct snd_soc_dapm_context *dapm; - del_timer_sync(&cx81801_timer); + timer_delete_sync(&cx81801_timer); /* Prevent the hook switch from further changing the DAPM pins */ INIT_LIST_HEAD(&ams_delta_hook_switch.pins); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 826ac870f246..dcdd7e9e1ae9 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1553,7 +1553,7 @@ void snd_usbmidi_disconnect(struct list_head *p) spin_unlock_irq(&umidi->disc_lock); up_write(&umidi->disc_rwsem); - del_timer_sync(&umidi->error_timer); + timer_delete_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; From 48ad7bbfd53af0d3fe6490a4dd30c169db6f12aa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Apr 2025 19:31:15 +0200 Subject: [PATCH 2133/2157] treewide: Convert new and leftover hrtimer_init() users hrtimer_setup() takes the callback function pointer as argument and initializes the timer completely. Replace hrtimer_init() and the open coded initialization of hrtimer::function with the new setup mechanism. Coccinelle scripted cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- drivers/input/joystick/walkera0701.c | 3 +-- drivers/input/keyboard/gpio_keys.c | 10 ++++------ drivers/net/netdevsim/netdev.c | 4 ++-- drivers/pps/generators/pps_gen_tio.c | 4 ++-- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index 59eea813f258..15370fb82317 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -232,8 +232,7 @@ static void walkera0701_attach(struct parport *pp) goto err_unregister_device; } - hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - w->timer.function = timer_handler; + hrtimer_setup(&w->timer, timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL); w->input_dev = input_allocate_device(); if (!w->input_dev) { diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 5eef66516e37..5c39a217b94c 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -590,9 +590,8 @@ static int gpio_keys_setup_key(struct platform_device *pdev, INIT_DELAYED_WORK(&bdata->work, gpio_keys_gpio_work_func); - hrtimer_init(&bdata->debounce_timer, - CLOCK_REALTIME, HRTIMER_MODE_REL); - bdata->debounce_timer.function = gpio_keys_debounce_timer; + hrtimer_setup(&bdata->debounce_timer, gpio_keys_debounce_timer, + CLOCK_REALTIME, HRTIMER_MODE_REL); isr = gpio_keys_gpio_isr; irqflags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; @@ -628,9 +627,8 @@ static int gpio_keys_setup_key(struct platform_device *pdev, } bdata->release_delay = button->debounce_interval; - hrtimer_init(&bdata->release_timer, - CLOCK_REALTIME, HRTIMER_MODE_REL_HARD); - bdata->release_timer.function = gpio_keys_irq_timer; + hrtimer_setup(&bdata->release_timer, gpio_keys_irq_timer, + CLOCK_REALTIME, HRTIMER_MODE_REL_HARD); isr = gpio_keys_irq_isr; irqflags = 0; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index ddda0c1e7a6d..0e0321a7ddd7 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -441,8 +441,8 @@ static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer) static void nsim_rq_timer_init(struct nsim_rq *rq) { - hrtimer_init(&rq->napi_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - rq->napi_timer.function = nsim_napi_schedule; + hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } static void nsim_enable_napi(struct netdevsim *ns) diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c index 6c46b46c66cd..1d5ffe055463 100644 --- a/drivers/pps/generators/pps_gen_tio.c +++ b/drivers/pps/generators/pps_gen_tio.c @@ -227,8 +227,8 @@ static int pps_gen_tio_probe(struct platform_device *pdev) return PTR_ERR(tio->base); pps_tio_disable(tio); - hrtimer_init(&tio->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tio->timer.function = hrtimer_callback; + hrtimer_setup(&tio->timer, hrtimer_callback, CLOCK_REALTIME, + HRTIMER_MODE_ABS); spin_lock_init(&tio->lock); platform_set_drvdata(pdev, &tio); From 9779489a31d77a7b9cb6f20d2d2caced4e29dbe6 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:10 +0100 Subject: [PATCH 2134/2157] hrtimers: Delete hrtimer_init() hrtimer_init() is now unused. Delete it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/003722f60c7a2a4f8d4ed24fb741aa313b7e5136.1738746927.git.namcao@linutronix.de --- include/linux/hrtimer.h | 2 -- include/linux/hrtimer_types.h | 2 +- kernel/time/hrtimer.c | 20 -------------------- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 88e078871158..1adcba3ddd76 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -231,8 +231,6 @@ static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused) /* Exported timer functions: */ /* Initialize timers: */ -extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_setup_on_stack(struct hrtimer *timer, diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index ad66a3081735..7c5b27daa89d 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -34,7 +34,7 @@ enum hrtimer_restart { * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * - * The hrtimer structure must be initialized by hrtimer_init() + * The hrtimer structure must be initialized by hrtimer_setup() */ struct hrtimer { struct timerqueue_node node; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 0cf8d39d650c..b7555ba7033a 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1640,26 +1640,6 @@ static void __hrtimer_setup(struct hrtimer *timer, timer->function = function; } -/** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: The modes which are relevant for initialization: - * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, - * HRTIMER_MODE_REL_SOFT - * - * The PINNED variants of the above can be handed in, - * but the PINNED bit is ignored as pinning happens - * when the hrtimer is started - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) -{ - debug_init(timer, clock_id, mode); - __hrtimer_init(timer, clock_id, mode); -} -EXPORT_SYMBOL_GPL(hrtimer_init); - /** * hrtimer_setup - initialize a timer to the given clock * @timer: the timer to be initialized From 50177a8b2ec756a03f635444538da928dc5ac488 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:11 +0100 Subject: [PATCH 2135/2157] hrtimers: Switch to use __htimer_setup() __hrtimer_init_sleeper() calls __hrtimer_init() and also sets up the callback function. But there is already __hrtimer_setup() which does both actions. Switch to use __hrtimer_setup() to simplify the code. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/d9a45a51b6a8aa0045310d63f73753bf6b33f385.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index b7555ba7033a..2d2835cf2659 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2061,8 +2061,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, mode |= HRTIMER_MODE_HARD; } - __hrtimer_init(&sl->timer, clock_id, mode); - sl->timer.function = hrtimer_wakeup; + __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode); sl->task = current; } From 87d82cff3829733fa6838492a9215303ad98a61c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:12 +0100 Subject: [PATCH 2136/2157] hrtimers: Merge __hrtimer_init() into __hrtimer_setup() __hrtimer_init() is only called by __hrtimer_setup(). Simplify by merging __hrtimer_init() into __hrtimer_setup(). Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/8a0a847a35f711f66b2d05b57255aa44e7e61279.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 2d2835cf2659..163cde35f0c3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1592,8 +1592,9 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) } } -static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_setup(struct hrtimer *timer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode) { bool softtimer = !!(mode & HRTIMER_MODE_SOFT); struct hrtimer_cpu_base *cpu_base; @@ -1626,13 +1627,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, timer->is_hard = !!(mode & HRTIMER_MODE_HARD); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); -} - -static void __hrtimer_setup(struct hrtimer *timer, - enum hrtimer_restart (*function)(struct hrtimer *), - clockid_t clock_id, enum hrtimer_mode mode) -{ - __hrtimer_init(timer, clock_id, mode); if (WARN_ON_ONCE(!function)) timer->function = hrtimer_dummy_timeout; From 04257da0c99c9d4ff7c5bb93046482e1f7d34938 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:16 +0100 Subject: [PATCH 2137/2157] hrtimers: Make callback function pointer private Make the struct hrtimer::function field private, to prevent users from changing this field in an unsafe way. hrtimer_update_function() should be used if the callback function needs to be changed. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/7d0e6e0c5c59a64a9bea940051aac05d750bc0c2.1738746927.git.namcao@linutronix.de --- include/linux/hrtimer_types.h | 2 +- include/trace/events/timer.h | 4 ++-- kernel/time/hrtimer.c | 8 ++++---- kernel/time/timer_list.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index 7c5b27daa89d..8fbbb6bdf7a1 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -39,7 +39,7 @@ enum hrtimer_restart { struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); + enum hrtimer_restart (*__private function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1ef58a04fc57..f8c906be4cd0 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -235,7 +235,7 @@ TRACE_EVENT(hrtimer_start, TP_fast_assign( __entry->hrtimer = hrtimer; - __entry->function = hrtimer->function; + __entry->function = ACCESS_PRIVATE(hrtimer, function); __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); __entry->mode = mode; @@ -271,7 +271,7 @@ TRACE_EVENT(hrtimer_expire_entry, TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = *now; - __entry->function = hrtimer->function; + __entry->function = ACCESS_PRIVATE(hrtimer, function); ), TP_printk("hrtimer=%p function=%ps now=%llu", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 163cde35f0c3..88ea4bbea9c1 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1316,7 +1316,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; - if (WARN_ON_ONCE(!timer->function)) + if (WARN_ON_ONCE(!ACCESS_PRIVATE(timer, function))) return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft @@ -1629,9 +1629,9 @@ static void __hrtimer_setup(struct hrtimer *timer, timerqueue_init(&timer->node); if (WARN_ON_ONCE(!function)) - timer->function = hrtimer_dummy_timeout; + ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout; else - timer->function = function; + ACCESS_PRIVATE(timer, function) = function; } /** @@ -1743,7 +1743,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - fn = timer->function; + fn = ACCESS_PRIVATE(timer, function); /* * Clear the 'is relative' flag for the TIME_LOW_RES case. If the diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index cfbb46cc4e76..b03d0ada6469 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -46,7 +46,7 @@ static void print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, int idx, u64 now) { - SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, timer->function); + SEQ_printf(m, " #%d: <%p>, %ps", idx, taddr, ACCESS_PRIVATE(timer, function)); SEQ_printf(m, ", S:%02x", timer->state); SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", From 1cc24f2e766c5a6606b834a677bd58991c1b9781 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:17 +0100 Subject: [PATCH 2138/2157] hrtimers: Remove unnecessary NULL check in hrtimer_start_range_ns() The struct hrtimer::function field can only be changed using hrtimer_setup*() or hrtimer_update_function(), and both already null-check 'function'. Therefore, null-checking 'function' in hrtimer_start_range_ns() is not necessary. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/4661c571ee87980c340ccc318fc1a473c0c8f6bc.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 88ea4bbea9c1..e883f65cd175 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1316,8 +1316,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; - if (WARN_ON_ONCE(!ACCESS_PRIVATE(timer, function))) - return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard From fcea1ccf2476ca793b0ca3f80ca23f5a28cbb0b3 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:18 +0100 Subject: [PATCH 2139/2157] hrtimers: Rename __hrtimer_init_sleeper() to __hrtimer_setup_sleeper() All the hrtimer_init*() functions have been renamed to hrtimer_setup*(). Rename __hrtimer_init_sleeper() to __hrtimer_setup_sleeper() as well, to keep the names consistent. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/807694aedad9353421c4a7347629a30c5c31026f.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e883f65cd175..8cb2c85cebe0 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2016,7 +2016,7 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, * Make the enqueue delivery mode check work on RT. If the sleeper * was initialized for hard interrupt delivery, force the mode bit. * This is a special case for hrtimer_sleepers because - * __hrtimer_init_sleeper() determines the delivery mode on RT so the + * __hrtimer_setup_sleeper() determines the delivery mode on RT so the * fiddling with this decision is avoided at the call sites. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) @@ -2026,8 +2026,8 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, } EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); -static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - clockid_t clock_id, enum hrtimer_mode mode) +static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, + clockid_t clock_id, enum hrtimer_mode mode) { /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly @@ -2067,7 +2067,7 @@ void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { debug_init_on_stack(&sl->timer, clock_id, mode); - __hrtimer_init_sleeper(sl, clock_id, mode); + __hrtimer_setup_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); From e9ef2093ad9edec8d8a060e14891952570c82b8b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:19 +0100 Subject: [PATCH 2140/2157] hrtimers: Rename debug_init() to debug_setup() All the hrtimer_init*() functions have been renamed to hrtimer_setup*(). Rename debug_init() to debug_setup() as well, to keep the names consistent. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/4b730c1f79648b16a1c5413f928fdc2e138dfc43.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8cb2c85cebe0..472c29816d79 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -465,9 +465,7 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer, static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif -static inline void -debug_init(struct hrtimer *timer, clockid_t clockid, - enum hrtimer_mode mode) +static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init(timer); trace_hrtimer_init(timer, clockid, mode); @@ -1648,7 +1646,7 @@ static void __hrtimer_setup(struct hrtimer *timer, void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) { - debug_init(timer, clock_id, mode); + debug_setup(timer, clock_id, mode); __hrtimer_setup(timer, function, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup); From 59c9edafc0f3843c3e616eb8136a310c7c552595 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:20 +0100 Subject: [PATCH 2141/2157] hrtimers: Rename debug_init_on_stack() to debug_setup_on_stack() All the hrtimer_init*() functions have been renamed to hrtimer_setup*(). Rename debug_init_on_stack() to debug_setup_on_stack() as well, to keep the names consistent. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/073cf6162779a2f5b12624677d4c49ee7eccc1ed.1738746927.git.namcao@linutronix.de --- kernel/time/hrtimer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 472c29816d79..4bf91fa2d9bd 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -471,8 +471,8 @@ static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hr trace_hrtimer_init(timer, clockid, mode); } -static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid, - enum hrtimer_mode mode) +static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) { debug_hrtimer_init_on_stack(timer); trace_hrtimer_init(timer, clockid, mode); @@ -1665,7 +1665,7 @@ void hrtimer_setup_on_stack(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) { - debug_init_on_stack(timer, clock_id, mode); + debug_setup_on_stack(timer, clock_id, mode); __hrtimer_setup(timer, function, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack); @@ -2064,7 +2064,7 @@ static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { - debug_init_on_stack(&sl->timer, clock_id, mode); + debug_setup_on_stack(&sl->timer, clock_id, mode); __hrtimer_setup_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); From 244132c4e5777fe0a4544ef23afba0d9a50e5ec5 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:21 +0100 Subject: [PATCH 2142/2157] tracing/timers: Rename the hrtimer_init event to hrtimer_setup The function hrtimer_init() doesn't exist anymore. It was replaced by hrtimer_setup(). Thus, rename the hrtimer_init trace event to hrtimer_setup to keep it consistent. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/cba84c3d853c5258aa3a262363a6eac08e2c7afc.1738746927.git.namcao@linutronix.de --- Documentation/trace/ftrace.rst | 4 ++-- include/trace/events/timer.h | 4 ++-- kernel/time/hrtimer.c | 4 ++-- tools/perf/tests/shell/trace_btf_enum.sh | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 2b74f96d09d5..c9e88bf65709 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -3077,7 +3077,7 @@ Notice that we lost the sys_nanosleep. # cat set_ftrace_filter hrtimer_run_queues hrtimer_run_pending - hrtimer_init + hrtimer_setup hrtimer_cancel hrtimer_try_to_cancel hrtimer_forward @@ -3115,7 +3115,7 @@ Again, now we want to append. # cat set_ftrace_filter hrtimer_run_queues hrtimer_run_pending - hrtimer_init + hrtimer_setup hrtimer_cancel hrtimer_try_to_cancel hrtimer_forward diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index f8c906be4cd0..1641ae3e6ca0 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -185,12 +185,12 @@ TRACE_EVENT(timer_base_idle, { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** - * hrtimer_init - called when the hrtimer is initialized + * hrtimer_setup - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer * @clockid: the hrtimers clock * @mode: the hrtimers mode */ -TRACE_EVENT(hrtimer_init, +TRACE_EVENT(hrtimer_setup, TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 4bf91fa2d9bd..517ee2590a29 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -468,14 +468,14 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init(timer); - trace_hrtimer_init(timer, clockid, mode); + trace_hrtimer_setup(timer, clockid, mode); } static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init_on_stack(timer); - trace_hrtimer_init(timer, clockid, mode); + trace_hrtimer_setup(timer, clockid, mode); } static inline void debug_activate(struct hrtimer *timer, diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 60b3fa254cf6..f0b49f7fb57d 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -6,7 +6,7 @@ err=0 set -e syscall="landlock_add_rule" -non_syscall="timer:hrtimer_init,timer:hrtimer_start" +non_syscall="timer:hrtimer_setup,timer:hrtimer_start" TESTPROG="perf test -w landlock" From 427011db477dfb8cca001e492c2b312fdf7c7173 Mon Sep 17 00:00:00 2001 From: Artur Rojek Date: Sun, 16 Feb 2025 18:55:44 +0100 Subject: [PATCH 2143/2157] sh: Align .bss section padding to 8-byte boundary J2-based devices expect to find a device tree blob at the end of the .bss section. As of a77725a9a3c5 ("scripts/dtc: Update to upstream version v1.6.1-19-g0a3a9d3449c8"), libfdt enforces 8-byte alignment for the DTB, causing J2 devices to fail early in sh_fdt_init(). As the J2 loader firmware calculates the DTB location based on the kernel image .bss section size rather than the __bss_stop symbol offset, the required alignment can't be enforced with BSS_SECTION(0, PAGE_SIZE, 8). To fix this, inline a modified version of the above macro which grows .bss by the required size. While this change affects all existing SH boards, it should be benign on platforms which don't need this alignment. Signed-off-by: Artur Rojek Reviewed-by: John Paul Adrian Glaubitz Tested-by: Rob Landley Signed-off-by: John Paul Adrian Glaubitz --- arch/sh/kernel/vmlinux.lds.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 9644fe187a3f..008c30289eaa 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -71,7 +71,20 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; - BSS_SECTION(0, PAGE_SIZE, 4) + __bss_start = .; + SBSS(0) + . = ALIGN(PAGE_SIZE); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + BSS_FIRST_SECTIONS + . = ALIGN(PAGE_SIZE); + *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); + *(.dynbss) + *(BSS_MAIN) + *(COMMON) + . = ALIGN(8); + } + __bss_stop = .; _end = . ; STABS_DEBUG From 5f2efd67a17e5f4e2fccdb86014efaf8725f57a7 Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Sun, 23 Mar 2025 20:13:30 +0100 Subject: [PATCH 2144/2157] sh: defconfig: Drop obsolete CONFIG_NET_CLS_TCINDEX This option was removed from Kconfig in 8c710f75256b ("net/sched: Retire tcindex classifier") but from the defconfigs. Fixes: 8c710f75256b ("net/sched: Retire tcindex classifier") Signed-off-by: Johan Korsnes Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz Reviewed-by: Geert Uytterhoeven Reviewed-by: John Paul Adrian Glaubitz Signed-off-by: John Paul Adrian Glaubitz --- arch/sh/configs/se7712_defconfig | 1 - arch/sh/configs/se7721_defconfig | 1 - arch/sh/configs/sh7710voipgw_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 20f07aee5bde..49a4961889de 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -57,7 +57,6 @@ CONFIG_NET_SCH_TBF=y CONFIG_NET_SCH_GRED=y CONFIG_NET_SCH_DSMARK=y CONFIG_NET_SCH_NETEM=y -CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y CONFIG_MTD=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index 00862d3c030d..de293792db84 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -56,7 +56,6 @@ CONFIG_NET_SCH_TBF=y CONFIG_NET_SCH_GRED=y CONFIG_NET_SCH_DSMARK=y CONFIG_NET_SCH_NETEM=y -CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y CONFIG_MTD=y diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index 99a5d0760532..5b151bb2bc43 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -27,7 +27,6 @@ CONFIG_NETFILTER=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=y CONFIG_NET_CLS_BASIC=y -CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_U32=y CONFIG_MTD=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 99bc0e889287..e2928311a126 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -119,7 +119,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m From 3b8241f64c469e449e862cf2bdc50b879fa18f93 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Dec 2024 09:30:53 +0900 Subject: [PATCH 2145/2157] nios2: migrate to the generic rule for built-in DTB Commit 654102df2ac2 ("kbuild: add generic support for built-in boot DTBs") introduced generic support for built-in DTBs. Select GENERIC_BUILTIN_DTB when built-in DTB support is enabled. To keep consistency across architectures, this commit also renames CONFIG_NIOS2_DTB_SOURCE_BOOL to CONFIG_BUILTIN_DTB, and CONFIG_NIOS2_DTB_SOURCE to CONFIG_BUILTIN_DTB_NAME. Signed-off-by: Masahiro Yamada --- arch/nios2/Kbuild | 2 +- arch/nios2/boot/dts/Makefile | 4 ++-- arch/nios2/kernel/prom.c | 2 +- arch/nios2/platform/Kconfig.platform | 11 ++++++----- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/nios2/Kbuild b/arch/nios2/Kbuild index fc2952edd2de..fa64c5954b20 100644 --- a/arch/nios2/Kbuild +++ b/arch/nios2/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ platform/ boot/dts/ +obj-y += kernel/ mm/ platform/ # for cleaning subdir- += boot diff --git a/arch/nios2/boot/dts/Makefile b/arch/nios2/boot/dts/Makefile index 1a2e8996bec7..1b8f41c4154f 100644 --- a/arch/nios2/boot/dts/Makefile +++ b/arch/nios2/boot/dts/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := $(patsubst %.dts,%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE)) +dtb-y := $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME)) -dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) +dtb-$(CONFIG_OF_ALL_DTBS) += $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index db049249766f..4f8c14da6490 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -32,7 +32,7 @@ void __init early_init_devtree(void *params) } #endif -#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL +#ifdef CONFIG_BUILTIN_DTB if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER) params = (void *)__dtb_start; #endif diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index e849daff6fd1..c75cadd92388 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -35,19 +35,20 @@ config NIOS2_DTB_PHYS_ADDR help Physical address of a dtb blob. -config NIOS2_DTB_SOURCE_BOOL +config BUILTIN_DTB bool "Compile and link device tree into kernel image" depends on !COMPILE_TEST + select GENERIC_BUILTIN_DTB help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. -config NIOS2_DTB_SOURCE - string "Device tree source file" - depends on NIOS2_DTB_SOURCE_BOOL +config BUILTIN_DTB_NAME + string "Built-in device tree name" + depends on BUILTIN_DTB default "" help - Absolute path to the device tree source (dts) file describing your + Relative path to the device tree without suffix describing your system. comment "Nios II instructions" From a26fe287eed112b4e21e854f173c8918a6a8596d Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Fri, 28 Mar 2025 14:28:37 +0000 Subject: [PATCH 2146/2157] kconfig: merge_config: use an empty file as initfile The scripts/kconfig/merge_config.sh script requires an existing $INITFILE (or the $1 argument) as a base file for merging Kconfig fragments. However, an empty $INITFILE can serve as an initial starting point, later referenced by the KCONFIG_ALLCONFIG Makefile variable if -m is not used. This variable can point to any configuration file containing preset config symbols (the merged output) as stated in Documentation/kbuild/kconfig.rst. When -m is used $INITFILE will contain just the merge output requiring the user to run make (i.e. KCONFIG_ALLCONFIG=<$INITFILE> make or make olddefconfig). Instead of failing when `$INITFILE` is missing, create an empty file and use it as the starting point for merges. Signed-off-by: Daniel Gomez Signed-off-by: Masahiro Yamada --- scripts/kconfig/merge_config.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 0b7952471c18..79c09b378be8 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -112,8 +112,8 @@ INITFILE=$1 shift; if [ ! -r "$INITFILE" ]; then - echo "The base file '$INITFILE' does not exist. Exit." >&2 - exit 1 + echo "The base file '$INITFILE' does not exist. Creating one..." >&2 + touch "$INITFILE" fi MERGE_LIST=$* From a7c699d090a1f3795c3271c2b399230e182db06e Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Mon, 31 Mar 2025 16:46:32 -0600 Subject: [PATCH 2147/2157] kbuild: rpm-pkg: build a debuginfo RPM The rpm-pkg make target currently suffers from a few issues related to debuginfo: 1. debuginfo for things built into the kernel (vmlinux) is not available in any RPM produced by make rpm-pkg. This makes using tools like systemtap against a make rpm-pkg kernel impossible. 2. debug source for the kernel is not available. This means that commands like 'disas /s' in gdb, which display source intermixed with assembly, can only print file names/line numbers which then must be painstakingly resolved to actual source in a separate editor. 3. debuginfo for modules is available, but it remains bundled with the .ko files that contain module code, in the main kernel RPM. This is a waste of space for users who do not need to debug the kernel (i.e. most users). Address all of these issues by additionally building a debuginfo RPM when the kernel configuration allows for it, in line with standard patterns followed by RPM distributors. With these changes: 1. systemtap now works (when these changes are backported to 6.11, since systemtap lags a bit behind in compatibility), as verified by the following simple test script: # stap -e 'probe kernel.function("do_sys_open").call { printf("%s\n", $$parms); }' dfd=0xffffffffffffff9c filename=0x7fe18800b160 flags=0x88800 mode=0x0 ... 2. disas /s works correctly in gdb, with source and disassembly interspersed: # gdb vmlinux --batch -ex 'disas /s blk_op_str' Dump of assembler code for function blk_op_str: block/blk-core.c: 125 { 0xffffffff814c8740 <+0>: endbr64 127 128 if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) 0xffffffff814c8744 <+4>: mov $0xffffffff824a7378,%rax 0xffffffff814c874b <+11>: cmp $0x23,%edi 0xffffffff814c874e <+14>: ja 0xffffffff814c8768 0xffffffff814c8750 <+16>: mov %edi,%edi 126 const char *op_str = "UNKNOWN"; 0xffffffff814c8752 <+18>: mov $0xffffffff824a7378,%rdx 127 128 if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) 0xffffffff814c8759 <+25>: mov -0x7dfa0160(,%rdi,8),%rax 126 const char *op_str = "UNKNOWN"; 0xffffffff814c8761 <+33>: test %rax,%rax 0xffffffff814c8764 <+36>: cmove %rdx,%rax 129 op_str = blk_op_name[op]; 130 131 return op_str; 132 } 0xffffffff814c8768 <+40>: jmp 0xffffffff81d01360 <__x86_return_thunk> End of assembler dump. 3. The size of the main kernel package goes down substantially, especially if many modules are built (quite typical). Here is a comparison of installed size of the kernel package (configured with allmodconfig, dwarf4 debuginfo, and module compression turned off) before and after this patch: # rpm -qi kernel-6.13* | grep -E '^(Version|Size)' Version : 6.13.0postpatch+ Size : 1382874089 Version : 6.13.0prepatch+ Size : 17870795887 This is a ~92% size reduction. Note that a debuginfo package can only be produced if the following configs are set: - CONFIG_DEBUG_INFO=y - CONFIG_MODULE_COMPRESS=n - CONFIG_DEBUG_INFO_SPLIT=n The first of these is obvious - we can't produce debuginfo if the build does not generate it. The second two requirements can in principle be removed, but doing so is difficult with the current approach, which uses a generic rpmbuild script find-debuginfo.sh that processes all packaged executables. If we want to remove those requirements the best path forward is likely to add some debuginfo extraction/installation logic to the modules_install target (controllable by flags). That way, it's easier to operate on modules before they're compressed, and the logic can be reused by all packaging targets. Signed-off-by: Uday Shankar Signed-off-by: Masahiro Yamada --- scripts/package/kernel.spec | 46 +++++++++++++++++++++++++++++++++++-- scripts/package/mkspec | 10 ++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index ac3e5ac01d8a..726f34e11960 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -2,8 +2,6 @@ %{!?_arch: %define _arch dummy} %{!?make: %define make make} %define makeflags %{?_smp_mflags} ARCH=%{ARCH} -%define __spec_install_post /usr/lib/rpm/brp-compress || : -%define debug_package %{nil} Name: kernel Summary: The Linux Kernel @@ -46,6 +44,36 @@ This package provides kernel headers and makefiles sufficient to build modules against the %{version} kernel package. %endif +%if %{with_debuginfo} +# list of debuginfo-related options taken from distribution kernel.spec +# files +%undefine _include_minidebuginfo +%undefine _find_debuginfo_dwz_opts +%undefine _unique_build_ids +%undefine _unique_debug_names +%undefine _unique_debug_srcs +%undefine _debugsource_packages +%undefine _debuginfo_subpackages +%global _find_debuginfo_opts -r +%global _missing_build_ids_terminate_build 1 +%global _no_recompute_build_ids 1 +%{debug_package} +%endif +# some (but not all) versions of rpmbuild emit %%debug_package with +# %%install. since we've already emitted it manually, that would cause +# a package redefinition error. ensure that doesn't happen +%define debug_package %{nil} + +# later, we make all modules executable so that find-debuginfo.sh strips +# them up. but they don't actually need to be executable, so remove the +# executable bit, taking care to do it _after_ find-debuginfo.sh has run +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}} \ + %{__arch_install_post} \ + %{__os_install_post} \ + find %{buildroot}/lib/modules/%{KERNELRELEASE} -name "*.ko" -type f \\\ + | xargs --no-run-if-empty chmod u-x + %prep %setup -q -n linux cp %{SOURCE1} .config @@ -89,8 +117,22 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA echo "%exclude /lib/modules/%{KERNELRELEASE}/build" } > %{buildroot}/kernel.list +# make modules executable so that find-debuginfo.sh strips them. this +# will be undone later in %%__spec_install_post +find %{buildroot}/lib/modules/%{KERNELRELEASE} -name "*.ko" -type f \ + | xargs --no-run-if-empty chmod u+x + +%if %{with_debuginfo} +# copying vmlinux directly to the debug directory means it will not get +# stripped (but its source paths will still be collected + fixed up) +mkdir -p %{buildroot}/usr/lib/debug/lib/modules/%{KERNELRELEASE} +cp vmlinux %{buildroot}/usr/lib/debug/lib/modules/%{KERNELRELEASE} +%endif + %clean rm -rf %{buildroot} +rm -f debugfiles.list debuglinks.list debugsourcefiles.list debugsources.list \ + elfbins.list %post if [ -x /usr/bin/kernel-install ]; then diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 4dc1466dfc81..c7375bfc25a9 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -23,6 +23,16 @@ else echo '%define with_devel 0' fi +# debuginfo package generation uses find-debuginfo.sh under the hood, +# which only works on uncompressed modules that contain debuginfo +if grep -q CONFIG_DEBUG_INFO=y include/config/auto.conf && + (! grep -q CONFIG_MODULE_COMPRESS=y include/config/auto.conf) && + (! grep -q CONFIG_DEBUG_INFO_SPLIT=y include/config/auto.conf); then +echo '%define with_debuginfo %{?_without_debuginfo: 0} %{?!_without_debuginfo: 1}' +else +echo '%define with_debuginfo 0' +fi + cat< Date: Wed, 19 Mar 2025 15:27:31 -0500 Subject: [PATCH 2148/2157] tools/power turbostat: Increase CPU_SUBSET_MAXCPUS to 8192 On systems with >= 1024 cpus (in my case 1152), turbostat fails with the error output: "turbostat: /sys/fs/cgroup/cpuset.cpus.effective: cpu str malformat 0-1151" A similar error appears with the use of turbostat --cpu when the inputted cpu range contains a cpu number >= 1024: # turbostat -c 1100-1151 "--cpu 1100-1151" malformed ... Both errors are caused by parse_cpu_str() reaching its limit of CPU_SUBSET_MAXCPUS. It's a good idea to limit the maximum cpu number being parsed, but 1024 is too low. For a small increase in compute and allocated memory, increasing CPU_SUBSET_MAXCPUS brings support for parsing cpu numbers >= 1024. Increase CPU_SUBSET_MAXCPUS to 8192, a common setting for CONFIG_NR_CPUS on x86_64. Signed-off-by: Justin Ernst Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f29e47fe4249..218aca958923 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1121,7 +1121,7 @@ void probe_platform_features(unsigned int family, unsigned int model) int backwards_count; char *progname; -#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ +#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 From f729775f79a9c942c6c82ed6b44bd030afe10423 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 11:18:39 -0400 Subject: [PATCH 2149/2157] tools/power turbostat: report CoreThr per measurement interval The CoreThr column displays total thermal throttling events since boot time. Change it to report events during the measurement interval. This is more useful for showing a user the current conditions. Total events since boot time are still available to the user via /sys/devices/system/cpu/cpu*/thermal_throttle/* Document CoreThr on turbostat.8 Fixes: eae97e053fe30 ("turbostat: Support thermal throttle count print") Reported-by: Arjan van de Ven Signed-off-by: Len Brown Cc: Chen Yu --- tools/power/x86/turbostat/turbostat.8 | 2 ++ tools/power/x86/turbostat/turbostat.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 52d727e29ea7..144565151e1e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -172,6 +172,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. .PP +\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/* +.PP \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used. .PP \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 218aca958923..70e17d4ad9b6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3485,7 +3485,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; - old->core_throt_cnt = new->core_throt_cnt; + old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); From 3ae8508663372b93c5556a887e96ed0ca5df0711 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:23:22 -0400 Subject: [PATCH 2150/2157] tools/power turbostat: Document GNR UncMHz domain convention Document that on Intel Granite Rapids Systems, Uncore domains 0-2 are CPU domains, and uncore domains 3-4 are IO domains. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 144565151e1e..e86493880c16 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -205,6 +205,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. From f8b136ef2605c1bf62020462d10e35228760aa19 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 19 Mar 2025 08:53:07 +0800 Subject: [PATCH 2151/2157] tools/power turbostat: Restore GFX sysfs fflush() call Do fflush() to discard the buffered data, before each read of the graphics sysfs knobs. Fixes: ba99a4fc8c24 ("tools/power turbostat: Remove unnecessary fflush() call") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 70e17d4ad9b6..c9a34c16c7a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6039,6 +6039,7 @@ int snapshot_graphics(int idx) int retval; rewind(gfx_info[idx].fp); + fflush(gfx_info[idx].fp); switch (idx) { case GFX_rc6: From 994633894f208a0151baaee1688ab3c431912553 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:53:18 -0400 Subject: [PATCH 2152/2157] tools/power turbostat: re-factor sysfs code Probe cpuidle "sysfs" residency and counts separately, since soon we will make one disabled on, and the other disabled off. Clarify that some BIC (build-in-counters) are actually "groups". since we're about to re-name some of those groups. no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c9a34c16c7a8..df0391bedcde 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -273,10 +273,10 @@ struct msr_counter bic[] = { #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -2354,16 +2354,16 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -10260,7 +10260,7 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; @@ -10304,6 +10304,16 @@ void probe_sysfs(void) if (state < min_state) min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int min_state = 1024, max_state = 0; + char *sp; for (state = 10; state >= 0; --state) { @@ -10602,7 +10612,8 @@ int main(int argc, char **argv) print_bootcmd(); } - probe_sysfs(); + probe_cpuidle_residency(); + probe_cpuidle_counts(); if (!getuid()) set_rlimit(); From 6f110a5e4f9977c31ce76fefbfef6fd4eab6bfb7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 10:00:04 -0700 Subject: [PATCH 2153/2157] Disable SLUB_TINY for build testing ... and don't error out so hard on missing module descriptions. Before commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") we used to warn about missing module descriptions, but only when building with extra warnigns (ie 'W=1'). After that commit the warning became an unconditional hard error. And it turns out not all modules have been converted despite the claims to the contrary. As reported by Damian Tometzki, the slub KUnit test didn't have a module description, and apparently nobody ever really noticed. The reason nobody noticed seems to be that the slub KUnit tests get disabled by SLUB_TINY, which also ends up disabling a lot of other code, both in tests and in slub itself. And so anybody doing full build tests didn't actually see this failre. So let's disable SLUB_TINY for build-only tests, since it clearly ends up limiting build coverage. Also turn the missing module descriptions error back into a warning, but let's keep it around for non-'W=1' builds. Reported-by: Damian Tometzki Link: https://lore.kernel.org/all/01070196099fd059-e8463438-7b1b-4ec8-816d-173874be9966-000000@eu-central-1.amazonses.com/ Cc: Masahiro Yamada Cc: Jeff Johnson Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- scripts/mod/modpost.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index d3fb3762887b..e113f713b493 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -201,7 +201,7 @@ config KVFREE_RCU_BATCHED config SLUB_TINY bool "Configure for minimal memory footprint" - depends on EXPERT + depends on EXPERT && !COMPILE_TEST select SLAB_MERGE_DEFAULT help Configures the slab allocator in a way to achieve minimal memory diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 92627e8d0e16..be89921d60b6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1603,7 +1603,7 @@ static void read_symbols(const char *modname) } if (!get_modinfo(&info, "description")) - error("missing MODULE_DESCRIPTION() in %s\n", modname); + warn("missing MODULE_DESCRIPTION() in %s\n", modname); } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { From ec4acd3166d8a7a03b059d01b9c6f11a658e833f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:29:57 -0400 Subject: [PATCH 2154/2157] tools/power turbostat: disable "cpuidle" invocation counters, by default Create "pct_idle" counter group, the sofware notion of residency so it can now be singled out, independent of other counter groups. Create "cpuidle" group, the cpuidle invocation counts. Disable "cpuidle", by default. Create "swidle" = "cpuidle" + "pct_idle". Undocument "sysfs", the old name for "swidle", but keep it working for backwards compatibilty. Create "hwidle", all the HW idle counters Modify "idle", enabled by default "idle" = "hwidle" + "pct_idle" (and now excludes "cpuidle") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 12 +++++----- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e86493880c16..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,15 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index df0391bedcde..ab184f95cdaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) +#define BIC_pct_idle (1ULL << 63) #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -2362,6 +2366,15 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { retval |= BIC_GROUP_IDLE; break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; + break; } else if (!strcmp(name_list, "frequency")) { retval |= BIC_GROUP_FREQUENCY; break; @@ -2372,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10269,6 +10283,9 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10291,7 +10308,7 @@ void probe_cpuidle_residency(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) @@ -10315,6 +10332,9 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_cpuidle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10327,7 +10347,7 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) From 03e00e373cab981ad808271b2650700cfa0fbda6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:49:20 -0400 Subject: [PATCH 2155/2157] tools/power turbostat: v2025.05.06 Support up to 8192 processors Add cpuidle governor debug telemetry, disabled by default Update default output to exclude cpuidle invocation counts Bug fixes Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab184f95cdaf..1b9fdc1a7ee8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9594,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 From 0efdedb3358aa78102967f242379686f94315830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 2 Apr 2025 21:21:57 +0100 Subject: [PATCH 2156/2157] tools/include: make uapi/linux/types.h usable from assembly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "real" linux/types.h UAPI header gracefully degrades to a NOOP when included from assembly code. Mirror this behaviour in the tools/ variant. Test for __ASSEMBLER__ over __ASSEMBLY__ as the former is provided by the toolchain automatically. Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/af553c62-ca2f-4956-932c-dd6e3a126f58@sirena.org.uk/ Fixes: c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20250321-uapi-consistency-v1-1-439070118dc0@linutronix.de Signed-off-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Linus Torvalds --- tools/include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ From 0af2f6be1b4281385b618cb86ad946eded089ac8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 13:11:33 -0700 Subject: [PATCH 2157/2157] Linux 6.15-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e55726a71d95..38689a0c3605 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION*